1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ lib/
 __pycache__/
 *.py[cod]
 _vizdoom.ini
+/dumped
6 changes: 5 additions & 1 deletion arnold.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+import sys
 import vizdoom
 from src.utils import get_dump_path
 from src.logger import get_logger
@@ -16,7 +17,10 @@

 # create a directory for the experiment / create a logger
 dump_path = get_dump_path(args.main_dump_path, args.exp_name)
-logger = get_logger(filepath=os.path.join(dump_path, 'train.log'))
+log_path = os.path.join(dump_path, 'train.log')
+if sys.platform == "win32":
+    log_path = log_path.replace('\\', '/')
+logger = get_logger(filepath=log_path)
 logger.info('========== Running DOOM ==========')
 logger.info('Experiment will be saved in: %s' % dump_path)

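For context, a minimal standalone sketch of the Windows path handling added above (the experiment name is hypothetical). On Windows, os.path.join emits backslash separators; the Win32 API also accepts forward slashes, so normalizing keeps log paths consistent across platforms:

import os
import sys

dump_path = os.path.join("dumped", "my_experiment")  # hypothetical experiment dir
log_path = os.path.join(dump_path, "train.log")      # 'dumped\\my_experiment\\train.log' on Windows
if sys.platform == "win32":
    log_path = log_path.replace("\\", "/")           # -> 'dumped/my_experiment/train.log'
print(log_path)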
4 changes: 4 additions & 0 deletions src/logger.py
@@ -39,6 +39,10 @@ def get_logger(filepath=None):
     log_formatter = LogFormatter()

     # create file handler and set level to debug
+    file_dir = os.path.split(filepath)[0]
+    if file_dir and not os.path.exists(file_dir):
+        os.makedirs(file_dir)
+        print("Directory", file_dir, "didn't exist, and was created", flush=True)
     file_handler = logging.FileHandler(filepath, "a")
     file_handler.setLevel(logging.DEBUG)
     file_handler.setFormatter(log_formatter)
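logging.FileHandler does not create missing directories, so opening the log file fails when the dump directory does not exist yet. A minimal sketch of the guard with a hypothetical filepath; os.makedirs(file_dir, exist_ok=True) is an equivalent, race-free alternative:

import logging
import os

filepath = "dumped/my_experiment/train.log"  # hypothetical
file_dir = os.path.split(filepath)[0]
if file_dir and not os.path.exists(file_dir):
    os.makedirs(file_dir)
file_handler = logging.FileHandler(filepath, "a")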
3 changes: 2 additions & 1 deletion src/model/bucketed_embedding.py
@@ -9,4 +9,5 @@ def __init__(self, bucket_size, num_embeddings, *args, **kwargs):
         super(BucketedEmbedding, self).__init__(real_num_embeddings, *args, **kwargs)

     def forward(self, indices):
-        return super(BucketedEmbedding, self).forward(indices.div(self.bucket_size))
+        x = super(BucketedEmbedding, self).forward(indices // self.bucket_size)
+        return x.unsqueeze_(0) if x.ndim < 2 else x
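The switch from indices.div(self.bucket_size) to indices // self.bucket_size matters on modern PyTorch: Tensor.div performs true division and yields floats, while // keeps the integer floor division that embedding lookups require. A quick sketch with made-up values:

import torch

indices = torch.tensor([0, 3, 7, 12])
bucket_size = 4
print(indices // bucket_size)  # tensor([0, 0, 1, 3]) -- valid integer embedding indices
print(indices / bucket_size)   # tensor([0.0000, 0.7500, 1.7500, 3.0000]) -- floats, unusable as indices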
11 changes: 5 additions & 6 deletions src/model/dqn/base.py
@@ -1,7 +1,6 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger

 from ...utils import bool_flag
@@ -78,7 +77,7 @@ def base_forward(self, x_screens, x_variables):

         # create state input
         if self.n_variables:
-            output = torch.cat([conv_output] + embeddings, 1)
+            output = torch.cat([conv_output] + embeddings, dim=1)
         else:
             output = conv_output

@@ -120,7 +119,7 @@ def __init__(self, params):

     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
-        x = Variable(x)
+        x = x.detach()
         return x.cuda() if self.cuda else x

     def reset(self):
@@ -185,8 +184,8 @@ def prepare_f_train_args(self, screens, variables, features,
         return screens, variables, features, actions, rewards, isfinal

     def register_loss(self, loss_history, loss_sc, loss_gf):
-        loss_history['dqn_loss'].append(loss_sc.data[0])
-        loss_history['gf_loss'].append(loss_gf.data[0]
+        loss_history['dqn_loss'].append(loss_sc.cpu().item())
+        loss_history['gf_loss'].append(loss_gf.cpu().item()
                                        if self.n_features else 0)

     def next_action(self, last_states, save_graph=False):
@@ -205,7 +204,7 @@
         if pred_features is not None:
             assert pred_features.size() == (1, seq_len, self.module.n_features)
             pred_features = pred_features[0, -1]
-        action_id = scores.data.max(0)[1][0]
+        action_id = scores.data.max(0)[1].item()
         self.pred_features = pred_features
         return action_id

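These edits follow the PyTorch 0.4 Variable/Tensor merge: Variable(x) is a redundant wrapper, indexing a 0-dim loss with .data[0] raises an error on modern versions, and .item() is the supported scalar accessor. A minimal sketch of both patterns:

import torch

loss = (torch.randn(5) ** 2).mean()  # a 0-dim tensor
print(loss.item())                   # old loss.data[0] raises IndexError on modern PyTorch

x = torch.randn(3, requires_grad=True)
y = x.detach()                       # replaces Variable(x); shares storage, carries no autograd history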
14 changes: 7 additions & 7 deletions src/model/dqn/feedforward.py
@@ -1,9 +1,7 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
-

 class DQNModuleFeedforward(DQNModuleBase):

     def __init__(self, params):
@@ -21,10 +19,13 @@ def forward(self, x_screens, x_variables):
         """

         batch_size = x_screens.size(0)
+
         assert x_screens.ndimension() == 4
         assert len(x_variables) == self.n_variables
-        assert all(x.ndimension() == 1 and x.size(0) == batch_size
-                   for x in x_variables)
+
+        if batch_size > 1:
+            assert all(x.ndimension() == 1 and x.size(0) == batch_size for x in x_variables), \
+                f"{[(x.ndimension(), x.size(), x) for x in x_variables]} {batch_size}"

         # state input (screen / depth / labels buffer + variables)
         state_input, output_gf = self.base_forward(x_screens, x_variables)
@@ -45,7 +46,6 @@ class DQNFeedforward(DQN):
     def f_eval(self, last_states):

         screens, variables = self.prepare_f_eval_args(last_states)
-
         return self.module(
             screens.view(1, -1, *self.screen_shape[1:]),
             [variables[-1, i] for i in range(self.params.n_variables)]
@@ -74,7 +74,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )

         # compute scores
-        mask = torch.ByteTensor(output_sc1.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc1.size()).fill_(0)
         for i in range(batch_size):
             mask[i, int(actions[i, -1])] = 1
         scores1 = output_sc1.masked_select(self.get_var(mask))
@@ -83,7 +83,7 @@
         )

         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, Variable(scores2.data))
+        loss_sc = self.loss_fn_sc(scores1, scores2.detach())

         # game features loss
         loss_gf = 0
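The ByteTensor to BoolTensor change tracks PyTorch 1.2+, where masked_select and masked indexing with uint8 masks are deprecated in favor of bool masks. A self-contained sketch with hypothetical sizes:

import torch

scores = torch.randn(2, 3)  # (batch_size, n_actions), hypothetical
actions = torch.tensor([2, 0])
mask = torch.zeros(scores.size(), dtype=torch.bool)
for i in range(scores.size(0)):
    mask[i, int(actions[i])] = True
print(scores.masked_select(mask))  # one selected score per row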
7 changes: 3 additions & 4 deletions src/model/dqn/recurrent.py
@@ -1,5 +1,4 @@
 import torch
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
 from ..utils import get_recurrent_module
 from ...utils import bool_flag
@@ -63,7 +62,7 @@ def __init__(self, params):
         h_0 = torch.FloatTensor(params.n_rec_layers, params.batch_size,
                                 params.hidden_dim).zero_()
         self.init_state_t = self.get_var(h_0)
-        self.init_state_e = Variable(self.init_state_t[:, :1, :].data.clone(), volatile=True)
+        self.init_state_e = self.init_state_t[:, :1, :].detach().clone()
         if params.recurrence == 'lstm':
             self.init_state_t = (self.init_state_t, self.init_state_t)
             self.init_state_e = (self.init_state_e, self.init_state_e)
@@ -116,7 +115,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )

         # compute scores
-        mask = torch.ByteTensor(output_sc.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc.size()).fill_(0)
         for i in range(batch_size):
             for j in range(seq_len - 1):
                 mask[i, j, int(actions[i, j])] = 1
@@ -128,7 +127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,

         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            Variable(scores2.data[:, -self.params.n_rec_updates:])
+            scores2.detach()[:, -self.params.n_rec_updates:]
         )

         # game features loss
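volatile=True disappeared in the PyTorch 0.4 merge; a detached clone (or code run under torch.no_grad()) is the current way to keep an evaluation-time initial state out of the autograd graph. A sketch with hypothetical layer/batch/hidden sizes:

import torch

init_state_t = torch.zeros(1, 32, 512)                  # (n_rec_layers, batch_size, hidden_dim), hypothetical
init_state_e = init_state_t[:, :1, :].detach().clone()  # eval state for a batch of 1, no graph attached
assert init_state_e.shape == (1, 1, 512)
assert not init_state_e.requires_grad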
3 changes: 1 addition & 2 deletions src/model/utils.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 from .bucketed_embedding import BucketedEmbedding

@@ -76,7 +75,7 @@ def build_CNN_network(module, params):
     ]))

     # get the size of the convolution network output
-    x = Variable(torch.FloatTensor(1, in_channels, height, width).zero_())
+    x = torch.FloatTensor(1, in_channels, height, width).zero_()
     module.conv_output_dim = module.conv(x).nelement()


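The dummy-forward trick above needs only a plain tensor now that Variable is gone: run one zero-filled input through the conv stack and count its output elements to size the following linear layer. A sketch with hypothetical layer and input sizes:

import torch
import torch.nn as nn

conv = nn.Sequential(nn.Conv2d(3, 8, kernel_size=5, stride=2), nn.ReLU())
x = torch.zeros(1, 3, 60, 108)        # (1, in_channels, height, width)
conv_output_dim = conv(x).nelement()  # flattened conv output size, here 8 * 28 * 52
print(conv_output_dim)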
2 changes: 1 addition & 1 deletion src/replay_memory.py
@@ -18,7 +18,7 @@ def __init__(self, max_size, screen_shape, n_variables, n_features):
         self.features = np.zeros((max_size, n_features), dtype=np.int32)
         self.actions = np.zeros(max_size, dtype=np.int32)
         self.rewards = np.zeros(max_size, dtype=np.float32)
-        self.isfinal = np.zeros(max_size, dtype=np.bool)
+        self.isfinal = np.zeros(max_size, dtype=bool)

     @property
     def size(self):
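np.bool was only an alias for the builtin bool; it was deprecated in NumPy 1.20 and removed in 1.24, so the builtin is the drop-in replacement:

import numpy as np

isfinal = np.zeros(8, dtype=bool)  # max_size=8 is hypothetical
isfinal[3] = True
print(isfinal.dtype)               # bool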
4 changes: 2 additions & 2 deletions src/trainer.py
@@ -59,8 +59,8 @@ def run(self):
             self.n_iter += 1

             if self.game.is_final():
-                self.game.reset()       # dead or end of episode
-                self.network.reset()    # reset internal state (RNNs only)
+                self.game.new_episode() # dead or end of episode
+                self.network.reset()    # reset internal state (RNNs only)

             self.game.observe_state(self.params, last_states)

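ViZDoom restarts an episode with new_episode(); DoomGame has no reset(), which presumably motivates this fix (assuming the repository's Game wrapper forwards to the underlying DoomGame). A hedged sketch against the raw vizdoom API, with a hypothetical scenario path:

import vizdoom

game = vizdoom.DoomGame()
game.load_config("scenarios/basic.cfg")  # hypothetical config path
game.init()
if game.is_episode_finished():
    game.new_episode()                   # dead or end of episode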
2 changes: 1 addition & 1 deletion src/utils.py
@@ -90,7 +90,7 @@ def get_optimizer(s):
         raise Exception('Unknown optimization method: "%s"' % method)

     # check that we give good parameters to the optimizer
-    expected_args = inspect.getargspec(optim_fn.__init__)[0]
+    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
     assert expected_args[:2] == ['self', 'params']
     if not all(k in expected_args[2:] for k in optim_params.keys()):
         raise Exception('Unexpected parameters: expected "%s", received "%s"' %
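inspect.getargspec was deprecated for years and removed in Python 3.11; getfullargspec returns a result whose first field is the same positional-argument list, so the [0] index still works. A quick sketch against a real optimizer constructor:

import inspect
import torch.optim as optim

expected_args = inspect.getfullargspec(optim.SGD.__init__)[0]
print(expected_args[:2])  # ['self', 'params'] -- matches the assert above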