diff --git a/.gitignore b/.gitignore
index a26ad14..fe46302 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ lib/
 __pycache__/
 *.py[cod]
 _vizdoom.ini
+/dumped
diff --git a/arnold.py b/arnold.py
index 7fd9242..b28896e 100644
--- a/arnold.py
+++ b/arnold.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+import sys
 import vizdoom
 from src.utils import get_dump_path
 from src.logger import get_logger
@@ -16,7 +17,10 @@
 
 # create a directory for the experiment / create a logger
 dump_path = get_dump_path(args.main_dump_path, args.exp_name)
-logger = get_logger(filepath=os.path.join(dump_path, 'train.log'))
+log_path = os.path.join(dump_path, 'train.log')
+if sys.platform == "win32":
+    log_path = log_path.replace('\\', '/')
+logger = get_logger(filepath=log_path)
 
 logger.info('========== Running DOOM ==========')
 logger.info('Experiment will be saved in: %s' % dump_path)
diff --git a/src/logger.py b/src/logger.py
index 94a5b7d..1b1da83 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -39,6 +39,10 @@ def get_logger(filepath=None):
     log_formatter = LogFormatter()
 
     # create file handler and set level to debug
+    file_dir = os.path.split(filepath)[0]
+    if file_dir and not os.path.exists(file_dir):
+        os.makedirs(file_dir)
+        print("Directory", file_dir, "didn't exist, and was created", flush=True)
     file_handler = logging.FileHandler(filepath, "a")
     file_handler.setLevel(logging.DEBUG)
     file_handler.setFormatter(log_formatter)
diff --git a/src/model/bucketed_embedding.py b/src/model/bucketed_embedding.py
index f88a4d8..cc444eb 100644
--- a/src/model/bucketed_embedding.py
+++ b/src/model/bucketed_embedding.py
@@ -9,4 +9,5 @@ def __init__(self, bucket_size, num_embeddings, *args, **kwargs):
         super(BucketedEmbedding, self).__init__(real_num_embeddings, *args, **kwargs)
 
     def forward(self, indices):
-        return super(BucketedEmbedding, self).forward(indices.div(self.bucket_size))
+        x = super(BucketedEmbedding, self).forward(indices // self.bucket_size)
+        return x.unsqueeze_(0) if x.ndim < 2 else x
diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py
index 6e1c61c..7bf5d28 100644
--- a/src/model/dqn/base.py
+++ b/src/model/dqn/base.py
@@ -1,7 +1,6 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 
 from ...utils import bool_flag
@@ -78,7 +77,7 @@ def base_forward(self, x_screens, x_variables):
 
         # create state input
         if self.n_variables:
-            output = torch.cat([conv_output] + embeddings, 1)
+            output = torch.cat([conv_output] + embeddings, dim=1)
         else:
             output = conv_output
 
@@ -120,7 +119,7 @@ def __init__(self, params):
 
     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
-        x = Variable(x)
+        x = x.detach()
         return x.cuda() if self.cuda else x
 
     def reset(self):
@@ -185,8 +184,8 @@ def prepare_f_train_args(self, screens, variables, features,
         return screens, variables, features, actions, rewards, isfinal
 
     def register_loss(self, loss_history, loss_sc, loss_gf):
-        loss_history['dqn_loss'].append(loss_sc.data[0])
-        loss_history['gf_loss'].append(loss_gf.data[0]
+        loss_history['dqn_loss'].append(loss_sc.cpu().item())
+        loss_history['gf_loss'].append(loss_gf.cpu().item()
                                        if self.n_features else 0)
 
     def next_action(self, last_states, save_graph=False):
@@ -205,7 +204,7 @@ def next_action(self, last_states, save_graph=False):
         if pred_features is not None:
            assert pred_features.size() == (1, seq_len, self.module.n_features)
            pred_features = pred_features[0, -1]
-        action_id = scores.data.max(0)[1][0]
+        action_id = scores.data.max(0)[1].item()
         self.pred_features = pred_features
         return action_id
 
diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index e2af5a6..bdfcc7d 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -1,9 +1,7 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 
 from .base import DQNModuleBase, DQN
-
 
 class DQNModuleFeedforward(DQNModuleBase):
     def __init__(self, params):
@@ -21,10 +19,13 @@ def forward(self, x_screens, x_variables):
         """
 
         batch_size = x_screens.size(0)
+        assert x_screens.ndimension() == 4
 
         assert len(x_variables) == self.n_variables
-        assert all(x.ndimension() == 1 and x.size(0) == batch_size
-                   for x in x_variables)
+
+        if batch_size > 1:
+            assert all(x.ndimension() == 1 and x.size(0) == batch_size for x in x_variables), \
+                f"{[(x.ndimension(), x.size(), x) for x in x_variables]} {batch_size}"
 
         # state input (screen / depth / labels buffer + variables)
         state_input, output_gf = self.base_forward(x_screens, x_variables)
@@ -45,7 +46,6 @@ class DQNFeedforward(DQN):
 
     def f_eval(self, last_states):
         screens, variables = self.prepare_f_eval_args(last_states)
-
         return self.module(
             screens.view(1, -1, *self.screen_shape[1:]),
             [variables[-1, i] for i in range(self.params.n_variables)]
@@ -74,7 +74,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # compute scores
-        mask = torch.ByteTensor(output_sc1.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc1.size()).fill_(0)
         for i in range(batch_size):
             mask[i, int(actions[i, -1])] = 1
         scores1 = output_sc1.masked_select(self.get_var(mask))
@@ -83,7 +83,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, Variable(scores2.data))
+        loss_sc = self.loss_fn_sc(scores1, scores2.detach())
 
         # game features loss
         loss_gf = 0
diff --git a/src/model/dqn/recurrent.py b/src/model/dqn/recurrent.py
index 3e17dc3..3e9045b 100644
--- a/src/model/dqn/recurrent.py
+++ b/src/model/dqn/recurrent.py
@@ -1,5 +1,4 @@
 import torch
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
 from ..utils import get_recurrent_module
 from ...utils import bool_flag
@@ -63,7 +62,7 @@ def __init__(self, params):
 
         h_0 = torch.FloatTensor(params.n_rec_layers, params.batch_size, params.hidden_dim).zero_()
         self.init_state_t = self.get_var(h_0)
-        self.init_state_e = Variable(self.init_state_t[:, :1, :].data.clone(), volatile=True)
+        self.init_state_e = self.init_state_t[:, :1, :].detach().clone()
         if params.recurrence == 'lstm':
             self.init_state_t = (self.init_state_t, self.init_state_t)
             self.init_state_e = (self.init_state_e, self.init_state_e)
@@ -116,7 +115,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # compute scores
-        mask = torch.ByteTensor(output_sc.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc.size()).fill_(0)
         for i in range(batch_size):
             for j in range(seq_len - 1):
                 mask[i, j, int(actions[i, j])] = 1
@@ -128,7 +127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            Variable(scores2.data[:, -self.params.n_rec_updates:])
+            scores2.detach()[:, -self.params.n_rec_updates:]
         )
 
         # game features loss
diff --git a/src/model/utils.py b/src/model/utils.py
index 1d4cc7a..bc2e9a0 100644
--- a/src/model/utils.py
+++ b/src/model/utils.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 
 from .bucketed_embedding import BucketedEmbedding
@@ -76,7 +75,7 @@ def build_CNN_network(module, params):
     ]))
 
     # get the size of the convolution network output
-    x = Variable(torch.FloatTensor(1, in_channels, height, width).zero_())
+    x = torch.FloatTensor(1, in_channels, height, width).zero_()
     module.conv_output_dim = module.conv(x).nelement()
 
 
diff --git a/src/replay_memory.py b/src/replay_memory.py
index 02bed81..365f706 100644
--- a/src/replay_memory.py
+++ b/src/replay_memory.py
@@ -18,7 +18,7 @@ def __init__(self, max_size, screen_shape, n_variables, n_features):
         self.features = np.zeros((max_size, n_features), dtype=np.int32)
         self.actions = np.zeros(max_size, dtype=np.int32)
         self.rewards = np.zeros(max_size, dtype=np.float32)
-        self.isfinal = np.zeros(max_size, dtype=np.bool)
+        self.isfinal = np.zeros(max_size, dtype=bool)
 
     @property
     def size(self):
diff --git a/src/trainer.py b/src/trainer.py
index 1053ae9..820b1e3 100644
--- a/src/trainer.py
+++ b/src/trainer.py
@@ -59,8 +59,8 @@ def run(self):
 
             self.n_iter += 1
             if self.game.is_final():
-                self.game.reset()        # dead or end of episode
-                self.network.reset()     # reset internal state (RNNs only)
+                self.game.new_episode()  # dead or end of episode
+                self.network.reset()     # reset internal state (RNNs only)
 
             self.game.observe_state(self.params, last_states)
 
diff --git a/src/utils.py b/src/utils.py
index 31fc1ea..9706643 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -90,7 +90,7 @@ def get_optimizer(s):
         raise Exception('Unknown optimization method: "%s"' % method)
 
     # check that we give good parameters to the optimizer
-    expected_args = inspect.getargspec(optim_fn.__init__)[0]
+    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
     assert expected_args[:2] == ['self', 'params']
     if not all(k in expected_args[2:] for k in optim_params.keys()):
         raise Exception('Unexpected parameters: expected "%s", received "%s"' %