From 8570c6a20bdf27f2d958af22b2d67bd6d2bc5c2a Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 02:47:01 +0000
Subject: [PATCH 01/17] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index a26ad14..fe46302 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ lib/
 __pycache__/
 *.py[cod]
 _vizdoom.ini
+/dumped

From c7c6625f06a0f068420409ab15340e555adde580 Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 03:05:25 +0000
Subject: [PATCH 02/17] Create logging directory if not existing

---
 src/logger.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/logger.py b/src/logger.py
index 94a5b7d..1b1da83 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -39,6 +39,10 @@ def get_logger(filepath=None):
     log_formatter = LogFormatter()
 
     # create file handler and set level to debug
+    file_dir = os.path.split(filepath)[0]
+    if file_dir and not os.path.exists(file_dir):
+        os.makedirs(file_dir)
+        print("Directory", file_dir, "didn't exist, and was created", flush=True)
     file_handler = logging.FileHandler(filepath, "a")
     file_handler.setLevel(logging.DEBUG)
     file_handler.setFormatter(log_formatter)

From fd89584707f0d1e2b52f8228235e4887eec72ed5 Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 03:13:56 +0000
Subject: [PATCH 03/17] Support Windows paths

---
 arnold.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arnold.py b/arnold.py
index 7fd9242..b28896e 100644
--- a/arnold.py
+++ b/arnold.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+import sys
 import vizdoom
 from src.utils import get_dump_path
 from src.logger import get_logger
@@ -16,7 +17,10 @@
 
 # create a directory for the experiment / create a logger
 dump_path = get_dump_path(args.main_dump_path, args.exp_name)
-logger = get_logger(filepath=os.path.join(dump_path, 'train.log'))
+log_path = os.path.join(dump_path, 'train.log')
+if sys.platform == "win32":
+    log_path = log_path.replace('\\', '/')
+logger = get_logger(filepath=log_path)
 logger.info('========== Running DOOM ==========')
 logger.info('Experiment will be saved in: %s' % dump_path)
 

From 6532b98d960ea151ca2123f3ef53023a9247e524 Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 03:15:48 +0000
Subject: [PATCH 04/17] np.bool is now np.bool8 or np.bool_...
 use Python bool to be future-proof

---
 src/replay_memory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/replay_memory.py b/src/replay_memory.py
index 02bed81..365f706 100644
--- a/src/replay_memory.py
+++ b/src/replay_memory.py
@@ -18,7 +18,7 @@ def __init__(self, max_size, screen_shape, n_variables, n_features):
         self.features = np.zeros((max_size, n_features), dtype=np.int32)
         self.actions = np.zeros(max_size, dtype=np.int32)
         self.rewards = np.zeros(max_size, dtype=np.float32)
-        self.isfinal = np.zeros(max_size, dtype=np.bool)
+        self.isfinal = np.zeros(max_size, dtype=bool)
 
     @property
     def size(self):

From ce95f5726406b33958194d8c03c929a3f13fa49a Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 03:17:01 +0000
Subject: [PATCH 05/17] Update to be compatible with up-to-date inspect module

---
 src/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils.py b/src/utils.py
index 31fc1ea..9706643 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -90,7 +90,7 @@ def get_optimizer(s):
         raise Exception('Unknown optimization method: "%s"' % method)
 
     # check that we give good parameters to the optimizer
-    expected_args = inspect.getargspec(optim_fn.__init__)[0]
+    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
     assert expected_args[:2] == ['self', 'params']
     if not all(k in expected_args[2:] for k in optim_params.keys()):
         raise Exception('Unexpected parameters: expected "%s", received "%s"' %

From 674437f6c7cb03e7b1c4715ed294472472cd0757 Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sun, 2 Mar 2025 03:21:00 +0000
Subject: [PATCH 06/17] Patch in PyTorch 2.x compatibility

---
 src/doom/actions.py             |  5 +++--
 src/model/bucketed_embedding.py |  3 ++-
 src/model/dqn/base.py           | 10 +++++-----
 src/model/dqn/feedforward.py    | 14 +++++++++-----
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/doom/actions.py b/src/doom/actions.py
index 835b4dd..46bb48b 100644
--- a/src/doom/actions.py
+++ b/src/doom/actions.py
@@ -88,8 +88,9 @@ def get_action(self, action):
                            for k in self.available_buttons]
             return doom_action
         else:
-            assert type(action) is int
-            return self.doom_actions[action]
+            a = action if type(action) == int else int(action.item())
+            assert type(a) is int
+            return self.doom_actions[a]
 
 
 action_categories_discrete = {
diff --git a/src/model/bucketed_embedding.py b/src/model/bucketed_embedding.py
index f88a4d8..b24756e 100644
--- a/src/model/bucketed_embedding.py
+++ b/src/model/bucketed_embedding.py
@@ -9,4 +9,5 @@ def __init__(self, bucket_size, num_embeddings, *args, **kwargs):
         super(BucketedEmbedding, self).__init__(real_num_embeddings, *args, **kwargs)
 
     def forward(self, indices):
-        return super(BucketedEmbedding, self).forward(indices.div(self.bucket_size))
+        x = super(BucketedEmbedding, self).forward(indices // self.bucket_size)
+        return x.squeeze(0) if x.ndim > 2 else x
diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py
index 6e1c61c..196d660 100644
--- a/src/model/dqn/base.py
+++ b/src/model/dqn/base.py
@@ -78,7 +78,7 @@ def base_forward(self, x_screens, x_variables):
 
         # create state input
        if self.n_variables:
-            output = torch.cat([conv_output] + embeddings, 1)
+            output = torch.cat([conv_output] + embeddings, dim=1)
         else:
             output = conv_output
 
@@ -120,7 +120,7 @@ def __init__(self, params):
 
     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
-        x = Variable(x)
+        x = x.detach()
         return x.cuda() if self.cuda else x
 
     def reset(self):
@@ -185,8 +185,8 @@ def prepare_f_train_args(self, screens, variables, features,
         return screens, variables, features, actions, rewards, isfinal
 
     def register_loss(self, loss_history, loss_sc, loss_gf):
-        loss_history['dqn_loss'].append(loss_sc.data[0])
-        loss_history['gf_loss'].append(loss_gf.data[0]
+        loss_history['dqn_loss'].append(loss_sc.data.cpu().numpy())
+        loss_history['gf_loss'].append(loss_gf.data.cpu().numpy()
                                        if self.n_features else 0)
 
     def next_action(self, last_states, save_graph=False):
@@ -205,7 +205,7 @@ def next_action(self, last_states, save_graph=False):
         if pred_features is not None:
             assert pred_features.size() == (1, seq_len, self.module.n_features)
             pred_features = pred_features[0, -1]
-        action_id = scores.data.max(0)[1][0]
+        action_id = scores.data.max(0)[1]
         self.pred_features = pred_features
         return action_id
 
diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index e2af5a6..2d51f30 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -21,10 +21,15 @@ def forward(self, x_screens, x_variables):
         """
         batch_size = x_screens.size(0)
+
+        for x in x_variables:
+            x.unsqueeze_(0)
+
         assert x_screens.ndimension() == 4
         assert len(x_variables) == self.n_variables
-        assert all(x.ndimension() == 1 and x.size(0) == batch_size
-                   for x in x_variables)
+
+        #assert all(x.ndimension() == 0 and len(list(x.size())) == batch_size
+        #           for x in x_variables)
 
         # state input (screen / depth / labels buffer + variables)
         state_input, output_gf = self.base_forward(x_screens, x_variables)
 
@@ -45,7 +50,6 @@ class DQNFeedforward(DQN):
 
     def f_eval(self, last_states):
         screens, variables = self.prepare_f_eval_args(last_states)
-
         return self.module(
             screens.view(1, -1, *self.screen_shape[1:]),
             [variables[-1, i] for i in range(self.params.n_variables)]
@@ -74,7 +78,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # compute scores
-        mask = torch.ByteTensor(output_sc1.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc1.size()).fill_(0)
         for i in range(batch_size):
             mask[i, int(actions[i, -1])] = 1
         scores1 = output_sc1.masked_select(self.get_var(mask))
@@ -83,7 +87,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, Variable(scores2.data))
+        loss_sc = self.loss_fn_sc(scores1, scores2.detach())
 
         # game features loss
         loss_gf = 0

From 7b90d79d0fa8a79d7b2683cd30d5644813bb2106 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Wed, 5 Mar 2025 11:44:59 +0000
Subject: [PATCH 07/17] Reverted changes in actions.py

---
 src/doom/actions.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/doom/actions.py b/src/doom/actions.py
index 46bb48b..835b4dd 100644
--- a/src/doom/actions.py
+++ b/src/doom/actions.py
@@ -88,9 +88,8 @@ def get_action(self, action):
                            for k in self.available_buttons]
             return doom_action
         else:
-            a = action if type(action) == int else int(action.item())
-            assert type(a) is int
-            return self.doom_actions[a]
+            assert type(action) is int
+            return self.doom_actions[action]
 
 
 action_categories_discrete = {

From 343fc4adec26f20c395f78388f46e4b3387704d1 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Wed, 5 Mar 2025 11:48:50 +0000
Subject: [PATCH 08/17] Update base.py

---
 src/model/dqn/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py
index 196d660..0e5e6eb 100644
--- a/src/model/dqn/base.py
+++ b/src/model/dqn/base.py
@@ -1,7 +1,6 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 from ...utils import bool_flag
 
@@ -205,7 +204,7 @@ def next_action(self, last_states, save_graph=False):
         if pred_features is not None:
             assert pred_features.size() == (1, seq_len, self.module.n_features)
             pred_features = pred_features[0, -1]
-        action_id = scores.data.max(0)[1]
+        action_id = scores.data.max(0)[1].item()
         self.pred_features = pred_features
         return action_id
 

From 49f515b711d406ecbe00b234791870efefb995a4 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Wed, 5 Mar 2025 12:06:16 +0000
Subject: [PATCH 09/17] Update bucketed_embedding.py

---
 src/model/bucketed_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/model/bucketed_embedding.py b/src/model/bucketed_embedding.py
index b24756e..cc444eb 100644
--- a/src/model/bucketed_embedding.py
+++ b/src/model/bucketed_embedding.py
@@ -10,4 +10,4 @@ def __init__(self, bucket_size, num_embeddings, *args, **kwargs):
 
     def forward(self, indices):
         x = super(BucketedEmbedding, self).forward(indices // self.bucket_size)
-        return x.squeeze(0) if x.ndim > 2 else x
+        return x.unsqueeze_(0) if x.ndim < 2 else x

From 8cd1b5eeb80c1701656bf655c05533e6dddaf9f6 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Wed, 5 Mar 2025 12:09:41 +0000
Subject: [PATCH 10/17] Update utils.py

---
 src/model/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/model/utils.py b/src/model/utils.py
index 1d4cc7a..bc2e9a0 100644
--- a/src/model/utils.py
+++ b/src/model/utils.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 
 from .bucketed_embedding import BucketedEmbedding
@@ -76,7 +75,7 @@ def build_CNN_network(module, params):
     ]))
 
     # get the size of the convolution network output
-    x = Variable(torch.FloatTensor(1, in_channels, height, width).zero_())
+    x = torch.FloatTensor(1, in_channels, height, width).zero_()
     module.conv_output_dim = module.conv(x).nelement()
 
 

From b6dadb471c87c8eb176de37128e51020fe5a7390 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Wed, 5 Mar 2025 12:12:59 +0000
Subject: [PATCH 11/17] Update feedforward.py

---
 src/model/dqn/feedforward.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index 2d51f30..376576a 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -21,9 +21,6 @@ def forward(self, x_screens, x_variables):
         """
         batch_size = x_screens.size(0)
-
-        for x in x_variables:
-            x.unsqueeze_(0)
 
         assert x_screens.ndimension() == 4
         assert len(x_variables) == self.n_variables

From 4c98bc6960b4a2d9e4cfe4fc3e8476a55f995cd2 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 6 Mar 2025 15:22:49 +0000
Subject: [PATCH 12/17] Update feedforward.py

---
 src/model/dqn/feedforward.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index 376576a..26d2dc9 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -84,7 +84,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, scores2.detach())
+        loss_sc = self.loss_fn_sc(scores1, scores2)
 
         # game features loss
         loss_gf = 0

From ad36f1a8daf6a96799f8561953e0fffdd1b4a1ac Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 6 Mar 2025 15:23:26 +0000
Subject: [PATCH 13/17] Update base.py

---
 src/model/dqn/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py
index 0e5e6eb..d714c9b 100644
--- a/src/model/dqn/base.py
+++ b/src/model/dqn/base.py
@@ -119,7 +119,6 @@ def __init__(self, params):
 
     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
-        x = x.detach()
         return x.cuda() if self.cuda else x
 
     def reset(self):

From 36799274305afb9f283b2aa52dd94c27fc0a76db Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 6 Mar 2025 15:27:30 +0000
Subject: [PATCH 14/17] Update recurrent.py

---
 src/model/dqn/recurrent.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/model/dqn/recurrent.py b/src/model/dqn/recurrent.py
index 3e17dc3..9097239 100644
--- a/src/model/dqn/recurrent.py
+++ b/src/model/dqn/recurrent.py
@@ -1,5 +1,4 @@
 import torch
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
 from ..utils import get_recurrent_module
 from ...utils import bool_flag
@@ -63,7 +62,7 @@ def __init__(self, params):
         h_0 = torch.FloatTensor(params.n_rec_layers, params.batch_size,
                                 params.hidden_dim).zero_()
         self.init_state_t = self.get_var(h_0)
-        self.init_state_e = Variable(self.init_state_t[:, :1, :].data.clone(), volatile=True)
+        self.init_state_e = self.init_state_t[:, :1, :].detach().clone()
         if params.recurrence == 'lstm':
             self.init_state_t = (self.init_state_t, self.init_state_t)
             self.init_state_e = (self.init_state_e, self.init_state_e)
@@ -116,7 +115,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # compute scores
-        mask = torch.ByteTensor(output_sc.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc.size()).fill_(0)
         for i in range(batch_size):
             for j in range(seq_len - 1):
                 mask[i, j, int(actions[i, j])] = 1
@@ -128,7+127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            Variable(scores2.data[:, -self.params.n_rec_updates:])
+            torch.Tensor(scores2.data[:, -self.params.n_rec_updates:])
         )
 
         # game features loss

From 8f7e4ddcafccf58536ba4e14508d53e91a8b93cf Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 6 Mar 2025 15:33:45 +0000
Subject: [PATCH 15/17] Should have finished fixing everything

---
 src/model/dqn/recurrent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/model/dqn/recurrent.py b/src/model/dqn/recurrent.py
index 9097239..3c86f74 100644
--- a/src/model/dqn/recurrent.py
+++ b/src/model/dqn/recurrent.py
@@ -127,7 +127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            torch.Tensor(scores2.data[:, -self.params.n_rec_updates:])
+            scores2[:, -self.params.n_rec_updates:]
         )
 
         # game features loss

From 3c47eb9e6b82a1971bfc19244d8363f63c968cd3 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Fri, 7 Mar 2025 11:53:16 +0000
Subject: [PATCH 16/17] Remove deprecated import in feedforward.py

---
 src/model/dqn/feedforward.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index 26d2dc9..f0890ab 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -1,9 +1,7 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
 
-
 class DQNModuleFeedforward(DQNModuleBase):
 
     def __init__(self, params):

From d6d7451c40631eca2140bd8e1deabe14c012704b Mon Sep 17 00:00:00 2001
From: Hugo^3
Date: Sat, 5 Jul 2025 01:20:23 +0100
Subject: [PATCH 17/17] fix: ensure detachment

---
 src/model/dqn/base.py        | 5 +++--
 src/model/dqn/feedforward.py | 7 ++++---
 src/model/dqn/recurrent.py   | 2 +-
 src/trainer.py               | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/model/dqn/base.py b/src/model/dqn/base.py
index d714c9b..7bf5d28 100644
--- a/src/model/dqn/base.py
+++ b/src/model/dqn/base.py
@@ -119,6 +119,7 @@ def __init__(self, params):
 
     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
+        x = x.detach()
         return x.cuda() if self.cuda else x
 
     def reset(self):
@@ -183,8 +184,8 @@ def prepare_f_train_args(self, screens, variables, features,
         return screens, variables, features, actions, rewards, isfinal
 
     def register_loss(self, loss_history, loss_sc, loss_gf):
-        loss_history['dqn_loss'].append(loss_sc.data.cpu().numpy())
-        loss_history['gf_loss'].append(loss_gf.data.cpu().numpy()
+        loss_history['dqn_loss'].append(loss_sc.cpu().item())
+        loss_history['gf_loss'].append(loss_gf.cpu().item()
                                        if self.n_features else 0)
 
     def next_action(self, last_states, save_graph=False):
diff --git a/src/model/dqn/feedforward.py b/src/model/dqn/feedforward.py
index f0890ab..bdfcc7d 100644
--- a/src/model/dqn/feedforward.py
+++ b/src/model/dqn/feedforward.py
@@ -23,8 +23,9 @@ def forward(self, x_screens, x_variables):
         assert x_screens.ndimension() == 4
         assert len(x_variables) == self.n_variables
 
-        #assert all(x.ndimension() == 0 and len(list(x.size())) == batch_size
-        #           for x in x_variables)
+        if batch_size > 1:
+            assert all(x.ndimension() == 1 and x.size(0) == batch_size for x in x_variables), \
+                f"{[(x.ndimension(), x.size(), x) for x in x_variables]} {batch_size}"
 
         # state input (screen / depth / labels buffer + variables)
         state_input, output_gf = self.base_forward(x_screens, x_variables)
@@ -82,7 +83,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )
 
         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, scores2)
+        loss_sc = self.loss_fn_sc(scores1, scores2.detach())
 
         # game features loss
         loss_gf = 0
diff --git a/src/model/dqn/recurrent.py b/src/model/dqn/recurrent.py
index 3c86f74..3e9045b 100644
--- a/src/model/dqn/recurrent.py
+++ b/src/model/dqn/recurrent.py
@@ -127,7 +127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            scores2[:, -self.params.n_rec_updates:]
+            scores2.detach()[:, -self.params.n_rec_updates:]
         )
 
         # game features loss
diff --git a/src/trainer.py b/src/trainer.py
index 1053ae9..820b1e3 100644
--- a/src/trainer.py
+++ b/src/trainer.py
@@ -59,8 +59,8 @@ def run(self):
             self.n_iter += 1
 
             if self.game.is_final():
-                self.game.reset()     # dead or end of episode
-                self.network.reset()  # reset internal state (RNNs only)
+                self.game.new_episode()  # dead or end of episode
+                self.network.reset()     # reset internal state (RNNs only)
 
             self.game.observe_state(self.params, last_states)
 
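
Note on the migration idioms used across this series. The snippet below is a minimal,
self-contained sketch of the same fixes in isolation; it is not taken from any patch
above, and it assumes NumPy >= 1.24 (np.bool removed), Python >= 3.11 (getargspec
removed), and a post-0.4 PyTorch (Variable merged into Tensor). Names such as
scores1, scores2, mask, and the use of torch.optim.Adam are illustrative placeholders,
not the repository's objects.

    import inspect

    import numpy as np
    import torch

    # np.bool was removed from NumPy; the builtin bool is the portable dtype (patch 04).
    isfinal = np.zeros(8, dtype=bool)

    # inspect.getargspec was removed; getfullargspec is the replacement (patch 05).
    expected_args = inspect.getfullargspec(torch.optim.Adam.__init__)[0]
    assert expected_args[:2] == ['self', 'params']

    # Variable(x) wrappers are obsolete: tensors carry autograd state directly, and
    # detach() returns a tensor cut out of the graph, as used for the fixed DQN target
    # (patches 06, 14, 17).
    scores1 = torch.randn(4, 8, requires_grad=True)
    scores2 = torch.randn(4, 8)
    target = scores2.detach()

    # Boolean masks should use torch.bool rather than uint8 ByteTensors (patches 06, 14).
    mask = torch.zeros(scores1.size(), dtype=torch.bool)
    mask[torch.arange(4), torch.tensor([0, 3, 5, 7])] = True
    selected = scores1.masked_select(mask)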