1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ lib/
 __pycache__/
 *.py[cod]
 _vizdoom.ini
+/dumped
6 changes: 5 additions & 1 deletion arnold.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+import sys
 import vizdoom
 from src.utils import get_dump_path
 from src.logger import get_logger
@@ -16,7 +17,10 @@

 # create a directory for the experiment / create a logger
 dump_path = get_dump_path(args.main_dump_path, args.exp_name)
-logger = get_logger(filepath=os.path.join(dump_path, 'train.log'))
+log_path = os.path.join(dump_path, 'train.log')
+if sys.platform == "win32":
+    log_path = log_path.replace('\\', '/')
+logger = get_logger(filepath=log_path)
 logger.info('========== Running DOOM ==========')
 logger.info('Experiment will be saved in: %s' % dump_path)

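For context, a minimal standalone sketch of the Windows path handling added above (the experiment name is hypothetical). On Windows, os.path.join emits backslash separators; the Win32 API also accepts forward slashes, so normalizing keeps log paths consistent across platforms:

import os
import sys

dump_path = os.path.join("dumped", "my_experiment")  # hypothetical experiment dir
log_path = os.path.join(dump_path, "train.log")      # 'dumped\\my_experiment\\train.log' on Windows
if sys.platform == "win32":
    log_path = log_path.replace("\\", "/")           # -> 'dumped/my_experiment/train.log'
print(log_path)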
4 changes: 4 additions & 0 deletions src/logger.py
@@ -39,6 +39,10 @@ def get_logger(filepath=None):
     log_formatter = LogFormatter()

     # create file handler and set level to debug
+    file_dir = os.path.split(filepath)[0]
+    if file_dir and not os.path.exists(file_dir):
+        os.makedirs(file_dir)
+        print("Directory", file_dir, "didn't exist, and was created", flush=True)
     file_handler = logging.FileHandler(filepath, "a")
     file_handler.setLevel(logging.DEBUG)
     file_handler.setFormatter(log_formatter)
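logging.FileHandler does not create missing directories, so opening the log file fails when the dump directory does not exist yet. A minimal sketch of the guard with a hypothetical filepath; os.makedirs(file_dir, exist_ok=True) is an equivalent, race-free alternative:

import logging
import os

filepath = "dumped/my_experiment/train.log"  # hypothetical
file_dir = os.path.split(filepath)[0]
if file_dir and not os.path.exists(file_dir):
    os.makedirs(file_dir)
file_handler = logging.FileHandler(filepath, "a")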
3 changes: 2 additions & 1 deletion src/model/bucketed_embedding.py
@@ -9,4 +9,5 @@ def __init__(self, bucket_size, num_embeddings, *args, **kwargs):
         super(BucketedEmbedding, self).__init__(real_num_embeddings, *args, **kwargs)

     def forward(self, indices):
-        return super(BucketedEmbedding, self).forward(indices.div(self.bucket_size))
+        x = super(BucketedEmbedding, self).forward(indices // self.bucket_size)
+        return x.unsqueeze_(0) if x.ndim < 2 else x
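The switch from indices.div(self.bucket_size) to indices // self.bucket_size matters on modern PyTorch: Tensor.div performs true division and yields floats, while // keeps the integer floor division that embedding lookups require. A quick sketch with made-up values:

import torch

indices = torch.tensor([0, 3, 7, 12])
bucket_size = 4
print(indices // bucket_size)  # tensor([0, 0, 1, 3]) -- valid integer embedding indices
print(indices / bucket_size)   # tensor([0.0000, 0.7500, 1.7500, 3.0000]) -- floats, unusable as indices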
11 changes: 5 additions & 6 deletions src/model/dqn/base.py
@@ -1,7 +1,6 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger

 from ...utils import bool_flag
@@ -78,7 +77,7 @@ def base_forward(self, x_screens, x_variables):

         # create state input
         if self.n_variables:
-            output = torch.cat([conv_output] + embeddings, 1)
+            output = torch.cat([conv_output] + embeddings, dim=1)
         else:
             output = conv_output

@@ -120,7 +119,7 @@ def __init__(self, params):

     def get_var(self, x):
         """Move a tensor to a CPU / GPU variable."""
-        x = Variable(x)
+        x = x.detach()
         return x.cuda() if self.cuda else x

     def reset(self):
@@ -185,8 +184,8 @@ def prepare_f_train_args(self, screens, variables, features,
         return screens, variables, features, actions, rewards, isfinal

     def register_loss(self, loss_history, loss_sc, loss_gf):
-        loss_history['dqn_loss'].append(loss_sc.data[0])
-        loss_history['gf_loss'].append(loss_gf.data[0]
+        loss_history['dqn_loss'].append(loss_sc.cpu().item())
+        loss_history['gf_loss'].append(loss_gf.cpu().item()
                                        if self.n_features else 0)

     def next_action(self, last_states, save_graph=False):
@@ -205,7 +204,7 @@
         if pred_features is not None:
             assert pred_features.size() == (1, seq_len, self.module.n_features)
             pred_features = pred_features[0, -1]
-        action_id = scores.data.max(0)[1][0]
+        action_id = scores.data.max(0)[1].item()
         self.pred_features = pred_features
         return action_id

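These edits follow the PyTorch 0.4 Variable/Tensor merge: Variable(x) is a redundant wrapper, indexing a 0-dim loss with .data[0] raises an error on modern versions, and .item() is the supported scalar accessor. A minimal sketch of both patterns:

import torch

loss = (torch.randn(5) ** 2).mean()  # a 0-dim tensor
print(loss.item())                   # old loss.data[0] raises IndexError on modern PyTorch

x = torch.randn(3, requires_grad=True)
y = x.detach()                       # replaces Variable(x); shares storage, carries no autograd history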
14 changes: 7 additions & 7 deletions src/model/dqn/feedforward.py
@@ -1,9 +1,7 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
-

 class DQNModuleFeedforward(DQNModuleBase):

     def __init__(self, params):
@@ -21,10 +19,13 @@ def forward(self, x_screens, x_variables):
         """

         batch_size = x_screens.size(0)
+
         assert x_screens.ndimension() == 4
         assert len(x_variables) == self.n_variables
-        assert all(x.ndimension() == 1 and x.size(0) == batch_size
-                   for x in x_variables)
+
+        if batch_size > 1:
+            assert all(x.ndimension() == 1 and x.size(0) == batch_size for x in x_variables), \
+                f"{[(x.ndimension(), x.size(), x) for x in x_variables]} {batch_size}"

         # state input (screen / depth / labels buffer + variables)
         state_input, output_gf = self.base_forward(x_screens, x_variables)
@@ -45,7 +46,6 @@ class DQNFeedforward(DQN):
     def f_eval(self, last_states):

         screens, variables = self.prepare_f_eval_args(last_states)
-
         return self.module(
             screens.view(1, -1, *self.screen_shape[1:]),
             [variables[-1, i] for i in range(self.params.n_variables)]
@@ -74,7 +74,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )

         # compute scores
-        mask = torch.ByteTensor(output_sc1.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc1.size()).fill_(0)
         for i in range(batch_size):
             mask[i, int(actions[i, -1])] = 1
         scores1 = output_sc1.masked_select(self.get_var(mask))
@@ -83,7 +83,7 @@
         )

         # dqn loss
-        loss_sc = self.loss_fn_sc(scores1, Variable(scores2.data))
+        loss_sc = self.loss_fn_sc(scores1, scores2.detach())

         # game features loss
         loss_gf = 0
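The ByteTensor to BoolTensor change tracks PyTorch 1.2+, where masked_select and masked indexing with uint8 masks are deprecated in favor of bool masks. A self-contained sketch with hypothetical sizes:

import torch

scores = torch.randn(2, 3)  # (batch_size, n_actions), hypothetical
actions = torch.tensor([2, 0])
mask = torch.zeros(scores.size(), dtype=torch.bool)
for i in range(scores.size(0)):
    mask[i, int(actions[i])] = True
print(scores.masked_select(mask))  # one selected score per row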
7 changes: 3 additions & 4 deletions src/model/dqn/recurrent.py
@@ -1,5 +1,4 @@
 import torch
-from torch.autograd import Variable
 from .base import DQNModuleBase, DQN
 from ..utils import get_recurrent_module
 from ...utils import bool_flag
@@ -63,7 +62,7 @@ def __init__(self, params):
         h_0 = torch.FloatTensor(params.n_rec_layers, params.batch_size,
                                 params.hidden_dim).zero_()
         self.init_state_t = self.get_var(h_0)
-        self.init_state_e = Variable(self.init_state_t[:, :1, :].data.clone(), volatile=True)
+        self.init_state_e = self.init_state_t[:, :1, :].detach().clone()
         if params.recurrence == 'lstm':
             self.init_state_t = (self.init_state_t, self.init_state_t)
             self.init_state_e = (self.init_state_e, self.init_state_e)
@@ -116,7 +115,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,
         )

         # compute scores
-        mask = torch.ByteTensor(output_sc.size()).fill_(0)
+        mask = torch.BoolTensor(output_sc.size()).fill_(0)
         for i in range(batch_size):
             for j in range(seq_len - 1):
                 mask[i, j, int(actions[i, j])] = 1
@@ -128,7 +127,7 @@ def f_train(self, screens, variables, features, actions, rewards, isfinal,

         # dqn loss
         loss_sc = self.loss_fn_sc(
             scores1.view(batch_size, -1)[:, -self.params.n_rec_updates:],
-            Variable(scores2.data[:, -self.params.n_rec_updates:])
+            scores2.detach()[:, -self.params.n_rec_updates:]
         )

         # game features loss
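volatile=True disappeared in the PyTorch 0.4 merge; a detached clone (or code run under torch.no_grad()) is the current way to keep an evaluation-time initial state out of the autograd graph. A sketch with hypothetical layer/batch/hidden sizes:

import torch

init_state_t = torch.zeros(1, 32, 512)                  # (n_rec_layers, batch_size, hidden_dim), hypothetical
init_state_e = init_state_t[:, :1, :].detach().clone()  # eval state for a batch of 1, no graph attached
assert init_state_e.shape == (1, 1, 512)
assert not init_state_e.requires_grad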
3 changes: 1 addition & 2 deletions src/model/utils.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 from logging import getLogger
 from .bucketed_embedding import BucketedEmbedding

@@ -76,7 +75,7 @@ def build_CNN_network(module, params):
     ]))

     # get the size of the convolution network output
-    x = Variable(torch.FloatTensor(1, in_channels, height, width).zero_())
+    x = torch.FloatTensor(1, in_channels, height, width).zero_()
     module.conv_output_dim = module.conv(x).nelement()


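The dummy-forward trick above needs only a plain tensor now that Variable is gone: run one zero-filled input through the conv stack and count its output elements to size the following linear layer. A sketch with hypothetical layer and input sizes:

import torch
import torch.nn as nn

conv = nn.Sequential(nn.Conv2d(3, 8, kernel_size=5, stride=2), nn.ReLU())
x = torch.zeros(1, 3, 60, 108)        # (1, in_channels, height, width)
conv_output_dim = conv(x).nelement()  # flattened conv output size, here 8 * 28 * 52
print(conv_output_dim)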
2 changes: 1 addition & 1 deletion src/replay_memory.py
@@ -18,7 +18,7 @@ def __init__(self, max_size, screen_shape, n_variables, n_features):
         self.features = np.zeros((max_size, n_features), dtype=np.int32)
         self.actions = np.zeros(max_size, dtype=np.int32)
         self.rewards = np.zeros(max_size, dtype=np.float32)
-        self.isfinal = np.zeros(max_size, dtype=np.bool)
+        self.isfinal = np.zeros(max_size, dtype=bool)

     @property
     def size(self):
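np.bool was only an alias for the builtin bool; it was deprecated in NumPy 1.20 and removed in 1.24, so the builtin is the drop-in replacement:

import numpy as np

isfinal = np.zeros(8, dtype=bool)  # max_size=8 is hypothetical
isfinal[3] = True
print(isfinal.dtype)               # bool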
4 changes: 2 additions & 2 deletions src/trainer.py
@@ -59,8 +59,8 @@ def run(self):
             self.n_iter += 1

             if self.game.is_final():
-                self.game.reset()       # dead or end of episode
-                self.network.reset()    # reset internal state (RNNs only)
+                self.game.new_episode() # dead or end of episode
+                self.network.reset()    # reset internal state (RNNs only)

             self.game.observe_state(self.params, last_states)

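ViZDoom restarts an episode with new_episode(); DoomGame has no reset(), which presumably motivates this fix (assuming the repository's Game wrapper forwards to the underlying DoomGame). A hedged sketch against the raw vizdoom API, with a hypothetical scenario path:

import vizdoom

game = vizdoom.DoomGame()
game.load_config("scenarios/basic.cfg")  # hypothetical config path
game.init()
if game.is_episode_finished():
    game.new_episode()                   # dead or end of episode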
2 changes: 1 addition & 1 deletion src/utils.py
@@ -90,7 +90,7 @@ def get_optimizer(s):
         raise Exception('Unknown optimization method: "%s"' % method)

     # check that we give good parameters to the optimizer
-    expected_args = inspect.getargspec(optim_fn.__init__)[0]
+    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
     assert expected_args[:2] == ['self', 'params']
     if not all(k in expected_args[2:] for k in optim_params.keys()):
         raise Exception('Unexpected parameters: expected "%s", received "%s"' %
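inspect.getargspec was deprecated for years and removed in Python 3.11; getfullargspec returns a result whose first field is the same positional-argument list, so the [0] index still works. A quick sketch against a real optimizer constructor:

import inspect
import torch.optim as optim

expected_args = inspect.getfullargspec(optim.SGD.__init__)[0]
print(expected_args[:2])  # ['self', 'params'] -- matches the assert above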