From caf518a89ea1a5150fc7d5f938b1182727c3da9a Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Wed, 23 Oct 2019 17:18:35 -0400 Subject: [PATCH 01/13] implemented supervised learning --- alphatsp/experiments/supervised.py | 75 ++++++++++++++++++++++++++ alphatsp/solvers/example_generators.py | 38 +++++++++++++ alphatsp/solvers/policy_networks.py | 46 +++++++++++++++- 3 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 alphatsp/experiments/supervised.py diff --git a/alphatsp/experiments/supervised.py b/alphatsp/experiments/supervised.py new file mode 100644 index 0000000..eed70d9 --- /dev/null +++ b/alphatsp/experiments/supervised.py @@ -0,0 +1,75 @@ +import alphatsp.tsp +import alphatsp.util + +import alphatsp.solvers.policy_solvers +from alphatsp.solvers.example_generators import NNExampleGenerator +from alphatsp.solvers.policy_networks import SupervisedPolicyNetworkTrainer + +import torch +import numpy as np + +import matplotlib +matplotlib.use("agg") +import matplotlib.pyplot as plt + +from multiprocessing import Process, Manager + + +def run(args): + + # setup + N, D = args.N, args.D + n_examples = args.n_train_examples + n_threads = args.n_threads + n_examples_per_thread = n_examples//n_threads + + # create policy network + policy_network = alphatsp.util.get_policy_network(args.policy_network) + + # generate examples + print("Generating examples and training...") + + manager = Manager() + train_queue = manager.Queue() + parent_conn, child_conn = manager.Pipe(False) + + producers = [] + for _ in range(n_threads): + producers.append(Process(target=generate_examples, args=(n_examples_per_thread, train_queue, args))) + + for p in producers: + p.start() + + c = Process(target=train, args=(policy_network, train_queue, child_conn, args)) + c.start() + + for p in producers: + p.join() + train_queue.put(None) + + c.join() + + train_losses = child_conn.recv() + policy_network = child_conn.recv() + + # display training loss + plt.scatter(x=np.arange(len(train_losses)), y=train_losses, marker='.') + plt.title("Loss") + plt.xlabel("examples") + plt.ylabel("loss") + plt.savefig("saves/loss_parallel.png") + + # save network + torch.save(policy_network.state_dict(), "saves/policy_network.pth") + +def generate_examples(n_examples, train_queue, args): + generator = NNExampleGenerator(train_queue, args) + generator.generate_examples(n_examples) + return + +def train(policy_network, train_queue, connection, args): + trainer = PolicyNetworkTrainer(policy_network, train_queue) + trainer.train_all() + connection.send(trainer.losses) + connection.send(trainer.model) + return diff --git a/alphatsp/solvers/example_generators.py b/alphatsp/solvers/example_generators.py index d28529f..ac060b8 100644 --- a/alphatsp/solvers/example_generators.py +++ b/alphatsp/solvers/example_generators.py @@ -1,6 +1,10 @@ import torch import copy +import random +from alphatsp.tsp import TSP from alphatsp.solvers.mcts import MCTSNode, MCTSTree +from alphatsp.solvers import heuristics +from alphatsp.util import get_graph_constructor class MCTSExampleGenerator: @@ -82,3 +86,37 @@ def solve(self): mcts_payoff = self.tsp.tour_length(mcts_tour) return mcts_tour, mcts_payoff + +class NNExampleGenerator: + + def __init__(self, args, example_queue): + self.args = args + self.graph_constructor = get_graph_constructor(args.graph_construction) + self.example_queue = example_queue + self.n_samples = max(args.N//10, 1) + + def generate_example(self, n_examples): + + for _ in range(n_examples//n_samples): + + # generate tsp + 
tsp = TSP(self.args.N, self.args.D) + + # solve + tour, tour_len = heuristics.nearest_greedy(tsp) + + # generate examples + remaining = set(range(self.args.N)) + for i in sorted(random.sample(range(self.args.N), self.n_samples)): + + partial_tour = tour[:i] + remaining = remaining - set(partial_tour) + + graph = self.graph_constructor(tsp, partial_tour, list(remaining)) + + example = { + "graph": graph, + "choice": tour[i+1], + "pred_value": tour_len + } + self.example_queue.put(copy.deepcopy(example)) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index 969bfb4..ad28e04 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -194,4 +194,48 @@ def train_all(self): return 0 def save_model(self): - torch.save(self.model.state_dict(), f"saves/policynet_{self.n_examples_used:06d}.pth") \ No newline at end of file + torch.save(self.model.state_dict(), f"saves/policynet_{self.n_examples_used:06d}.pth") + +class SupervisedPolicyNetworkTrainer: + + def __init__(self, model, example_queue): + + self.model = model + self.value_loss_fn = nn.MSELoss() + self.choice_loss_fn = nn.CrossEntropyLoss() + self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=1e-5) + + self.example_queue = example_queue + self.losses = [] + self.n_examples_used = 0 + + def train_all(self): + while True: + if not self.train_queue.empty(): + return_code = self.train_example() + if self.n_examples_used//10000 == 0: + self.save_model() + if return_code == -1: + return + + def train_example(self): + self.model.train() + + example = self.example_queue.get() + if example is None: return -1 + graph, choice, value = example["graph"], example["choice"], example["pred_value"] + + pred_choices, pred_value = self.model(graph) + loss = self.choice_loss_fn(pred_choices, choice) + 0.2 * self.value_loss_fn(pred_value, value) + + self.losses.append(loss.item()) + + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + self.n_examples_used += 1 + return 0 + + def save_model(self): + torch.save(self.model.state_dict(), f"saves/policynet_{self.n_examples_used:06d}.pth") From 63e8ce719b047d2b6050b81e6d254c0696024f24 Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Wed, 23 Oct 2019 17:21:54 -0400 Subject: [PATCH 02/13] added supervised experiment option --- main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index bdc09dd..7d3fdb5 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,8 @@ insertion, policy, parallel, - selfplay + selfplay, + supervised ) def main(args): @@ -30,6 +31,8 @@ def main(args): parallel.run(a) elif args.experiment == "selfplay": selfplay.run(a) + elif args.experiment == "supervised": + supervised.run(a) else: raise ValueError("Invalid experiment selection.") From e6edc760fe413c3d7149c7e19b9e2f2f2d4be507 Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Mon, 28 Oct 2019 10:30:30 -0400 Subject: [PATCH 03/13] bug fixes for supervised --- alphatsp/experiments/supervised.py | 26 ++++++++++++++++++-------- alphatsp/solvers/example_generators.py | 15 ++++++++------- alphatsp/solvers/policy_networks.py | 23 +++++++++++++++-------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/alphatsp/experiments/supervised.py b/alphatsp/experiments/supervised.py index eed70d9..d88d7b5 100644 --- a/alphatsp/experiments/supervised.py +++ b/alphatsp/experiments/supervised.py @@ -12,7 +12,9 @@ matplotlib.use("agg") import matplotlib.pyplot as plt -from multiprocessing import Process, 
Manager +import copy + +from torch.multiprocessing import Process, Manager def run(args): @@ -31,7 +33,9 @@ def run(args): manager = Manager() train_queue = manager.Queue() - parent_conn, child_conn = manager.Pipe(False) + shared_dict = manager.dict() + + shared_dict["success"] = False producers = [] for _ in range(n_threads): @@ -40,7 +44,7 @@ def run(args): for p in producers: p.start() - c = Process(target=train, args=(policy_network, train_queue, child_conn, args)) + c = Process(target=train, args=(policy_network, train_queue, shared_dict, args)) c.start() for p in producers: @@ -49,8 +53,13 @@ def run(args): c.join() - train_losses = child_conn.recv() - policy_network = child_conn.recv() + status = shared_dict["success"] + if not status: + print("Experiment failed.") + return -1 + + train_losses = shared_dict["losses"] + policy_network = shared_dict["model"] # display training loss plt.scatter(x=np.arange(len(train_losses)), y=train_losses, marker='.') @@ -68,8 +77,9 @@ def generate_examples(n_examples, train_queue, args): return def train(policy_network, train_queue, connection, args): - trainer = PolicyNetworkTrainer(policy_network, train_queue) + trainer = SupervisedPolicyNetworkTrainer(policy_network, train_queue) trainer.train_all() - connection.send(trainer.losses) - connection.send(trainer.model) + shared_dict["losses"] = copy.deepcopy(trainer.losses) + shared_dict["model"] = copy.deepcopy(trainer.model) + shared_dict["success"] = True return diff --git a/alphatsp/solvers/example_generators.py b/alphatsp/solvers/example_generators.py index ac060b8..0359628 100644 --- a/alphatsp/solvers/example_generators.py +++ b/alphatsp/solvers/example_generators.py @@ -89,15 +89,15 @@ def solve(self): class NNExampleGenerator: - def __init__(self, args, example_queue): + def __init__(self, example_queue, args): self.args = args self.graph_constructor = get_graph_constructor(args.graph_construction) self.example_queue = example_queue self.n_samples = max(args.N//10, 1) - def generate_example(self, n_examples): + def generate_examples(self, n_examples): - for _ in range(n_examples//n_samples): + for _ in range(n_examples//self.n_samples): # generate tsp tsp = TSP(self.args.N, self.args.D) @@ -107,16 +107,17 @@ def generate_example(self, n_examples): # generate examples remaining = set(range(self.args.N)) - for i in sorted(random.sample(range(self.args.N), self.n_samples)): + for i in sorted(random.sample(range(self.args.N-1), self.n_samples)): partial_tour = tour[:i] remaining = remaining - set(partial_tour) + r = sorted(list(remaining)) - graph = self.graph_constructor(tsp, partial_tour, list(remaining)) + graph = self.graph_constructor(tsp, partial_tour, r) example = { "graph": graph, - "choice": tour[i+1], + "choice": r.index(tour[i+1]), "pred_value": tour_len } - self.example_queue.put(copy.deepcopy(example)) + self.example_queue.put(example) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index ad28e04..ffbde3b 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -6,6 +6,10 @@ from torch_geometric.nn import GCNConv, global_mean_pool, ARMAConv, XConv, SAGEConv from torch_geometric.data import Data, DataLoader +if torch.cuda.is_available(): device = torch.device("cuda:0") +else: device = torch.device("cpu") + + class GCNPolicyNetwork(nn.Module): def __init__(self, d=3): super(GCNPolicyNetwork, self).__init__() @@ -26,7 +30,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = 
F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long)) + v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value @@ -75,7 +79,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long)) + v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value @@ -100,7 +104,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long)) + v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value @@ -125,7 +129,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long)) + v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value @@ -150,7 +154,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long)) + v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value @@ -200,7 +204,7 @@ class SupervisedPolicyNetworkTrainer: def __init__(self, model, example_queue): - self.model = model + self.model = model.to(device) self.value_loss_fn = nn.MSELoss() self.choice_loss_fn = nn.CrossEntropyLoss() self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=1e-5) @@ -211,9 +215,9 @@ def __init__(self, model, example_queue): def train_all(self): while True: - if not self.train_queue.empty(): + if not self.example_queue.empty(): return_code = self.train_example() - if self.n_examples_used//10000 == 0: + if self.n_examples_used%10000 == 0: self.save_model() if return_code == -1: return @@ -224,8 +228,11 @@ def train_example(self): example = self.example_queue.get() if example is None: return -1 graph, choice, value = example["graph"], example["choice"], example["pred_value"] + graph = graph.to(device) pred_choices, pred_value = self.model(graph) + choice, value = torch.tensor([choice], device=device), torch.tensor([value], device=device) + pred_choices, pred_value = pred_choices.unsqueeze(0).to(device), pred_value.squeeze(0).to(device) loss = self.choice_loss_fn(pred_choices, choice) + 0.2 * self.value_loss_fn(pred_value, value) self.losses.append(loss.item()) From cf7824d574ef1a4c79b715157f81d612fb315e24 Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Mon, 28 Oct 2019 10:30:49 -0400 Subject: [PATCH 04/13] improved main --- main.py | 42 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/main.py b/main.py index 7d3fdb5..4d459a2 100644 --- a/main.py +++ b/main.py @@ -1,43 +1,17 @@ import argparse -import multiprocessing as mp from args import Args -from alphatsp.experiments import ( - nearestneighbor, - mcts, - exact, - gurobi, - insertion, - policy, - parallel, - selfplay, - supervised -) +import importlib + +import torch.multiprocessing as mp +mp.set_sharing_strategy("file_system") def main(args): - a = Args() - if args.experiment == "nearestneighbor": - 
nearestneighbor.run(a) - elif args.experiment == "mcts": - mcts.run(a) - elif args.experiment == "exact": - exact.run(a) - elif args.experiment == "gurobi": - gurobi.run(a) - elif args.experiment == "insertion": - insertion.run(a) - elif args.experiment == "policy": - policy.run(a) - elif args.experiment == "parallel": - parallel.run(a) - elif args.experiment == "selfplay": - selfplay.run(a) - elif args.experiment == "supervised": - supervised.run(a) - else: - raise ValueError("Invalid experiment selection.") + config = Args() + experiment = importlib.import_module(f"alphatsp.experiments.{args.experiment}") + experiment.run(config) if __name__ == "__main__": - mp.set_start_method('spawn', force=True) + mp.set_start_method("spawn") parser = argparse.ArgumentParser() parser.add_argument("--experiment", type=str, required=True, help="experiment name") args = parser.parse_args() From d960cbf108cfcd19e3f83c4fb05c27249c72b975 Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Mon, 28 Oct 2019 10:32:01 -0400 Subject: [PATCH 05/13] removed c++ --- c++/MCTSNode.cpp | 149 -------------------------------- c++/MCTSNode.h | 35 -------- c++/Makefile | 13 --- c++/mcts.cpp | 217 ----------------------------------------------- 4 files changed, 414 deletions(-) delete mode 100644 c++/MCTSNode.cpp delete mode 100644 c++/MCTSNode.h delete mode 100644 c++/Makefile delete mode 100644 c++/mcts.cpp diff --git a/c++/MCTSNode.cpp b/c++/MCTSNode.cpp deleted file mode 100644 index 239dd3c..0000000 --- a/c++/MCTSNode.cpp +++ /dev/null @@ -1,149 +0,0 @@ -#include "MCTSNode.h" -#include -#include -#include -#include - -float tour_len(std::vector tour, std::vector> points); - -MCTSNode::MCTSNode(int n) { - this->parent = nullptr; - this->visits = 0; - this->total_score = 0.0; - this->avg_score = 0.0; - this->n = n; - this->tour = std::vector(); - this->tour.push_back(0); - this->remaining = std::set(); - for (int i = 1; i < n; i++) - this->remaining.insert(i); -} - -MCTSNode::MCTSNode(MCTSNode* p, std::vector tour, std::set remaining, int n) { - this->parent = p; - this->visits = 0; - this->total_score = 0.0; - this->avg_score = 0.0; - this->n = n; - this->tour = tour; - this->remaining = remaining; -} - -std::random_device MCTSNode::rd = std::random_device(); -std::mt19937 MCTSNode::g = std::mt19937(MCTSNode::rd()); - -bool MCTSNode::has_children() { - return this->children.size() > 0; -} - -bool MCTSNode::is_leaf() { - return this->tour.size() == this->n; -} - -bool MCTSNode::is_expanded() { - return this->children.size() == this->remaining.size(); -} - -std::vector MCTSNode::get_tour() { - std::vector t(this->tour); - t.push_back(t[0]); - return t; -} - -std::shared_ptr MCTSNode::best_child_score() { - float best_score = -1; - std::shared_ptr best_node(nullptr); - for (std::shared_ptr n : this->children) { - if (n->avg_score > best_score) { - best_score = n->avg_score; - best_node = n; - } - } - return best_node; -} - -std::shared_ptr MCTSNode::best_child_visits() { - float best_score = -1; - std::shared_ptr best_node(nullptr); - for (std::shared_ptr n : this->children) { - if (n->visits > best_score) { - best_score = n->visits; - best_node = n; - } - } - return best_node; -} - -std::shared_ptr MCTSNode::best_child_uct() { - float best_score = -1; - std::shared_ptr best_node(nullptr); - for (std::shared_ptr n : this->children) { - float score = n->avg_score + std::sqrt(2 * std::log(this->visits) / n->visits); - if (score > best_score) { - best_score = score; - best_node = n; - } - } - return best_node; -} - 
-std::shared_ptr MCTSNode::expand() { - std::uniform_int_distribution<> dis(0, this->remaining.size()-1); - auto it(this->remaining.begin()); - advance(it, dis(g)); - int k = *it; - - std::vector next_tour(this->tour); - next_tour.push_back(k); - - std::set next_remaining(this->remaining); - next_remaining.erase(k); - - std::shared_ptr m = std::make_shared(this, next_tour, next_remaining, this->n); - this->children.push_back(m); - - return m; -} - -void MCTSNode::backprop(float reward) { - this->visits += 1; - this->total_score += reward; - this->avg_score = this->total_score / (float)(this->visits); - if (this->parent != nullptr) { - this->parent->backprop(reward); - } -} - -float MCTSNode::simulate(std::vector> points) { - - // 1. randomly permute remaining nodes - std::vector r(this->remaining.begin(), this->remaining.end()); - std::shuffle(r.begin(), r.end(), this->g); - - // 2. merge current tour with permuted remaining nodes - std::vector sim_tour(this->tour); - sim_tour.insert(sim_tour.end(), r.begin(), r.end()); - sim_tour.push_back(sim_tour[0]); - - // 3. compute the length of the new tour and return - float len = tour_len(sim_tour, points); - return len; - -} - -float tour_len(std::vector tour, std::vector> points) { - float len = 0; - int d = points[0].size(); - int n = points.size(); - for (int i = 1; i < n+1; i++) { - float edge_len = 0; - for (int j = 0; j < d; j++) { - float diff = points[tour[i]][j] - points[tour[i-1]][j]; - diff = diff * diff; - edge_len += diff; - } - edge_len = std::sqrt(edge_len); - len += edge_len; - } - return len; -} diff --git a/c++/MCTSNode.h b/c++/MCTSNode.h deleted file mode 100644 index e655488..0000000 --- a/c++/MCTSNode.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef MCTSNODEH -#define MCTSNODEH - -#include -#include -#include - -class MCTSNode { -private: - MCTSNode* parent; - std::vector tour; - std::set remaining; - int visits; - float total_score; - float avg_score; - int n; - static std::random_device rd; - static std::mt19937 g; -public: - MCTSNode(int n); - MCTSNode(MCTSNode* p, std::vector tour, std::set remaining, int n); - std::shared_ptr expand(); - void backprop(float reward); - float simulate(std::vector> points); - std::vector get_tour(); - bool has_children(); - bool is_leaf(); - bool is_expanded(); - std::shared_ptr best_child_score(); - std::shared_ptr best_child_visits(); - std::shared_ptr best_child_uct(); - std::vector> children; -}; - -#endif \ No newline at end of file diff --git a/c++/Makefile b/c++/Makefile deleted file mode 100644 index 0c60cd6..0000000 --- a/c++/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -all: mcts - -mcts: mcts.o mctsnode.o - g++ --std=c++17 -O3 -o mcts mcts.o mctsnode.o - -mcts.o: mcts.cpp - g++ --std=c++17 -O3 -c -o mcts.o mcts.cpp - -mctsnode.o: MCTSNode.cpp MCTSNode.h - g++ --std=c++17 -O3 -c -o mctsnode.o MCTSNode.cpp - -clean: - rm *.o mcts \ No newline at end of file diff --git a/c++/mcts.cpp b/c++/mcts.cpp deleted file mode 100644 index 939b214..0000000 --- a/c++/mcts.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include -#include "MCTSNode.h" - -std::shared_ptr mcts(std::shared_ptr rootnode, std::vector> points, int iterations); -float compute_tour_length(std::vector tour, std::vector> points); -std::vector greedy(std::vector> points); -void random_tours(std::vector> points); - -int main() { - - // 1. 
Create TSP instance - - int n = 60; - int d = 2; - int iterations = 1000; - - std::vector> points; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis(0.0, 1.0); - - for (int i = 0; i < n; i++) { - std::vector p = {(float)dis(gen), (float)dis(gen)}; - points.push_back(p); - } - - // 2. Construct MCTS tree - - std::shared_ptr rootnode = std::make_shared(n); - std::shared_ptr node(rootnode); - - // 3. Run MCTS at each level of the tree - - while (!node->is_leaf()) { - node = mcts(node, points, iterations); - } - - // 4. Display result - - std::vector optimal_tour(node->get_tour()); - float optimal_tour_length = compute_tour_length(optimal_tour, points); - - for (int i = 0; i < optimal_tour.size(); i++) { - std::cout << optimal_tour[i]; - if (i != optimal_tour.size() - 1) { - std::cout << " -> "; - } - } - std::cout << std::endl; - std::cout << "Tour length: " << optimal_tour_length << std::endl; - - // 5. Run greedy - - std::vector greedy_tour = greedy(points); - float greedy_tour_length = compute_tour_length(greedy_tour, points); - - for (int i = 0; i < greedy_tour.size(); i++) { - std::cout << greedy_tour[i]; - if (i != greedy_tour.size() - 1) { - std::cout << " -> "; - } - } - std::cout << std::endl; - std::cout << "Greedy tour length: " << greedy_tour_length << std::endl; - - // 6. Random tours - random_tours(points); - - // 7. Return - return 0; -} - -std::shared_ptr mcts(std::shared_ptr rootnode, std::vector> points, int iterations) { - - int n = points.size(); - - // 1. Begin search - for (int it=0; it < iterations; it++) { - - std::shared_ptr node(rootnode); - - // 2. Descend - while (!node->is_leaf()) { - if (!node->is_expanded()) { - node = node->expand(); - break; - } else { - node = node->best_child_uct(); - } - } - - // 3. Simulate - float tour_len = node->simulate(points); - float reward = ((2.0 * n) - tour_len) / (2.0 * n); - - // 4. Backprop - node->backprop(reward); - - } - - // 5. Select and return best child node - return rootnode->best_child_score(); - -} - -float compute_tour_length(std::vector tour, std::vector> points) { - float len = 0; - int d = points[0].size(); - int n = points.size(); - for (int i = 1; i < n+1; i++) { - float edge_len = 0; - for (int j = 0; j < d; j++) { - float diff = points[tour[i]][j] - points[tour[i-1]][j]; - diff = diff * diff; - edge_len += diff; - } - edge_len = std::sqrt(edge_len); - len += edge_len; - } - return len; -} - -std::vector greedy(std::vector> points) { - - // 1. Get start node - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(0, points.size()-1); - int start = dis(gen); - - // 2. Start tour - std::vector tour = {start}; - - // 3. Compute remaining - std::set remaining; - for (int i = 0; i < points.size(); i++) { - if (i != start) { - remaining.insert(i); - } - } - - // 4. Build tour - std::vector pt1 = points[start]; - while (!remaining.empty()) { - - // 4.1 Compute min distance - int next_node = -1; - float min_dist = std::numeric_limits::max(); - for (int ind2 : remaining) { - - std::vector pt2 = points[ind2]; - - float edge_len = 0; - for (int j = 0; j < pt1.size(); j++) { - float diff = pt1[j] - pt2[j]; - diff = diff * diff; - edge_len += diff; - } - edge_len = std::sqrt(edge_len); - - if (edge_len < min_dist) { - min_dist = edge_len; - next_node = ind2; - } - - } - - // 4.2 Add to tour, remove from remaining - pt1 = points[next_node]; - tour.push_back(next_node); - remaining.erase(next_node); - - } - - // 5. 
Complete tour - tour.push_back(tour[0]); - - // 6. Return tour - return tour; - -} - -void random_tours(std::vector> points) { - - int iterations = 100000; - - std::random_device rd; - std::mt19937 gen(rd()); - - std::vector tour; - for (int i = 0; i < points.size(); i++) { - tour.push_back(i); - } - - float total_len = 0; - float best_len = std::numeric_limits::max(); - - for (int i = 0; i < iterations; i++) { - std::vector t(tour); - std::shuffle(t.begin(), t.end(), gen); - t.push_back(t[0]); - float l = compute_tour_length(t, points); - total_len += l; - if (l < best_len) - best_len = l; - } - - float avg_len = total_len / (float)iterations; - - std::cout << "Random avg length: " << avg_len << std::endl; - std::cout << "Random best length: " << best_len << std::endl; - -} From 61c4a71d574842870364ac64e2426d0318f297da Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Wed, 13 Nov 2019 16:19:47 -0500 Subject: [PATCH 06/13] multiprocessing fixes --- alphatsp/solvers/graph_construction.py | 8 ++++---- alphatsp/solvers/policy_networks.py | 5 +++++ main.py | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/alphatsp/solvers/graph_construction.py b/alphatsp/solvers/graph_construction.py index ce5fba6..82217b0 100644 --- a/alphatsp/solvers/graph_construction.py +++ b/alphatsp/solvers/graph_construction.py @@ -17,7 +17,7 @@ def construct_graph_grow(tsp, tour, remaining): x = torch.cat([points, choices.unsqueeze(-1).to(dtype=torch.float)], dim=-1) - graph = Data(x=x, pos=points, edge_index=edges, y=choices) + graph = {"x": x, "pos": points, "edge_index": edges, "edge_attr": edge_lengths, "y": choices} return graph def construct_graph_prune(tsp, tour, remaining): @@ -40,7 +40,7 @@ def construct_graph_prune(tsp, tour, remaining): x = torch.cat([points, choices.unsqueeze(-1).to(dtype=torch.float)], dim=-1) - graph = Data(x=x, pos=points, edge_index=edges, y=choices) + graph = {"x": x, "pos": points, "edge_index": edges, "edge_attr": edge_lengths, "y": choices} return graph def construct_graph_prune_weighted(tsp, tour, remaining): @@ -71,5 +71,5 @@ def construct_graph_prune_weighted(tsp, tour, remaining): x = torch.cat([points, choices.unsqueeze(-1).to(dtype=torch.float)], dim=-1) - graph = Data(x=x, pos=points, edge_index=edges, edge_attr=edge_lengths, y=choices) - return graph \ No newline at end of file + graph = {"x": x, "pos": points, "edge_index": edges, "edge_attr": edge_lengths, "y": choices} + return graph diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index ffbde3b..b16d98d 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -178,6 +178,9 @@ def train_example(self): if example is None: return -1 graph, choice_probs, value = example["graph"], example["choice_probs"], example["pred_value"] + graph = Data(**graph) + graph = graph.to(device) + pred_choices, pred_value = self.model(graph) loss = self.loss_fn(pred_choices, choice_probs) + (0.2 * self.loss_fn(pred_value, value)) @@ -228,6 +231,8 @@ def train_example(self): example = self.example_queue.get() if example is None: return -1 graph, choice, value = example["graph"], example["choice"], example["pred_value"] + + graph = Data(**graph) graph = graph.to(device) pred_choices, pred_value = self.model(graph) diff --git a/main.py b/main.py index 4d459a2..4ab9b18 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ def main(args): experiment.run(config) if __name__ == "__main__": - mp.set_start_method("spawn") + mp.set_start_method("spawn", 
force=True) parser = argparse.ArgumentParser() parser.add_argument("--experiment", type=str, required=True, help="experiment name") args = parser.parse_args() From 8dfce7630f782f500fd81d33bbdbf59229c351bd Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Wed, 13 Nov 2019 16:24:44 -0500 Subject: [PATCH 07/13] minor fixes --- alphatsp/experiments/supervised.py | 2 +- alphatsp/solvers/policy_networks.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/alphatsp/experiments/supervised.py b/alphatsp/experiments/supervised.py index d88d7b5..fd6a7df 100644 --- a/alphatsp/experiments/supervised.py +++ b/alphatsp/experiments/supervised.py @@ -76,7 +76,7 @@ def generate_examples(n_examples, train_queue, args): generator.generate_examples(n_examples) return -def train(policy_network, train_queue, connection, args): +def train(policy_network, train_queue, shared_dict, args): trainer = SupervisedPolicyNetworkTrainer(policy_network, train_queue) trainer.train_all() shared_dict["losses"] = copy.deepcopy(trainer.losses) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index b16d98d..a646e95 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -220,6 +220,8 @@ def train_all(self): while True: if not self.example_queue.empty(): return_code = self.train_example() + if self.n_examples_used%1000 == 0: + print(f"iter={self.n_examples_used}, avg_loss={sum(self.losses[-100:])/100:.4f}") if self.n_examples_used%10000 == 0: self.save_model() if return_code == -1: From a27d75be71d6d8e44fd2918618660731b5484793 Mon Sep 17 00:00:00 2001 From: Felix Parker Date: Sat, 16 Nov 2019 17:09:28 -0500 Subject: [PATCH 08/13] added basic logger for supervised experiment --- alphatsp/experiments/supervised.py | 20 +------- alphatsp/logger.py | 80 +++++++++++++++++++++++++++++ alphatsp/solvers/policy_networks.py | 21 ++++---- 3 files changed, 92 insertions(+), 29 deletions(-) create mode 100644 alphatsp/logger.py diff --git a/alphatsp/experiments/supervised.py b/alphatsp/experiments/supervised.py index fd6a7df..a83c168 100644 --- a/alphatsp/experiments/supervised.py +++ b/alphatsp/experiments/supervised.py @@ -8,10 +8,6 @@ import torch import numpy as np -import matplotlib -matplotlib.use("agg") -import matplotlib.pyplot as plt - import copy from torch.multiprocessing import Process, Manager @@ -58,19 +54,6 @@ def run(args): print("Experiment failed.") return -1 - train_losses = shared_dict["losses"] - policy_network = shared_dict["model"] - - # display training loss - plt.scatter(x=np.arange(len(train_losses)), y=train_losses, marker='.') - plt.title("Loss") - plt.xlabel("examples") - plt.ylabel("loss") - plt.savefig("saves/loss_parallel.png") - - # save network - torch.save(policy_network.state_dict(), "saves/policy_network.pth") - def generate_examples(n_examples, train_queue, args): generator = NNExampleGenerator(train_queue, args) generator.generate_examples(n_examples) @@ -79,7 +62,6 @@ def generate_examples(n_examples, train_queue, args): def train(policy_network, train_queue, shared_dict, args): trainer = SupervisedPolicyNetworkTrainer(policy_network, train_queue) trainer.train_all() - shared_dict["losses"] = copy.deepcopy(trainer.losses) - shared_dict["model"] = copy.deepcopy(trainer.model) + shared_dict["model"] = copy.deepcopy(trainer.model.cpu()) shared_dict["success"] = True return diff --git a/alphatsp/logger.py b/alphatsp/logger.py new file mode 100644 index 0000000..a37c9a5 --- /dev/null +++ b/alphatsp/logger.py @@ -0,0 
+1,80 @@ +import os +import datetime +import csv +import shutil +import torch +import numpy as np +import pandas as pd + +import matplotlib +matplotlib.use("agg") +import matplotlib.pyplot as plt +plt.style.use("seaborn") + +class Logger: + + def __init__(self, args, enabled=True): + self.logging = enabled + if not self.logging: + return + + self.dt = datetime.datetime.now().strftime("%m%d_%H%M") + self.path = f"./saves/{self.dt}" + + if not os.path.exists(self.path): + os.makedirs(self.path) + + self.losses = [] + self.eval = [] + + self.main_log_fn = os.path.join(self.path, "log.txt") + shutil.copy2("args.py", self.path) + + def save_model(self, model, iterations): + if not self.logging: return + if isinstance(iterations, int): + fn = os.path.join(self.path, f"policynet_{epoch:07d}.pth") + else: + fn = os.path.join(self.path, f"policynet_{epoch}.pth") + torch.save(model.state_dict(), fn) + self.print(f"Saved model to: {fn}\n") + + def print(self, *x): + print(*x) + self.log(*x) + + def log(self, *x): + if not self.logging: return + with open(self.main_log_fn, "a") as f: + print(*x, file=f, flush=True) + + def log_loss(self, l): + self.losses.append(l) + + def log_eval(self, data): + if not self.logging: return + self.eval.append(data) + + def save(self): + if not self.logging: return + + with open(os.path.join(self.path, "loss.csv"), "w") as f: + csvwriter = csv.DictWriter(f, ["it", "loss"]) + csvwriter.writeheader() + for it, loss in enumerate(self.losses): + row = {"it": it, "loss": loss} + csvwriter.writerow(row) + + with open(os.path.join(self.path, "eval.csv"), "w") as f: + cols = ["it"] + sorted(list(set(self.eval[0].keys()) - set(["it"]))) + csvwriter = csv.DictWriter(f, cols) + csvwriter.writeheader() + for row in self.eval_scores: + csvwriter.writerow(row) + + plt.clf() + + plt.plot(self.losses) + plt.xlabel("iterations") + plt.ylabel("training loss") + plt.savefig(os.path.join(self.path, "losses.png")) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index a646e95..f4fe67a 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -6,6 +6,8 @@ from torch_geometric.nn import GCNConv, global_mean_pool, ARMAConv, XConv, SAGEConv from torch_geometric.data import Data, DataLoader +from alphatsp.logger import Logger + if torch.cuda.is_available(): device = torch.device("cuda:0") else: device = torch.device("cpu") @@ -213,18 +215,21 @@ def __init__(self, model, example_queue): self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=1e-5) self.example_queue = example_queue - self.losses = [] self.n_examples_used = 0 + self.logger = Logger() + def train_all(self): while True: if not self.example_queue.empty(): return_code = self.train_example() - if self.n_examples_used%1000 == 0: - print(f"iter={self.n_examples_used}, avg_loss={sum(self.losses[-100:])/100:.4f}") - if self.n_examples_used%10000 == 0: - self.save_model() + if self.n_examples_used%1000 == 0 and self.n_examples_used!=0: + self.logger.print(f"iter={self.n_examples_used}, avg_loss={sum(self.logger.losses[-100:])/100:.4f}") + if self.n_examples_used%10000 == 0 and self.n_examples_used!=0: + self.logger.save_model(self.model, self.n_examples_used) if return_code == -1: + self.logger.save() + self.logger.save_model(self.model, "final") return def train_example(self): @@ -242,14 +247,10 @@ def train_example(self): pred_choices, pred_value = pred_choices.unsqueeze(0).to(device), pred_value.squeeze(0).to(device) loss = 
self.choice_loss_fn(pred_choices, choice) + 0.2 * self.value_loss_fn(pred_value, value) - self.losses.append(loss.item()) - self.optimizer.zero_grad() loss.backward() self.optimizer.step() + self.logger.log_loss(loss.item()) self.n_examples_used += 1 return 0 - - def save_model(self): - torch.save(self.model.state_dict(), f"saves/policynet_{self.n_examples_used:06d}.pth") From 7b0c11f2a58417587836edf72b5477d7dc78b54c Mon Sep 17 00:00:00 2001 From: Michel Gokan Date: Fri, 31 Jan 2020 11:12:37 +0100 Subject: [PATCH 09/13] Adding some missing dependencies --- requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5a68f7e..983afdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,10 @@ torch numpy matplotlib torch_geometric +torch_sparse +torch_scatter +torch_cluster scipy cython tqdm -pyconcorde \ No newline at end of file +pyconcorde From be2b2b03030c4cff13bc81043101b13b35dbc3f6 Mon Sep 17 00:00:00 2001 From: Michel Gokan Date: Fri, 31 Jan 2020 11:38:58 +0100 Subject: [PATCH 10/13] Fixing inconsistent use of tabs and spaces in indentation in logger.py --- alphatsp/logger.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/alphatsp/logger.py b/alphatsp/logger.py index a37c9a5..f3a3251 100644 --- a/alphatsp/logger.py +++ b/alphatsp/logger.py @@ -72,9 +72,9 @@ def save(self): for row in self.eval_scores: csvwriter.writerow(row) - plt.clf() + plt.clf() - plt.plot(self.losses) - plt.xlabel("iterations") - plt.ylabel("training loss") - plt.savefig(os.path.join(self.path, "losses.png")) + plt.plot(self.losses) + plt.xlabel("iterations") + plt.ylabel("training loss") + plt.savefig(os.path.join(self.path, "losses.png")) From 7240cc67b9cdc2ff003eb177db40b927da06481f Mon Sep 17 00:00:00 2001 From: Michel Gokan Date: Fri, 31 Jan 2020 16:44:18 +0100 Subject: [PATCH 11/13] Adding PyCharm project related folder (.idea) to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9006844..8292d48 100644 --- a/.gitignore +++ b/.gitignore @@ -245,3 +245,4 @@ c++/mcts backup/ old/ saves/ +.idea/ From ce0b71d3e09ad7de64c4f36a5f46e8b9d5fc35a8 Mon Sep 17 00:00:00 2001 From: Michel Gokan Date: Tue, 4 Feb 2020 20:17:36 +0100 Subject: [PATCH 12/13] Fixing an error in policy_networks.py related to wrong usage of dots instead of brackets when working with dictionaries (graph object) --- alphatsp/solvers/policy_networks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index f4fe67a..ee68149 100644 --- a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -65,12 +65,12 @@ def __init__(self, d=3): num_layers=2, shared_weights=True, dropout=0.1, - act=None) + act=None).to(device) - self.fc = nn.Linear(16, 1) + self.fc = nn.Linear(16, 1).to(device) def forward(self, graph): - x, edges, choices = graph.x, graph.edge_index, graph.y + x, edges, choices = graph['x'], graph['edge_index'], graph['y'] x = self.conv1(x, edges) x = F.relu(x) From 0f5e3c2c489d572f09f4d3ed2722c9d5cbda8179 Mon Sep 17 00:00:00 2001 From: Michel Gokan Date: Tue, 4 Feb 2020 20:26:15 +0100 Subject: [PATCH 13/13] Replacing graph.num_nodes with x.size(0) --- alphatsp/solvers/policy_networks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphatsp/solvers/policy_networks.py b/alphatsp/solvers/policy_networks.py index ee68149..5d0dcd9 100644 --- 
a/alphatsp/solvers/policy_networks.py +++ b/alphatsp/solvers/policy_networks.py @@ -81,7 +81,7 @@ def forward(self, graph): choice = torch.masked_select(c.squeeze(), choices) choice = F.softmax(choice, dim=0) - v = global_mean_pool(x, torch.zeros(graph.num_nodes, dtype=torch.long, device=x.device)) + v = global_mean_pool(x, torch.zeros(x.size(0), dtype=torch.long, device=x.device)) value = self.fc(v) return choice, value
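
Taken together, the supervised-learning patches in this series set up a producer/consumer training pipeline: several generate_examples() producer processes push example dicts onto a shared Manager().Queue(), a single SupervisedPolicyNetworkTrainer process consumes them, and a None sentinel (pushed by the parent after the producers join) tells the trainer to stop and report results through a shared dict. The sketch below is an illustrative, standalone reduction of that pattern, not the project's code: it uses the standard-library multiprocessing module (the patched code uses torch.multiprocessing with the "spawn" start method and the "file_system" sharing strategy), and the produce/consume functions are placeholders for the real example generator and trainer.

# Illustrative sketch only: a standalone producer/consumer queue loop mirroring
# the shape of alphatsp/experiments/supervised.py, with placeholder "examples".
from multiprocessing import Process, Manager

def produce(queue, n_examples):
    # stand-in for NNExampleGenerator.generate_examples()
    for i in range(n_examples):
        queue.put({"example_id": i})

def consume(queue, shared):
    # stand-in for SupervisedPolicyNetworkTrainer.train_all()
    # (the real trainer polls the queue; a blocking get() keeps the sketch short)
    n_seen = 0
    while True:
        example = queue.get()
        if example is None:        # sentinel pushed by the parent after producers finish
            break
        n_seen += 1                # a real trainer would run one optimizer step here
    shared["success"] = True
    shared["n_examples_used"] = n_seen

if __name__ == "__main__":
    manager = Manager()
    queue, shared = manager.Queue(), manager.dict()
    shared["success"] = False

    producers = [Process(target=produce, args=(queue, 50)) for _ in range(4)]
    trainer = Process(target=consume, args=(queue, shared))

    for p in producers:
        p.start()
    trainer.start()

    for p in producers:
        p.join()
    queue.put(None)                # all producers done: signal the trainer to stop
    trainer.join()

    print(shared["success"], shared["n_examples_used"])

With the patches applied, the real pipeline is launched with "python main.py --experiment supervised": after the main.py rework, the experiment module alphatsp.experiments.<name> is imported dynamically via importlib and its run(config) is called.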