Commit fc1f9a8

Add memory-efficient trick with gradients
1 parent 7a2d62d commit fc1f9a8

10 files changed (+3914 -31 lines)

.gitignore (+1 -4)

@@ -1,6 +1,3 @@
 /.ipynb_checkpoints
 **/__pycache__
-/lkh_data/*
-/.vscode
-/checkpts/*
-/valsets/*
+/.vscode

attention_dynamic_model.py (+44 -7)

@@ -185,7 +185,32 @@ def get_projections(self, embeddings, context_vectors):

         return K_tanh, Q_context, K, V

-    def forward(self, inputs, return_pi=False):
+
+    def fwd_rein_loss(self, inputs, baseline, bl_vals, num_batch, return_pi=False):
+        """
+        Forward pass plus REINFORCE loss, computed in a memory-efficient way.
+        This sacrifices a bit of runtime but is far better memory-wise: it reorders
+        the terms in the gradient formula so that we don't keep gradients for the
+        whole sequence alive for a long time, which would consume a lot of memory.
+        """
+
+        on_training = self.training
+        self.eval()
+        with torch.no_grad():
+            cost, log_likelihood, seq = self(inputs, True)
+            bl_val = bl_vals[num_batch] if bl_vals is not None else baseline.eval(inputs, cost)
+            pre_cost = cost - bl_val.detach()
+            detached_loss = torch.mean(pre_cost * log_likelihood)
+
+        if on_training: self.train()
+        return detached_loss, self(inputs, return_pi, seq, pre_cost)
+
+    def forward(self, inputs, return_pi=False, pre_selects=None, pre_cost=None):
+        """
+        Forward method. Works as described in the paper. If pre_selects is not None
+        (which implies that pre_cost is not None either), fwd_rein_loss is calling
+        it; see that method for a description of why this is useful.
+        """

         self.batch_size = inputs[0].shape[0]

@@ -194,7 +219,10 @@ def forward(self, inputs, return_pi=False):
         sequences = []
         ll = torch.zeros(self.batch_size)

+        if pre_selects is not None:
+            pre_selects = pre_selects.transpose(0, 1)
         # Perform decoding steps
+        pre_select_idx = 0
         while not state.all_finished():

             state.i = torch.zeros(1, dtype=torch.int64)

@@ -222,22 +250,31 @@ def forward(self, inputs, return_pi=False):
             log_p = self.get_log_p(mha, K_tanh, mask)  # (batch_size, 1, n_nodes)

             # next step is to select node
-            selected = self._select_node(log_p.detach())  # (batch_size,)
+            if pre_selects is None:
+                selected = self._select_node(log_p.detach())  # (batch_size,)
+            else:
+                selected = pre_selects[pre_select_idx]

             state.step(selected.detach().cpu())

-            ll += self.get_likelihood_selection(log_p[:, 0, :].cpu(), selected.detach().cpu())
-
+            curr_ll = self.get_likelihood_selection(log_p[:, 0, :].cpu(), selected.detach().cpu())
+            if pre_selects is not None:
+                curr_loss = (curr_ll * pre_cost).sum() / self.batch_size
+                curr_loss.backward(retain_graph=True)
+                curr_ll = curr_ll.detach()
+            ll += curr_ll
+
             sequences.append(selected.detach().cpu())
+            pre_select_idx += 1
             # torch.cuda.empty_cache()
             # torch.cuda.empty_cache()

         pi = torch.stack(sequences, dim=1)  # (batch_size, len(outputs))
         cost = self.problem.get_costs((inputs[0].detach().cpu(), inputs[1].detach().cpu(), inputs[2].detach().cpu()), pi)
-        if return_pi:
-            return cost, ll, pi

-        return cost, ll
+        ret = [cost, ll]
+        if return_pi: ret.append(pi)
+        return ret

     def set_input_device(self, inp_tens):
         if self.dev is None: self.dev = get_dev_of_mod(self)
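
The docstring above only sketches the trick in words, so here is a minimal self-contained illustration of the same idea. Because the advantage (cost - baseline) is detached, the gradient of mean((cost - baseline) * sum_t log p_t) equals the sum over steps of the gradient of mean((cost - baseline) * log p_t): one can sample the tour under no_grad, then replay it and call backward once per decoding step, accumulating the identical total gradient while only ever holding one step's graph in memory. TinyPolicy and every other name below are illustrative stand-ins, not the repository's API.

    import torch
    import torch.nn as nn

    # Hypothetical toy policy, for illustration only: scores the
    # not-yet-visited nodes given a (batch, n_nodes) 0/1 visit mask.
    class TinyPolicy(nn.Module):
        def __init__(self, n_nodes=10, hidden=32):
            super().__init__()
            self.net = nn.Sequential(nn.Linear(n_nodes, hidden), nn.ReLU(),
                                     nn.Linear(hidden, n_nodes))

        def forward(self, visited):
            logits = self.net(visited).masked_fill(visited.bool(), float('-inf'))
            return torch.log_softmax(logits, dim=-1)

    torch.manual_seed(0)
    batch, n_nodes = 4, 10
    policy = TinyPolicy(n_nodes)

    # Pass 1 (no_grad): sample the whole tour without building any graph.
    with torch.no_grad():
        visited = torch.zeros(batch, n_nodes)
        selections = []
        for _ in range(n_nodes):
            probs = policy(visited).exp()
            sel = probs.multinomial(1).squeeze(1)          # (batch,)
            selections.append(sel)
            visited = visited.scatter(1, sel.unsqueeze(1), 1.0)
    advantage = torch.randn(batch)  # stand-in for detached (cost - baseline)

    # Pass 2: replay the same selections, calling backward per step so each
    # step's graph is freed immediately; .grad accumulates the total gradient.
    visited = torch.zeros(batch, n_nodes)
    for sel in selections:
        log_p = policy(visited)                            # fresh graph for this step
        step_ll = log_p.gather(1, sel.unsqueeze(1)).squeeze(1)
        ((advantage * step_ll).sum() / batch).backward()   # frees this step's graph
        visited = visited.scatter(1, sel.unsqueeze(1), 1.0)
    # policy's .grad now equals one big backward over
    # mean((cost - baseline) * sum_t log p_t), at a fraction of the peak memory.

In the commit itself this per-step backward is the curr_loss.backward(retain_graph=True) call; retain_graph=True appears necessary because every decoding step backpropagates through the same shared encoder projections, while each step's private graph is still released as soon as the loop drops its references to it.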

backup_results_VRP_20_2021-08-31.csv (+41)

@@ -0,0 +1,41 @@
+epochs,train_loss,train_cost,val_cost
+0,0.057725433,8.126152,7.142528
+1,0.43523985,7.0198774,6.8331623
+2,-0.05244048,6.799927,6.692892
+3,-0.21513543,6.7059703,6.6481056
+4,-0.16910644,6.6476564,6.585553
+5,-0.21084756,6.606468,6.55772
+6,-0.20123357,6.5790243,6.533435
+7,-0.20451881,6.5594954,6.523542
+8,-0.17670833,6.5427947,6.505786
+9,-0.17904146,6.530585,6.4873476
+10,-0.19809744,6.519344,6.4940104
+11,-0.15416735,6.508296,6.478488
+12,-0.16621172,6.4995356,6.4869556
+13,-0.13755234,6.4940825,6.47636
+14,-0.11532383,6.4876075,6.460555
+15,-0.14074893,6.4802,6.4586205
+16,-0.13124135,6.474483,6.4573145
+17,-0.11284375,6.4697905,6.4460807
+18,-0.12861899,6.464823,6.437971
+19,-0.13243529,6.459164,6.4339194
+20,-0.13290825,6.4535875,6.438004
+21,-0.12190188,6.44997,6.443571
+22,-0.11239375,6.447536,6.420986
+23,-0.12801744,6.443129,6.431984
+24,-0.11823546,6.440166,6.42777
+25,-0.107929625,6.436949,6.418355
+26,-0.09892246,6.4335003,6.4274316
+27,-0.09260898,6.430896,6.412428
+28,-0.11238246,6.429055,6.4071865
+29,-0.11275884,6.424093,6.4058414
+30,-0.11065548,6.4216547,6.401159
+31,-0.10574161,6.4185834,6.3972263
+32,-0.1168384,6.415805,6.399093
+33,-0.11321704,6.4131637,6.389376
+34,-0.1172516,6.4110584,6.393464
+35,-0.11503467,6.410205,6.3980913
+36,-0.109255955,6.4086895,6.392976
+37,-0.10428008,6.4064713,6.39075
+38,-0.0974484,6.4040713,6.3854094
+39,-0.108119674,6.403427,6.39277

checkpts/.gitignore (+3)

@@ -0,0 +1,3 @@
+*
+*/
+!.gitignore
