final doc fixes

HilaManor · HilaManor · commit 7a54b72085c8 · 2022-03-15T17:08:04.000+02:00
diff --git a/code/FlowersDataset.py b/code/FlowersDataset.py
@@ -79,9 +79,9 @@ def __getitem__(self, idx):
         return im, txt2im_labels, im2txt_masked_labels, img_idx, txt_idx
 
     def get_captions_of_image(self, img_idx):
-        """Get all the captions (10) of a given image
+        """Get all the captions (10) of a given image by its index
 
-        :param img_idx: idx of a given image
+        :param img_idx: index of a given image
         :return: a list of all the captions (10) of a given image
         """
         with open(os.path.join(self.txts_path, f'image_{img_idx:05}.txt')) as f:
diff --git a/code/fid_score_override.py b/code/fid_score_override.py
@@ -44,8 +44,8 @@ def get_activations(files, model, batch_size=50, dims=2048, device='cpu',
                'Setting batch size to data size'))
         batch_size = len(files)
 
-    dataset = fid_score.ImagePathDataset(files, transforms=TF.Compose([TF.Resize((224, 224)), 
-                                                             TF.ToTensor()]))
+    dataset = fid_score.ImagePathDataset(files, transforms=TF.Compose([TF.Resize((224, 224)),
+                                                                       TF.ToTensor()]))
     dataloader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
diff --git a/code/generating.py b/code/generating.py
@@ -211,7 +211,7 @@ def generate_test_examples(device, gens_dir, im2txt_model, test_loader, txt2im_m
     parser.add_argument('--out_dir', required=True, type=str, help='A directory of a trained model to generate for')
     parser.add_argument('--text', type=str, default=None, help='Text prompt for which to generate an image')
     parser.add_argument('--img_path', type=str, default=None, help='Path to the image for which to generate a caption')
-    parser.add_argument('--amount', type=int, default=1, help="The amount of images to generate from the cutsom text, "
+    parser.add_argument('--amount', type=int, default=1, help="The amount of images to generate from the custom text, "
                                                               "if given (via '--text')")
     parsed_args = parser.parse_args()
   
diff --git a/code/main.py b/code/main.py
@@ -34,6 +34,7 @@
         args = yaml.load(f, Loader=yaml.FullLoader)
     args.update(vars(parsed_args))
 
+    # If continuing training from a checkpoint was requested, check that it is actually possible to continue
     if args["continue_training"] is not None:
         # The first saved thing is the generator_k1.pth file, so if no such file exists - there is no mid-training
         # state to continue from
diff --git a/code/tests.py b/code/tests.py
diff --git a/code/training.py b/code/training.py
@@ -340,14 +340,19 @@ def load_checkpoint(txt2im_model, im2txt_model, txt2im_optimizer, im2txt_optimiz
     start_epoch = 1
     start_k = 1
 
+    # Search the output directory for saved weight files (.pth)
     pth_files = [x for x in os.listdir(args["output_dir"]) if x.endswith('.pth')]
+
+    # Search for end-of-epoch saved models (containing both parts - txt2im and im2txt)
     avail_model_epochs = [int(x[8:-4]) for x in pth_files if x.startswith('mod')]
     if len(avail_model_epochs):
-        max_model_epoch = max(avail_model_epochs)
+        max_model_epoch = max(avail_model_epochs)  # get the max epoch saved
         model_pth_path = os.path.join(args["output_dir"], f'models_e{max_model_epoch}.pth')
-        epoch_checkpoint = torch.load(model_pth_path, map_location=device)
-        start_epoch = epoch_checkpoint["epochs"] + 1
+        epoch_checkpoint = torch.load(model_pth_path, map_location=device)  # load it
+        start_epoch = epoch_checkpoint["epochs"] + 1  # the starting epoch should be the following epoch
         losses = epoch_checkpoint["losses"]
+
+        # load the last saved models
         im2txt_model.load_state_dict(epoch_checkpoint["im2txt"])
         im2txt_optimizer.load_state_dict(epoch_checkpoint["optimizer_im2txt"])
         txt2im_model.load_state_dict(epoch_checkpoint["txt2im"])
@@ -357,6 +362,8 @@ def load_checkpoint(txt2im_model, im2txt_model, txt2im_optimizer, im2txt_optimiz
         del epoch_checkpoint
         torch.cuda.empty_cache()
 
+    # the txt2im model can continue to be updated (and saved) in the middle of an epoch, so now we search for the
+    # latest saved gstep after the max epoch found previously
     gen_files = [x for x in pth_files if x.startswith('gen')]
     max_gstep = 0
     best_gstep_checkpoint = None
@@ -368,20 +375,24 @@ def load_checkpoint(txt2im_model, im2txt_model, txt2im_optimizer, im2txt_optimiz
     
     del gstep_checkpoint
     torch.cuda.empty_cache()
-    
+
+    # if we found a more recent gstep we now need to load the updated txt2im
     if max_gstep != 0:
         best_gstep_checkpoint = torch.load(os.path.join(args["output_dir"], f'generator_k{max_gstep}.pth'), map_location=device)
 
     if best_gstep_checkpoint is None and len(avail_model_epochs):
         print(f"loaded TXT2IM from {model_pth_path}\n"
               f"Starting on epoch {start_epoch}, k {start_k}")
+        # the TXT2IM model was loaded from the last saved epoch
     elif best_gstep_checkpoint is None:
+        # If we got here, someone deleted the saved pth files while the code was running
         raise RuntimeError("No pth files saved. the code shouldn't reach here")
     else:
         txt2im_model.load_state_dict(best_gstep_checkpoint["txt2im"])
         txt2im_optimizer.load_state_dict(best_gstep_checkpoint["optimizer_txt2im"])
         start_k = best_gstep_checkpoint["k"] + 1
         print(f"loaded TXT2IM from {os.path.join(args['output_dir'], f'generator_k{max_gstep}.pth')}\n"
               f"Starting on epoch {start_epoch}, k {start_k}")
+        # the TXT2IM model was successfully loaded from the last saved gstep
 
     return txt2im_model, im2txt_model, txt2im_optimizer, im2txt_optimizer, losses, start_epoch, start_k