Allow non-strict weight loading

fynnsu · fynnsu · commit 38fc772de217 · 2025-10-23T21:35:28.000Z
Signed-off-by: Fynn Schmitt-Ulms <fschmitt@redhat.com>
diff --git a/src/speculators/train/checkpointer.py b/src/speculators/train/checkpointer.py
@@ -84,7 +84,8 @@ def load_model_state_dict(self, model: PreTrainedModel):
         full_state_dict = load_safetensors_state_dict(
             self.model_path(self.previous_epoch), "cuda:0"
         )
-        model.load_state_dict(full_state_dict)
+        # Note: `strict=False` because we don't load the verifier weights
+        model.load_state_dict(full_state_dict, strict=False)
 
     def load_optimizer_state_dict(
         self,
@@ -110,10 +111,13 @@ def load_model_state_dict(self, model: PreTrainedModel):
         full_state_dict = load_safetensors_state_dict(
             self.model_path(self.previous_epoch), "cpu"
         )
+        # Note: `strict=False` because we don't load the verifier weights
         set_model_state_dict(
             model,
             full_state_dict,  # type: ignore[arg-type]
-            options=StateDictOptions(full_state_dict=True, broadcast_from_rank0=True),
+            options=StateDictOptions(
+                full_state_dict=True, broadcast_from_rank0=True, strict=False
+            ),
         )
         dist.barrier()