nvidia-cosmos · jashshah999 · Feb 25, 2026
diff --git a/cosmos_transfer2/_src/predict2/checkpointer/dcp.py b/cosmos_transfer2/_src/predict2/checkpointer/dcp.py
@@ -244,7 +244,9 @@ def __init__(self, model: Union[nn.Module, List[nn.Module]], load_ema_to_reg: bo
                 f"ModelWrapper only supports DiffusionModel when load_ema_to_reg is True, but got {type(model)}"
             )
 
-    def state_dict(self, mapping_keys: dict[str, str] = {}) -> Dict[str, Any]:
+    def state_dict(self, mapping_keys: dict[str, str] | None = None) -> Dict[str, Any]:
+        if mapping_keys is None:
+            mapping_keys = {}
         _state_dict = {k: v for sd in map(get_model_state_dict, self.model) for k, v in sd.items()}
         if self.load_ema_to_reg:
             assert not self.model[0].config.ema.enabled, (

diff --git a/cosmos_transfer2/_src/predict2/text_encoders/reason1.py b/cosmos_transfer2/_src/predict2/text_encoders/reason1.py
@@ -231,10 +231,12 @@ def _forward(
     MODIFICATIONS: adding the hidden states to the output.
     """
 
-    def forward(self, tokens, data_batch={}, start_pos: int = 0) -> torch.Tensor:
+    def forward(self, tokens, data_batch=None, start_pos: int = 0) -> torch.Tensor:
         """
         The training step of the model, including the loss computation.
         """
+        if data_batch is None:
+            data_batch = {}
         assert "pixel_values" not in data_batch, "pixel_values should not be in data_batch, use images instead"
         pixel_values = data_batch.get("images", None)
         image_grid_thw = data_batch.get("image_grid_thw", None)

diff --git a/cosmos_transfer2/_src/reason1/models/vlm_base.py b/cosmos_transfer2/_src/reason1/models/vlm_base.py
@@ -414,7 +414,7 @@ def training_step(
     def build_model(self, model_config):
         raise NotImplementedError
 
-    def forward(self, tokens, data_batch={}, start_pos: int = 0) -> torch.Tensor:
+    def forward(self, tokens, data_batch=None, start_pos: int = 0) -> torch.Tensor:
         """
         The forward pass of the model.
         Returns:

diff --git a/cosmos_transfer2/_src/reason1/models/vlm_qwen.py b/cosmos_transfer2/_src/reason1/models/vlm_qwen.py
@@ -370,10 +370,12 @@ def _forward(
             logits = DTensor.from_local(logits, device_mesh=self.cp_mesh, placements=[Shard(1)]).full_tensor()
         return logits
 
-    def forward(self, tokens, data_batch={}, start_pos: int = 0) -> torch.Tensor:
+    def forward(self, tokens, data_batch=None, start_pos: int = 0) -> torch.Tensor:
         """
         The training step of the model, including the loss computation.
         """
+        if data_batch is None:
+            data_batch = {}
         assert "pixel_values" not in data_batch, "pixel_values should not be in data_batch, use images instead"
         pixel_values = data_batch.get("images", None)
         image_grid_thw = data_batch.get("image_grid_thw", None)

diff --git a/cosmos_transfer2/_src/reason1/models/vlm_qwen_omni.py b/cosmos_transfer2/_src/reason1/models/vlm_qwen_omni.py
@@ -268,19 +268,19 @@ def _forward(
     MODIFICATIONS: adding the hidden states to the output.
     """
 
-    def forward(self, tokens, data_batch={}, start_pos: int = 0) -> torch.Tensor:
+    def forward(self, tokens, data_batch=None, start_pos: int = 0) -> torch.Tensor:
         """
         The training step of the model, including the loss computation.
         """
+        if data_batch is None:
+            data_batch = {}
         assert "pixel_values" not in data_batch, "pixel_values should not be in data_batch, use images instead"
         pixel_values = data_batch.get("images", None)
         image_grid_thw = data_batch.get("image_grid_thw", None)
         pixel_values_videos = data_batch.get("videos", None)
         video_grid_thw = data_batch.get("video_grid_thw", None)
         attention_mask = data_batch.get("padding_mask", None)
 
-        attention_mask = data_batch.get("padding_mask", None)
-
         if image_grid_thw is not None:
             assert len(image_grid_thw) == 1, "Only batch=1 is supported for now, due to `get_rope_index`"
             image_grid_thw = image_grid_thw[0]  # 1, N_img, 3 -> N_img, 3

diff --git a/cosmos_transfer2/inference.py b/cosmos_transfer2/inference.py
@@ -198,7 +198,8 @@ def _generate_sample(self, sample: InferenceArguments, output_dir: Path, sample_
 
         if self.device_rank == 0:
             output_dir.mkdir(parents=True, exist_ok=True)
-            open(f"{output_path}.json", "w").write(sample.model_dump_json())
+            with open(f"{output_path}.json", "w") as f:
+                f.write(sample.model_dump_json())
             log.info(f"Saved arguments to {output_path}.json")
 
             with self.benchmark_timer("text_guardrail"):

diff --git a/cosmos_transfer2/multiview.py b/cosmos_transfer2/multiview.py
@@ -155,7 +155,8 @@ def _generate_sample(self, sample: MultiviewInferenceArguments, output_dir: Path
 
         if self.rank0:
             output_dir.mkdir(parents=True, exist_ok=True)
-            open(f"{output_path}.json", "w").write(sample.model_dump_json())
+            with open(f"{output_path}.json", "w") as f:
+                f.write(sample.model_dump_json())
             log.info(f"Saved arguments to {output_path}.json")
 
         # setup the control and input videos dict

diff --git a/packages/cosmos-gradio/cosmos_gradio/model_ipc/command_ipc.py b/packages/cosmos-gradio/cosmos_gradio/model_ipc/command_ipc.py
@@ -81,11 +81,11 @@ def wait_for_command(self, rank: int) -> CommandData:
 
 
 class WorkerException(Exception):
-    def __init__(self, rank, status, result_json: dict[str, Any] = {}):
+    def __init__(self, rank, status, result_json: dict[str, Any] | None = None):
         super().__init__("worker exception")
         self.rank = rank
         self.status = status
-        self.results = result_json
+        self.results = result_json if result_json is not None else {}
 
     def __str__(self):
         rank = self.rank
@@ -135,14 +135,16 @@ def cleanup(self):
                 if os.path.exists(file_path):
                     os.remove(file_path)
 
-    def signal_status(self, rank: int, status: str, request_id: int, result: dict[str, Any] = {}) -> None:
+    def signal_status(self, rank: int, status: str, request_id: int, result: dict[str, Any] | None = None) -> None:
         """signal individual worker status per rank
 
         Args:
             rank (int): The rank of the worker
             status (str): The status of the worker is either "success" or an error string
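-            results_json (dict[str, Any]): The result json of the worker/model. Model can place arbitrary data here.
+            result (dict[str, Any] | None): The result json of the worker/model. Model can place arbitrary data here. None is treated as an empty dict.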
         """
+        if result is None:
+            result = {}
         status_file = f"/tmp/worker_{rank}_status.json"
 
         status_data = StatusData(rank=rank, status=status, request_id=request_id, result=result)