
Commit 91988af

pass linter checks
1 parent 73e3ea6 commit 91988af

File tree: 13 files changed (+1091, -1046 lines)

algoperf/checkpoint_utils.py

Lines changed: 38 additions & 35 deletions

@@ -31,49 +31,52 @@

A formatting-only hunk: the linter pass reindents the BoolHandler type handler and adds the blank lines expected around top-level definitions; the removed lines are the same statements at the old indentation. The resulting code:

  int,
]


class BoolHandler(NumpyHandler):
  """
  An implementation of TypeHandler for np.bool_ that inherits from NumpyHandler.
  It works by treating the scalar as a 0-dimensional array.
  """

  def typestr(self) -> str:
    """Unique string identifier for this handler."""
    return 'np.bool_'

  async def serialize(
    self,
    values: Sequence[np.bool_],
    infos: Sequence,
    args: Optional[Sequence[ocp.SaveArgs]] = None,
  ):
    """
    Serializes a sequence of np.bool_ scalars by first converting them
    to 0-dim numpy arrays and then calling the parent NumpyHandler.
    """
    # Convert each scalar np.bool_ to a 0-dimensional np.ndarray
    array_values = [np.asarray(v, dtype=np.bool_) for v in values]
    # Use the parent class's robust serialization logic
    return await super().serialize(array_values, infos, args)

  async def deserialize(
    self,
    infos: Sequence,
    args: Optional[Sequence[ocp.RestoreArgs]] = None,
  ) -> Sequence[np.bool_]:
    """
    Deserializes into a sequence of np.bool_ scalars by calling the
    parent handler and then converting the resulting 0-dim arrays.
    """
    # Parent deserialize will return a sequence of 0-dimensional np.ndarray
    results = await super().deserialize(infos, args)

    # Convert each 0-d array back to an np.bool_ scalar using .item()
    scalar_results = [np.bool_(r.item()) for r in results]
    return scalar_results


ocp.type_handlers.register_type_handler(np.bool_, BoolHandler(), override=True)


def maybe_restore_checkpoint(
  framework: str,
  optimizer_state: spec.OptimizerState,
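
For reference (not part of this commit), a minimal sketch of what the registered handler enables: an np.bool_ leaf in a checkpointed pytree round-trips through Orbax as a scalar rather than failing or coming back as a 0-d array. The state dict, checkpoint path, and the legacy PyTreeCheckpointer API are assumptions.

import numpy as np
import orbax.checkpoint as ocp

# Hypothetical training state containing an np.bool_ leaf.
state = {'step': np.int64(1200), 'test_complete': np.bool_(True)}

checkpointer = ocp.PyTreeCheckpointer()
# save() exercises BoolHandler.serialize(); the target directory must not
# already exist.
checkpointer.save('/tmp/bool_handler_demo', state)
# restore() exercises BoolHandler.deserialize().
restored = checkpointer.restore('/tmp/bool_handler_demo')
print(type(restored['test_complete']))  # expected: <class 'numpy.bool_'>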

algoperf/pytorch_utils.py

Lines changed: 5 additions & 1 deletion

@@ -27,7 +27,9 @@ def pytorch_setup() -> Tuple[bool, int, torch.device, int]:
   return use_pytorch_ddp, rank, device, n_gpus


-def pytorch_init(use_pytorch_ddp: bool, rank: int, profiler: Profiler, limit_tf_threads = True) -> None:
+def pytorch_init(
+  use_pytorch_ddp: bool, rank: int, profiler: Profiler, limit_tf_threads=True
+) -> None:
   # Make sure no GPU memory is preallocated to Jax.
   os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
   # Only use CPU for Jax to avoid memory issues.
@@ -47,8 +49,10 @@ def pytorch_init(use_pytorch_ddp: bool, rank: int, profiler: Profiler, limit_tf_
   profiler.set_local_rank(rank)
   # Only log once (for local rank == 0).
   if rank != 0:
+
     def logging_pass(*args):
       pass
+
     logging.info = logging_pass
   # Initialize the process group.
   dist.init_process_group('nccl')
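
A hypothetical call-site sketch for the reformatted signature; the profiler import path is an assumption, not shown in this diff:

from algoperf.profiler import PassThroughProfiler  # assumed location
from algoperf.pytorch_utils import pytorch_init, pytorch_setup

# pytorch_setup() returns (use_pytorch_ddp, rank, device, n_gpus), per the
# annotation in the hunk header above.
use_pytorch_ddp, rank, device, n_gpus = pytorch_setup()
pytorch_init(use_pytorch_ddp, rank, PassThroughProfiler(), limit_tf_threads=True)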

algoperf/workloads/lm/input_pipeline.py

Lines changed: 30 additions & 24 deletions

@@ -50,14 +50,15 @@ def batch_with_padding(
   return padded_batched_dataset


-def get_data_iter(data_rng: jax.random.PRNGKey,
+def get_data_iter(
+  data_rng: jax.random.PRNGKey,
   split: str,
   data_dir: str,
   batch_size: int,
-  num_batches: Optional[int] = None,):
-
+  num_batches: Optional[int] = None,
+):
   ds = get_lm_dataset(data_rng, split, data_dir, batch_size, num_batches)
-
+
   it = map(
     functools.partial(
       data_utils.shard_and_maybe_pad_np, global_batch_size=batch_size
@@ -67,6 +68,7 @@ def get_data_iter(data_rng: jax.random.PRNGKey,

   return iter(it)

+
 def get_lm_dataset(
   data_rng: jax.random.PRNGKey,
   split: str,
@@ -78,7 +80,7 @@ def get_lm_dataset(
   if split not in TFDS_SPLIT_NAME:
     raise NotImplementedError

-  shuffle_seed = jax.random.randint(data_rng, (), -2**31, 2**31-1)
+  shuffle_seed = jax.random.randint(data_rng, (), -(2**31), 2**31 - 1)

   data_dir = os.path.join(data_dir, TFDS_SPLIT_NAME[split])
   tokens_ds = tf.data.Dataset.load(data_dir)
@@ -98,19 +100,17 @@ def get_lm_dataset(
     num_parallel_calls=AUTOTUNE,
   )
   if split == 'train':
-    ds = sequences_ds.shuffle(
-      SHUFFLE_BUFFER_SIZE, seed=shuffle_seed
-    )
-    ds = ds.batch(
-      batch_size, drop_remainder=False
-    )
+    ds = sequences_ds.shuffle(SHUFFLE_BUFFER_SIZE, seed=shuffle_seed)
+    ds = ds.batch(batch_size, drop_remainder=False)
     ds = ds.take(num_batches) if num_batches is not None else ds
     ds = ds.repeat()
-    ds = ds.map(lambda x: {
-      'inputs': x['inputs'],
-      'targets': x['targets'],
-      'weights': None,
-    })
+    ds = ds.map(
+      lambda x: {
+        'inputs': x['inputs'],
+        'targets': x['targets'],
+        'weights': None,
+      }
+    )
     ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
   elif split == 'eval_train':
     ds = batch_with_padding(
@@ -123,10 +123,13 @@ def get_lm_dataset(
     )
     ds = ds.take(num_batches) if num_batches is not None else ds
     ds = ds.repeat()
-    ds = ds.map(lambda x: {'inputs': x['inputs'],
-      'targets': x['targets'],
-      'weights': tf.where(tf.equal(x['inputs'], PAD_ID), 0.0, 1.0)
-    })
+    ds = ds.map(
+      lambda x: {
+        'inputs': x['inputs'],
+        'targets': x['targets'],
+        'weights': tf.where(tf.equal(x['inputs'], PAD_ID), 0.0, 1.0),
+      }
+    )
     ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
   elif split == 'validation':
     ds = batch_with_padding(
@@ -139,9 +142,12 @@ def get_lm_dataset(
     )
     ds = ds.take(num_batches) if num_batches is not None else ds
     ds = ds.repeat()
-    ds = ds.map(lambda x: {'inputs': x['inputs'],
-      'targets': x['targets'],
-      'weights': tf.where(tf.equal(x['inputs'], PAD_ID), 0.0, 1.0)
-    })
+    ds = ds.map(
+      lambda x: {
+        'inputs': x['inputs'],
+        'targets': x['targets'],
+        'weights': tf.where(tf.equal(x['inputs'], PAD_ID), 0.0, 1.0),
+      }
+    )
     ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
   return ds
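
A hypothetical driver sketch for the reformatted pipeline; the data directory and batch size are placeholders, and data_dir must contain the saved TFDS splits that get_lm_dataset expects:

import jax

from algoperf.workloads.lm import input_pipeline

rng = jax.random.PRNGKey(0)
train_iter = input_pipeline.get_data_iter(
  rng, split='train', data_dir='/data/lm', batch_size=8
)
# Each element is a sharded dict with 'inputs', 'targets', and 'weights'
# ('weights' is None for the train split).
batch = next(train_iter)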

algoperf/workloads/lm/lm_jax/models.py

Lines changed: 0 additions & 20 deletions
This file was deleted.
