diff --git a/executors/accelerate/src/hypha/accelerate_executor/dataset.py b/executors/accelerate/src/hypha/accelerate_executor/dataset.py index 79fa87eb..0c78a282 100644 --- a/executors/accelerate/src/hypha/accelerate_executor/dataset.py +++ b/executors/accelerate/src/hypha/accelerate_executor/dataset.py @@ -97,9 +97,10 @@ def __iter__(self): # type: ignore[no-untyped-def] while cursor + self.batch_size <= num_new: end = cursor + self.batch_size - # Yield a CLEAN, CONTIGUOUS copy - # This prevents sending a "View" of the whole file to the worker queue - batch = {k: v[cursor:end].contiguous() for k, v in processed.items()} + # Yield a clean, INDEPENDENT copy using .clone() + # This prevents sending a "View" of the underlying raw_bytes buffer + # which might be gc'ed when the loop iterates. + batch = {k: v[cursor:end].clone() for k, v in processed.items()} yield batch cursor = end