Skip to content

Convert local dataset to LeRobot format error based on Dataset v3 #2158

@Hukongtao

Description

@Hukongtao

System Info

- lerobot version: 0.3.4
- Platform: Linux-5.15.0-94-generic-x86_64-with-glibc2.35
- Python version: 3.10.12
- Huggingface Hub version: 0.35.1
- Datasets version: 4.1.1
- Numpy version: 2.2.6
- PyTorch version: 2.7.1+cu126
- Is PyTorch built with CUDA support?: True
- Cuda version: 12.6
- GPU model: NVIDIA GeForce RTX 4090
- Using GPU in script?: <fill in>

Information

  • One of the scripts in the examples/ folder of LeRobot
  • My own task or dataset (give details below)

Reproduction

First of all, I appreciate your excellent work.

I have a script for dataset conversion. It works correctly with dataset v2.1, but it reports errors after upgrading to dataset v3.0. My code is as follows:

"""Convert local dataset to LeRobot format."""
import os
import shutil
from pathlib import Path

import h5py
import numpy as np
from PIL import Image

from lerobot.datasets.lerobot_dataset import LeRobotDataset

# Feature schema for the output LeRobotDataset: one RGB video stream plus
# 8-dimensional state/action vectors (7 right-arm joints + 1 gripper value).
features = {
    "observation.images.hand_right": {
        "dtype": "video",
        "shape": (480, 848, 3),
        "names": ["height", "width", "channel"],
    },
    "observation.state": {
        "dtype": "float32",
        "shape": (8,),
        # state_0 .. state_7
        "names": {"motors": [f"state_{i}" for i in range(8)]},
    },
    "action": {
        "dtype": "float32",
        "shape": (8,),
        # action_0 .. action_7
        "names": {"motors": [f"action_{i}" for i in range(8)]},
    },
}


def get_image(episode_path, episode_id, frame_index):
    """Return the camera image(s) for one frame of an episode.

    Args:
        episode_path: Root directory containing all episode subdirectories.
        episode_id: Name of the episode subdirectory.
        frame_index: Zero-based frame number; also the name of the per-frame
            camera subfolder on disk.

    Returns:
        Dict mapping the image feature key ("observation.images.hand_right")
        to the opened PIL image for that frame.
    """
    # Layout on disk: <episode_path>/<episode_id>/camera/<frame_index>/hand_right.jpg
    frame_dir = Path(episode_path) / episode_id / "camera" / str(frame_index)
    # Only the right-hand camera is converted; head/left-hand streams are unused.
    return {"observation.images.hand_right": Image.open(frame_dir / "hand_right.jpg")}


def load_local_dataset(episode_path, episode_id):
    """Load one episode's states, actions, and per-frame images.

    Reads right-arm joint positions (columns 7-13) and the right gripper
    position (column 1) from ``aligned_joints.h5``, then pairs each timestep
    with its camera image.

    Args:
        episode_path: Root directory containing all episode subdirectories.
        episode_id: Name of the episode subdirectory.

    Returns:
        List of per-frame dicts with keys "observation.images.hand_right",
        "observation.state" (shape (8,)) and "action" (shape (8,)).

    Raises:
        ValueError: If state, action, and camera frame counts disagree.
    """
    abs_path = os.path.join(episode_path, episode_id)
    h5_file_path = os.path.join(abs_path, "aligned_joints.h5")

    h5_result = {}
    with h5py.File(h5_file_path, "r") as f:
        # Right-arm joint positions (columns 7-13) + right gripper position (column 1).
        state_joint_position = np.array(f["state/joint/position"])[:, 7:14]  # (T, 7)
        state_effector_position = np.array(f["state/effector/position"])[
            :, 1:2
        ]  # (T, 1)
        h5_result["observation.state"] = np.concatenate(
            [state_joint_position, state_effector_position], axis=-1
        )  # (T, 8)

        action_joint_position = np.array(f["action/joint/position"])[:, 7:14]  # (T, 7)
        action_effector_position = np.array(f["action/effector/position"])[
            :, 1:2
        ]  # (T, 1)
        h5_result["action"] = np.concatenate(
            [action_joint_position, action_effector_position], axis=-1
        )  # (T, 8)

    num_frames = h5_result["observation.state"].shape[0]
    num_actions = h5_result["action"].shape[0]
    num_camera_frames = len(os.listdir(os.path.join(abs_path, "camera")))
    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # and a silent mismatch here would corrupt the converted dataset.
    if not (num_frames == num_actions == num_camera_frames):
        raise ValueError(
            f"episode_id={episode_id}: frame count mismatch "
            f"(state={num_frames}, action={num_actions}, camera={num_camera_frames})"
        )
    print(f"episode_id={episode_id} has {num_frames} frames")

    frames = []
    for i in range(num_frames):
        frame = {}
        frame.update(get_image(episode_path, episode_id, i))
        frame["observation.state"] = h5_result["observation.state"][i]
        frame["action"] = h5_result["action"][i]
        frames.append(frame)
    return frames


def create_lerobot_dataset(episode_path, output_path="./datasets/agibot_dp_joints/", fps=30):
    """Create a LeRobot dataset from local episode data.

    Args:
        episode_path: Root directory containing one subdirectory per episode.
        output_path: Destination directory for the converted dataset; wiped
            if it already exists.
        fps: Recording frame rate passed to the dataset metadata.
    """
    output_path = Path(output_path)
    if output_path.exists():
        # Start from a clean slate so stale episodes never mix in.
        shutil.rmtree(output_path)
    dataset = LeRobotDataset.create(
        repo_id="agibot",
        root=output_path,
        fps=fps,
        robot_type="a2d",
        features=features,
        use_videos=True,
    )
    print(dataset)

    # Sort for a deterministic episode order (os.listdir order is arbitrary)
    # and skip stray files so only episode directories are converted.
    episode_list = sorted(
        entry
        for entry in os.listdir(episode_path)
        if os.path.isdir(os.path.join(episode_path, entry))
    )
    for episode_id in episode_list:
        frames = load_local_dataset(episode_path, episode_id)
        for frame in frames:
            frame["task"] = "task_instruction"
            dataset.add_frame(frame)
        dataset.save_episode()


if __name__ == "__main__":
    # Hard-coded source directory of raw episodes; adjust for your machine.
    create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")

When I run this code snippet, I get the following error when calling save_episode for the second episode:

Traceback (most recent call last):
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 138, in <module>
    create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 134, in create_lerobot_dataset
    dataset.save_episode()
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1006, in save_episode
    ep_metadata = self._save_episode_data(episode_buffer)
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1144, in _save_episode_data
    shutil.rmtree(cached_dir)
  File "/usr/lib/python3.10/shutil.py", line 725, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/usr/lib/python3.10/shutil.py", line 658, in _rmtree_safe_fd
    _rmtree_safe_fd(dirfd, fullname, onerror)
  File "/usr/lib/python3.10/shutil.py", line 664, in _rmtree_safe_fd
    onerror(os.rmdir, fullname, sys.exc_info())
  File "/usr/lib/python3.10/shutil.py", line 662, in _rmtree_safe_fd
    os.rmdir(entry.name, dir_fd=topfd)
OSError: [Errno 39] Directory not empty: '9c460aabd2aa27d1496e5e38d2060760561f0ac2cd6a110134eefa5b3f153b8d'

By analyzing the error log, I know that the error comes from the following code snippet:

if self.episodes is not None:
    # Remove the episodes cache directory, necessary to avoid cache bloat
    cached_dir = get_hf_dataset_cache_dir(self.episodes)
    if cached_dir is not None:
        shutil.rmtree(cached_dir)

if self.hf_dataset is not None:
    # Remove hf dataset cache directory, necessary to avoid cache bloat
    cached_dir = get_hf_dataset_cache_dir(self.hf_dataset)
    if cached_dir is not None:
        shutil.rmtree(cached_dir)

So I commented out these two pieces of code (I believe this has no effect on the final result).

Afterwards I executed my conversion script again, but I got another error when calling save_episode for the fourth episode:

[concat @ 0x55ce5a2834c0] DTS 238592 < 1395000 out of order
[mp4 @ 0x55ce5a279480] Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1395000 >= 238592
Traceback (most recent call last):
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 138, in <module>
    create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 134, in create_lerobot_dataset
    dataset.save_episode()
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1012, in save_episode
    ep_metadata.update(self._save_episode_video(video_key, episode_index))
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1197, in _save_episode_video
    concatenate_video_files(
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/video_utils.py", line 474, in concatenate_video_files
    output_container.mux(packet)
  File "av/container/output.py", line 375, in av.container.output.OutputContainer.mux
  File "av/container/output.py", line 399, in av.container.output.OutputContainer.mux_one
  File "av/container/core.pyx", line 302, in av.container.core.Container.err_check
  File "av/error.pyx", line 424, in av.error.err_check
av.error.ValueError: [Errno 22] Invalid argument: '/mnt/hukongtao/codebase/lerobot_1009/tmp0x85p87w.mp4'
[mp4 @ 0x55ce5a279480] Starting second pass: moving the moov atom to the beginning of the file

I don't know how to fix this bug this time.

In summary, I encountered two problems with the data format conversion, and these problems only occur when there is more than one episode. Can you help me find the possible cause?

Expected behavior

Conversion successful

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn’t working correctly · dataset: Issues regarding data inputs, processing, or datasets

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions