Description
System Info
- lerobot version: 0.3.4
- Platform: Linux-5.15.0-94-generic-x86_64-with-glibc2.35
- Python version: 3.10.12
- Huggingface Hub version: 0.35.1
- Datasets version: 4.1.1
- Numpy version: 2.2.6
- PyTorch version: 2.7.1+cu126
- Is PyTorch built with CUDA support?: True
- Cuda version: 12.6
- GPU model: NVIDIA GeForce RTX 4090
- Using GPU in script?: <fill in>
Information
- One of the scripts in the examples/ folder of LeRobot
- My own task or dataset (give details below)
Reproduction
First of all, I appreciate your excellent work.
I have a script for dataset conversion. It converts my data correctly when based on dataset v2.1, but it raises errors after upgrading to dataset v3.0. My code is as follows:
"""Convert local dataset to LeRobot format."""
import os
import shutil
from pathlib import Path
import h5py
import numpy as np
from PIL import Image
from lerobot.datasets.lerobot_dataset import LeRobotDataset
features = {
"observation.images.hand_right": {
"dtype": "video",
"shape": (480, 848, 3),
"names": ["height", "width", "channel"],
},
"observation.state": {
"dtype": "float32",
"shape": (8,),
"names": {
"motors": [
"state_0",
"state_1",
"state_2",
"state_3",
"state_4",
"state_5",
"state_6",
"state_7",
],
},
},
"action": {
"dtype": "float32",
"shape": (8,),
"names": {
"motors": [
"action_0",
"action_1",
"action_2",
"action_3",
"action_4",
"action_5",
"action_6",
"action_7",
],
},
},
}
def get_image(episode_path, episode_id, frame_index):
"""Load image for a specific episode and frame."""
abs_path = os.path.join(episode_path, episode_id)
img_folder = os.path.join(abs_path, "camera")
img_result = {}
# hand_left_img_path = os.path.join(img_folder, str(frame_index), "hand_left.jpg")
hand_right_img_path = os.path.join(img_folder, str(frame_index), "hand_right.jpg")
# head_img_path = os.path.join(img_folder, str(frame_index), "head.jpg")
# img_result["observation.images.head"] = Image.open(head_img_path)
img_result["observation.images.hand_right"] = Image.open(hand_right_img_path)
# img_result["observation.images.hand_left"] = Image.open(hand_left_img_path)
return img_result
def load_local_dataset(episode_path, episode_id):
"""Load and process local dataset episode data."""
abs_path = os.path.join(episode_path, episode_id)
h5_file_path = os.path.join(abs_path, "aligned_joints.h5")
h5_result = {}
with h5py.File(h5_file_path, "r") as f:
# 获取右臂关节位置 (7-13) 和右臂夹爪位置 (1)
state_joint_position = np.array(f["state/joint/position"])[:, 7:14] # (T, 7)
state_effector_position = np.array(f["state/effector/position"])[
:, 1:2
] # (T, 1)
h5_result["observation.state"] = np.concatenate(
[state_joint_position, state_effector_position], axis=-1
) # (T, 8)
action_joint_position = np.array(f["action/joint/position"])[:, 7:14] # (T, 7)
action_effector_position = np.array(f["action/effector/position"])[
:, 1:2
] # (T, 1)
h5_result["action"] = np.concatenate(
[action_joint_position, action_effector_position], axis=-1
) # (T, 8)
num_frames = h5_result["observation.state"].shape[0]
assert (
num_frames
== h5_result["action"].shape[0]
== len(os.listdir(os.path.join(episode_path, episode_id, "camera")))
)
print(f"episode_id={episode_id} has {num_frames} frames")
frames = []
for i in range(num_frames):
frame = {}
img_result = get_image(episode_path, episode_id, i)
frame.update(img_result)
frame["observation.state"] = h5_result["observation.state"][i]
frame["action"] = h5_result["action"][i]
frames.append(frame)
return frames
def create_lerobot_dataset(episode_path, output_path="./datasets/agibot_dp_joints/", fps=30):
"""Create LeRobot dataset from local episode data."""
output_path = Path(output_path)
if output_path.exists():
shutil.rmtree(output_path)
dataset = LeRobotDataset.create(
repo_id="agibot",
root=output_path,
fps=fps,
robot_type="a2d",
features=features,
use_videos=True,
)
print(dataset)
episode_list = os.listdir(episode_path)
for episode_id in episode_list:
frames = load_local_dataset(episode_path, episode_id)
for frame in frames:
frame["task"] = "task_instruction"
dataset.add_frame(frame)
dataset.save_episode()
if __name__ == "__main__":
create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")When I run this code snippet, I get the following error when save_episode from second episode:
```
Traceback (most recent call last):
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 138, in <module>
    create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 134, in create_lerobot_dataset
    dataset.save_episode()
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1006, in save_episode
    ep_metadata = self._save_episode_data(episode_buffer)
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1144, in _save_episode_data
    shutil.rmtree(cached_dir)
  File "/usr/lib/python3.10/shutil.py", line 725, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/usr/lib/python3.10/shutil.py", line 658, in _rmtree_safe_fd
    _rmtree_safe_fd(dirfd, fullname, onerror)
  File "/usr/lib/python3.10/shutil.py", line 664, in _rmtree_safe_fd
    onerror(os.rmdir, fullname, sys.exc_info())
  File "/usr/lib/python3.10/shutil.py", line 662, in _rmtree_safe_fd
    os.rmdir(entry.name, dir_fd=topfd)
OSError: [Errno 39] Directory not empty: '9c460aabd2aa27d1496e5e38d2060760561f0ac2cd6a110134eefa5b3f153b8d'
```
By analyzing the error log, I can see that the error comes from the following code:

lerobot/src/lerobot/datasets/lerobot_dataset.py, lines 322 to 326 in 829d2d1:

```python
if self.episodes is not None:
    # Remove the episodes cache directory, necessary to avoid cache bloat
    cached_dir = get_hf_dataset_cache_dir(self.episodes)
    if cached_dir is not None:
        shutil.rmtree(cached_dir)
```

lerobot/src/lerobot/datasets/lerobot_dataset.py, lines 1140 to 1144 in 829d2d1:

```python
if self.hf_dataset is not None:
    # Remove hf dataset cache directory, necessary to avoid cache bloat
    cached_dir = get_hf_dataset_cache_dir(self.hf_dataset)
    if cached_dir is not None:
        shutil.rmtree(cached_dir)
```
So I commented out these two code blocks (I believe this has no effect on the final result).
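Rather than deleting the cleanup entirely, a less invasive local patch might be to make the removal best-effort. Below is a minimal sketch of that idea; `best_effort_rmtree` is my own hypothetical helper, not part of lerobot:

```python
import shutil
from pathlib import Path


def best_effort_rmtree(cached_dir: Path | None) -> None:
    """Remove a cache directory, tolerating entries that reappear mid-delete.

    ignore_errors=True swallows the ENOTEMPTY error above (at the risk of
    leaving the directory behind) while still reclaiming most of the space.
    """
    if cached_dir is not None:
        shutil.rmtree(cached_dir, ignore_errors=True)
```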
Afterwards, I ran my conversion script again, but I hit another error, this time when save_episode is called for the fourth episode:
```
[concat @ 0x55ce5a2834c0] DTS 238592 < 1395000 out of order
[mp4 @ 0x55ce5a279480] Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1395000 >= 238592
Traceback (most recent call last):
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 138, in <module>
    create_lerobot_dataset(episode_path="/mnt/public/yekehe/processed_joints/")
  File "/mnt/hukongtao/codebase/lerobot_1009/convert.py", line 134, in create_lerobot_dataset
    dataset.save_episode()
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1012, in save_episode
    ep_metadata.update(self._save_episode_video(video_key, episode_index))
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/lerobot_dataset.py", line 1197, in _save_episode_video
    concatenate_video_files(
  File "/mnt/hukongtao/codebase/lerobot_1009/src/lerobot/datasets/video_utils.py", line 474, in concatenate_video_files
    output_container.mux(packet)
  File "av/container/output.py", line 375, in av.container.output.OutputContainer.mux
  File "av/container/output.py", line 399, in av.container.output.OutputContainer.mux_one
  File "av/container/core.pyx", line 302, in av.container.core.Container.err_check
  File "av/error.pyx", line 424, in av.error.err_check
av.error.ValueError: [Errno 22] Invalid argument: '/mnt/hukongtao/codebase/lerobot_1009/tmp0x85p87w.mp4'
[mp4 @ 0x55ce5a279480] Starting second pass: moving the moov atom to the beginning of the file
```
I don't know how to fix this bug this time.
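To help narrow it down, here is a small probe that checks whether a video file's packets have strictly increasing DTS. This is a hedged sketch using PyAV (which video_utils already relies on for muxing); which files to scan, e.g. the per-episode videos before concatenation, is an assumption on my part:

```python
import av


def check_monotonic_dts(video_path: str) -> bool:
    """Return True if every video packet's DTS is strictly increasing."""
    ok = True
    with av.open(video_path) as container:
        prev_dts = None
        for packet in container.demux(container.streams.video[0]):
            if packet.dts is None:  # flush/empty packets carry no DTS
                continue
            if prev_dts is not None and packet.dts <= prev_dts:
                print(f"{video_path}: DTS {packet.dts} after {prev_dts} is out of order")
                ok = False
            prev_dts = packet.dts
    return ok
```

If each per-episode file passes this check individually, that would point at the concatenation step (e.g. timestamps not being offset across segments) rather than at the episode encoding itself.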
In summary, I encountered two problems during the data format conversion, and both only occur when there is more than one episode. Can you help me find the possible cause?
Expected behavior
The conversion completes successfully for all episodes.