|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | + |
| 17 | +""" |
| 18 | +Example script demonstrating dataset tools utilities. |
| 19 | +
|
| 20 | +This script shows how to: |
| 21 | +1. Delete episodes from a dataset |
| 22 | +2. Split a dataset into train/val sets |
| 23 | +3. Add/remove features |
| 24 | +4. Merge datasets |
| 25 | +
|
| 26 | +Usage: |
| 27 | + python examples/dataset/use_dataset_tools.py |
| 28 | +""" |
| 29 | + |
| 30 | +import numpy as np |
| 31 | + |
| 32 | +from lerobot.datasets.dataset_tools import ( |
| 33 | + add_feature, |
| 34 | + delete_episodes, |
| 35 | + merge_datasets, |
| 36 | + remove_feature, |
| 37 | + split_dataset, |
| 38 | +) |
| 39 | +from lerobot.datasets.lerobot_dataset import LeRobotDataset |
| 40 | + |
| 41 | + |
def main():
    """Demonstrate the lerobot dataset-tools utilities on the public pusht dataset.

    Walks through five operations: deleting episodes, splitting into
    train/val, adding features (static array and per-frame callable),
    removing a feature, and merging datasets. Each operation writes a new
    dataset into the local HuggingFace cache
    (~/.cache/huggingface/lerobot/).
    """
    dataset = LeRobotDataset("lerobot/pusht")

    print(f"Original dataset: {dataset.meta.total_episodes} episodes, {dataset.meta.total_frames} frames")
    print(f"Features: {list(dataset.meta.features.keys())}")

    print("\n1. Deleting episodes 0 and 2...")
    filtered_dataset = delete_episodes(dataset, episode_indices=[0, 2], repo_id="lerobot/pusht_filtered")
    print(f"Filtered dataset: {filtered_dataset.meta.total_episodes} episodes")

    print("\n2. Splitting dataset into train/val...")
    splits = split_dataset(
        dataset,
        splits={"train": 0.8, "val": 0.2},
    )
    print(f"Train split: {splits['train'].meta.total_episodes} episodes")
    print(f"Val split: {splits['val'].meta.total_episodes} episodes")

    print("\n3. Adding a reward feature...")
    # Static variant: precompute one scalar per frame and pass the array.
    reward_values = np.random.randn(dataset.meta.total_frames).astype(np.float32)
    dataset_with_reward = add_feature(
        dataset,
        feature_name="reward",
        feature_values=reward_values,
        feature_info={
            "dtype": "float32",
            "shape": (1,),
            "names": None,
        },
        repo_id="lerobot/pusht_with_reward",
    )

    # Callable variant: the value is computed per frame from
    # (row_dict, episode_index, frame_index).
    def compute_success(row_dict, episode_index, frame_index):
        """Mark the final `success_window` frames of an episode as successful.

        NOTE(review): `episode_length` is a placeholder constant — a real
        workflow should look up the actual length of `episode_index`.
        (The original compared against `episode_length - 10` with
        `episode_length = 10`, i.e. `frame_index >= 0`, so every frame was
        labeled successful and the feature was a constant 1.0.)
        """
        episode_length = 10  # placeholder -- TODO: use the real per-episode length
        success_window = 3  # only the last few frames count as "success"
        return float(frame_index >= episode_length - success_window)

    dataset_with_success = add_feature(
        dataset_with_reward,
        feature_name="success",
        feature_values=compute_success,
        feature_info={
            "dtype": "float32",
            "shape": (1,),
            "names": None,
        },
        repo_id="lerobot/pusht_with_reward_and_success",
    )

    print(f"New features: {list(dataset_with_success.meta.features.keys())}")

    print("\n4. Removing the success feature...")
    dataset_cleaned = remove_feature(
        dataset_with_success, feature_names="success", repo_id="lerobot/pusht_cleaned"
    )
    print(f"Features after removal: {list(dataset_cleaned.meta.features.keys())}")

    print("\n5. Merging train and val splits back together...")
    merged = merge_datasets([splits["train"], splits["val"]], output_repo_id="lerobot/pusht_merged")
    print(f"Merged dataset: {merged.meta.total_episodes} episodes")

    print("\n6. Complex workflow example...")
    # Only meaningful when the dataset has more than one camera stream.
    if len(dataset.meta.camera_keys) > 1:
        camera_to_remove = dataset.meta.camera_keys[0]
        print(f"Removing camera: {camera_to_remove}")
        dataset_no_cam = remove_feature(
            dataset, feature_names=camera_to_remove, repo_id="pusht_no_first_camera"
        )
        print(f"Remaining cameras: {dataset_no_cam.meta.camera_keys}")

    print("\nDone! Check ~/.cache/huggingface/lerobot/ for the created datasets.")
| 115 | + |
# Standard script entry-point guard: run the demo only when executed directly.
if __name__ == "__main__":
    main()
0 commit comments