### Task parameters

import os
import warnings

DATA_DIR = ''

# Root directory of the demonstration datasets, read once from $DATA.
# Falling back to DATA_DIR (with a warning) keeps this module importable on
# machines where $DATA is not configured; the resulting paths will simply not
# exist and dataset loading will fail later with an explicit file error.
_DATA_ROOT = os.environ.get('DATA')
if _DATA_ROOT is None:
    warnings.warn(
        "Environment variable DATA is not set; real-robot dataset paths in "
        "TASK_CONFIGS are built relative to DATA_DIR and will most likely "
        "not exist."
    )
    _DATA_ROOT = DATA_DIR


def _demo_dir(name):
    """Return the path of dataset *name* below ``$DATA/closed_loop_demos``."""
    return _DATA_ROOT + '/closed_loop_demos/' + name


def _task_config(dataset_dir, **extra):
    """Build one task entry from the fields shared by every real-robot task.

    A fresh dict (and a fresh ``camera_names`` list) is created per call so
    that entries never alias each other. ``extra`` carries the task-specific
    keys such as ``stage_key``, ``zero_qpos``, ``actiondim`` or
    ``relative_actions``.
    """
    config = {
        'dataset_dir': dataset_dir,
        'num_episodes': -1,
        'episode_len': 80,
        'camera_names': ['color_image'],
    }
    config.update(extra)
    return config


TASK_CONFIGS = {
    # insertion in the middle only
    # NOTE(review): this is the only entry whose 'dataset_dir' is a plain
    # string rather than a list -- confirm the data loader accepts both.
    'insert_redness_relief_slanted': _task_config(
        _demo_dir('insert_redness_relief_slanted'),
    ),

    # insertion anywhere
    'insert_redness_relief_slanted_anywhere': _task_config(
        [_demo_dir('insert_redness_relief_slanted_anywhere')],
    ),

    # pick stage of the pick-and-place demonstrations
    'pick_vial': _task_config(
        [_demo_dir('pick_place_phenylephrine/')],
        stage_key='1_pick',
    ),

    # place stage of the same pick-and-place demonstrations
    'place_vial': _task_config(
        [_demo_dir('pick_place_phenylephrine/')],
        stage_key='2_place',
    ),

    'insert_ibuprofen': _task_config(
        [_demo_dir('insert_ibuprofen')],
    ),

    # same data as 'insert_ibuprofen', with the zero_qpos flag enabled
    'insert_ibuprofen_zeroqpos': _task_config(
        [_demo_dir('insert_ibuprofen')],
        zero_qpos=True,
    ),

    # relative-action variant with a 7-dimensional action
    'insert_ibuprofen_rel': _task_config(
        [_demo_dir('insert_ibuprofen_relactions')],
        actiondim=7,
        relative_actions=True,
    ),
}

### ALOHA fixed constants
DT = 0.02
type=str, help='ckpt_dir', required=True) + parser.add_argument('--ckpt_name', action='store', type=str, help='ckpt_name', required=False) parser.add_argument('--policy_class', action='store', type=str, help='policy_class, capitalize', required=True) parser.add_argument('--task_name', action='store', type=str, help='task_name', required=True) parser.add_argument('--seed', action='store', type=int, help='seed', required=True) @@ -63,6 +69,9 @@ def get_args_parser(): parser.add_argument('--kl_weight', action='store', type=int, help='KL Weight', required=False) parser.add_argument('--chunk_size', action='store', type=int, help='chunk_size', required=False) parser.add_argument('--temporal_agg', action='store_true') + parser.add_argument('--run_name', action='store', type=str, help='run_name', required=True) + + parser.add_argument('--wandb_mode', type=str, default='online', help='wandb mode') # online, disabled return parser diff --git a/detr/models/backbone.py b/detr/models/backbone.py index f28637ea..a714340c 100644 --- a/detr/models/backbone.py +++ b/detr/models/backbone.py @@ -11,7 +11,8 @@ from torchvision.models._utils import IntermediateLayerGetter from typing import Dict, List -from util.misc import NestedTensor, is_main_process +# from util.misc import NestedTensor, is_main_process +from detr.util.misc import NestedTensor, is_main_process from .position_encoding import build_position_encoding diff --git a/detr/models/detr_vae.py b/detr/models/detr_vae.py index bccfca75..0fd67f4a 100644 --- a/detr/models/detr_vae.py +++ b/detr/models/detr_vae.py @@ -31,9 +31,13 @@ def get_position_angle_vec(position): return torch.FloatTensor(sinusoid_table).unsqueeze(0) +# STATE_DIM = 9 # 3 for xyz + 6 for r6 rotations +# ACTION_DIM = 10 + + class DETRVAE(nn.Module): """ This is the DETR module that performs object detection """ - def __init__(self, backbones, transformer, encoder, state_dim, num_queries, camera_names): + def __init__(self, backbones, transformer, encoder, 
num_queries, camera_names, statedim, actiondim): """ Initializes the model. Parameters: backbones: torch module of the backbone to be used. See backbone.py @@ -49,16 +53,16 @@ def __init__(self, backbones, transformer, encoder, state_dim, num_queries, came self.transformer = transformer self.encoder = encoder hidden_dim = transformer.d_model - self.action_head = nn.Linear(hidden_dim, state_dim) + self.action_head = nn.Linear(hidden_dim, actiondim) self.is_pad_head = nn.Linear(hidden_dim, 1) self.query_embed = nn.Embedding(num_queries, hidden_dim) if backbones is not None: self.input_proj = nn.Conv2d(backbones[0].num_channels, hidden_dim, kernel_size=1) self.backbones = nn.ModuleList(backbones) - self.input_proj_robot_state = nn.Linear(14, hidden_dim) + self.input_proj_robot_state = nn.Linear(statedim, hidden_dim) else: # input_dim = 14 + 7 # robot_state + env_state - self.input_proj_robot_state = nn.Linear(14, hidden_dim) + self.input_proj_robot_state = nn.Linear(statedim, hidden_dim) self.input_proj_env_state = nn.Linear(7, hidden_dim) self.pos = torch.nn.Embedding(2, hidden_dim) self.backbones = None @@ -66,8 +70,8 @@ def __init__(self, backbones, transformer, encoder, state_dim, num_queries, came # encoder extra parameters self.latent_dim = 32 # final size of latent z # TODO tune self.cls_embed = nn.Embedding(1, hidden_dim) # extra cls token embedding - self.encoder_action_proj = nn.Linear(14, hidden_dim) # project action to embedding - self.encoder_joint_proj = nn.Linear(14, hidden_dim) # project qpos to embedding + self.encoder_action_proj = nn.Linear(actiondim, hidden_dim) # project action to embedding + self.encoder_joint_proj = nn.Linear(statedim, hidden_dim) # project qpos to embedding self.latent_proj = nn.Linear(hidden_dim, self.latent_dim*2) # project hidden state to latent std, var self.register_buffer('pos_table', get_sinusoid_encoding_table(1+1+num_queries, hidden_dim)) # [CLS], qpos, a_seq @@ -82,6 +86,7 @@ def forward(self, qpos, image, env_state, 
actions=None, is_pad=None): env_state: None actions: batch, seq, action_dim """ + is_training = actions is not None # train or val bs, _ = qpos.shape ### Obtain latent z from action sequence @@ -166,8 +171,8 @@ def __init__(self, backbones, state_dim, camera_names): backbone_down_projs.append(down_proj) self.backbone_down_projs = nn.ModuleList(backbone_down_projs) - mlp_in_dim = 768 * len(backbones) + 14 - self.mlp = mlp(input_dim=mlp_in_dim, hidden_dim=1024, output_dim=14, hidden_depth=2) + mlp_in_dim = 768 * len(backbones) + STATE_DIM + self.mlp = mlp(input_dim=mlp_in_dim, hidden_dim=1024, output_dim=ACTION_DIM, hidden_depth=2) else: raise NotImplementedError @@ -192,7 +197,7 @@ def forward(self, qpos, image, env_state, actions=None): for cam_feature in all_cam_features: flattened_features.append(cam_feature.reshape([bs, -1])) flattened_features = torch.cat(flattened_features, axis=1) # 768 each - features = torch.cat([flattened_features, qpos], axis=1) # qpos: 14 + features = torch.cat([flattened_features, qpos], axis=1) a_hat = self.mlp(features) return a_hat @@ -227,7 +232,6 @@ def build_encoder(args): def build(args): - state_dim = 14 # TODO hardcode # From state # backbone = None # from state for now, no need for conv nets @@ -244,7 +248,8 @@ def build(args): backbones, transformer, encoder, - state_dim=state_dim, + statedim=args.statedim, + actiondim=args.actiondim, num_queries=args.num_queries, camera_names=args.camera_names, ) diff --git a/detr/models/position_encoding.py b/detr/models/position_encoding.py index 209d9171..d5a5af3a 100644 --- a/detr/models/position_encoding.py +++ b/detr/models/position_encoding.py @@ -6,7 +6,8 @@ import torch from torch import nn -from util.misc import NestedTensor +# from util.misc import NestedTensor +from detr.util.misc import NestedTensor import IPython e = IPython.embed diff --git a/detr/util/misc.py b/detr/util/misc.py index dfa9fb5b..1d7697e5 100644 --- a/detr/util/misc.py +++ b/detr/util/misc.py @@ -17,12 +17,29 
def wandb_setup(params, save_dir, mode, run_id=None):
    """Start (or resume) a Weights & Biases run and return its run id.

    Credentials are deliberately NOT set here. Authenticate outside the code,
    e.g. with ``wandb login`` or by exporting ``WANDB_API_KEY`` in the
    environment that launches training. Earlier revisions hard-coded an API
    key, e-mail and username and forced them into ``os.environ``, which leaked
    the secret into version control and silently overrode the caller's own
    credentials.
    NOTE(review): the key that was committed should be revoked.

    Args:
        params: Run configuration logged to W&B; must contain ``'run_name'``,
            which is used as the display name of the run.
        save_dir: Directory passed to ``wandb.init`` as ``dir``.
        mode: W&B mode string forwarded unchanged (the CLI documents
            ``online`` and ``disabled``).
        run_id: Id of an existing run to resume, or ``None`` for a new run.

    Returns:
        The id of the initialised W&B run.

    Raises:
        KeyError: If ``params`` has no ``'run_name'`` entry.
    """
    wandb_run = wandb.init(
        project='closed_loop_policy',
        config=params,
        name=params['run_name'],
        id=run_id,
        # Only ask W&B to resume when the caller supplied an existing run id.
        resume=run_id is not None,
        dir=save_dir,
        mode=mode,
    )
    return wandb_run.id
import os
import shlex
import subprocess
import sys


GRAB_ITERATIONS = [
    'policy_epoch_12000_seed_0.ckpt',
    'policy_epoch_18000_seed_0.ckpt',
    'policy_epoch_5000_seed_0.ckpt'
]

OTHER_FOLDERS_AND_FILES = [
    'dataset_stats.pkl'
]

# Compute zone used when the caller's args carry no ``zone`` attribute.
DEFAULT_ZONE = 'us-central1-c'


def _run_command(cmd):
    """Run *cmd* (an argv list) without a shell and report failures on stderr.

    Failures are reported but never raised, so one missing checkpoint does not
    abort the remaining downloads.
    """
    try:
        returncode = subprocess.run(cmd, check=False).returncode
    except OSError as err:  # e.g. the gcloud executable is not installed
        print(f'warning: could not start {cmd[0]}: {err}', file=sys.stderr)
        return
    if returncode != 0:
        print(f'warning: command exited with status {returncode}: '
              + ' '.join(shlex.quote(part) for part in cmd), file=sys.stderr)


def download_from_gcs(args):
    """Copy the selected artifacts of one training run from a remote machine.

    Each entry of ``OTHER_FOLDERS_AND_FILES`` followed by each entry of
    ``GRAB_ITERATIONS`` is fetched with ``gcloud compute scp --recurse`` into
    ``/home/user/data/act_training_runs/<run_name>``.

    Args:
        args: Namespace with ``machine`` (remote instance name), ``run_name``
            (sub-folder of the remote training outputs), ``dry_run`` (when
            true, only print what would be done) and optionally ``zone``
            (defaults to ``DEFAULT_ZONE``).
    """
    destination_path = f'/home/user/data/act_training_runs/{args.run_name}'
    source_path = f'/mnt/disks/extra_data/data/act_training/training_outputs/{args.run_name}'
    zone = getattr(args, 'zone', DEFAULT_ZONE)

    if not args.dry_run:
        os.makedirs(destination_path, exist_ok=True)
    else:
        print('would create', destination_path)

    for name in OTHER_FOLDERS_AND_FILES + GRAB_ITERATIONS:
        # An argv list (no shell) keeps odd characters in machine/run names
        # from being interpreted as shell syntax.
        cmd = [
            'gcloud', 'compute', 'scp', '--recurse',
            f'{args.machine}:{source_path}/{name}',
            destination_path,
            f'--zone={zone}',
        ]
        if args.dry_run:
            print('would execute', ' '.join(shlex.quote(part) for part in cmd))
        else:
            _run_command(cmd)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--machine', default='a100-2', type=str)
    # Required, so no default is given; example value: experiment_20240610_205136
    parser.add_argument('--run_name', type=str, required=True)
    parser.add_argument('--zone', default=DEFAULT_ZONE, type=str)
    parser.add_argument('--dry_run', action='store_true')

    args = parser.parse_args()

    download_from_gcs(args)
utils import get_single_qpos from policy import ACTPolicy, CNNMLPPolicy from visualize_episodes import save_videos +from detr.util.misc import wandb_setup +import wandb from sim_env import BOX_POSE +from datetime import datetime import IPython e = IPython.embed @@ -26,6 +32,7 @@ def main(args): # command line parameters is_eval = args['eval'] ckpt_dir = args['ckpt_dir'] + policy_class = args['policy_class'] onscreen_render = args['onscreen_render'] task_name = args['task_name'] @@ -33,21 +40,35 @@ def main(args): batch_size_val = args['batch_size'] num_epochs = args['num_epochs'] + if not is_eval: + now = datetime.now() + date_string = now.strftime("%Y-%m-%d-%H-%M-%S") + ckpt_dir = args['chkpt_dir'] = os.path.join(ckpt_dir, date_string + task_name + args['run_name']) + os.makedirs(ckpt_dir) + # get task parameters is_sim = task_name[:4] == 'sim_' if is_sim: - from constants import SIM_TASK_CONFIGS + from detr.experiment_configs.sim_constants import SIM_TASK_CONFIGS task_config = SIM_TASK_CONFIGS[task_name] else: - from aloha_scripts.constants import TASK_CONFIGS + from detr.experiment_configs.real_env_constants import TASK_CONFIGS task_config = TASK_CONFIGS[task_name] dataset_dir = task_config['dataset_dir'] num_episodes = task_config['num_episodes'] episode_len = task_config['episode_len'] camera_names = task_config['camera_names'] + if 'zero_qpos' in task_config: + zero_qpos = task_config['zero_qpos'] + else: + zero_qpos = False + if 'actiondim' in task_config: + actiondim = task_config['actiondim'] + else: + actiondim = 9 # used in earlier tasks # fixed parameters - state_dim = 14 + state_dim = 9 lr_backbone = 1e-5 backbone = 'resnet18' if policy_class == 'ACT': @@ -65,6 +86,7 @@ def main(args): 'dec_layers': dec_layers, 'nheads': nheads, 'camera_names': camera_names, + 'actiondim': actiondim, } elif policy_class == 'CNNMLP': policy_config = {'lr': args['lr'], 'lr_backbone': lr_backbone, 'backbone' : backbone, 'num_queries': 1, @@ -73,6 +95,7 @@ def main(args): 
def pre_position(env, teleop_policy):
    """Let the operator teleoperate the robot until button "A" is reported.

    Waits for the teleop policy to signal start, resets the environment and
    then forwards every teleop action to ``env.step`` together with a running
    step counter. The loop ends after the first action whose
    ``button_infos['A']`` entry is truthy; that action is still executed.

    Args:
        env: Environment exposing ``reset()`` and ``step(action, t)``, where
            ``step`` returns an ``(obs, info)`` pair.
        teleop_policy: Policy exposing ``wait_for_start()`` and ``act(obs)``;
            ``act`` must return a mapping with a ``'button_infos'`` entry
            that has an ``'A'`` key.
    """
    # NOTE(review): the prompt mentions the handle button while the loop exits
    # on button "A" -- confirm the wording against the teleop controller.
    print('pre-positioning, press handle button to pre-position robot. Release Handle Button to stop recording')
    teleop_policy.wait_for_start()
    print('started')

    obs = env.reset()
    t = 0
    while True:
        action_infos = teleop_policy.act(obs)
        # Read the button before stepping, but still execute this last action.
        finished = action_infos['button_infos']['A']
        obs, _ = env.step(action_infos, t)
        t += 1
        if finished:
            break
    print('pre-positioning done.')
cam_name in camera_names: - curr_image = rearrange(ts.observation['images'][cam_name], 'h w c -> c h w') + curr_image = rearrange(ts.images[cam_name], 'h w c -> c h w') curr_images.append(curr_image) curr_image = np.stack(curr_images, axis=0) curr_image = torch.from_numpy(curr_image / 255.0).float().cuda().unsqueeze(0) return curr_image -def eval_bc(config, ckpt_name, save_episode=True): +def eval_bc(config, ckpt_name, save_episode=False, zero_qpos=False): set_seed(1000) ckpt_dir = config['ckpt_dir'] state_dim = config['state_dim'] @@ -179,14 +224,22 @@ def eval_bc(config, ckpt_name, save_episode=True): # load environment if real_robot: - from aloha_scripts.robot_utils import move_grippers # requires aloha - from aloha_scripts.real_env import make_real_env # requires aloha - env = make_real_env(init_node=True) - env_max_reward = 0 + from emancro_base.robot_infra.xarm.visio_motor.xarm_mdp_env import XarmMDP + env = XarmMDP(control_freq=50) + + from emancro_base.robot_infra.oculus_teleop.vr_teleop_policy import VRTeleopPolicy + from emancro_base.robot_infra.transform_publisher.transform_publisher.transform_broadcast import TransformPublisherNodeManager + from oculus_reader.reader import OculusReader + + nodemanager = TransformPublisherNodeManager() + oculus_reader = OculusReader() + teleop_policy = VRTeleopPolicy(node_manager=nodemanager, environment=env, oculus_reader=oculus_reader) + else: from sim_env import make_sim_env env = make_sim_env(task_name) env_max_reward = env.task.max_reward + query_frequency = policy_config['num_queries'] if temporal_agg: @@ -196,17 +249,20 @@ def eval_bc(config, ckpt_name, save_episode=True): max_timesteps = int(max_timesteps * 1) # may increase for real-world tasks num_rollouts = 50 - episode_returns = [] - highest_rewards = [] + + if config['relative_actions']: + action_mode = 'delta_pos_delta_rot' + else: + action_mode = 'delta_pos_abs_rot' + for rollout_id in range(num_rollouts): + + pre_position(env, teleop_policy) + rollout_id 
+= 0 - ### set task - if 'sim_transfer_cube' in task_name: - BOX_POSE[0] = sample_box_pose() # used in sim reset - elif 'sim_insertion' in task_name: - BOX_POSE[0] = np.concatenate(sample_insertion_pose()) # used in sim reset - - ts = env.reset() + + obs = env.reset() + obs = copy.deepcopy(obs) ### onscreen render if onscreen_render: @@ -221,8 +277,9 @@ def eval_bc(config, ckpt_name, save_episode=True): qpos_history = torch.zeros((1, max_timesteps, state_dim)).cuda() image_list = [] # for visualization qpos_list = [] - target_qpos_list = [] + action_list = [] rewards = [] + with torch.inference_mode(): for t in range(max_timesteps): ### update onscreen render and wait for DT @@ -232,16 +289,19 @@ def eval_bc(config, ckpt_name, save_episode=True): plt.pause(DT) ### process previous timestep to get qpos and image_list - obs = ts.observation if 'images' in obs: image_list.append(obs['images']) else: image_list.append({'main': obs['image']}) - qpos_numpy = np.array(obs['qpos']) + + qpos_numpy = get_single_qpos(obs.robot_pose) qpos = pre_process(qpos_numpy) qpos = torch.from_numpy(qpos).float().cuda().unsqueeze(0) + if zero_qpos: + print('zeroing qpos') + qpos = torch.zeros_like(qpos) qpos_history[:, t] = qpos - curr_image = get_image(ts, camera_names) + curr_image = get_image(obs, camera_names) ### query policy if config['policy_class'] == "ACT": @@ -270,47 +330,17 @@ def eval_bc(config, ckpt_name, save_episode=True): target_qpos = action ### step the environment - ts = env.step(target_qpos) + obs, info = env.step(target_qpos, t, action_mode=action_mode) + obs = copy.deepcopy(obs) ### for visualization qpos_list.append(qpos_numpy) - target_qpos_list.append(target_qpos) - rewards.append(ts.reward) + action_list.append(target_qpos) plt.close() - if real_robot: - move_grippers([env.puppet_bot_left, env.puppet_bot_right], [PUPPET_GRIPPER_JOINT_OPEN] * 2, move_time=0.5) # open - pass - rewards = np.array(rewards) - episode_return = np.sum(rewards[rewards!=None]) - 
episode_returns.append(episode_return) - episode_highest_reward = np.max(rewards) - highest_rewards.append(episode_highest_reward) - print(f'Rollout {rollout_id}\n{episode_return=}, {episode_highest_reward=}, {env_max_reward=}, Success: {episode_highest_reward==env_max_reward}') - - if save_episode: - save_videos(image_list, DT, video_path=os.path.join(ckpt_dir, f'video{rollout_id}.mp4')) - - success_rate = np.mean(np.array(highest_rewards) == env_max_reward) - avg_return = np.mean(episode_returns) - summary_str = f'\nSuccess rate: {success_rate}\nAverage return: {avg_return}\n\n' - for r in range(env_max_reward+1): - more_or_equal_r = (np.array(highest_rewards) >= r).sum() - more_or_equal_r_rate = more_or_equal_r / num_rollouts - summary_str += f'Reward >= {r}: {more_or_equal_r}/{num_rollouts} = {more_or_equal_r_rate*100}%\n' - - print(summary_str) - - # save success rate to txt - result_file_name = 'result_' + ckpt_name.split('.')[0] + '.txt' - with open(os.path.join(ckpt_dir, result_file_name), 'w') as f: - f.write(summary_str) - f.write(repr(episode_returns)) - f.write('\n\n') - f.write(repr(highest_rewards)) - - return success_rate, avg_return + # if save_episode: + # save_videos(image_list, DT, video_path=os.path.join(ckpt_dir, f'video{rollout_id}.mp4')) def forward_pass(data, policy): @@ -339,24 +369,26 @@ def train_bc(train_dataloader, val_dataloader, config): for epoch in tqdm(range(num_epochs)): print(f'\nEpoch {epoch}') # validation - with torch.inference_mode(): - policy.eval() - epoch_dicts = [] - for batch_idx, data in enumerate(val_dataloader): - forward_dict = forward_pass(data, policy) - epoch_dicts.append(forward_dict) - epoch_summary = compute_dict_mean(epoch_dicts) - validation_history.append(epoch_summary) - - epoch_val_loss = epoch_summary['loss'] - if epoch_val_loss < min_val_loss: - min_val_loss = epoch_val_loss - best_ckpt_info = (epoch, min_val_loss, deepcopy(policy.state_dict())) - print(f'Val loss: {epoch_val_loss:.5f}') - summary_string 
= '' - for k, v in epoch_summary.items(): - summary_string += f'{k}: {v.item():.3f} ' - print(summary_string) + if epoch % 50 == 0: + with torch.inference_mode(): + policy.eval() + epoch_dicts = [] + for batch_idx, data in enumerate(val_dataloader): + forward_dict = forward_pass(data, policy) + epoch_dicts.append(forward_dict) + epoch_summary = compute_dict_mean(epoch_dicts) + validation_history.append(epoch_summary) + + epoch_val_loss = epoch_summary['loss'] + if epoch_val_loss < min_val_loss: + min_val_loss = epoch_val_loss + best_ckpt_info = (epoch, min_val_loss, deepcopy(policy.state_dict())) + print(f'Val loss: {epoch_val_loss:.5f}') + wandb.log({'val_loss': epoch_val_loss}) + summary_string = '' + for k, v in epoch_summary.items(): + summary_string += f'{k}: {v.item():.3f} ' + print(summary_string) # training policy.train() @@ -372,13 +404,15 @@ def train_bc(train_dataloader, val_dataloader, config): epoch_summary = compute_dict_mean(train_history[(batch_idx+1)*epoch:(batch_idx+1)*(epoch+1)]) epoch_train_loss = epoch_summary['loss'] print(f'Train loss: {epoch_train_loss:.5f}') + wandb.log({'train_loss': epoch_train_loss}) summary_string = '' for k, v in epoch_summary.items(): summary_string += f'{k}: {v.item():.3f} ' print(summary_string) - if epoch % 100 == 0: + if epoch % 500 == 0: ckpt_path = os.path.join(ckpt_dir, f'policy_epoch_{epoch}_seed_{seed}.ckpt') + print('saving to ', ckpt_path) torch.save(policy.state_dict(), ckpt_path) plot_history(train_history, validation_history, epoch, ckpt_dir, seed) @@ -386,7 +420,7 @@ def train_bc(train_dataloader, val_dataloader, config): torch.save(policy.state_dict(), ckpt_path) best_epoch, min_val_loss, best_state_dict = best_ckpt_info - ckpt_path = os.path.join(ckpt_dir, f'policy_epoch_{best_epoch}_seed_{seed}.ckpt') + ckpt_path = os.path.join(ckpt_dir, f'best_policy_epoch_{best_epoch}_seed_{seed}.ckpt') torch.save(best_state_dict, ckpt_path) print(f'Training finished:\nSeed {seed}, val loss {min_val_loss:.6f} at 
epoch {best_epoch}') @@ -417,13 +451,16 @@ def plot_history(train_history, validation_history, num_epochs, ckpt_dir, seed): parser = argparse.ArgumentParser() parser.add_argument('--eval', action='store_true') parser.add_argument('--onscreen_render', action='store_true') + parser.add_argument('--run_name', action='store', type=str, help='run_name', required=True) parser.add_argument('--ckpt_dir', action='store', type=str, help='ckpt_dir', required=True) + parser.add_argument('--ckpt_name', action='store', type=str, help='ckpt_dir', required=False) parser.add_argument('--policy_class', action='store', type=str, help='policy_class, capitalize', required=True) parser.add_argument('--task_name', action='store', type=str, help='task_name', required=True) parser.add_argument('--batch_size', action='store', type=int, help='batch_size', required=True) parser.add_argument('--seed', action='store', type=int, help='seed', required=True) parser.add_argument('--num_epochs', action='store', type=int, help='num_epochs', required=True) parser.add_argument('--lr', action='store', type=float, help='lr', required=True) + # for ACT parser.add_argument('--kl_weight', action='store', type=int, help='KL Weight', required=False) @@ -431,5 +468,7 @@ def plot_history(train_history, validation_history, num_epochs, ckpt_dir, seed): parser.add_argument('--hidden_dim', action='store', type=int, help='hidden_dim', required=False) parser.add_argument('--dim_feedforward', action='store', type=int, help='dim_feedforward', required=False) parser.add_argument('--temporal_agg', action='store_true') + + parser.add_argument('--wandb_mode', type=str, default='online', help='wandb mode') # online, disabled main(vars(parser.parse_args())) diff --git a/record_sim_episodes.py b/record_sim_episodes.py index 253fdea1..1d4e7ca7 100644 --- a/record_sim_episodes.py +++ b/record_sim_episodes.py @@ -5,7 +5,7 @@ import matplotlib.pyplot as plt import h5py -from constants import PUPPET_GRIPPER_POSITION_NORMALIZE_FN, 
SIM_TASK_CONFIGS +from detr.experiment_configs.sim_constants import PUPPET_GRIPPER_POSITION_NORMALIZE_FN, SIM_TASK_CONFIGS from ee_sim_env import make_ee_sim_env from sim_env import make_sim_env, BOX_POSE from scripted_policy import PickAndTransferPolicy, InsertionPolicy diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..9585ab6d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +torchvision +torch +pyquaternion +pyyaml +rospkg +pexpect +mujoco==2.3.7 +dm_control==1.0.14 +opencv-python +matplotlib +einops +packaging +h5py +ipython +wandb diff --git a/scripted_policy.py b/scripted_policy.py index 4fd8f000..2a9e0c32 100644 --- a/scripted_policy.py +++ b/scripted_policy.py @@ -2,7 +2,7 @@ import matplotlib.pyplot as plt from pyquaternion import Quaternion -from constants import SIM_TASK_CONFIGS +from detr.experiment_configs.sim_constants import SIM_TASK_CONFIGS from ee_sim_env import make_ee_sim_env import IPython diff --git a/sim_env.py b/sim_env.py index b79b935b..3de4a4c9 100644 --- a/sim_env.py +++ b/sim_env.py @@ -6,11 +6,11 @@ from dm_control.rl import control from dm_control.suite import base -from constants import DT, XML_DIR, START_ARM_POSE -from constants import PUPPET_GRIPPER_POSITION_UNNORMALIZE_FN -from constants import MASTER_GRIPPER_POSITION_NORMALIZE_FN -from constants import PUPPET_GRIPPER_POSITION_NORMALIZE_FN -from constants import PUPPET_GRIPPER_VELOCITY_NORMALIZE_FN +from detr.experiment_configs.sim_constants import DT, XML_DIR, START_ARM_POSE +from detr.experiment_configs.sim_constants import PUPPET_GRIPPER_POSITION_UNNORMALIZE_FN +from detr.experiment_configs.sim_constants import MASTER_GRIPPER_POSITION_NORMALIZE_FN +from detr.experiment_configs.sim_constants import PUPPET_GRIPPER_POSITION_NORMALIZE_FN +from detr.experiment_configs.sim_constants import PUPPET_GRIPPER_VELOCITY_NORMALIZE_FN import IPython e = IPython.embed diff --git a/training.sh b/training.sh new file mode 100644 index 00000000..e6c27fee 
# Training invocations for the real-robot ACT policies.
# Every command passes --run_name, which imitate_episodes.py requires.

python3 imitate_episodes.py \
    --task_name insert_redness_relief_slanted \
    --ckpt_dir training_outputs \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \
    --num_epochs 5000 --lr 2e-5 \
    --seed 0 \
    --run_name insert_redness_relief_slanted


python3 imitate_episodes.py \
    --task_name insert_redness_relief_slanted_anywhere \
    --ckpt_dir training_outputs \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \
    --num_epochs 5000 --lr 2e-5 \
    --seed 0 \
    --run_name anywhere_slanted


python3 imitate_episodes.py \
    --task_name pick_vial \
    --ckpt_dir training_outputs \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \
    --num_epochs 20000 --lr 2e-5 \
    --seed 0 \
    --run_name pick_vial_05_10


python3 imitate_episodes.py \
    --task_name insert_ibuprofen \
    --ckpt_dir training_outputs \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \
    --num_epochs 10000 --lr 2e-5 \
    --seed 0 \
    --run_name insert_ibuprofen


python3 imitate_episodes.py \
    --task_name insert_ibuprofen_zeroqpos \
    --ckpt_dir training_outputs \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \
    --num_epochs 10000 --lr 2e-5 \
    --seed 0 \
    --run_name insert_ibuprofen_zeroqpos

# Quoted so a DATA path containing spaces stays a single argument.
python3 imitate_episodes.py \
    --task_name insert_ibuprofen_rel \
    --ckpt_dir "$DATA/act_training/training_outputs" \
    --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 128 --dim_feedforward 3200 \
    --num_epochs 20000 --lr 2e-5 \
    --seed 0 \
    --run_name insert_ibuprofen_rel
TensorDataset, DataLoader +from scipy.spatial.transform import Rotation +import glob +import random +import io +from PIL import Image + +def euler_to_r6(euler, degrees=False): + rot_mat = Rotation.from_euler("xyz", euler, degrees=degrees).as_matrix() + a1, a2 = rot_mat[0], rot_mat[1] + return np.concatenate((a1, a2)).astype(np.float32) import IPython e = IPython.embed +def get_qpos(root): + # goes together with the function below! Do not change separately! + xyz = root['observations']['robot_poses'][:, :3] # get the xyz + euler = root['observations']['robot_poses'][:, 3:] # get the euler angles and convert to r6 + r6s = np.array([euler_to_r6(degrees, degrees=True) for degrees in euler]) + return np.concatenate([xyz, r6s], axis=1) + +def get_single_qpos(pose): + # goes together with the function above! Do not change separately! + xyz = pose[:3] + joint_angles = pose[3:] + r6s = euler_to_r6(joint_angles, degrees=True) + return np.concatenate([xyz, r6s], axis=0) + + class EpisodicDataset(torch.utils.data.Dataset): - def __init__(self, episode_ids, dataset_dir, camera_names, norm_stats): + def __init__(self, episode_filenames, dataset_dir, camera_names, norm_stats, slice_episode_len, zero_qpos=False): super(EpisodicDataset).__init__() - self.episode_ids = episode_ids + self.episode_filenames = episode_filenames self.dataset_dir = dataset_dir self.camera_names = camera_names self.norm_stats = norm_stats self.is_sim = None + self.slice_episode_len = slice_episode_len + self.zero_qpos = zero_qpos self.__getitem__(0) # initialize self.is_sim def __len__(self): - return len(self.episode_ids) + return len(self.episode_filenames) def __getitem__(self, index): sample_full_episode = False # hardcode + # print('getting index', index) - episode_id = self.episode_ids[index] - dataset_path = os.path.join(self.dataset_dir, f'episode_{episode_id}.hdf5') + dataset_path = self.episode_filenames[index] with h5py.File(dataset_path, 'r') as root: is_sim = root.attrs['sim'] - 
original_action_shape = root['/action'].shape - episode_len = original_action_shape[0] + episode_len, action_dim = root['/action'].shape + if sample_full_episode: start_ts = 0 else: start_ts = np.random.choice(episode_len) + # get observation at start_ts only - qpos = root['/observations/qpos'][start_ts] - qvel = root['/observations/qvel'][start_ts] + qpos = get_qpos(root)[start_ts] + image_dict = dict() for cam_name in self.camera_names: image_dict[cam_name] = root[f'/observations/images/{cam_name}'][start_ts] @@ -48,15 +76,23 @@ def __getitem__(self, index): action_len = episode_len - max(0, start_ts - 1) # hack, to make timesteps more aligned self.is_sim = is_sim - padded_action = np.zeros(original_action_shape, dtype=np.float32) + padded_action = np.zeros([self.slice_episode_len, action_dim], dtype=np.float32) padded_action[:action_len] = action - is_pad = np.zeros(episode_len) + is_pad = np.zeros(self.slice_episode_len) is_pad[action_len:] = 1 # new axis for different cameras all_cam_images = [] for cam_name in self.camera_names: - all_cam_images.append(image_dict[cam_name]) + image = image_dict[cam_name] + if isinstance(image, np.bytes_): # if image is compressed JPEG string + jpg_bytes = io.BytesIO(image) + image = Image.open(jpg_bytes) + image = np.array(image) + all_cam_images.append(image) + else: + all_cam_images.append(image) + all_cam_images = np.stack(all_cam_images, axis=0) # construct observations @@ -71,57 +107,96 @@ def __getitem__(self, index): # normalize image and change dtype to float image_data = image_data / 255.0 action_data = (action_data - self.norm_stats["action_mean"]) / self.norm_stats["action_std"] - qpos_data = (qpos_data - self.norm_stats["qpos_mean"]) / self.norm_stats["qpos_std"] + if self.zero_qpos: + qpos_data = torch.zeros_like(qpos_data) + else: + qpos_data = (qpos_data - self.norm_stats["qpos_mean"]) / self.norm_stats["qpos_std"] + + # print('image_data', image_data.shape) + # print('qpos_data', qpos_data.shape) + # 
print('action_data', action_data.shape) + # print('is_pad', is_pad.shape) return image_data, qpos_data, action_data, is_pad -def get_norm_stats(dataset_dir, num_episodes): +def get_norm_stats(all_hdf5_files): all_qpos_data = [] all_action_data = [] - for episode_idx in range(num_episodes): - dataset_path = os.path.join(dataset_dir, f'episode_{episode_idx}.hdf5') - with h5py.File(dataset_path, 'r') as root: - qpos = root['/observations/qpos'][()] - qvel = root['/observations/qvel'][()] - action = root['/action'][()] + episode_lens = [] + for file in all_hdf5_files: + # print("opening ", file) + try: + with h5py.File(file, 'r') as root: + qpos = get_qpos(root) + action = root['/action'][()] + except: + print("error opening ", file) + continue all_qpos_data.append(torch.from_numpy(qpos)) + episode_lens.append(action.shape[0]) all_action_data.append(torch.from_numpy(action)) - all_qpos_data = torch.stack(all_qpos_data) - all_action_data = torch.stack(all_action_data) + all_qpos_data = torch.cat(all_qpos_data) + all_action_data = torch.cat(all_action_data) all_action_data = all_action_data # normalize action data - action_mean = all_action_data.mean(dim=[0, 1], keepdim=True) - action_std = all_action_data.std(dim=[0, 1], keepdim=True) + action_mean = all_action_data.mean(dim=[0], keepdim=True) + action_std = all_action_data.std(dim=[0], keepdim=True) action_std = torch.clip(action_std, 1e-2, np.inf) # clipping # normalize qpos data - qpos_mean = all_qpos_data.mean(dim=[0, 1], keepdim=True) - qpos_std = all_qpos_data.std(dim=[0, 1], keepdim=True) + qpos_mean = all_qpos_data.mean(dim=[0], keepdim=True) + qpos_std = all_qpos_data.std(dim=[0], keepdim=True) qpos_std = torch.clip(qpos_std, 1e-2, np.inf) # clipping stats = {"action_mean": action_mean.numpy().squeeze(), "action_std": action_std.numpy().squeeze(), "qpos_mean": qpos_mean.numpy().squeeze(), "qpos_std": qpos_std.numpy().squeeze(), - "example_qpos": qpos} + "example_qpos": qpos} - return stats + return stats, 
np.max(np.array(episode_lens)) -def load_data(dataset_dir, num_episodes, camera_names, batch_size_train, batch_size_val): +def load_data(dataset_dir, max_num_episodes, camera_names, batch_size_train, batch_size_val, task_config, zero_qpos=False): print(f'\nData from: {dataset_dir}\n') + + if 'stage_key' in task_config: # for multi stage tasks + stage_key = task_config['stage_key'] + search_pattern = f'**/*/*{stage_key}.hdf5' + else: + search_pattern = '**/*/*.hdf5' + + if isinstance(dataset_dir, list): + all_hdf5 = [] + for d in dataset_dir: + new_files = glob.glob(os.path.join(d, search_pattern), recursive=True) + assert len(new_files) > 0, f"no hdf5 files found in {d}" + all_hdf5 += new_files + else: + all_hdf5 = glob.glob(os.path.join(dataset_dir, search_pattern), recursive=True) + + print(f"found {len(all_hdf5)} hdf5 files") + random.shuffle(all_hdf5) + + if max_num_episodes == -1: + num_episodes = len(all_hdf5) + else: + num_episodes = min(num_episodes, len(all_hdf5)) + # obtain train test split - train_ratio = 0.8 - shuffled_indices = np.random.permutation(num_episodes) - train_indices = shuffled_indices[:int(train_ratio * num_episodes)] - val_indices = shuffled_indices[int(train_ratio * num_episodes):] + train_ratio = 0.9 + train_files = all_hdf5[:int(train_ratio * num_episodes)] + val_files = all_hdf5[int(train_ratio * num_episodes):] # obtain normalization stats for qpos and action - norm_stats = get_norm_stats(dataset_dir, num_episodes) + norm_stats, max_episode_len = get_norm_stats(all_hdf5) # construct dataset and dataloader - train_dataset = EpisodicDataset(train_indices, dataset_dir, camera_names, norm_stats) - val_dataset = EpisodicDataset(val_indices, dataset_dir, camera_names, norm_stats) + train_dataset = EpisodicDataset(train_files, dataset_dir, camera_names, norm_stats, max_episode_len, zero_qpos=zero_qpos) + val_dataset = EpisodicDataset(val_files, dataset_dir, camera_names, norm_stats, max_episode_len, zero_qpos=zero_qpos) + 
print('train_dataset len:', len(train_dataset)) + print('val_dataset len:', len(val_dataset)) + train_dataloader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True, pin_memory=True, num_workers=1, prefetch_factor=1) val_dataloader = DataLoader(val_dataset, batch_size=batch_size_val, shuffle=True, pin_memory=True, num_workers=1, prefetch_factor=1) diff --git a/validate.sh b/validate.sh new file mode 100644 index 00000000..4ecfa19b --- /dev/null +++ b/validate.sh @@ -0,0 +1,110 @@ +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted \ + --ckpt_dir training_outputs/2024-04-05-10-42-07insert_redness_relief_slantedtest1 \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name slanted + + + +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted_anywhere \ + --ckpt_dir training_outputs/2024-04-26-10-53-06insert_redness_relief_slanted_anywhereanywhere_slanted \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name anywhere_Vincent + +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted_anywhere \ + --ckpt_dir training_outputs/2024-05-06-18-16-32pick_vialpick_vial \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name pick_vialpick_vial + + + + /home/user/exp_checkpoints/closed_loop_demos_weight/2024-05-10-22-45-56place_vialplace_vial_05_10_bs128_lr8e-5 + + +# placing works okay, fails when tolerances are tight +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted_anywhere \ + --ckpt_dir /home/user/exp_checkpoints/closed_loop_demos_weight/2024-05-10-22-45-56place_vialplace_vial_05_10_bs128_lr8e-5 \ + 
--ckpt_name best_policy_epoch_2500_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + + +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted_anywhere \ + --ckpt_dir /home/user/exp_checkpoints/closed_loop_demos_weight/2024-05-10-22-46-12pick_vialpick_vial_05_10_bs128_lr8e-5 \ + --ckpt_name best_policy_epoch_19700_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + + +python3 imitate_episodes.py \ + --task_name insert_redness_relief_slanted_anywhere \ + --ckpt_dir /home/user/code/act/training_outputs/2024-05-17-22-42-30insert_ibuprofeninsert_ibuprofen \ + --ckpt_name policy_epoch_5500_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + +python3 imitate_episodes.py \ + --task_name insert_ibuprofen_zeroqpos \ + --ckpt_dir /home/user/code/act/training_outputs/2024-06-06-17-51-39insert_ibuprofen_zeroqposinsert_ibuprofen_zeroqpos \ + --ckpt_name policy_epoch_7500_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + + +#trained with bsize 128 +python3 imitate_episodes.py \ + --task_name insert_ibuprofen_zeroqpos \ + --ckpt_dir /home/user/code/act/training_outputs/2024-06-09-00-46-29insert_ibuprofen_zeroqposrun_1 \ + --ckpt_name policy_epoch_10000_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + +#trained with bsize 128, 
for much longer +python3 imitate_episodes.py \ + --task_name insert_ibuprofen_zeroqpos \ + --ckpt_dir /home/user/code/act/training_outputs/2024-06-10-21-57-10insert_ibuprofen_zeroqposb128_100kepoch \ + --ckpt_name policy_epoch_24500_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval + +# test on relative actions: +python3 imitate_episodes.py \ + --task_name insert_ibuprofen_rel \ + --ckpt_dir /home/user/data/act_training_runs/2024-06-24-18-01-33insert_ibuprofen_relinsert_ibuprofen_rel \ + --ckpt_name policy_epoch_18000_seed_0.ckpt \ + --policy_class ACT --kl_weight 10 --chunk_size 40 --hidden_dim 512 --batch_size 32 --dim_feedforward 3200 \ + --num_epochs 5000 --lr 2e-5 \ + --seed 0 \ + --eval \ + --run_name eval \ No newline at end of file diff --git a/visualize_episodes.py b/visualize_episodes.py index 4e55e471..d541d1ad 100644 --- a/visualize_episodes.py +++ b/visualize_episodes.py @@ -5,7 +5,7 @@ import argparse import matplotlib.pyplot as plt -from constants import DT +from detr.experiment_configs.sim_constants import DT import IPython e = IPython.embed