From 2677cbe8c0c71ea6bf9687f5d05fa5a9814075b2 Mon Sep 17 00:00:00 2001
From: ShahRutav
Date: Sat, 23 Dec 2023 20:30:13 +0530
Subject: [PATCH 1/5] fixed env names with latest robohive

---
 rlhive/envs.py | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/rlhive/envs.py b/rlhive/envs.py
index ab67bf2e..74b0a106 100644
--- a/rlhive/envs.py
+++ b/rlhive/envs.py
@@ -64,26 +64,25 @@ def register_kitchen_envs():
     print("RLHive:> Registering Kitchen Envs")

     env_list = [
-        "kitchen_knob1_off-v3",
-        "kitchen_knob1_on-v3",
-        "kitchen_knob2_off-v3",
-        "kitchen_knob2_on-v3",
-        "kitchen_knob3_off-v3",
-        "kitchen_knob3_on-v3",
-        "kitchen_knob4_off-v3",
-        "kitchen_knob4_on-v3",
-        "kitchen_light_off-v3",
-        "kitchen_light_on-v3",
-        "kitchen_sdoor_close-v3",
-        "kitchen_sdoor_open-v3",
-        "kitchen_ldoor_close-v3",
-        "kitchen_ldoor_open-v3",
-        "kitchen_rdoor_close-v3",
-        "kitchen_rdoor_open-v3",
-        "kitchen_micro_close-v3",
-        "kitchen_micro_open-v3",
-        "FK1_RelaxFixed-v4",
-        # "kitchen_close-v3",
+        "FK1_Knob1OffRandom-v4",
+        "FK1_Knob1OnRandom-v4",
+        "FK1_Knob2OffRandom-v4",
+        "FK1_Knob2OnRandom-v4",
+        "FK1_Knob3OffRandom-v4",
+        "FK1_Knob3OnRandom-v4",
+        "FK1_Knob4OffRandom-v4",
+        "FK1_Knob4OnRandom-v4",
+        "FK1_LightOffRandom-v4",
+        "FK1_LightOnRandom-v4",
+        "FK1_SdoorCloseRandom-v4",
+        "FK1_SdoorOpenRandom-v4",
+        "FK1_LdoorCloseRandom-v4",
+        "FK1_LdoorOpenRandom-v4",
+        "FK1_RdoorCloseRandom-v4",
+        "FK1_RdoorOpenRandom-v4",
+        "FK1_MicroOpenRandom-v4",
+        "FK1_MicroCloseRandom-v4",
+        "FK1_RelaxRandom-v4",
     ]

     obs_keys_wt = {
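A quick way to verify the renamed environments from this patch is to construct one by name. The sketch below is not part of the patch; it assumes `robohive` and `torchrl` are installed and that importing `robohive` is what registers the `FK1_*` names (any entry from the new list should work):

```python
# Minimal check that a renamed env resolves with the latest RoboHive.
# The env name below is one example entry from the list in the patch above.
import robohive  # noqa: F401  (importing robohive registers the FK1_* env names)
from torchrl.envs import RoboHiveEnv

env = RoboHiveEnv(env_name="FK1_Knob1OnRandom-v4")
print(env.observation_spec)  # construction fails loudly if the name is stale
```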
From b695e91956607f1794a4dee280b01fb749a90146 Mon Sep 17 00:00:00 2001
From: ShahRutav
Date: Fri, 12 Jan 2024 12:18:30 +0530
Subject: [PATCH 2/5] removed examples from get_started

moving examples to readme from get_started
---
 GET_STARTED.md | 123 +------------------------------------------------
 1 file changed, 2 insertions(+), 121 deletions(-)

diff --git a/GET_STARTED.md b/GET_STARTED.md
index 9eb0aedf..63ebc547 100644
--- a/GET_STARTED.md
+++ b/GET_STARTED.md
@@ -26,8 +26,9 @@ You can run these two commands to check that installation was successful:
 ```shell
 python -c "import robohive"
 MUJOCO_GL=egl sim_backend=MUJOCO python -c """
+import robohive
 from torchrl.envs import RoboHiveEnv
-env_name = 'visual_franka_slide_random-v3'
+env_name = 'FrankaReachFixed-v0'
 base_env = RoboHiveEnv(env_name)
 print(base_env.rollout(3))

@@ -36,123 +37,3 @@
 from torchrl.envs.utils import check_env_specs
 check_env_specs(base_env)
 """
 ```
-
-## Build your environment (and collector)
-
-Once you have installed the libraries and the sanity checks run, you can start using the envs.
-Here's a step-by-step example of how to create an env, pass the output through R3M and create a data collector.
-For more info, check the [torchrl environments doc](https://pytorch.org/rl/reference/envs.html).
-
-```python
-from torchrl.envs import RoboHiveEnv
-from torchrl.envs import ParallelEnv, TransformedEnv, R3MTransform
-import torch
-
-from torchrl.collectors.collectors import SyncDataCollector, MultiaSyncDataCollector, RandomPolicy
-# make sure your ParallelEnv is inside the `if __name__ == "__main__":` condition, otherwise you'll
-# be creating an infinite tree of subprocesses
-if __name__ == "__main__":
-    device = torch.device("cpu")  # could be 'cuda:0'
-    env_name = 'visual_franka_slide_random-v3'
-    base_env = ParallelEnv(4, lambda: RoboHiveEnv(env_name, device=device))
-    # build a transformed env with the R3M transform. The transform will be applied on a batch of data.
-    # You can append other transforms by doing `env.append_transform(...)` if needed
-    env = TransformedEnv(base_env, R3MTransform('resnet50', in_keys=["pixels"], download=True))
-    assert env.device == device
-    # example of a rollout
-    print(env.rollout(3))
-
-    # a simple, single-process data collector
-    collector = SyncDataCollector(env, policy=RandomPolicy(env.action_spec), total_frames=1_000_000, frames_per_batch=200, init_random_frames=200, )
-    for data in collector:
-        print(data)
-
-    # async multi-proc data collector
-    collector = MultiaSyncDataCollector([env, env], policy=RandomPolicy(env.action_spec), total_frames=1_000_000, frames_per_batch=200, init_random_frames=200, )
-    for data in collector:
-        print(data)
-
-```
-
-## Designing experiments and logging values
-
-TorchRL provides a series of wrappers around common loggers (tensorboard, mlflow, wandb etc).
-We generally default to wandb.
-Here are the details on how to set up your logger: wandb can work in one of two
-modes: `online`, where you need an account and the machine you're running your experiment on must be
-connected to the cloud, and `offline` where the logs are stored locally.
-The latter is more general and easier to collect, hence we suggest you use this mode instead.
-To configure and use your logger using TorchRL, procede as follows (notice that
-using the plain wandb API is very similar to this, TorchRL's conveniance just relies in the
-interchangeability with other loggers):
-
-```python
-import argparse
-import os
-
-from torchrl.record.loggers import WandbLogger
-import torch
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument("--total_frames", default=300, type=int)
-parser.add_argument("--training_steps", default=3, type=int)
-parser.add_argument("--wandb_exp_name", default="a2c")
-parser.add_argument("--wandb_save_dir", default="./mylogs")
-parser.add_argument("--wandb_project", default="rlhive")
-parser.add_argument("--wandb_mode", default="offline",
-                    choices=["online", "offline"])
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    training_steps = args.training_steps
-    if args.wandb_mode == "offline":
-        # This will be integrated in torchrl
-        dest_dir = args.wandb_save_dir
-        os.makedirs(dest_dir, exist_ok=True)
-    logger = WandbLogger(
-        exp_name=args.wandb_exp_name,
-        save_dir=dest_dir,
-        project=args.wandb_project,
-        mode=args.wandb_mode,
-    )
-
-    # we collect 3 frames in each batch
-    collector = (torch.randn(3, 4, 0) for _ in range(args.total_frames // 3))
-    total_frames = 0
-    # main loop: collection of batches
-    for batch in collector:
-        for step in range(training_steps):
-            pass
-        total_frames += batch.shape[0]
-        # We log according to the frames, which we believe is the less subject to experiment
-        # hyperparameters
-        logger.log_scalar("loss_value", torch.randn([]).item(),
-                          step=total_frames)
-    # one can log videos too! But custom steps do not work as expected :(
-    video = torch.randint(255, (10, 11, 3, 64, 64)) # 10 videos of 11 frames, 64x64 pixels
-    logger.log_video("demo", video)
-
-```
-
-
-This script will save your logs in `./mylogs`. Don't worry too much about `project` or `entity`, which can be [overwritten
-at upload time](https://docs.wandb.ai/ref/cli/wandb-sync):
-
-Once we'll have collected these logs, we will upload them to a wandb account using `wandb sync path/to/log --entity someone --project something`.
-
-## What to log
-
-In general, experiments should log the following items:
-- dense reward (train and test)
-- sparse reward (train and test)
-- success perc (train and test)
-- video: after every 1M runs or so, a test run should be performed. A video recorder should be appended
-  to the test env to log the behaviour.
-- number of training steps: since our "x"-axis will be the number of frames collected, keeping track of the
-  training steps will help us interpolate one with the other.
-- For behavioural cloning we should log the number of epochs instead.
-
-## A more concrete example
-
-TODO

From cf0c7abfbcd107388cbdb70d9b6786468cec1a6e Mon Sep 17 00:00:00 2001
From: ShahRutav
Date: Fri, 12 Jan 2024 12:57:06 +0530
Subject: [PATCH 3/5] updated envs.py with latest keys

---
 rlhive/envs.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/rlhive/envs.py b/rlhive/envs.py
index 74b0a106..f1726369 100644
--- a/rlhive/envs.py
+++ b/rlhive/envs.py
@@ -43,7 +43,7 @@ def new_fun(*args, **kwargs):

 override_keys = [
     "objs_jnt",
-    "end_effector",
+    "ee_pose",
     "knob1_site_err",
     "knob2_site_err",
     "knob3_site_err",
@@ -87,7 +87,7 @@ def register_kitchen_envs():

     obs_keys_wt = {
         "robot_jnt": 1.0,
-        "end_effector": 1.0,
+        "ee_pose": 1.0,
     }
     visual_obs_keys = {
         "rgb:right_cam:224x224:2d": 1.0,
@@ -126,7 +126,7 @@ def register_franka_envs():
     # Franka Appliance ======================================================================
     obs_keys_wt = {
         "robot_jnt": 1.0,
-        "end_effector": 1.0,
+        "ee_pose": 1.0,
     }
     visual_obs_keys = {
         "rgb:right_cam:224x224:2d": 1.0,
@@ -137,7 +137,10 @@ def register_franka_envs():
         new_env_name = "visual_" + env
         register_env_variant(
             env,
-            variants={"obs_keys_wt": obs_keys_wt, "visual_keys": visual_obs_keys},
+            variants={
+                "obs_keys_wt": obs_keys_wt,
+                "visual_keys": list(visual_obs_keys.keys()),
+            },
             variant_id=new_env_name,
             override_keys=override_keys,
         )
@@ -193,7 +196,7 @@ def register_myo_envs():
         env,
         variants={
             "obs_keys": [
-                "hand_jnt",
+                "qpos", # TODO: Check if this is correct
             ],
             "visual_keys": visual_keys,
         },
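The `visual_keys` change in this patch (a `{key: weight}` dict replaced by a plain list of key names) can be mirrored when registering a variant by hand. A rough sketch, assuming `register_env_variant` is importable from RoboHive the way `rlhive/envs.py` uses it, and using a hypothetical variant id:

```python
# Sketch of a manual variant registration matching the updated rlhive/envs.py.
# Assumes robohive exposes register_env_variant with the signature used above.
import robohive  # noqa: F401  (registers the base FK1_* envs)
from robohive.envs.env_variants import register_env_variant

obs_keys_wt = {"robot_jnt": 1.0, "ee_pose": 1.0}
visual_obs_keys = {"rgb:right_cam:224x224:2d": 1.0}

register_env_variant(
    "FK1_Knob1OnRandom-v4",
    variants={
        "obs_keys_wt": obs_keys_wt,
        # visual_keys now takes the key names only, not the {key: weight} dict
        "visual_keys": list(visual_obs_keys.keys()),
    },
    variant_id="visual_FK1_Knob1OnRandom-v4",
)
```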
From 46be9aab14193eeb00c7db20b710c7407326afa6 Mon Sep 17 00:00:00 2001
From: ShahRutav
Date: Fri, 12 Jan 2024 12:59:03 +0530
Subject: [PATCH 4/5] Update README.md

---
 README.md | 59 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 9168a419..d66ab196 100644
--- a/README.md
+++ b/README.md
@@ -25,37 +25,46 @@

 Find examples in `test/test_envs.py`.
 The basic usage is:
 ```python
+import robohive
+import rlhive.envs
+from torchrl.envs import RoboHiveEnv
 env = RoboHiveEnv(env_name="FrankaReachRandom_v2d-v0")
 ```

 The following `kitchen` and `franka` visual environments should be used (they will be executed without
 flattening/unflattening of the images which is an expensive process):
 ```python
-env_list = ["visual_franka_slide_random-v3",
-            "visual_franka_slide_close-v3",
-            "visual_franka_slide_open-v3",
-            "visual_franka_micro_random-v3",
-            "visual_franka_micro_close-v3",
-            "visual_franka_micro_open-v3",
-            "visual_kitchen_knob1_off-v3",
-            "visual_kitchen_knob1_on-v3",
-            "visual_kitchen_knob2_off-v3",
-            "visual_kitchen_knob2_on-v3",
-            "visual_kitchen_knob3_off-v3",
-            "visual_kitchen_knob3_on-v3",
-            "visual_kitchen_knob4_off-v3",
-            "visual_kitchen_knob4_on-v3",
-            "visual_kitchen_light_off-v3",
-            "visual_kitchen_light_on-v3",
-            "visual_kitchen_sdoor_close-v3",
-            "visual_kitchen_sdoor_open-v3",
-            "visual_kitchen_ldoor_close-v3",
-            "visual_kitchen_ldoor_open-v3",
-            "visual_kitchen_rdoor_close-v3",
-            "visual_kitchen_rdoor_open-v3",
-            "visual_kitchen_micro_close-v3",
-            "visual_kitchen_micro_open-v3",
-            "visual_kitchen_close-v3"
+env_list = [
+    "visual_motorFingerReachFixed-v0",
+    "visual_door-v1",
+    "visual_hammer-v1",
+    "visual_pen-v1",
+    "visual_relocate-v1",
+    "visual_franka_slide_random-v3",
+    "visual_franka_slide_close-v3",
+    "visual_franka_slide_open-v3",
+    "visual_franka_micro_random-v3",
+    "visual_franka_micro_close-v3",
+    "visual_franka_micro_open-v3",
+    "visual_FK1_Knob1OffRandom-v4",
+    "visual_FK1_Knob1OnRandom-v4",
+    "visual_FK1_Knob2OffRandom-v4",
+    "visual_FK1_Knob2OnRandom-v4",
+    "visual_FK1_Knob3OffRandom-v4",
+    "visual_FK1_Knob3OnRandom-v4",
+    "visual_FK1_Knob4OffRandom-v4",
+    "visual_FK1_Knob4OnRandom-v4",
+    "visual_FK1_LightOffRandom-v4",
+    "visual_FK1_LightOnRandom-v4",
+    "visual_FK1_SdoorCloseRandom-v4",
+    "visual_FK1_SdoorOpenRandom-v4",
+    "visual_FK1_LdoorCloseRandom-v4",
+    "visual_FK1_LdoorOpenRandom-v4",
+    "visual_FK1_RdoorCloseRandom-v4",
+    "visual_FK1_RdoorOpenRandom-v4",
+    "visual_FK1_MicroOpenRandom-v4",
+    "visual_FK1_MicroCloseRandom-v4",
+    "visual_FK1_RelaxRandom-v4",
 ]
 ```
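Any entry of the updated list can be sanity-checked by rolling out the visual variant and looking for the camera key in the output. A minimal sketch, assuming the registrations from the patched `rlhive.envs` above are in place:

```python
# Sketch: roll out one visual variant and inspect the observation entries.
# Assumes robohive and the patched rlhive.envs register the visual_* names.
import robohive  # noqa: F401
import rlhive.envs  # noqa: F401  (registers the visual_* variants)
from torchrl.envs import RoboHiveEnv

env = RoboHiveEnv(env_name="visual_FK1_Knob1OnRandom-v4")
rollout = env.rollout(2)
print(rollout)  # should include an image entry for rgb:right_cam:224x224:2d
```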
From b0e81fdc89b992b962f8ac4e72e349509941f1bc Mon Sep 17 00:00:00 2001
From: ShahRutav
Date: Fri, 12 Jan 2024 13:43:28 +0530
Subject: [PATCH 5/5] Update README.md

Update README examples with torchrl updates.

The bug on MultiaSyncDataCollector with agenthive is tracked at:
https://github.com/facebookresearch/agenthive/issues/23
---
 README.md | 76 +++++++++++++++++++++----------------------------------
 1 file changed, 29 insertions(+), 47 deletions(-)

diff --git a/README.md b/README.md
index d66ab196..78299616 100644
--- a/README.md
+++ b/README.md
@@ -76,53 +76,43 @@ env = ParallelEnv(3, EnvCreator(lambda: RoboHiveEnv(env_name="FrankaReachRandom_

 To use transforms (normalization, grayscale etc), use the env transforms:
 ```python
-from torchrl.envs import EnvCreator, ParallelEnv, TransformedEnv, R3MTransform
-env = ParallelEnv(3, EnvCreator(lambda: RoboHiveEnv(env_name="FrankaReachRandom_v2d-v0")))
-env = TransformedEnv(
-    base_env,
-    R3MTransform(
-        "resnet18",
-        ["pixels"],
-        ["pixels_embed"],
-    ),
-)
+import torch
+from rlhive.rl_envs import make_r3m_env
+
+if __name__ == '__main__':
+    device = torch.device("cpu")  # could be 'cuda:0'
+    env_name = 'FrankaReachFixed-v0'
+    env = make_r3m_env(env_name, model_name="resnet18", download=True)
+    assert env.device == device
+    # example of a rollout
+    print(env.rollout(3))
 ```

 Make sure that the R3M or VIP transform is appended after the ParallelEnv, otherwise you will pass as many images
 as there are processes through the ResNet module (and quickly run into an OOM exception).
-Finally, the script of a typical data collector (executed on 4 different GPUs in an asynchronous manner) reads
-as follows:
+Finally, the script of a typical data collector reads as follows (for more info, check the [torchrl environments doc](https://pytorch.org/rl/reference/envs.html)):

 ```python
-import tqdm
-from torchrl.collectors.collectors import MultiaSyncDataCollector, RandomPolicy
-from agenthive.rl_envs import RoboHiveEnv
-from torchrl.envs import ParallelEnv, TransformedEnv, GrayScale, ToTensorImage, Resize, ObservationNorm, EnvCreator, Compose, CatFrames
-
-if __name__ == '__main__':
-    # create a parallel env with 4 envs running independendly.
-    # I put the 'cuda:0' device to show how to create an env on cuda (ie: the output tensors will be on cuda)
-    # but this will be overwritten in the collector below
-    penv = ParallelEnv(4, EnvCreator(lambda: RoboHiveEnv('FrankaReachRandom_v2d-v0', device='cuda:0', from_pixels=True)))
-    # we append a series of standard transforms, all running on cuda
-    tenv = TransformedEnv(penv, Compose(ToTensorImage(), Resize(84, 84), GrayScale(), CatFrames(4, in_keys=['pixels']), ObservationNorm(in_keys=['pixels'])))
-    # this is how you initialize your observation norm transform (the API will be improved shortly)
-    tenv.transform[-1].init_stats(reduce_dim=(0, 1), cat_dim=1, num_iter=1000)
-    # we cheat a bit by using a totally random policy. A CNN will obviously slow down collection a bit
-    policy = RandomPolicy(tenv.action_spec)  # some random policy
-
-    # we create an async collector on 4 different devices. The "passing_devices" indicate where the env is placed, and the "device" where the policy is executed.
-    # For a maximum efficiency they should match. Also, you can either pass a string for those args (ie all devices match) or a list of strings/devices.
-    collector = MultiaSyncDataCollector([tenv, tenv, tenv, tenv], policy=policy, frames_per_batch=400, max_frames_per_traj=1000, total_frames=1_000_000,
-                                        passing_devices=['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'],
-                                        devices=['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'])
-    # a simple collection loop to log the speed
-    pbar = tqdm.tqdm(total=1_000_000)
+import torch
+import robohive
+from rlhive.rl_envs import make_r3m_env
+from torchrl.collectors.collectors import SyncDataCollector, MultiaSyncDataCollector, RandomPolicy
+# make sure your ParallelEnv is inside the `if __name__ == "__main__":` condition, otherwise you'll
+# be creating an infinite tree of subprocesses
+if __name__ == "__main__":
+    device = torch.device("cpu")  # could be 'cuda:0'
+    env_name = 'FrankaReachFixed-v0'
+    env = make_r3m_env(env_name, model_name="resnet18", download=True)
+
+    # a simple, single-process data collector
+    collector = SyncDataCollector(env, policy=RandomPolicy(env.action_spec), total_frames=1_000, frames_per_batch=200, init_random_frames=200, )
+    for data in collector:
+        print(data)
+    ## async multi-proc data collector
+    collector = MultiaSyncDataCollector([env, env], policy=RandomPolicy(env.action_spec), total_frames=1_000, frames_per_batch=200, init_random_frames=200, )
     for data in collector:
-        pbar.update(data.numel())
-    del collector
-    del tenv
+        print(data)
 ```

 ### Model training

 torchrl examples:
 - [torchrl](https://github.com/pytorch/rl/tree/main/examples)
 - [torchrl_examples](https://github.com/compsciencelab/torchrl_examples)

-## Execution
-
-AgentHive is optimized for the `MUJOCO` backend. Make sure to set the `sim_backend` environment variable to `"MUJOCO"`
-before running the code:
-```
-sim_backend=MUJOCO python script.py
-```
-
 ## Installation

 AgentHive has two core dependencies: torchrl and RoboHive. RoboHive relies on mujoco and mujoco-py for physics simulation and rendering. As of now, RoboHive requires