Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Humanoid #40

Merged
merged 39 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
45777b0
Install humanoid
yardenas Aug 19, 2024
f30fd8e
Add humanoid bench
yardenas Aug 19, 2024
68bc748
Revert "Install humanoid"
yardenas Aug 19, 2024
823fe91
Ensure version
yardenas Aug 19, 2024
2438af0
Update versions
yardenas Aug 19, 2024
220d3c9
Just making it run
yardenas Aug 19, 2024
c52e5e7
Debugging
yardenas Aug 19, 2024
c0f23d0
Porting environment to dm control
yardenas Aug 19, 2024
011e56f
Runs
yardenas Aug 19, 2024
29ff343
It works off the shelf
yardenas Aug 19, 2024
8f7e267
Exclude all hooks
yardenas Aug 19, 2024
0f5088c
Pre-commit fix
yardenas Aug 19, 2024
870ebd0
Clean up
yardenas Aug 19, 2024
9034780
Clean up
yardenas Aug 19, 2024
83d3bd4
Clean up
yardenas Aug 19, 2024
4fa05ac
Clean up
yardenas Aug 19, 2024
cdb98c6
Update policy to reach
yardenas Aug 20, 2024
7ec905a
Revert "Update policy to reach"
yardenas Aug 20, 2024
5f65b95
Load low level policy
yardenas Aug 20, 2024
90297e4
Load low level policy
yardenas Aug 20, 2024
41b6f08
Merge branch 'main' of github.com:yardenas/safe-opax into humanoid
yardenas Aug 20, 2024
94ad3af
Add humanoid experiment
yardenas Aug 20, 2024
9fd37c0
No termination
yardenas Aug 20, 2024
a782ab6
Merge branch 'main' of github.com:yardenas/safe-opax into humanoid
yardenas Aug 21, 2024
267052f
Model loading
yardenas Aug 21, 2024
8bcff57
Merge branch 'main' of github.com:yardenas/safe-opax into humanoid
yardenas Aug 30, 2024
fbb5636
Update model path
yardenas Aug 30, 2024
6c73c9d
Fix model loading
yardenas Aug 30, 2024
f340ad1
Load numpy
yardenas Aug 30, 2024
487eb3e
Filter jit
yardenas Aug 30, 2024
62e7ef2
No import torch model
yardenas Aug 30, 2024
a93e743
Use only one parallel env
yardenas Aug 30, 2024
00788ee
Add constraint wrapper
yardenas Aug 30, 2024
da4dbf5
Update humanoid to have multiple obs
yardenas Sep 3, 2024
cf15725
Training seems to run
yardenas Sep 3, 2024
88dd5f3
More epochs
yardenas Sep 4, 2024
d521333
Use CPU for reacher policy
yardenas Sep 6, 2024
76dfb6f
task as param
yardenas Sep 7, 2024
39ac76b
Updates from Carlo
yardenas Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions safe_opax/benchmark_suites/humanoid_bench/__init__.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,65 @@
import os
import numpy as np
from gymnasium import RewardWrapper
from omegaconf import DictConfig
from gymnasium.spaces import Box

from safe_opax.benchmark_suites.utils import get_domain_and_task
from safe_opax.rl.types import EnvironmentFactory
from safe_opax.rl.wrappers import ImageObservation

class ConstraintWrapper(RewardWrapper):
    """Rebuild the reward from ``info`` terms and attach a collision cost.

    On every step the reward returned by the wrapped environment is
    discarded and replaced by
    ``0.5 * (small_control * stand_reward) + 0.5 * move``, all read from the
    step's ``info`` dict. ``info["cost"]`` is set to whether
    ``info["collision_discount"]`` is below 1.
    """

    def __init__(self, env):
        # Only the wrapped environment is stored; the base-class initializer
        # is not invoked.
        # NOTE(review): confirm whether skipping super().__init__ is deliberate.
        self.env = env

    def step(self, action):
        """Step the wrapped env and return the tuple with reward/cost replaced.

        Raises:
            KeyError: if ``info`` lacks ``small_control``, ``stand_reward``,
                ``move`` or ``collision_discount``.
        """
        observation, _, terminal, truncated, info = self.env.step(action)
        small_control = info["small_control"]
        stand_reward = info["stand_reward"]
        move = info["move"]
        reward = 0.5 * (small_control * stand_reward) + 0.5 * move
        collision_discount = info["collision_discount"]
        info["cost"] = collision_discount < 1.0
        return observation, reward, terminal, truncated, info

    def __getattr__(self, name):
        """Forward attributes not found on the wrapper to the wrapped env."""
        # __getattr__ only runs when normal lookup fails, so reaching here
        # with name == "env" means `env` has not been set on this instance
        # (e.g. copy/pickle probing a bare instance). Looking up self.env
        # would re-enter this method and recurse without bound.
        if name == "env":
            raise AttributeError(name)
        return getattr(self.env, name)


class HumanoidImageObservation(ImageObservation):
    """Image observation that stacks the parent's image with the left-eye image.

    The observation space is a ``Box(0, 255, ..., np.float32)`` whose shape is
    ``image_size`` with an extra dimension of 6: appended when
    ``image_format == "chw"`` and prepended for any other format.
    """

    def __init__(self, env, image_size, image_format="channels_first"):
        super().__init__(env, image_size, image_format)
        # NOTE(review): "chw" puts the 6 last while every other format puts it
        # first; confirm this matches the parent ImageObservation's convention.
        if image_format == "chw":
            shape = image_size + (6,)
        else:
            shape = (6,) + image_size
        self.observation_space = Box(0, 255, shape, np.float32)

    def observation(self, observation):
        """Return the parent's image and the preprocessed ``image_left_eye``
        entry of ``observation``, concatenated along axis 0."""
        views = [
            super().observation(observation),
            self.preprocess(observation["image_left_eye"]),
        ]
        return np.concatenate(views, axis=0)

def make(cfg: DictConfig) -> EnvironmentFactory:
def make_env():
from .env import HumanoidEnv

_, task_cfg = get_domain_and_task(cfg)
env_name = "h1hand-pole-v0"
reach_data_path = os.path.join(os.path.dirname(__file__), "data", "reach_one_hand")
env = HumanoidEnv(robot="h1hand",
robot, task = task_cfg.task.split("-")
env = HumanoidEnv(robot=robot,
control="pos",
task="pole",
task=task,
policy_type="reach_single",
policy_path=reach_data_path + "/torch_model.pt",
policy_path=reach_data_path + "/model.ckpt",
mean_path=reach_data_path + "/mean.npy",
var_path=reach_data_path + "/var.npy",
sensors="image",
obs_wrapper="true",
)
env = ConstraintWrapper(env)
if task_cfg.image_observation.enabled:
env = ImageObservation(
env = HumanoidImageObservation(
env,
task_cfg.image_observation.image_size,
task_cfg.image_observation.image_format
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<mujoco model="h1hand_pos_stand">
<!-- Scene definition: simulation options, shared visual/floor includes, the
     h1simplehand_pos robot include, and a single named keyframe. -->
<option timestep="0.002" iterations="100" ls_iterations="50" solver="Newton">
<flag eulerdamp="enable"/>
</option>
<!-- Include paths are relative to this file's directory. -->
<include file="../common/visual.xml"/>
<include file="../common/floor.xml"/>
<include file="../robots/h1simplehand_pos.xml"/>
<keyframe>
<!-- Keyframe "qpos0": a full qpos vector for the included model.
     NOTE(review): the leading values look like a free-joint position
     (0 0 0.98) followed by an identity quaternion (1 0 0 0); confirm
     against the joint order in h1simplehand_pos.xml. -->
<key name="qpos0" qpos="0 0 0.98 1 0 0 0 0 0 -0.4 0.8 -0.4 0 0 -0.4 0.8 -0.4 0
0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0"/>
</keyframe>
</mujoco>
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<mujoco model="h1">
<compiler angle="radian" meshdir="../h1/assets" autolimits="true"/>

<default>
<default class="h1">
<geom type="mesh"/>
Expand All @@ -12,7 +11,7 @@
<geom group="3" mass="0" density="0"/>
</default>
<site size="0.001" rgba="0.5 0.5 0.5 0.3" group="4"/>
<default class="hip">
<default class="hip">
<position forcerange="-200 200" kp="200" kv="5"/>
</default>
<default class="knee">
Expand All @@ -35,11 +34,9 @@
</default>
</default>
</default>

<asset>
<material name="black" rgba="0.1 0.1 0.1 1"/>
<material name="white" rgba="1 1 1 1"/>

<mesh file="pelvis.stl"/>
<mesh file="left_hip_yaw_link.stl"/>
<mesh file="left_hip_roll_link.stl"/>
Expand All @@ -62,7 +59,6 @@
<mesh file="right_elbow_link.stl"/>
<mesh file="logo_link.stl"/>
</asset>

<worldbody>
<light mode="targetbodycom" target="torso_link" pos="1 0 2.5"/>
<body name="pelvis" pos="0 0 1.1" childclass="h1">
Expand Down Expand Up @@ -142,7 +138,7 @@
<geom class="visual" mesh="torso_link"/>
<site name="head" class="visual" size="0.01" pos="0 0 0.7" rgba="1 1 1 1"/>
<geom class="visual" material="white" mesh="logo_link"/>
<site name="imu" size="0.01" pos="-0.04452 -0.01891 0.27756"/>
<site name="imu" size="0.01" pos="-0.04452 0. 0.27756"/>
<body name="left_shoulder_pitch_link" pos="0.0055 0.15535 0.42999" quat="0.976296 0.216438 0 0">
<inertial pos="0.005045 0.053657 -0.015715" quat="0.814858 0.579236 -0.0201072 -0.00936488" mass="1.033"
diaginertia="0.00129936 0.000987113 0.000858198"/>
Expand Down Expand Up @@ -196,7 +192,6 @@
</body>
</body>
</worldbody>

<actuator>
<position name="left_hip_yaw" joint="left_hip_yaw" ctrlrange="-0.43 0.43" class="hip" />
<position name="left_hip_roll" joint="left_hip_roll" ctrlrange="-0.43 0.43" class="hip" />
Expand All @@ -218,9 +213,8 @@
<position name="right_shoulder_yaw" joint="right_shoulder_yaw" ctrlrange="-4.45 1.3" class="shoulder2" />
<position name="right_elbow" joint="right_elbow" ctrlrange="-1.25 2.61" class="elbow" />
</actuator>

<sensor>
<touch name="left_foot_sensor" site="left_foot"/>
<touch name="right_foot_sensor" site="right_foot"/>
</sensor>
</mujoco>
</mujoco>
Loading