Commit

updating models
ZhengyiLuo committed Feb 24, 2023
1 parent 90e1a6a commit 3a49f51
Showing 7 changed files with 130 additions and 172 deletions.
8 changes: 7 additions & 1 deletion README.md
@@ -84,7 +84,13 @@ python scripts/train_uhc.py.py --cfg uhc --num_threads 35

To evaluate our dynamics-regulated kinematic policy, run:
```
python scripts/eval_ar_policy.py --cfg kin_poly --iter 750
python scripts/eval_ar_policy.py --cfg kin_poly --iter 750 # MoCap data
python scripts/eval_ar_policy.py --cfg kin_poly --iter 750 --wild # Real-world data
```

To evaluate our kinematic policy trained using only supervised learning, run:
```
python scripts/exp_arnet_all.py --cfg kin_poly --test --iter 1000
```

To compute metrics, run:
3 changes: 2 additions & 1 deletion kin_poly/core/agent_ar.py
@@ -47,6 +47,7 @@
from kin_poly.core.trajbatch_ego import TrajBatchEgo
from kin_poly.core.reward_function import reward_func
from kin_poly.utils.torch_ext import get_scheduler
from uhc.utils.tools import CustomUnpickler


class AgentAR(AgentPPO):
@@ -324,7 +325,7 @@ def load_checkpoint(self, i_iter):
cp_path = "%s/iter_%04d.p" % (cfg.policy_model_dir, i_iter)

self.logger.info("loading model from checkpoint: %s" % cp_path)
model_cp = pickle.load(open(cp_path, "rb"))
model_cp = CustomUnpickler(open(cp_path, "rb")).load()
self.policy_net.load_state_dict(model_cp["policy_dict"])

# policy_net.old_arnet[0].load_state_dict(copy.deepcopy(policy_net.traj_ar_net.state_dict())) # ZL: should use the new old net as well
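Note on this change: `pickle.load` is replaced with `CustomUnpickler` from `uhc.utils.tools` (the same swap appears in `humanoid_ar_v1.py` below), which suggests the released checkpoints were pickled against older module paths. The actual implementation is not part of this diff; the following is only a rough sketch of what a module-remapping unpickler typically looks like, with the `LEGACY_RENAMES` table as a purely hypothetical example.

```
import pickle

# Hypothetical sketch of a module-remapping unpickler; the real
# uhc.utils.tools.CustomUnpickler may differ.
LEGACY_RENAMES = {
    "old_pkg.models": "uhc.models",  # assumed legacy -> current mapping
}

class CustomUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        # Redirect legacy module paths recorded in old checkpoints
        # to their current locations before resolving the class.
        for old, new in LEGACY_RENAMES.items():
            if module.startswith(old):
                module = new + module[len(old):]
                break
        return super().find_class(module, name)

# Usage mirrors the diff: model_cp = CustomUnpickler(open(cp_path, "rb")).load()
```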
121 changes: 33 additions & 88 deletions kin_poly/envs/humanoid_ar_v1.py
@@ -20,26 +20,17 @@
from scipy.linalg import cho_solve, cho_factor
import joblib
from kin_poly.utils.flags import flags
from uhc.utils.tools import CustomUnpickler


class HumanoidAREnv(HumanoidEnv):

def __init__(self,
cfg,
cc_cfg,
init_context,
cc_iter=-1,
mode="train",
wild=False,
ar_mode=False):
def __init__(self, cfg, cc_cfg, init_context, cc_iter=-1, mode="train", wild=False, ar_mode=False):
mujoco_env.MujocoEnv.__init__(self, cc_cfg.mujoco_model_file, 15)
self.cc_cfg = cc_cfg
self.kin_cfg = cfg
self.wild = wild
self.setup_constants(cc_cfg,
cc_cfg.data_specs,
mode=mode,
no_root=False)
self.setup_constants(cc_cfg, cc_cfg.data_specs, mode=mode, no_root=False)

# env specific
self.num_obj = self.get_obj_qpos().shape[0] // 7
@@ -60,28 +51,19 @@ def __init__(self,
self.ar_model_v = self.kin_cfg.model_specs.get("model_v", 1)
self.ar_mode = ar_mode
self.body_diff_thresh = cfg.policy_specs.get('body_diff_thresh', 10)
self.body_diff_gt_thresh = cfg.policy_specs.get(
'body_diff_gt_thresh', 12)
self.body_diff_gt_thresh = cfg.policy_specs.get('body_diff_gt_thresh', 12)

print(f"scheduled_sampling: {self.scheduled_sampling}")

self.set_spaces()
self.jpos_diffw = np.array(
cfg.reward_weights.get("jpos_diffw", [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1
]))[:, None]
self.jpos_diffw = np.array(cfg.reward_weights.get("jpos_diffw", [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))[:, None]
''' Load CC Controller '''
state_dim = self.get_cc_obs().shape[0]
action_dim = self.cc_action_dim
if cc_cfg.actor_type == "gauss":
self.cc_policy = PolicyGaussian(cc_cfg,
action_dim=action_dim,
state_dim=state_dim)
self.cc_policy = PolicyGaussian(cc_cfg, action_dim=action_dim, state_dim=state_dim)
elif cc_cfg.actor_type == "mcp":
self.cc_policy = PolicyMCP(cc_cfg,
action_dim=action_dim,
state_dim=state_dim)
self.cc_policy = PolicyMCP(cc_cfg, action_dim=action_dim, state_dim=state_dim)

self.cc_value_net = Value(MLP(state_dim, cc_cfg.value_hsize, cc_cfg.value_htype))
print(cc_cfg.model_dir)
@@ -91,29 +73,22 @@ def __init__(self,
cc_iter = np.max([int(i.split("_")[-1].split(".")[0]) for i in os.listdir(cc_cfg.model_dir)])
cp_path = '%s/iter_%04d.p' % (cc_cfg.model_dir, cc_iter)
print(('loading model from checkpoint: %s' % cp_path))
model_cp = pickle.load(open(cp_path, "rb"))
running_state = ZFilter((state_dim,), clip=5)
running_state.set_mean_std(model_cp['running_state']['mean'], model_cp['running_state']['std'], model_cp['running_state']['n'])
model_cp = CustomUnpickler(open(cp_path, "rb")).load()
running_state = model_cp['running_state']
self.cc_running_state = running_state
if not self.kin_cfg.joint_controller:
self.cc_policy.load_state_dict(model_cp['policy_dict'])
self.cc_value_net.load_state_dict(model_cp['value_dict'])
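Note on the loading change above: the checkpoint's `running_state` entry is now used directly, instead of constructing a fresh `ZFilter((state_dim,), clip=5)` and copying over its `mean`/`std`/`n`. The repo's `ZFilter` is not shown in this diff; as a hedged sketch, such a running-mean-std observation normalizer behaves roughly like the class below, which is why pickling the whole object (and reading it back with `CustomUnpickler`) restores the normalization statistics in one step.

```
import numpy as np

class RunningMeanStd:
    """Minimal sketch of a ZFilter-style observation normalizer (assumed interface)."""

    def __init__(self, shape, clip=5.0):
        self.n = 0
        self.mean = np.zeros(shape)
        self._m2 = np.zeros(shape)  # sum of squared deviations (Welford update)
        self.clip = clip

    @property
    def std(self):
        return np.sqrt(self._m2 / self.n) if self.n > 1 else np.ones_like(self.mean)

    def __call__(self, x, update=True):
        if update:
            self.n += 1
            delta = x - self.mean
            self.mean = self.mean + delta / self.n
            self._m2 = self._m2 + delta * (x - self.mean)
        z = (x - self.mean) / (self.std + 1e-8)
        return np.clip(z, -self.clip, self.clip)

# Matches the call pattern in step(): cc_obs = self.cc_running_state(cc_obs, update=False)
```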

def load_context(self, data_dict):
self.ar_context = {
k: v[0].detach().cpu().numpy()
if v.requires_grad else v[0].cpu().numpy()
for k, v in data_dict.items()
}
self.ar_context = {k: v[0].detach().cpu().numpy() if v.requires_grad else v[0].cpu().numpy() for k, v in data_dict.items()}

self.ar_context['len'] = self.ar_context['qpos'].shape[0] - 1
self.gt_targets = self.smpl_humanoid.qpos_fk_batch(
self.ar_context['qpos'])
self.gt_targets = self.smpl_humanoid.qpos_fk_batch(self.ar_context['qpos'])
self.target = self.smpl_humanoid.qpos_fk(self.ar_context['ar_qpos'][0])

def set_model_params(self):
if self.cc_cfg.action_type == 'torque' and hasattr(
self.cc_cfg, 'j_stiff'):
if self.cc_cfg.action_type == 'torque' and hasattr(self.cc_cfg, 'j_stiff'):
self.model.jnt_stiffness[1:] = self.cc_cfg.j_stiff
self.model.dof_damping[6:] = self.cc_cfg.j_damp

@@ -134,9 +109,7 @@ def set_spaces(self):
self.cc_action_dim = self.ndof + self.vf_dim

self.action_dim = 75
self.action_space = spaces.Box(low=-np.ones(self.action_dim),
high=np.ones(self.action_dim),
dtype=np.float32)
self.action_space = spaces.Box(low=-np.ones(self.action_dim), high=np.ones(self.action_dim), dtype=np.float32)
obs = self.get_obs()

self.obs_dim = obs.size
@@ -167,8 +140,7 @@ def get_ar_obs_v1(self):
curr_qpos_local = curr_qpos.copy()
curr_qpos_local[3:7] = de_heading(curr_qpos_local[3:7])

pred_wbpos, pred_wbquat = self.get_wbody_pos().reshape(
-1, 3), self.get_wbody_quat().reshape(-1, 4)
pred_wbpos, pred_wbquat = self.get_wbody_pos().reshape(-1, 3), self.get_wbody_quat().reshape(-1, 4)

head_idx = self.get_head_idx()
pred_hrot = pred_wbquat[head_idx]
@@ -189,14 +161,12 @@ def get_ar_obs_v1(self):
# get target head vel
t_havel = self.ar_context['head_vels'][t, 3:].copy()
t_hlvel = self.ar_context['head_vels'][t, :3].copy()
t_obj_relative_head = self.ar_context["obj_head_relative_poses"][
t, :].copy()
t_obj_relative_head = self.ar_context["obj_head_relative_poses"][t, :].copy()

# difference in head, in head's heading coordinates
diff_hpos = t_hpos - pred_hpos
diff_hpos = transform_vec(diff_hpos, pred_hrot_heading, "heading")
diff_hrot = quaternion_multiply(quaternion_inverse(t_hrot),
pred_hrot)
diff_hrot = quaternion_multiply(quaternion_inverse(t_hrot), pred_hrot)

q_heading = get_heading_q(pred_hrot_heading).copy()

@@ -205,16 +175,12 @@ def get_ar_obs_v1(self):

diff_obj = obj_pos - pred_hpos
diff_obj_loc = transform_vec(diff_obj, pred_hrot_heading, "heading")
obj_rot_local = quaternion_multiply(quaternion_inverse(q_heading),
obj_rot) # Object local coordinate
pred_obj_relative_head = np.concatenate([diff_obj_loc, obj_rot_local],
axis=0)
obj_rot_local = quaternion_multiply(quaternion_inverse(q_heading), obj_rot) # Object local coordinate
pred_obj_relative_head = np.concatenate([diff_obj_loc, obj_rot_local], axis=0)

# state
# order of these matters !!!
obs.append(
curr_qpos_local[2:]
) # current height + local body orientation + body pose self.qpose_lm 74
obs.append(curr_qpos_local[2:]) # current height + local body orientation + body pose self.qpose_lm 74
if self.kin_cfg.use_vel:
obs.append(curr_qvel) # current velocities 75

@@ -223,15 +189,13 @@ def get_ar_obs_v1(self):
obs.append(diff_hrot) # diff head rotation 4

if self.kin_cfg.use_obj:
obs.append(
pred_obj_relative_head) # predicted object relative to head 7
obs.append(pred_obj_relative_head) # predicted object relative to head 7

if self.kin_cfg.use_head:
obs.append(t_havel) # target head angular velocity 3
obs.append(t_hlvel) # target head linear velocity 3
if self.kin_cfg.use_obj:
obs.append(
t_obj_relative_head) # target object relative to head 7
obs.append(t_obj_relative_head) # target object relative to head 7

if self.kin_cfg.use_action and self.ar_model_v > 0:
obs.append(curr_action)
@@ -265,8 +229,7 @@ def step_ar(self, a, dt=1 / 30):
body_pose[body_pose > np.pi] -= 2 * np.pi
body_pose[body_pose < -np.pi] += 2 * np.pi

next_qpos = np.concatenate(
[curr_pos[:2], a[:(pose_start - 2)], body_pose], axis=0)
next_qpos = np.concatenate([curr_pos[:2], a[:(pose_start - 2)], body_pose], axis=0)
root_qvel = a[qpos_lm:]
linv = quat_mul_vec(curr_heading, root_qvel[:3])
next_qpos[:2] += linv[:2] * dt
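The root update in `step_ar` treats the first three root-velocity entries of the action as a linear velocity expressed in the current heading frame: `quat_mul_vec` rotates it into the world frame before the planar position is integrated. A small self-contained illustration of that step (with a hand-rolled quaternion rotation, since `quat_mul_vec` itself is not shown in this diff):

```
import numpy as np

def quat_rotate(q, v):
    # Rotate 3-vector v by unit quaternion q = (w, x, y, z).
    w, x, y, z = q
    u = np.array([x, y, z])
    return 2.0 * np.dot(u, v) * u + (w * w - np.dot(u, u)) * v + 2.0 * w * np.cross(u, v)

dt = 1.0 / 30.0
curr_heading = np.array([0.7071068, 0.0, 0.0, 0.7071068])  # 90-degree yaw (w, x, y, z)
root_linvel_heading = np.array([1.0, 0.0, 0.0])            # "forward" in the heading frame
world_linvel = quat_rotate(curr_heading, root_linvel_heading)
# world_linvel ~= [0, 1, 0]: forward in the heading frame is world +y here,
# so next_qpos[:2] += world_linvel[:2] * dt moves the root along world +y.
```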
@@ -290,32 +253,26 @@ def step(self, a):
elif self.policy_v == 2:
next_qpos = a.copy()

self.target = self.smpl_humanoid.qpos_fk(
next_qpos) # forming target from arnet
self.target = self.smpl_humanoid.qpos_fk(next_qpos) # forming target from arnet

# if flags.debug:
# next_qpos = self.step_ar(self.ar_context['target'][self.cur_t]) #
# self.target = self.smpl_humanoid.qpos_fk(self.ar_context['qpos'][self.cur_t + 1]) # GT
# self.target = self.smpl_humanoid.qpos_fk(self.ar_context['ar_qpos'][self.cur_t + 1]) # Debug
if self.ar_mode:
self.target = self.smpl_humanoid.qpos_fk(
self.ar_context['ar_qpos'][self.cur_t + 1]) #
self.target = self.smpl_humanoid.qpos_fk(self.ar_context['ar_qpos'][self.cur_t + 1]) #

cc_obs = self.get_cc_obs()
cc_obs = self.cc_running_state(cc_obs, update=False)
mean_action = self.mode == "test" or (self.mode == "train" and
self.kin_cfg.joint_controller)
cc_action = self.cc_policy.select_action(
torch.from_numpy(cc_obs)[None, ],
mean_action=mean_action)[0].numpy() # CC step
mean_action = self.mode == "test" or (self.mode == "train" and self.kin_cfg.joint_controller)
cc_action = self.cc_policy.select_action(torch.from_numpy(cc_obs)[None,], mean_action=mean_action)[0].numpy() # CC step

if flags.debug:
# self.do_simulation(cc_a, self.frame_skip)
# self.data.qpos[:self.qpos_lim] = self.get_target_qpos() # debug
# self.data.qpos[:self.qpos_lim] = self.ar_context['qpos'][self.cur_t + 1] # debug
# self.data.qpos[:self.qpos_lim] = self.gt_targets['qpos'][self.cur_t + 1] # debug
self.data.qpos[:self.qpos_lim] = self.ar_context['ar_qpos'][
self.cur_t + 1] # ARNet Qpos
self.data.qpos[:self.qpos_lim] = self.ar_context['ar_qpos'][self.cur_t + 1] # ARNet Qpos
self.sim.forward() # debug

else:
@@ -355,32 +312,23 @@ def step(self, a):
else:
raise NotImplementedError()

end = (self.cur_t >= cfg.env_episode_len) or (
self.cur_t + self.start_ind >= self.ar_context['len'])
end = (self.cur_t >= cfg.env_episode_len) or (self.cur_t + self.start_ind >= self.ar_context['len'])
done = fail or end
# if done: # ZL: Debug
# exit()
# print("done!!!", self.cur_t, self.ar_context['len'] )

percent = self.cur_t / self.ar_context['len']
obs = self.get_obs()
return obs, reward, done, {
'fail': fail,
'end': end,
"percent": percent,
"cc_action": cc_action,
"cc_state": cc_obs
}
return obs, reward, done, {'fail': fail, 'end': end, "percent": percent, "cc_action": cc_action, "cc_state": cc_obs}

def set_mode(self, mode):
self.mode = mode

def ar_fail_safe(self):
self.data.qpos[:self.qpos_lim] = self.ar_context['ar_qpos'][self.cur_t
+ 1]
self.data.qpos[:self.qpos_lim] = self.ar_context['ar_qpos'][self.cur_t + 1]
# self.data.qpos[:self.qpos_lim] = self.get_target_qpos()
self.data.qvel[:self.qvel_lim] = self.ar_context['ar_qvel'][self.cur_t
+ 1]
self.data.qvel[:self.qvel_lim] = self.ar_context['ar_qvel'][self.cur_t + 1]
self.sim.forward()

def reset_model(self):
@@ -429,8 +377,7 @@ def reset_model(self):
# init_pose[2] += 1.0
# self.set_state(init_pose, self.data.qvel)

obj_pose = self.convert_obj_qpos(self.ar_context["action_one_hot"][0],
self.ar_context['obj_pose'][0])
obj_pose = self.convert_obj_qpos(self.ar_context["action_one_hot"][0], self.ar_context['obj_pose'][0])
init_pose = np.concatenate([init_pose_exp, obj_pose])
init_vel = np.concatenate([init_vel_exp, np.zeros(self.num_obj * 6)])

@@ -527,9 +474,7 @@ def get_obj_qpos(self, action_one_hot=None):
obj_start = self.action_index_map[action_idx]
obj_end = obj_start + self.action_len[action_idx]

return obj_pose_full[
obj_start:
obj_end][:7] # ZL: only support handling one obj right now...
return obj_pose_full[obj_start:obj_end][:7] # ZL: only support handling one obj right now...

def convert_obj_qpos(self, action_one_hot, obj_pose):
if np.sum(action_one_hot) == 0:
3 changes: 2 additions & 1 deletion requirements.txt
@@ -20,4 +20,5 @@ sklearn
chumpy
patchelf
chardet
pyopengl
pyopengl
torchgeometry
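torchgeometry is newly added here; the diff does not show where it is used, but it is typically pulled in for differentiable rotation conversions (e.g. SMPL axis-angle poses to rotation matrices). A minimal, repo-independent usage sketch:

```
import torch
import torchgeometry as tgm

# Convert a batch of axis-angle rotations (N, 3) into 4x4 transforms (N, 4, 4).
angle_axis = torch.zeros(10, 3)
rot_mats = tgm.angle_axis_to_rotation_matrix(angle_axis)
print(rot_mats.shape)  # torch.Size([10, 4, 4])
```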