diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index b2056551f0c..a73c3b2881b 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -56,7 +56,7 @@ from packaging import version # check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.1" +RSL_RL_VERSION = "3.1.0" installed_version = metadata.version("rsl-rl-lib") if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": diff --git a/source/isaaclab/isaaclab/envs/mdp/rewards.py b/source/isaaclab/isaaclab/envs/mdp/rewards.py index 20df2a557eb..4232c0a8895 100644 --- a/source/isaaclab/isaaclab/envs/mdp/rewards.py +++ b/source/isaaclab/isaaclab/envs/mdp/rewards.py @@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc ) return torch.sum(out_of_limits, dim=1) +def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor: + """Penalize the rate of change of the actions using L2 norm.""" + return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1) def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor: """Penalize the rate of change of the actions using L2 squared kernel.""" return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1) +def action(env: ManagerBasedRLEnv) -> torch.Tensor: + """Penalize the actions using L2 squared kernel (summed for each environment).""" + return torch.sum(env.action_manager.action**2, dim=-1) def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor: """Penalize the actions using L2 squared kernel.""" diff --git a/source/isaaclab/isaaclab/utils/noise/__init__.py b/source/isaaclab/isaaclab/utils/noise/__init__.py index d2f703758b0..ef2cea78582 100644 --- a/source/isaaclab/isaaclab/utils/noise/__init__.py +++ b/source/isaaclab/isaaclab/utils/noise/__init__.py @@ -26,8 +26,8 @@ """ from .noise_cfg import NoiseCfg # noqa: F401 -from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg -from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise +from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg +from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise # Backward compatibility ConstantBiasNoiseCfg = ConstantNoiseCfg diff --git a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py index 0c49828b3ff..3de46bc89f1 100644 --- a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py +++ b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py @@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg): Defaults to True. """ + +@configclass +class ResetSampledNoiseModelCfg(NoiseModelCfg): + """Configuration for a noise model that samples noise ONLY during reset.""" + + class_type: type = noise_model.ResetSampledNoiseModel + + noise_cfg: NoiseCfg = MISSING + """The noise configuration for the noise. + + Based on this configuration, the noise is sampled at every reset of the noise model. + """ diff --git a/source/isaaclab/isaaclab/utils/noise/noise_model.py b/source/isaaclab/isaaclab/utils/noise/noise_model.py index dae36b55c72..d780d916067 100644 --- a/source/isaaclab/isaaclab/utils/noise/noise_model.py +++ b/source/isaaclab/isaaclab/utils/noise/noise_model.py @@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor: # now re-sample that expanded bias in-place self.reset() return super().__call__(data) + self._bias + +class ResetSampledNoiseModel(NoiseModel): + """Noise model that samples noise ONLY during reset and applies it consistently. + + The noise is sampled from the configured distribution ONLY during reset and applied consistently + until the next reset. Unlike regular noise that generates new random values every step, + this model maintains the same noise values throughout an episode. + """ + + def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str): + # initialize parent class + super().__init__(noise_model_cfg, num_envs, device) + # store the noise configuration + self._noise_cfg = noise_model_cfg.noise_cfg + self._sampled_noise = torch.zeros((num_envs, 1), device=self._device) + self._num_components: int | None = None + + def reset(self, env_ids: Sequence[int] | None = None): + """Reset the noise model by sampling NEW noise values. + + This method samples new noise for the specified environments using the configured noise function. + The sampled noise will remain constant until the next reset. + + Args: + env_ids: The environment ids to reset the noise model for. Defaults to None, + in which case all environments are considered. + """ + # resolve the environment ids + if env_ids is None: + env_ids = slice(None) + + # Use the existing noise function to sample new noise + # Create dummy data to sample from the noise function + dummy_data = torch.zeros((env_ids.stop - env_ids.start if isinstance(env_ids, slice) else len(env_ids), 1), + device=self._device) + + # Sample noise using the configured noise function + sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg) + + self._sampled_noise[env_ids] = sampled_noise + + def __call__(self, data: torch.Tensor) -> torch.Tensor: + """Apply the pre-sampled noise to the data. + + This method applies the noise that was sampled during the last reset. + No new noise is generated - the same values are used consistently. + + Args: + data: The data to apply the noise to. Shape is (num_envs, ...). + + Returns: + The data with the noise applied. Shape is the same as the input data. + """ + # on first apply, expand noise to match last dim of data + if self._num_components is None: + *_, self._num_components = data.shape + # expand noise from (num_envs,1) to (num_envs, num_components) + self._sampled_noise = self._sampled_noise.repeat(1, self._num_components) + + # apply the noise based on operation + if self._noise_cfg.operation == "add": + return data + self._sampled_noise + elif self._noise_cfg.operation == "scale": + return data * self._sampled_noise + elif self._noise_cfg.operation == "abs": + return self._sampled_noise + else: + raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}") diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py index 4433b824235..053f0ac49ef 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py @@ -55,47 +55,63 @@ UR10e_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/UniversalRobots/ur10e/ur10e.usd", + activate_contact_sensors=True, rigid_props=sim_utils.RigidBodyPropertiesCfg( disable_gravity=True, max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, ), articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1 + enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 ), - activate_contact_sensors=False, + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), ), init_state=ArticulationCfg.InitialStateCfg( joint_pos={ - "shoulder_pan_joint": 3.141592653589793, - "shoulder_lift_joint": -1.5707963267948966, - "elbow_joint": 1.5707963267948966, - "wrist_1_joint": -1.5707963267948966, - "wrist_2_joint": -1.5707963267948966, - "wrist_3_joint": 0.0, + "shoulder_pan_joint": 2.7228e+00, + "shoulder_lift_joint": -8.3962e-01, + "elbow_joint": 1.3684e+00, + "wrist_1_joint": -2.1048e+00, + "wrist_2_joint": -1.5691e+00, + "wrist_3_joint": -1.9896e+00, + "finger_joint": 0.0, }, pos=(0.0, 0.0, 0.0), - rot=(1.0, 0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), ), actuators={ # 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint' "shoulder": ImplicitActuatorCfg( joint_names_expr=["shoulder_.*"], + effort_limit=330.0, + velocity_limit=2.175, stiffness=1320.0, - damping=72.6636085, + damping=72.0, friction=0.0, armature=0.0, ), "elbow": ImplicitActuatorCfg( joint_names_expr=["elbow_joint"], + effort_limit=150.0, + velocity_limit=2.175, stiffness=600.0, - damping=34.64101615, + damping=50.0, friction=0.0, armature=0.0, ), "wrist": ImplicitActuatorCfg( joint_names_expr=["wrist_.*"], + effort_limit=56.0, + velocity_limit=2.175, stiffness=216.0, - damping=29.39387691, + damping=30.0, friction=0.0, armature=0.0, ), @@ -132,34 +148,139 @@ UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_inner_finger_pad_joint"] = 0.0 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0 # the major actuator joint for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg( +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["finger_joint"] = ImplicitActuatorCfg( joint_names_expr=["finger_joint"], - effort_limit_sim=10.0, - velocity_limit_sim=1.0, - stiffness=11.25, - damping=0.1, + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=20.0, + damping=8.94, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 ) # the auxiliary actuator joint for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg( - joint_names_expr=[".*_inner_finger_joint"], +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_1"] = ImplicitActuatorCfg( + joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'], effort_limit_sim=1.0, - velocity_limit_sim=1.0, - stiffness=0.2, - damping=0.001, + velocity_limit=2.175, + stiffness=0.0, + damping=0.0, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 ) # the passive joints for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg( - joint_names_expr=[".*_inner_finger_pad_joint", ".*_outer_finger_joint", "right_outer_knuckle_joint"], +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_2"] = ImplicitActuatorCfg( + joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'], effort_limit_sim=1.0, - velocity_limit_sim=1.0, - stiffness=0.0, - damping=0.0, + velocity_limit=2.175, + stiffness=400.0, + damping=20.0, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 +) + +UR10e_2f140_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/ur10e_robotiq_140_variant.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd", + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=True, + # max_depenetration_velocity=5.0, + # ), + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # articulation_props=sim_utils.ArticulationRootPropertiesCfg( + # enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1 + # ), + # activate_contact_sensors=False, + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "shoulder_pan_joint": 2.7228e+00, + "shoulder_lift_joint": -8.3962e-01, + "elbow_joint": 1.3684e+00, + "wrist_1_joint": -2.1048e+00, + "wrist_2_joint": -1.5691e+00, + "wrist_3_joint": -1.9896e+00, + "finger_joint": 0.0, + }, + pos=(0.0, 0.0, 0.0), + # @ireti: Figure out why this rotaion is requried. + # Otherwise the IK during initialization is does not converge. + rot=(0.0, 0.0, 0.0, 1.0), + ), + actuators={ + # @ireti: These values were obtained from 2025-05-30_20-35-06/params/env.yaml + "shoulder": ImplicitActuatorCfg( + joint_names_expr=["shoulder_.*"], + effort_limit=330.0, + velocity_limit=2.175, + stiffness=1320.0, + damping=72.0, + friction=0.0, + armature=0.0, + ), + "elbow": ImplicitActuatorCfg( + joint_names_expr=["elbow_joint"], + effort_limit=150.0, + velocity_limit=2.175, + stiffness=600.0, + damping=50.0, + friction=0.0, + armature=0.0, + ), + "wrist": ImplicitActuatorCfg( + joint_names_expr=["wrist_.*"], + effort_limit=56.0, + velocity_limit=2.175, + stiffness=216.0, + damping=30.0, + friction=0.0, + armature=0.0, + ), + "finger_joint": ImplicitActuatorCfg( + joint_names_expr=["finger_joint"], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=20.0, + damping=8.94, + friction=0.0, + armature=0.0, # 0.57 + ), + "others_1": ImplicitActuatorCfg( + joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, # 0.57 + ), + "others_2": ImplicitActuatorCfg( + joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=400.0, + damping=20.0, + friction=0.0, + armature=0.0, # 0.57 + ), + } ) """Configuration of UR-10E arm with Robotiq_2f_140 gripper.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 5b03a7c639b..bfc212f4006 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + state_dependent_std: bool = False + """Whether to use state-dependent standard deviation for the policy. Default is False.""" + actor_obs_normalization: bool = MISSING """Whether to normalize the observation for the actor network.""" diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index f9ddcdb0fa5..1d69ba74a4f 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -46,11 +46,10 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==3.0.1"], + "rsl-rl": ["rsl-rl-lib==3.1.0"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] -EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"] # Cumulation of all extra-requires EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values())) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py new file mode 100644 index 00000000000..525f8c7d27a --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Assemble 3 gears into a base.""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py new file mode 100644 index 00000000000..257dea87052 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for arm-based gear assembly environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py new file mode 100644 index 00000000000..0f3d90bbcad --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents +## +# Register Gym environments. +## + +gym.register( + id="Isaac-GearAssembly-UR10e-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg", + # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyPPORunnerCfg", + }, +) + +gym.register( + id="Isaac-GearAssembly-UR10e-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg_PLAY", + # "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", + # "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py new file mode 100644 index 00000000000..bcc238c84a9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..7d74c5e7d8e --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,74 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg + + +@configclass +class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 512 + max_iterations = 1500 + save_interval = 10 + experiment_name = "gear_assembly_ur10e" + empirical_normalization = True + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0, + num_learning_epochs=8, + num_mini_batches=8, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.008, + max_grad_norm=1.0, + ) + + +@configclass +class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): + seed = 7858 + num_steps_per_env = 512 + max_iterations = 1500 + save_interval = 50 + experiment_name = "gear_assembly_ur10e" + empirical_normalization = True + clip_actions = 1.0 + resume = False + policy = RslRlPpoActorCriticRecurrentCfg( + state_dependent_std=True, + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + noise_std_type="log", + activation="elu", + rnn_type="lstm", + rnn_hidden_dim=256, + rnn_num_layers=2, + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0, + num_learning_epochs=8, + num_mini_batches=16, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.008, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py new file mode 100644 index 00000000000..2b48e109d1e --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -0,0 +1,231 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +import os +import torch + +from isaaclab.utils import configclass + +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg + + +import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.deploy.gear_assembly.gear_assembly_env_cfg import GearAssemblyEnvCfg +from isaaclab.sensors import FrameTransformerCfg, OffsetCfg +from isaaclab.markers.config import FRAME_MARKER_CFG + +import isaaclab_tasks.manager_based.manipulation.deploy.mdp.events as gear_assembly_events + + +## +# Pre-defined configs +## +from isaaclab_assets.robots.universal_robots import UR10e_2f140_CFG # isort: skip + + +## +# Environment configuration +## + +@configclass +class EventCfg: + """Configuration for events.""" + + robot_joint_stiffness_and_damping = EventTerm( + func=mdp.randomize_actuator_gains, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), # only the arm joints are randomized + "stiffness_distribution_params": (0.75, 1.5), + "damping_distribution_params": (0.3, 3.0), + "operation": "scale", + "distribution": "log_uniform", + }, + ) + + joint_friction = EventTerm( + func=mdp.randomize_joint_parameters, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), + "friction_distribution_params": (0.3, 0.7), + "operation": "add", + "distribution": "uniform", + }, + ) + + medium_gear_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("factory_gear_medium", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + gear_base_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + robot_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + randomize_gear_type = EventTerm( + func=gear_assembly_events.randomize_gear_type, + mode="reset", + params={ + # "gear_types": ["gear_small", "gear_medium", "gear_large"] + "gear_types": ["gear_medium"] + }, + ) + + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + + randomize_gears_and_base_pose = EventTerm( + func=gear_assembly_events.randomize_gears_and_base_pose, + mode="reset", + params={ + "pose_range": { + # "x": [0.0, 0.0], + # "y": [0.0, 0.0], + # "z": [0.0, 0.0], + # "roll": [-0.0, 0.0], # 0 degree + # "pitch": [-0.0, 0.0], # 0 degree + # "yaw": [-0.0, 0.0], # 0 degree + "x": [-0.1, 0.1], + "y": [-0.25, 0.25], + "z": [-0.1, 0.1], + "roll": [-math.pi/90, math.pi/90], # 2 degree + "pitch": [-math.pi/90, math.pi/90], # 2 degree + "yaw": [-math.pi/6, math.pi/6], # 2 degree + }, + "gear_pos_range": { + "x": [-0.02, 0.02], + "y": [-0.02, 0.02], + "z": [0.0575, 0.0775], # 0.045 + 0.0225 + # "x": [-0.0, 0.0], + # "y": [-0.0, 0.0], + # "z": [0.0675, 0.0675], + }, + "rot_randomization_range": { + "roll": [-math.pi/36, math.pi/36], # 5 degree + "pitch": [-math.pi/36, math.pi/36], # 5 degree + "yaw": [-math.pi/36, math.pi/36], # 5 degree + }, + "velocity_range": {}, + }, + ) + + set_robot_to_grasp_pose = EventTerm( + func=gear_assembly_events.set_robot_to_grasp_pose, + mode="reset", + params={ + "robot_asset_cfg": SceneEntityCfg("robot"), + "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0], + "pos_randomization_range": { + "x": [-0.0, 0.0], + "y": [-0.005, 0.005], + "z": [-0.003, 0.003] + }, + # "pos_randomization_range": None + }, + ) + + + + +@configclass +class UR10eGearAssemblyEnvCfg(GearAssemblyEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to ur10 with local asset path + self.scene.robot = UR10e_2f140_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot", + # spawn=UR10e_2f140_CFG.spawn.replace( + # usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd") + # ) + ) + + + self.observations.policy.joint_pos.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"] + self.observations.policy.joint_vel.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"] + + # override events + self.events = EventCfg() + + # overriderride command generator body + self.joint_action_scale = 0.025 + self.action_scale_joint_space = [self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale] + self.actions.arm_action = mdp.RelativeJointPositionActionCfg( + asset_name="robot", joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"], scale=self.joint_action_scale, use_zero_offset=True + ) + + self.obs_order = ["arm_dof_pos", "arm_dof_vel", "shaft_pos", "shaft_quat"] + self.policy_action_space = "joint" + self.arm_joint_names = [ + "shoulder_pan_joint", + "shoulder_lift_joint", + "elbow_joint", + "wrist_1_joint", + "wrist_2_joint", + "wrist_3_joint", + ] + self.policy_action_space = "joint" + self.action_space = 6 + self.state_space = 0 + self.observation_space = 19 + + # Set joint_action_scale from the existing arm_action.scale + self.joint_action_scale = self.actions.arm_action.scale + + self.action_scale_joint_space = [ + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + ] + self.fixed_asset_init_pos_center = (1.0200, -0.2100, -0.1) + self.fixed_asset_init_pos_range = (0.1, 0.25, 0.1) + self.fixed_asset_init_orn_deg = (180.0, 0.0, 90.0) + self.fixed_asset_init_orn_deg_range = (5.0, 5.0, 30.0) + + +@configclass +class UR10eGearAssemblyEnvCfg_PLAY(UR10eGearAssemblyEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py new file mode 100644 index 00000000000..4e438ac979b --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -0,0 +1,329 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING +import os + +import torch +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.managers import ActionTermCfg as ActionTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm + +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.sim.simulation_cfg import PhysxCfg, SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import ResetSampledNoiseModelCfg, UniformNoiseCfg +import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.deploy.reach.reach_env_cfg import ReachEnvCfg + +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +# Get the directory where this configuration file is located +CONFIG_DIR = os.path.dirname(os.path.abspath(__file__)) +ASSETS_DIR = os.path.join(CONFIG_DIR, "assets") + +## +# Environment configuration +## + +@configclass +class GearAssemblySceneCfg(InteractiveSceneCfg): + """Configuration for the scene with a robotic arm.""" + + # Disable scene replication to allow USD-level randomization + replicate_physics = False + + # world + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)), + ) + + factory_gear_base = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/FactoryGearBase", + # TODO: change to common isaac sim directory + spawn=sim_utils.UsdFileCfg( + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_base.usd", + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=196, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=None), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd", + # rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + ) + + # factory_gear_small = RigidObjectCfg( + # prim_path="{ENV_REGEX_NS}/FactoryGearSmall", + # # TODO: change to common isaac sim directory + # spawn=sim_utils.UsdFileCfg( + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_small.usd", + # # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"), + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd", + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd", + # activate_contact_sensors=True, + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=False, + # kinematic_enabled=False, + # max_depenetration_velocity=5.0, + # linear_damping=0.0, + # angular_damping=0.0, + # max_linear_velocity=1000.0, + # max_angular_velocity=3666.0, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=196, + # solver_velocity_iteration_count=1, + # max_contact_impulse=1e32, + # ), + # mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + # collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + # ) + + factory_gear_medium = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/FactoryGearMedium", + # TODO: change to common isaac sim directory + spawn=sim_utils.UsdFileCfg( + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_medium.usd", + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=196, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + ) + + # factory_gear_large = RigidObjectCfg( + # prim_path="{ENV_REGEX_NS}/FactoryGearLarge", + # # TODO: change to common isaac sim directory + # spawn=sim_utils.UsdFileCfg( + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_large.usd", + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd", + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd", + # activate_contact_sensors=True, + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=False, + # kinematic_enabled=False, + # max_depenetration_velocity=5.0, + # linear_damping=0.0, + # angular_damping=0.0, + # max_linear_velocity=1000.0, + # max_angular_velocity=3666.0, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=196, + # solver_velocity_iteration_count=1, + # max_contact_impulse=1e32, + # ), + # mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + # collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + # ) + + + # robots + robot: ArticulationCfg = MISSING + + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), + ) + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ActionTerm = MISSING + gripper_action: ActionTerm | None = None + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm(func=mdp.joint_pos, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + joint_vel = ObsTerm(func=mdp.joint_vel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=ResetSampledNoiseModelCfg( + noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add") + )) + gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # @configclass + # class CriticCfg(ObsGroup): + # """Observations for policy group.""" + + # # observation terms (order preserved) + # joint_pos = ObsTerm(func=mdp.joint_pos, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # joint_vel = ObsTerm(func=mdp.joint_vel, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, + # # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + # gear_pos = ObsTerm(func=mdp.gear_pos_w) + # gear_quat = ObsTerm(func=mdp.gear_quat_w) + + # observation groups + policy: PolicyCfg = PolicyCfg() + # critic: CriticCfg = CriticCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_gear = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": { + "x": [-0.05, 0.05], + "y": [-0.05, 0.05], + "z": [0.1, 0.15], + }, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("factory_gear_small"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + end_effector_gear_keypoint_tracking = RewTerm( + func=mdp.keypoint_entity_error, + weight=-1.5, + params={ + "asset_cfg_1": SceneEntityCfg("factory_gear_base"), + "keypoint_scale": 0.15, + }, + ) + + end_effector_gear_keypoint_tracking_exp = RewTerm( + func=mdp.keypoint_entity_error_exp, + weight=1.5, + params={ + "asset_cfg_1": SceneEntityCfg("factory_gear_base"), + "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)], + "kp_use_sum_of_exps": False, + "keypoint_scale": 0.15, + }, + ) + + action_rate = RewTerm(func=mdp.action_rate, weight=-5.0e-06) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + +@configclass +class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: GearAssemblySceneCfg = GearAssemblySceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + sim: SimulationCfg = SimulationCfg( + + physx=PhysxCfg( + gpu_collision_stack_size=2**31, # Important to prevent collisionStackSize buffer overflow in contact-rich environments. + gpu_max_rigid_contact_count=2**23, + gpu_max_rigid_patch_count=2**23 + ), + ) + + def __post_init__(self): + """Post initialization.""" + # general settings + self.episode_length_s = 6.66 + self.viewer.eye = (3.5, 3.5, 3.5) + # simulation settings + self.decimation = 2 + self.sim.render_interval = self.decimation + self.sim.dt = 1.0 / 120.0 + + self.hand_close_pos = 0.7 + + self.gear_offsets = {'gear_small': [0.076125, 0.0, 0.0], + 'gear_medium': [0.030375, 0.0, 0.0], + 'gear_large': [-0.045375, 0.0, 0.0]} + + self.gear_offsets_grasp = {'gear_small': [0.0, 0.076125, -0.26], + 'gear_medium': [0.0, 0.030375, -0.26], + 'gear_large': [0.0, -0.045375, -0.26]} + + self.hand_grasp_pos = {"gear_small": 0.67, + "gear_medium": 0.54, + "gear_large": 0.52} diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py index 6686f9f5276..57059c62dac 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py @@ -8,3 +8,5 @@ from isaaclab.envs.mdp import * # noqa: F401, F403 from .rewards import * # noqa: F401, F403 +from .events import * # noqa: F401, F403 +from .observations import * # noqa: F401, F403 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py new file mode 100644 index 00000000000..1e70454510e --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py @@ -0,0 +1,451 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Functions specific to the in-hand dexterous manipulation environments.""" + + +from __future__ import annotations + +import carb +import time +import torch +from typing import TYPE_CHECKING, Literal, Optional, List + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab.utils.math import sample_uniform +import isaaclab.utils.math as math_utils +import isaaclab.sim as sim_utils + +from isaaclab_tasks.direct.automate import factory_control as fc +import random + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + +def set_default_joint_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + default_pose: torch.Tensor, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + # Set the default pose for robots in all envs + asset = env.scene[asset_cfg.name] + asset.data.default_joint_pos = torch.tensor(default_pose, device=env.device).repeat(env.num_envs, 1) + +def randomize_gear_type( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + gear_types: List[str] = ["gear_small", "gear_medium", "gear_large"] +): + """Randomize the gear type being used for the specified environments. + + Args: + env: The environment containing the assets + env_ids: Environment IDs to randomize + gear_types: List of available gear types to choose from + """ + # Randomly select gear type for each environment + selected_gears = [random.choice(gear_types) for _ in range(len(env_ids))] + + # Store the selected gear type in the environment instance + # This will be used by the observation functions + if not hasattr(env, '_current_gear_type'): + env._current_gear_type = ["gear_medium"] * env.num_envs + + for i, env_id in enumerate(env_ids): + env._current_gear_type[env_id] = selected_gears[i] + # print(f"env._current_gear_type: {env._current_gear_type}") + +def set_robot_to_grasp_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + pos_threshold: float = 1e-6, + rot_threshold: float = 1e-6, + max_iterations: int = 10, + rot_offset: Optional[list[float]] = None, + pos_randomization_range: Optional[dict] = None, + num_arm_joints: int = 6 +): + # Convert rotation offset to tensor if provided + if rot_offset is not None: + rot_offset_tensor = torch.tensor(rot_offset, device=env.device).unsqueeze(0).expand(len(env_ids), -1) + else: + rot_offset_tensor = None + + # Check if environment has gear type selection + if not hasattr(env, '_current_gear_type'): + raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.") + + i = 0 + robot_asset: Articulation = env.scene[robot_asset_cfg.name] + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + joint_vel = robot_asset.data.joint_vel[env_ids].clone() + + while i < max_iterations: + robot_asset: Articulation = env.scene[robot_asset_cfg.name] + # Add gaussian noise to joint states + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + joint_vel = robot_asset.data.joint_vel[env_ids].clone() + + # Get gear positions and orientations for each environment based on selected gear type + env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids) + grasp_object_pos_world = torch.zeros(len(env_ids), 3, device=env.device) + grasp_object_quat = torch.zeros(len(env_ids), 4, device=env.device) + + for row_idx, env_id in enumerate(env_ids_list): + # Get the gear type for this environment + gear_key = env._current_gear_type[env_id] + selected_asset_name = f"factory_{gear_key}" + + # Get the gear asset for this environment + gear_asset: RigidObject = env.scene[selected_asset_name] + grasp_object_pos_world[row_idx] = gear_asset.data.root_link_pos_w[env_id].clone() + grasp_object_quat[row_idx] = gear_asset.data.root_link_quat_w[env_id].clone() + + + # First apply rotation offset to get the object's orientation + if rot_offset_tensor is not None: + # Apply rotation offset by quaternion multiplication + # rot_offset is assumed to be in quaternion format (w, x, y, z) + grasp_object_quat = math_utils.quat_mul(grasp_object_quat, rot_offset_tensor) + # print(f"Applied rot_offset: {rot_offset}") + # print(f"grasp_object_quat after offset: {grasp_object_quat}") + + if pos_randomization_range is not None: + pos_keys = ["x", "y", "z"] + range_list_pos = [pos_randomization_range.get(key, (0.0, 0.0)) for key in pos_keys] + ranges_pos = torch.tensor(range_list_pos, device=env.device) + rand_pos_offsets = math_utils.sample_uniform(ranges_pos[:, 0], ranges_pos[:, 1], (len(env_ids), 3), device=env.device) + # Apply gear-specific grasp offsets to each environment + for row_idx, env_id in enumerate(env_ids_list): + gear_key = env._current_gear_type[env_id] + gear_grasp_offset = env.cfg.gear_offsets_grasp[gear_key] + grasp_offset_tensor = torch.tensor(gear_grasp_offset, device=env.device, dtype=grasp_object_pos_world.dtype) + + if pos_randomization_range is not None: + grasp_offset_tensor += rand_pos_offsets[row_idx] + + # Transform position offset from rotated object frame to world frame + grasp_object_pos_world[row_idx] = grasp_object_pos_world[row_idx] + math_utils.quat_apply( + grasp_object_quat[row_idx:row_idx+1], grasp_offset_tensor.unsqueeze(0) + )[0] + + # Convert to environment-relative coordinates by subtracting environment origins + grasp_object_pos = grasp_object_pos_world + + # Get end effector pose of the robot + # Get the specific wrist_3_link pose + try: + # Find the index of the wrist_3_link body + wrist_3_indices, wrist_3_names = robot_asset.find_bodies(["wrist_3_link"]) + wrist_3_idx = wrist_3_indices[0] + + if len(wrist_3_indices) == 1: + wrist_3_idx = wrist_3_indices[0] # Get the first (and should be only) index + + # Get the specific wrist_3_link pose + eef_pos_world = robot_asset.data.body_link_pos_w[env_ids, wrist_3_idx] # Shape: (len(env_ids), 3) + eef_quat = robot_asset.data.body_link_quat_w[env_ids, wrist_3_idx] # Shape: (len(env_ids), 4) + + # Convert to environment-relative coordinates + eef_pos = eef_pos_world + + + # You can also get the full pose as [pos, quat] (7-dimensional) + eef_pos = robot_asset.data.body_pos_w[env_ids, wrist_3_idx] + eef_quat = robot_asset.data.body_quat_w[env_ids, wrist_3_idx] + # print(f"eef_pos: {eef_pos}") + # print(f"eef_quat: {eef_quat}") + elif len(wrist_3_indices) > 1: + print("wrist_3_link found multiple times in robot body names") + print(f"Available body names: {robot_asset.body_names}") + else: + print("wrist_3_link not found in robot body names") + print(f"Available body names: {robot_asset.body_names}") + + except Exception as e: + print(f"Could not get end effector pose: {e}") + print("You may need to adjust the body name or access method based on your robot configuration") + + + # Compute error to target using wrist_3_link as current and grasp_object as target + if len(wrist_3_indices) > 0: + # Get current end effector pose (wrist_3_link) + current_eef_pos = eef_pos # wrist_3_link position + current_eef_quat = eef_quat # wrist_3_link orientation + + # Get target pose (grasp object) + target_eef_pos = grasp_object_pos # grasp object position + target_eef_quat = grasp_object_quat # grasp object orientation + + # print(f"current_eef_pos: {current_eef_pos[0]}") + # print(f"current_eef_quat: {current_eef_quat[0]}") + # print(f"target_eef_pos: {target_eef_pos[0]}") + # print(f"target_eef_quat: {target_eef_quat[0]}") + + # Compute pose error + pos_error, axis_angle_error = fc.get_pose_error( + fingertip_midpoint_pos=current_eef_pos, + fingertip_midpoint_quat=current_eef_quat, + ctrl_target_fingertip_midpoint_pos=target_eef_pos, + ctrl_target_fingertip_midpoint_quat=target_eef_quat, + jacobian_type='geometric', + rot_error_type='axis_angle') + delta_hand_pose = torch.cat((pos_error, axis_angle_error), dim=-1) + # Check if pose error is within threshold + pos_error_norm = torch.norm(pos_error, dim=-1) + rot_error_norm = torch.norm(axis_angle_error, dim=-1) + + # print(f"Pose error - position: {pos_error[0]}") + # print(f"Pose error - orientation: {axis_angle_error[0]}") + # print(f"Position error norm: {pos_error_norm[0]}") + # print(f"Rotation error norm: {rot_error_norm[0]}") + + # Check if all environments have converged + if torch.all(pos_error_norm < pos_threshold) and torch.all(rot_error_norm < rot_threshold): + # print(f"Converged after {i} iterations!") + # print(f"pos_error_norm: {pos_error_norm}") + # print(f"rot_error_norm: {rot_error_norm}") + # print(f"pos_error: {pos_error}") + # print(f"axis_angle_error: {axis_angle_error}") + break + + # Solve DLS problem for inverse kinematics + # Get jacobian for the wrist_3_link using the same approach as task_space_actions.py + try: + # Get all jacobians from the robot + jacobians = robot_asset.root_physx_view.get_jacobians().clone() + + # Select the jacobian for the wrist_3_link body + # For fixed-base robots, Jacobian excludes the base body (index 0) + # So if wrist_3_idx is 6, the Jacobian index should be 5 + jacobi_body_idx = wrist_3_idx - 1 + + + jacobian = jacobians[env_ids, jacobi_body_idx, :, :] # Only first 6 joints (arm, not gripper) + + delta_dof_pos = fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=jacobian, + device=env.device, + ) + + # Update joint positions - only update arm joints (first 6) + joint_pos += delta_dof_pos + joint_vel = torch.zeros_like(joint_pos) + + # Set into the physics simulation + + + except Exception as e: + print(f"Error in IK computation: {e}") + print("Note: You may need to implement proper jacobian computation for your robot") + + # Set into the physics simulation + robot_asset.set_joint_position_target(joint_pos, env_ids=env_ids) + robot_asset.set_joint_velocity_target(joint_vel, env_ids=env_ids) + robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + + # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}") + + # if delta_dof_pos.abs().max() < 1e-5: + # break + + # print(f"Step {i}") + + i += 1 + # if i >= max_iterations: + # print(f"IK did not converge after {max_iterations} iterations") + # # print(f"pos_error_norm: {pos_error_norm}") + # # print(f"rot_error_norm: {rot_error_norm}") + # # print(f"pos_error: {pos_error}") + # # print(f"axis_angle_error: {axis_angle_error}") + # # raise RuntimeError(f"IK did not converge after {max_iterations} iterations") + + # # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}") + # for i in range(10): + # env.sim.render() + # input("Going to set object position...") + + # TODO: Resttting the gear based on teh IK solution is not working as expected. The gear is not being placed in the correct position. + # # Set the grasp object's pose to the current end-effector pose in world coordinates (with optional offsets) + # # The offsets should be applied in reverse to get from wrist_3_link to gear pose + # gear_pos_world = current_eef_pos + # gear_quat_world = current_eef_quat + + # # Apply position offset (subtract to get from wrist_3_link to gear position) + # if pos_offset_tensor is not None: + # gear_pos_world = gear_pos_world - pos_offset_tensor + + # # Apply rotation offset (inverse to get from wrist_3_link to gear orientation) + # if rot_offset_tensor is not None: + # # Apply inverse rotation offset + # gear_quat_world = math_utils.quat_mul(current_eef_quat, math_utils.quat_conjugate(rot_offset_tensor)) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 1...") + + # # Set the grasp object's pose for each environment based on selected gear type + # for row_idx, env_id in enumerate(env_ids_list): + # gear_key = env._current_gear_type[env_id] + # selected_asset_name = f"factory_{gear_key}" + # gear_asset: RigidObject = env.scene[selected_asset_name] + # gear_asset.write_root_pose_to_sim( + # torch.cat([gear_pos_world[row_idx:row_idx+1], gear_quat_world[row_idx:row_idx+1]], dim=-1), + # env_ids=torch.tensor([env_id], device=env.device) + # ) + + # Close the gripper by setting the finger_joint based on gear type + all_joints, all_joints_names = robot_asset.find_joints([".*"]) + + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + + # @ireti: We need to change this so it workes with teh public 2f140 gripper that uses mimic joints + finger_joints = all_joints[num_arm_joints:] + finger_joint_names = all_joints_names[num_arm_joints:] + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 2...") + + hand_grasp_pos = env.cfg.hand_grasp_pos[gear_key] + set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_grasp_pos) + + robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids) + robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 3...") + + # Set finger joints based on gear type for each environment + hand_close_pos = env.cfg.hand_close_pos + set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_close_pos) + + robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 4...") + +def randomize_gears_and_base_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + pose_range: dict = {}, + velocity_range: dict = {}, + gear_pos_range: dict = {}, + rot_randomization_range: dict = {}, + +): + """Randomize both the gear base pose and the poses of all gear types with the same value, + then apply the per-env `gear_pos_range` only to the gear selected by `randomize_gear_type`. + """ + if not hasattr(env, '_current_gear_type'): + raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.") + + device = env.device + + # Shared pose samples for all assets (base and all gears) + pose_keys = ["x", "y", "z", "roll", "pitch", "yaw"] + range_list_pose = [pose_range.get(key, (0.0, 0.0)) for key in pose_keys] + ranges_pose = torch.tensor(range_list_pose, device=device) + rand_pose_samples = math_utils.sample_uniform(ranges_pose[:, 0], ranges_pose[:, 1], (len(env_ids), 6), device=device) + + orientations_delta = math_utils.quat_from_euler_xyz( + rand_pose_samples[:, 3], rand_pose_samples[:, 4], rand_pose_samples[:, 5] + ) + + # Shared velocity samples for all assets (base and all gears) + range_list_vel = [velocity_range.get(key, (0.0, 0.0)) for key in pose_keys] + ranges_vel = torch.tensor(range_list_vel, device=device) + rand_vel_samples = math_utils.sample_uniform(ranges_vel[:, 0], ranges_vel[:, 1], (len(env_ids), 6), device=device) + + # Prepare assets: base + all possible gears (only those present in scene will be processed) + base_asset_name = "factory_gear_base" + possible_gear_assets = [ + # "factory_gear_small", + "factory_gear_medium", + # "factory_gear_large", + # "table", + ] + + + positions_by_asset = {} + orientations_by_asset = {} + velocities_by_asset = {} + + # Combine base and gear assets into one pass to avoid duplication + asset_names_to_process = [base_asset_name] + possible_gear_assets + for asset_name in asset_names_to_process: + asset: RigidObject | Articulation = env.scene[asset_name] + root_states = asset.data.default_root_state[env_ids].clone() + positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_pose_samples[:, 0:3] + orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta) + velocities = root_states[:, 7:13] + rand_vel_samples + positions_by_asset[asset_name] = positions + orientations_by_asset[asset_name] = orientations + velocities_by_asset[asset_name] = velocities + + # Per-env gear offset (gear_pos_range) applied only to the selected gear + range_list_gear = [gear_pos_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z"]] + ranges_gear = torch.tensor(range_list_gear, device=device) + rand_gear_offsets = math_utils.sample_uniform(ranges_gear[:, 0], ranges_gear[:, 1], (len(env_ids), 3), device=device) + + # Per-env gear orientation offset (rot_randomization_range) applied only to the selected gear + range_list_rot = [rot_randomization_range.get(key, (0.0, 0.0)) for key in ["roll", "pitch", "yaw"]] + ranges_rot = torch.tensor(range_list_rot, device=device) + rand_rot_offsets = math_utils.sample_uniform(ranges_rot[:, 0], ranges_rot[:, 1], (len(env_ids), 3), device=device) + rand_rot_quats = math_utils.quat_from_euler_xyz(rand_rot_offsets[:, 0], rand_rot_offsets[:, 1], rand_rot_offsets[:, 2]) + + env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids) + for row_idx, env_id in enumerate(env_ids_list): + # env._current_gear_type[env_id] is expected to be one of: "gear_small", "gear_medium", "gear_large" + gear_key = env._current_gear_type[env_id] + selected_asset_name = f"factory_{gear_key}" + if selected_asset_name in positions_by_asset: + positions_by_asset[selected_asset_name][row_idx] = ( + positions_by_asset[selected_asset_name][row_idx] + rand_gear_offsets[row_idx] + ) + # Apply additional orientation randomization to the selected gear + orientations_by_asset[selected_asset_name][row_idx] = math_utils.quat_mul( + orientations_by_asset[selected_asset_name][row_idx], rand_rot_quats[row_idx] + ) + + # Write back to sim for all prepared assets + for asset_name in positions_by_asset.keys(): + asset = env.scene[asset_name] + positions = positions_by_asset[asset_name] + orientations = orientations_by_asset[asset_name] + velocities = velocities_by_asset[asset_name] + asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids) + asset.write_root_velocity_to_sim(velocities, env_ids=env_ids) + +def set_finger_joint_pos_2f_140( + joint_pos: torch.Tensor, + env_ids_list: list[int], + finger_joints: list[int], + finger_joint_position: float, +): + # Get hand close positions for each environment based on gear type + for row_idx, env_id in enumerate(env_ids_list): + + joint_pos[row_idx, finger_joints[0]] = finger_joint_position + joint_pos[row_idx, finger_joints[1]] = finger_joint_position + + # outer finger joints + joint_pos[row_idx, finger_joints[2]] = 0 + joint_pos[row_idx, finger_joints[3]] = 0 + + joint_pos[row_idx, finger_joints[4]] = -finger_joint_position + joint_pos[row_idx, finger_joints[5]] = -finger_joint_position + + joint_pos[row_idx, finger_joints[6]] = finger_joint_position + joint_pos[row_idx, finger_joints[7]] = finger_joint_position \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py new file mode 100644 index 00000000000..a434018bea0 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py @@ -0,0 +1,191 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.assets import RigidObject +from isaaclab.managers import SceneEntityCfg +from isaacsim.core.utils.torch.transformations import tf_combine + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def gear_shaft_pos_w( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base") +) -> torch.Tensor: + """Gear shaft position in world frame with offset applied. + + Args: + env: The environment containing the assets + asset_cfg: Configuration of the gear base asset + + Returns: + Gear shaft position tensor of shape (num_envs, 3) + """ + # extract the asset (to enable type hinting) + asset: RigidObject = env.scene[asset_cfg.name] + + # get base gear position and orientation + base_pos = asset.data.root_pos_w + base_quat = asset.data.root_quat_w + + + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # get offset for the specific gear type + gear_offsets = getattr(env.cfg, 'gear_offsets', {}) + + # Initialize shaft positions + shaft_pos = torch.zeros_like(base_pos) + + # Apply offsets for each environment based on their gear type + for i in range(env.num_envs): + # gear_type = current_gear_type[i] if i < len(current_gear_type) else "gear_medium" + gear_type = current_gear_type[i] + + offset = torch.tensor(gear_offsets[gear_type], device=base_pos.device, dtype=base_pos.dtype) + offset_pos = offset.unsqueeze(0) + shaft_pos[i] = base_pos[i] + offset_pos[0] + # Apply position and rotation offsets if provided + # create identity quaternion + rot_offset_tensor = torch.tensor([1.0, 0.0, 0.0, 0.0], device=base_pos.device, dtype=base_pos.dtype).unsqueeze(0) + _, shaft_pos[i] = tf_combine( + base_quat[i:i+1], base_pos[i:i+1], + rot_offset_tensor, offset_pos + ) + + return shaft_pos - env.scene.env_origins + + +def gear_shaft_quat_w( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base"), +) -> torch.Tensor: + """Gear shaft orientation in world frame. + + Args: + env: The environment containing the assets + asset_cfg: Configuration of the gear base asset + gear_type: Type of gear ('gear_small', 'gear_medium', 'gear_large') + + Returns: + Gear shaft orientation tensor of shape (num_envs, 4) + """ + # extract the asset (to enable type hinting) + asset: RigidObject = env.scene[asset_cfg.name] + + + # get base quaternion + base_quat = asset.data.root_quat_w + + # ensure w component is positive for each environment + # if w is negative, negate the entire quaternion to maintain same orientation + w_negative = base_quat[:, 0] < 0 + positive_quat = base_quat.clone() + positive_quat[w_negative] = -base_quat[w_negative] + + return positive_quat + +def gear_pos_w( + env: ManagerBasedRLEnv, +) -> torch.Tensor: + """Gear position in world frame. + + Args: + env: The environment containing the assets + + Returns: + Gear position tensor of shape (num_envs, 3) + """ + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Initialize positions array + gear_positions = torch.zeros((env.num_envs, 3), device=env.device) + + # Get positions for each environment based on their gear type + for i in range(env.num_envs): + gear_type = current_gear_type[i] + asset_name = gear_to_asset_mapping.get(gear_type) + + asset: RigidObject = env.scene[asset_name] + + # Get position for this specific environment + gear_positions[i] = asset.data.root_pos_w[i] + + return gear_positions + +def gear_quat_w( + env: ManagerBasedRLEnv, +) -> torch.Tensor: + """Gear orientation in world frame. + + Args: + env: The environment containing the assets + + Returns: + Gear orientation tensor of shape (num_envs, 4) + """ + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Initialize quaternions array + gear_positive_quat = torch.zeros((env.num_envs, 4), device=env.device) + + # Get quaternions for each environment based on their gear type + for i in range(env.num_envs): + gear_type = current_gear_type[i] + asset_name = gear_to_asset_mapping.get(gear_type) + + # Get the asset for this specific gear type + asset: RigidObject = env.scene[asset_name] + + # Get quaternion for this specific environment + gear_quat = asset.data.root_quat_w[i] + + # ensure w component is positive for each environment + # if w is negative, negate the entire quaternion to maintain same orientation + if gear_quat[0] < 0: + gear_quat = -gear_quat + + gear_positive_quat[i] = gear_quat + + return gear_positive_quat \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py index 0d14620e225..8e44e65f416 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py @@ -12,6 +12,7 @@ from isaaclab.managers import SceneEntityCfg from isaaclab.sensors.frame_transformer.frame_transformer import FrameTransformer +from isaaclab.utils.math import quat_mul if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv @@ -229,3 +230,289 @@ def keypoint_command_error_exp( keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) return keypoint_reward_exp + +def keypoint_entity_error( + env: ManagerBasedRLEnv, + asset_cfg_1: SceneEntityCfg, + keypoint_scale: float = 1.0, + add_cube_center_kp: bool = True, + quat_offset: torch.Tensor | None = None, + pos_offset: torch.Tensor | None = None +) -> torch.Tensor: + """Compute keypoint distance between a RigidObject and the dynamically selected gear. + + The function computes the keypoint distance between the poses of two entities. + Keypoints are created by applying offsets to both poses and the distance is + computed as the L2-norm between corresponding keypoints. + + Args: + env: The environment containing the assets + asset_cfg_1: Configuration of the first asset (RigidObject) + keypoint_scale: Scale factor for keypoint offsets + add_cube_center_kp: Whether to include the center keypoint (0, 0, 0) + quat_offset: Optional quaternion offset to apply to asset_1's orientation (w, x, y, z) + pos_offset: Optional position offset to apply to asset_1's position (x, y, z) + + Returns: + Keypoint distance tensor of shape (num_envs,) where each element + is the mean L2 norm distance between corresponding keypoints + """ + # extract the assets (to enable type hinting) + asset_1 = env.scene[asset_cfg_1.name] # RigidObject + + # get current poses in world frame for asset_1 + # asset_1 is RigidObject - use body_pos_w and body_quat_w + curr_pos_1 = asset_1.data.body_pos_w[:, 0] # type: ignore + curr_quat_1 = asset_1.data.body_quat_w[:, 0] # type: ignore + + # Apply position offset to asset_1 if provided + if pos_offset is not None: + # Convert list to tensor if needed + if isinstance(pos_offset, list): + pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype) + # Ensure pos_offset has the right shape (num_envs, 3) + if pos_offset.dim() == 1: + pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1) + # Apply the offset by adding to position + curr_pos_1 = curr_pos_1 + pos_offset + + # Apply quaternion offset to asset_1 if provided + if quat_offset is not None: + # Convert list to tensor if needed + if isinstance(quat_offset, list): + quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype) + # Ensure quat_offset has the right shape (num_envs, 4) + if quat_offset.dim() == 1: + quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1) + # Apply the offset by quaternion multiplication + # TODO: check if this is correct or if its shoudl be the inverse + curr_quat_1 = quat_mul(quat_offset, curr_quat_1) + + # Handle per-environment gear type selection + if hasattr(env, '_current_gear_type'): + # Get the current gear type for each environment + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Create arrays to store poses for all environments + curr_pos_2_all = torch.zeros_like(curr_pos_1) + curr_quat_2_all = torch.zeros_like(curr_quat_1) + + # Get poses for each environment based on their gear type + for env_id in range(env.num_envs): + # Get the gear type for this specific environment + if env_id < len(current_gear_type): + selected_gear_type = current_gear_type[env_id] + else: + print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback") + selected_gear_type = 'gear_medium' + selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium') + + # Get the asset for this specific gear type + try: + asset_2 = env.scene[selected_asset_name] + except KeyError: + # Fallback to factory_gear_medium if the asset doesn't exist + asset_2 = env.scene['factory_gear_medium'] + + # Get poses for this specific environment + curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0] # type: ignore + curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0] # type: ignore + + # Compute keypoint distance for all environments at once + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2_all, + target_quat=curr_quat_2_all, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + else: + # Fallback to factory_gear_medium if _current_gear_type is not available + print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback") + asset_2 = env.scene['factory_gear_medium'] + + # get current poses in world frame for asset_2 + curr_pos_2 = asset_2.data.body_pos_w[:, 0] # type: ignore + curr_quat_2 = asset_2.data.body_quat_w[:, 0] # type: ignore + + # compute keypoint distance + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2, + target_quat=curr_quat_2, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # Return mean distance across keypoints to match expected reward shape (num_envs,) + return keypoint_dist_sep.mean(-1) + + +def keypoint_entity_error_exp( + env: ManagerBasedRLEnv, + asset_cfg_1: SceneEntityCfg, + kp_exp_coeffs: list[tuple[float, float]] = [(1.0, 0.1)], + kp_use_sum_of_exps: bool = True, + keypoint_scale: float = 1.0, + add_cube_center_kp: bool = True, + quat_offset: torch.Tensor | None = None, + pos_offset: torch.Tensor | None = None +) -> torch.Tensor: + """Compute exponential keypoint reward between a RigidObject and the dynamically selected gear. + + The function computes the keypoint distance between the poses of two entities, + then applies an exponential reward function. The reward is computed using the + formula: 1 / (exp(a * distance) + b + exp(-a * distance)) where a and b are coefficients. + + Args: + env: The environment containing the assets + asset_cfg_1: Configuration of the first asset (RigidObject) + kp_exp_coeffs: List of (a, b) coefficient pairs for exponential reward + kp_use_sum_of_exps: Whether to use sum of exponentials (True) or single exponential (False) + keypoint_scale: Scale factor for keypoint offsets + add_cube_center_kp: Whether to include the center keypoint (0, 0, 0) + + Returns: + Exponential keypoint reward tensor of shape (num_envs,) where each element + is the exponential reward value + """ + # extract the assets (to enable type hinting) + asset_1 = env.scene[asset_cfg_1.name] # RigidObject + + # get current poses in world frame for asset_1 + # asset_1 is RigidObject - use body_pos_w and body_quat_w + curr_pos_1 = asset_1.data.body_pos_w[:, 0] # type: ignore + curr_quat_1 = asset_1.data.body_quat_w[:, 0] # type: ignore + + # Apply position offset to asset_1 if provided + if pos_offset is not None: + # Convert list to tensor if needed + if isinstance(pos_offset, list): + pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype) + # Ensure pos_offset has the right shape (num_envs, 3) + if pos_offset.dim() == 1: + pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1) + # Apply the offset by adding to position + curr_pos_1 = curr_pos_1 + pos_offset + + # Apply quaternion offset to asset_1 if provided + if quat_offset is not None: + # Convert list to tensor if needed + if isinstance(quat_offset, list): + quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype) + # Ensure quat_offset has the right shape (num_envs, 4) + if quat_offset.dim() == 1: + quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1) + # Apply the offset by quaternion multiplication + # TODO: check if this is correct or if its shoudl be the inverse + curr_quat_1 = quat_mul(quat_offset, curr_quat_1) + + # Handle per-environment gear type selection + if hasattr(env, '_current_gear_type'): + # Get the current gear type for each environment + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Create arrays to store poses for all environments + curr_pos_2_all = torch.zeros_like(curr_pos_1) + curr_quat_2_all = torch.zeros_like(curr_quat_1) + + # Get poses for each environment based on their gear type + for env_id in range(env.num_envs): + # Get the gear type for this specific environment + if env_id < len(current_gear_type): + selected_gear_type = current_gear_type[env_id] + else: + print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback") + selected_gear_type = 'gear_medium' + selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium') + + # Get the asset for this specific gear type + try: + asset_2 = env.scene[selected_asset_name] + except KeyError: + # Fallback to factory_gear_medium if the asset doesn't exist + asset_2 = env.scene['factory_gear_medium'] + + # Get poses for this specific environment + curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0] # type: ignore + curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0] # type: ignore + + # Compute keypoint distance for all environments at once + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2_all, + target_quat=curr_quat_2_all, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # compute exponential reward + keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0]) + + if kp_use_sum_of_exps: + # Use sum of exponentials: average across keypoints for each coefficient + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1) + else: + # Use single exponential: average keypoint distance first, then apply exponential + keypoint_dist = keypoint_dist_sep.mean(-1) # shape: (num_envs,) + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) + else: + # Fallback to factory_gear_medium if _current_gear_type is not available + print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback") + asset_2 = env.scene['factory_gear_medium'] + + # get current poses in world frame for asset_2 + curr_pos_2 = asset_2.data.body_pos_w[:, 0] # type: ignore + curr_quat_2 = asset_2.data.body_quat_w[:, 0] # type: ignore + + # compute keypoint distance + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2, + target_quat=curr_quat_2, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # compute exponential reward + keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0]) + + if kp_use_sum_of_exps: + # Use sum of exponentials: average across keypoints for each coefficient + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1) + else: + # Use single exponential: average keypoint distance first, then apply exponential + keypoint_dist = keypoint_dist_sep.mean(-1) # shape: (num_envs,) + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) + + return keypoint_reward_exp