Collaborator
please capture a high-quality image with the robot base. : )))

Contributor Author
done. Does this one work?

1 change: 1 addition & 0 deletions docs/source/setup/walkthrough/index.rst
@@ -22,3 +22,4 @@ represents a different stage of modifying the default template project to achieve
technical_env_design
training_jetbot_gt
training_jetbot_reward_exploration
sim_to_real_training
495 changes: 495 additions & 0 deletions docs/source/setup/walkthrough/sim_to_real_training.rst

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -242,11 +242,21 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
return torch.sum(out_of_limits, dim=1)


def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 norm."""
return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)


def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)


def action(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel (summed for each environment)."""
return torch.sum(env.action_manager.action**2, dim=-1)


def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action), dim=1)
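The new terms differ from the existing `_l2` variants only in the kernel (an L2 norm versus summed squares). A minimal sketch of wiring one of each into a manager-based environment, assuming the functions are re-exported through `isaaclab.envs.mdp` like the existing reward terms; weights are illustrative only:

from isaaclab.envs import mdp
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.utils import configclass

@configclass
class RewardsCfg:
    # penalize fast changes in the action signal (L2-norm kernel)
    action_rate = RewTerm(func=mdp.action_rate, weight=-0.01)
    # penalize large action magnitudes (summed-squares kernel)
    action_magnitude = RewTerm(func=mdp.action, weight=-0.001)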
18 changes: 16 additions & 2 deletions source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,22 @@

"""
from .noise_cfg import NoiseCfg # noqa: F401
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
from .noise_cfg import (
ConstantNoiseCfg,
GaussianNoiseCfg,
NoiseModelCfg,
NoiseModelWithAdditiveBiasCfg,
ResetSampledNoiseModelCfg,
UniformNoiseCfg,
)
from .noise_model import (
NoiseModel,
NoiseModelWithAdditiveBias,
ResetSampledNoiseModel,
constant_noise,
gaussian_noise,
uniform_noise,
)

# Backward compatibility
ConstantBiasNoiseCfg = ConstantNoiseCfg
13 changes: 13 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,16 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):
Defaults to True.
"""


@configclass
class ResetSampledNoiseModelCfg(NoiseModelCfg):
"""Configuration for a noise model that samples noise ONLY during reset."""

class_type: type = noise_model.ResetSampledNoiseModel

noise_cfg: NoiseCfg = MISSING
"""The noise configuration for the noise.
Based on this configuration, the noise is sampled at every reset of the noise model.
"""
70 changes: 70 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,73 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
# now re-sample that expanded bias in-place
self.reset()
return super().__call__(data) + self._bias


class ResetSampledNoiseModel(NoiseModel):
"""Noise model that samples noise ONLY during reset and applies it consistently.

The noise is sampled from the configured distribution ONLY during reset and applied consistently
until the next reset. Unlike regular noise that generates new random values every step,
this model maintains the same noise values throughout an episode.
"""

def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
# initialize parent class
super().__init__(noise_model_cfg, num_envs, device)
# store the noise configuration
self._noise_cfg = noise_model_cfg.noise_cfg
self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
self._num_components: int | None = None

def reset(self, env_ids: Sequence[int] | None = None):
"""Reset the noise model by sampling NEW noise values.

This method samples new noise for the specified environments using the configured noise function.
The sampled noise will remain constant until the next reset.

Args:
env_ids: The environment ids to reset the noise model for. Defaults to None,
in which case all environments are considered.
"""
# resolve the environment ids and how many of them there are;
# note: with ``slice(None)``, ``env_ids.stop`` is None, so the count cannot be derived from the slice itself
if env_ids is None:
    env_ids = slice(None)
    num_ids = self._sampled_noise.shape[0]
else:
    num_ids = len(env_ids)

# sample new noise by passing zeros through the configured noise function
dummy_data = torch.zeros((num_ids, 1), device=self._device)
sampled_noise = self._noise_cfg.func(dummy_data, self._noise_cfg)

self._sampled_noise[env_ids] = sampled_noise

def __call__(self, data: torch.Tensor) -> torch.Tensor:
"""Apply the pre-sampled noise to the data.

This method applies the noise that was sampled during the last reset.
No new noise is generated - the same values are used consistently.

Args:
data: The data to apply the noise to. Shape is (num_envs, ...).

Returns:
The data with the noise applied. Shape is the same as the input data.
"""
# on first apply, expand noise to match last dim of data
if self._num_components is None:
*_, self._num_components = data.shape
# expand noise from (num_envs,1) to (num_envs, num_components)
self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)

# apply the noise based on operation
if self._noise_cfg.operation == "add":
return data + self._sampled_noise
elif self._noise_cfg.operation == "scale":
return data * self._sampled_noise
elif self._noise_cfg.operation == "abs":
return self._sampled_noise
else:
raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")
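A minimal usage sketch, assuming the constructor signature shown above and the existing `GaussianNoiseCfg`; the values here are illustrative only:

import torch

from isaaclab.utils.noise import GaussianNoiseCfg, ResetSampledNoiseModelCfg

# per-episode additive noise: one Gaussian sample per environment, held fixed between resets
cfg = ResetSampledNoiseModelCfg(noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.05, operation="add"))
model = cfg.class_type(cfg, num_envs=4, device="cpu")

model.reset()  # samples one noise value per environment
obs = torch.zeros(4, 7)
noisy_obs = model(obs)  # the same per-env offset on every component, every step, until the next reset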
@@ -10,6 +10,7 @@

* :obj:`UR10_CFG`: The UR10 arm without a gripper.
* :obj:`UR10E_ROBOTIQ_GRIPPER_CFG`: The UR10E arm with Robotiq_2f_140 gripper.
* :obj:`UR10e_ROBOTIQ_2F_85_CFG`: The UR10E arm with Robotiq 2F-85 gripper.

Reference: https://github.com/ros-industrial/universal_robot
"""
@@ -163,3 +164,43 @@
)

"""Configuration of UR-10E arm with Robotiq_2f_140 gripper."""

UR10e_ROBOTIQ_2F_85_CFG = UR10e_CFG.copy()
UR10e_ROBOTIQ_2F_85_CFG.spawn.variants = {"Gripper": "Robotiq_2f_85"}
UR10e_ROBOTIQ_2F_85_CFG.spawn.rigid_props.disable_gravity = True
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos["finger_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_inner_finger_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_inner_finger_knuckle_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0
# the main actuated joint of the gripper
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg(
joint_names_expr=["finger_joint"], # "right_outer_knuckle_joint" is its mimic joint
effort_limit_sim=10.0,
velocity_limit_sim=1.0,
stiffness=11.25,
damping=0.1,
friction=0.0,
armature=0.0,
)
# enable the gripper to grasp in a parallel manner
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg(
joint_names_expr=[".*_inner_finger_joint"],
effort_limit_sim=1.0,
velocity_limit_sim=1.0,
stiffness=0.2,
damping=0.001,
friction=0.0,
armature=0.0,
)
# set PD gains to zero for the passive joints of the closed-loop gripper
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg(
joint_names_expr=[".*_inner_finger_knuckle_joint", "right_outer_knuckle_joint"],
effort_limit_sim=1.0,
velocity_limit_sim=1.0,
stiffness=0.0,
damping=0.0,
friction=0.0,
armature=0.0,
)

"""Configuration of UR-10E arm with Robotiq 2F-85 gripper."""
3 changes: 3 additions & 0 deletions source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
@@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg:
noise_std_type: Literal["scalar", "log"] = "scalar"
"""The type of noise standard deviation for the policy. Default is scalar."""

state_dependent_std: bool = False
"""Whether to use state-dependent standard deviation for the policy. Default is False."""

Collaborator

this might be a different PR.

Contributor Author

This param is used in the rsl rl config for the sim-to-real env. Are you saying that it should be separated into a new PR?

Collaborator

yes, a different PR to introduce this argument in rl_cfg.py

actor_obs_normalization: bool = MISSING
"""Whether to normalize the observation for the actor network."""

@@ -0,0 +1,6 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Assemble 3 gears into a base."""
@@ -0,0 +1,9 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Configurations for arm-based gear assembly environments."""

# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
@@ -0,0 +1,75 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import gymnasium as gym

from . import agents

##
# Register Gym environments.
##


# UR10e with 2F-140 gripper
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F140GearAssemblyEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-Play-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F140GearAssemblyEnvCfg_PLAY",
},
)

# UR10e with 2F-85 gripper
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F85GearAssemblyEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-Play-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F85GearAssemblyEnvCfg_PLAY",
},
)

# UR10e with 2F-140 gripper - ROS Inference
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-ROS-Inference-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.ros_inference_env_cfg:UR10e2F140GearAssemblyROSInferenceEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

# UR10e with 2F-85 gripper - ROS Inference
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-ROS-Inference-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.ros_inference_env_cfg:UR10e2F85GearAssemblyROSInferenceEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)
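A sketch of instantiating one of the registered tasks, assuming the standard `isaaclab_tasks` helpers (the package containing these registrations is not shown in full here):

import gymnasium as gym

from isaaclab_tasks.utils import parse_env_cfg

task = "Isaac-Deploy-GearAssembly-UR10e-2F85-v0"
env_cfg = parse_env_cfg(task, num_envs=16)  # resolves the env_cfg_entry_point registered above
env = gym.make(task, cfg=env_cfg)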
@@ -0,0 +1,4 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
@@ -0,0 +1,51 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg


@configclass
class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
seed = 7858
num_steps_per_env = 512
max_iterations = 1500
save_interval = 50
experiment_name = "gear_assembly_ur10e"
clip_actions = 1.0
resume = False
value_normalization = False
obs_groups = {
"policy": ["policy"],
"critic": ["critic"],
}
policy = RslRlPpoActorCriticRecurrentCfg(
state_dependent_std=True,
init_noise_std=1.0,
actor_obs_normalization=True,
critic_obs_normalization=True,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
noise_std_type="log",
activation="elu",
rnn_type="lstm",
rnn_hidden_dim=256,
rnn_num_layers=2,
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0,
num_learning_epochs=8,
num_mini_batches=16,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.008,
max_grad_norm=1.0,
)
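With this runner config registered under the `rsl_rl_cfg_entry_point` shown earlier, training would follow the usual RSL-RL workflow; assuming the standard script location in recent Isaac Lab:

./isaaclab.sh -p scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Deploy-GearAssembly-UR10e-2F85-v0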