Collaborator
please capture a high-quality image with the robot base. : )))

Contributor Author
done. Does this one work?

1 change: 1 addition & 0 deletions docs/source/setup/walkthrough/index.rst
@@ -22,3 +22,4 @@ represents a different stage of modifying the default template project to achieve
technical_env_design
training_jetbot_gt
training_jetbot_reward_exploration
sim_to_real_training
495 changes: 495 additions & 0 deletions docs/source/setup/walkthrough/sim_to_real_training.rst

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -242,11 +242,21 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
return torch.sum(out_of_limits, dim=1)


def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 norm."""
return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)


def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)


def action(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel (summed for each environment)."""
return torch.sum(env.action_manager.action**2, dim=-1)


def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action), dim=1)
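The new terms differ from the existing `_l2` variants only in the kernel (an L2 norm versus summed squares). A minimal sketch of wiring one of each into a manager-based environment, assuming the functions are re-exported through `isaaclab.envs.mdp` like the existing reward terms; weights are illustrative only:

from isaaclab.envs import mdp
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.utils import configclass

@configclass
class RewardsCfg:
    # penalize fast changes in the action signal (L2-norm kernel)
    action_rate = RewTerm(func=mdp.action_rate, weight=-0.01)
    # penalize large action magnitudes (summed-squares kernel)
    action_magnitude = RewTerm(func=mdp.action, weight=-0.001)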
18 changes: 16 additions & 2 deletions source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,22 @@

"""
from .noise_cfg import NoiseCfg # noqa: F401
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
from .noise_cfg import (
ConstantNoiseCfg,
GaussianNoiseCfg,
NoiseModelCfg,
NoiseModelWithAdditiveBiasCfg,
ResetSampledNoiseModelCfg,
UniformNoiseCfg,
)
from .noise_model import (
NoiseModel,
NoiseModelWithAdditiveBias,
ResetSampledNoiseModel,
constant_noise,
gaussian_noise,
uniform_noise,
)

# Backward compatibility
ConstantBiasNoiseCfg = ConstantNoiseCfg
13 changes: 13 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,16 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):
Defaults to True.
"""


@configclass
class ResetSampledNoiseModelCfg(NoiseModelCfg):
"""Configuration for a noise model that samples noise ONLY during reset."""

class_type: type = noise_model.ResetSampledNoiseModel

noise_cfg: NoiseCfg = MISSING
"""The noise configuration for the noise.
Based on this configuration, the noise is sampled at every reset of the noise model.
"""
70 changes: 70 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,73 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
# now re-sample that expanded bias in-place
self.reset()
return super().__call__(data) + self._bias


class ResetSampledNoiseModel(NoiseModel):
"""Noise model that samples noise ONLY during reset and applies it consistently.

The noise is sampled from the configured distribution ONLY during reset and applied consistently
until the next reset. Unlike regular noise that generates new random values every step,
this model maintains the same noise values throughout an episode.
"""

def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
# initialize parent class
super().__init__(noise_model_cfg, num_envs, device)
# store the noise configuration
self._noise_cfg = noise_model_cfg.noise_cfg
self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
self._num_components: int | None = None

def reset(self, env_ids: Sequence[int] | None = None):
"""Reset the noise model by sampling NEW noise values.

This method samples new noise for the specified environments using the configured noise function.
The sampled noise will remain constant until the next reset.

Args:
env_ids: The environment ids to reset the noise model for. Defaults to None,
in which case all environments are considered.
"""
# resolve the environment ids and how many of them there are;
# note: with ``slice(None)``, ``env_ids.stop`` is None, so the count cannot be derived from the slice itself
if env_ids is None:
    env_ids = slice(None)
    num_ids = self._sampled_noise.shape[0]
else:
    num_ids = len(env_ids)

# sample new noise by passing zeros through the configured noise function
dummy_data = torch.zeros((num_ids, 1), device=self._device)
sampled_noise = self._noise_cfg.func(dummy_data, self._noise_cfg)

self._sampled_noise[env_ids] = sampled_noise

def __call__(self, data: torch.Tensor) -> torch.Tensor:
"""Apply the pre-sampled noise to the data.

This method applies the noise that was sampled during the last reset.
No new noise is generated - the same values are used consistently.

Args:
data: The data to apply the noise to. Shape is (num_envs, ...).

Returns:
The data with the noise applied. Shape is the same as the input data.
"""
# on first apply, expand noise to match last dim of data
if self._num_components is None:
*_, self._num_components = data.shape
# expand noise from (num_envs,1) to (num_envs, num_components)
self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)

# apply the noise based on operation
if self._noise_cfg.operation == "add":
return data + self._sampled_noise
elif self._noise_cfg.operation == "scale":
return data * self._sampled_noise
elif self._noise_cfg.operation == "abs":
return self._sampled_noise
else:
raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")
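A minimal usage sketch, assuming the constructor signature shown above and the existing `GaussianNoiseCfg`; the values here are illustrative only:

import torch

from isaaclab.utils.noise import GaussianNoiseCfg, ResetSampledNoiseModelCfg

# per-episode additive noise: one Gaussian sample per environment, held fixed between resets
cfg = ResetSampledNoiseModelCfg(noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.05, operation="add"))
model = cfg.class_type(cfg, num_envs=4, device="cpu")

model.reset()  # samples one noise value per environment
obs = torch.zeros(4, 7)
noisy_obs = model(obs)  # the same per-env offset on every component, every step, until the next reset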
@@ -10,6 +10,7 @@

* :obj:`UR10_CFG`: The UR10 arm without a gripper.
* :obj:`UR10E_ROBOTIQ_GRIPPER_CFG`: The UR10E arm with Robotiq_2f_140 gripper.
* :obj:`UR10e_ROBOTIQ_2F_85_CFG`: The UR10E arm with Robotiq 2F-85 gripper.

Reference: https://github.com/ros-industrial/universal_robot
"""
@@ -163,3 +164,43 @@
)

"""Configuration of UR-10E arm with Robotiq_2f_140 gripper."""

UR10e_ROBOTIQ_2F_85_CFG = UR10e_CFG.copy()
UR10e_ROBOTIQ_2F_85_CFG.spawn.variants = {"Gripper": "Robotiq_2f_85"}
UR10e_ROBOTIQ_2F_85_CFG.spawn.rigid_props.disable_gravity = True
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos["finger_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_inner_finger_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_inner_finger_knuckle_joint"] = 0.0
UR10e_ROBOTIQ_2F_85_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0
# the main actuated joint of the gripper
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg(
joint_names_expr=["finger_joint"], # "right_outer_knuckle_joint" is its mimic joint
effort_limit_sim=10.0,
velocity_limit_sim=1.0,
stiffness=11.25,
damping=0.1,
friction=0.0,
armature=0.0,
)
# enable the gripper to grasp in a parallel manner
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg(
joint_names_expr=[".*_inner_finger_joint"],
effort_limit_sim=1.0,
velocity_limit_sim=1.0,
stiffness=0.2,
damping=0.001,
friction=0.0,
armature=0.0,
)
# set PD gains to zero for the passive joints of the closed-loop gripper
UR10e_ROBOTIQ_2F_85_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg(
joint_names_expr=[".*_inner_finger_knuckle_joint", "right_outer_knuckle_joint"],
effort_limit_sim=1.0,
velocity_limit_sim=1.0,
stiffness=0.0,
damping=0.0,
friction=0.0,
armature=0.0,
)

"""Configuration of UR-10E arm with Robotiq 2F-85 gripper."""
3 changes: 3 additions & 0 deletions source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
@@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg:
noise_std_type: Literal["scalar", "log"] = "scalar"
"""The type of noise standard deviation for the policy. Default is scalar."""

state_dependent_std: bool = False
"""Whether to use state-dependent standard deviation for the policy. Default is False."""

Collaborator

this might be a different PR.

Contributor Author

This param is used in the rsl rl config for the sim-to-real env. Are you saying that it should be separated into a new PR?

Collaborator

yes, a different PR to introduce this argument in rl_cfg.py

actor_obs_normalization: bool = MISSING
"""Whether to normalize the observation for the actor network."""

@@ -0,0 +1,6 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Assemble 3 gears into a base."""
@@ -0,0 +1,9 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Configurations for arm-based gear assembly environments."""

# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
@@ -0,0 +1,75 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import gymnasium as gym

from . import agents

##
# Register Gym environments.
##


# UR10e with 2F-140 gripper
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F140GearAssemblyEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-Play-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F140GearAssemblyEnvCfg_PLAY",
},
)

# UR10e with 2F-85 gripper
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F85GearAssemblyEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-Play-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10e2F85GearAssemblyEnvCfg_PLAY",
},
)

# UR10e with 2F-140 gripper - ROS Inference
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F140-ROS-Inference-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.ros_inference_env_cfg:UR10e2F140GearAssemblyROSInferenceEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)

# UR10e with 2F-85 gripper - ROS Inference
gym.register(
id="Isaac-Deploy-GearAssembly-UR10e-2F85-ROS-Inference-v0",
entry_point="isaaclab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.ros_inference_env_cfg:UR10e2F85GearAssemblyROSInferenceEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
},
)
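A sketch of instantiating one of the registered tasks, assuming the standard `isaaclab_tasks` helpers (the package containing these registrations is not shown in full here):

import gymnasium as gym

from isaaclab_tasks.utils import parse_env_cfg

task = "Isaac-Deploy-GearAssembly-UR10e-2F85-v0"
env_cfg = parse_env_cfg(task, num_envs=16)  # resolves the env_cfg_entry_point registered above
env = gym.make(task, cfg=env_cfg)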
@@ -0,0 +1,4 @@
# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
@@ -0,0 +1,51 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg


@configclass
class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
seed = 7858
num_steps_per_env = 512
max_iterations = 1500
save_interval = 50
experiment_name = "gear_assembly_ur10e"
clip_actions = 1.0
resume = False
value_normalization = False
obs_groups = {
"policy": ["policy"],
"critic": ["critic"],
}
policy = RslRlPpoActorCriticRecurrentCfg(
state_dependent_std=True,
init_noise_std=1.0,
actor_obs_normalization=True,
critic_obs_normalization=True,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
noise_std_type="log",
activation="elu",
rnn_type="lstm",
rnn_hidden_dim=256,
rnn_num_layers=2,
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0,
num_learning_epochs=8,
num_mini_batches=16,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.008,
max_grad_norm=1.0,
)
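With this runner config registered under the `rsl_rl_cfg_entry_point` shown earlier, training would follow the usual RSL-RL workflow; assuming the standard script location in recent Isaac Lab:

./isaaclab.sh -p scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Deploy-GearAssembly-UR10e-2F85-v0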