diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index b2056551f0c..a73c3b2881b 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -56,7 +56,7 @@
 from packaging import version
 
 # check minimum supported rsl-rl version
-RSL_RL_VERSION = "3.0.1"
+RSL_RL_VERSION = "3.1.0"
 installed_version = metadata.version("rsl-rl-lib")
 if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
     if platform.system() == "Windows":
diff --git a/source/isaaclab/isaaclab/envs/mdp/rewards.py b/source/isaaclab/isaaclab/envs/mdp/rewards.py
index 20df2a557eb..4232c0a8895 100644
--- a/source/isaaclab/isaaclab/envs/mdp/rewards.py
+++ b/source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
     )
     return torch.sum(out_of_limits, dim=1)
 
+def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Penalize the rate of change of the actions using L2 norm."""
+    return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)
 
 def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
     """Penalize the rate of change of the actions using L2 squared kernel."""
     return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)
 
+def action(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Penalize the actions using L2 squared kernel (summed for each environment)."""
+    return torch.sum(env.action_manager.action**2, dim=-1)
 
 def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
     """Penalize the actions using L2 squared kernel."""
diff --git a/source/isaaclab/isaaclab/utils/noise/__init__.py b/source/isaaclab/isaaclab/utils/noise/__init__.py
index d2f703758b0..ef2cea78582 100644
--- a/source/isaaclab/isaaclab/utils/noise/__init__.py
+++ b/source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,8 @@
 
 """
 from .noise_cfg import NoiseCfg  # noqa: F401
-from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
-from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
+from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg
+from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise
 
 # Backward compatibility
 ConstantBiasNoiseCfg = ConstantNoiseCfg
diff --git a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
index 0c49828b3ff..3de46bc89f1 100644
--- a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
+++ b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):
 
     Defaults to True.
     """
+
+@configclass
+class ResetSampledNoiseModelCfg(NoiseModelCfg):
+    """Configuration for a noise model that samples noise ONLY during reset."""
+
+    class_type: type = noise_model.ResetSampledNoiseModel
+
+    noise_cfg: NoiseCfg = MISSING
+    """The noise configuration for the noise.
+
+    Based on this configuration, the noise is sampled at every reset of the noise model.
+    """
diff --git a/source/isaaclab/isaaclab/utils/noise/noise_model.py b/source/isaaclab/isaaclab/utils/noise/noise_model.py
index dae36b55c72..d780d916067 100644
--- a/source/isaaclab/isaaclab/utils/noise/noise_model.py
+++ b/source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
             # now re-sample that expanded bias in-place
             self.reset()
         return super().__call__(data) + self._bias
+
+class ResetSampledNoiseModel(NoiseModel):
+    """Noise model that samples noise ONLY during reset and applies it consistently.
+
+    The noise is sampled from the configured distribution ONLY during reset and applied consistently
+    until the next reset. Unlike regular noise that generates new random values every step,
+    this model maintains the same noise values throughout an episode.
+    """
+
+    def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
+        # initialize parent class
+        super().__init__(noise_model_cfg, num_envs, device)
+        # store the noise configuration
+        self._noise_cfg = noise_model_cfg.noise_cfg
+        self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
+        self._num_components: int | None = None
+
+    def reset(self, env_ids: Sequence[int] | None = None):
+        """Reset the noise model by sampling NEW noise values.
+
+        This method samples new noise for the specified environments using the configured noise function.
+        The sampled noise will remain constant until the next reset.
+
+        Args:
+            env_ids: The environment ids to reset the noise model for. Defaults to None,
+                in which case all environments are considered.
+        """
+        # resolve the environment ids
+        if env_ids is None:
+            env_ids = slice(None)
+        
+        # Use the existing noise function to sample new noise
+        # Create dummy data to sample from the noise function
+        dummy_data = torch.zeros((env_ids.stop - env_ids.start if isinstance(env_ids, slice) else len(env_ids), 1), 
+                                device=self._device)
+        
+        # Sample noise using the configured noise function
+        sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg)
+        
+        self._sampled_noise[env_ids] = sampled_noise
+
+    def __call__(self, data: torch.Tensor) -> torch.Tensor:
+        """Apply the pre-sampled noise to the data.
+
+        This method applies the noise that was sampled during the last reset.
+        No new noise is generated - the same values are used consistently.
+
+        Args:
+            data: The data to apply the noise to. Shape is (num_envs, ...).
+
+        Returns:
+            The data with the noise applied. Shape is the same as the input data.
+        """
+        # on first apply, expand noise to match last dim of data
+        if self._num_components is None:
+            *_, self._num_components = data.shape
+            # expand noise from (num_envs,1) to (num_envs, num_components)
+            self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)
+        
+        # apply the noise based on operation
+        if self._noise_cfg.operation == "add":
+            return data + self._sampled_noise
+        elif self._noise_cfg.operation == "scale":
+            return data * self._sampled_noise
+        elif self._noise_cfg.operation == "abs":
+            return self._sampled_noise
+        else:
+            raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")
diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
index 4433b824235..053f0ac49ef 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
@@ -55,47 +55,63 @@
 UR10e_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/UniversalRobots/ur10e/ur10e.usd",
+        activate_contact_sensors=True,
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
             disable_gravity=True,
             max_depenetration_velocity=5.0,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=3666.0,
+            enable_gyroscopic_forces=True,
+            solver_position_iteration_count=192,
+            solver_velocity_iteration_count=1,
+            max_contact_impulse=1e32,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1
+            enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
         ),
-        activate_contact_sensors=False,
+        collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
     ),
     init_state=ArticulationCfg.InitialStateCfg(
         joint_pos={
-            "shoulder_pan_joint": 3.141592653589793,
-            "shoulder_lift_joint": -1.5707963267948966,
-            "elbow_joint": 1.5707963267948966,
-            "wrist_1_joint": -1.5707963267948966,
-            "wrist_2_joint": -1.5707963267948966,
-            "wrist_3_joint": 0.0,
+            "shoulder_pan_joint": 2.7228e+00,
+            "shoulder_lift_joint": -8.3962e-01,
+            "elbow_joint": 1.3684e+00,
+            "wrist_1_joint": -2.1048e+00,
+            "wrist_2_joint": -1.5691e+00,
+            "wrist_3_joint": -1.9896e+00,
+            "finger_joint": 0.0,
         },
         pos=(0.0, 0.0, 0.0),
-        rot=(1.0, 0.0, 0.0, 0.0),
+        rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
         # 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint'
         "shoulder": ImplicitActuatorCfg(
             joint_names_expr=["shoulder_.*"],
+            effort_limit=330.0,
+            velocity_limit=2.175,
             stiffness=1320.0,
-            damping=72.6636085,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
         "elbow": ImplicitActuatorCfg(
             joint_names_expr=["elbow_joint"],
+            effort_limit=150.0,
+            velocity_limit=2.175,
             stiffness=600.0,
-            damping=34.64101615,
+            damping=50.0,
             friction=0.0,
             armature=0.0,
         ),
         "wrist": ImplicitActuatorCfg(
             joint_names_expr=["wrist_.*"],
+            effort_limit=56.0,
+            velocity_limit=2.175,
             stiffness=216.0,
-            damping=29.39387691,
+            damping=30.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -132,34 +148,139 @@
 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_inner_finger_pad_joint"] = 0.0
 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0
 # the major actuator joint for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg(
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["finger_joint"] = ImplicitActuatorCfg(
     joint_names_expr=["finger_joint"],
-    effort_limit_sim=10.0,
-    velocity_limit_sim=1.0,
-    stiffness=11.25,
-    damping=0.1,
+    effort_limit_sim=1.0,
+    velocity_limit=2.175,
+    stiffness=20.0,
+    damping=8.94,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
 )
 # the auxiliary actuator joint for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg(
-    joint_names_expr=[".*_inner_finger_joint"],
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_1"] = ImplicitActuatorCfg(
+    joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'],
     effort_limit_sim=1.0,
-    velocity_limit_sim=1.0,
-    stiffness=0.2,
-    damping=0.001,
+    velocity_limit=2.175,
+    stiffness=0.0,
+    damping=0.0,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
 )
 # the passive joints for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg(
-    joint_names_expr=[".*_inner_finger_pad_joint", ".*_outer_finger_joint", "right_outer_knuckle_joint"],
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_2"] = ImplicitActuatorCfg(
+    joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'],
     effort_limit_sim=1.0,
-    velocity_limit_sim=1.0,
-    stiffness=0.0,
-    damping=0.0,
+    velocity_limit=2.175,
+    stiffness=400.0,
+    damping=20.0,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
+)
+
+UR10e_2f140_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/ur10e_robotiq_140_variant.usd",
+        # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd",
+        # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd",
+        # rigid_props=sim_utils.RigidBodyPropertiesCfg(
+        #     disable_gravity=True,
+        #     max_depenetration_velocity=5.0,
+        # ),
+        activate_contact_sensors=True,
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=True,
+            max_depenetration_velocity=5.0,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=3666.0,
+            enable_gyroscopic_forces=True,
+            solver_position_iteration_count=192,
+            solver_velocity_iteration_count=1,
+            max_contact_impulse=1e32,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
+        ),
+        collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+        # articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+        #         enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1
+        #     ),
+        # activate_contact_sensors=False,
+    ),    
+    init_state=ArticulationCfg.InitialStateCfg(
+            joint_pos={
+                "shoulder_pan_joint": 2.7228e+00,
+                "shoulder_lift_joint": -8.3962e-01,
+                "elbow_joint": 1.3684e+00,
+                "wrist_1_joint": -2.1048e+00,
+                "wrist_2_joint": -1.5691e+00,
+                "wrist_3_joint": -1.9896e+00,
+                "finger_joint": 0.0,
+            },
+            pos=(0.0, 0.0, 0.0),
+            # @ireti: Figure out why this rotaion is requried.
+            # Otherwise the IK during initialization is does not converge.
+            rot=(0.0, 0.0, 0.0, 1.0),
+        ),
+    actuators={
+            # @ireti: These values were obtained from 2025-05-30_20-35-06/params/env.yaml
+            "shoulder": ImplicitActuatorCfg(
+                joint_names_expr=["shoulder_.*"],
+                effort_limit=330.0,
+                velocity_limit=2.175,
+                stiffness=1320.0,
+                damping=72.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "elbow": ImplicitActuatorCfg(
+                joint_names_expr=["elbow_joint"],
+                effort_limit=150.0,
+                velocity_limit=2.175,
+                stiffness=600.0,
+                damping=50.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "wrist": ImplicitActuatorCfg(
+                joint_names_expr=["wrist_.*"],
+                effort_limit=56.0,
+                velocity_limit=2.175,
+                stiffness=216.0,
+                damping=30.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "finger_joint": ImplicitActuatorCfg(
+                joint_names_expr=["finger_joint"],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=20.0,
+                damping=8.94,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+            "others_1": ImplicitActuatorCfg(
+                joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=0.0,
+                damping=0.0,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+            "others_2": ImplicitActuatorCfg(
+                joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=400.0,
+                damping=20.0,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+    }
 )
 
 """Configuration of UR-10E arm with Robotiq_2f_140 gripper."""
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
index 5b03a7c639b..bfc212f4006 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
@@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg:
     noise_std_type: Literal["scalar", "log"] = "scalar"
     """The type of noise standard deviation for the policy. Default is scalar."""
 
+    state_dependent_std: bool = False
+    """Whether to use state-dependent standard deviation for the policy. Default is False."""
+
     actor_obs_normalization: bool = MISSING
     """Whether to normalize the observation for the actor network."""
 
diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py
index f9ddcdb0fa5..1d69ba74a4f 100644
--- a/source/isaaclab_rl/setup.py
+++ b/source/isaaclab_rl/setup.py
@@ -46,11 +46,10 @@
         "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11",
         "gym",
     ],  # rl-games still needs gym :(
-    "rsl-rl": ["rsl-rl-lib==3.0.1"],
+    "rsl-rl": ["rsl-rl-lib==3.1.0"],
 }
 # Add the names with hyphens as aliases for convenience
 EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"]
-EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"]
 
 # Cumulation of all extra-requires
 EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values()))
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py
new file mode 100644
index 00000000000..525f8c7d27a
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Assemble 3 gears into a base."""
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py
new file mode 100644
index 00000000000..257dea87052
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Configurations for arm-based gear assembly environments."""
+
+# We leave this file empty since we don't want to expose any configs in this package directly.
+# We still need this file to import the "config" module in the parent package.
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py
new file mode 100644
index 00000000000..0f3d90bbcad
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+
+from . import agents
+##
+# Register Gym environments.
+##
+
+gym.register(
+    id="Isaac-GearAssembly-UR10e-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
+        # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyPPORunnerCfg",
+    },
+)
+
+gym.register(
+    id="Isaac-GearAssembly-UR10e-Play-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg_PLAY",
+        # "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
+        # "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py
new file mode 100644
index 00000000000..bcc238c84a9
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 00000000000..7d74c5e7d8e
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 512
+    max_iterations = 1500
+    save_interval = 10
+    experiment_name = "gear_assembly_ur10e"
+    empirical_normalization = True
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=8,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )
+
+
+@configclass
+class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    seed = 7858
+    num_steps_per_env = 512
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "gear_assembly_ur10e"
+    empirical_normalization = True
+    clip_actions = 1.0
+    resume = False
+    policy = RslRlPpoActorCriticRecurrentCfg(
+        state_dependent_std=True,
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        noise_std_type="log",
+        activation="elu",
+        rnn_type="lstm",
+        rnn_hidden_dim=256,
+        rnn_num_layers=2,
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=16,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
new file mode 100644
index 00000000000..2b48e109d1e
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -0,0 +1,231 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import math
+import os
+import torch
+
+from isaaclab.utils import configclass
+
+from isaaclab.managers import EventTermCfg as EventTerm
+from isaaclab.managers import SceneEntityCfg
+
+
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
+from isaaclab_tasks.manager_based.manipulation.deploy.gear_assembly.gear_assembly_env_cfg import GearAssemblyEnvCfg
+from isaaclab.sensors import FrameTransformerCfg, OffsetCfg
+from isaaclab.markers.config import FRAME_MARKER_CFG
+
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp.events as gear_assembly_events
+
+
+##
+# Pre-defined configs
+##
+from isaaclab_assets.robots.universal_robots import UR10e_2f140_CFG  # isort: skip
+
+
+##
+# Environment configuration
+##
+
+@configclass
+class EventCfg:
+    """Configuration for events."""
+
+    robot_joint_stiffness_and_damping = EventTerm(
+        func=mdp.randomize_actuator_gains,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),  # only the arm joints are randomized
+            "stiffness_distribution_params": (0.75, 1.5),
+            "damping_distribution_params": (0.3, 3.0),
+            "operation": "scale",
+            "distribution": "log_uniform",
+        },
+    )
+
+    joint_friction = EventTerm(
+        func=mdp.randomize_joint_parameters,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),
+            "friction_distribution_params": (0.3, 0.7),
+            "operation": "add",
+            "distribution": "uniform",
+        },
+    )
+
+    medium_gear_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_medium", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+    gear_base_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    robot_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    randomize_gear_type = EventTerm(
+        func=gear_assembly_events.randomize_gear_type,
+        mode="reset",
+        params={
+            # "gear_types": ["gear_small", "gear_medium", "gear_large"]
+            "gear_types": ["gear_medium"]
+        },
+    )
+
+
+    reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
+
+
+    randomize_gears_and_base_pose = EventTerm(
+        func=gear_assembly_events.randomize_gears_and_base_pose,
+        mode="reset",
+        params={
+            "pose_range": {
+                # "x": [0.0, 0.0],
+                # "y": [0.0, 0.0],
+                # "z": [0.0, 0.0],
+                # "roll": [-0.0, 0.0], # 0 degree
+                # "pitch": [-0.0, 0.0], # 0 degree
+                # "yaw": [-0.0, 0.0], # 0 degree
+                "x": [-0.1, 0.1],
+                "y": [-0.25, 0.25],
+                "z": [-0.1,  0.1],
+                "roll": [-math.pi/90, math.pi/90], # 2 degree
+                "pitch": [-math.pi/90, math.pi/90], # 2 degree
+                "yaw": [-math.pi/6, math.pi/6], # 2 degree
+            },
+            "gear_pos_range": {
+                "x": [-0.02, 0.02],
+                "y": [-0.02, 0.02],
+                "z": [0.0575, 0.0775], # 0.045 + 0.0225
+                # "x": [-0.0, 0.0],
+                # "y": [-0.0, 0.0],
+                # "z": [0.0675, 0.0675],
+            },
+            "rot_randomization_range": {
+                "roll": [-math.pi/36, math.pi/36], # 5 degree
+                "pitch": [-math.pi/36, math.pi/36], # 5 degree
+                "yaw": [-math.pi/36, math.pi/36], # 5 degree
+            },
+            "velocity_range": {},
+        },
+    )
+
+    set_robot_to_grasp_pose = EventTerm(
+        func=gear_assembly_events.set_robot_to_grasp_pose,
+        mode="reset",
+        params={
+            "robot_asset_cfg": SceneEntityCfg("robot"),
+            "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0],
+            "pos_randomization_range": {
+                "x": [-0.0, 0.0],
+                "y": [-0.005, 0.005],
+                "z": [-0.003, 0.003]
+            },
+            # "pos_randomization_range": None
+        },
+    )
+
+
+
+
+@configclass
+class UR10eGearAssemblyEnvCfg(GearAssemblyEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # switch robot to ur10 with local asset path
+        self.scene.robot = UR10e_2f140_CFG.replace(
+            prim_path="{ENV_REGEX_NS}/Robot",
+            # spawn=UR10e_2f140_CFG.spawn.replace(
+            #     usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd")
+            # )
+        )
+
+
+        self.observations.policy.joint_pos.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"]
+        self.observations.policy.joint_vel.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"]
+
+        # override events
+        self.events = EventCfg()
+
+        # overriderride command generator body
+        self.joint_action_scale = 0.025
+        self.action_scale_joint_space = [self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale]
+        self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
+            asset_name="robot", joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"], scale=self.joint_action_scale, use_zero_offset=True
+        )
+
+        self.obs_order = ["arm_dof_pos", "arm_dof_vel", "shaft_pos", "shaft_quat"]
+        self.policy_action_space = "joint"
+        self.arm_joint_names = [
+            "shoulder_pan_joint",
+            "shoulder_lift_joint",
+            "elbow_joint",
+            "wrist_1_joint",
+            "wrist_2_joint",
+            "wrist_3_joint",
+        ]
+        self.policy_action_space = "joint"
+        self.action_space = 6
+        self.state_space = 0
+        self.observation_space = 19
+
+        # Set joint_action_scale from the existing arm_action.scale
+        self.joint_action_scale = self.actions.arm_action.scale
+
+        self.action_scale_joint_space = [
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+        ]
+        self.fixed_asset_init_pos_center = (1.0200, -0.2100, -0.1)
+        self.fixed_asset_init_pos_range = (0.1, 0.25, 0.1)
+        self.fixed_asset_init_orn_deg = (180.0, 0.0, 90.0)
+        self.fixed_asset_init_orn_deg_range = (5.0, 5.0, 30.0)
+
+
+@configclass
+class UR10eGearAssemblyEnvCfg_PLAY(UR10eGearAssemblyEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
new file mode 100644
index 00000000000..4e438ac979b
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -0,0 +1,329 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from dataclasses import MISSING
+import os
+
+import torch
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.sensors import FrameTransformerCfg
+from isaaclab.managers import ActionTermCfg as ActionTerm
+from isaaclab.managers import CurriculumTermCfg as CurrTerm
+from isaaclab.managers import EventTermCfg as EventTerm
+
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.sim.simulation_cfg import PhysxCfg, SimulationCfg
+from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.noise import ResetSampledNoiseModelCfg, UniformNoiseCfg
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
+from isaaclab_tasks.manager_based.manipulation.deploy.reach.reach_env_cfg import ReachEnvCfg
+
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+# Get the directory where this configuration file is located
+CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
+ASSETS_DIR = os.path.join(CONFIG_DIR, "assets")
+
+##
+# Environment configuration
+##
+
+@configclass
+class GearAssemblySceneCfg(InteractiveSceneCfg):
+    """Configuration for the scene with a robotic arm."""
+    
+    # Disable scene replication to allow USD-level randomization
+    replicate_physics = False
+
+    # world
+    ground = AssetBaseCfg(
+        prim_path="/World/ground",
+        spawn=sim_utils.GroundPlaneCfg(),
+        init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
+    )
+
+    factory_gear_base = RigidObjectCfg(
+        prim_path="{ENV_REGEX_NS}/FactoryGearBase",
+        # TODO: change to common isaac sim directory
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_base.usd",
+            # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd",
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd",
+            activate_contact_sensors=True,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                kinematic_enabled=True,
+                max_depenetration_velocity=5.0,
+                linear_damping=0.0,
+                angular_damping=0.0,
+                max_linear_velocity=1000.0,
+                max_angular_velocity=3666.0,
+                enable_gyroscopic_forces=True,
+                solver_position_iteration_count=196,
+                solver_velocity_iteration_count=1,
+                max_contact_impulse=1e32,
+            ),
+            mass_props=sim_utils.MassPropertiesCfg(mass=None),
+            collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd",
+            # rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    )
+
+    # factory_gear_small = RigidObjectCfg(
+    #     prim_path="{ENV_REGEX_NS}/FactoryGearSmall",
+    #     # TODO: change to common isaac sim directory
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_small.usd",
+    #         # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"),
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd",
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd",
+    #         activate_contact_sensors=True,
+    #         rigid_props=sim_utils.RigidBodyPropertiesCfg(
+    #             disable_gravity=False,
+    #             kinematic_enabled=False,
+    #             max_depenetration_velocity=5.0,
+    #             linear_damping=0.0,
+    #             angular_damping=0.0,
+    #             max_linear_velocity=1000.0,
+    #             max_angular_velocity=3666.0,
+    #             enable_gyroscopic_forces=True,
+    #             solver_position_iteration_count=196,
+    #             solver_velocity_iteration_count=1,
+    #             max_contact_impulse=1e32,
+    #         ),
+    #         mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+    #         collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+    #     ),
+    #     init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    # )
+
+    factory_gear_medium = RigidObjectCfg(
+        prim_path="{ENV_REGEX_NS}/FactoryGearMedium",
+        # TODO: change to common isaac sim directory
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_medium.usd",
+            # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd",
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd",
+            activate_contact_sensors=True,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                kinematic_enabled=False,
+                max_depenetration_velocity=5.0,
+                linear_damping=0.0,
+                angular_damping=0.0,
+                max_linear_velocity=1000.0,
+                max_angular_velocity=3666.0,
+                enable_gyroscopic_forces=True,
+                solver_position_iteration_count=196,
+                solver_velocity_iteration_count=1,
+                max_contact_impulse=1e32,
+            ),
+            mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+            collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    )
+
+    # factory_gear_large = RigidObjectCfg(
+    #     prim_path="{ENV_REGEX_NS}/FactoryGearLarge",
+    #     # TODO: change to common isaac sim directory
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_large.usd",
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd",
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd",
+    #         activate_contact_sensors=True,
+    #         rigid_props=sim_utils.RigidBodyPropertiesCfg(
+    #             disable_gravity=False,
+    #             kinematic_enabled=False,
+    #             max_depenetration_velocity=5.0,
+    #             linear_damping=0.0,
+    #             angular_damping=0.0,
+    #             max_linear_velocity=1000.0,
+    #             max_angular_velocity=3666.0,
+    #             enable_gyroscopic_forces=True,
+    #             solver_position_iteration_count=196,
+    #             solver_velocity_iteration_count=1,
+    #             max_contact_impulse=1e32,
+    #         ),
+    #         mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+    #         collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+    #     ),
+    #     init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    # )
+    
+
+    # robots
+    robot: ArticulationCfg = MISSING
+
+
+    # lights
+    light = AssetBaseCfg(
+        prim_path="/World/light",
+        spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
+    )
+
+@configclass
+class ActionsCfg:
+    """Action specifications for the MDP."""
+
+    arm_action: ActionTerm = MISSING
+    gripper_action: ActionTerm | None = None
+
+
+@configclass
+class ObservationsCfg:
+    """Observation specifications for the MDP."""
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        """Observations for policy group."""
+
+        # observation terms (order preserved)
+        joint_pos = ObsTerm(func=mdp.joint_pos,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        joint_vel = ObsTerm(func=mdp.joint_vel,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=ResetSampledNoiseModelCfg(
+            noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add")
+        ))
+        gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+        def __post_init__(self):
+            self.enable_corruption = True
+            self.concatenate_terms = True
+    
+    # @configclass
+    # class CriticCfg(ObsGroup):
+    #     """Observations for policy group."""
+
+    #     # observation terms (order preserved)
+    #     joint_pos = ObsTerm(func=mdp.joint_pos,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     joint_vel = ObsTerm(func=mdp.joint_vel,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
+    #     #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+    #     gear_pos = ObsTerm(func=mdp.gear_pos_w)
+    #     gear_quat = ObsTerm(func=mdp.gear_quat_w)
+
+    # observation groups
+    policy: PolicyCfg = PolicyCfg()
+    # critic: CriticCfg = CriticCfg()
+
+
+@configclass
+class EventCfg:
+    """Configuration for events."""
+
+    reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
+
+    reset_gear = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "pose_range": {
+                "x": [-0.05, 0.05],
+                "y": [-0.05, 0.05],
+                "z": [0.1, 0.15],
+            },
+            "velocity_range": {},
+            "asset_cfg": SceneEntityCfg("factory_gear_small"),
+        },
+    )
+
+
+@configclass
+class RewardsCfg:
+    """Reward terms for the MDP."""
+
+    end_effector_gear_keypoint_tracking = RewTerm(
+        func=mdp.keypoint_entity_error,
+        weight=-1.5,
+        params={
+            "asset_cfg_1": SceneEntityCfg("factory_gear_base"), 
+            "keypoint_scale": 0.15,
+        },
+    )
+
+    end_effector_gear_keypoint_tracking_exp = RewTerm(
+        func=mdp.keypoint_entity_error_exp,
+        weight=1.5,
+        params={
+            "asset_cfg_1": SceneEntityCfg("factory_gear_base"), 
+            "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
+            "kp_use_sum_of_exps": False,
+            "keypoint_scale": 0.15,
+        },
+    )
+
+    action_rate = RewTerm(func=mdp.action_rate, weight=-5.0e-06)
+
+
+@configclass
+class TerminationsCfg:
+    """Termination terms for the MDP."""
+
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+
+@configclass
+class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg):
+    # Scene settings
+    scene: GearAssemblySceneCfg = GearAssemblySceneCfg(num_envs=4096, env_spacing=2.5)
+    # Basic settings
+    observations: ObservationsCfg = ObservationsCfg()
+    actions: ActionsCfg = ActionsCfg()
+    # MDP settings
+    rewards: RewardsCfg = RewardsCfg()
+    terminations: TerminationsCfg = TerminationsCfg()
+    events: EventCfg = EventCfg()
+    sim: SimulationCfg = SimulationCfg(
+
+        physx=PhysxCfg(
+            gpu_collision_stack_size=2**31,  # Important to prevent collisionStackSize buffer overflow in contact-rich environments.
+            gpu_max_rigid_contact_count=2**23,
+            gpu_max_rigid_patch_count=2**23
+        ),
+    )
+    
+    def __post_init__(self):
+        """Post initialization."""
+        # general settings
+        self.episode_length_s = 6.66
+        self.viewer.eye = (3.5, 3.5, 3.5)
+        # simulation settings
+        self.decimation = 2
+        self.sim.render_interval = self.decimation
+        self.sim.dt = 1.0 / 120.0
+
+        self.hand_close_pos = 0.7
+
+        self.gear_offsets = {'gear_small': [0.076125, 0.0, 0.0],
+                            'gear_medium': [0.030375, 0.0, 0.0],
+                            'gear_large': [-0.045375, 0.0, 0.0]}
+        
+        self.gear_offsets_grasp = {'gear_small': [0.0, 0.076125, -0.26],
+                            'gear_medium': [0.0, 0.030375, -0.26],
+                            'gear_large': [0.0, -0.045375, -0.26]}
+
+        self.hand_grasp_pos = {"gear_small": 0.67,
+                               "gear_medium": 0.54,
+                               "gear_large": 0.52}
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
index 6686f9f5276..57059c62dac 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
@@ -8,3 +8,5 @@
 from isaaclab.envs.mdp import *  # noqa: F401, F403
 
 from .rewards import *  # noqa: F401, F403
+from .events import *  # noqa: F401, F403
+from .observations import *  # noqa: F401, F403
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
new file mode 100644
index 00000000000..1e70454510e
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
@@ -0,0 +1,451 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Functions specific to the in-hand dexterous manipulation environments."""
+
+
+from __future__ import annotations
+
+import carb
+import time
+import torch
+from typing import TYPE_CHECKING, Literal, Optional, List
+
+from isaaclab.assets import Articulation, RigidObject
+from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg
+from isaaclab.utils.math import sample_uniform
+import isaaclab.utils.math as math_utils
+import isaaclab.sim as sim_utils
+
+from isaaclab_tasks.direct.automate import factory_control as fc
+import random
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedEnv
+
+
+def set_default_joint_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    default_pose: torch.Tensor,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+):
+    # Set the default pose for robots in all envs
+    asset = env.scene[asset_cfg.name]
+    asset.data.default_joint_pos = torch.tensor(default_pose, device=env.device).repeat(env.num_envs, 1)
+
+def randomize_gear_type(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    gear_types: List[str] = ["gear_small", "gear_medium", "gear_large"]
+):
+    """Randomize the gear type being used for the specified environments.
+    
+    Args:
+        env: The environment containing the assets
+        env_ids: Environment IDs to randomize
+        gear_types: List of available gear types to choose from
+    """
+    # Randomly select gear type for each environment
+    selected_gears = [random.choice(gear_types) for _ in range(len(env_ids))]
+    
+    # Store the selected gear type in the environment instance
+    # This will be used by the observation functions
+    if not hasattr(env, '_current_gear_type'):
+        env._current_gear_type = ["gear_medium"] * env.num_envs
+    
+    for i, env_id in enumerate(env_ids):
+        env._current_gear_type[env_id] = selected_gears[i]
+    # print(f"env._current_gear_type: {env._current_gear_type}")
+
+def set_robot_to_grasp_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    pos_threshold: float = 1e-6,
+    rot_threshold: float = 1e-6,
+    max_iterations: int = 10,
+    rot_offset: Optional[list[float]] = None,
+    pos_randomization_range: Optional[dict] = None,
+    num_arm_joints: int = 6
+):
+    # Convert rotation offset to tensor if provided
+    if rot_offset is not None:
+        rot_offset_tensor = torch.tensor(rot_offset, device=env.device).unsqueeze(0).expand(len(env_ids), -1)
+    else:
+        rot_offset_tensor = None
+    
+    # Check if environment has gear type selection
+    if not hasattr(env, '_current_gear_type'):
+        raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.")
+
+    i = 0
+    robot_asset: Articulation = env.scene[robot_asset_cfg.name]
+    joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+    joint_vel = robot_asset.data.joint_vel[env_ids].clone()
+
+    while i < max_iterations:
+        robot_asset: Articulation = env.scene[robot_asset_cfg.name]
+        # Add gaussian noise to joint states
+        joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+        joint_vel = robot_asset.data.joint_vel[env_ids].clone()
+
+        # Get gear positions and orientations for each environment based on selected gear type
+        env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids)
+        grasp_object_pos_world = torch.zeros(len(env_ids), 3, device=env.device)
+        grasp_object_quat = torch.zeros(len(env_ids), 4, device=env.device)
+        
+        for row_idx, env_id in enumerate(env_ids_list):
+            # Get the gear type for this environment
+            gear_key = env._current_gear_type[env_id]
+            selected_asset_name = f"factory_{gear_key}"
+            
+            # Get the gear asset for this environment
+            gear_asset: RigidObject = env.scene[selected_asset_name]
+            grasp_object_pos_world[row_idx] = gear_asset.data.root_link_pos_w[env_id].clone()
+            grasp_object_quat[row_idx] = gear_asset.data.root_link_quat_w[env_id].clone()
+
+        
+        # First apply rotation offset to get the object's orientation
+        if rot_offset_tensor is not None:
+            # Apply rotation offset by quaternion multiplication
+            # rot_offset is assumed to be in quaternion format (w, x, y, z)
+            grasp_object_quat = math_utils.quat_mul(grasp_object_quat, rot_offset_tensor)
+            # print(f"Applied rot_offset: {rot_offset}")
+            # print(f"grasp_object_quat after offset: {grasp_object_quat}")
+        
+        if pos_randomization_range is not None:
+            pos_keys = ["x", "y", "z"]
+            range_list_pos = [pos_randomization_range.get(key, (0.0, 0.0)) for key in pos_keys]
+            ranges_pos = torch.tensor(range_list_pos, device=env.device)
+            rand_pos_offsets = math_utils.sample_uniform(ranges_pos[:, 0], ranges_pos[:, 1], (len(env_ids), 3), device=env.device)
+        # Apply gear-specific grasp offsets to each environment
+        for row_idx, env_id in enumerate(env_ids_list):
+            gear_key = env._current_gear_type[env_id]
+            gear_grasp_offset = env.cfg.gear_offsets_grasp[gear_key]
+            grasp_offset_tensor = torch.tensor(gear_grasp_offset, device=env.device, dtype=grasp_object_pos_world.dtype)
+
+            if pos_randomization_range is not None:
+                grasp_offset_tensor += rand_pos_offsets[row_idx]
+            
+            # Transform position offset from rotated object frame to world frame
+            grasp_object_pos_world[row_idx] = grasp_object_pos_world[row_idx] + math_utils.quat_apply(
+                grasp_object_quat[row_idx:row_idx+1], grasp_offset_tensor.unsqueeze(0)
+            )[0]
+        
+        # Convert to environment-relative coordinates by subtracting environment origins
+        grasp_object_pos = grasp_object_pos_world
+        
+        # Get end effector pose of the robot
+        # Get the specific wrist_3_link pose
+        try:
+            # Find the index of the wrist_3_link body
+            wrist_3_indices, wrist_3_names = robot_asset.find_bodies(["wrist_3_link"])
+            wrist_3_idx = wrist_3_indices[0]
+            
+            if len(wrist_3_indices) == 1:
+                wrist_3_idx = wrist_3_indices[0]  # Get the first (and should be only) index
+                
+                # Get the specific wrist_3_link pose
+                eef_pos_world = robot_asset.data.body_link_pos_w[env_ids, wrist_3_idx]  # Shape: (len(env_ids), 3)
+                eef_quat = robot_asset.data.body_link_quat_w[env_ids, wrist_3_idx]  # Shape: (len(env_ids), 4)
+                
+                # Convert to environment-relative coordinates
+                eef_pos = eef_pos_world
+
+
+                # You can also get the full pose as [pos, quat] (7-dimensional)
+                eef_pos = robot_asset.data.body_pos_w[env_ids, wrist_3_idx]
+                eef_quat = robot_asset.data.body_quat_w[env_ids, wrist_3_idx]
+                # print(f"eef_pos: {eef_pos}")
+                # print(f"eef_quat: {eef_quat}")
+            elif len(wrist_3_indices) > 1:
+                print("wrist_3_link found multiple times in robot body names")
+                print(f"Available body names: {robot_asset.body_names}")
+            else:
+                print("wrist_3_link not found in robot body names")
+                print(f"Available body names: {robot_asset.body_names}")
+                
+        except Exception as e:
+            print(f"Could not get end effector pose: {e}")
+            print("You may need to adjust the body name or access method based on your robot configuration")
+        
+
+        # Compute error to target using wrist_3_link as current and grasp_object as target
+        if len(wrist_3_indices) > 0:
+            # Get current end effector pose (wrist_3_link)
+            current_eef_pos = eef_pos  # wrist_3_link position
+            current_eef_quat = eef_quat  # wrist_3_link orientation
+            
+            # Get target pose (grasp object)
+            target_eef_pos = grasp_object_pos  # grasp object position
+            target_eef_quat = grasp_object_quat  # grasp object orientation
+
+            # print(f"current_eef_pos: {current_eef_pos[0]}")
+            # print(f"current_eef_quat: {current_eef_quat[0]}")
+            # print(f"target_eef_pos: {target_eef_pos[0]}")
+            # print(f"target_eef_quat: {target_eef_quat[0]}")
+
+            # Compute pose error
+            pos_error, axis_angle_error = fc.get_pose_error(
+                fingertip_midpoint_pos=current_eef_pos,
+                fingertip_midpoint_quat=current_eef_quat,
+                ctrl_target_fingertip_midpoint_pos=target_eef_pos,
+                ctrl_target_fingertip_midpoint_quat=target_eef_quat,
+                jacobian_type='geometric',
+                rot_error_type='axis_angle')
+            delta_hand_pose = torch.cat((pos_error, axis_angle_error), dim=-1)
+            # Check if pose error is within threshold
+            pos_error_norm = torch.norm(pos_error, dim=-1)
+            rot_error_norm = torch.norm(axis_angle_error, dim=-1)
+            
+            # print(f"Pose error - position: {pos_error[0]}")
+            # print(f"Pose error - orientation: {axis_angle_error[0]}")
+            # print(f"Position error norm: {pos_error_norm[0]}")
+            # print(f"Rotation error norm: {rot_error_norm[0]}")
+            
+            # Check if all environments have converged
+            if torch.all(pos_error_norm < pos_threshold) and torch.all(rot_error_norm < rot_threshold):
+                # print(f"Converged after {i} iterations!")
+                # print(f"pos_error_norm: {pos_error_norm}")
+                # print(f"rot_error_norm: {rot_error_norm}")
+                # print(f"pos_error: {pos_error}")
+                # print(f"axis_angle_error: {axis_angle_error}")
+                break
+
+            # Solve DLS problem for inverse kinematics
+            # Get jacobian for the wrist_3_link using the same approach as task_space_actions.py
+            try:
+                # Get all jacobians from the robot
+                jacobians = robot_asset.root_physx_view.get_jacobians().clone()
+				
+                # Select the jacobian for the wrist_3_link body
+                # For fixed-base robots, Jacobian excludes the base body (index 0)
+                # So if wrist_3_idx is 6, the Jacobian index should be 5
+                jacobi_body_idx = wrist_3_idx - 1
+                
+                
+                jacobian = jacobians[env_ids, jacobi_body_idx, :, :]  # Only first 6 joints (arm, not gripper)
+                
+                delta_dof_pos = fc._get_delta_dof_pos(
+                    delta_pose=delta_hand_pose,
+                    ik_method="dls",
+                    jacobian=jacobian,
+                    device=env.device,
+                )
+                
+                # Update joint positions - only update arm joints (first 6)
+                joint_pos += delta_dof_pos
+                joint_vel = torch.zeros_like(joint_pos)
+
+                # Set into the physics simulation
+
+                
+            except Exception as e:
+                print(f"Error in IK computation: {e}")
+                print("Note: You may need to implement proper jacobian computation for your robot")
+
+        # Set into the physics simulation
+        robot_asset.set_joint_position_target(joint_pos, env_ids=env_ids)
+        robot_asset.set_joint_velocity_target(joint_vel, env_ids=env_ids)
+        robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+
+
+        # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}")
+
+        # if delta_dof_pos.abs().max() < 1e-5:
+        #     break
+
+        # print(f"Step {i}")
+        
+        i += 1
+    # if i >= max_iterations:
+    #     print(f"IK did not converge after {max_iterations} iterations")
+    #     # print(f"pos_error_norm: {pos_error_norm}")
+    #     # print(f"rot_error_norm: {rot_error_norm}")
+    #     # print(f"pos_error: {pos_error}")
+    #     # print(f"axis_angle_error: {axis_angle_error}")
+    #     # raise RuntimeError(f"IK did not converge after {max_iterations} iterations")
+
+    # # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}")
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Going to set object position...")
+    
+    # TODO: Resttting the gear based on teh IK solution is not working as expected. The gear is not being placed in the correct position.
+    # # Set the grasp object's pose to the current end-effector pose in world coordinates (with optional offsets)
+    # # The offsets should be applied in reverse to get from wrist_3_link to gear pose
+    # gear_pos_world = current_eef_pos
+    # gear_quat_world = current_eef_quat
+    
+    # # Apply position offset (subtract to get from wrist_3_link to gear position)
+    # if pos_offset_tensor is not None:
+    #     gear_pos_world = gear_pos_world - pos_offset_tensor
+    
+    # # Apply rotation offset (inverse to get from wrist_3_link to gear orientation)
+    # if rot_offset_tensor is not None:
+    #     # Apply inverse rotation offset
+    #     gear_quat_world = math_utils.quat_mul(current_eef_quat, math_utils.quat_conjugate(rot_offset_tensor))
+    
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 1...")
+    
+    # # Set the grasp object's pose for each environment based on selected gear type
+    # for row_idx, env_id in enumerate(env_ids_list):
+    #     gear_key = env._current_gear_type[env_id]
+    #     selected_asset_name = f"factory_{gear_key}"
+    #     gear_asset: RigidObject = env.scene[selected_asset_name]
+    #     gear_asset.write_root_pose_to_sim(
+    #         torch.cat([gear_pos_world[row_idx:row_idx+1], gear_quat_world[row_idx:row_idx+1]], dim=-1), 
+    #         env_ids=torch.tensor([env_id], device=env.device)
+    #     )
+
+    # Close the gripper by setting the finger_joint based on gear type
+    all_joints, all_joints_names = robot_asset.find_joints([".*"])
+    
+    joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+    
+    # @ireti: We need to change this so it workes with teh public 2f140 gripper that uses mimic joints
+    finger_joints = all_joints[num_arm_joints:]
+    finger_joint_names = all_joints_names[num_arm_joints:]
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 2...")
+
+    hand_grasp_pos = env.cfg.hand_grasp_pos[gear_key]
+    set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_grasp_pos)
+
+    robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids)
+    robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 3...")
+
+    # Set finger joints based on gear type for each environment
+    hand_close_pos = env.cfg.hand_close_pos
+    set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_close_pos)
+
+    robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids)
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 4...")
+
+def randomize_gears_and_base_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    pose_range: dict = {},
+    velocity_range: dict = {},
+    gear_pos_range: dict = {},
+    rot_randomization_range: dict = {},
+
+):
+    """Randomize both the gear base pose and the poses of all gear types with the same value,
+    then apply the per-env `gear_pos_range` only to the gear selected by `randomize_gear_type`.
+    """
+    if not hasattr(env, '_current_gear_type'):
+        raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.")
+
+    device = env.device
+
+    # Shared pose samples for all assets (base and all gears)
+    pose_keys = ["x", "y", "z", "roll", "pitch", "yaw"]
+    range_list_pose = [pose_range.get(key, (0.0, 0.0)) for key in pose_keys]
+    ranges_pose = torch.tensor(range_list_pose, device=device)
+    rand_pose_samples = math_utils.sample_uniform(ranges_pose[:, 0], ranges_pose[:, 1], (len(env_ids), 6), device=device)
+
+    orientations_delta = math_utils.quat_from_euler_xyz(
+        rand_pose_samples[:, 3], rand_pose_samples[:, 4], rand_pose_samples[:, 5]
+    )
+
+    # Shared velocity samples for all assets (base and all gears)
+    range_list_vel = [velocity_range.get(key, (0.0, 0.0)) for key in pose_keys]
+    ranges_vel = torch.tensor(range_list_vel, device=device)
+    rand_vel_samples = math_utils.sample_uniform(ranges_vel[:, 0], ranges_vel[:, 1], (len(env_ids), 6), device=device)
+
+    # Prepare assets: base + all possible gears (only those present in scene will be processed)
+    base_asset_name = "factory_gear_base"
+    possible_gear_assets = [
+        # "factory_gear_small",
+        "factory_gear_medium",
+        # "factory_gear_large",
+        # "table",
+    ]
+
+
+    positions_by_asset = {}
+    orientations_by_asset = {}
+    velocities_by_asset = {}
+
+    # Combine base and gear assets into one pass to avoid duplication
+    asset_names_to_process = [base_asset_name] + possible_gear_assets
+    for asset_name in asset_names_to_process:
+        asset: RigidObject | Articulation = env.scene[asset_name]
+        root_states = asset.data.default_root_state[env_ids].clone()
+        positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_pose_samples[:, 0:3]
+        orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta)
+        velocities = root_states[:, 7:13] + rand_vel_samples
+        positions_by_asset[asset_name] = positions
+        orientations_by_asset[asset_name] = orientations
+        velocities_by_asset[asset_name] = velocities
+
+    # Per-env gear offset (gear_pos_range) applied only to the selected gear
+    range_list_gear = [gear_pos_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z"]]
+    ranges_gear = torch.tensor(range_list_gear, device=device)
+    rand_gear_offsets = math_utils.sample_uniform(ranges_gear[:, 0], ranges_gear[:, 1], (len(env_ids), 3), device=device)
+
+    # Per-env gear orientation offset (rot_randomization_range) applied only to the selected gear
+    range_list_rot = [rot_randomization_range.get(key, (0.0, 0.0)) for key in ["roll", "pitch", "yaw"]]
+    ranges_rot = torch.tensor(range_list_rot, device=device)
+    rand_rot_offsets = math_utils.sample_uniform(ranges_rot[:, 0], ranges_rot[:, 1], (len(env_ids), 3), device=device)
+    rand_rot_quats = math_utils.quat_from_euler_xyz(rand_rot_offsets[:, 0], rand_rot_offsets[:, 1], rand_rot_offsets[:, 2])
+
+    env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids)
+    for row_idx, env_id in enumerate(env_ids_list):
+        # env._current_gear_type[env_id] is expected to be one of: "gear_small", "gear_medium", "gear_large"
+        gear_key = env._current_gear_type[env_id]
+        selected_asset_name = f"factory_{gear_key}"
+        if selected_asset_name in positions_by_asset:
+            positions_by_asset[selected_asset_name][row_idx] = (
+                positions_by_asset[selected_asset_name][row_idx] + rand_gear_offsets[row_idx]
+            )
+            # Apply additional orientation randomization to the selected gear
+            orientations_by_asset[selected_asset_name][row_idx] = math_utils.quat_mul(
+                orientations_by_asset[selected_asset_name][row_idx], rand_rot_quats[row_idx]
+            )
+
+    # Write back to sim for all prepared assets
+    for asset_name in positions_by_asset.keys():
+        asset = env.scene[asset_name]
+        positions = positions_by_asset[asset_name]
+        orientations = orientations_by_asset[asset_name]
+        velocities = velocities_by_asset[asset_name]
+        asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)
+        asset.write_root_velocity_to_sim(velocities, env_ids=env_ids)
+
+def set_finger_joint_pos_2f_140(
+    joint_pos: torch.Tensor,
+    env_ids_list: list[int],
+    finger_joints: list[int],
+    finger_joint_position: float,
+):
+    # Get hand close positions for each environment based on gear type
+    for row_idx, env_id in enumerate(env_ids_list):
+        
+        joint_pos[row_idx, finger_joints[0]] = finger_joint_position
+        joint_pos[row_idx, finger_joints[1]] = finger_joint_position
+        
+        # outer finger joints
+        joint_pos[row_idx, finger_joints[2]] = 0
+        joint_pos[row_idx, finger_joints[3]] = 0
+
+        joint_pos[row_idx, finger_joints[4]] = -finger_joint_position
+        joint_pos[row_idx, finger_joints[5]] = -finger_joint_position
+
+        joint_pos[row_idx, finger_joints[6]] = finger_joint_position
+        joint_pos[row_idx, finger_joints[7]] = finger_joint_position
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
new file mode 100644
index 00000000000..a434018bea0
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import torch
+from typing import TYPE_CHECKING
+
+import isaaclab.utils.math as math_utils
+from isaaclab.assets import RigidObject
+from isaaclab.managers import SceneEntityCfg
+from isaacsim.core.utils.torch.transformations import tf_combine
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedRLEnv
+
+
+def gear_shaft_pos_w(
+    env: ManagerBasedRLEnv, 
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base")
+) -> torch.Tensor:
+    """Gear shaft position in world frame with offset applied.
+    
+    Args:
+        env: The environment containing the assets
+        asset_cfg: Configuration of the gear base asset
+        
+    Returns:
+        Gear shaft position tensor of shape (num_envs, 3)
+    """
+    # extract the asset (to enable type hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+    
+    # get base gear position and orientation
+    base_pos = asset.data.root_pos_w
+    base_quat = asset.data.root_quat_w
+    
+
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # get offset for the specific gear type
+    gear_offsets = getattr(env.cfg, 'gear_offsets', {})
+    
+    # Initialize shaft positions
+    shaft_pos = torch.zeros_like(base_pos)
+    
+    # Apply offsets for each environment based on their gear type
+    for i in range(env.num_envs):
+        # gear_type = current_gear_type[i] if i < len(current_gear_type) else "gear_medium"
+        gear_type = current_gear_type[i]
+        
+        offset = torch.tensor(gear_offsets[gear_type], device=base_pos.device, dtype=base_pos.dtype)
+        offset_pos = offset.unsqueeze(0)
+        shaft_pos[i] = base_pos[i] + offset_pos[0]
+        # Apply position and rotation offsets if provided
+        # create identity quaternion
+        rot_offset_tensor = torch.tensor([1.0, 0.0, 0.0, 0.0], device=base_pos.device, dtype=base_pos.dtype).unsqueeze(0)
+        _, shaft_pos[i] = tf_combine(
+            base_quat[i:i+1], base_pos[i:i+1],
+            rot_offset_tensor, offset_pos
+        )
+
+    return shaft_pos - env.scene.env_origins
+
+
+def gear_shaft_quat_w(
+    env: ManagerBasedRLEnv, 
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base"),
+) -> torch.Tensor:
+    """Gear shaft orientation in world frame.
+    
+    Args:
+        env: The environment containing the assets
+        asset_cfg: Configuration of the gear base asset
+        gear_type: Type of gear ('gear_small', 'gear_medium', 'gear_large')
+        
+    Returns:
+        Gear shaft orientation tensor of shape (num_envs, 4)
+    """
+    # extract the asset (to enable type hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+
+    
+    # get base quaternion
+    base_quat = asset.data.root_quat_w
+    
+    # ensure w component is positive for each environment
+    # if w is negative, negate the entire quaternion to maintain same orientation
+    w_negative = base_quat[:, 0] < 0
+    positive_quat = base_quat.clone()
+    positive_quat[w_negative] = -base_quat[w_negative]
+    
+    return positive_quat
+
+def gear_pos_w(
+    env: ManagerBasedRLEnv, 
+) -> torch.Tensor:
+    """Gear position in world frame.
+    
+    Args:
+        env: The environment containing the assets
+
+    Returns:
+        Gear position tensor of shape (num_envs, 3)
+    """
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # Create a mapping from gear type to asset name
+    gear_to_asset_mapping = {
+        'gear_small': 'factory_gear_small',
+        'gear_medium': 'factory_gear_medium', 
+        'gear_large': 'factory_gear_large'
+    }
+    
+    # Initialize positions array
+    gear_positions = torch.zeros((env.num_envs, 3), device=env.device)
+    
+    # Get positions for each environment based on their gear type
+    for i in range(env.num_envs):
+        gear_type = current_gear_type[i]
+        asset_name = gear_to_asset_mapping.get(gear_type)
+        
+        asset: RigidObject = env.scene[asset_name]
+        
+        # Get position for this specific environment
+        gear_positions[i] = asset.data.root_pos_w[i]
+    
+    return gear_positions
+
+def gear_quat_w(
+    env: ManagerBasedRLEnv, 
+) -> torch.Tensor:
+    """Gear orientation in world frame.
+    
+    Args:
+        env: The environment containing the assets
+        
+    Returns:
+        Gear orientation tensor of shape (num_envs, 4)
+    """
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # Create a mapping from gear type to asset name
+    gear_to_asset_mapping = {
+        'gear_small': 'factory_gear_small',
+        'gear_medium': 'factory_gear_medium', 
+        'gear_large': 'factory_gear_large'
+    }
+    
+    # Initialize quaternions array
+    gear_positive_quat = torch.zeros((env.num_envs, 4), device=env.device)
+    
+    # Get quaternions for each environment based on their gear type
+    for i in range(env.num_envs):
+        gear_type = current_gear_type[i]
+        asset_name = gear_to_asset_mapping.get(gear_type)
+        
+        # Get the asset for this specific gear type
+        asset: RigidObject = env.scene[asset_name]
+        
+        # Get quaternion for this specific environment
+        gear_quat = asset.data.root_quat_w[i]
+        
+        # ensure w component is positive for each environment
+        # if w is negative, negate the entire quaternion to maintain same orientation
+        if gear_quat[0] < 0:
+            gear_quat = -gear_quat
+        
+        gear_positive_quat[i] = gear_quat
+    
+    return gear_positive_quat
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index 0d14620e225..8e44e65f416 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -12,6 +12,7 @@
 
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.sensors.frame_transformer.frame_transformer import FrameTransformer
+from isaaclab.utils.math import quat_mul
 
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedRLEnv
@@ -229,3 +230,289 @@ def keypoint_command_error_exp(
             keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
 
     return keypoint_reward_exp
+
+def keypoint_entity_error(
+    env: ManagerBasedRLEnv, 
+    asset_cfg_1: SceneEntityCfg,
+    keypoint_scale: float = 1.0,
+    add_cube_center_kp: bool = True,
+    quat_offset: torch.Tensor | None = None,
+    pos_offset: torch.Tensor | None = None
+) -> torch.Tensor:
+    """Compute keypoint distance between a RigidObject and the dynamically selected gear.
+
+    The function computes the keypoint distance between the poses of two entities.
+    Keypoints are created by applying offsets to both poses and the distance is 
+    computed as the L2-norm between corresponding keypoints.
+
+    Args:
+        env: The environment containing the assets
+        asset_cfg_1: Configuration of the first asset (RigidObject)
+        keypoint_scale: Scale factor for keypoint offsets
+        add_cube_center_kp: Whether to include the center keypoint (0, 0, 0)
+        quat_offset: Optional quaternion offset to apply to asset_1's orientation (w, x, y, z)
+        pos_offset: Optional position offset to apply to asset_1's position (x, y, z)
+
+    Returns:
+        Keypoint distance tensor of shape (num_envs,) where each element
+        is the mean L2 norm distance between corresponding keypoints
+    """
+    # extract the assets (to enable type hinting)
+    asset_1 = env.scene[asset_cfg_1.name]  # RigidObject
+    
+    # get current poses in world frame for asset_1
+    # asset_1 is RigidObject - use body_pos_w and body_quat_w
+    curr_pos_1 = asset_1.data.body_pos_w[:, 0]  # type: ignore
+    curr_quat_1 = asset_1.data.body_quat_w[:, 0]  # type: ignore
+    
+    # Apply position offset to asset_1 if provided
+    if pos_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(pos_offset, list):
+            pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype)
+        # Ensure pos_offset has the right shape (num_envs, 3)
+        if pos_offset.dim() == 1:
+            pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1)
+        # Apply the offset by adding to position
+        curr_pos_1 = curr_pos_1 + pos_offset
+    
+    # Apply quaternion offset to asset_1 if provided
+    if quat_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(quat_offset, list):
+            quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype)
+        # Ensure quat_offset has the right shape (num_envs, 4)
+        if quat_offset.dim() == 1:
+            quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1)
+        # Apply the offset by quaternion multiplication
+        # TODO: check if this is correct or if its shoudl be the inverse
+        curr_quat_1 = quat_mul(quat_offset, curr_quat_1)
+
+    # Handle per-environment gear type selection
+    if hasattr(env, '_current_gear_type'):
+        # Get the current gear type for each environment
+        current_gear_type = env._current_gear_type  # type: ignore
+        
+        # Create a mapping from gear type to asset name
+        gear_to_asset_mapping = {
+            'gear_small': 'factory_gear_small',
+            'gear_medium': 'factory_gear_medium', 
+            'gear_large': 'factory_gear_large'
+        }
+        
+        # Create arrays to store poses for all environments
+        curr_pos_2_all = torch.zeros_like(curr_pos_1)
+        curr_quat_2_all = torch.zeros_like(curr_quat_1)
+        
+        # Get poses for each environment based on their gear type
+        for env_id in range(env.num_envs):
+            # Get the gear type for this specific environment
+            if env_id < len(current_gear_type):
+                selected_gear_type = current_gear_type[env_id]
+            else:
+                print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback")
+                selected_gear_type = 'gear_medium'
+            selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium')
+            
+            # Get the asset for this specific gear type
+            try:
+                asset_2 = env.scene[selected_asset_name]
+            except KeyError:
+                # Fallback to factory_gear_medium if the asset doesn't exist
+                asset_2 = env.scene['factory_gear_medium']
+            
+            # Get poses for this specific environment
+            curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0]  # type: ignore
+            curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0]  # type: ignore
+        
+        # Compute keypoint distance for all environments at once
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2_all,
+            target_quat=curr_quat_2_all,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+    else:
+        # Fallback to factory_gear_medium if _current_gear_type is not available
+        print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback")
+        asset_2 = env.scene['factory_gear_medium']
+        
+        # get current poses in world frame for asset_2
+        curr_pos_2 = asset_2.data.body_pos_w[:, 0]  # type: ignore
+        curr_quat_2 = asset_2.data.body_quat_w[:, 0]  # type: ignore
+        
+        # compute keypoint distance
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2,
+            target_quat=curr_quat_2,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+    
+    # Return mean distance across keypoints to match expected reward shape (num_envs,)
+    return keypoint_dist_sep.mean(-1)
+
+
+def keypoint_entity_error_exp(
+    env: ManagerBasedRLEnv, 
+    asset_cfg_1: SceneEntityCfg,
+    kp_exp_coeffs: list[tuple[float, float]] = [(1.0, 0.1)],
+    kp_use_sum_of_exps: bool = True,
+    keypoint_scale: float = 1.0,
+    add_cube_center_kp: bool = True,
+    quat_offset: torch.Tensor | None = None,
+    pos_offset: torch.Tensor | None = None
+) -> torch.Tensor:
+    """Compute exponential keypoint reward between a RigidObject and the dynamically selected gear.
+
+    The function computes the keypoint distance between the poses of two entities,
+    then applies an exponential reward function. The reward is computed using the 
+    formula: 1 / (exp(a * distance) + b + exp(-a * distance)) where a and b are coefficients.
+
+    Args:
+        env: The environment containing the assets
+        asset_cfg_1: Configuration of the first asset (RigidObject)
+        kp_exp_coeffs: List of (a, b) coefficient pairs for exponential reward
+        kp_use_sum_of_exps: Whether to use sum of exponentials (True) or single exponential (False)
+        keypoint_scale: Scale factor for keypoint offsets
+        add_cube_center_kp: Whether to include the center keypoint (0, 0, 0)
+
+    Returns:
+        Exponential keypoint reward tensor of shape (num_envs,) where each element
+        is the exponential reward value
+    """
+    # extract the assets (to enable type hinting)
+    asset_1 = env.scene[asset_cfg_1.name]  # RigidObject
+    
+    # get current poses in world frame for asset_1
+    # asset_1 is RigidObject - use body_pos_w and body_quat_w
+    curr_pos_1 = asset_1.data.body_pos_w[:, 0]  # type: ignore
+    curr_quat_1 = asset_1.data.body_quat_w[:, 0]  # type: ignore
+    
+    # Apply position offset to asset_1 if provided
+    if pos_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(pos_offset, list):
+            pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype)
+        # Ensure pos_offset has the right shape (num_envs, 3)
+        if pos_offset.dim() == 1:
+            pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1)
+        # Apply the offset by adding to position
+        curr_pos_1 = curr_pos_1 + pos_offset
+    
+    # Apply quaternion offset to asset_1 if provided
+    if quat_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(quat_offset, list):
+            quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype)
+        # Ensure quat_offset has the right shape (num_envs, 4)
+        if quat_offset.dim() == 1:
+            quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1)
+        # Apply the offset by quaternion multiplication
+        # TODO: check if this is correct or if its shoudl be the inverse
+        curr_quat_1 = quat_mul(quat_offset, curr_quat_1)
+
+    # Handle per-environment gear type selection
+    if hasattr(env, '_current_gear_type'):
+        # Get the current gear type for each environment
+        current_gear_type = env._current_gear_type  # type: ignore
+        
+        # Create a mapping from gear type to asset name
+        gear_to_asset_mapping = {
+            'gear_small': 'factory_gear_small',
+            'gear_medium': 'factory_gear_medium', 
+            'gear_large': 'factory_gear_large'
+        }
+        
+        # Create arrays to store poses for all environments
+        curr_pos_2_all = torch.zeros_like(curr_pos_1)
+        curr_quat_2_all = torch.zeros_like(curr_quat_1)
+        
+        # Get poses for each environment based on their gear type
+        for env_id in range(env.num_envs):
+            # Get the gear type for this specific environment
+            if env_id < len(current_gear_type):
+                selected_gear_type = current_gear_type[env_id]
+            else:
+                print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback")
+                selected_gear_type = 'gear_medium'
+            selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium')
+            
+            # Get the asset for this specific gear type
+            try:
+                asset_2 = env.scene[selected_asset_name]
+            except KeyError:
+                # Fallback to factory_gear_medium if the asset doesn't exist
+                asset_2 = env.scene['factory_gear_medium']
+            
+            # Get poses for this specific environment
+            curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0]  # type: ignore
+            curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0]  # type: ignore
+        
+        # Compute keypoint distance for all environments at once
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2_all,
+            target_quat=curr_quat_2_all,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+        
+        # compute exponential reward
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        
+        if kp_use_sum_of_exps:
+            # Use sum of exponentials: average across keypoints for each coefficient
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1)
+        else:
+            # Use single exponential: average keypoint distance first, then apply exponential
+            keypoint_dist = keypoint_dist_sep.mean(-1)  # shape: (num_envs,)
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+    else:
+        # Fallback to factory_gear_medium if _current_gear_type is not available
+        print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback")
+        asset_2 = env.scene['factory_gear_medium']
+        
+        # get current poses in world frame for asset_2
+        curr_pos_2 = asset_2.data.body_pos_w[:, 0]  # type: ignore
+        curr_quat_2 = asset_2.data.body_quat_w[:, 0]  # type: ignore
+        
+        # compute keypoint distance
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2,
+            target_quat=curr_quat_2,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+        
+        # compute exponential reward
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        
+        if kp_use_sum_of_exps:
+            # Use sum of exponentials: average across keypoints for each coefficient
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1)
+        else:
+            # Use single exponential: average keypoint distance first, then apply exponential
+            keypoint_dist = keypoint_dist_sep.mean(-1)  # shape: (num_envs,)
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+    
+    return keypoint_reward_exp