42 changes: 14 additions & 28 deletions scripts/reinforcement_learning/rsl_rl/play.py
@@ -58,7 +58,7 @@
import time
import torch

from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner

from isaaclab.envs import (
DirectMARLEnv,
@@ -71,7 +71,7 @@
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint

from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx

import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
@@ -81,14 +81,14 @@


@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg):
"""Play with RSL-RL agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")

# override configurations with non-hydra CLI arguments
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs

# set the environment seed
@@ -112,9 +112,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

log_dir = os.path.dirname(resume_path)

# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

@@ -139,43 +139,32 @@

print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)

# obtain the trained policy for inference
policy = runner.get_inference_policy(device=env.unwrapped.device)
policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)

# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
policy_nn = ppo_runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic

# extract the normalizer
if hasattr(policy_nn, "actor_obs_normalizer"):
normalizer = policy_nn.actor_obs_normalizer
elif hasattr(policy_nn, "student_obs_normalizer"):
normalizer = policy_nn.student_obs_normalizer
else:
normalizer = None
policy_nn = ppo_runner.alg.actor_critic

# export policy to onnx/jit
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(
policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
)

dt = env.unwrapped.step_dt

# reset environment
obs = env.get_observations()
obs, _ = env.get_observations()
timestep = 0
# simulate environment
while simulation_app.is_running():
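Note: the export step above writes `policy.pt` (TorchScript) and `policy.onnx` into an `exported/` folder next to the loaded checkpoint. A minimal sketch of loading the TorchScript file for standalone inference (the log path and the observation size are illustrative placeholders, not values taken from this diff):

```python
import torch

# placeholder path; in practice this sits next to the checkpoint loaded by play.py
policy = torch.jit.load("logs/rsl_rl/my_experiment/exported/policy.pt")
policy.eval()

with torch.inference_mode():
    obs = torch.zeros(1, 48)  # 48 is a placeholder for the environment's observation dimension
    actions = policy(obs)
print(actions.shape)
```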
21 changes: 19 additions & 2 deletions scripts/reinforcement_learning/rsl_rl/train.py
@@ -10,6 +10,8 @@
import argparse
import sys

import rsl_rl

from isaaclab.app import AppLauncher

# local imports
@@ -56,8 +58,9 @@
from packaging import version

# check minimum supported rsl-rl version
RSL_RL_VERSION = "3.0.1"
RSL_RL_VERSION = "2.3.1"
installed_version = metadata.version("rsl-rl-lib")
print("installed_version", installed_version)
if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
if platform.system() == "Windows":
cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
@@ -78,7 +81,7 @@
from datetime import datetime

import omni
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner

from isaaclab.envs import (
DirectMARLEnv,
@@ -199,6 +202,20 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)

print("[INFO] rsl_rl library location:", rsl_rl.__file__)

# run training
runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

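The `dump_pickle` calls above store the environment and agent configurations under `params/` in the run's log directory. They can be inspected later with only the standard library; a sketch, assuming the same directory layout (the log path is a placeholder, and unpickling the config objects requires `isaaclab` to be importable):

```python
import os
import pickle

log_dir = "logs/rsl_rl/my_experiment"  # placeholder for the run's log directory
with open(os.path.join(log_dir, "params", "agent.pkl"), "rb") as f:
    agent_cfg_loaded = pickle.load(f)  # isaaclab must be importable for this to succeed
print(type(agent_cfg_loaded))
```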
6 changes: 6 additions & 0 deletions source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
)
return torch.sum(out_of_limits, dim=1)

def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 norm."""
return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)

def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)

def action(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel (summed for each environment)."""
return torch.sum(env.action_manager.action**2, dim=-1)

def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel."""
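The two reward functions added above use the same `(env) -> torch.Tensor` signature as the existing terms, so they can be registered in a manager-based task's reward configuration in the usual way. A hedged sketch with illustrative weights (the class name and weights are not part of this diff):

```python
import isaaclab.envs.mdp as mdp
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.utils import configclass


@configclass
class MyRewardsCfg:
    """Illustrative reward terms wired to the new functions; weights are placeholders."""

    action_rate = RewTerm(func=mdp.action_rate, weight=-0.005)
    action_magnitude = RewTerm(func=mdp.action, weight=-0.0001)
```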
4 changes: 2 additions & 2 deletions source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,8 @@

"""
from .noise_cfg import NoiseCfg # noqa: F401
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise

# Backward compatibility
ConstantBiasNoiseCfg = ConstantNoiseCfg
12 changes: 12 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):

Defaults to True.
"""

@configclass
class ResetSampledNoiseModelCfg(NoiseModelCfg):
"""Configuration for a noise model that samples noise ONLY during reset."""

class_type: type = noise_model.ResetSampledNoiseModel

noise_cfg: NoiseCfg = MISSING
"""The noise configuration for the noise.

Based on this configuration, the noise is sampled at every reset of the noise model.
"""
68 changes: 68 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
# now re-sample that expanded bias in-place
self.reset()
return super().__call__(data) + self._bias

class ResetSampledNoiseModel(NoiseModel):
"""Noise model that samples noise ONLY during reset and applies it consistently.

The noise is sampled from the configured distribution ONLY during reset and applied consistently
until the next reset. Unlike regular noise that generates new random values every step,
this model maintains the same noise values throughout an episode.
"""

def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
# initialize parent class
super().__init__(noise_model_cfg, num_envs, device)
# store the noise configuration
self._noise_cfg = noise_model_cfg.noise_cfg
self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
self._num_components: int | None = None

def reset(self, env_ids: Sequence[int] | None = None):
"""Reset the noise model by sampling NEW noise values.

This method samples new noise for the specified environments using the configured noise function.
The sampled noise will remain constant until the next reset.

Args:
env_ids: The environment ids to reset the noise model for. Defaults to None,
in which case all environments are considered.
"""
# resolve the environment ids
if env_ids is None:
env_ids = slice(None)

# use the configured noise function to sample new values
# create dummy data with one row per environment being reset
num_resets = self._sampled_noise[env_ids].shape[0] if isinstance(env_ids, slice) else len(env_ids)
dummy_data = torch.zeros((num_resets, 1), device=self._device)

# Sample noise using the configured noise function
sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg)

self._sampled_noise[env_ids] = sampled_noise

def __call__(self, data: torch.Tensor) -> torch.Tensor:
"""Apply the pre-sampled noise to the data.

This method applies the noise that was sampled during the last reset.
No new noise is generated - the same values are used consistently.

Args:
data: The data to apply the noise to. Shape is (num_envs, ...).

Returns:
The data with the noise applied. Shape is the same as the input data.
"""
# on first apply, expand noise to match last dim of data
if self._num_components is None:
*_, self._num_components = data.shape
# expand noise from (num_envs,1) to (num_envs, num_components)
self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)

# apply the noise based on operation
if self._noise_cfg.operation == "add":
return data + self._sampled_noise
elif self._noise_cfg.operation == "scale":
return data * self._sampled_noise
elif self._noise_cfg.operation == "abs":
return self._sampled_noise
else:
raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")