42 changes: 14 additions & 28 deletions scripts/reinforcement_learning/rsl_rl/play.py
@@ -58,7 +58,7 @@
import time
import torch

from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner

from isaaclab.envs import (
DirectMARLEnv,
@@ -71,7 +71,7 @@
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint

from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx

import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path
@@ -81,14 +81,14 @@


@hydra_task_config(args_cli.task, args_cli.agent)
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg):
"""Play with RSL-RL agent."""
# grab task name for checkpoint path
task_name = args_cli.task.split(":")[-1]
train_task_name = task_name.replace("-Play", "")

# override configurations with non-hydra CLI arguments
agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs

# set the environment seed
@@ -112,9 +112,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

log_dir = os.path.dirname(resume_path)

# set the log directory for the environment (works for all environment types)
env_cfg.log_dir = log_dir

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

@@ -139,43 +139,32 @@

print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
if agent_cfg.class_name == "OnPolicyRunner":
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.class_name == "DistillationRunner":
runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
runner.load(resume_path)
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)

# obtain the trained policy for inference
policy = runner.get_inference_policy(device=env.unwrapped.device)
policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)

# extract the neural network module
# we do this in a try-except to maintain backwards compatibility.
try:
# version 2.3 onwards
policy_nn = runner.alg.policy
policy_nn = ppo_runner.alg.policy
except AttributeError:
# version 2.2 and below
policy_nn = runner.alg.actor_critic

# extract the normalizer
if hasattr(policy_nn, "actor_obs_normalizer"):
normalizer = policy_nn.actor_obs_normalizer
elif hasattr(policy_nn, "student_obs_normalizer"):
normalizer = policy_nn.student_obs_normalizer
else:
normalizer = None
policy_nn = ppo_runner.alg.actor_critic

# export policy to onnx/jit
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
export_policy_as_onnx(
policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
)

dt = env.unwrapped.step_dt

# reset environment
obs = env.get_observations()
obs, _ = env.get_observations()
timestep = 0
# simulate environment
while simulation_app.is_running():
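Note: the export step above writes `policy.pt` (TorchScript) and `policy.onnx` into an `exported/` folder next to the loaded checkpoint. A minimal sketch of loading the TorchScript file for standalone inference (the log path and the observation size are illustrative placeholders, not values taken from this diff):

```python
import torch

# placeholder path; in practice this sits next to the checkpoint loaded by play.py
policy = torch.jit.load("logs/rsl_rl/my_experiment/exported/policy.pt")
policy.eval()

with torch.inference_mode():
    obs = torch.zeros(1, 48)  # 48 is a placeholder for the environment's observation dimension
    actions = policy(obs)
print(actions.shape)
```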
21 changes: 19 additions & 2 deletions scripts/reinforcement_learning/rsl_rl/train.py
@@ -10,6 +10,8 @@
import argparse
import sys

import rsl_rl

from isaaclab.app import AppLauncher

# local imports
@@ -56,8 +58,9 @@
from packaging import version

# check minimum supported rsl-rl version
RSL_RL_VERSION = "3.0.1"
RSL_RL_VERSION = "2.3.1"
installed_version = metadata.version("rsl-rl-lib")
print("installed_version", installed_version)
if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
if platform.system() == "Windows":
cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
@@ -78,7 +81,7 @@
from datetime import datetime

import omni
from rsl_rl.runners import DistillationRunner, OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner

from isaaclab.envs import (
DirectMARLEnv,
@@ -199,6 +202,20 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)

print("[INFO] rsl_rl library location:", rsl_rl.__file__)

# run training
runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

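The `dump_pickle` calls above store the environment and agent configurations under `params/` in the run's log directory. They can be inspected later with only the standard library; a sketch, assuming the same directory layout (the log path is a placeholder, and unpickling the config objects requires `isaaclab` to be importable):

```python
import os
import pickle

log_dir = "logs/rsl_rl/my_experiment"  # placeholder for the run's log directory
with open(os.path.join(log_dir, "params", "agent.pkl"), "rb") as f:
    agent_cfg_loaded = pickle.load(f)  # isaaclab must be importable for this to succeed
print(type(agent_cfg_loaded))
```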
6 changes: 6 additions & 0 deletions source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
)
return torch.sum(out_of_limits, dim=1)

def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 norm."""
return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)

def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the rate of change of the actions using L2 squared kernel."""
return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)

def action(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel (summed for each environment)."""
return torch.sum(env.action_manager.action**2, dim=-1)

def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize the actions using L2 squared kernel."""
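The two reward functions added above use the same `(env) -> torch.Tensor` signature as the existing terms, so they can be registered in a manager-based task's reward configuration in the usual way. A hedged sketch with illustrative weights (the class name and weights are not part of this diff):

```python
import isaaclab.envs.mdp as mdp
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.utils import configclass


@configclass
class MyRewardsCfg:
    """Illustrative reward terms wired to the new functions; weights are placeholders."""

    action_rate = RewTerm(func=mdp.action_rate, weight=-0.005)
    action_magnitude = RewTerm(func=mdp.action, weight=-0.0001)
```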
4 changes: 2 additions & 2 deletions source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,8 @@

"""
from .noise_cfg import NoiseCfg # noqa: F401
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg
from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise

# Backward compatibility
ConstantBiasNoiseCfg = ConstantNoiseCfg
12 changes: 12 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):

Defaults to True.
"""

@configclass
class ResetSampledNoiseModelCfg(NoiseModelCfg):
"""Configuration for a noise model that samples noise ONLY during reset."""

class_type: type = noise_model.ResetSampledNoiseModel

noise_cfg: NoiseCfg = MISSING
"""The noise configuration for the noise.

Based on this configuration, the noise is sampled at every reset of the noise model.
"""
68 changes: 68 additions & 0 deletions source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
# now re-sample that expanded bias in-place
self.reset()
return super().__call__(data) + self._bias

class ResetSampledNoiseModel(NoiseModel):
"""Noise model that samples noise ONLY during reset and applies it consistently.

The noise is sampled from the configured distribution ONLY during reset and applied consistently
until the next reset. Unlike regular noise that generates new random values every step,
this model maintains the same noise values throughout an episode.
"""

def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
# initialize parent class
super().__init__(noise_model_cfg, num_envs, device)
# store the noise configuration
self._noise_cfg = noise_model_cfg.noise_cfg
self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
self._num_components: int | None = None

def reset(self, env_ids: Sequence[int] | None = None):
"""Reset the noise model by sampling NEW noise values.

This method samples new noise for the specified environments using the configured noise function.
The sampled noise will remain constant until the next reset.

Args:
env_ids: The environment ids to reset the noise model for. Defaults to None,
in which case all environments are considered.
"""
# resolve the environment ids
if env_ids is None:
env_ids = slice(None)

# use the configured noise function to sample new values
# create dummy data with one row per environment being reset
num_resets = self._sampled_noise[env_ids].shape[0] if isinstance(env_ids, slice) else len(env_ids)
dummy_data = torch.zeros((num_resets, 1), device=self._device)

# Sample noise using the configured noise function
sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg)

self._sampled_noise[env_ids] = sampled_noise

def __call__(self, data: torch.Tensor) -> torch.Tensor:
"""Apply the pre-sampled noise to the data.

This method applies the noise that was sampled during the last reset.
No new noise is generated - the same values are used consistently.

Args:
data: The data to apply the noise to. Shape is (num_envs, ...).

Returns:
The data with the noise applied. Shape is the same as the input data.
"""
# on first apply, expand noise to match last dim of data
if self._num_components is None:
*_, self._num_components = data.shape
# expand noise from (num_envs,1) to (num_envs, num_components)
self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)

# apply the noise based on operation
if self._noise_cfg.operation == "add":
return data + self._sampled_noise
elif self._noise_cfg.operation == "scale":
return data * self._sampled_noise
elif self._noise_cfg.operation == "abs":
return self._sampled_noise
else:
raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")