diff --git a/amp_rsl_rl/runners/amp_on_policy_runner.py b/amp_rsl_rl/runners/amp_on_policy_runner.py
index 7810f90..91c419d 100644
--- a/amp_rsl_rl/runners/amp_on_policy_runner.py
+++ b/amp_rsl_rl/runners/amp_on_policy_runner.py
@@ -354,8 +354,8 @@ def update_run_name_with_sequence(prefix: str) -> None:
                     mean_task_reward_log += rewards.mean().item()
                     mean_style_reward_log += style_rewards.mean().item()
 
-                    # Combine the task and style rewards (TODO this can be a hyperparameters)
-                    rewards = 0.5 * rewards + 0.5 * style_rewards
+                    # Combine the task and style rewards
+                    rewards = self.cfg['task_reward_weight'] * rewards + self.cfg['style_reward_weight'] * style_rewards
 
                     self.alg.process_env_step(rewards, dones, infos)
                     self.alg.process_amp_step(next_amp_obs)
diff --git a/example/agents_config.py b/example/agents_config.py
new file mode 100644
index 0000000..1f50fa8
--- /dev/null
+++ b/example/agents_config.py
@@ -0,0 +1,26 @@
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg
+
+@configclass
+class YourConfigName(RslRlOnPolicyRunnerCfg):
+    experiment_name = "XXX"
+    discriminator = {
+        "hidden_dims": [512, 256],
+        "reward_scale": 1.0,
+        "loss_type": "BCEWithLogits"  # Choose between BCEWithLogits or Wasserstein
+    }
+
+    # Weights for combining task and style rewards
+    task_reward_weight = 0.5
+    style_reward_weight = 0.5
+
+    amp_data_path = "path of the dataset folder"
+    dataset_names = ["files",
+                     "without",
+                     ".npy",
+                     ]
+    dataset_weights = [1.0 for i in range(len(dataset_names))]
+    slow_down_factor = 1.0
+
+    def __post_init__(self):
+        self.algorithm.class_name = "AMP_PPO"
\ No newline at end of file