configs/explanations.yaml

# explanations config
# explains all attributes and lists all options,
# this config is not meant to be run, it's for documentation only
# note: for numerical inputs only plainly written numbers are allowed, this means that scientific notation like 4e-3 is not supported
#       this is due to a quirk/bug in the PyYAML implementation

# run attributes
# these are meta attributes about how we interact with the gym env from the outside
run:
  # settings for the algorithm that is to be used
  algorithm:
    # bool, determines whether a saved model will be loaded
    load_model: False  
    # str, if the above is true, this is the path that will be loaded from
    model_path: ""  
    # str, algorithm type, can be PPO, RecurrentPPO, AttentionPPO, TD3, SAC, A2C or DDPG
    type: "PPO"
    # float, gamma value for RL
    gamma: 0.99
    # float, learning rate for the model
    learning_rate: 0.0003
    # algorithm specific custom configuration
    # here you can change the parameters of the chosen algorithm, you can find them and what they do in the official stable baselines documentation
    # below, we give a few examples
    config:
      n_steps: 2048
      batch_size: 4096
    # this defines a custom policy network for the RL agent
    # you can leave this section out entirely, in that case the default policy by SB3 will be used
    custom_policy:
      # str, name of the activation function, can be ReLU or tanh
      activation_function: "ReLU"
      # list, each entry must be an int and determines the amount of neurons in that layer of the value function, the list must have at least one entry
      value_function:
        - 256
        - 256
        - 256
        - 256
      # list, same as above but for the policy function
      policy_function:
        - 256
        - 256
        - 256
        - 256
  # training config
  train:
    # int, number of training envs working in parallel
    num_envs : 16  
    # int, logging level, see explanation down in the eval section
    logging: 0
    # int, amount of env steps total across all parallel envs after which training will stop
    timesteps: 15000000 
    # int, amount of steps that need to pass in one env such that a checkpoint of the current model is saved (again, this is per env, so if you have 16 parallel envs and a save freq of 1000, the first saved model will have seen ~16000 steps of experience)
    save_freq : 30000
    # str, name of the folder in which models will be saved
    save_folder: "./models/weights"  
    # str, name of the save files, the algorithm name will be added as a prefix and the number of timesteps as a suffix
    save_name: "randomobstacles_default"
  # attributes for evaluation
  eval:
    # int, number of episodes for this eval run, infinitely many if -1
    max_episodes: -1  
    # int, 0: no logging at all, 1: logging into console at the end of an episode, 
    # 2: same as 1 + log for entire episode put into csv file in model/env_logs (if max_episodes is not -1 this will only happen at the end of the last episode such that the file contains all episodes)
    logging: 1 
    # float, sleep added to make movements pass slower for better visiblity (you can also use this to make the simulation run in sync with real time)
    display_delay: 0
    # bool, flags for showing visual aides like goal markings or workspace borders
    show_world_aux: True
    show_goal_aux: True
    show_sensor_aux: False

# env attributes
env:
  #   general attributes
  # int, maximum steps an episode can go on in this env
  max_steps_per_episode: 1024  
  # int, number of past episodes that are used to calculate running average stats for env performance
  stat_buffer_size: 25
  # bool, whether to normalize observations or not
  normalize_observations: False
  # bool, whether to normalize rewards or not
  normalize_rewards: False
  # engine config, governs which physics sim will power the environment
  engine:
    # str, engine name, at the moment this can be either Pybullet or Isaac
    type: "Pybullet"
    # bool, whether to use acurate physics sim for movement or not, False will result in no sim time passing and movements being instanteneous
    use_physics_sim: True
    # list of floats, determines gravity force in N in xyz world directions when using the physics sim
    gravity: [0, 0, -9.8]
    # float, time step of virtual simulated time in seconds for each env step, inverse is frame rate
    sim_step: 0.00416666666  # 1/240 s <-> 240 Hz
    # int, the amount of physics steps we run per env step, is used only when use_physics_sim is True
    # note: the lower this value, the worse the performance, but the higher the accuracy
    sim_steps_per_env_step: 1  
  
  #   robots definition
  # this section adds robots into the env, for demonstration purposes we will add every possible robot here
  robots:
    - type: "UR5" 
      config:
        # str, the name under which the robot will be run in the env, appears in logs and sensor names, make sure to give unique ones in order to avoid confusion
        name: "ur5_1"
        # floats, the xyz position where the base of the robot is mounted in the world
        base_position: [0, 0, 0]
        # floats, the orientation of the robot base w.r.t the world coordinate system, in degrees and roll-pitch-yaw format
        base_orientation: [0, 0, 90]
        # list of floats, the resting angles in degrees of the robot's joints a.k.a. its default pose, must be the same amount as the (movable) joints of the robot
        # if the scenario does not define a starting pose for the robot this will also be the pose in which the robot will start each env episode
        resting_angles: [-180, -45, -90, -135, 90, 0]
        # int, determines the way in which the robot will be controlled by the agent, possible values: 0: inverse kinematics, 1: joint angles, 2: joint velocities
        control_mode: 2
        # float, determines the maximum xyz movement when using inverse kinematics
        xyz_delta: 0.005
        # float, determines the maximum rpy movement when using inverse kinematics
        rpy_delta: 0.005

      #   sensor definition
      # here we define all the sensors that are bound to this specific robot
      # for illustration purposes, this robot will have all availabe sensors at once (with the exception of sensors that belong to a robot but need another robot as reference, the examples for that are further below with the second example robot)
      # note: any robot will by default have a position/rotation sensor for its end effector and a joints sensor
      # you can however define position sensors for other parts of the robot here
      sensors:
        - type: "PositionRotation"  # sensor type that reports position and orientation of a robot link
          config:
            # int, every x steps the sensor will update its data, higher values get more fps for the simulation but lower accuracy
            update_steps: 1
            # whether this sensor's output will be added to the observation space
            add_to_observation_space: True
            # whether this sensor's logging data will be collected
            add_to_logging: True

            # sensor type specific instructions
            # int, link id for which the position and rotation are to be reported
            link_id: 6
            # bool, whether to report rotation as quaternion (True) or rpy (False)
            quaternion: True
        - type: "LidarSensorUR5"  # sensor that sends rays and reports the result
          config:
            update_steps: 1
            normalize: False
            add_to_observation_space: True
            add_to_logging: True

            # sensor type specific instructions
            # int, amount of values that the lidar indicator is able to report, the higher this value the finer the resolution of the raw lidar data
            indicator_buckets: 6
            # float, offset of the lidar rays from mesh center they're starting from
            ray_start: 0
            # float, end of the lidar as measured from the mesh center, ray length = this - ray_start
            ray_end: 0.3
            ray_setup:
              # the keys here are all the robot links equppid with lidar rays
              # you can remove ones you don't want to have
              # the first number is the amount of directions, the second is the number of rays per direction (see class implementation for details on what these numbers mean exactly)
              ee_forward: [1, 1]
              wrist3_circle: [10, 10]
              wrist2_circle: [10, 10]
              wrist1_circle: [10, 10]
              upper_arm: [10, 10]
            # bool, whether the output will report the indicator or raw measured distances
            indicator: True
        - type: "Obstacle"  # sensor that uses an engine call to report the relative position of the nearest obstacles
          config:
            update_steps: 1
            add_to_observation_space: True
            add_to_logging: False

            # sensor type specific instructions
            # int, the number of obstacles that this sensor reports on
            num_obstacles: 2
            # float, max distance in which to pick up obstacles in, if there are fewer in this radius than num_obstacles a generic output will be reported instead
            max_distance: 15
            # int, link id for which the distance mesaures will be given
            reference_link_id: 7
        - type: "OnBodyCameraUR5"  # a camera mounted on the robot end effector
          config:
            update_steps: 1
            normalize: False
            add_to_observation_space: True
            add_to_logging: True

            # sensor type specific instructions
            camera_args:
              # str, camera name
              name: "OnBody_ur5_1"
              # int, image height
              height: 128
              # int, image width
              width: 128
              # str, camera type, can be "grayscale", "rgb" or "rgbd"
              type: "grayscale"
              # floats, up vector of the camera
              up_vector: [0, 0, 1]
              # float, field of view of the camera in degrees
              fov: 60
              # float, aspect ratio of the camera
              aspect_ratio: 1
              # float, distance of the near plane of the camera
              near_val: 0.05
              # float, distance of the far plane of the camera
              far_val: 5
            # floats, relative position of the camera w.r.t. to the robot's end effector, can also be empty (results in a default positioning)
            position_relative_to_effector: []
        - type: "StaticFloatingCameraFollowEffector"  # a camera floating in free space but always centered on the robot's end effector
          config:
            update_steps: 1
            normalize: False
            add_to_observation_space: True
            add_to_logging: True
            name: "EEFollow_ur5_1"
            camera_args:
              height: 128
              width: 128
              type: "grayscale"
              up_vector: [0, 0, 1]
              fov: 60
              aspect_ratio: 1
              near_val: 0.05
              far_val: 5
            # floats, position of the camera in world space
            position: [2, 2, 1.5]

      #   goal definition
      # here we define the goal that this roboter is supposed to pursue
      # each robot can only have one goal at a time
      # a robot can also have no goal at all, but at least one robot in the env must have a goal
      # there are a few scenarios that rely on the first robot being the main one, so it's best practice to put the robot with the goal (in case there are others without one) first
      goal:
        type: "PositionCollision"  # a goal for reaching a position without colliding
        config:
          # bool, whether this goal's logging data is collected
          add_to_logging: True
          # bool, whether the robot can continue to do things or be frozen if it achieved success (only relevant if there are other robots with goals in the env)
          continue_after_success: True
          
          # goal type specific settings
          # float, reward given for reaching the position goal
          reward_success: 10
          # float, reward given for collision
          reward_collision: -5
          # float, multiplier for the reward given for distance to goal position
          reward_distance_mult: -0.01
          # float, initial distance threshold for position success
          dist_threshold_start: 0.2
          # float, final distance threshold for position success, also used when in env is in eval mode
          dist_threshold_end : 0.01
          # float, increment that is subtracted from current distance threshold when the threshold is high and env success rate is above 80%
          dist_threshold_increment_start: 0.01
          # float, increment that is subtracted from current distance threshold when the threshold is small and env success rate is above 80%
          dist_threshold_increment_end: 0.001
          # float, overwrite for the start of the distance threshold, works for both train and eval, useful for resuming training, does nothing if empty
          dist_threshold_overwrite: 

    - type: "KR16" 
      name: "kr16_1"
      config:
        base_position: [0, 0, 0]
        base_orientation: [0, 0, 90]
        resting_angles: [-180, -45, -90, -135, 90, 0]
        control_mode: 2
        xyz_delta: 0.005
        rpy_delta: 0.005

      #   sensor definition
      # for this robot we show all examples of robot-bound sensors that need another robot
      # sensors of this type need another robot as reference
      # therefore, robots with this sensor must ALWAYS be mentioned after the robot which this sensor is focusing on
      sensors:
        - type: "BuddyRobotCamera" 
          config: 
            update_steps: 1
            normalize: False
            add_to_observation_space: True
            add_to_logging: True
            name: "BuddyCam_generated_1"
            camera_args:
              height: 128
              width: 128
              type: "grayscale"
              up_vector: [0, 0, 1]
              fov: 60
              aspect_ratio: 1
              near_val: 0.05
              far_val: 5
            # str or floats, robot or position in world space to focus on
            # if str, must be the name of a robot that was mentioned above this one in the config
            target: "ur5_1"

  #   sensor definition
  # here we will define all sensors that are not bound (in some way or another) to a robot, e.g. free floating cameras
  # for illustration purposes, we will add all possible types of non-robot bound sensors
  sensors:
    - type: "StaticFloatingCamera"
      config:
        update_steps: 1
        normalize: False
        add_to_observation_space: True
        add_to_logging: True
        name: "free_floating_1"
        camera_args:
          height: 128
          width: 128
          type: "grayscale"
          up_vector: [0, 0, 1]
          fov: 60
          aspect_ratio: 1
          near_val: 0.05
          far_val: 5
        position: [-2, -2, 1.5]
    
  world:
    type: "RandomObstacle"
    config:
      # floats, workspace boundaries in the following format: xmin, xmax, ymin, ymax, zmin, zmax
      workspace_boundaries: [-0.4, 0.4, 0.3, 0.7, 0.2, 0.5]
      
      # type specific settings
      # int, number of static obstacles
      num_static_obstacles: 3
      # int, number of moving obstacles
      num_moving_obstacles: 1
      # floats, measurements of the three dimensions for random box obstacles
      # format: widthmin, widthmax, lengthmin, lengthmax, heightmin, heightmax
      box_measurements: [0.025, 0.075, 0.025, 0.075, 0.00075, 0.00125]
      # floats, size of the random radii of sphere obstacles, format: rmin, rmax
      sphere_measurements: [0.005, 0.02]
      # floats, bounds for the random velocities of the moving obstacles, format: vmin, vmax
      moving_obstacles_vels: [0.5, 2]
      # lists of floats, directions for the moving obstacles, must either be the same length as num_moving_obstacles or empty (in which case directions will be generated randomly)
      moving_obstacles_directions: []
      # floats, bounds for the random trajectory lengths of the moving obstacles, format: tmin, tmax
      moving_obstacles_trajectory_length: [0.05, 0.75]