Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions examples/rl/sf2/sf2_procgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright 2022 The HuggingFace Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3

import argparse
import math
import random
import sys

from simulate import logging


logger = logging.get_logger(__name__)

try:
from sample_factory.algo.utils.misc import ExperimentStatus
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env
from sample_factory.train import make_runner

except ImportError:
logger.warning(
"sample-factory is required for this example and is not installed. To install: pip install simulate[sf2]"
)
exit()

import simulate as sm
from simulate.assets.object import ProcGenPrimsMaze3D
from simulate.assets.sensors import RaycastSensor, StateSensor


def generate_map(index):
    """Build one 3x3 procedurally generated maze with an actor and a collectable.

    Passed as the map-generation callback to ``sm.RLEnv`` (see ``make_env_func``).

    Args:
        index: Map index supplied by the RLEnv map-pooling machinery. Unused
            here — every map is built the same way, with randomness coming from
            the module-level ``random`` state.

    Returns:
        The root ``ProcGenPrimsMaze3D`` node containing the floor, the
        egocentric-camera actor (with its reward functions) and the collectable.
    """
    maze_width = 3
    maze_depth = 3
    n_objects = 1

    maze = ProcGenPrimsMaze3D(maze_width, maze_depth, wall_material=sm.Material.YELLOW)
    # Flat floor slab spanning the maze footprint.
    maze += sm.Box(
        position=[0, 0, 0],
        bounds=[0.0, maze_width, 0, 0.1, 0.0, maze_depth],
        material=sm.Material.BLUE,
        with_collider=True,
    )

    # Spawn the actor at the center cell of the maze.
    actor_position = [math.floor(maze_width / 2.0) + 0.5, 0.5, math.floor(maze_depth / 2.0) + 0.5]
    actor = sm.EgocentricCameraActor(position=actor_position)
    # actor += StateSensor(actor, maze)
    # actor += RaycastSensor()
    maze += actor

    def sample_cell_center():
        # Random cell-center position on the maze floor.
        return [random.randint(0, maze_width - 1) + 0.5, 0.5, random.randint(0, maze_depth - 1) + 0.5]

    for _ in range(n_objects):
        position = sample_cell_center()
        while ((position[0] - actor_position[0]) ** 2 + (position[2] - actor_position[2]) ** 2) < 1.0:
            # avoid spawning a collectable on top of the actor's start cell
            position = sample_cell_center()

        collectable = sm.Sphere(position=position, radius=0.2, material=sm.Material.RED, with_collider=True)
        maze += collectable
        # Sparse reward: the episode terminates when the actor reaches the collectable.
        reward_function = sm.RewardFunction(
            type="sparse",
            entity_a=actor,
            entity_b=collectable,
            distance_metric="euclidean",
            threshold=0.5,
            is_terminal=True,
            is_collectable=False,
        )
        actor += reward_function

    # Penalize and terminate after 100 steps so episodes cannot run forever.
    timeout_reward_function = sm.RewardFunction(
        type="timeout",
        entity_a=actor,
        entity_b=actor,
        distance_metric="euclidean",
        threshold=100,
        is_terminal=True,
        scalar=-1.0,
    )
    actor += timeout_reward_function

    return maze


def make_env_func(full_env_name, cfg=None, env_config=None):
    """Environment factory registered with sample-factory via ``register_env``.

    Each environment instance gets its own engine port (base 56000, offset by
    ``env_config.env_id`` when an env_config is provided) so that parallel
    workers do not collide on the same port.
    """
    port_offset = 1 + env_config.env_id if env_config else 0
    return sm.RLEnv(
        generate_map,
        cfg.n_maps,
        cfg.n_show,
        engine_exe=cfg.build_exe,
        engine_port=56000 + port_offset,
    )


def add_simulate_env_args(parser: argparse.ArgumentParser) -> None:
    """Register the simulate-specific command-line options on *parser*."""
    # (flag, default, type, help) — all optional with sensible defaults.
    option_specs = (
        ("--build_exe", "builds/simulate_unity.x86_64", str, "Pre-built unity app for simulate"),
        ("--n_maps", 16, int, "Number of maps to spawn"),
        ("--n_show", 8, int, "Number of maps to show"),
    )
    for flag, default, arg_type, help_text in option_specs:
        parser.add_argument(flag, default=default, type=arg_type, required=False, help=help_text)


def simulate_override_defaults(parser: argparse.ArgumentParser) -> None:
    """Override sample-factory's default hyperparameters with values tuned for simulate."""
    overrides = {
        "encoder_conv_architecture": "convnet_atari",
        "nonlinearity": "relu",
        "rollout": 32,
        "num_epochs": 1,
        "env_framestack": 1,
        "num_workers": 8,
        "num_envs_per_worker": 2,
        "train_for_env_steps": 10000000,
        "normalize_input": True,
        "normalize_returns": False,
        "batched_sampling": True,
        "use_rnn": False,
    }
    parser.set_defaults(**overrides)


def parse_simulate_args(argv=None, evaluation=False):
    """Parse the full training config: sample-factory args plus simulate-specific ones.

    Builds the sample-factory argument parser, registers the simulate options,
    applies the tuned defaults, then performs the final full parse.
    """
    parser, _partial_cfg = parse_sf_args(argv, evaluation=evaluation)
    add_simulate_env_args(parser)
    simulate_override_defaults(parser)
    return parse_full_cfg(parser, argv)


def main():
    """Script entry point: parse config, register the env, and run training."""
    cfg = parse_simulate_args()

    # Explicitly create the runner instead of simply calling run_rl();
    # this allows us to register additional message handlers.
    cfg, runner = make_runner(cfg)
    register_env("simulate", make_env_func)

    status = runner.init()
    # Only launch training if initialization succeeded.
    return runner.run() if status == ExperimentStatus.SUCCESS else status


# Allow running this example directly as a script; propagate the runner's
# exit status to the shell.
if __name__ == "__main__":
    sys.exit(main())
17 changes: 12 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@
"gym==0.21.0", # For RL action spaces and API
"stable-baselines3"
]
SF2_REQUIRE = [
"gym==0.22.0", # For RL action spaces and API
# "sample-factory @ git+https://github.com/alex-petrenko/sample-factory.git@multi_input_model_builder",
# see discussion here, I tried everything and failed.
# https://stackoverflow.com/questions/32688688/how-to-write-setup-py-to-include-a-git-repository-as-a-dependency
]

DEV_REQUIRE = [
"gym==0.21.0", # For RL action spaces and API
Expand All @@ -97,11 +103,12 @@
QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"]

EXTRAS_REQUIRE = {
"rl" : RL_REQUIRE,
"sb3" : SB3_REQUIRE,
"dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE,
"test": TESTS_REQUIRE,
"quality": QUALITY_REQUIRE,
"rl": RL_REQUIRE,
"sb3": SB3_REQUIRE,
"sf2": SF2_REQUIRE,
"dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE,
"test": TESTS_REQUIRE,
"quality": QUALITY_REQUIRE,
}

if sys.platform == 'darwin':
Expand Down
23 changes: 19 additions & 4 deletions src/simulate/rl/rl_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,21 @@ def step_send_async(self, action):
# A list value for the action – we add the map/actor dimensions
if self.n_show == 1 and self.n_actors == 1:
action[key] = [[value]]
elif self.n_show > 1 and self.n_actors_per_map == 1:
action[key] = (
np.array(value).reshape((self.n_show, self.n_actors_per_map, -1)).tolist()
) # hacky reshape, sorry. (Ed)
else:
raise ValueError(
f"All actions must be list (maps) of list (actors) of list of floats/int (action). "
f"if the number of maps or actors is greater than 1 (in our case n_show: {self.n_show} "
f"and n_actors {self.n_actors})."
)
elif isinstance(value, np.ndarray) and len(value) > 0 and isinstance(value[0], (np.int64, np.float32)):
elif (
isinstance(value, np.ndarray)
and len(value) > 0
and isinstance(value[0], (np.int64, np.int32, np.float32))
):
# actions are a number array
value = value.reshape((self.n_show, self.n_actors_per_map, -1))
action[key] = value.tolist()
Expand All @@ -159,10 +167,9 @@ def step_recv_async(self):
# TODO nathan thinks we should make this for 1 agent, have a separate one for multiple agents.
obs = self._extract_sensor_obs(event["actor_sensor_buffers"])
reward = self._convert_to_numpy(event["actor_reward_buffer"]).flatten()
done = self._convert_to_numpy(event["actor_done_buffer"]).flatten()
done = self._convert_to_numpy(event["actor_done_buffer"]).flatten() > 0

obs = self._squeeze_actor_dimension(obs)

return obs, reward, done, [{}] * len(done)

def _squeeze_actor_dimension(self, obs):
Expand Down Expand Up @@ -206,8 +213,16 @@ def sample_action(self):
action = [self.action_space.sample() for _ in range(self.n_show)]
return np.array(action)

@property
def num_agents(self) -> int:
return self.n_show

@property
def is_multiagent(self) -> bool:
return self.n_show > 1

def env_is_wrapped(self):
return [False] * self.n_agents * self.n_parallel
return [False] * self.n_show * self.n_parallel

# required abstract methods

Expand Down