From c09fc066a09da418f64b3b072045aef085045cd6 Mon Sep 17 00:00:00 2001 From: Edward Beeching Date: Wed, 28 Sep 2022 07:06:09 +0200 Subject: [PATCH 1/3] adds sf2 integration --- examples/rl/sf2/sf2_procgen.py | 151 +++++++++++++++++++++++++++++++++ setup.py | 15 ++-- src/simulate/rl/rl_env.py | 19 +++-- 3 files changed, 175 insertions(+), 10 deletions(-) create mode 100644 examples/rl/sf2/sf2_procgen.py diff --git a/examples/rl/sf2/sf2_procgen.py b/examples/rl/sf2/sf2_procgen.py new file mode 100644 index 00000000..3abce93c --- /dev/null +++ b/examples/rl/sf2/sf2_procgen.py @@ -0,0 +1,151 @@ +# Copyright 2022 The HuggingFace Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +import sys +import random +import math +import argparse + +from simulate import logging + + +logger = logging.get_logger(__name__) + +try: + from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args + from sample_factory.train import make_runner + from sample_factory.envs.env_utils import register_env + from sample_factory.algo.utils.misc import ExperimentStatus + +except ImportError: + logger.warning( + "sample-factory is required for this example and is not installed. To install: pip install simulate[sf2]" + ) + exit() + +import simulate as sm +from simulate.assets.object import ProcGenPrimsMaze3D +from simulate.assets.sensors import RaycastSensor, StateSensor + +def generate_map(index): + + maze_width = 3 + maze_depth = 3 + n_objects = 1 + maze = ProcGenPrimsMaze3D(maze_width, maze_depth, wall_material=sm.Material.YELLOW) + maze += sm.Box( + position=[0, 0, 0], + bounds=[0.0, maze_width, 0, 0.1, 0.0, maze_depth], + material=sm.Material.BLUE, + with_collider=True, + ) + actor_position = [math.floor(maze_width / 2.0) + 0.5, 0.5, math.floor(maze_depth / 2.0) + 0.5] + + actor = sm.EgocentricCameraActor(position=actor_position) + # actor += StateSensor(actor, maze) + # actor += RaycastSensor() + + maze += actor + + for r in range(n_objects): + position = [random.randint(0, maze_width - 1) + 0.5, 0.5, random.randint(0, maze_depth - 1) + 0.5] + while ((position[0] - actor_position[0]) ** 2 + (position[2] - actor_position[2]) ** 2) < 1.0: + # avoid overlapping collectables + position = [random.randint(0, maze_width - 1) + 0.5, 0.5, random.randint(0, maze_depth - 1) + 0.5] + + collectable = sm.Sphere(position=position, radius=0.2, material=sm.Material.RED, with_collider=True) + maze += collectable + reward_function = sm.RewardFunction( + type="sparse", + entity_a=actor, + entity_b=collectable, + distance_metric="euclidean", + threshold=0.5, + is_terminal=True, + is_collectable=False, + ) + actor += reward_function + + timeout_reward_function = sm.RewardFunction( + type="timeout", + entity_a=actor, + entity_b=actor, + distance_metric="euclidean", + threshold=100, + is_terminal=True, + scalar=-1.0, + ) + actor += timeout_reward_function + + return maze + + +def make_env_func(full_env_name, cfg=None, env_config=None): + port = 56000 + if env_config: + port += 1 + env_config.env_id + + return sm.RLEnv(generate_map, cfg.n_maps, cfg.n_show, engine_exe=cfg.build_exe, engine_port=port) + +def add_simulate_env_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--build_exe", default="builds/simulate_unity.x86_64", type=str, required=False, help="Pre-built unity app for simulate") + parser.add_argument("--n_maps", default=16, type=int, required=False, help="Number of maps to spawn") + parser.add_argument("--n_show", default=8, type=int, required=False, help="Number of maps to show") + +def simulate_override_defaults(parser: argparse.ArgumentParser) -> None: + parser.set_defaults( + encoder_conv_architecture="convnet_atari", + nonlinearity="relu", + rollout=32, + num_epochs=1, + env_framestack=1, + num_workers=8, + num_envs_per_worker=2, + + train_for_env_steps=10000000, + normalize_input=True, + normalize_returns=False, + batched_sampling=True, + use_rnn=False, + + ) + +def parse_simulate_args(argv=None, evaluation=False): + parser, cfg = parse_sf_args(argv, evaluation=evaluation) + add_simulate_env_args(parser) + simulate_override_defaults(parser) + cfg = parse_full_cfg(parser, argv) + return cfg + + +def main(): + """Script entry point.""" + cfg = parse_simulate_args() + + # explicitly create the runner instead of simply calling run_rl() + # this allows us to register additional message handlers + cfg, runner = make_runner(cfg) + register_env("simulate", make_env_func) + + status = runner.init() + if status == ExperimentStatus.SUCCESS: + status = runner.run() + + return status + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/setup.py b/setup.py index dba2236c..b2377d06 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,10 @@ "gym==0.21.0", # For RL action spaces and API "stable-baselines3" ] +SF2_REQUIRE = [ + "gym==0.22.0", # For RL action spaces and API + "git+https://github.com/alex-petrenko/sample-factory/tree/multi_input_model_builder" +] DEV_REQUIRE = [ "gym==0.21.0", # For RL action spaces and API @@ -97,11 +101,12 @@ QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"] EXTRAS_REQUIRE = { - "rl" : RL_REQUIRE, - "sb3" : SB3_REQUIRE, - "dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE, - "test": TESTS_REQUIRE, - "quality": QUALITY_REQUIRE, + "rl" : RL_REQUIRE, + "sb3" : SB3_REQUIRE, + "sf2" : SF2_REQUIRE, + "dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE, + "test": TESTS_REQUIRE, + "quality": QUALITY_REQUIRE, } if sys.platform == 'darwin': diff --git a/src/simulate/rl/rl_env.py b/src/simulate/rl/rl_env.py index d55d4e43..a16c686a 100644 --- a/src/simulate/rl/rl_env.py +++ b/src/simulate/rl/rl_env.py @@ -139,13 +139,15 @@ def step_send_async(self, action): # A list value for the action – we add the map/actor dimensions if self.n_show == 1 and self.n_actors == 1: action[key] = [[value]] + elif self.n_show > 1 and self.n_actors_per_map == 1: + action[key] = np.array(value).reshape((self.n_show, self.n_actors_per_map, -1)).tolist() # hacky reshape, sorry. (Ed) else: raise ValueError( f"All actions must be list (maps) of list (actors) of list of floats/int (action). " f"if the number of maps or actors is greater than 1 (in our case n_show: {self.n_show} " f"and n_actors {self.n_actors})." ) - elif isinstance(value, np.ndarray) and len(value) > 0 and isinstance(value[0], (np.int64, np.float32)): + elif isinstance(value, np.ndarray) and len(value) > 0 and isinstance(value[0], (np.int64, np.int32, np.float32)): # actions are a number array value = value.reshape((self.n_show, self.n_actors_per_map, -1)) action[key] = value.tolist() @@ -159,11 +161,10 @@ def step_recv_async(self): # TODO nathan thinks we should make this for 1 agent, have a separate one for multiple agents. obs = self._extract_sensor_obs(event["actor_sensor_buffers"]) reward = self._convert_to_numpy(event["actor_reward_buffer"]).flatten() - done = self._convert_to_numpy(event["actor_done_buffer"]).flatten() + done = self._convert_to_numpy(event["actor_done_buffer"]).flatten() > 0 obs = self._squeeze_actor_dimension(obs) - - return obs, reward, done, [{}] * len(done) + return obs, reward, done , [{}] * len(done) def _squeeze_actor_dimension(self, obs): for k, v in obs.items(): @@ -206,8 +207,16 @@ def sample_action(self): action = [self.action_space.sample() for _ in range(self.n_show)] return np.array(action) + @property + def num_agents(self) -> int: + return self.n_show + + @property + def is_multiagent(self) -> bool: + return self.n_show > 1 + def env_is_wrapped(self): - return [False] * self.n_agents * self.n_parallel + return [False] * self.n_show * self.n_parallel # required abstract methods From aeda94a1578f9b0c8d40e65f5e7871ff6a93dc2f Mon Sep 17 00:00:00 2001 From: Edward Beeching Date: Wed, 28 Sep 2022 09:34:34 +0200 Subject: [PATCH 2/3] tried (and failed) to add install from git branch in setup.py --- setup.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index b2377d06..94759206 100644 --- a/setup.py +++ b/setup.py @@ -77,8 +77,10 @@ "stable-baselines3" ] SF2_REQUIRE = [ - "gym==0.22.0", # For RL action spaces and API - "git+https://github.com/alex-petrenko/sample-factory/tree/multi_input_model_builder" + "gym==0.22.0", # For RL action spaces and API + # "sample-factory @ git+https://github.com/alex-petrenko/sample-factory.git@multi_input_model_builder", + # see discussion here, I tried everything and failed. + # https://stackoverflow.com/questions/32688688/how-to-write-setup-py-to-include-a-git-repository-as-a-dependency ] DEV_REQUIRE = [ @@ -101,9 +103,9 @@ QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"] EXTRAS_REQUIRE = { - "rl" : RL_REQUIRE, - "sb3" : SB3_REQUIRE, - "sf2" : SF2_REQUIRE, + "rl": RL_REQUIRE, + "sb3": SB3_REQUIRE, + "sf2": SF2_REQUIRE, "dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE, "test": TESTS_REQUIRE, "quality": QUALITY_REQUIRE, From f4efa5b9a4030079cc27408f608434c0b6f77164 Mon Sep 17 00:00:00 2001 From: Edward Beeching Date: Wed, 28 Sep 2022 09:37:38 +0200 Subject: [PATCH 3/3] make style --- examples/rl/sf2/sf2_procgen.py | 28 ++++++++++++++++++---------- src/simulate/rl/rl_env.py | 12 +++++++++--- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/examples/rl/sf2/sf2_procgen.py b/examples/rl/sf2/sf2_procgen.py index 3abce93c..d545be5c 100644 --- a/examples/rl/sf2/sf2_procgen.py +++ b/examples/rl/sf2/sf2_procgen.py @@ -14,10 +14,10 @@ # Lint as: python3 -import sys -import random -import math import argparse +import math +import random +import sys from simulate import logging @@ -25,10 +25,10 @@ logger = logging.get_logger(__name__) try: + from sample_factory.algo.utils.misc import ExperimentStatus from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args - from sample_factory.train import make_runner from sample_factory.envs.env_utils import register_env - from sample_factory.algo.utils.misc import ExperimentStatus + from sample_factory.train import make_runner except ImportError: logger.warning( @@ -40,6 +40,7 @@ from simulate.assets.object import ProcGenPrimsMaze3D from simulate.assets.sensors import RaycastSensor, StateSensor + def generate_map(index): maze_width = 3 @@ -99,12 +100,20 @@ def make_env_func(full_env_name, cfg=None, env_config=None): port += 1 + env_config.env_id return sm.RLEnv(generate_map, cfg.n_maps, cfg.n_show, engine_exe=cfg.build_exe, engine_port=port) - + + def add_simulate_env_args(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--build_exe", default="builds/simulate_unity.x86_64", type=str, required=False, help="Pre-built unity app for simulate") + parser.add_argument( + "--build_exe", + default="builds/simulate_unity.x86_64", + type=str, + required=False, + help="Pre-built unity app for simulate", + ) parser.add_argument("--n_maps", default=16, type=int, required=False, help="Number of maps to spawn") parser.add_argument("--n_show", default=8, type=int, required=False, help="Number of maps to show") + def simulate_override_defaults(parser: argparse.ArgumentParser) -> None: parser.set_defaults( encoder_conv_architecture="convnet_atari", @@ -114,15 +123,14 @@ def simulate_override_defaults(parser: argparse.ArgumentParser) -> None: env_framestack=1, num_workers=8, num_envs_per_worker=2, - train_for_env_steps=10000000, normalize_input=True, normalize_returns=False, batched_sampling=True, use_rnn=False, - ) + def parse_simulate_args(argv=None, evaluation=False): parser, cfg = parse_sf_args(argv, evaluation=evaluation) add_simulate_env_args(parser) @@ -148,4 +156,4 @@ def main(): if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/src/simulate/rl/rl_env.py b/src/simulate/rl/rl_env.py index a16c686a..c297bebd 100644 --- a/src/simulate/rl/rl_env.py +++ b/src/simulate/rl/rl_env.py @@ -140,14 +140,20 @@ def step_send_async(self, action): if self.n_show == 1 and self.n_actors == 1: action[key] = [[value]] elif self.n_show > 1 and self.n_actors_per_map == 1: - action[key] = np.array(value).reshape((self.n_show, self.n_actors_per_map, -1)).tolist() # hacky reshape, sorry. (Ed) + action[key] = ( + np.array(value).reshape((self.n_show, self.n_actors_per_map, -1)).tolist() + ) # hacky reshape, sorry. (Ed) else: raise ValueError( f"All actions must be list (maps) of list (actors) of list of floats/int (action). " f"if the number of maps or actors is greater than 1 (in our case n_show: {self.n_show} " f"and n_actors {self.n_actors})." ) - elif isinstance(value, np.ndarray) and len(value) > 0 and isinstance(value[0], (np.int64, np.int32, np.float32)): + elif ( + isinstance(value, np.ndarray) + and len(value) > 0 + and isinstance(value[0], (np.int64, np.int32, np.float32)) + ): # actions are a number array value = value.reshape((self.n_show, self.n_actors_per_map, -1)) action[key] = value.tolist() @@ -164,7 +170,7 @@ def step_recv_async(self): done = self._convert_to_numpy(event["actor_done_buffer"]).flatten() > 0 obs = self._squeeze_actor_dimension(obs) - return obs, reward, done , [{}] * len(done) + return obs, reward, done, [{}] * len(done) def _squeeze_actor_dimension(self, obs): for k, v in obs.items():