Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions examples/rl/sf2/sf2_procgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright 2022 The HuggingFace Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3

import argparse
import math
import random
import sys

from simulate import logging


logger = logging.get_logger(__name__)

try:
from sample_factory.algo.utils.misc import ExperimentStatus
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env
from sample_factory.train import make_runner

except ImportError:
logger.warning(
"sample-factory is required for this example and is not installed. To install: pip install simulate[sf2]"
)
exit()

import simulate as sm
from simulate.assets.object import ProcGenPrimsMaze3D
from simulate.assets.sensors import RaycastSensor, StateSensor


def generate_map(index):
    """Build one 3x3 procedurally generated maze with an actor and a collectable.

    Passed as the map-generation callback to ``sm.RLEnv`` (see ``make_env_func``).

    Args:
        index: Map index supplied by the RLEnv map-pooling machinery. Unused
            here — every map is built the same way, with randomness coming from
            the module-level ``random`` state.

    Returns:
        The root ``ProcGenPrimsMaze3D`` node containing the floor, the
        egocentric-camera actor (with its reward functions) and the collectable.
    """
    maze_width = 3
    maze_depth = 3
    n_objects = 1

    maze = ProcGenPrimsMaze3D(maze_width, maze_depth, wall_material=sm.Material.YELLOW)
    # Flat floor slab spanning the maze footprint.
    maze += sm.Box(
        position=[0, 0, 0],
        bounds=[0.0, maze_width, 0, 0.1, 0.0, maze_depth],
        material=sm.Material.BLUE,
        with_collider=True,
    )

    # Spawn the actor at the center cell of the maze.
    actor_position = [math.floor(maze_width / 2.0) + 0.5, 0.5, math.floor(maze_depth / 2.0) + 0.5]
    actor = sm.EgocentricCameraActor(position=actor_position)
    # actor += StateSensor(actor, maze)
    # actor += RaycastSensor()
    maze += actor

    def sample_cell_center():
        # Random cell-center position on the maze floor.
        return [random.randint(0, maze_width - 1) + 0.5, 0.5, random.randint(0, maze_depth - 1) + 0.5]

    for _ in range(n_objects):
        position = sample_cell_center()
        while ((position[0] - actor_position[0]) ** 2 + (position[2] - actor_position[2]) ** 2) < 1.0:
            # avoid spawning a collectable on top of the actor's start cell
            position = sample_cell_center()

        collectable = sm.Sphere(position=position, radius=0.2, material=sm.Material.RED, with_collider=True)
        maze += collectable
        # Sparse reward: the episode terminates when the actor reaches the collectable.
        reward_function = sm.RewardFunction(
            type="sparse",
            entity_a=actor,
            entity_b=collectable,
            distance_metric="euclidean",
            threshold=0.5,
            is_terminal=True,
            is_collectable=False,
        )
        actor += reward_function

    # Penalize and terminate after 100 steps so episodes cannot run forever.
    timeout_reward_function = sm.RewardFunction(
        type="timeout",
        entity_a=actor,
        entity_b=actor,
        distance_metric="euclidean",
        threshold=100,
        is_terminal=True,
        scalar=-1.0,
    )
    actor += timeout_reward_function

    return maze


def make_env_func(full_env_name, cfg=None, env_config=None):
    """Environment factory registered with sample-factory via ``register_env``.

    Each environment instance gets its own engine port (base 56000, offset by
    ``env_config.env_id`` when an env_config is provided) so that parallel
    workers do not collide on the same port.
    """
    port_offset = 1 + env_config.env_id if env_config else 0
    return sm.RLEnv(
        generate_map,
        cfg.n_maps,
        cfg.n_show,
        engine_exe=cfg.build_exe,
        engine_port=56000 + port_offset,
    )


def add_simulate_env_args(parser: argparse.ArgumentParser) -> None:
    """Register the simulate-specific command-line options on *parser*."""
    # (flag, default, type, help) — all optional with sensible defaults.
    option_specs = (
        ("--build_exe", "builds/simulate_unity.x86_64", str, "Pre-built unity app for simulate"),
        ("--n_maps", 16, int, "Number of maps to spawn"),
        ("--n_show", 8, int, "Number of maps to show"),
    )
    for flag, default, arg_type, help_text in option_specs:
        parser.add_argument(flag, default=default, type=arg_type, required=False, help=help_text)


def simulate_override_defaults(parser: argparse.ArgumentParser) -> None:
    """Override sample-factory's default hyperparameters with values tuned for simulate."""
    overrides = {
        "encoder_conv_architecture": "convnet_atari",
        "nonlinearity": "relu",
        "rollout": 32,
        "num_epochs": 1,
        "env_framestack": 1,
        "num_workers": 8,
        "num_envs_per_worker": 2,
        "train_for_env_steps": 10000000,
        "normalize_input": True,
        "normalize_returns": False,
        "batched_sampling": True,
        "use_rnn": False,
    }
    parser.set_defaults(**overrides)


def parse_simulate_args(argv=None, evaluation=False):
    """Parse the full training config: sample-factory args plus simulate-specific ones.

    Builds the sample-factory argument parser, registers the simulate options,
    applies the tuned defaults, then performs the final full parse.
    """
    parser, _partial_cfg = parse_sf_args(argv, evaluation=evaluation)
    add_simulate_env_args(parser)
    simulate_override_defaults(parser)
    return parse_full_cfg(parser, argv)


def main():
    """Script entry point: parse config, register the env, and run training."""
    cfg = parse_simulate_args()

    # Explicitly create the runner instead of simply calling run_rl();
    # this allows us to register additional message handlers.
    cfg, runner = make_runner(cfg)
    register_env("simulate", make_env_func)

    status = runner.init()
    # Only launch training if initialization succeeded.
    return runner.run() if status == ExperimentStatus.SUCCESS else status


# Allow running this example directly as a script; propagate the runner's
# exit status to the shell.
if __name__ == "__main__":
    sys.exit(main())
17 changes: 12 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@
"gym==0.21.0", # For RL action spaces and API
"stable-baselines3"
]
SF2_REQUIRE = [
"gym==0.22.0", # For RL action spaces and API
# "sample-factory @ git+https://github.com/alex-petrenko/sample-factory.git@multi_input_model_builder",
# see discussion here, I tried everything and failed.
# https://stackoverflow.com/questions/32688688/how-to-write-setup-py-to-include-a-git-repository-as-a-dependency
]

DEV_REQUIRE = [
"gym==0.21.0", # For RL action spaces and API
Expand All @@ -97,11 +103,12 @@
QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"]

EXTRAS_REQUIRE = {
"rl" : RL_REQUIRE,
"sb3" : SB3_REQUIRE,
"dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE,
"test": TESTS_REQUIRE,
"quality": QUALITY_REQUIRE,
"rl": RL_REQUIRE,
"sb3": SB3_REQUIRE,
"sf2": SF2_REQUIRE,
"dev": DEV_REQUIRE + TESTS_REQUIRE + QUALITY_REQUIRE,
"test": TESTS_REQUIRE,
"quality": QUALITY_REQUIRE,
}

if sys.platform == 'darwin':
Expand Down
23 changes: 19 additions & 4 deletions src/simulate/rl/rl_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,21 @@ def step_send_async(self, action):
# A list value for the action – we add the map/actor dimensions
if self.n_show == 1 and self.n_actors == 1:
action[key] = [[value]]
elif self.n_show > 1 and self.n_actors_per_map == 1:
action[key] = (
np.array(value).reshape((self.n_show, self.n_actors_per_map, -1)).tolist()
) # hacky reshape, sorry. (Ed)
else:
raise ValueError(
f"All actions must be list (maps) of list (actors) of list of floats/int (action). "
f"if the number of maps or actors is greater than 1 (in our case n_show: {self.n_show} "
f"and n_actors {self.n_actors})."
)
elif isinstance(value, np.ndarray) and len(value) > 0 and isinstance(value[0], (np.int64, np.float32)):
elif (
isinstance(value, np.ndarray)
and len(value) > 0
and isinstance(value[0], (np.int64, np.int32, np.float32))
):
# actions are a number array
value = value.reshape((self.n_show, self.n_actors_per_map, -1))
action[key] = value.tolist()
Expand All @@ -159,10 +167,9 @@ def step_recv_async(self):
# TODO nathan thinks we should make this for 1 agent, have a separate one for multiple agents.
obs = self._extract_sensor_obs(event["actor_sensor_buffers"])
reward = self._convert_to_numpy(event["actor_reward_buffer"]).flatten()
done = self._convert_to_numpy(event["actor_done_buffer"]).flatten()
done = self._convert_to_numpy(event["actor_done_buffer"]).flatten() > 0

obs = self._squeeze_actor_dimension(obs)

return obs, reward, done, [{}] * len(done)

def _squeeze_actor_dimension(self, obs):
Expand Down Expand Up @@ -206,8 +213,16 @@ def sample_action(self):
action = [self.action_space.sample() for _ in range(self.n_show)]
return np.array(action)

@property
def num_agents(self) -> int:
return self.n_show

@property
def is_multiagent(self) -> bool:
return self.n_show > 1

def env_is_wrapped(self):
return [False] * self.n_agents * self.n_parallel
return [False] * self.n_show * self.n_parallel

# required abstract methods

Expand Down