From bf5ce18017147f81b0dfd1c78de25ba87481e5db Mon Sep 17 00:00:00 2001
From: Shyam Sudhakaran
Date: Sat, 8 Oct 2022 18:38:30 -0700
Subject: [PATCH] Added some basic gym env registration

Make RLEnv a gym.Env subclass (calling super().__init__()) and add an
RLEnv.register(id, scene, **kwargs) classmethod. register() hands Gym a
factory through gym.envs.register, so a Simulate scene can afterwards be
instantiated with gym.make(id).

The factory closes over the scene object it was given and does not copy
it, so an environment built through gym.make wraps the very same scene
instance that was passed to register(). The new test asserts exactly
that and has to change if register() ever copies the scene.

The lunar lander example now creates its environment through the
registry, and a unit test covers registration.

---
Reviewer note: the line structure of this patch was reconstructed from
the hunk line counts. Indentation and the position of blank context
lines are inferred and should be checked against the target tree before
applying. The blob ids on the "index" lines predate the docstring and
naming touch-ups made to the added lines.

 examples/advanced/lunar_lander.py             |  5 +-
 src/simulate/rl/rl_env.py                     | 14 ++++-
 .../test_wrappers/test_register_env.py        | 60 +++++++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_rl/test_wrappers/test_register_env.py

diff --git a/examples/advanced/lunar_lander.py b/examples/advanced/lunar_lander.py
index a8f13e92..e7caf54f 100644
--- a/examples/advanced/lunar_lander.py
+++ b/examples/advanced/lunar_lander.py
@@ -17,6 +17,7 @@
 import argparse
 import time
 
+import gym
 import numpy as np
 
 import simulate as sm
@@ -238,7 +239,9 @@ def make_lander(engine="unity", engine_exe=""):
     sc = make_lander(engine="unity", engine_exe=args.build_exe)
     sc += sm.LightSun()
 
-    env = sm.RLEnv(sc, frame_skip=1)
+    sm.RLEnv.register("SimulateLunarLander-v0", sc, frame_skip=1)
+    env = gym.make("SimulateLunarLander-v0")
+
     env.reset()
 
     for i in range(500):
diff --git a/src/simulate/rl/rl_env.py b/src/simulate/rl/rl_env.py
index 8f3ddf84..e5064d75 100644
--- a/src/simulate/rl/rl_env.py
+++ b/src/simulate/rl/rl_env.py
@@ -13,14 +13,16 @@
 # limitations under the License.
 
 # Lint as: python3
+
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
+import gym
 import numpy as np
 
 from simulate.scene import Scene
 
 
-class RLEnv:
+class RLEnv(gym.Env):
     """
     The basic RL environment wrapper for Simulate scene following the Gym API.
 
@@ -41,6 +43,7 @@ def __init__(
         time_step: Optional[float] = 1 / 30.0,
         frame_skip: Optional[int] = 4,
     ):
+        super().__init__()
 
         self.scene = scene
 
@@ -79,6 +82,14 @@ def __init__(
             n_show=1,
         )
 
+    @classmethod
+    def register(cls, id: str, scene: Scene, **kwargs):
+        def create_env(**env_kwargs):
+            # TODO: maybe change this to scene.copy()?
+            return cls(scene=scene, **env_kwargs)
+
+        gym.envs.register(id=id, entry_point=create_env, kwargs=kwargs)
+
     def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray, np.ndarray, Dict]:
         """
         The step function for the environment, follows the API from OpenAI Gym.
@@ -99,7 +110,6 @@ def step(self, action: Union[Dict, List, np.ndarray]) -> Tuple[Dict, np.ndarray,
                 A dictionary of additional information.
         """
         self.step_send_async(action=action)
-
         # receive and return event data from the engine
         return self.step_recv_async()
 
diff --git a/tests/test_rl/test_wrappers/test_register_env.py b/tests/test_rl/test_wrappers/test_register_env.py
new file mode 100644
index 00000000..df6c6ec2
--- /dev/null
+++ b/tests/test_rl/test_wrappers/test_register_env.py
@@ -0,0 +1,60 @@
+# Copyright 2022 The HuggingFace Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""A very simple test for registering simulate scenes as gym environments."""
+
+import random
+import unittest
+
+import gym
+import pytest
+
+import simulate as sm
+
+
+def create_scene():
+    """Create a simple scene with a single actor and a single target."""
+    scene = sm.Scene() + sm.LightSun()
+    scene += sm.Box(name="floor", position=[0, 0, 0], bounds=[-11, 11, 0, 0.1, -11, 51], material=sm.Material.BLUE)
+    scene += sm.Box(name="wall1", position=[-10, 0, 0], bounds=[0, 0.1, 0, 1, -10, 10], material=sm.Material.GRAY)
+    scene += sm.Box(name="wall2", position=[10, 0, 0], bounds=[0, 0.1, 0, 1, -10, 10], material=sm.Material.GRAY)
+    scene += sm.Box(name="wall3", position=[0, 0, 10], bounds=[-10, 10, 0, 1, 0, 0.1], material=sm.Material.GRAY)
+    scene += sm.Box(name="wall4", position=[0, 0, -10], bounds=[-10, 10, 0, 1, 0, 0.1], material=sm.Material.GRAY)
+    collectable = sm.Sphere(position=[random.uniform(-9, 9), 0.5, random.uniform(-9, 9)], material=sm.Material.GREEN)
+    actor = sm.EgocentricCameraActor()
+    reward = sm.RewardFunction("sparse", entity_a=actor, entity_b=collectable, is_collectable=True)
+    scene += [collectable, actor, reward]
+    return scene
+
+
+class TestRegisterEnv(unittest.TestCase):
+    def test_register_env(self):
+        scene = create_scene()
+
+        # create environment without gym
+        env = sm.RLEnv(scene=scene, time_step=1 / 30.0, frame_skip=4)
+
+        # register environment
+        try:
+            sm.RLEnv.register("TestRegisterEnv-v0", scene=scene, time_step=1 / 30.0, frame_skip=4)
+            gym_env = gym.make("TestRegisterEnv-v0")
+        except Exception as e:
+            pytest.fail(f"Failed to register environment with exception {e}!")
+
+        # Check if RLEnv is a gym.Env
+        assert isinstance(gym_env, gym.Env)
+
+        # This may need to change if sm.RLEnv.register copies the scene before
+        assert env.scene == gym_env.scene