diff --git a/environments/minigrid/.gitignore b/environments/minigrid/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/environments/minigrid/LICENCE b/environments/minigrid/LICENCE new file mode 100644 index 00000000..a1a92b70 --- /dev/null +++ b/environments/minigrid/LICENCE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 Maxime Chevalier-Boisvert + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/environments/minigrid/Makefile b/environments/minigrid/Makefile new file mode 100644 index 00000000..79c54efc --- /dev/null +++ b/environments/minigrid/Makefile @@ -0,0 +1,19 @@ +.PHONY: quality style test + +# Check that source code meets quality standards + +quality: + black --check --line-length 119 --target-version py38 . + isort --check-only tests src + flake8 . + +# Format source code automatically + +style: + black --line-length 119 --target-version py38 . + isort . 
+ +# Run tests for the library + +test: + python -m pytest -n auto --dist=loadfile -s -v ./tests/ \ No newline at end of file diff --git a/environments/minigrid/README.md b/environments/minigrid/README.md new file mode 100644 index 00000000..a8f882cd --- /dev/null +++ b/environments/minigrid/README.md @@ -0,0 +1,31 @@ +# Minigrid-like environment + +Minimalistic gridworld environment inspired by [Minigrid](https://github.com/Farama-Foundation/gym-minigrid) + +## Installation +Create a virtual env, activate it, and then install `simenv`: + +``` +cd .. && git clone https://github.com/huggingface/simenv.git +cd simenv +pip install -e ".[dev]" +``` + +Then install the `minigrid` package: + +``` +cd environments/minigrid +pip install -e ".[dev]" +``` + +And it's done! + +### Style + +Before you merge a PR, fix the style (we use `isort` + `black`) +``` +make style +``` + +## Basic Usage + diff --git a/environments/minigrid/assets/textures/agent.png b/environments/minigrid/assets/textures/agent.png new file mode 100644 index 00000000..e8f34827 Binary files /dev/null and b/environments/minigrid/assets/textures/agent.png differ diff --git a/environments/minigrid/assets/textures/ball.png b/environments/minigrid/assets/textures/ball.png new file mode 100644 index 00000000..939df79f Binary files /dev/null and b/environments/minigrid/assets/textures/ball.png differ diff --git a/environments/minigrid/assets/textures/box.png b/environments/minigrid/assets/textures/box.png new file mode 100644 index 00000000..d931b315 Binary files /dev/null and b/environments/minigrid/assets/textures/box.png differ diff --git a/environments/minigrid/assets/textures/crossing.png b/environments/minigrid/assets/textures/crossing.png new file mode 100644 index 00000000..f24d663f Binary files /dev/null and b/environments/minigrid/assets/textures/crossing.png differ diff --git a/environments/minigrid/assets/textures/door_closed.png b/environments/minigrid/assets/textures/door_closed.png new file mode 
100644 index 00000000..e126369c Binary files /dev/null and b/environments/minigrid/assets/textures/door_closed.png differ diff --git a/environments/minigrid/assets/textures/door_locked.png b/environments/minigrid/assets/textures/door_locked.png new file mode 100644 index 00000000..e4b21701 Binary files /dev/null and b/environments/minigrid/assets/textures/door_locked.png differ diff --git a/environments/minigrid/assets/textures/door_opened.png b/environments/minigrid/assets/textures/door_opened.png new file mode 100644 index 00000000..180e0289 Binary files /dev/null and b/environments/minigrid/assets/textures/door_opened.png differ diff --git a/environments/minigrid/assets/textures/grid_tile.png b/environments/minigrid/assets/textures/grid_tile.png new file mode 100644 index 00000000..8a1e2d91 Binary files /dev/null and b/environments/minigrid/assets/textures/grid_tile.png differ diff --git a/environments/minigrid/assets/textures/key.png b/environments/minigrid/assets/textures/key.png new file mode 100644 index 00000000..d58b3a3b Binary files /dev/null and b/environments/minigrid/assets/textures/key.png differ diff --git a/environments/minigrid/assets/textures/lava.png b/environments/minigrid/assets/textures/lava.png new file mode 100644 index 00000000..478c5038 Binary files /dev/null and b/environments/minigrid/assets/textures/lava.png differ diff --git a/environments/minigrid/setup.py b/environments/minigrid/setup.py new file mode 100644 index 00000000..3cd825fa --- /dev/null +++ b/environments/minigrid/setup.py @@ -0,0 +1,94 @@ +# Lint as: python3 +""" HuggingFace/minigrid is a simple gridworld environment for RL. + +Note: + + VERSION needs to be formatted following the MAJOR.MINOR.PATCH convention + (we need to follow this convention to be able to retrieve versioned scripts) + +Simple check list for release from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py + +To create the package for pypi. + +0. 
Prerequisites: + - Dependencies: + - twine: "pip install twine" + - Create an account in (and join the 'simenv' project): + - PyPI: https://pypi.org/ + - Test PyPI: https://test.pypi.org/ + +1. Change the version in: + - __init__.py + - setup.py + +2. Commit these changes: "git commit -m 'Release: VERSION'" + +3. Add a tag in git to mark the release: "git tag VERSION -m 'Add tag VERSION for pypi'" + Push the tag to remote: git push --tags origin main + +4. Build both the sources and the wheel. Do not change anything in setup.py between + creating the wheel and the source distribution (obviously). + + First, delete any "build" directory that may exist from previous builds. + + For the wheel, run: "python setup.py bdist_wheel" in the top level directory. + (this will build a wheel for the python version you use to build it). + + For the sources, run: "python setup.py sdist" + You should now have a /dist directory with both .whl and .tar.gz source versions. + +5. Check that everything looks correct by uploading the package to the pypi test server: + + twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/ + + Check that you can install it in a virtualenv/notebook by running: + pip install -i https://testpypi.python.org/pypi simenv + +6. Upload the final version to actual pypi: + twine upload dist/* -r pypi + +7. Fill release notes in the tag in github once everything is looking hunky-dory. + +8. Change the version in __init__.py and setup.py to X.X.X+1.dev0 (e.g. VERSION=1.18.3 -> 1.18.4.dev0). 
+ Then push the change with a message 'set dev version' +""" + +import os +import sys + +from setuptools import find_packages, setup + + +REQUIRED_PKGS = [ + "dataclasses_json", # For GLTF export/imports + "numpy>=1.17", # We use numpy>=1.17 to have np.random.Generator + "simenv", +] + +QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"] + +TESTS_REQUIRE = [ + # test dependencies +] + +EXTRAS_REQUIRE = { + "dev": TESTS_REQUIRE + QUALITY_REQUIRE, + "tests": TESTS_REQUIRE, + "quality": QUALITY_REQUIRE, +} + +setup( + name="minigrid", + description="HuggingFace simple gridworld environment for RL.", + long_description=open("README.md", encoding="utf-8").read(), + long_description_content_type="text/markdown", + author="HuggingFace Inc.", + author_email="carl@huggingface.co", + license="Apache 2.0", + version="0.0.1.dev0", + package_dir={"": "src"}, + packages=find_packages("src"), + install_requires=REQUIRED_PKGS, + extras_require=EXTRAS_REQUIRE, + keywords="simulation environments grid world reinforcement machine learning", +) diff --git a/environments/minigrid/src/minigrid/__init__.py b/environments/minigrid/src/minigrid/__init__.py new file mode 100644 index 00000000..22a9f2e0 --- /dev/null +++ b/environments/minigrid/src/minigrid/__init__.py @@ -0,0 +1 @@ +import minigrid.envs \ No newline at end of file diff --git a/environments/minigrid/src/minigrid/envs/__init__.py b/environments/minigrid/src/minigrid/envs/__init__.py new file mode 100644 index 00000000..078b90f5 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/__init__.py @@ -0,0 +1,21 @@ +from minigrid.envs.empty import * +from minigrid.envs.doorkey import * +from minigrid.envs.multiroom import * +from minigrid.envs.fetch import * +from minigrid.envs.gotoobject import * +from minigrid.envs.gotodoor import * +from minigrid.envs.putnear import * +from minigrid.envs.lockedroom import * +from minigrid.envs.keycorridor import * +from minigrid.envs.unlock import * +from 
minigrid.envs.unlockpickup import * +from minigrid.envs.blockedunlockpickup import * +from minigrid.envs.playground_v0 import * +from minigrid.envs.redbluedoors import * +from minigrid.envs.obstructedmaze import * +from minigrid.envs.memory import * +from minigrid.envs.fourrooms import * +from minigrid.envs.crossing import * +from minigrid.envs.lavagap import * +from minigrid.envs.dynamicobstacles import * +from minigrid.envs.distshift import * \ No newline at end of file diff --git a/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py new file mode 100644 index 00000000..9d4303d7 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py @@ -0,0 +1,47 @@ +from minigrid.minigrid import Ball +from minigrid.roomgrid import RoomGrid + + +class BlockedUnlockPickup(RoomGrid): + """ + Unlock a door blocked by a ball, then pick up a box + in another room + """ + + def __init__(self, seed=None): + room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=16*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Add a box to the room on the right + obj, _ = self.add_object(1, 0, kind="box") + # Make sure the two rooms are directly connected by a locked door + door, pos = self.add_door(0, 0, 0, locked=True) + # Block the door with a ball + color = self._rand_color() + self.grid.set(pos[0]-1, pos[1], Ball(color)) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.obj = obj + self.mission = "pick up the %s %s" % (obj.color, obj.type) + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git 
a/environments/minigrid/src/minigrid/envs/crossing.py b/environments/minigrid/src/minigrid/envs/crossing.py new file mode 100644 index 00000000..2c319186 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/crossing.py @@ -0,0 +1,121 @@ +from minigrid.minigrid import * +import itertools as itt + + +class CrossingEnv(MiniGridEnv): + """ + Environment with wall or lava obstacles, sparse reward. + """ + + def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None): + self.num_crossings = num_crossings + self.obstacle_type = obstacle_type + super().__init__( + grid_size=size, + max_steps=4*size*size, + # Set this to True for maximum speed + see_through_walls=False, + seed=seed + ) + + def _gen_grid(self, width, height): + assert width % 2 == 1 and height % 2 == 1 # odd size + + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place the agent in the top-left corner + self.agent_pos = (1, 1) + self.agent_dir = 0 + + # Place a goal square in the bottom-right corner + self.put_obj(Goal(), width - 2, height - 2) + + # Place obstacles (lava or walls) + v, h = object(), object() # singleton `vertical` and `horizontal` objects + + # Lava rivers or walls specified by direction and position in grid + rivers = [(v, i) for i in range(2, height - 2, 2)] + rivers += [(h, j) for j in range(2, width - 2, 2)] + self.np_random.shuffle(rivers) + rivers = rivers[:self.num_crossings] # sample random rivers + rivers_v = sorted([pos for direction, pos in rivers if direction is v]) + rivers_h = sorted([pos for direction, pos in rivers if direction is h]) + obstacle_pos = itt.chain( + itt.product(range(1, width - 1), rivers_h), + itt.product(rivers_v, range(1, height - 1)), + ) + for i, j in obstacle_pos: + self.put_obj(self.obstacle_type(), i, j) + + # Sample path to goal + path = [h] * len(rivers_v) + [v] * len(rivers_h) + self.np_random.shuffle(path) + + # Create openings + 
limits_v = [0] + rivers_v + [height - 1] + limits_h = [0] + rivers_h + [width - 1] + room_i, room_j = 0, 0 + for direction in path: + if direction is h: + i = limits_v[room_i + 1] + j = self.np_random.choice( + range(limits_h[room_j] + 1, limits_h[room_j + 1])) + room_i += 1 + elif direction is v: + i = self.np_random.choice( + range(limits_v[room_i] + 1, limits_v[room_i + 1])) + j = limits_h[room_j + 1] + room_j += 1 + else: + assert False + self.grid.set(i, j, None) + + self.mission = ( + "avoid the lava and get to the green goal square" + if self.obstacle_type == Lava + else "find the opening and get to the green goal square" + ) + + +class LavaCrossingEnv(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=1) + + +class LavaCrossingS9N2Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=2) + + +class LavaCrossingS9N3Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=3) + + +class LavaCrossingS11N5Env(CrossingEnv): + def __init__(self): + super().__init__(size=11, num_crossings=5) + + +class SimpleCrossingEnv(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=1, obstacle_type=Wall) + + +class SimpleCrossingS9N2Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=2, obstacle_type=Wall) + + +class SimpleCrossingS9N3Env(CrossingEnv): + def __init__(self): + super().__init__(size=9, num_crossings=3, obstacle_type=Wall) + + +class SimpleCrossingS11N5Env(CrossingEnv): + def __init__(self): + super().__init__(size=11, num_crossings=5, obstacle_type=Wall) diff --git a/environments/minigrid/src/minigrid/envs/distshift.py b/environments/minigrid/src/minigrid/envs/distshift.py new file mode 100644 index 00000000..31d7c47d --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/distshift.py @@ -0,0 +1,62 @@ +from minigrid.minigrid import * + + +class DistShiftEnv(MiniGridEnv): + """ + Distributional shift environment. 
+ """ + + def __init__( + self, + width=9, + height=7, + agent_start_pos=(1,1), + agent_start_dir=0, + strip2_row=2 + ): + self.agent_start_pos = agent_start_pos + self.agent_start_dir = agent_start_dir + self.goal_pos = (width-2, 1) + self.strip2_row = strip2_row + + super().__init__( + width=width, + height=height, + max_steps=4*width*height, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place a goal square in the top-right corner + self.put_obj(Goal(), *self.goal_pos) + + # Place the lava rows + for i in range(self.width - 6): + self.grid.set(3+i, 1, Lava()) + self.grid.set(3+i, self.strip2_row, Lava()) + + # Place the agent + if self.agent_start_pos is not None: + self.agent_pos = self.agent_start_pos + self.agent_dir = self.agent_start_dir + else: + self.place_agent() + + self.mission = "get to the green goal square" + + +class DistShift1(DistShiftEnv): + def __init__(self): + super().__init__(strip2_row=2) + + +class DistShift2(DistShiftEnv): + def __init__(self): + super().__init__(strip2_row=5) diff --git a/environments/minigrid/src/minigrid/envs/doorkey.py b/environments/minigrid/src/minigrid/envs/doorkey.py new file mode 100644 index 00000000..f333de2f --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/doorkey.py @@ -0,0 +1,59 @@ +from minigrid.minigrid import * + + +class DoorKeyEnv(MiniGridEnv): + """ + Environment with a door and key, sparse reward + """ + + def __init__(self, size=8): + super().__init__( + grid_size=size, + max_steps=10*size*size + ) + + def _gen_grid(self, width, height): + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place a goal in the bottom-right corner + self.put_obj(Goal(), width - 2, height - 2) + + # Create a 
vertical splitting wall + splitIdx = self._rand_int(2, width-2) + self.grid.vert_wall(splitIdx, 0) + + # Place the agent at a random position and orientation + # on the left side of the splitting wall + self.place_agent(size=(splitIdx, height)) + + # Place a door in the wall + doorIdx = self._rand_int(1, height-2) + self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) + + # Place a yellow key on the left side + self.place_obj( + obj=Key('yellow'), + top=(0, 0), + size=(splitIdx, height) + ) + + self.mission = "use the key to open the door and then get to the goal" + + +class DoorKeyEnv5x5(DoorKeyEnv): + def __init__(self): + super().__init__(size=5) + + +class DoorKeyEnv6x6(DoorKeyEnv): + def __init__(self): + super().__init__(size=6) + + +class DoorKeyEnv16x16(DoorKeyEnv): + def __init__(self): + super().__init__(size=16) diff --git a/environments/minigrid/src/minigrid/envs/dynamicobstacles.py b/environments/minigrid/src/minigrid/envs/dynamicobstacles.py new file mode 100644 index 00000000..429632f7 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/dynamicobstacles.py @@ -0,0 +1,114 @@ +from minigrid.minigrid import * +from operator import add + + +class DynamicObstaclesEnv(MiniGridEnv): + """ + Single-room square grid environment with moving obstacles + """ + + def __init__( + self, + size=8, + agent_start_pos=(1, 1), + agent_start_dir=0, + n_obstacles=4 + ): + self.agent_start_pos = agent_start_pos + self.agent_start_dir = agent_start_dir + + # Reduce obstacles if there are too many + if n_obstacles <= size/2 + 1: + self.n_obstacles = int(n_obstacles) + else: + self.n_obstacles = int(size/2) + super().__init__( + grid_size=size, + max_steps=4 * size * size, + # Set this to True for maximum speed + see_through_walls=True, + ) + # Only 3 actions are permitted: left, right, forward + self.action_space = spaces.Discrete(self.actions.forward + 1) + self.reward_range = (-1, 1) + + def _gen_grid(self, width, height): + # Create an empty grid + 
self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place a goal square in the bottom-right corner + self.grid.set(width - 2, height - 2, Goal()) + + # Place the agent + if self.agent_start_pos is not None: + self.agent_pos = self.agent_start_pos + self.agent_dir = self.agent_start_dir + else: + self.place_agent() + + # Place obstacles + self.obstacles = [] + for i_obst in range(self.n_obstacles): + self.obstacles.append(Ball()) + self.place_obj(self.obstacles[i_obst], max_tries=100) + + self.mission = "get to the green goal square" + + def step(self, action): + # Invalid action + if action >= self.action_space.n: + action = 0 + + # Check if there is an obstacle in front of the agent + front_cell = self.grid.get(*self.front_pos) + not_clear = front_cell and front_cell.type != 'goal' + + # Update obstacle positions + for i_obst in range(len(self.obstacles)): + old_pos = self.obstacles[i_obst].cur_pos + top = tuple(map(add, old_pos, (-1, -1))) + + try: + self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100) + self.grid.set(*old_pos, None) + except Exception: # best-effort: skip this obstacle's move if re-placement fails + pass + + # Update the agent's position/direction + obs, reward, done, info = MiniGridEnv.step(self, action) + + # If the agent tried to walk over an obstacle or wall + if action == self.actions.forward and not_clear: + reward = -1 + done = True + return obs, reward, done, info + + return obs, reward, done, info + + +class DynamicObstaclesEnv5x5(DynamicObstaclesEnv): + def __init__(self): + super().__init__(size=5, n_obstacles=2) + + +class DynamicObstaclesRandomEnv5x5(DynamicObstaclesEnv): + def __init__(self): + super().__init__(size=5, agent_start_pos=None, n_obstacles=2) + + +class DynamicObstaclesEnv6x6(DynamicObstaclesEnv): + def __init__(self): + super().__init__(size=6, n_obstacles=3) + + +class DynamicObstaclesRandomEnv6x6(DynamicObstaclesEnv): + def __init__(self): + super().__init__(size=6, agent_start_pos=None, 
n_obstacles=3) + + +class DynamicObstaclesEnv16x16(DynamicObstaclesEnv): + def __init__(self): + super().__init__(size=16, n_obstacles=8) diff --git a/environments/minigrid/src/minigrid/envs/empty.py b/environments/minigrid/src/minigrid/envs/empty.py new file mode 100644 index 00000000..33307fab --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/empty.py @@ -0,0 +1,68 @@ +from minigrid.minigrid import * + + +class EmptyEnv(MiniGridEnv): + """ + Empty grid environment, no obstacles, sparse reward + """ + + def __init__( + self, + size=8, + agent_start_pos=(1,1), + agent_start_dir=0, + ): + self.agent_start_pos = agent_start_pos + self.agent_start_dir = agent_start_dir + + super().__init__( + grid_size=size, + max_steps=4*size*size, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place a goal square in the bottom-right corner + self.put_obj(Goal(), width - 2, height - 2) + + # Place the agent + if self.agent_start_pos is not None: + self.agent_pos = self.agent_start_pos + self.agent_dir = self.agent_start_dir + else: + self.place_agent() + + self.mission = "get to the green goal square" + + +class EmptyEnv5x5(EmptyEnv): + def __init__(self, **kwargs): + super().__init__(size=5, **kwargs) + + +class EmptyRandomEnv5x5(EmptyEnv): + def __init__(self): + super().__init__(size=5, agent_start_pos=None) + + +class EmptyEnv6x6(EmptyEnv): + def __init__(self, **kwargs): + super().__init__(size=6, **kwargs) + + +class EmptyRandomEnv6x6(EmptyEnv): + def __init__(self): + super().__init__(size=6, agent_start_pos=None) + + +class EmptyEnv16x16(EmptyEnv): + def __init__(self, **kwargs): + super().__init__(size=16, **kwargs) + diff --git a/environments/minigrid/src/minigrid/envs/fetch.py b/environments/minigrid/src/minigrid/envs/fetch.py new file mode 100644 index 
00000000..d87490d5 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/fetch.py @@ -0,0 +1,96 @@ +from minigrid.minigrid import * + + +class FetchEnv(MiniGridEnv): + """ + Environment in which the agent has to fetch a random object + named using English text strings + """ + + def __init__( + self, + size=8, + numObjs=3 + ): + self.numObjs = numObjs + + super().__init__( + grid_size=size, + max_steps=5*size**2, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width-1, 0) + + types = ['key', 'ball'] + + objs = [] + + # For each object to be generated + while len(objs) < self.numObjs: + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + + self.place_obj(obj) + objs.append(obj) + + # Randomize the player start position and orientation + self.place_agent() + + # Choose a random object to be picked up + target = objs[self._rand_int(0, len(objs))] + self.targetType = target.type + self.targetColor = target.color + + descStr = '%s %s' % (self.targetColor, self.targetType) + + # Generate the mission string + idx = self._rand_int(0, 5) + if idx == 0: + self.mission = 'get a %s' % descStr + elif idx == 1: + self.mission = 'go get a %s' % descStr + elif idx == 2: + self.mission = 'fetch a %s' % descStr + elif idx == 3: + self.mission = 'go fetch a %s' % descStr + elif idx == 4: + self.mission = 'you must fetch a %s' % descStr + assert hasattr(self, 'mission') + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + + if self.carrying: + if self.carrying.color == self.targetColor and \ + self.carrying.type == self.targetType: + reward = self._reward() + done = True + else: + 
reward = 0 + done = True + + return obs, reward, done, info + + +class FetchEnv5x5N2(FetchEnv): + def __init__(self): + super().__init__(size=5, numObjs=2) + + +class FetchEnv6x6N2(FetchEnv): + def __init__(self): + super().__init__(size=6, numObjs=2) diff --git a/environments/minigrid/src/minigrid/envs/fourrooms.py b/environments/minigrid/src/minigrid/envs/fourrooms.py new file mode 100644 index 00000000..628c3d82 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/fourrooms.py @@ -0,0 +1,69 @@ +from minigrid.minigrid import * + + +class FourRoomsEnv(MiniGridEnv): + """ + Classic 4 rooms gridworld environment. + Can specify agent and goal position, if not it set at random. + """ + + def __init__(self, agent_pos=None, goal_pos=None): + self._agent_default_pos = agent_pos + self._goal_default_pos = goal_pos + super().__init__(grid_size=19, max_steps=100) + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height - 1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width - 1, 0) + + room_w = width // 2 + room_h = height // 2 + + # For each row of rooms + for j in range(0, 2): + + # For each column + for i in range(0, 2): + xL = i * room_w + yT = j * room_h + xR = xL + room_w + yB = yT + room_h + + # Bottom wall and door + if i + 1 < 2: + self.grid.vert_wall(xR, yT, room_h) + pos = (xR, self._rand_int(yT + 1, yB)) + self.grid.set(*pos, None) + + # Bottom wall and door + if j + 1 < 2: + self.grid.horz_wall(xL, yB, room_w) + pos = (self._rand_int(xL + 1, xR), yB) + self.grid.set(*pos, None) + + # Randomize the player start position and orientation + if self._agent_default_pos is not None: + self.agent_pos = self._agent_default_pos + self.grid.set(*self._agent_default_pos, None) + self.agent_dir = self._rand_int(0, 4) # assuming random start direction + else: + self.place_agent() + + if self._goal_default_pos is not None: + goal = 
Goal() + self.put_obj(goal, *self._goal_default_pos) + goal.init_pos, goal.cur_pos = self._goal_default_pos + else: + self.place_obj(Goal()) + + self.mission = 'Reach the goal' + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/gotodoor.py b/environments/minigrid/src/minigrid/envs/gotodoor.py new file mode 100644 index 00000000..0247e33b --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/gotodoor.py @@ -0,0 +1,91 @@ +from minigrid.minigrid import * + + +class GoToDoorEnv(MiniGridEnv): + """ + Environment in which the agent is instructed to go to a given object + named using an English text string + """ + + def __init__( + self, + size=5 + ): + assert size >= 5 + + super().__init__( + grid_size=size, + max_steps=5*size**2, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + # Randomly vary the room width and height + width = self._rand_int(5, width+1) + height = self._rand_int(5, height+1) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Generate the 4 doors at random positions + doorPos = [] + doorPos.append((self._rand_int(2, width-2), 0)) + doorPos.append((self._rand_int(2, width-2), height-1)) + doorPos.append((0, self._rand_int(2, height-2))) + doorPos.append((width-1, self._rand_int(2, height-2))) + + # Generate the door colors + doorColors = [] + while len(doorColors) < len(doorPos): + color = self._rand_elem(COLOR_NAMES) + if color in doorColors: + continue + doorColors.append(color) + + # Place the doors in the grid + for idx, pos in enumerate(doorPos): + color = doorColors[idx] + self.grid.set(*pos, Door(color)) + + # Randomize the agent start position and orientation + self.place_agent(size=(width, height)) + + # Select a random target door + doorIdx = self._rand_int(0, 
len(doorPos)) + self.target_pos = doorPos[doorIdx] + self.target_color = doorColors[doorIdx] + + # Generate the mission string + self.mission = 'go to the %s door' % self.target_color + + def step(self, action): + obs, reward, done, info = super().step(action) + + ax, ay = self.agent_pos + tx, ty = self.target_pos + + # Don't let the agent open any of the doors + if action == self.actions.toggle: + done = True + + # Reward performing done action in front of the target door + if action == self.actions.done: + if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1): + reward = self._reward() + done = True + + return obs, reward, done, info + + +class GoToDoor8x8Env(GoToDoorEnv): + def __init__(self): + super().__init__(size=8) + + +class GoToDoor6x6Env(GoToDoorEnv): + def __init__(self): + super().__init__(size=6) diff --git a/environments/minigrid/src/minigrid/envs/gotoobject.py b/environments/minigrid/src/minigrid/envs/gotoobject.py new file mode 100644 index 00000000..33701ab3 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/gotoobject.py @@ -0,0 +1,89 @@ +from minigrid.minigrid import * + + +class GoToObjectEnv(MiniGridEnv): + """ + Environment in which the agent is instructed to go to a given object + named using an English text string + """ + + def __init__( + self, + size=6, + numObjs=2 + ): + self.numObjs = numObjs + + super().__init__( + grid_size=size, + max_steps=5*size**2, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Types and colors of objects we can generate + types = ['key', 'ball', 'box'] + + objs = [] + objPos = [] + + # Until we have generated all the objects + while len(objs) < self.numObjs: + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + + # If this object already exists, try again + if (objType, objColor) in 
objs: + continue + + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + elif objType == 'box': + obj = Box(objColor) + + pos = self.place_obj(obj) + objs.append((objType, objColor)) + objPos.append(pos) + + # Randomize the agent start position and orientation + self.place_agent() + + # Choose a random object to be picked up + objIdx = self._rand_int(0, len(objs)) + self.targetType, self.target_color = objs[objIdx] + self.target_pos = objPos[objIdx] + + descStr = '%s %s' % (self.target_color, self.targetType) + self.mission = 'go to the %s' % descStr + #print(self.mission) + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + + ax, ay = self.agent_pos + tx, ty = self.target_pos + + # Toggle/pickup action terminates the episode + if action == self.actions.toggle: + done = True + + # Reward performing the done action next to the target object + if action == self.actions.done: + if abs(ax - tx) <= 1 and abs(ay - ty) <= 1: + reward = self._reward() + done = True + + return obs, reward, done, info + + +class GotoEnv8x8N2(GoToObjectEnv): + def __init__(self): + super().__init__(size=8, numObjs=2) diff --git a/environments/minigrid/src/minigrid/envs/keycorridor.py b/environments/minigrid/src/minigrid/envs/keycorridor.py new file mode 100644 index 00000000..0c417600 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/keycorridor.py @@ -0,0 +1,113 @@ +from minigrid.roomgrid import RoomGrid + + +class KeyCorridor(RoomGrid): + """ + A ball is behind a locked door, the key is placed in a + random room. 
+ """ + + def __init__( + self, + num_rows=3, + obj_type="ball", + room_size=6, + seed=None + ): + self.obj_type = obj_type + + super().__init__( + room_size=room_size, + num_rows=num_rows, + max_steps=30*room_size**2, + seed=seed, + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Connect the middle column rooms into a hallway + for j in range(1, self.num_rows): + self.remove_wall(1, j, 3) + + # Add a locked door on the bottom right + # Add an object behind the locked door + room_idx = self._rand_int(0, self.num_rows) + door, _ = self.add_door(2, room_idx, 2, locked=True) + obj, _ = self.add_object(2, room_idx, kind=self.obj_type) + + # Add a key in a random room on the left side + self.add_object(0, self._rand_int(0, self.num_rows), 'key', door.color) + + # Place the agent in the middle + self.place_agent(1, self.num_rows // 2) + + # Make sure all rooms are accessible + self.connect_all() + + self.obj = obj + self.mission = "pick up the %s %s" % (obj.color, obj.type) + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info + + +class KeyCorridorS3R1(KeyCorridor): + def __init__(self, seed=None): + super().__init__( + room_size=3, + num_rows=1, + seed=seed + ) + + +class KeyCorridorS3R2(KeyCorridor): + def __init__(self, seed=None): + super().__init__( + room_size=3, + num_rows=2, + seed=seed + ) + + +class KeyCorridorS3R3(KeyCorridor): + def __init__(self, seed=None): + super().__init__( + room_size=3, + num_rows=3, + seed=seed + ) + + +class KeyCorridorS4R3(KeyCorridor): + def __init__(self, seed=None): + super().__init__( + room_size=4, + num_rows=3, + seed=seed + ) + + +class KeyCorridorS5R3(KeyCorridor): + def __init__(self, seed=None): + super().__init__( + room_size=5, + num_rows=3, + seed=seed + ) + + +class KeyCorridorS6R3(KeyCorridor): + def 
__init__(self, seed=None): + super().__init__( + room_size=6, + num_rows=3, + seed=seed + ) diff --git a/environments/minigrid/src/minigrid/envs/lavagap.py b/environments/minigrid/src/minigrid/envs/lavagap.py new file mode 100644 index 00000000..26565b84 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/lavagap.py @@ -0,0 +1,68 @@ +from minigrid.minigrid import * + + +class LavaGapEnv(MiniGridEnv): + """ + Environment with one wall of lava with a small gap to cross through + This environment is similar to LavaCrossing but simpler in structure. + """ + + def __init__(self, size, obstacle_type=Lava, seed=None): + self.obstacle_type = obstacle_type + super().__init__( + grid_size=size, + max_steps=4*size*size, + # Set this to True for maximum speed + see_through_walls=False, + seed=None + ) + + def _gen_grid(self, width, height): + assert width >= 5 and height >= 5 + + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.wall_rect(0, 0, width, height) + + # Place the agent in the top-left corner + self.agent_pos = (1, 1) + self.agent_dir = 0 + + # Place a goal square in the bottom-right corner + self.goal_pos = np.array((width - 2, height - 2)) + self.put_obj(Goal(), *self.goal_pos) + + # Generate and store random gap position + self.gap_pos = np.array(( + self._rand_int(2, width - 2), + self._rand_int(1, height - 1), + )) + + # Place the obstacle wall + self.grid.vert_wall(self.gap_pos[0], 1, height - 2, self.obstacle_type) + + # Put a hole in the wall + self.grid.set(*self.gap_pos, None) + + self.mission = ( + "avoid the lava and get to the green goal square" + if self.obstacle_type == Lava + else "find the opening and get to the green goal square" + ) + + +class LavaGapS5Env(LavaGapEnv): + def __init__(self): + super().__init__(size=5) + + +class LavaGapS6Env(LavaGapEnv): + def __init__(self): + super().__init__(size=6) + + +class LavaGapS7Env(LavaGapEnv): + def __init__(self): + 
super().__init__(size=7) diff --git a/environments/minigrid/src/minigrid/envs/lockedroom.py b/environments/minigrid/src/minigrid/envs/lockedroom.py new file mode 100644 index 00000000..535665b3 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/lockedroom.py @@ -0,0 +1,119 @@ +from gym_minigrid.minigrid import * + + +class Room: + def __init__(self, + top, + size, + doorPos + ): + self.top = top + self.size = size + self.doorPos = doorPos + self.color = None + self.locked = False + + def rand_pos(self, env): + topX, topY = self.top + sizeX, sizeY = self.size + return env._rand_pos( + topX + 1, topX + sizeX - 1, + topY + 1, topY + sizeY - 1 + ) + + +class LockedRoom(MiniGridEnv): + """ + Environment in which the agent is instructed to go to a given object + named using an English text string + """ + + def __init__( + self, + size=19 + ): + super().__init__(grid_size=size, max_steps=10*size) + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + for i in range(0, width): + self.grid.set(i, 0, Wall()) + self.grid.set(i, height-1, Wall()) + for j in range(0, height): + self.grid.set(0, j, Wall()) + self.grid.set(width-1, j, Wall()) + + # Hallway walls + lWallIdx = width // 2 - 2 + rWallIdx = width // 2 + 2 + for j in range(0, height): + self.grid.set(lWallIdx, j, Wall()) + self.grid.set(rWallIdx, j, Wall()) + + self.rooms = [] + + # Room splitting walls + for n in range(0, 3): + j = n * (height // 3) + for i in range(0, lWallIdx): + self.grid.set(i, j, Wall()) + for i in range(rWallIdx, width): + self.grid.set(i, j, Wall()) + + roomW = lWallIdx + 1 + roomH = height // 3 + 1 + self.rooms.append(Room( + (0, j), + (roomW, roomH), + (lWallIdx, j + 3) + )) + self.rooms.append(Room( + (rWallIdx, j), + (roomW, roomH), + (rWallIdx, j + 3) + )) + + # Choose one random room to be locked + lockedRoom = self._rand_elem(self.rooms) + lockedRoom.locked = True + goalPos = lockedRoom.rand_pos(self) 
+ self.grid.set(*goalPos, Goal()) + + # Assign the door colors + colors = set(COLOR_NAMES) + for room in self.rooms: + color = self._rand_elem(sorted(colors)) + colors.remove(color) + room.color = color + if room.locked: + self.grid.set(*room.doorPos, Door(color, is_locked=True)) + else: + self.grid.set(*room.doorPos, Door(color)) + + # Select a random room to contain the key + while True: + keyRoom = self._rand_elem(self.rooms) + if keyRoom != lockedRoom: + break + keyPos = keyRoom.rand_pos(self) + self.grid.set(*keyPos, Key(lockedRoom.color)) + + # Randomize the player start position and orientation + self.agent_pos = self.place_agent( + top=(lWallIdx, 0), + size=(rWallIdx-lWallIdx, height) + ) + + # Generate the mission string + self.mission = ( + 'get the %s key from the %s room, ' + 'unlock the %s door and ' + 'go to the goal' + ) % (lockedRoom.color, keyRoom.color, lockedRoom.color) + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/memory.py b/environments/minigrid/src/minigrid/envs/memory.py new file mode 100644 index 00000000..ee7d3902 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/memory.py @@ -0,0 +1,130 @@ +from minigrid.minigrid import * + + +class MemoryEnv(MiniGridEnv): + """ + This environment is a memory test. The agent starts in a small room + where it sees an object. It then has to go through a narrow hallway + which ends in a split. At each end of the split there is an object, + one of which is the same as the object in the starting room. The + agent has to remember the initial object, and go to the matching + object at split. 
+ """ + + def __init__( + self, + seed, + size=8, + random_length=False, + ): + self.random_length = random_length + super().__init__( + seed=seed, + grid_size=size, + max_steps=5*size**2, + # Set this to True for maximum speed + see_through_walls=False, + ) + + def _gen_grid(self, width, height): + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width - 1, 0) + + assert height % 2 == 1 + upper_room_wall = height // 2 - 2 + lower_room_wall = height // 2 + 2 + if self.random_length: + hallway_end = self._rand_int(4, width - 2) + else: + hallway_end = width - 3 + + # Start room + for i in range(1, 5): + self.grid.set(i, upper_room_wall, Wall()) + self.grid.set(i, lower_room_wall, Wall()) + self.grid.set(4, upper_room_wall + 1, Wall()) + self.grid.set(4, lower_room_wall - 1, Wall()) + + # Horizontal hallway + for i in range(5, hallway_end): + self.grid.set(i, upper_room_wall + 1, Wall()) + self.grid.set(i, lower_room_wall - 1, Wall()) + + # Vertical hallway + for j in range(0, height): + if j != height // 2: + self.grid.set(hallway_end, j, Wall()) + self.grid.set(hallway_end + 2, j, Wall()) + + # Fix the player's start position and orientation + self.agent_pos = (self._rand_int(1, hallway_end + 1), height // 2) + self.agent_dir = 0 + + # Place objects + start_room_obj = self._rand_elem([Key, Ball]) + self.grid.set(1, height // 2 - 1, start_room_obj('green')) + + other_objs = self._rand_elem([[Ball, Key], [Key, Ball]]) + pos0 = (hallway_end + 1, height // 2 - 2) + pos1 = (hallway_end + 1, height // 2 + 2) + self.grid.set(*pos0, other_objs[0]('green')) + self.grid.set(*pos1, other_objs[1]('green')) + + # Choose the target objects + if start_room_obj == other_objs[0]: + self.success_pos = (pos0[0], pos0[1] + 1) + self.failure_pos = (pos1[0], pos1[1] - 1) + else: + self.success_pos = (pos1[0], pos1[1] - 1) + self.failure_pos = (pos0[0], 
pos0[1] + 1) + + self.mission = 'go to the matching object at the end of the hallway' + + def step(self, action): + if action == MiniGridEnv.Actions.pickup: + action = MiniGridEnv.Actions.toggle + obs, reward, done, info = MiniGridEnv.step(self, action) + + if tuple(self.agent_pos) == self.success_pos: + reward = self._reward() + done = True + if tuple(self.agent_pos) == self.failure_pos: + reward = 0 + done = True + + return obs, reward, done, info + + +class MemoryS17Random(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=17, random_length=True) + + +class MemoryS13Random(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=13, random_length=True) + + +class MemoryS13(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=13) + + +class MemoryS11(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=11) + + +class MemoryS9(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=9) + + +class MemoryS7(MemoryEnv): + def __init__(self, seed=None): + super().__init__(seed=seed, size=7) diff --git a/environments/minigrid/src/minigrid/envs/multiroom.py b/environments/minigrid/src/minigrid/envs/multiroom.py new file mode 100644 index 00000000..94f9ce62 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/multiroom.py @@ -0,0 +1,264 @@ +from minigrid.minigrid import * + + +class Room: + def __init__(self, + top, + size, + entryDoorPos, + exitDoorPos + ): + self.top = top + self.size = size + self.entryDoorPos = entryDoorPos + self.exitDoorPos = exitDoorPos + + +class MultiRoomEnv(MiniGridEnv): + """ + Environment with multiple rooms (subgoals) + """ + + def __init__(self, + minNumRooms, + maxNumRooms, + maxRoomSize=10 + ): + assert minNumRooms > 0 + assert maxNumRooms >= minNumRooms + assert maxRoomSize >= 4 + + self.minNumRooms = minNumRooms + self.maxNumRooms = maxNumRooms + self.maxRoomSize = maxRoomSize + + self.rooms = [] + + 
super(MultiRoomEnv, self).__init__( + grid_size=25, + max_steps=self.maxNumRooms * 20 + ) + + def _gen_grid(self, width, height): + roomList = [] + + # Choose a random number of rooms to generate + numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms+1) + + while len(roomList) < numRooms: + curRoomList = [] + + entryDoorPos = ( + self._rand_int(0, width - 2), + self._rand_int(0, width - 2) + ) + + # Recursively place the rooms + self._placeRoom( + numRooms, + roomList=curRoomList, + minSz=4, + maxSz=self.maxRoomSize, + entryDoorWall=2, + entryDoorPos=entryDoorPos + ) + + if len(curRoomList) > len(roomList): + roomList = curRoomList + + # Store the list of rooms in this environment + assert len(roomList) > 0 + self.rooms = roomList + + # Create the grid + self.grid = Grid(width, height) + wall = Wall() + + prevDoorColor = None + + # For each room + for idx, room in enumerate(roomList): + + topX, topY = room.top + sizeX, sizeY = room.size + + # Draw the top and bottom walls + for i in range(0, sizeX): + self.grid.set(topX + i, topY, wall) + self.grid.set(topX + i, topY + sizeY - 1, wall) + + # Draw the left and right walls + for j in range(0, sizeY): + self.grid.set(topX, topY + j, wall) + self.grid.set(topX + sizeX - 1, topY + j, wall) + + # If this isn't the first room, place the entry door + if idx > 0: + # Pick a door color different from the previous one + doorColors = set(COLOR_NAMES) + if prevDoorColor: + doorColors.remove(prevDoorColor) + # Note: the use of sorting here guarantees determinism, + # This is needed because Python's set is not deterministic + doorColor = self._rand_elem(sorted(doorColors)) + + entryDoor = Door(doorColor) + self.grid.set(*room.entryDoorPos, entryDoor) + prevDoorColor = doorColor + + prevRoom = roomList[idx-1] + prevRoom.exitDoorPos = room.entryDoorPos + + # Randomize the starting agent position and direction + self.place_agent(roomList[0].top, roomList[0].size) + + # Place the final goal in the last room + self.goal_pos = 
self.place_obj(Goal(), roomList[-1].top, roomList[-1].size) + + self.mission = 'traverse the rooms to get to the goal' + + def _placeRoom( + self, + numLeft, + roomList, + minSz, + maxSz, + entryDoorWall, + entryDoorPos + ): + # Choose the room size randomly + sizeX = self._rand_int(minSz, maxSz+1) + sizeY = self._rand_int(minSz, maxSz+1) + + # The first room will be at the door position + if len(roomList) == 0: + topX, topY = entryDoorPos + # Entry on the right + elif entryDoorWall == 0: + topX = entryDoorPos[0] - sizeX + 1 + y = entryDoorPos[1] + topY = self._rand_int(y - sizeY + 2, y) + # Entry wall on the south + elif entryDoorWall == 1: + x = entryDoorPos[0] + topX = self._rand_int(x - sizeX + 2, x) + topY = entryDoorPos[1] - sizeY + 1 + # Entry wall on the left + elif entryDoorWall == 2: + topX = entryDoorPos[0] + y = entryDoorPos[1] + topY = self._rand_int(y - sizeY + 2, y) + # Entry wall on the top + elif entryDoorWall == 3: + x = entryDoorPos[0] + topX = self._rand_int(x - sizeX + 2, x) + topY = entryDoorPos[1] + else: + assert False, entryDoorWall + + # If the room is out of the grid, can't place a room here + if topX < 0 or topY < 0: + return False + if topX + sizeX > self.width or topY + sizeY >= self.height: + return False + + # If the room intersects with previous rooms, can't place it here + for room in roomList[:-1]: + nonOverlap = \ + topX + sizeX < room.top[0] or \ + room.top[0] + room.size[0] <= topX or \ + topY + sizeY < room.top[1] or \ + room.top[1] + room.size[1] <= topY + + if not nonOverlap: + return False + + # Add this room to the list + roomList.append(Room( + (topX, topY), + (sizeX, sizeY), + entryDoorPos, + None + )) + + # If this was the last room, stop + if numLeft == 1: + return True + + # Try placing the next room + for i in range(0, 8): + + # Pick which wall to place the out door on + wallSet = set((0, 1, 2, 3)) + wallSet.remove(entryDoorWall) + exitDoorWall = self._rand_elem(sorted(wallSet)) + nextEntryWall = (exitDoorWall + 2) % 
4 + + # Pick the exit door position + # Exit on right wall + if exitDoorWall == 0: + exitDoorPos = ( + topX + sizeX - 1, + topY + self._rand_int(1, sizeY - 1) + ) + # Exit on south wall + elif exitDoorWall == 1: + exitDoorPos = ( + topX + self._rand_int(1, sizeX - 1), + topY + sizeY - 1 + ) + # Exit on left wall + elif exitDoorWall == 2: + exitDoorPos = ( + topX, + topY + self._rand_int(1, sizeY - 1) + ) + # Exit on north wall + elif exitDoorWall == 3: + exitDoorPos = ( + topX + self._rand_int(1, sizeX - 1), + topY + ) + else: + assert False + + # Recursively create the other rooms + success = self._placeRoom( + numLeft - 1, + roomList=roomList, + minSz=minSz, + maxSz=maxSz, + entryDoorWall=nextEntryWall, + entryDoorPos=exitDoorPos + ) + + if success: + break + + return True + + +class MultiRoomEnvN2S4(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=2, + maxNumRooms=2, + maxRoomSize=4 + ) + + +class MultiRoomEnvN4S5(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=4, + maxNumRooms=4, + maxRoomSize=5 + ) + + +class MultiRoomEnvN6(MultiRoomEnv): + def __init__(self): + super().__init__( + minNumRooms=6, + maxNumRooms=6 + ) diff --git a/environments/minigrid/src/minigrid/envs/obstructedmaze.py b/environments/minigrid/src/minigrid/envs/obstructedmaze.py new file mode 100644 index 00000000..fec12421 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/obstructedmaze.py @@ -0,0 +1,187 @@ +from minigrid.minigrid import * +from minigrid.roomgrid import RoomGrid + + +class ObstructedMazeEnv(RoomGrid): + """ + A blue ball is hidden in the maze. Doors may be locked, + doors may be obstructed by a ball and keys may be hidden in boxes. 
+ """ + + def __init__(self, + num_rows, + num_cols, + num_rooms_visited, + seed=None + ): + room_size = 6 + max_steps = 4 * num_rooms_visited * room_size ** 2 + + super().__init__( + room_size=room_size, + num_rows=num_rows, + num_cols=num_cols, + max_steps=max_steps, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Define all possible colors for doors + self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES)) + # Define the color of the ball to pick up + self.ball_to_find_color = COLOR_NAMES[0] + # Define the color of the balls that obstruct doors + self.blocking_ball_color = COLOR_NAMES[1] + # Define the color of boxes in which keys are hidden + self.box_color = COLOR_NAMES[2] + + self.mission = "pick up the %s ball" % self.ball_to_find_color + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info + + def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False): + """ + Add a door. If the door must be locked, it also adds the key. + If the key must be hidden, it is put in a box. If the door must + be obstructed, it adds a ball in front of the door. + """ + + door, door_pos = super().add_door(i, j, door_idx, color, locked=locked) + + if blocked: + vec = DIR_TO_VEC[door_idx] + blocking_ball = Ball(self.blocking_ball_color) if blocked else None + self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball) + + if locked: + obj = Key(door.color) + if key_in_box: + box = Box(self.box_color) if key_in_box else None + box.contains = obj + obj = box + self.place_in_room(i, j, obj) + + return door, door_pos + + +class ObstructedMaze1Dlhb(ObstructedMazeEnv): + """ + A blue ball is hidden in a 2x1 maze. A locked door separates + rooms. 
Doors are obstructed by a ball and keys are hidden in boxes. + """ + + def __init__(self, key_in_box=True, blocked=True, seed=None): + self.key_in_box = key_in_box + self.blocked = blocked + + super().__init__( + num_rows=1, + num_cols=2, + num_rooms_visited=2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + self.add_door(0, 0, door_idx=0, color=self.door_colors[0], + locked=True, + key_in_box=self.key_in_box, + blocked=self.blocked) + + self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color) + self.place_agent(0, 0) + + +class ObstructedMaze1Dl(ObstructedMaze1Dlhb): + def __init__(self, seed=None): + super().__init__(False, False, seed) + + +class ObstructedMaze1Dlh(ObstructedMaze1Dlhb): + def __init__(self, seed=None): + super().__init__(True, False, seed) + + +class ObstructedMazeFull(ObstructedMazeEnv): + """ + A blue ball is hidden in one of the 4 corners of a 3x3 maze. Doors + are locked, doors are obstructed by a ball and keys are hidden in + boxes. + """ + + def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True, + num_quarters=4, num_rooms_visited=25, seed=None): + self.agent_room = agent_room + self.key_in_box = key_in_box + self.blocked = blocked + self.num_quarters = num_quarters + + super().__init__( + num_rows=3, + num_cols=3, + num_rooms_visited=num_rooms_visited, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + middle_room = (1, 1) + # Define positions of "side rooms" i.e. rooms that are neither + # corners nor the center. 
+ side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters] + for i in range(len(side_rooms)): + side_room = side_rooms[i] + + # Add a door between the center room and the side room + self.add_door(*middle_room, door_idx=i, color=self.door_colors[i], locked=False) + + for k in [-1, 1]: + # Add a door to each side of the side room + self.add_door(*side_room, locked=True, + door_idx=(i + k) % 4, + color=self.door_colors[(i + k) % len(self.door_colors)], + key_in_box=self.key_in_box, + blocked=self.blocked) + + corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters] + ball_room = self._rand_elem(corners) + + self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color) + self.place_agent(*self.agent_room) + + +class ObstructedMaze2Dl(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), False, False, 1, 4, seed) + + +class ObstructedMaze2Dlh(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), True, False, 1, 4, seed) + + +class ObstructedMaze2Dlhb(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((2, 1), True, True, 1, 4, seed) + + +class ObstructedMaze1Q(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 1, 5, seed) + + +class ObstructedMaze2Q(ObstructedMazeFull): + def __init__(self, seed=None): + super().__init__((1, 1), True, True, 2, 11, seed) diff --git a/environments/minigrid/src/minigrid/envs/playground_v0.py b/environments/minigrid/src/minigrid/envs/playground_v0.py new file mode 100644 index 00000000..20e2da03 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/playground_v0.py @@ -0,0 +1,71 @@ +from minigrid.minigrid import * + + +class PlaygroundV0(MiniGridEnv): + """ + Environment with multiple rooms and random objects. + This environment has no specific goals or rewards. 
+ """ + + def __init__(self): + super().__init__(grid_size=19, max_steps=100) + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width-1, 0) + + roomW = width // 3 + roomH = height // 3 + + # For each row of rooms + for j in range(0, 3): + + # For each column + for i in range(0, 3): + xL = i * roomW + yT = j * roomH + xR = xL + roomW + yB = yT + roomH + + # Right wall and door + if i+1 < 3: + self.grid.vert_wall(xR, yT, roomH) + pos = (xR, self._rand_int(yT+1, yB-1)) + color = self._rand_elem(COLOR_NAMES) + self.grid.set(*pos, Door(color)) + + # Bottom wall and door + if j+1 < 3: + self.grid.horz_wall(xL, yB, roomW) + pos = (self._rand_int(xL+1, xR-1), yB) + color = self._rand_elem(COLOR_NAMES) + self.grid.set(*pos, Door(color)) + + # Randomize the player start position and orientation + self.place_agent() + + # Place random objects in the world + types = ['key', 'ball', 'box'] + for i in range(0, 12): + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + elif objType == 'box': + obj = Box(objColor) + self.place_obj(obj) + + # No explicit mission in this environment + self.mission = '' + + def step(self, action): + obs, reward, done, info = MiniGridEnv.step(self, action) + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/putnear.py b/environments/minigrid/src/minigrid/envs/putnear.py new file mode 100644 index 00000000..bcd96f62 --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/putnear.py @@ -0,0 +1,117 @@ +from minigrid.minigrid import * + + +class PutNearEnv(MiniGridEnv): + """ + Environment in which the agent is instructed to place an object near + another object through a natural language string. 
+ """ + + def __init__( + self, + size=6, + numObjs=2 + ): + self.numObjs = numObjs + + super().__init__( + grid_size=size, + max_steps=5*size, + # Set this to True for maximum speed + see_through_walls=True + ) + + def _gen_grid(self, width, height): + self.grid = Grid(width, height) + + # Generate the surrounding walls + self.grid.horz_wall(0, 0) + self.grid.horz_wall(0, height-1) + self.grid.vert_wall(0, 0) + self.grid.vert_wall(width-1, 0) + + # Types and colors of objects we can generate + types = ['key', 'ball', 'box'] + + objs = [] + objPos = [] + + def near_obj(env, p1): + for p2 in objPos: + dx = p1[0] - p2[0] + dy = p1[1] - p2[1] + if abs(dx) <= 1 and abs(dy) <= 1: + return True + return False + + # Until we have generated all the objects + while len(objs) < self.numObjs: + objType = self._rand_elem(types) + objColor = self._rand_elem(COLOR_NAMES) + + # If this object already exists, try again + if (objType, objColor) in objs: + continue + + if objType == 'key': + obj = Key(objColor) + elif objType == 'ball': + obj = Ball(objColor) + elif objType == 'box': + obj = Box(objColor) + + pos = self.place_obj(obj, reject_fn=near_obj) + + objs.append((objType, objColor)) + objPos.append(pos) + + # Randomize the agent start position and orientation + self.place_agent() + + # Choose a random object to be moved + objIdx = self._rand_int(0, len(objs)) + self.move_type, self.moveColor = objs[objIdx] + self.move_pos = objPos[objIdx] + + # Choose a target object (to put the first object next to) + while True: + targetIdx = self._rand_int(0, len(objs)) + if targetIdx != objIdx: + break + self.target_type, self.target_color = objs[targetIdx] + self.target_pos = objPos[targetIdx] + + self.mission = 'put the %s %s near the %s %s' % ( + self.moveColor, + self.move_type, + self.target_color, + self.target_type + ) + + def step(self, action): + preCarrying = self.carrying + + obs, reward, done, info = super().step(action) + + u, v = self.dir_vec + ox, oy = (self.agent_pos[0] + 
u, self.agent_pos[1] + v) + tx, ty = self.target_pos + + # If we picked up the wrong object, terminate the episode + if action == self.actions.pickup and self.carrying: + if self.carrying.type != self.move_type or self.carrying.color != self.moveColor: + done = True + + # If successfully dropping an object near the target + if action == self.actions.drop and preCarrying: + if self.grid.get(ox, oy) is preCarrying: + if abs(ox - tx) <= 1 and abs(oy - ty) <= 1: + reward = self._reward() + done = True + + return obs, reward, done, info + + +class PutNear8x8N3(PutNearEnv): + def __init__(self): + super().__init__(size=8, numObjs=3) diff --git a/environments/minigrid/src/minigrid/envs/redbluedoors.py b/environments/minigrid/src/minigrid/envs/redbluedoors.py new file mode 100644 index 00000000..e847528a --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/redbluedoors.py @@ -0,0 +1,71 @@ +from minigrid.minigrid import * + + +class RedBlueDoorEnv(MiniGridEnv): + """ + Single room with red and blue doors on opposite sides. + The red door must be opened before the blue door to + obtain a reward. 
+ """ + + def __init__(self, size=8): + self.size = size + + super().__init__( + width=2*size, + height=size, + max_steps=20*size*size + ) + + def _gen_grid(self, width, height): + # Create an empty grid + self.grid = Grid(width, height) + + # Generate the grid walls + self.grid.wall_rect(0, 0, 2*self.size, self.size) + self.grid.wall_rect(self.size//2, 0, self.size, self.size) + + # Place the agent at a random free position inside the middle room + self.place_agent(top=(self.size//2, 0), size=(self.size, self.size)) + + # Add a red door at a random position in the left wall + pos = self._rand_int(1, self.size - 1) + self.red_door = Door("red") + self.grid.set(self.size//2, pos, self.red_door) + + # Add a blue door at a random position in the right wall + pos = self._rand_int(1, self.size - 1) + self.blue_door = Door("blue") + self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door) + + # Generate the mission string + self.mission = "open the red door then the blue door" + + def step(self, action): + red_door_opened_before = self.red_door.is_open + blue_door_opened_before = self.blue_door.is_open + + obs, reward, done, info = MiniGridEnv.step(self, action) + + red_door_opened_after = self.red_door.is_open + blue_door_opened_after = self.blue_door.is_open + + if blue_door_opened_after: + if red_door_opened_before: + reward = self._reward() + done = True + else: + reward = 0 + done = True + + elif red_door_opened_after: + if blue_door_opened_before: + reward = 0 + done = True + + return obs, reward, done, info + + +class RedBlueDoorEnv6x6(RedBlueDoorEnv): + def __init__(self): + super().__init__(size=6) diff --git a/environments/minigrid/src/minigrid/envs/unlock.py b/environments/minigrid/src/minigrid/envs/unlock.py new file mode 100644 index 00000000..f6b62d4e --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/unlock.py @@ -0,0 +1,40 @@ +from gym_minigrid.roomgrid import RoomGrid + + +class Unlock(RoomGrid): + """ + Unlock a door + """ + + def __init__(self, seed=None): + 
room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=8*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Make sure the two rooms are directly connected by a locked door + door, _ = self.add_door(0, 0, 0, locked=True) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.door = door + self.mission = "open the door" + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.toggle: + if self.door.is_open: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git a/environments/minigrid/src/minigrid/envs/unlockpickup.py b/environments/minigrid/src/minigrid/envs/unlockpickup.py new file mode 100644 index 00000000..8ca7a3bb --- /dev/null +++ b/environments/minigrid/src/minigrid/envs/unlockpickup.py @@ -0,0 +1,42 @@ +from gym_minigrid.roomgrid import RoomGrid + + +class UnlockPickup(RoomGrid): + """ + Unlock a door, then pick up a box in another room + """ + + def __init__(self, seed=None): + room_size = 6 + super().__init__( + num_rows=1, + num_cols=2, + room_size=room_size, + max_steps=8*room_size**2, + seed=seed + ) + + def _gen_grid(self, width, height): + super()._gen_grid(width, height) + + # Add a box to the room on the right + obj, _ = self.add_object(1, 0, kind="box") + # Make sure the two rooms are directly connected by a locked door + door, _ = self.add_door(0, 0, 0, locked=True) + # Add a key to unlock the door + self.add_object(0, 0, 'key', door.color) + + self.place_agent(0, 0) + + self.obj = obj + self.mission = "pick up the %s %s" % (obj.color, obj.type) + + def step(self, action): + obs, reward, done, info = super().step(action) + + if action == self.actions.pickup: + if self.carrying and self.carrying == self.obj: + reward = self._reward() + done = True + + return obs, reward, done, info diff --git 
a/environments/minigrid/src/minigrid/minigrid.py b/environments/minigrid/src/minigrid/minigrid.py new file mode 100644 index 00000000..ec0e6e71 --- /dev/null +++ b/environments/minigrid/src/minigrid/minigrid.py @@ -0,0 +1,1329 @@ +import math +import hashlib +from enum import IntEnum + +import numpy as np + +import gym +from gym import spaces +from gym.utils import seeding + +from rendering import * +import simenv as sm + +# Size in pixels of a tile in the full-scale human view + + +TILE_PIXELS = 32 + +# Map of color names to RGB values +COLORS = { + 'red': np.array([255, 0, 0]), + 'green': np.array([0, 255, 0]), + 'blue': np.array([0, 0, 255]), + 'purple': np.array([112, 39, 195]), + 'yellow': np.array([255, 255, 0]), + 'grey': np.array([100, 100, 100]) +} + +COLOR_NAMES = sorted(list(COLORS.keys())) + +# Used to map colors to integers +COLOR_TO_IDX = { + 'red': 0, + 'green': 1, + 'blue': 2, + 'purple': 3, + 'yellow': 4, + 'grey': 5 +} + +IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) + +# Map of object type to integers +OBJECT_TO_IDX = { + 'unseen': 0, + 'empty': 1, + 'wall': 2, + 'floor': 3, + 'door': 4, + 'key': 5, + 'ball': 6, + 'box': 7, + 'goal': 8, + 'lava': 9, + 'agent': 10, +} + +IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) + +# Map of state names to integers +STATE_TO_IDX = { + 'open': 0, + 'closed': 1, + 'locked': 2, +} + +# Map of agent direction indices to vectors +DIR_TO_VEC = [ + # Pointing right (positive X) + np.array((1, 0)), + # Down (positive Y) + np.array((0, 1)), + # Pointing left (negative X) + np.array((-1, 0)), + # Up (negative Y) + np.array((0, -1)), +] + + +class WorldObj: + """ + Base class for grid world objects + """ + + def __init__(self, type, color): + assert type in OBJECT_TO_IDX, type + assert color in COLOR_TO_IDX, color + self.type = type + self.color = color + self.contains = None + + # Initial position of the object + self.init_pos = None + + # Current position of the object + 
self.cur_pos = None + + def can_overlap(self): + """Can the agent overlap with this?""" + return False + + def can_pickup(self): + """Can the agent pick this up?""" + return False + + def can_contain(self): + """Can this contain another object?""" + return False + + def see_behind(self): + """Can the agent see behind this object?""" + return True + + def toggle(self, env, pos): + """Method to trigger/toggle an action this object performs""" + return False + + def encode(self): + """Encode a description of this object as a 3-tuple of integers""" + return OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], 0 + + @staticmethod + def decode(type_idx, color_idx, state): + """Create an object from a 3-tuple state description""" + + obj_type = IDX_TO_OBJECT[type_idx] + color = IDX_TO_COLOR[color_idx] + + if obj_type == 'empty' or obj_type == 'unseen': + return None + + # State, 0: open, 1: closed, 2: locked + is_open = state == 0 + is_locked = state == 2 + + if obj_type == 'wall': + v = Wall(color) + elif obj_type == 'floor': + v = Floor(color) + elif obj_type == 'ball': + v = Ball(color) + elif obj_type == 'key': + v = Key(color) + elif obj_type == 'box': + v = Box(color) + elif obj_type == 'door': + v = Door(color, is_open, is_locked) + elif obj_type == 'goal': + v = Goal() + elif obj_type == 'lava': + v = Lava() + else: + assert False, "unknown object type in decode '%s'" % obj_type + + return v + + def render(self, r): + """Draw this object with the given renderer""" + raise NotImplementedError + + +class Goal(WorldObj): + def __init__(self): + super().__init__('goal', 'green') + + def can_overlap(self): + return True + + def render(self, img): + fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color]) + sm.Rectangle([], color=COLORS[self.color]).draw(img) + + +class Floor(WorldObj): + """ + Colored floor tile the agent can walk over + """ + + def __init__(self, color='blue'): + super().__init__('floor', color) + + def can_overlap(self): + return True + + 
def render(self, img): + # Give the floor a pale color + color = COLORS[self.color] / 2 + fill_coords(img, point_in_rect(0.031, 1, 0.031, 1), color) + + +class Lava(WorldObj): + def __init__(self): + super().__init__('lava', 'red') + + def can_overlap(self): + return True + + def render(self, img): + c = (255, 128, 0) + + # Background color + fill_coords(img, point_in_rect(0, 1, 0, 1), c) + + # Little waves + for i in range(3): + ylo = 0.3 + 0.2 * i + yhi = 0.4 + 0.2 * i + fill_coords(img, point_in_line(0.1, ylo, 0.3, yhi, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.3, yhi, 0.5, ylo, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.5, ylo, 0.7, yhi, r=0.03), (0, 0, 0)) + fill_coords(img, point_in_line(0.7, yhi, 0.9, ylo, r=0.03), (0, 0, 0)) + + +class Wall(WorldObj): + def __init__(self, color='grey'): + super().__init__('wall', color) + + def see_behind(self): + return False + + def render(self, img): + fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color]) + + +class Door(WorldObj): + def __init__(self, color, is_open=False, is_locked=False): + super().__init__('door', color) + self.is_open = is_open + self.is_locked = is_locked + + def can_overlap(self): + """The agent can only walk over this cell when the door is open""" + return self.is_open + + def see_behind(self): + return self.is_open + + def toggle(self, env, pos): + # If the player has the right key to open the door + if self.is_locked: + if isinstance(env.carrying, Key) and env.carrying.color == self.color: + self.is_locked = False + self.is_open = True + return True + return False + + self.is_open = not self.is_open + return True + + def encode(self): + """Encode a description of this object as a 3-tuple of integers""" + + # State, 0: open, 1: closed, 2: locked + state = None + if self.is_open: + state = 0 + elif self.is_locked: + state = 2 + elif not self.is_open: + state = 1 + + return OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state + + def render(self, img): + 
c = COLORS[self.color] + + if self.is_open: + fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0, 0, 0)) + return + + # Door frame and door + if self.is_locked: + fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c)) + + # Draw key slot + fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c) + else: + fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c) + fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0, 0, 0)) + fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c) + fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0, 0, 0)) + + # Draw door handle + fill_coords(img, point_in_circle(cx=0.75, cy=0.50, r=0.08), c) + + +class Key(WorldObj): + def __init__(self, color='blue'): + super(Key, self).__init__('key', color) + + def can_pickup(self): + return True + + def render(self, img): + c = COLORS[self.color] + + # Vertical quad + fill_coords(img, point_in_rect(0.50, 0.63, 0.31, 0.88), c) + + # Teeth + fill_coords(img, point_in_rect(0.38, 0.50, 0.59, 0.66), c) + fill_coords(img, point_in_rect(0.38, 0.50, 0.81, 0.88), c) + + # Ring + fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.190), c) + fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0, 0, 0)) + + +class Ball(WorldObj): + def __init__(self, color='blue'): + super(Ball, self).__init__('ball', color) + + def can_pickup(self): + return True + + def render(self, img): + fill_coords(img, point_in_circle(0.5, 0.5, 0.31), COLORS[self.color]) + + +class Box(WorldObj): + def __init__(self, color, contains=None): + super(Box, self).__init__('box', color) + self.contains = contains + + def can_pickup(self): + return True + + def render(self, img): + c = COLORS[self.color] + + # Outline + fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), c) + fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), (0, 0, 0)) + + # 
Horizontal slit + fill_coords(img, point_in_rect(0.16, 0.84, 0.47, 0.53), c) + + def toggle(self, env, pos): + # Replace the box by its contents + env.grid.set(*pos, self.contains) + return True + + +class Grid: + """ + Represent a grid and operations on it + """ + + # Static cache of pre-rendered tiles + tile_cache = {} + + def __init__(self, width, height): + assert width >= 3 + assert height >= 3 + + self.width = width + self.height = height + + self.grid = [None] * width * height + + def __contains__(self, key): + if isinstance(key, WorldObj): + for e in self.grid: + if e is key: + return True + elif isinstance(key, tuple): + for e in self.grid: + if e is None: + continue + if (e.color, e.type) == key: + return True + if key[0] is None and key[1] == e.type: + return True + return False + + def __eq__(self, other): + grid1 = self.encode() + grid2 = other.encode() + return np.array_equal(grid2, grid1) + + def __ne__(self, other): + return not self == other + + def copy(self): + from copy import deepcopy + return deepcopy(self) + + def set(self, i, j, v): + assert 0 <= i < self.width + assert 0 <= j < self.height + self.grid[j * self.width + i] = v + + def get(self, i, j): + assert 0 <= i < self.width + assert 0 <= j < self.height + return self.grid[j * self.width + i] + + def horz_wall(self, x, y, length=None, obj_type=Wall): + if length is None: + length = self.width - x + for i in range(0, length): + self.set(x + i, y, obj_type()) + + def vert_wall(self, x, y, length=None, obj_type=Wall): + if length is None: + length = self.height - y + for j in range(0, length): + self.set(x, y + j, obj_type()) + + def wall_rect(self, x, y, w, h): + self.horz_wall(x, y, w) + self.horz_wall(x, y + h - 1, w) + self.vert_wall(x, y, h) + self.vert_wall(x + w - 1, y, h) + + def rotate_left(self): + """ + Rotate the grid to the left (counter-clockwise) + """ + + grid = Grid(self.height, self.width) + + for i in range(self.width): + for j in range(self.height): + v = self.get(i, j) 
+ grid.set(j, grid.height - 1 - i, v) + + return grid + + def slice(self, topX, topY, width, height): + """ + Get a subset of the grid + """ + + grid = Grid(width, height) + + for j in range(0, height): + for i in range(0, width): + x = topX + i + y = topY + j + + if 0 <= x < self.width and \ + 0 <= y < self.height: + v = self.get(x, y) + else: + v = Wall() + + grid.set(i, j, v) + + return grid + + @classmethod + def render_tile( + cls, + obj, + agent_dir=None, + highlight=False, + tile_size=TILE_PIXELS, + subdivs=3 + ): + """ + Render a tile and cache the result + """ + + # Hash map lookup key for the cache + key = (agent_dir, highlight, tile_size) + key = obj.encode() + key if obj else key + + if key in cls.tile_cache: + return cls.tile_cache[key] + + img = np.zeros(shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8) + + # Draw the grid lines (top and left edges) + fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100)) + fill_coords(img, point_in_rect(0, 1, 0, 0.031), (100, 100, 100)) + + if obj is not None: + obj.render(img) + + # Overlay the agent on top + if agent_dir is not None: + tri_fn = point_in_triangle( + (0.12, 0.19), + (0.87, 0.50), + (0.12, 0.81), + ) + + # Rotate the agent based on its direction + tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5 * math.pi * agent_dir) + fill_coords(img, tri_fn, (255, 0, 0)) + + # Highlight the cell if needed + if highlight: + highlight_img(img) + + # Downsample the image to perform supersampling/anti-aliasing + img = downsample(img, subdivs) + + # Cache the rendered tile + cls.tile_cache[key] = img + + return img + + def render( + self, + tile_size, + agent_pos=None, + agent_dir=None, + highlight_mask=None + ): + """ + Render this grid at a given scale + :param tile_size: tile size in pixels + :param agent_pos: position of the agent + :param agent_dir: direction of the agent + :param highlight_mask: tiles to highlight + """ + + if highlight_mask is None: + highlight_mask = 
np.zeros(shape=(self.width, self.height), dtype=bool) + + # Compute the total grid size + width_px = self.width * tile_size + height_px = self.height * tile_size + + img = np.zeros(shape=(height_px, width_px, 3), dtype=np.uint8) + + # Render the grid + for j in range(0, self.height): + for i in range(0, self.width): + cell = self.get(i, j) + + agent_here = np.array_equal(agent_pos, (i, j)) + tile_img = Grid.render_tile( + cell, + agent_dir=agent_dir if agent_here else None, + highlight=highlight_mask[i, j], + tile_size=tile_size + ) + + ymin = j * tile_size + ymax = (j + 1) * tile_size + xmin = i * tile_size + xmax = (i + 1) * tile_size + img[ymin:ymax, xmin:xmax, :] = tile_img + + return img + + def encode(self, vis_mask=None): + """ + Produce a compact numpy encoding of the grid + """ + + if vis_mask is None: + vis_mask = np.ones((self.width, self.height), dtype=bool) + + array = np.zeros((self.width, self.height, 3), dtype='uint8') + + for i in range(self.width): + for j in range(self.height): + if vis_mask[i, j]: + v = self.get(i, j) + + if v is None: + array[i, j, 0] = OBJECT_TO_IDX['empty'] + array[i, j, 1] = 0 + array[i, j, 2] = 0 + + else: + array[i, j, :] = v.encode() + + return array + + @staticmethod + def decode(array): + """ + Decode an array grid encoding back into a grid + """ + + width, height, channels = array.shape + assert channels == 3 + + vis_mask = np.ones(shape=(width, height), dtype=bool) + + grid = Grid(width, height) + for i in range(width): + for j in range(height): + type_idx, color_idx, state = array[i, j] + v = WorldObj.decode(type_idx, color_idx, state) + grid.set(i, j, v) + vis_mask[i, j] = (type_idx != OBJECT_TO_IDX['unseen']) + + return grid, vis_mask + + @staticmethod + def process_vis(grid, agent_pos): + mask = np.zeros(shape=(grid.width, grid.height), dtype=bool) + + mask[agent_pos[0], agent_pos[1]] = True + + for j in reversed(range(0, grid.height)): + for i in range(0, grid.width - 1): + if not mask[i, j]: + continue + + cell 
= grid.get(i, j) + if cell and not cell.see_behind(): + continue + + mask[i + 1, j] = True + if j > 0: + mask[i + 1, j - 1] = True + mask[i, j - 1] = True + + for i in reversed(range(1, grid.width)): + if not mask[i, j]: + continue + + cell = grid.get(i, j) + if cell and not cell.see_behind(): + continue + + mask[i - 1, j] = True + if j > 0: + mask[i - 1, j - 1] = True + mask[i, j - 1] = True + + for j in range(0, grid.height): + for i in range(0, grid.width): + if not mask[i, j]: + grid.set(i, j, None) + + return mask + + +class MiniGridEnv(gym.Env): + """ + 2D grid world game environment + """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 10 + } + + # Enumeration of possible actions + class Actions(IntEnum): + # Turn left, turn right, move forward + left = 0 + right = 1 + forward = 2 + + # Pick up an object + pickup = 3 + + # Drop an object + drop = 4 + + # Toggle/activate an object + toggle = 5 + + # Done completing task + done = 6 + + def __init__( + self, + grid_size=None, + width=None, + height=None, + max_steps=100, + see_through_walls=False, + seed=1337, + agent_view_size=7 + ): + # Can't set both grid_size and width/height + if grid_size: + assert width is None and height is None + width = grid_size + height = grid_size + + # Action enumeration for this environment + self.actions = MiniGridEnv.Actions + + # Actions are discrete integer values + self.action_space = spaces.Discrete(len(self.actions)) + + # Number of cells (width and height) in the agent view + assert agent_view_size % 2 == 1 + assert agent_view_size >= 3 + self.agent_view_size = agent_view_size + + # Observations are dictionaries containing an + # encoding of the grid and a textual 'mission' string + self.observation_space = spaces.Box( + low=0, + high=255, + shape=(self.agent_view_size, self.agent_view_size, 3), + dtype='uint8' + ) + self.observation_space = spaces.Dict({ + 'image': self.observation_space + }) + + # Range of possible rewards + 
self.reward_range = (0, 1) + + # Window to use for human rendering mode + self.window = None + + # Environment configuration + self.width = width + self.height = height + self.max_steps = max_steps + self.see_through_walls = see_through_walls + + # Current position and direction of the agent + self.agent_pos = None + self.agent_dir = None + self.carrying = None + + # Step count since episode start + self.step_count = None + + # Seed the random number generator + self.np_random = None + + # Initialize the RNG + self.seed(seed=seed) + + # Initialize the state + self.reset() + + def reset(self): + # Current position and direction of the agent + self.agent_pos = None + self.agent_dir = None + + # Generate a new random grid at the start of each episode + # To keep the same grid for each episode, call env.seed() with + # the same seed before calling env.reset() + self._gen_grid(self.width, self.height) + + # These fields should be defined by _gen_grid + assert self.agent_pos is not None + assert self.agent_dir is not None + + # Check that the agent doesn't overlap with an object + start_cell = self.grid.get(*self.agent_pos) + assert start_cell is None or start_cell.can_overlap() + + # Item picked up, being carried, initially nothing + self.carrying = None + + # Step count since episode start + self.step_count = 0 + + # Return first observation + obs = self.gen_obs() + return obs + + def seed(self, seed=1337): + # Seed the random number generator + self.np_random, _ = seeding.np_random(seed) + return [seed] + + def hash(self, size=16): + """Compute a hash that uniquely identifies the current state of the environment. 
+ :param size: Size of the hashing + """ + sample_hash = hashlib.sha256() + + to_encode = [self.grid.encode().tolist(), self.agent_pos, self.agent_dir] + for item in to_encode: + sample_hash.update(str(item).encode('utf8')) + + return sample_hash.hexdigest()[:size] + + @property + def steps_remaining(self): + return self.max_steps - self.step_count + + def __str__(self): + """ + Produce a pretty string of the environment's grid along with the agent. + A grid cell is represented by 2-character string, the first one for + the object and the second one for the color. + """ + + # Map of object types to short string + OBJECT_TO_STR = { + 'wall': 'W', + 'floor': 'F', + 'door': 'D', + 'key': 'K', + 'ball': 'A', + 'box': 'B', + 'goal': 'G', + 'lava': 'V', + } + + # Map agent's direction to short string + AGENT_DIR_TO_STR = { + 0: '>', + 1: 'V', + 2: '<', + 3: '^' + } + + str = '' + + for j in range(self.grid.height): + + for i in range(self.grid.width): + if i == self.agent_pos[0] and j == self.agent_pos[1]: + str += 2 * AGENT_DIR_TO_STR[self.agent_dir] + continue + + c = self.grid.get(i, j) + + if c is None: + str += ' ' + continue + + if c.type == 'door': + if c.is_open: + str += '__' + elif c.is_locked: + str += 'L' + c.color[0].upper() + else: + str += 'D' + c.color[0].upper() + continue + + str += OBJECT_TO_STR[c.type] + c.color[0].upper() + + if j < self.grid.height - 1: + str += '\n' + + return str + + def _gen_grid(self, width, height): + assert False, "_gen_grid needs to be implemented by each environment" + + def _reward(self): + """ + Compute the reward to be given upon success + """ + + return 1 - 0.9 * (self.step_count / self.max_steps) + + def _rand_int(self, low, high): + """ + Generate random integer in [low,high[ + """ + + return self.np_random.randint(low, high) + + def _rand_float(self, low, high): + """ + Generate random float in [low,high[ + """ + + return self.np_random.uniform(low, high) + + def _rand_bool(self): + """ + Generate random boolean value 
+ """ + + return self.np_random.randint(0, 2) == 0 + + def _rand_elem(self, iterable): + """ + Pick a random element in a list + """ + + lst = list(iterable) + idx = self._rand_int(0, len(lst)) + return lst[idx] + + def _rand_subset(self, iterable, num_elems): + """ + Sample a random subset of distinct elements of a list + """ + + lst = list(iterable) + assert num_elems <= len(lst) + + out = [] + + while len(out) < num_elems: + elem = self._rand_elem(lst) + lst.remove(elem) + out.append(elem) + + return out + + def _rand_color(self): + """ + Generate a random color name (string) + """ + + return self._rand_elem(COLOR_NAMES) + + def _rand_pos(self, xLow, xHigh, yLow, yHigh): + """ + Generate a random (x,y) position tuple + """ + + return ( + self.np_random.randint(xLow, xHigh), + self.np_random.randint(yLow, yHigh) + ) + + def place_obj(self, + obj, + top=None, + size=None, + reject_fn=None, + max_tries=math.inf + ): + """ + Place an object at an empty position in the grid + + :param obj: Object to place + :param top: top-left position of the rectangle where to place + :param size: size of the rectangle where to place + :param reject_fn: function to filter out potential positions + :param max_tries: limit on the number of sampling attempts; exceeding it raises RecursionError + """ + + if top is None: + top = (0, 0) + else: + top = (max(top[0], 0), max(top[1], 0)) + + if size is None: + size = (self.grid.width, self.grid.height) + + num_tries = 0 + + while True: + # This is to handle rare cases where rejection sampling + # gets stuck in an infinite loop + if num_tries > max_tries: + raise RecursionError('rejection sampling failed in place_obj') + + num_tries += 1 + + pos = np.array(( + self._rand_int(top[0], min(top[0] + size[0], self.grid.width)), + self._rand_int(top[1], min(top[1] + size[1], self.grid.height)) + )) + + # Don't place the object on top of another object + if self.grid.get(*pos) is not None: + continue + + # Don't place the object where the agent is + if 
np.array_equal(pos, self.agent_pos): + continue + + # Check if there is a filtering criterion + if reject_fn and reject_fn(self, pos): + continue + + break + + self.grid.set(*pos, obj) + + if obj is not None: + obj.init_pos = pos + obj.cur_pos = pos + + return pos + + def put_obj(self, obj, i, j): + """ + Put an object at a specific position in the grid + """ + + self.grid.set(i, j, obj) + obj.init_pos = (i, j) + obj.cur_pos = (i, j) + + def place_agent( + self, + top=None, + size=None, + rand_dir=True, + max_tries=math.inf + ): + """ + Set the agent's starting point at an empty position in the grid + """ + + self.agent_pos = None + pos = self.place_obj(None, top, size, max_tries=max_tries) + self.agent_pos = pos + + if rand_dir: + self.agent_dir = self._rand_int(0, 4) + + return pos + + @property + def dir_vec(self): + """ + Get the direction vector for the agent, pointing in the direction + of forward movement. + """ + + assert 0 <= self.agent_dir < 4 + return DIR_TO_VEC[self.agent_dir] + + @property + def right_vec(self): + """ + Get the vector pointing to the right of the agent. + """ + + dx, dy = self.dir_vec + return np.array((-dy, dx)) + + @property + def front_pos(self): + """ + Get the position of the cell that is right in front of the agent + """ + + return self.agent_pos + self.dir_vec + + def get_view_coords(self, i, j): + """ + Translate and rotate absolute grid coordinates (i, j) into the + agent's partially observable view (sub-grid). Note that the resulting + coordinates may be negative or outside of the agent's view size. 
+ """ + + ax, ay = self.agent_pos + dx, dy = self.dir_vec + rx, ry = self.right_vec + + # Compute the absolute coordinates of the top-left view corner + sz = self.agent_view_size + hs = self.agent_view_size // 2 + tx = ax + (dx * (sz - 1)) - (rx * hs) + ty = ay + (dy * (sz - 1)) - (ry * hs) + + lx = i - tx + ly = j - ty + + # Project the coordinates of the object relative to the top-left + # corner onto the agent's own coordinate system + vx = (rx * lx + ry * ly) + vy = -(dx * lx + dy * ly) + + return vx, vy + + def get_view_exts(self): + """ + Get the extents of the square set of tiles visible to the agent + Note: the bottom extent indices are not included in the set + """ + + # Facing right + if self.agent_dir == 0: + topX = self.agent_pos[0] + topY = self.agent_pos[1] - self.agent_view_size // 2 + # Facing down + elif self.agent_dir == 1: + topX = self.agent_pos[0] - self.agent_view_size // 2 + topY = self.agent_pos[1] + # Facing left + elif self.agent_dir == 2: + topX = self.agent_pos[0] - self.agent_view_size + 1 + topY = self.agent_pos[1] - self.agent_view_size // 2 + # Facing up + elif self.agent_dir == 3: + topX = self.agent_pos[0] - self.agent_view_size // 2 + topY = self.agent_pos[1] - self.agent_view_size + 1 + else: + assert False, "invalid agent direction" + + botX = topX + self.agent_view_size + botY = topY + self.agent_view_size + + return topX, topY, botX, botY + + def relative_coords(self, x, y): + """ + Check if a grid position belongs to the agent's field of view, and returns the corresponding coordinates + """ + + vx, vy = self.get_view_coords(x, y) + + if vx < 0 or vy < 0 or vx >= self.agent_view_size or vy >= self.agent_view_size: + return None + + return vx, vy + + def in_view(self, x, y): + """ + check if a grid position is visible to the agent + """ + + return self.relative_coords(x, y) is not None + + def agent_sees(self, x, y): + """ + Check if a non-empty grid position is visible to the agent + """ + + coordinates = 
self.relative_coords(x, y) + if coordinates is None: + return False + vx, vy = coordinates + + obs = self.gen_obs() + obs_grid, _ = Grid.decode(obs['image']) + obs_cell = obs_grid.get(vx, vy) + world_cell = self.grid.get(x, y) + + return obs_cell is not None and world_cell is not None and obs_cell.type == world_cell.type + + def step(self, action): + self.step_count += 1 + + reward = 0 + done = False + + # Get the position in front of the agent + fwd_pos = self.front_pos + + # Get the contents of the cell in front of the agent + fwd_cell = self.grid.get(*fwd_pos) + + # Rotate left + if action == self.actions.left: + self.agent_dir -= 1 + if self.agent_dir < 0: + self.agent_dir += 4 + + # Rotate right + elif action == self.actions.right: + self.agent_dir = (self.agent_dir + 1) % 4 + + # Move forward + elif action == self.actions.forward: + if fwd_cell is None or fwd_cell.can_overlap(): + self.agent_pos = fwd_pos + if fwd_cell is not None and fwd_cell.type == 'goal': + done = True + reward = self._reward() + if fwd_cell is not None and fwd_cell.type == 'lava': + done = True + + # Pick up an object + elif action == self.actions.pickup: + if fwd_cell and fwd_cell.can_pickup(): + if self.carrying is None: + self.carrying = fwd_cell + self.carrying.cur_pos = np.array([-1, -1]) + self.grid.set(*fwd_pos, None) + + # Drop an object + elif action == self.actions.drop: + if not fwd_cell and self.carrying: + self.grid.set(*fwd_pos, self.carrying) + self.carrying.cur_pos = fwd_pos + self.carrying = None + + # Toggle/activate an object + elif action == self.actions.toggle: + if fwd_cell: + fwd_cell.toggle(self, fwd_pos) + + # Done action (not used by default) + elif action == self.actions.done: + pass + + else: + assert False, "unknown action" + + if self.step_count >= self.max_steps: + done = True + + obs = self.gen_obs() + + return obs, reward, done, {} + + def gen_obs_grid(self): + """ + Generate the sub-grid observed by the agent.
+ This method also outputs a visibility mask telling us which grid + cells the agent can actually see. + """ + + topX, topY, botX, botY = self.get_view_exts() + + grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size) + + for i in range(self.agent_dir + 1): + grid = grid.rotate_left() + + # Process occluders and visibility + # Note that this incurs some performance cost + if not self.see_through_walls: + vis_mask = grid.process_vis(agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1)) + else: + vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool) + + # Make it so the agent sees what it's carrying + # We do this by placing the carried object at the agent's position + # in the agent's partially observable view + agent_pos = grid.width // 2, grid.height - 1 + if self.carrying: + grid.set(*agent_pos, self.carrying) + else: + grid.set(*agent_pos, None) + + return grid, vis_mask + + def gen_obs(self): + """ + Generate the agent's view (partially observable, low-resolution encoding) + """ + + grid, vis_mask = self.gen_obs_grid() + + # Encode the partially observable view into a numpy array + image = grid.encode(vis_mask) + + assert hasattr(self, 'mission'), "environments must define a textual mission string" + + # Observations are dictionaries containing: + # - an image (partially observable view of the environment) + # - the agent's direction/orientation (acting as a compass) + # - a textual mission string (instructions for the agent) + obs = { + 'image': image, + 'direction': self.agent_dir, + 'mission': self.mission + } + + return obs + + def get_obs_render(self, obs, tile_size=TILE_PIXELS // 2): + """ + Render an agent observation for visualization + """ + + grid, vis_mask = Grid.decode(obs) + + # Render the whole grid + img = grid.render( + tile_size, + agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1), + agent_dir=3, + highlight_mask=vis_mask + ) + + return img + + def render(self, mode='human', 
close=False, highlight=True, tile_size=TILE_PIXELS): + """ + Render the whole-grid human view + """ + + if close: + if self.window: + self.window.close() + return + + if mode == 'human' and not self.window: + import minigrid.window + self.window = minigrid.window.Window('minigrid') + self.window.show(block=False) + + # Compute which cells are visible to the agent + _, vis_mask = self.gen_obs_grid() + + # Compute the world coordinates of the bottom-left corner + # of the agent's view area + f_vec = self.dir_vec + r_vec = self.right_vec + top_left = self.agent_pos + f_vec * (self.agent_view_size - 1) - r_vec * (self.agent_view_size // 2) + + # Mask of which cells to highlight + highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool) + + # For each cell in the visibility mask + for vis_j in range(0, self.agent_view_size): + for vis_i in range(0, self.agent_view_size): + # If this cell is not visible, don't highlight it + if not vis_mask[vis_i, vis_j]: + continue + + # Compute the world coordinates of this cell + abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i) + + if abs_i < 0 or abs_i >= self.width: + continue + if abs_j < 0 or abs_j >= self.height: + continue + + # Mark this cell to be highlighted + highlight_mask[abs_i, abs_j] = True + + # Render the whole grid + img = self.grid.render( + tile_size, + self.agent_pos, + self.agent_dir, + highlight_mask=highlight_mask if highlight else None + ) + + if mode == 'human': + self.window.set_caption(self.mission) + self.window.show_img(img) + + return img + + def close(self): + if self.window: + self.window.close() + return diff --git a/environments/minigrid/src/minigrid/rendering.py b/environments/minigrid/src/minigrid/rendering.py new file mode 100644 index 00000000..6198c92a --- /dev/null +++ b/environments/minigrid/src/minigrid/rendering.py @@ -0,0 +1,126 @@ +import math +import numpy as np + + +def downsample(img, factor): + """ + Downsample an image along both dimensions by some factor + """ 
+ + assert img.shape[0] % factor == 0 + assert img.shape[1] % factor == 0 + + img = img.reshape([img.shape[0]//factor, factor, img.shape[1]//factor, factor, 3]) + img = img.mean(axis=3) + img = img.mean(axis=1) + + return img + + +def fill_coords(img, fn, color): + """ + Fill pixels of an image with coordinates matching a filter function + """ + + for y in range(img.shape[0]): + for x in range(img.shape[1]): + yf = (y + 0.5) / img.shape[0] + xf = (x + 0.5) / img.shape[1] + if fn(xf, yf): + img[y, x] = color + + return img + + +def rotate_fn(fin, cx, cy, theta): + def fout(x, y): + x = x - cx + y = y - cy + + x2 = cx + x * math.cos(-theta) - y * math.sin(-theta) + y2 = cy + y * math.cos(-theta) + x * math.sin(-theta) + + return fin(x2, y2) + + return fout + + +def point_in_line(x0, y0, x1, y1, r): + p0 = np.array([x0, y0]) + p1 = np.array([x1, y1]) + dir = p1 - p0 + dist = np.linalg.norm(dir) + dir = dir / dist + + xmin = min(x0, x1) - r + xmax = max(x0, x1) + r + ymin = min(y0, y1) - r + ymax = max(y0, y1) + r + + def fn(x, y): + # Fast, early escape test + if x < xmin or x > xmax or y < ymin or y > ymax: + return False + + q = np.array([x, y]) + pq = q - p0 + + # Closest point on line + a = np.dot(pq, dir) + a = np.clip(a, 0, dist) + p = p0 + a * dir + + dist_to_line = np.linalg.norm(q - p) + return dist_to_line <= r + + return fn + + +def point_in_circle(cx, cy, r): + def fn(x, y): + return (x-cx)*(x-cx) + (y-cy)*(y-cy) <= r * r + return fn + + +def point_in_rect(xmin, xmax, ymin, ymax): + def fn(x, y): + return xmin <= x <= xmax and ymin <= y <= ymax + return fn + + +def point_in_triangle(a, b, c): + a = np.array(a) + b = np.array(b) + c = np.array(c) + + def fn(x, y): + v0 = c - a + v1 = b - a + v2 = np.array((x, y)) - a + + # Compute dot products + dot00 = np.dot(v0, v0) + dot01 = np.dot(v0, v1) + dot02 = np.dot(v0, v2) + dot11 = np.dot(v1, v1) + dot12 = np.dot(v1, v2) + + # Compute barycentric coordinates + inv_denom = 1 / (dot00 * dot11 - dot01 * dot01) + u 
= (dot11 * dot02 - dot01 * dot12) * inv_denom + v = (dot00 * dot12 - dot01 * dot02) * inv_denom + + # Check if point is in triangle + return (u >= 0) and (v >= 0) and (u + v) < 1 + + return fn + + +def highlight_img(img, color=(255, 255, 255), alpha=0.30): + """ + Add highlighting to an image + """ + + blend_img = img + alpha * (np.array(color, dtype=np.float64) - img) + blend_img = blend_img.clip(0, 255).astype(np.uint8) + img[:, :, :] = blend_img diff --git a/environments/minigrid/src/minigrid/roomgrid.py b/environments/minigrid/src/minigrid/roomgrid.py new file mode 100644 index 00000000..601678d3 --- /dev/null +++ b/environments/minigrid/src/minigrid/roomgrid.py @@ -0,0 +1,403 @@ +from .minigrid import * + + +def reject_next_to(env, pos): + """ + Function to filter out object positions that are right next to + the agent's starting point + """ + + sx, sy = env.agent_pos + x, y = pos + d = abs(sx - x) + abs(sy - y) + return d < 2 + + +class Room: + def __init__( + self, + top, + size + ): + # Top-left corner and size (tuples) + self.top = top + self.size = size + + # List of door objects and door positions + # Order of the doors is right, down, left, up + self.doors = [None] * 4 + self.door_pos = [None] * 4 + + # List of rooms adjacent to this one + # Order of the neighbors is right, down, left, up + self.neighbors = [None] * 4 + + # Indicates if this room is behind a locked door + self.locked = False + + # List of objects contained + self.objs = [] + + def rand_pos(self, env): + topX, topY = self.top + sizeX, sizeY = self.size + return env._rand_pos( + topX + 1, topX + sizeX - 1, + topY + 1, topY + sizeY - 1 + ) + + def pos_inside(self, x, y): + """ + Check if a position is within the bounds of this room + """ + + topX, topY = self.top + sizeX, sizeY = self.size + + if x < topX or y < topY: + return False + + if x >= topX + sizeX or y >= topY + sizeY: + return False + + return True + + +class RoomGrid(MiniGridEnv): + """ + Environment with multiple rooms and random
objects. + This is meant to serve as a base class for other environments. + """ + + def __init__( + self, + room_size=7, + num_rows=3, + num_cols=3, + max_steps=100, + seed=0, + agent_view_size=7 + ): + assert room_size > 0 + assert room_size >= 3 + assert num_rows > 0 + assert num_cols > 0 + self.room_size = room_size + self.num_rows = num_rows + self.num_cols = num_cols + + height = (room_size - 1) * num_rows + 1 + width = (room_size - 1) * num_cols + 1 + + # By default, this environment has no mission + self.mission = '' + + super().__init__( + width=width, + height=height, + max_steps=max_steps, + see_through_walls=False, + seed=seed, + agent_view_size=agent_view_size + ) + + def room_from_pos(self, x, y): + """Get the room a given position maps to""" + + assert x >= 0 + assert y >= 0 + + i = x // (self.room_size-1) + j = y // (self.room_size-1) + + assert i < self.num_cols + assert j < self.num_rows + + return self.room_grid[j][i] + + def get_room(self, i, j): + assert i < self.num_cols + assert j < self.num_rows + return self.room_grid[j][i] + + def _gen_grid(self, width, height): + # Create the grid + self.grid = Grid(width, height) + + self.room_grid = [] + + # For each row of rooms + for j in range(0, self.num_rows): + row = [] + + # For each column of rooms + for i in range(0, self.num_cols): + room = Room( + (i * (self.room_size-1), j * (self.room_size-1)), + (self.room_size, self.room_size) + ) + row.append(room) + + # Generate the walls for this room + self.grid.wall_rect(*room.top, *room.size) + + self.room_grid.append(row) + + # For each row of rooms + for j in range(0, self.num_rows): + # For each column of rooms + for i in range(0, self.num_cols): + room = self.room_grid[j][i] + + x_l, y_l = (room.top[0] + 1, room.top[1] + 1) + x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + + # Door positions, order is right, down, left, up + if i < self.num_cols - 1: + room.neighbors[0] = self.room_grid[j][i+1] + room.door_pos[0] = 
(x_m, self._rand_int(y_l, y_m)) + if j < self.num_rows - 1: + room.neighbors[1] = self.room_grid[j+1][i] + room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) + if i > 0: + room.neighbors[2] = self.room_grid[j][i-1] + room.door_pos[2] = room.neighbors[2].door_pos[0] + if j > 0: + room.neighbors[3] = self.room_grid[j-1][i] + room.door_pos[3] = room.neighbors[3].door_pos[1] + + # The agent starts in the middle, facing right + self.agent_pos = ( + (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + ) + self.agent_dir = 0 + + def place_in_room(self, i, j, obj): + """ + Add an existing object to room (i, j) + """ + + room = self.get_room(i, j) + + pos = self.place_obj( + obj, + room.top, + room.size, + reject_fn=reject_next_to, + max_tries=1000 + ) + + room.objs.append(obj) + + return obj, pos + + def add_object(self, i, j, kind=None, color=None): + """ + Add a new object to room (i, j) + """ + + if kind is None: + kind = self._rand_elem(['key', 'ball', 'box']) + + if color is None: + color = self._rand_color() + + # TODO: we probably want to add an Object.make helper function + assert kind in ['key', 'ball', 'box'] + obj = None + if kind == 'key': + obj = Key(color) + elif kind == 'ball': + obj = Ball(color) + elif kind == 'box': + obj = Box(color) + + return self.place_in_room(i, j, obj) + + def add_door(self, i, j, door_idx=None, color=None, locked=None): + """ + Add a door to a room, connecting it to a neighbor + """ + + room = self.get_room(i, j) + + if door_idx is None: + # Need to make sure that there is a neighbor along this wall + # and that there is not already a door + while True: + door_idx = self._rand_int(0, 4) + if room.neighbors[door_idx] and room.doors[door_idx] is None: + break + + if color is None: + color = self._rand_color() + + if locked is None: + locked = self._rand_bool() + + assert room.doors[door_idx] is None, "door already exists" + + room.locked = locked + 
door = Door(color, is_locked=locked) + + pos = room.door_pos[door_idx] + self.grid.set(*pos, door) + door.cur_pos = pos + + neighbor = room.neighbors[door_idx] + room.doors[door_idx] = door + neighbor.doors[(door_idx+2) % 4] = door + + return door, pos + + def remove_wall(self, i, j, wall_idx): + """ + Remove a wall between two rooms + """ + + room = self.get_room(i, j) + + assert 0 <= wall_idx < 4 + assert room.doors[wall_idx] is None, "door exists on this wall" + assert room.neighbors[wall_idx], "invalid wall" + + neighbor = room.neighbors[wall_idx] + + tx, ty = room.top + w, h = room.size + + # Ordering of walls is right, down, left, up + if wall_idx == 0: + for i in range(1, h - 1): + self.grid.set(tx + w - 1, ty + i, None) + elif wall_idx == 1: + for i in range(1, w - 1): + self.grid.set(tx + i, ty + h - 1, None) + elif wall_idx == 2: + for i in range(1, h - 1): + self.grid.set(tx, ty + i, None) + elif wall_idx == 3: + for i in range(1, w - 1): + self.grid.set(tx + i, ty, None) + else: + assert False, "invalid wall index" + + # Mark the rooms as connected + room.doors[wall_idx] = True + neighbor.doors[(wall_idx+2) % 4] = True + + def place_agent(self, i=None, j=None, rand_dir=True): + """ + Place the agent in a room + """ + + if i is None: + i = self._rand_int(0, self.num_cols) + if j is None: + j = self._rand_int(0, self.num_rows) + + room = self.room_grid[j][i] + + # Find a position that is not right in front of an object + while True: + super().place_agent(room.top, room.size, rand_dir, max_tries=1000) + front_cell = self.grid.get(*self.front_pos) + if front_cell is None or front_cell.type == 'wall': + break + + return self.agent_pos + + def connect_all(self, door_colors=COLOR_NAMES, max_itrs=5000): + """ + Make sure that all rooms are reachable by the agent from its + starting position + """ + + start_room = self.room_from_pos(*self.agent_pos) + + added_doors = [] + + def find_reach(): + reach = set() + stack = [start_room] + while len(stack) > 0: + room = 
stack.pop() + if room in reach: + continue + reach.add(room) + for i in range(0, 4): + if room.doors[i]: + stack.append(room.neighbors[i]) + return reach + + num_itrs = 0 + + while True: + # This is to handle rare situations where random sampling produces + # a level that cannot be connected, producing in an infinite loop + if num_itrs > max_itrs: + raise RecursionError('connect_all failed') + num_itrs += 1 + + # If all rooms are reachable, stop + reach = find_reach() + if len(reach) == self.num_rows * self.num_cols: + break + + # Pick a random room and door position + i = self._rand_int(0, self.num_cols) + j = self._rand_int(0, self.num_rows) + k = self._rand_int(0, 4) + room = self.get_room(i, j) + + # If there is already a door there, skip + if not room.door_pos[k] or room.doors[k]: + continue + + if room.locked or room.neighbors[k].locked: + continue + + color = self._rand_elem(door_colors) + door, _ = self.add_door(i, j, k, color, False) + added_doors.append(door) + + return added_doors + + def add_distractors(self, i=None, j=None, num_distractors=10, all_unique=True): + """ + Add random objects that can potentially distract/confuse the agent. 
+ """ + + # Collect a list of existing objects + objs = [] + for row in self.room_grid: + for room in row: + for obj in room.objs: + objs.append((obj.type, obj.color)) + + # List of distractors added + dists = [] + + while len(dists) < num_distractors: + color = self._rand_elem(COLOR_NAMES) + type = self._rand_elem(['key', 'ball', 'box']) + obj = (type, color) + + if all_unique and obj in objs: + continue + + # Add the object to a random room if no room specified + room_i = i + room_j = j + if room_i is None: + room_i = self._rand_int(0, self.num_cols) + if room_j is None: + room_j = self._rand_int(0, self.num_rows) + + dist, pos = self.add_object(room_i, room_j, *obj) + + objs.append(obj) + dists.append(dist) + + return dists diff --git a/environments/minigrid/src/minigrid/simenv_minigrid.py b/environments/minigrid/src/minigrid/simenv_minigrid.py new file mode 100644 index 00000000..e4c14bdd --- /dev/null +++ b/environments/minigrid/src/minigrid/simenv_minigrid.py @@ -0,0 +1,53 @@ +import simenv as sm + + +class Goal: + def __init__(self): + pass + + +class Floor: + def __init__(self): + pass + + +class Lava: + def __init__(self): + pass + + +class Wall: + def __init__(self): + pass + + +class Door: + def __init__(self): + pass + + +class Key: + def __init__(self): + pass + + +class Box: + def __init__(self): + pass + + +class MiniGridEnv: + + def __init__(self, scene: sm.Scene, width, height): + self.scene = scene + self.width = width + self.height = height + self.tile_size = 32 + + scene += sm.Camera(camera_type="orthographic", width=800, height=800) + scene += sm.Light() + + for i in range(width): + for k in range(height): + self.scene += sm.Box(name=f"floor{i * height + k}", position=[i, k, 0], material=sm.Material.BLACK) + diff --git a/integrations/Blender/simenv_blender/__init__.py b/integrations/Blender/simenv_blender/__init__.py index 2a5431f9..329cc5ef 100644 --- a/integrations/Blender/simenv_blender/__init__.py +++ 
b/integrations/Blender/simenv_blender/__init__.py @@ -11,27 +11,29 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -bl_info = { - "name" : "simenv", - "author" : "Hugging Face", - "description" : "", - "blender" : (3, 2, 0), - "version" : (0, 0, 1), - "location" : "View3D", - "warning" : "", - "category" : "Simulation" -} - import bpy from .simenv_op import SIMENV_OT_ImportScene from .simenv_pnl import SIMENV_PT_Panel +bl_info = { + "name": "simenv", + "author": "Hugging Face", + "description": "", + "blender": (3, 2, 0), + "version": (0, 0, 1), + "location": "View3D", + "warning": "", + "category": "Simulation" +} + classes = (SIMENV_OT_ImportScene, SIMENV_PT_Panel) + def register(): for c in classes: bpy.utils.register_class(c) + def unregister(): for c in classes: bpy.utils.unregister_class(c) diff --git a/integrations/Blender/simenv_blender/client.py b/integrations/Blender/simenv_blender/client.py index 283b3a92..63ed9684 100644 --- a/integrations/Blender/simenv_blender/client.py +++ b/integrations/Blender/simenv_blender/client.py @@ -1,6 +1,4 @@ import socket -import json -import base64 class Client: diff --git a/integrations/Blender/simenv_blender/simenv_op.py b/integrations/Blender/simenv_blender/simenv_op.py index 746190bb..8ce88c59 100644 --- a/integrations/Blender/simenv_blender/simenv_op.py +++ b/integrations/Blender/simenv_blender/simenv_op.py @@ -1,4 +1,3 @@ -import bpy from bpy.types import Operator from .simulator import Simulator diff --git a/integrations/Blender/simenv_blender/simenv_pnl.py b/integrations/Blender/simenv_blender/simenv_pnl.py index f11df9a4..2361776d 100644 --- a/integrations/Blender/simenv_blender/simenv_pnl.py +++ b/integrations/Blender/simenv_blender/simenv_pnl.py @@ -1,6 +1,7 @@ import bpy from bpy.types import Panel + class SIMENV_PT_Panel(Panel): bl_space_type = "VIEW_3D" bl_region_type = "UI" diff --git a/integrations/Blender/simenv_blender/simulator.py 
b/integrations/Blender/simenv_blender/simulator.py index 55e06d23..3529872a 100644 --- a/integrations/Blender/simenv_blender/simulator.py +++ b/integrations/Blender/simenv_blender/simulator.py @@ -1,8 +1,6 @@ -from email import message import json import os import base64 -from pickle import BINPUT import bpy from .client import Client from pathlib import Path diff --git a/integrations/Godot/simenv-godot/Scenes/scene.tscn b/integrations/Godot/simenv-godot/Scenes/scene.tscn index 1bd497e3..705f7037 100644 --- a/integrations/Godot/simenv-godot/Scenes/scene.tscn +++ b/integrations/Godot/simenv-godot/Scenes/scene.tscn @@ -1,7 +1,7 @@ -[gd_scene load_steps=7 format=3 uid="uid://bpe4rf720wc7w"] +[gd_scene load_steps=7 format=3 uid="uid://bmr58mk5sqro3"] -[ext_resource type="Script" path="res://SimEnv/Simulator.gd" id="1_oo243"] -[ext_resource type="Script" path="res://SimEnv/Camera.gd" id="2_wa3b4"] +[ext_resource type="Script" path="res://SimEnv/simulator.gd" id="1_u0wd1"] +[ext_resource type="Script" path="res://SimEnv/camera.gd" id="2_3niri"] [sub_resource type="PhysicalSkyMaterial" id="PhysicalSkyMaterial_tieyq"] ground_color = Color(0.247059, 0.172549, 0.0823529, 1) @@ -21,7 +21,7 @@ glow_enabled = true [sub_resource type="CameraEffects" id="CameraEffects_noyl2"] [node name="Root" type="Node"] -script = ExtResource("1_oo243") +script = ExtResource("1_u0wd1") [node name="BaseWorld" type="Node3D" parent="."] @@ -30,7 +30,7 @@ transform = Transform3D(-0.597625, 0.452977, -0.661556, -1.49012e-08, 0.825113, environment = SubResource("Environment_ss83i") effects = SubResource("CameraEffects_noyl2") current = true -script = ExtResource("2_wa3b4") +script = ExtResource("2_3niri") [node name="DirectionalLight3D" type="DirectionalLight3D" parent="BaseWorld"] transform = Transform3D(-0.812844, -0.527265, 0.247543, -0.0232084, 0.453958, 0.890721, -0.58202, 0.718271, -0.381234, 0, 0, 0) diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd 
b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd index 973f25fd..ed1e8013 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd @@ -1,5 +1,15 @@ -extends Node class_name Client +extends Node +# This class sets up the TCP client to receive data +# from the Python SimEnv API through the TCP server +# +# Reading the stream is synchronized on the _physics_process +# Physics will only step if the command received tells it to do so +# (see SimEnv/Commands/step.gd) +# +# Data is received by chunks of _chunk_size +# _warmed_up is a hacky bugfix to start the TCP stream before the physics sync + signal connected signal data @@ -11,17 +21,21 @@ var _stream: StreamPeerTCP = StreamPeerTCP.new() var _chunk_size: int = 1024 var _warmed_up: bool = false + func _ready() -> void: _status = _stream.get_status() + func _physics_process(_delta): # this is called at a fixed rate update_status() if _status == _stream.STATUS_CONNECTED: + # to sync commands with the physics steps get_tree().paused = true read() + func update_status(): _stream.poll() var new_status: int = _stream.get_status() @@ -40,6 +54,7 @@ func update_status(): print("Error with socket stream.") emit_signal("error") + func read(): update_status() var available_bytes: int = _stream.get_available_bytes() @@ -66,6 +81,7 @@ func read(): else: get_tree().paused = false + func connect_to_host(host: String, port: int) -> void: print("Connecting to %s:%d" % [host, port]) if _status == _stream.STATUS_CONNECTED: @@ -76,6 +92,7 @@ func connect_to_host(host: String, port: int) -> void: _stream.disconnect_from_host() emit_signal("error") + func send(out_data: PackedByteArray) -> bool: if _status != _stream.STATUS_CONNECTED: print("Error: Stream is not currently connected.") diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd index 87bede67..bc2b4317 100644 --- 
a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd @@ -1,11 +1,13 @@ -extends Node class_name Command +extends Node +# signal callback var content : Variant var _commands : Dictionary + func load_commands(): var directory: Directory = Directory.new() var com_path : String = "res://SimEnv/Commands" @@ -24,11 +26,13 @@ func load_commands(): directory.list_dir_end() + func execute(type: String) -> void: if type in _commands: _commands[type].execute(content) else: print("Unknown command.") - + + func _handle_callback(callback_data: PackedByteArray): emit_signal("callback", callback_data) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd deleted file mode 100644 index e3b10e71..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd +++ /dev/null @@ -1,15 +0,0 @@ -extends Node - -signal callback - -func execute(content) -> void: - var content_bytes : PackedByteArray = Marshalls.base64_to_raw(content["b64bytes"]) - - var gltf_state : GLTFState = GLTFState.new() - var gltf_doc : GLTFDocument = GLTFDocument.new() - - gltf_doc.append_from_buffer(content_bytes, "", gltf_state) - var gltf_scene = gltf_doc.generate_scene(gltf_state) - get_tree().current_scene.add_child(gltf_scene) - - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd index 707cbdf8..b0b883d7 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd @@ -1,7 +1,9 @@ extends Node +# Close the application signal callback + func execute(_content): get_tree().quit() get_tree().paused = false diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd deleted 
file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd deleted file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd deleted file mode 100644 index 66e85fc9..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd +++ /dev/null @@ -1,6 +0,0 @@ -extends Node - -signal callback - -func execute(_content): - emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd index a2f5651c..2e1c8a8d 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd @@ -2,6 +2,7 @@ extends Node signal callback + func execute(_content): get_tree().paused = false emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd index a2f5651c..68c826a8 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd @@ -1,7 +1,10 @@ extends Node +# Handles the stepping of the simulation +# Unpause the application to run a step of _physics_process signal callback + func execute(_content): get_tree().paused = false 
emit_signal("callback", PackedByteArray([97, 99, 107])) diff --git a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd b/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd deleted file mode 100644 index e3bfaaba..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd +++ /dev/null @@ -1,5 +0,0 @@ -extends Node - - -func execute(content): - print(content) diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd deleted file mode 100644 index 87b29ae6..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd +++ /dev/null @@ -1,66 +0,0 @@ -extends Node -class_name Agent - - -class Actions: - var name: String - var dist: String - var available: Array = [] - var forward: float = 0.0 - var move_right: float = 0.0 - var turn_right: float = 0.0 - - func set_action(step_action: Array) -> void: - pass - - -class DiscreteActions: - extends Actions - - func set_action(step_action: Array) -> void: - var istep_action: int = int(step_action[0]) - forward = 0.0 - move_right = 0.0 - turn_right = 0.0 - - match available[istep_action]: - "move_foward": - forward = 1.0 - "move_backward": - forward = -1.0 - "move_left": - move_right = 1.0 - "move_right": - move_right = -1.0 - "turn_right": - turn_right = 1.0 - "turn_left": - turn_right = -1.0 - _: - print("Invalid action.") - - -class ContinuousActions: - extends Actions - - func set_action(step_action: Array) -> void: - for i in range(len(step_action)): - match available[i]: - "move_forward_backward": - forward = step_action[i] - "move_left_right": - move_right = step_action[i] - "turn_left_right": - turn_right = step_action[i] - _: - print("Invalid action.") - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. 
-func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. -func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. -func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd deleted file mode 100644 index e08925d4..00000000 --- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd +++ /dev/null @@ -1,11 +0,0 @@ -extends Node - - -# Called when the node enters the scene tree for the first time. -func _ready(): - pass # Replace with function body. - - -# Called every frame. 'delta' is the elapsed time since the previous frame. 
-func _process(delta): - pass diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd index 74cc1a7b..ef11c5f6 100644 --- a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd +++ b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd @@ -1,13 +1,20 @@ +class_name Simulator extends Node +# Manage the global simulation process +# Get data from TCP client and call the command dispatch +# +# The core function of this class is to decode the data into json +# and send this json data to the commands const HOST : String = "127.0.0.1" const PORT : int = 55000 const RECONNECT_TIMEOUT: float = 3.0 +var agent + var _client : Client = Client.new() var _command : Command = Command.new() -var agent func _ready() -> void: _client.connect("connected", _handle_client_connected) @@ -22,13 +29,16 @@ func _ready() -> void: _command.load_commands() _client.connect_to_host(HOST, PORT) + func _connect_after_timeout(timeout: float) -> void: await get_tree().create_timer(timeout).timeout _client.connect_to_host(HOST, PORT) + func _handle_client_connected() -> void: print("Client connected to server.") + func _handle_client_data(data: PackedByteArray) -> void: var str_data : String = data.get_string_from_utf8() @@ -44,14 +54,17 @@ func _handle_client_data(data: PackedByteArray) -> void: else: print("Error parsing data.") + func _handle_client_disconnected() -> void: print("Client disconnected from server.") _connect_after_timeout(RECONNECT_TIMEOUT) + func _handle_client_error() -> void: print("Client error.") _connect_after_timeout(RECONNECT_TIMEOUT) + func _handle_callback(callback_data: PackedByteArray) -> void: print("Sending callback.") _client.send(callback_data) diff --git a/integrations/Godot/simenv-godot/project.godot b/integrations/Godot/simenv-godot/project.godot index efc1f3ac..ee265053 100644 --- a/integrations/Godot/simenv-godot/project.godot +++ b/integrations/Godot/simenv-godot/project.godot @@ -12,22 +12,34 @@ 
_global_script_classes=[{ "base": "Node", "class": &"Agent", "language": &"GDScript", -"path": "res://SimEnv/Simulation/Agent.gd" +"path": "res://SimEnv/RLAgents/agent.gd" +}, { +"base": "Node", +"class": &"AgentManager", +"language": &"GDScript", +"path": "res://SimEnv/RLAgents/agent_manager.gd" }, { "base": "Node", "class": &"Client", "language": &"GDScript", -"path": "res://SimEnv/Bridge/Client.gd" +"path": "res://SimEnv/Bridge/client.gd" }, { "base": "Node", "class": &"Command", "language": &"GDScript", -"path": "res://SimEnv/Bridge/Command.gd" +"path": "res://SimEnv/Bridge/command.gd" +}, { +"base": "Node", +"class": &"Simulator", +"language": &"GDScript", +"path": "res://SimEnv/simulator.gd" }] _global_script_class_icons={ "Agent": "", +"AgentManager": "", "Client": "", -"Command": "" +"Command": "", +"Simulator": "" } [application]