diff --git a/environments/minigrid/.gitignore b/environments/minigrid/.gitignore
new file mode 100644
index 00000000..e69de29b
diff --git a/environments/minigrid/LICENCE b/environments/minigrid/LICENCE
new file mode 100644
index 00000000..a1a92b70
--- /dev/null
+++ b/environments/minigrid/LICENCE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2019 Maxime Chevalier-Boisvert
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/environments/minigrid/Makefile b/environments/minigrid/Makefile
new file mode 100644
index 00000000..79c54efc
--- /dev/null
+++ b/environments/minigrid/Makefile
@@ -0,0 +1,19 @@
+.PHONY: quality style test
+
+# Check that source code meets quality standards
+
+quality:
+	black --check --line-length 119 --target-version py38 .
+	isort --check-only tests src
+	flake8 .
+
+# Format source code automatically
+
+style:
+	black --line-length 119 --target-version py38 .
+	isort .
+
+# Run tests for the library
+
+test:
+	python -m pytest -n auto --dist=loadfile -s -v ./tests/
\ No newline at end of file
diff --git a/environments/minigrid/README.md b/environments/minigrid/README.md
new file mode 100644
index 00000000..a8f882cd
--- /dev/null
+++ b/environments/minigrid/README.md
@@ -0,0 +1,31 @@
+# Minigrid-like environment
+
+Minimalistic gridworld environment inspired by [Minigrid](https://github.com/Farama-Foundation/gym-minigrid)
+
+## Installation
+Create a virtual env, activate it, and then install `simenv`:
+
+```
+cd .. && git clone https://github.com/huggingface/simenv.git
+cd simenv
+pip install -e ".[dev]"
+```
+
+Then install the `minigrid` package:
+
+```
+cd environments/minigrid
+pip install -e ".[dev]"
+```
+
+And it's done!
+
+### Style
+
+Before you merge a PR, fix the style (we use `isort` + `black`)
+```
+make style
+```
+
+## Basic Usage
+
diff --git a/environments/minigrid/assets/textures/agent.png b/environments/minigrid/assets/textures/agent.png
new file mode 100644
index 00000000..e8f34827
Binary files /dev/null and b/environments/minigrid/assets/textures/agent.png differ
diff --git a/environments/minigrid/assets/textures/ball.png b/environments/minigrid/assets/textures/ball.png
new file mode 100644
index 00000000..939df79f
Binary files /dev/null and b/environments/minigrid/assets/textures/ball.png differ
diff --git a/environments/minigrid/assets/textures/box.png b/environments/minigrid/assets/textures/box.png
new file mode 100644
index 00000000..d931b315
Binary files /dev/null and b/environments/minigrid/assets/textures/box.png differ
diff --git a/environments/minigrid/assets/textures/crossing.png b/environments/minigrid/assets/textures/crossing.png
new file mode 100644
index 00000000..f24d663f
Binary files /dev/null and b/environments/minigrid/assets/textures/crossing.png differ
diff --git a/environments/minigrid/assets/textures/door_closed.png b/environments/minigrid/assets/textures/door_closed.png
new file mode 100644
index 00000000..e126369c
Binary files /dev/null and b/environments/minigrid/assets/textures/door_closed.png differ
diff --git a/environments/minigrid/assets/textures/door_locked.png b/environments/minigrid/assets/textures/door_locked.png
new file mode 100644
index 00000000..e4b21701
Binary files /dev/null and b/environments/minigrid/assets/textures/door_locked.png differ
diff --git a/environments/minigrid/assets/textures/door_opened.png b/environments/minigrid/assets/textures/door_opened.png
new file mode 100644
index 00000000..180e0289
Binary files /dev/null and b/environments/minigrid/assets/textures/door_opened.png differ
diff --git a/environments/minigrid/assets/textures/grid_tile.png b/environments/minigrid/assets/textures/grid_tile.png
new file mode 100644
index 00000000..8a1e2d91
Binary files /dev/null and b/environments/minigrid/assets/textures/grid_tile.png differ
diff --git a/environments/minigrid/assets/textures/key.png b/environments/minigrid/assets/textures/key.png
new file mode 100644
index 00000000..d58b3a3b
Binary files /dev/null and b/environments/minigrid/assets/textures/key.png differ
diff --git a/environments/minigrid/assets/textures/lava.png b/environments/minigrid/assets/textures/lava.png
new file mode 100644
index 00000000..478c5038
Binary files /dev/null and b/environments/minigrid/assets/textures/lava.png differ
diff --git a/environments/minigrid/setup.py b/environments/minigrid/setup.py
new file mode 100644
index 00000000..3cd825fa
--- /dev/null
+++ b/environments/minigrid/setup.py
@@ -0,0 +1,94 @@
+# Lint as: python3
+""" HuggingFace/minigrid is a simple gridworld environment for RL.
+
+Note:
+
+   VERSION needs to be formatted following the MAJOR.MINOR.PATCH convention
+   (we need to follow this convention to be able to retrieve versioned scripts)
+
+Simple check list for release from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py
+
+To create the package for pypi.
+
+0. Prerequisites:
+   - Dependencies:
+     - twine: "pip install twine"
+   - Create an account in (and join the 'minigrid' project):
+     - PyPI: https://pypi.org/
+     - Test PyPI: https://test.pypi.org/
+
+1. Change the version in:
+   - __init__.py
+   - setup.py
+
+2. Commit these changes: "git commit -m 'Release: VERSION'"
+
+3. Add a tag in git to mark the release: "git tag VERSION -m 'Add tag VERSION for pypi'"
+   Push the tag to remote: git push --tags origin main
+
+4. Build both the sources and the wheel. Do not change anything in setup.py between
+   creating the wheel and the source distribution (obviously).
+
+   First, delete any "build" directory that may exist from previous builds.
+
+   For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
+   (this will build a wheel for the python version you use to build it).
+
+   For the sources, run: "python setup.py sdist"
+   You should now have a /dist directory with both .whl and .tar.gz source versions.
+
+5. Check that everything looks correct by uploading the package to the pypi test server:
+
+   twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
+
+   Check that you can install it in a virtualenv/notebook by running:
+   pip install -i https://testpypi.python.org/pypi minigrid
+
+6. Upload the final version to actual pypi:
+   twine upload dist/* -r pypi
+
+7. Fill release notes in the tag in github once everything is looking hunky-dory.
+
+8. Change the version in __init__.py and setup.py to X.X.X+1.dev0 (e.g. VERSION=1.18.3 -> 1.18.4.dev0).
+   Then push the change with a message 'set dev version'
+"""
+
+import os
+import sys
+
+from setuptools import find_packages, setup
+
+
+REQUIRED_PKGS = [
+    "dataclasses_json",  # For GLTF export/imports
+    "numpy>=1.17",  # We use numpy>=1.17 to have np.random.Generator
+    "simenv",
+]
+
+QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"]
+
+TESTS_REQUIRE = [
+    # test dependencies
+]
+
+EXTRAS_REQUIRE = {
+    "dev": TESTS_REQUIRE + QUALITY_REQUIRE,
+    "tests": TESTS_REQUIRE,
+    "quality": QUALITY_REQUIRE,
+}
+
+setup(
+    name="minigrid",
+    description="HuggingFace simple gridworld environment for RL.",
+    long_description=open("README.md", encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
+    author="HuggingFace Inc.",
+    author_email="carl@huggingface.co",
+    license="Apache 2.0",
+    version="0.0.1.dev0",
+    package_dir={"": "src"},
+    packages=find_packages("src"),
+    install_requires=REQUIRED_PKGS,
+    extras_require=EXTRAS_REQUIRE,
+    keywords="simulation environments grid world reinforcement machine learning",
+)
diff --git a/environments/minigrid/src/minigrid/__init__.py b/environments/minigrid/src/minigrid/__init__.py
new file mode 100644
index 00000000..22a9f2e0
--- /dev/null
+++ b/environments/minigrid/src/minigrid/__init__.py
@@ -0,0 +1 @@
+import minigrid.envs
\ No newline at end of file
diff --git a/environments/minigrid/src/minigrid/envs/__init__.py b/environments/minigrid/src/minigrid/envs/__init__.py
new file mode 100644
index 00000000..078b90f5
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/__init__.py
@@ -0,0 +1,21 @@
+from minigrid.envs.empty import *
+from minigrid.envs.doorkey import *
+from minigrid.envs.multiroom import *
+from minigrid.envs.fetch import *
+from minigrid.envs.gotoobject import *
+from minigrid.envs.gotodoor import *
+from minigrid.envs.putnear import *
+from minigrid.envs.lockedroom import *
+from minigrid.envs.keycorridor import *
+from minigrid.envs.unlock import *
+from minigrid.envs.unlockpickup import *
+from minigrid.envs.blockedunlockpickup import *
+from minigrid.envs.playground_v0 import *
+from minigrid.envs.redbluedoors import *
+from minigrid.envs.obstructedmaze import *
+from minigrid.envs.memory import *
+from minigrid.envs.fourrooms import *
+from minigrid.envs.crossing import *
+from minigrid.envs.lavagap import *
+from minigrid.envs.dynamicobstacles import *
+from minigrid.envs.distshift import *
\ No newline at end of file
diff --git a/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py
new file mode 100644
index 00000000..9d4303d7
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/blockedunlockpickup.py
@@ -0,0 +1,47 @@
+from minigrid.minigrid import Ball
+from minigrid.roomgrid import RoomGrid
+
+
+class BlockedUnlockPickup(RoomGrid):
+    """
+    Two-room task: unlock a door that is blocked by a ball, then pick up
+    the box in the other room. The episode ends when that box is picked up.
+    """
+
+    def __init__(self, seed=None):
+        room_size = 6
+        super().__init__(
+            num_rows=1,
+            num_cols=2,
+            room_size=room_size,
+            max_steps=16*room_size**2,
+            seed=seed
+        )
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        # Add the target box to the room on the right (room index 1, 0)
+        obj, _ = self.add_object(1, 0, kind="box")
+        # Make sure the two rooms are directly connected by a locked door
+        door, pos = self.add_door(0, 0, 0, locked=True)
+        # Block the door with a ball placed in the cell just left of it (x - 1)
+        color = self._rand_color()
+        self.grid.set(pos[0]-1, pos[1], Ball(color))
+        # Add a key with the door's color to room (0, 0) so the door can be unlocked
+        self.add_object(0, 0, 'key', door.color)
+
+        self.place_agent(0, 0)
+
+        self.obj = obj
+        self.mission = "pick up the %s %s" % (obj.color, obj.type)
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+        # Succeed only when the object now being carried is the target box
+        if action == self.actions.pickup:
+            if self.carrying and self.carrying == self.obj:
+                reward = self._reward()
+                done = True
+
+        return obs, reward, done, info
diff --git a/environments/minigrid/src/minigrid/envs/crossing.py b/environments/minigrid/src/minigrid/envs/crossing.py
new file mode 100644
index 00000000..2c319186
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/crossing.py
@@ -0,0 +1,121 @@
+from minigrid.minigrid import *
+import itertools as itt
+
+
+class CrossingEnv(MiniGridEnv):
+    """
+    Environment with wall or lava obstacles, sparse reward (`seed` is passed to MiniGridEnv).
+    """
+
+    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
+        self.num_crossings = num_crossings
+        self.obstacle_type = obstacle_type
+        super().__init__(
+            grid_size=size,
+            max_steps=4*size*size,
+            # Set this to True for maximum speed
+            see_through_walls=False,
+            seed=seed  # forward the caller's seed instead of always passing None
+        )
+
+    def _gen_grid(self, width, height):
+        assert width % 2 == 1 and height % 2 == 1  # odd size
+
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Place the agent in the top-left corner
+        self.agent_pos = (1, 1)
+        self.agent_dir = 0
+
+        # Place a goal square in the bottom-right corner
+        self.put_obj(Goal(), width - 2, height - 2)
+
+        # Place obstacles (lava or walls)
+        v, h = object(), object()  # singleton `vertical` and `horizontal` objects
+
+        # Lava rivers or walls specified by direction and position in grid
+        rivers = [(v, i) for i in range(2, height - 2, 2)]
+        rivers += [(h, j) for j in range(2, width - 2, 2)]
+        self.np_random.shuffle(rivers)
+        rivers = rivers[:self.num_crossings]  # sample random rivers
+        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
+        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
+        obstacle_pos = itt.chain(
+            itt.product(range(1, width - 1), rivers_h),
+            itt.product(rivers_v, range(1, height - 1)),
+        )
+        for i, j in obstacle_pos:
+            self.put_obj(self.obstacle_type(), i, j)
+
+        # Sample path to goal
+        path = [h] * len(rivers_v) + [v] * len(rivers_h)
+        self.np_random.shuffle(path)
+
+        # Create openings
+        limits_v = [0] + rivers_v + [height - 1]
+        limits_h = [0] + rivers_h + [width - 1]
+        room_i, room_j = 0, 0
+        for direction in path:
+            if direction is h:
+                i = limits_v[room_i + 1]
+                j = self.np_random.choice(
+                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
+                room_i += 1
+            elif direction is v:
+                i = self.np_random.choice(
+                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
+                j = limits_h[room_j + 1]
+                room_j += 1
+            else:
+                assert False
+            self.grid.set(i, j, None)
+
+        self.mission = (
+            "avoid the lava and get to the green goal square"
+            if self.obstacle_type == Lava
+            else "find the opening and get to the green goal square"
+        )
+
+
+class LavaCrossingEnv(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=1)
+
+
+class LavaCrossingS9N2Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=2)
+
+
+class LavaCrossingS9N3Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=3)
+
+
+class LavaCrossingS11N5Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=11, num_crossings=5)
+
+
+class SimpleCrossingEnv(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=1, obstacle_type=Wall)
+
+
+class SimpleCrossingS9N2Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=2, obstacle_type=Wall)
+
+
+class SimpleCrossingS9N3Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=9, num_crossings=3, obstacle_type=Wall)
+
+
+class SimpleCrossingS11N5Env(CrossingEnv):
+    def __init__(self):
+        super().__init__(size=11, num_crossings=5, obstacle_type=Wall)
diff --git a/environments/minigrid/src/minigrid/envs/distshift.py b/environments/minigrid/src/minigrid/envs/distshift.py
new file mode 100644
index 00000000..31d7c47d
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/distshift.py
@@ -0,0 +1,62 @@
+from minigrid.minigrid import *
+
+
+class DistShiftEnv(MiniGridEnv):
+    """
+    Distributional shift environment.
+    """
+
+    def __init__(
+        self,
+        width=9,
+        height=7,
+        agent_start_pos=(1,1),
+        agent_start_dir=0,
+        strip2_row=2
+    ):
+        self.agent_start_pos = agent_start_pos
+        self.agent_start_dir = agent_start_dir
+        self.goal_pos = (width-2, 1)
+        self.strip2_row = strip2_row
+
+        super().__init__(
+            width=width,
+            height=height,
+            max_steps=4*width*height,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Place a goal square in the top-right corner (goal_pos is in row 1)
+        self.put_obj(Goal(), *self.goal_pos)
+
+        # Place the two lava strips: one in row 1 and one in row strip2_row
+        for i in range(self.width - 6):
+            self.grid.set(3+i, 1, Lava())
+            self.grid.set(3+i, self.strip2_row, Lava())
+
+        # Place the agent
+        if self.agent_start_pos is not None:
+            self.agent_pos = self.agent_start_pos
+            self.agent_dir = self.agent_start_dir
+        else:
+            self.place_agent()
+
+        self.mission = "get to the green goal square"
+
+
+class DistShift1(DistShiftEnv):
+    def __init__(self):
+        super().__init__(strip2_row=2)
+
+
+class DistShift2(DistShiftEnv):
+    def __init__(self):
+        super().__init__(strip2_row=5)
diff --git a/environments/minigrid/src/minigrid/envs/doorkey.py b/environments/minigrid/src/minigrid/envs/doorkey.py
new file mode 100644
index 00000000..f333de2f
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/doorkey.py
@@ -0,0 +1,59 @@
+from minigrid.minigrid import *
+
+
+class DoorKeyEnv(MiniGridEnv):
+    """
+    Environment with a door and key, sparse reward
+    """
+
+    def __init__(self, size=8):
+        super().__init__(
+            grid_size=size,
+            max_steps=10*size*size
+        )
+
+    def _gen_grid(self, width, height):
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Place a goal in the bottom-right corner
+        self.put_obj(Goal(), width - 2, height - 2)
+
+        # Create a vertical splitting wall
+        splitIdx = self._rand_int(2, width-2)
+        self.grid.vert_wall(splitIdx, 0)
+
+        # Place the agent at a random position and orientation
+        # on the left side of the splitting wall
+        self.place_agent(size=(splitIdx, height))
+
+        # Place a locked door in the wall. NOTE(review): the row is bounded by width, not height
+        doorIdx = self._rand_int(1, width-2)
+        self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx)
+
+        # Place a yellow key on the left side
+        self.place_obj(
+            obj=Key('yellow'),
+            top=(0, 0),
+            size=(splitIdx, height)
+        )
+
+        self.mission = "use the key to open the door and then get to the goal"
+
+
+class DoorKeyEnv5x5(DoorKeyEnv):
+    def __init__(self):
+        super().__init__(size=5)
+
+
+class DoorKeyEnv6x6(DoorKeyEnv):
+    def __init__(self):
+        super().__init__(size=6)
+
+
+class DoorKeyEnv16x16(DoorKeyEnv):
+    def __init__(self):
+        super().__init__(size=16)
diff --git a/environments/minigrid/src/minigrid/envs/dynamicobstacles.py b/environments/minigrid/src/minigrid/envs/dynamicobstacles.py
new file mode 100644
index 00000000..429632f7
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/dynamicobstacles.py
@@ -0,0 +1,114 @@
+from minigrid.minigrid import *
+from operator import add
+
+
+class DynamicObstaclesEnv(MiniGridEnv):
+    """
+    Single-room square grid environment with moving obstacles
+    """
+
+    def __init__(
+            self,
+            size=8,
+            agent_start_pos=(1, 1),
+            agent_start_dir=0,
+            n_obstacles=4
+    ):
+        self.agent_start_pos = agent_start_pos
+        self.agent_start_dir = agent_start_dir
+
+        # Cap the obstacle count at size/2 when more than size/2 + 1 are asked for
+        if n_obstacles <= size/2 + 1:
+            self.n_obstacles = int(n_obstacles)
+        else:
+            self.n_obstacles = int(size/2)
+        super().__init__(
+            grid_size=size,
+            max_steps=4 * size * size,
+            # Set this to True for maximum speed
+            see_through_walls=True,
+        )
+        # Only 3 actions are permitted: left, right, forward
+        self.action_space = spaces.Discrete(self.actions.forward + 1)
+        self.reward_range = (-1, 1)
+
+    def _gen_grid(self, width, height):
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Place a goal square in the bottom-right corner
+        self.grid.set(width - 2, height - 2, Goal())
+
+        # Place the agent: fixed pose if one was given, random otherwise
+        if self.agent_start_pos is not None:
+            self.agent_pos = self.agent_start_pos
+            self.agent_dir = self.agent_start_dir
+        else:
+            self.place_agent()
+
+        # Place obstacles (kept in a list so step() can move them later)
+        self.obstacles = []
+        for i_obst in range(self.n_obstacles):
+            self.obstacles.append(Ball())
+            self.place_obj(self.obstacles[i_obst], max_tries=100)
+
+        self.mission = "get to the green goal square"
+
+    def step(self, action):
+        # Out-of-range actions fall back to action 0
+        if action >= self.action_space.n:
+            action = 0
+
+        # Check what is in front of the agent *before* the obstacles move
+        front_cell = self.grid.get(*self.front_pos)
+        not_clear = front_cell and front_cell.type != 'goal'
+
+        # Try to move every obstacle within the 3x3 area around it
+        for obstacle in self.obstacles:
+            old_pos = obstacle.cur_pos
+            top = tuple(map(add, old_pos, (-1, -1)))
+
+            try:
+                self.place_obj(obstacle, top=top, size=(3,3), max_tries=100)
+                self.grid.set(*old_pos, None)
+            except Exception:  # placement failed: obstacle stays where it is
+                pass
+
+        # Update the agent's position/direction
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+
+        # Walking into an obstacle or wall is penalised and ends the episode;
+        # the observation/info from the base step are returned unchanged
+        if action == self.actions.forward and not_clear:
+            reward = -1
+            done = True
+
+        return obs, reward, done, info
+
+
+class DynamicObstaclesEnv5x5(DynamicObstaclesEnv):  # size 5, 2 obstacles, default start pose
+    def __init__(self):
+        super().__init__(size=5, n_obstacles=2)
+
+
+class DynamicObstaclesRandomEnv5x5(DynamicObstaclesEnv):  # size 5, 2 obstacles
+    def __init__(self):
+        super().__init__(size=5, agent_start_pos=None, n_obstacles=2)  # None -> _gen_grid calls place_agent()
+
+
+class DynamicObstaclesEnv6x6(DynamicObstaclesEnv):  # size 6, 3 obstacles, default start pose
+    def __init__(self):
+        super().__init__(size=6, n_obstacles=3)
+
+
+class DynamicObstaclesRandomEnv6x6(DynamicObstaclesEnv):  # size 6, 3 obstacles
+    def __init__(self):
+        super().__init__(size=6, agent_start_pos=None, n_obstacles=3)  # None -> _gen_grid calls place_agent()
+
+
+class DynamicObstaclesEnv16x16(DynamicObstaclesEnv):  # size 16, 8 obstacles, default start pose
+    def __init__(self):
+        super().__init__(size=16, n_obstacles=8)
diff --git a/environments/minigrid/src/minigrid/envs/empty.py b/environments/minigrid/src/minigrid/envs/empty.py
new file mode 100644
index 00000000..33307fab
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/empty.py
@@ -0,0 +1,68 @@
+from minigrid.minigrid import *
+
+
+class EmptyEnv(MiniGridEnv):
+    """
+    Walled room with nothing inside except a goal square, sparse reward.
+
+    The agent starts from a fixed pose or, when agent_start_pos is None,
+    wherever place_agent() puts it.
+    """
+
+    def __init__(self, size=8, agent_start_pos=(1,1), agent_start_dir=0):
+        # Kept on the instance because _gen_grid reads both attributes
+        self.agent_start_dir = agent_start_dir
+        self.agent_start_pos = agent_start_pos
+
+        # Step budget scales with the grid area
+        step_budget = 4 * size * size
+        super().__init__(
+            grid_size=size,
+            max_steps=step_budget,
+            see_through_walls=True  # True is the fast setting
+        )
+
+    def _gen_grid(self, width, height):
+        # Start from a blank grid enclosed by walls
+        self.grid = Grid(width, height)
+        self.grid.wall_rect(0, 0, width, height)
+
+        # The goal occupies the bottom-right corner inside the walls
+        goal_x, goal_y = width - 2, height - 2
+        self.put_obj(Goal(), goal_x, goal_y)
+
+        # Random placement when no start position was given,
+        # otherwise use the configured pose as-is
+        if self.agent_start_pos is None:
+            self.place_agent()
+        else:
+            self.agent_pos = self.agent_start_pos
+            self.agent_dir = self.agent_start_dir
+
+        self.mission = "get to the green goal square"
+
+
+class EmptyEnv5x5(EmptyEnv):  # EmptyEnv fixed to size=5
+    def __init__(self, **kwargs):
+        super().__init__(size=5, **kwargs)  # remaining keyword arguments go to EmptyEnv
+
+
+class EmptyRandomEnv5x5(EmptyEnv):  # size 5 with a randomly placed agent
+    def __init__(self):
+        super().__init__(size=5, agent_start_pos=None)  # None -> _gen_grid calls place_agent()
+
+
+class EmptyEnv6x6(EmptyEnv):  # EmptyEnv fixed to size=6
+    def __init__(self, **kwargs):
+        super().__init__(size=6, **kwargs)  # remaining keyword arguments go to EmptyEnv
+
+
+class EmptyRandomEnv6x6(EmptyEnv):  # size 6 with a randomly placed agent
+    def __init__(self):
+        super().__init__(size=6, agent_start_pos=None)  # None -> _gen_grid calls place_agent()
+
+
+class EmptyEnv16x16(EmptyEnv):  # EmptyEnv fixed to size=16
+    def __init__(self, **kwargs):
+        super().__init__(size=16, **kwargs)  # remaining keyword arguments go to EmptyEnv
+
diff --git a/environments/minigrid/src/minigrid/envs/fetch.py b/environments/minigrid/src/minigrid/envs/fetch.py
new file mode 100644
index 00000000..d87490d5
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/fetch.py
@@ -0,0 +1,96 @@
+from minigrid.minigrid import *
+
+
+class FetchEnv(MiniGridEnv):
+    """
+    Fetch task: keys and balls are scattered in a walled room and the
+    mission text names the colour and type of the one to pick up
+    (carrying anything at all ends the episode, see step)
+    """
+
+    def __init__(self, size=8, numObjs=3):
+        # Number of objects _gen_grid scatters over the room
+        self.numObjs = numObjs
+
+        # Step budget scales with the grid area
+        step_budget = 5 * size ** 2
+        super().__init__(
+            grid_size=size,
+            max_steps=step_budget,
+            see_through_walls=True  # True is the fast setting
+        )
+
+    def _gen_grid(self, width, height):
+        """Build the room, scatter the objects and choose the target"""
+        self.grid = Grid(width, height)
+
+        # Outer walls: top, bottom, left, right
+        self.grid.horz_wall(0, 0)
+        self.grid.horz_wall(0, height-1)
+        self.grid.vert_wall(0, 0)
+        self.grid.vert_wall(width-1, 0)
+
+        # Constructor for every object kind this task can spawn
+        makers = {'key': Key, 'ball': Ball}
+        types = ['key', 'ball']
+
+        # Draw a kind, then a colour, and drop the new object on the grid
+        objs = []
+        while len(objs) < self.numObjs:
+            objType = self._rand_elem(types)
+            objColor = self._rand_elem(COLOR_NAMES)
+
+            obj = makers[objType](objColor)
+            self.place_obj(obj)
+            objs.append(obj)
+
+        # Agent gets a random start position and orientation
+        self.place_agent()
+
+        # One of the placed objects becomes the fetch target
+        target = objs[self._rand_int(0, len(objs))]
+        self.targetType = target.type
+        self.targetColor = target.color
+
+        descStr = '%s %s' % (self.targetColor, self.targetType)
+
+        # Mission phrasings, in the order the random index selects them
+        templates = (
+            'get a %s',
+            'go get a %s',
+            'fetch a %s',
+            'go fetch a %s',
+            'you must fetch a %s',
+        )
+
+        idx = self._rand_int(0, 5)
+        if 0 <= idx < len(templates):
+            self.mission = templates[idx] % descStr
+        assert hasattr(self, 'mission')
+
+    def step(self, action):
+        """Run the base step, then end the episode once something is carried"""
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+
+        # Holding anything ends the episode; only the object matching
+        # the target colour and type earns the reward
+        held = self.carrying
+        if held:
+            is_target = (
+                held.color == self.targetColor
+                and held.type == self.targetType
+            )
+            reward = self._reward() if is_target else 0
+            done = True
+
+        return obs, reward, done, info
+
+
+class FetchEnv5x5N2(FetchEnv):  # FetchEnv with size=5 and 2 objects
+    def __init__(self):
+        super().__init__(size=5, numObjs=2)
+
+
+class FetchEnv6x6N2(FetchEnv):  # FetchEnv with size=6 and 2 objects
+    def __init__(self):
+        super().__init__(size=6, numObjs=2)
diff --git a/environments/minigrid/src/minigrid/envs/fourrooms.py b/environments/minigrid/src/minigrid/envs/fourrooms.py
new file mode 100644
index 00000000..628c3d82
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/fourrooms.py
@@ -0,0 +1,69 @@
+from minigrid.minigrid import *
+
+
+class FourRoomsEnv(MiniGridEnv):
+    """
+    Classic 4 rooms gridworld environment.
+    Agent and goal positions can be specified; if not, they are set at random.
+    """
+
+    def __init__(self, agent_pos=None, goal_pos=None):
+        self._agent_default_pos = agent_pos
+        self._goal_default_pos = goal_pos
+        super().__init__(grid_size=19, max_steps=100)
+
+    def _gen_grid(self, width, height):
+        # Create the grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.horz_wall(0, 0)
+        self.grid.horz_wall(0, height - 1)
+        self.grid.vert_wall(0, 0)
+        self.grid.vert_wall(width - 1, 0)
+
+        room_w = width // 2
+        room_h = height // 2
+
+        # For each row of rooms
+        for j in range(0, 2):
+
+            # For each column
+            for i in range(0, 2):
+                xL = i * room_w
+                yT = j * room_h
+                xR = xL + room_w
+                yB = yT + room_h
+
+                # Right wall and door (only for the left-hand column)
+                if i + 1 < 2:
+                    self.grid.vert_wall(xR, yT, room_h)
+                    pos = (xR, self._rand_int(yT + 1, yB))
+                    self.grid.set(*pos, None)
+
+                # Bottom wall and door (only for the top row)
+                if j + 1 < 2:
+                    self.grid.horz_wall(xL, yB, room_w)
+                    pos = (self._rand_int(xL + 1, xR), yB)
+                    self.grid.set(*pos, None)
+
+        # Use the given start position (random direction) or randomize both
+        if self._agent_default_pos is not None:
+            self.agent_pos = self._agent_default_pos
+            self.grid.set(*self._agent_default_pos, None)
+            self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
+        else:
+            self.place_agent()
+
+        if self._goal_default_pos is not None:
+            goal = Goal()
+            self.put_obj(goal, *self._goal_default_pos)
+            goal.init_pos = goal.cur_pos = self._goal_default_pos  # both hold the full (x, y)
+        else:
+            self.place_obj(Goal())
+
+        self.mission = 'Reach the goal'
+
+    def step(self, action):
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+        return obs, reward, done, info
diff --git a/environments/minigrid/src/minigrid/envs/gotodoor.py b/environments/minigrid/src/minigrid/envs/gotodoor.py
new file mode 100644
index 00000000..0247e33b
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/gotodoor.py
@@ -0,0 +1,91 @@
+from minigrid.minigrid import *
+
+
+class GoToDoorEnv(MiniGridEnv):
+    """
+    Environment in which the agent is instructed to go to one of four
+    coloured doors, named in the mission using an English text string
+    """
+
+    def __init__(
+        self,
+        size=5
+    ):
+        assert size >= 5
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size**2,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
+        # Create the grid at the full requested size
+        self.grid = Grid(width, height)
+
+        # Randomly vary the room width and height (the locals are rebound)
+        width = self._rand_int(5, width+1)
+        height = self._rand_int(5, height+1)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Generate the 4 doors at random positions
+        doorPos = []
+        doorPos.append((self._rand_int(2, width-2), 0))  # top wall
+        doorPos.append((self._rand_int(2, width-2), height-1))  # bottom wall
+        doorPos.append((0, self._rand_int(2, height-2)))  # left wall
+        doorPos.append((width-1, self._rand_int(2, height-2)))  # right wall
+
+        # Generate the door colors, redrawing until all are distinct
+        doorColors = []
+        while len(doorColors) < len(doorPos):
+            color = self._rand_elem(COLOR_NAMES)
+            if color in doorColors:
+                continue
+            doorColors.append(color)
+
+        # Place the doors in the grid
+        for idx, pos in enumerate(doorPos):
+            color = doorColors[idx]
+            self.grid.set(*pos, Door(color))
+
+        # Randomize the agent start position and orientation
+        self.place_agent(size=(width, height))
+
+        # Select a random target door
+        doorIdx = self._rand_int(0, len(doorPos))
+        self.target_pos = doorPos[doorIdx]
+        self.target_color = doorColors[doorIdx]
+
+        # Generate the mission string
+        self.mission = 'go to the %s door' % self.target_color
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+
+        ax, ay = self.agent_pos
+        tx, ty = self.target_pos
+
+        # Don't let the agent open any of the doors: toggling ends the episode
+        if action == self.actions.toggle:
+            done = True
+
+        # The done action always ends the episode; it is rewarded only when
+        if action == self.actions.done:  # the agent is orthogonally adjacent to the target door
+            if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
+                reward = self._reward()
+            done = True
+
+        return obs, reward, done, info
+
+
+class GoToDoor8x8Env(GoToDoorEnv):  # GoToDoorEnv fixed to size=8
+    def __init__(self):
+        super().__init__(size=8)
+
+
+class GoToDoor6x6Env(GoToDoorEnv):  # GoToDoorEnv fixed to size=6
+    def __init__(self):
+        super().__init__(size=6)
diff --git a/environments/minigrid/src/minigrid/envs/gotoobject.py b/environments/minigrid/src/minigrid/envs/gotoobject.py
new file mode 100644
index 00000000..33701ab3
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/gotoobject.py
@@ -0,0 +1,89 @@
+from minigrid.minigrid import *
+
+
+class GoToObjectEnv(MiniGridEnv):
+    """
+    Environment in which the agent is instructed to go to a given object
+    (key, ball or box) named using an English text string
+    """
+
+    def __init__(
+        self,
+        size=6,
+        numObjs=2
+    ):
+        self.numObjs = numObjs
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size**2,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Types of objects we can generate (colors come from COLOR_NAMES)
+        types = ['key', 'ball', 'box']
+
+        objs = []
+        objPos = []
+
+        # Until we have generated all the objects
+        while len(objs) < self.numObjs:
+            objType = self._rand_elem(types)
+            objColor = self._rand_elem(COLOR_NAMES)
+
+            # If this (type, color) pair already exists, try again
+            if (objType, objColor) in objs:
+                continue
+
+            if objType == 'key':
+                obj = Key(objColor)
+            elif objType == 'ball':
+                obj = Ball(objColor)
+            elif objType == 'box':
+                obj = Box(objColor)
+
+            pos = self.place_obj(obj)
+            objs.append((objType, objColor))
+            objPos.append(pos)
+
+        # Randomize the agent start position and orientation
+        self.place_agent()
+
+        # Choose a random object to be the go-to target
+        objIdx = self._rand_int(0, len(objs))
+        self.targetType, self.target_color = objs[objIdx]
+        self.target_pos = objPos[objIdx]
+
+        descStr = '%s %s' % (self.target_color, self.targetType)
+        self.mission = 'go to the %s' % descStr
+        # target_pos is what step() measures the agent's distance against
+
+    def step(self, action):
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+
+        ax, ay = self.agent_pos
+        tx, ty = self.target_pos
+
+        # The toggle action terminates the episode (no reward is set here)
+        if action == self.actions.toggle:
+            done = True
+
+        # The done action always ends the episode; it is rewarded only when
+        if action == self.actions.done:  # the agent is within one cell of the target on both axes
+            if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
+                reward = self._reward()
+            done = True
+
+        return obs, reward, done, info
+
+
+class GotoEnv8x8N2(GoToObjectEnv):  # GoToObjectEnv with size=8 and 2 objects
+    def __init__(self):
+        super().__init__(size=8, numObjs=2)
diff --git a/environments/minigrid/src/minigrid/envs/keycorridor.py b/environments/minigrid/src/minigrid/envs/keycorridor.py
new file mode 100644
index 00000000..0c417600
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/keycorridor.py
@@ -0,0 +1,113 @@
+from minigrid.roomgrid import RoomGrid
+
+
+class KeyCorridor(RoomGrid):
+    """
+    The target object sits in a right-column room behind a locked door;
+    the key that matches the door is dropped in a random left-column room.
+    """
+
+    def __init__(self, num_rows=3, obj_type="ball", room_size=6, seed=None):
+        # Kind of object _gen_grid places behind the locked door
+        self.obj_type = obj_type
+
+        step_budget = 30 * room_size ** 2
+        super().__init__(
+            room_size=room_size,
+            num_rows=num_rows,
+            max_steps=step_budget,
+            seed=seed,
+        )
+
+    def _gen_grid(self, width, height):
+        """Lay out the hallway, the locked room, the key and the agent"""
+        super()._gen_grid(width, height)
+
+        # Open up the middle column so its rooms form one hallway
+        for row in range(1, self.num_rows):
+            self.remove_wall(1, row, 3)
+
+        # Pick a room on the right, lock its door and put the target
+        # object inside that room
+        locked_row = self._rand_int(0, self.num_rows)
+        door, _ = self.add_door(2, locked_row, 2, locked=True)
+        target, _ = self.add_object(2, locked_row, kind=self.obj_type)
+
+        # Key in the door's colour, in a random room on the left side
+        key_row = self._rand_int(0, self.num_rows)
+        self.add_object(0, key_row, 'key', door.color)
+
+        # Agent starts in the middle
+        self.place_agent(1, self.num_rows // 2)
+
+        # Make sure all rooms are accessible
+        self.connect_all()
+
+        self.mission = "pick up the %s %s" % (target.color, target.type)
+        self.obj = target
+
+    def step(self, action):
+        """Base step plus reward/termination when the target is picked up"""
+        obs, reward, done, info = super().step(action)
+
+        # Success only on the pickup action, with the target in hand
+        tried_pickup = action == self.actions.pickup
+        if tried_pickup and self.carrying and self.carrying == self.obj:
+            reward = self._reward()
+            done = True
+
+        return obs, reward, done, info
+
+
+class KeyCorridorS3R1(KeyCorridor):  # room_size=3, num_rows=1
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=3,
+            num_rows=1,
+            seed=seed  # caller's seed is passed through unchanged
+        )
+
+
+class KeyCorridorS3R2(KeyCorridor):  # room_size=3, num_rows=2
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=3,
+            num_rows=2,
+            seed=seed  # caller's seed is passed through unchanged
+        )
+
+
+class KeyCorridorS3R3(KeyCorridor):  # room_size=3, num_rows=3
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=3,
+            num_rows=3,
+            seed=seed  # caller's seed is passed through unchanged
+        )
+
+
+class KeyCorridorS4R3(KeyCorridor):  # room_size=4, num_rows=3
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=4,
+            num_rows=3,
+            seed=seed  # caller's seed is passed through unchanged
+        )
+
+
+class KeyCorridorS5R3(KeyCorridor):  # room_size=5, num_rows=3
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=5,
+            num_rows=3,
+            seed=seed  # caller's seed is passed through unchanged
+        )
+
+
+class KeyCorridorS6R3(KeyCorridor):  # room_size=6, num_rows=3
+    def __init__(self, seed=None):
+        super().__init__(
+            room_size=6,
+            num_rows=3,
+            seed=seed  # caller's seed is passed through unchanged
+        )
diff --git a/environments/minigrid/src/minigrid/envs/lavagap.py b/environments/minigrid/src/minigrid/envs/lavagap.py
new file mode 100644
index 00000000..26565b84
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/lavagap.py
@@ -0,0 +1,68 @@
+from minigrid.minigrid import *
+
+
+class LavaGapEnv(MiniGridEnv):
+    """
+    Environment with one wall of lava with a small gap to cross through
+    This environment is similar to LavaCrossing but simpler in structure.
+    """
+
+    def __init__(self, size, obstacle_type=Lava, seed=None):
+        self.obstacle_type = obstacle_type
+        super().__init__(
+            grid_size=size,
+            max_steps=4*size*size,
+            # Set this to True for maximum speed
+            see_through_walls=False,
+            seed=seed  # forward the caller's seed (still None when omitted)
+        )
+
+    def _gen_grid(self, width, height):
+        assert width >= 5 and height >= 5
+
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Place the agent in the top-left corner
+        self.agent_pos = (1, 1)
+        self.agent_dir = 0
+
+        # Place a goal square in the bottom-right corner
+        self.goal_pos = np.array((width - 2, height - 2))
+        self.put_obj(Goal(), *self.goal_pos)
+
+        # Generate and store random gap position
+        self.gap_pos = np.array((
+            self._rand_int(2, width - 2),
+            self._rand_int(1, height - 1),
+        ))
+
+        # Place the obstacle wall in the gap's column
+        self.grid.vert_wall(self.gap_pos[0], 1, height - 2, self.obstacle_type)
+
+        # Put a hole in the wall
+        self.grid.set(*self.gap_pos, None)
+
+        self.mission = (
+            "avoid the lava and get to the green goal square"
+            if self.obstacle_type == Lava
+            else "find the opening and get to the green goal square"
+        )
+
+
+class LavaGapS5Env(LavaGapEnv):  # LavaGapEnv fixed to size=5, default obstacle type
+    def __init__(self):
+        super().__init__(size=5)
+
+
+class LavaGapS6Env(LavaGapEnv):  # LavaGapEnv fixed to size=6, default obstacle type
+    def __init__(self):
+        super().__init__(size=6)
+
+
+class LavaGapS7Env(LavaGapEnv):  # LavaGapEnv fixed to size=7, default obstacle type
+    def __init__(self):
+        super().__init__(size=7)
diff --git a/environments/minigrid/src/minigrid/envs/lockedroom.py b/environments/minigrid/src/minigrid/envs/lockedroom.py
new file mode 100644
index 00000000..535665b3
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/lockedroom.py
@@ -0,0 +1,119 @@
+from gym_minigrid.minigrid import *
+
+
+class Room:  # Record describing one side room of the LockedRoom layout
+    def __init__(self,
+        top,  # (x, y) of the room's top-left cell
+        size,  # (width, height) of the room
+        doorPos  # (x, y) of the cell that receives the room's door
+    ):
+        self.top = top
+        self.size = size
+        self.doorPos = doorPos
+        self.color = None  # door colour, assigned by LockedRoom._gen_grid
+        self.locked = False  # set to True on the one room chosen to be locked
+
+    def rand_pos(self, env):  # random position obtained from env._rand_pos
+        topX, topY = self.top
+        sizeX, sizeY = self.size
+        return env._rand_pos(
+            topX + 1, topX + sizeX - 1,  # bounds derived from the x extent
+            topY + 1, topY + sizeY - 1  # bounds derived from the y extent
+        )
+
+
+class LockedRoom(MiniGridEnv):  # NOTE(review): this module imports gym_minigrid.minigrid, siblings import minigrid.minigrid - confirm
+    """
+    Environment in which the agent must fetch a key from one room, unlock
+    the door of the locked room and reach the goal placed inside it
+    """
+
+    def __init__(
+        self,
+        size=19
+    ):
+        super().__init__(grid_size=size, max_steps=10*size)
+
+    def _gen_grid(self, width, height):
+        # Create the grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        for i in range(0, width):
+            self.grid.set(i, 0, Wall())
+            self.grid.set(i, height-1, Wall())
+        for j in range(0, height):
+            self.grid.set(0, j, Wall())
+            self.grid.set(width-1, j, Wall())
+
+        # Hallway walls: two full-height columns, 2 cells either side of the centre
+        lWallIdx = width // 2 - 2
+        rWallIdx = width // 2 + 2
+        for j in range(0, height):
+            self.grid.set(lWallIdx, j, Wall())
+            self.grid.set(rWallIdx, j, Wall())
+
+        self.rooms = []
+
+        # Room splitting walls: three rooms on each side of the hallway
+        for n in range(0, 3):
+            j = n * (height // 3)
+            for i in range(0, lWallIdx):
+                self.grid.set(i, j, Wall())
+            for i in range(rWallIdx, width):
+                self.grid.set(i, j, Wall())
+
+            roomW = lWallIdx + 1
+            roomH = height // 3 + 1
+            self.rooms.append(Room(  # left-hand room, door in the left hallway wall
+                (0, j),
+                (roomW, roomH),
+                (lWallIdx, j + 3)
+            ))
+            self.rooms.append(Room(  # right-hand room, door in the right hallway wall
+                (rWallIdx, j),
+                (roomW, roomH),
+                (rWallIdx, j + 3)
+            ))
+
+        # Choose one random room to be locked; the goal goes inside it
+        lockedRoom = self._rand_elem(self.rooms)
+        lockedRoom.locked = True
+        goalPos = lockedRoom.rand_pos(self)
+        self.grid.set(*goalPos, Goal())
+
+        # Assign the door colors, each color used at most once
+        colors = set(COLOR_NAMES)
+        for room in self.rooms:
+            color = self._rand_elem(sorted(colors))
+            colors.remove(color)
+            room.color = color
+            if room.locked:
+                self.grid.set(*room.doorPos, Door(color, is_locked=True))
+            else:
+                self.grid.set(*room.doorPos, Door(color))
+
+        # Select a random room other than the locked one to contain the key
+        while True:
+            keyRoom = self._rand_elem(self.rooms)
+            if keyRoom != lockedRoom:
+                break
+        keyPos = keyRoom.rand_pos(self)
+        self.grid.set(*keyPos, Key(lockedRoom.color))
+
+        # Randomize the player start position and orientation within the hallway span
+        self.agent_pos = self.place_agent(
+            top=(lWallIdx, 0),
+            size=(rWallIdx-lWallIdx, height)
+        )
+
+        # Generate the mission string
+        self.mission = (
+            'get the %s key from the %s room, '
+            'unlock the %s door and '
+            'go to the goal'
+        ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)
+
+    def step(self, action):  # plain pass-through to the base class step
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+        return obs, reward, done, info
diff --git a/environments/minigrid/src/minigrid/envs/memory.py b/environments/minigrid/src/minigrid/envs/memory.py
new file mode 100644
index 00000000..ee7d3902
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/memory.py
@@ -0,0 +1,130 @@
+from minigrid.minigrid import *
+
+
+class MemoryEnv(MiniGridEnv):
+    """
+    This environment is a memory test. The agent starts in a small room
+    where it sees an object. It then has to go through a narrow hallway
+    which ends in a split. At each end of the split there is an object,
+    one of which is the same as the object in the starting room. The
+    agent has to remember the initial object, and go to the matching
+    object at split.
+    """
+
+    def __init__(
+        self,
+        seed,
+        size=8,
+        random_length=False,
+    ):
+        self.random_length = random_length
+        super().__init__(
+            seed=seed,
+            grid_size=size,
+            max_steps=5*size**2,
+            # Set this to True for maximum speed
+            see_through_walls=False,
+        )
+
+    def _gen_grid(self, width, height):
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.horz_wall(0, 0)
+        self.grid.horz_wall(0, height-1)
+        self.grid.vert_wall(0, 0)
+        self.grid.vert_wall(width - 1, 0)
+
+        assert height % 2 == 1  # an odd height gives a single middle row
+        upper_room_wall = height // 2 - 2
+        lower_room_wall = height // 2 + 2
+        if self.random_length:
+            hallway_end = self._rand_int(4, width - 2)
+        else:
+            hallway_end = width - 3
+
+        # Start room
+        for i in range(1, 5):
+            self.grid.set(i, upper_room_wall, Wall())
+            self.grid.set(i, lower_room_wall, Wall())
+        self.grid.set(4, upper_room_wall + 1, Wall())
+        self.grid.set(4, lower_room_wall - 1, Wall())
+
+        # Horizontal hallway
+        for i in range(5, hallway_end):
+            self.grid.set(i, upper_room_wall + 1, Wall())
+            self.grid.set(i, lower_room_wall - 1, Wall())
+
+        # Vertical hallway; the middle row of the near wall is left open
+        for j in range(0, height):
+            if j != height // 2:
+                self.grid.set(hallway_end, j, Wall())
+            self.grid.set(hallway_end + 2, j, Wall())
+
+        # Start on the middle row at a random column, facing direction 0
+        self.agent_pos = (self._rand_int(1, hallway_end + 1), height // 2)
+        self.agent_dir = 0
+
+        # Place objects
+        start_room_obj = self._rand_elem([Key, Ball])
+        self.grid.set(1, height // 2 - 1, start_room_obj('green'))
+
+        other_objs = self._rand_elem([[Ball, Key], [Key, Ball]])
+        pos0 = (hallway_end + 1, height // 2 - 2)
+        pos1 = (hallway_end + 1, height // 2 + 2)
+        self.grid.set(*pos0, other_objs[0]('green'))
+        self.grid.set(*pos1, other_objs[1]('green'))
+
+        # Success/failure cells are the cells next to each object, on the hallway side
+        if start_room_obj == other_objs[0]:
+            self.success_pos = (pos0[0], pos0[1] + 1)
+            self.failure_pos = (pos1[0], pos1[1] - 1)
+        else:
+            self.success_pos = (pos1[0], pos1[1] - 1)
+            self.failure_pos = (pos0[0], pos0[1] + 1)
+
+        self.mission = 'go to the matching object at the end of the hallway'
+
+    def step(self, action):
+        if action == MiniGridEnv.Actions.pickup:  # pickup is remapped to toggle
+            action = MiniGridEnv.Actions.toggle
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+
+        if tuple(self.agent_pos) == self.success_pos:  # reached the matching object
+            reward = self._reward()
+            done = True
+        if tuple(self.agent_pos) == self.failure_pos:  # reached the other object
+            reward = 0
+            done = True
+
+        return obs, reward, done, info
+
+
+class MemoryS17Random(MemoryEnv):  # size 17 with a randomly drawn hallway end
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=17, random_length=True)
+
+
+class MemoryS13Random(MemoryEnv):  # size 13 with a randomly drawn hallway end
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=13, random_length=True)
+
+
+class MemoryS13(MemoryEnv):  # size 13, fixed hallway length
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=13)
+
+
+class MemoryS11(MemoryEnv):  # size 11, fixed hallway length
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=11)
+
+
+class MemoryS9(MemoryEnv):  # size 9, fixed hallway length
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=9)
+
+
+class MemoryS7(MemoryEnv):  # size 7, fixed hallway length
+    def __init__(self, seed=None):
+        super().__init__(seed=seed, size=7)
diff --git a/environments/minigrid/src/minigrid/envs/multiroom.py b/environments/minigrid/src/minigrid/envs/multiroom.py
new file mode 100644
index 00000000..94f9ce62
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/multiroom.py
@@ -0,0 +1,264 @@
+from minigrid.minigrid import *
+
+
+class Room:
+    # Plain record for one generated room.
+    #   top:  (x, y) of the top-left wall cell
+    #   size: (width, height), walls included
+    #   entryDoorPos / exitDoorPos: (x, y) of the doors; the generator in
+    #   this file passes exitDoorPos=None and assigns it afterwards
+    def __init__(self, top, size, entryDoorPos, exitDoorPos):
+        self.exitDoorPos = exitDoorPos
+        self.entryDoorPos = entryDoorPos
+        self.size = size
+        self.top = top
+
+
+class MultiRoomEnv(MiniGridEnv):
+    """
+    Environment with a chain of door-linked rooms (subgoals); the goal is in the last room
+    """
+
+    def __init__(self,
+        minNumRooms,
+        maxNumRooms,
+        maxRoomSize=10
+    ):
+        assert minNumRooms > 0
+        assert maxNumRooms >= minNumRooms
+        assert maxRoomSize >= 4  # 4 is the minimum room side passed by _gen_grid
+
+        self.minNumRooms = minNumRooms
+        self.maxNumRooms = maxNumRooms
+        self.maxRoomSize = maxRoomSize
+
+        self.rooms = []
+
+        super(MultiRoomEnv, self).__init__(
+            grid_size=25,
+            max_steps=self.maxNumRooms * 20
+        )
+
+    def _gen_grid(self, width, height):
+        roomList = []
+
+        # Choose a random number of rooms to generate
+        numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms+1)
+        # Restart from a new random position until a chain of numRooms rooms fits
+        while len(roomList) < numRooms:
+            curRoomList = []
+
+            # x is bounded by the grid width, y by the grid height
+            entryDoorPos = (
+                self._rand_int(0, width - 2),
+                self._rand_int(0, height - 2)
+            )
+
+            # Recursively place the rooms
+            self._placeRoom(
+                numRooms,
+                roomList=curRoomList,
+                minSz=4,
+                maxSz=self.maxRoomSize,
+                entryDoorWall=2,
+                entryDoorPos=entryDoorPos
+            )
+
+            # Keep the longest chain produced so far
+            if len(curRoomList) > len(roomList):
+                roomList = curRoomList
+
+        # Store the list of rooms in this environment
+        assert len(roomList) > 0
+        self.rooms = roomList
+
+        # Create the grid
+        self.grid = Grid(width, height)
+        wall = Wall()
+
+        prevDoorColor = None
+
+        # For each room
+        for idx, room in enumerate(roomList):
+
+            topX, topY = room.top
+            sizeX, sizeY = room.size
+
+            # Draw the top and bottom walls
+            for i in range(0, sizeX):
+                self.grid.set(topX + i, topY, wall)
+                self.grid.set(topX + i, topY + sizeY - 1, wall)
+
+            # Draw the left and right walls
+            for j in range(0, sizeY):
+                self.grid.set(topX, topY + j, wall)
+                self.grid.set(topX + sizeX - 1, topY + j, wall)
+
+            # If this isn't the first room, place the entry door
+            if idx > 0:
+                # Pick a door color different from the previous one
+                doorColors = set(COLOR_NAMES)
+                if prevDoorColor:
+                    doorColors.remove(prevDoorColor)
+                # Note: sorting gives the candidates a fixed order, which is
+                # needed because the iteration order of a set is not guaranteed
+                doorColor = self._rand_elem(sorted(doorColors))
+
+                entryDoor = Door(doorColor)
+                self.grid.set(*room.entryDoorPos, entryDoor)
+                prevDoorColor = doorColor
+
+                # This room's entry door is the previous room's exit door
+                prevRoom = roomList[idx-1]
+                prevRoom.exitDoorPos = room.entryDoorPos
+
+        # Randomize the starting agent position and direction
+        self.place_agent(roomList[0].top, roomList[0].size)
+
+        # Place the final goal in the last room
+        self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
+
+        self.mission = 'traverse the rooms to get to the goal'
+
+    def _placeRoom(
+        self,
+        numLeft,
+        roomList,
+        minSz,
+        maxSz,
+        entryDoorWall,
+        entryDoorPos
+    ):  # returns False only when this room itself cannot be placed
+        # Choose the room size randomly
+        sizeX = self._rand_int(minSz, maxSz+1)
+        sizeY = self._rand_int(minSz, maxSz+1)
+
+        # The first room will be at the door position
+        if len(roomList) == 0:
+            topX, topY = entryDoorPos
+        # Entry on the right
+        elif entryDoorWall == 0:
+            topX = entryDoorPos[0] - sizeX + 1
+            y = entryDoorPos[1]
+            topY = self._rand_int(y - sizeY + 2, y)
+        # Entry wall on the south
+        elif entryDoorWall == 1:
+            x = entryDoorPos[0]
+            topX = self._rand_int(x - sizeX + 2, x)
+            topY = entryDoorPos[1] - sizeY + 1
+        # Entry wall on the left
+        elif entryDoorWall == 2:
+            topX = entryDoorPos[0]
+            y = entryDoorPos[1]
+            topY = self._rand_int(y - sizeY + 2, y)
+        # Entry wall on the top
+        elif entryDoorWall == 3:
+            x = entryDoorPos[0]
+            topX = self._rand_int(x - sizeX + 2, x)
+            topY = entryDoorPos[1]
+        else:
+            assert False, entryDoorWall
+
+        # If the room is out of the grid, can't place a room here
+        if topX < 0 or topY < 0:
+            return False
+        if topX + sizeX > self.width or topY + sizeY >= self.height:
+            return False
+        # NOTE(review): the height test uses '>=' while the width test uses '>' - confirm intended
+        # If the room intersects with previous rooms, can't place it here (the last room in the list is skipped)
+        for room in roomList[:-1]:
+            nonOverlap = \
+                topX + sizeX < room.top[0] or \
+                room.top[0] + room.size[0] <= topX or \
+                topY + sizeY < room.top[1] or \
+                room.top[1] + room.size[1] <= topY
+            # NOTE(review): two of the tests above use '<' and two use '<=' - confirm the asymmetry
+            if not nonOverlap:
+                return False
+
+        # Add this room to the list
+        roomList.append(Room(
+            (topX, topY),
+            (sizeX, sizeY),
+            entryDoorPos,
+            None
+        ))
+
+        # If this was the last room, stop
+        if numLeft == 1:
+            return True
+
+        # Try placing the next room (at most 8 attempts)
+        for _ in range(8):
+
+            # Pick which wall to place the out door on
+            wallSet = set((0, 1, 2, 3))
+            wallSet.remove(entryDoorWall)
+            exitDoorWall = self._rand_elem(sorted(wallSet))
+            nextEntryWall = (exitDoorWall + 2) % 4
+
+            # Pick the exit door position
+            # Exit on right wall
+            if exitDoorWall == 0:
+                exitDoorPos = (
+                    topX + sizeX - 1,
+                    topY + self._rand_int(1, sizeY - 1)
+                )
+            # Exit on south wall
+            elif exitDoorWall == 1:
+                exitDoorPos = (
+                    topX + self._rand_int(1, sizeX - 1),
+                    topY + sizeY - 1
+                )
+            # Exit on left wall
+            elif exitDoorWall == 2:
+                exitDoorPos = (
+                    topX,
+                    topY + self._rand_int(1, sizeY - 1)
+                )
+            # Exit on north wall
+            elif exitDoorWall == 3:
+                exitDoorPos = (
+                    topX + self._rand_int(1, sizeX - 1),
+                    topY
+                )
+            else:
+                assert False
+
+            # Recursively create the other rooms
+            success = self._placeRoom(
+                numLeft - 1,
+                roomList=roomList,
+                minSz=minSz,
+                maxSz=maxSz,
+                entryDoorWall=nextEntryWall,
+                entryDoorPos=exitDoorPos
+            )
+
+            if success:
+                break
+        # True even when no follow-up room fit: this room itself was placed
+        return True
+
+
+class MultiRoomEnvN2S4(MultiRoomEnv):
+    """
+    Preset: exactly 2 rooms, maxRoomSize=4
+    """
+
+    def __init__(self):
+        super().__init__(minNumRooms=2, maxNumRooms=2, maxRoomSize=4)
+
+
+class MultiRoomEnvN4S5(MultiRoomEnv):
+    """
+    Preset: exactly 4 rooms, maxRoomSize=5
+    """
+
+    def __init__(self):
+        super().__init__(minNumRooms=4, maxNumRooms=4, maxRoomSize=5)
+
+
+class MultiRoomEnvN6(MultiRoomEnv):
+    # Preset: exactly 6 rooms, maxRoomSize left at its default
+    def __init__(self):
+        super().__init__(
+            minNumRooms=6, maxNumRooms=6
+        )
diff --git a/environments/minigrid/src/minigrid/envs/obstructedmaze.py b/environments/minigrid/src/minigrid/envs/obstructedmaze.py
new file mode 100644
index 00000000..fec12421
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/obstructedmaze.py
@@ -0,0 +1,187 @@
+from minigrid.minigrid import *
+from minigrid.roomgrid import RoomGrid
+
+
+class ObstructedMazeEnv(RoomGrid):
+    """
+    The ball to pick up (color COLOR_NAMES[0]) is hidden in the maze. Doors may
+    be locked, doors may be obstructed by a ball and keys may be hidden in boxes.
+    """
+
+    def __init__(self,
+                 num_rows,
+                 num_cols,
+                 num_rooms_visited,
+                 seed=None
+                 ):
+        room_size = 6
+        max_steps = 4 * num_rooms_visited * room_size ** 2  # step budget grows with num_rooms_visited
+
+        super().__init__(
+            room_size=room_size,
+            num_rows=num_rows,
+            num_cols=num_cols,
+            max_steps=max_steps,
+            seed=seed
+        )
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        # Define all possible colors for doors
+        self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
+        # Define the color of the ball to pick up
+        self.ball_to_find_color = COLOR_NAMES[0]
+        # Define the color of the balls that obstruct doors
+        self.blocking_ball_color = COLOR_NAMES[1]
+        # Define the color of boxes in which keys are hidden
+        self.box_color = COLOR_NAMES[2]
+
+        self.mission = "pick up the %s ball" % self.ball_to_find_color
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+        # self.obj is the target ball; the subclasses in this file set it in _gen_grid
+        if action == self.actions.pickup:
+            if self.carrying and self.carrying == self.obj:
+                reward = self._reward()
+                done = True
+
+        return obs, reward, done, info
+
+    def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False):
+        """
+        Add a door. If the door must be locked, it also adds the key.
+        If the key must be hidden, it is put in a box. If the door must
+        be obstructed, it adds a ball in front of the door.
+        """
+
+        door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)
+        # The blocking ball goes one cell back from the door along DIR_TO_VEC[door_idx]
+        if blocked:
+            vec = DIR_TO_VEC[door_idx]
+            blocking_ball = Ball(self.blocking_ball_color)
+            self.grid.set(door_pos[0] - vec[0], door_pos[1] - vec[1], blocking_ball)
+        # The key (or the box holding it) is placed in room (i, j) via place_in_room
+        if locked:
+            obj = Key(door.color)
+            if key_in_box:
+                box = Box(self.box_color)
+                box.contains = obj
+                obj = box
+            self.place_in_room(i, j, obj)
+
+        return door, door_pos
+
+
+class ObstructedMaze1Dlhb(ObstructedMazeEnv):
+    """
+    One row of two rooms joined by a single locked door. The ball to pick
+    up is added to room (1, 0) and the agent is placed in room (0, 0).
+    `key_in_box` and `blocked` are passed through to add_door.
+    """
+
+    def __init__(self, key_in_box=True, blocked=True, seed=None):
+        # Read by _gen_grid; assigned before the base constructor is called
+        self.blocked = blocked
+        self.key_in_box = key_in_box
+        super().__init__(num_rows=1, num_cols=2, num_rooms_visited=2, seed=seed)
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        # Locked door with door_idx 0 on room (0, 0), first entry of door_colors
+        door_color = self.door_colors[0]
+        self.add_door(
+            0, 0, door_idx=0, color=door_color, locked=True,
+            key_in_box=self.key_in_box, blocked=self.blocked
+        )
+
+        # Target ball goes in the other room; it is kept on self.obj
+        target, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
+        self.obj = target
+        self.place_agent(0, 0)
+
+
+class ObstructedMaze1Dl(ObstructedMaze1Dlhb):  # locked door only: no box, no blocking ball
+    def __init__(self, seed=None):
+        super().__init__(key_in_box=False, blocked=False, seed=seed)
+
+
+class ObstructedMaze1Dlh(ObstructedMaze1Dlhb):  # locked door with the key in a box, no blocking ball
+    def __init__(self, seed=None):
+        super().__init__(key_in_box=True, blocked=False, seed=seed)
+
+
+class ObstructedMazeFull(ObstructedMazeEnv):
+    """
+    3x3 maze: the center room opens onto up to four side rooms, and each
+    side room gets a locked door on its two adjacent walls. The ball to
+    pick up is added to one randomly chosen corner room.
+    """
+
+    def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True,
+                 num_quarters=4, num_rooms_visited=25, seed=None):
+        # num_quarters limits how many side rooms and corners are used
+        self.num_quarters = num_quarters
+        self.blocked = blocked
+        self.key_in_box = key_in_box
+        self.agent_room = agent_room
+
+        super().__init__(num_rows=3, num_cols=3, seed=seed,
+                         num_rooms_visited=num_rooms_visited)
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        center = (1, 1)
+        palette = self.door_colors
+
+        # Side rooms are the ones that are neither corners nor the center;
+        # their list position doubles as the door index on the center room
+        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters]
+        for i, side_room in enumerate(side_rooms):
+            # Unlocked door between the center room and this side room
+            self.add_door(*center, door_idx=i, color=palette[i], locked=False)
+
+            # One locked door on each adjacent wall of the side room
+            for k in (-1, 1):
+                shifted = i + k
+                self.add_door(
+                    *side_room, locked=True, door_idx=shifted % 4,
+                    color=palette[shifted % len(palette)],
+                    key_in_box=self.key_in_box, blocked=self.blocked
+                )
+
+        # Pick the corner that hides the ball, then place the agent
+        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters]
+        ball_room = self._rand_elem(corners)
+
+        target, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color)
+        self.obj = target
+        self.place_agent(*self.agent_room)
+
+
+class ObstructedMaze2Dl(ObstructedMazeFull):
+    def __init__(self, seed=None):
+        super().__init__(agent_room=(2, 1), key_in_box=False, blocked=False, num_quarters=1, num_rooms_visited=4, seed=seed)
+
+
+class ObstructedMaze2Dlh(ObstructedMazeFull):
+    def __init__(self, seed=None):
+        super().__init__(agent_room=(2, 1), key_in_box=True, blocked=False, num_quarters=1, num_rooms_visited=4, seed=seed)
+
+
+class ObstructedMaze2Dlhb(ObstructedMazeFull):
+    def __init__(self, seed=None):
+        super().__init__(agent_room=(2, 1), key_in_box=True, blocked=True, num_quarters=1, num_rooms_visited=4, seed=seed)
+
+
+class ObstructedMaze1Q(ObstructedMazeFull):
+    def __init__(self, seed=None):
+        super().__init__(agent_room=(1, 1), key_in_box=True, blocked=True, num_quarters=1, num_rooms_visited=5, seed=seed)
+
+
+class ObstructedMaze2Q(ObstructedMazeFull):
+    def __init__(self, seed=None):
+        super().__init__(agent_room=(1, 1), key_in_box=True, blocked=True, num_quarters=2, num_rooms_visited=11, seed=seed)
diff --git a/environments/minigrid/src/minigrid/envs/playground_v0.py b/environments/minigrid/src/minigrid/envs/playground_v0.py
new file mode 100644
index 00000000..20e2da03
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/playground_v0.py
@@ -0,0 +1,71 @@
+from minigrid.minigrid import *
+
+
+class PlaygroundV0(MiniGridEnv):
+    """
+    A 3x3 arrangement of rooms joined by doors, with random objects
+    scattered around. No mission string and no custom reward.
+    """
+
+    def __init__(self):
+        # grid_size=19, 100 steps per episode
+        super().__init__(max_steps=100, grid_size=19)
+
+    def _gen_grid(self, width, height):
+        # Create the grid
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.horz_wall(0, 0)
+        self.grid.horz_wall(0, height-1)
+        self.grid.vert_wall(0, 0)
+        self.grid.vert_wall(width-1, 0)
+
+        roomW = width // 3
+        roomH = height // 3
+
+        # Rooms are visited row by row. Every room that is not in the last
+        # column gets a wall with one door on its right side, and every
+        # room that is not in the last row gets one on its bottom side.
+        for j in range(3):
+            yT = j * roomH
+            yB = yT + roomH
+
+            for i in range(3):
+                xL = i * roomW
+                xR = xL + roomW
+
+                # Right wall and door
+                if i < 2:
+                    self.grid.vert_wall(xR, yT, roomH)
+                    doorY = self._rand_int(yT+1, yB-1)
+                    doorColor = self._rand_elem(COLOR_NAMES)
+                    self.grid.set(xR, doorY, Door(doorColor))
+
+                # Bottom wall and door
+                if j < 2:
+                    self.grid.horz_wall(xL, yB, roomW)
+                    doorX = self._rand_int(xL+1, xR-1)
+                    doorColor = self._rand_elem(COLOR_NAMES)
+                    self.grid.set(doorX, yB, Door(doorColor))
+
+        # Randomize the player start position and orientation
+        self.place_agent()
+
+        # Place random objects in the world; for each one the type is
+        # drawn first and the color second
+        makers = {'key': Key, 'ball': Ball, 'box': Box}
+        types = ['key', 'ball', 'box']
+        for _ in range(12):
+            objType = self._rand_elem(types)
+            objColor = self._rand_elem(COLOR_NAMES)
+            self.place_obj(makers[objType](objColor))
+
+        # No explicit mission in this environment
+        self.mission = ''
+
+    def step(self, action):
+        # No custom reward or termination here: the base-class result is
+        # unpacked and returned unchanged
+        observation, reward, done, info = MiniGridEnv.step(self, action)
+        return observation, reward, done, info
diff --git a/environments/minigrid/src/minigrid/envs/putnear.py b/environments/minigrid/src/minigrid/envs/putnear.py
new file mode 100644
index 00000000..bcd96f62
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/putnear.py
@@ -0,0 +1,117 @@
+from minigrid.minigrid import *
+
+
+class PutNearEnv(MiniGridEnv):
+    """
+    The mission string names one object to move and one target object.
+    Reward is given for dropping the former in a cell adjacent to the
+    latter. Any drop of a carried object ends the episode, and so does
+    picking up an object other than the one named in the mission.
+    """
+
+    def __init__(self, size=6, numObjs=2):
+        # Number of distinct (type, color) objects placed by _gen_grid
+        self.numObjs = numObjs
+
+        super().__init__(
+            grid_size=size,
+            max_steps=5*size,
+            # Set this to True for maximum speed
+            see_through_walls=True
+        )
+
+    def _gen_grid(self, width, height):
+        self.grid = Grid(width, height)
+
+        # Generate the surrounding walls
+        self.grid.horz_wall(0, 0)
+        self.grid.horz_wall(0, height-1)
+        self.grid.vert_wall(0, 0)
+        self.grid.vert_wall(width-1, 0)
+
+        # Object types we can generate, and the class built for each
+        types = ['key', 'ball', 'box']
+        makers = {'key': Key, 'ball': Ball, 'box': Box}
+
+        # Parallel lists: (type, color) of each placed object and its cell
+        objs = []
+        objPos = []
+
+        def near_obj(env, p1):
+            # Rejection test for place_obj: True when p1 is on, or touches
+            # (diagonals included), the cell of an already placed object
+            return any(
+                abs(p1[0] - p2[0]) <= 1 and abs(p1[1] - p2[1]) <= 1
+                for p2 in objPos
+            )
+
+        # Until we have generated all the objects
+        while len(objs) < self.numObjs:
+            # The type is drawn first, the color second
+            candidate = (self._rand_elem(types), self._rand_elem(COLOR_NAMES))
+
+            # If this object already exists, try again
+            if candidate in objs:
+                continue
+
+            objType, objColor = candidate
+            pos = self.place_obj(makers[objType](objColor), reject_fn=near_obj)
+
+            objs.append(candidate)
+            objPos.append(pos)
+
+        # Randomize the agent start position and orientation
+        self.place_agent()
+
+        # Choose a random object to be moved
+        objIdx = self._rand_int(0, len(objs))
+        self.move_type, self.moveColor = objs[objIdx]
+        self.move_pos = objPos[objIdx]
+
+        # Choose a target object (to put the first object next to),
+        # redrawing until it differs from the object to move
+        targetIdx = self._rand_int(0, len(objs))
+        while targetIdx == objIdx:
+            targetIdx = self._rand_int(0, len(objs))
+        self.target_type, self.target_color = objs[targetIdx]
+        self.target_pos = objPos[targetIdx]
+
+        self.mission = 'put the %s %s near the %s %s' % (
+            self.moveColor,
+            self.move_type,
+            self.target_color,
+            self.target_type
+        )
+
+    def step(self, action):
+        # Remember what was held before the action is applied
+        preCarrying = self.carrying
+
+        obs, reward, done, info = super().step(action)
+
+        # Cell directly in front of the agent, and the target's cell
+        u, v = self.dir_vec
+        ox = self.agent_pos[0] + u
+        oy = self.agent_pos[1] + v
+        tx, ty = self.target_pos
+
+        # If we picked up the wrong object, terminate the episode
+        held = self.carrying
+        if action == self.actions.pickup and held:
+            if not (held.type == self.move_type and held.color == self.moveColor):
+                done = True
+
+        # Any drop ends the episode; reward only if the object landed in
+        # front of the agent and that cell is adjacent to the target
+        if action == self.actions.drop and preCarrying:
+            landed = self.grid.get(ox, oy) is preCarrying
+            if landed and abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
+                reward = self._reward()
+            done = True
+
+        return obs, reward, done, info
+
+
+class PutNear8x8N3(PutNearEnv):  # preset: size=8 with 3 objects
+    def __init__(self):
+        super().__init__(numObjs=3, size=8)
diff --git a/environments/minigrid/src/minigrid/envs/redbluedoors.py b/environments/minigrid/src/minigrid/envs/redbluedoors.py
new file mode 100644
index 00000000..e847528a
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/redbluedoors.py
@@ -0,0 +1,71 @@
+from minigrid.minigrid import *
+
+
+class RedBlueDoorEnv(MiniGridEnv):
+    """
+    Grid of 2*size by size cells containing a size-by-size walled room
+    that starts at x = size // 2, with a red door in its left wall and a
+    blue door in its right wall. Opening red, then blue, gives the reward.
+    """
+
+    def __init__(self, size=8):
+        self.size = size
+
+        super().__init__(
+            height=size,
+            width=2*size,
+            max_steps=20*size*size
+        )
+
+    def _gen_grid(self, width, height):
+        size = self.size
+        left = size // 2
+        right = left + size - 1
+
+        # Create an empty grid
+        self.grid = Grid(width, height)
+
+        # Outer border, then the walls of the inner room
+        self.grid.wall_rect(0, 0, 2*size, size)
+        self.grid.wall_rect(left, 0, size, size)
+
+        # Place the agent within the inner room's rectangle
+        self.place_agent(top=(left, 0), size=(size, size))
+
+        # Add a red door at a random position in the left wall
+        redY = self._rand_int(1, size - 1)
+        self.red_door = Door("red")
+        self.grid.set(left, redY, self.red_door)
+
+        # Add a blue door at a random position in the right wall
+        blueY = self._rand_int(1, size - 1)
+        self.blue_door = Door("blue")
+        self.grid.set(right, blueY, self.blue_door)
+
+        # Generate the mission string
+        self.mission = "open the red door then the blue door"
+
+    def step(self, action):
+        # Door states before the action is applied
+        red_was_open = self.red_door.is_open
+        blue_was_open = self.blue_door.is_open
+
+        obs, reward, done, info = MiniGridEnv.step(self, action)
+
+        # Only the two cases below end the episode here; otherwise the
+        # base-class reward and done flag are returned as they are
+        if self.blue_door.is_open:
+            # Blue is open now: success only if red was open beforehand
+            reward = self._reward() if red_was_open else 0
+            done = True
+        elif self.red_door.is_open and blue_was_open:
+            # Red is open, blue is not any more but was before the action
+            reward = 0
+            done = True
+
+        return obs, reward, done, info
+
+
+class RedBlueDoorEnv6x6(RedBlueDoorEnv):  # preset: size=6
+    def __init__(self):
+        super(RedBlueDoorEnv6x6, self).__init__(size=6)
diff --git a/environments/minigrid/src/minigrid/envs/unlock.py b/environments/minigrid/src/minigrid/envs/unlock.py
new file mode 100644
index 00000000..f6b62d4e
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/unlock.py
@@ -0,0 +1,40 @@
+from gym_minigrid.roomgrid import RoomGrid
+
+
+class Unlock(RoomGrid):
+    """
+    One row of two rooms joined by a locked door; the key and the agent
+    are both placed in room (0, 0). Opening the door ends the episode.
+    """
+
+    def __init__(self, seed=None):
+        room_size = 6
+        super().__init__(
+            seed=seed,
+            room_size=room_size,
+            max_steps=8*room_size**2,
+            num_cols=2,
+            num_rows=1
+        )
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        # Make sure the two rooms are directly connected by a locked door
+        locked_door, _ = self.add_door(0, 0, 0, locked=True)
+        # Add a key to unlock the door
+        self.add_object(0, 0, 'key', locked_door.color)
+
+        self.place_agent(0, 0)
+
+        self.door = locked_door
+        self.mission = "open the door"
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+
+        # A toggle action that leaves the door open completes the task
+        if action == self.actions.toggle and self.door.is_open:
+            reward, done = self._reward(), True
+
+        return obs, reward, done, info
diff --git a/environments/minigrid/src/minigrid/envs/unlockpickup.py b/environments/minigrid/src/minigrid/envs/unlockpickup.py
new file mode 100644
index 00000000..8ca7a3bb
--- /dev/null
+++ b/environments/minigrid/src/minigrid/envs/unlockpickup.py
@@ -0,0 +1,42 @@
+from gym_minigrid.roomgrid import RoomGrid
+
+
+class UnlockPickup(RoomGrid):
+    """
+    One row of two rooms joined by a locked door; picking up the box
+    added to room (1, 0) ends the episode with a reward.
+    """
+
+    def __init__(self, seed=None):
+        room_size = 6
+        super().__init__(
+            seed=seed,
+            room_size=room_size,
+            max_steps=8*room_size**2,
+            num_cols=2,
+            num_rows=1
+        )
+
+    def _gen_grid(self, width, height):
+        super()._gen_grid(width, height)
+
+        # Add a box to the room on the right
+        box, _ = self.add_object(1, 0, kind="box")
+        # Make sure the two rooms are directly connected by a locked door
+        locked_door, _ = self.add_door(0, 0, 0, locked=True)
+        # Add a key to unlock the door
+        self.add_object(0, 0, 'key', locked_door.color)
+
+        self.place_agent(0, 0)
+
+        self.obj = box
+        self.mission = "pick up the %s %s" % (box.color, box.type)
+
+    def step(self, action):
+        obs, reward, done, info = super().step(action)
+
+        # Success once a pickup action leaves the agent holding the box
+        if action == self.actions.pickup and self.carrying and self.carrying == self.obj:
+            reward, done = self._reward(), True
+
+        return obs, reward, done, info
diff --git a/environments/minigrid/src/minigrid/minigrid.py b/environments/minigrid/src/minigrid/minigrid.py
new file mode 100644
index 00000000..ec0e6e71
--- /dev/null
+++ b/environments/minigrid/src/minigrid/minigrid.py
@@ -0,0 +1,1329 @@
+import math
+import hashlib
+from enum import IntEnum
+
+import numpy as np
+
+import gym
+from gym import spaces
+from gym.utils import seeding
+
+from rendering import *
+import simenv as sm
+
+# Size in pixels of a tile in the full-scale human view
+
+
+TILE_PIXELS = 32
+
+# Map of color names to RGB values (one 3-element numpy array per color)
+COLORS = {
+    'red': np.array([255, 0, 0]),
+    'green': np.array([0, 255, 0]),
+    'blue': np.array([0, 0, 255]),
+    'purple': np.array([112, 39, 195]),
+    'yellow': np.array([255, 255, 0]),
+    'grey': np.array([100, 100, 100])
+}
+
+COLOR_NAMES = sorted(list(COLORS.keys()))  # alphabetical, so positions here do NOT match COLOR_TO_IDX
+
+# Used to map colors to integers (the color component of an object encoding)
+COLOR_TO_IDX = {
+    'red': 0,
+    'green': 1,
+    'blue': 2,
+    'purple': 3,
+    'yellow': 4,
+    'grey': 5
+}
+
+IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))  # inverse of COLOR_TO_IDX
+
+# Map of object type to integers (the type component of an object encoding)
+OBJECT_TO_IDX = {
+    'unseen': 0,
+    'empty': 1,
+    'wall': 2,
+    'floor': 3,
+    'door': 4,
+    'key': 5,
+    'ball': 6,
+    'box': 7,
+    'goal': 8,
+    'lava': 9,
+    'agent': 10,
+}
+
+IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))  # inverse of OBJECT_TO_IDX
+
+# Map of state names to integers (the same codes Door.encode produces)
+STATE_TO_IDX = {
+    'open': 0,
+    'closed': 1,
+    'locked': 2,
+}
+
+# Map of agent direction indices to vectors (index 0..3, y grows downwards)
+DIR_TO_VEC = [
+    # Pointing right (positive X)
+    np.array((1, 0)),
+    # Down (positive Y)
+    np.array((0, 1)),
+    # Pointing left (negative X)
+    np.array((-1, 0)),
+    # Up (negative Y)
+    np.array((0, -1)),
+]
+
+
+class WorldObj:
+    """
+    Common base class of the objects that can be placed in the grid
+    """
+
+    def __init__(self, type, color):
+        # Both names must be keys of the encoding tables
+        assert type in OBJECT_TO_IDX, type
+        assert color in COLOR_TO_IDX, color
+        self.type = type
+        self.color = color
+        # Nothing is held inside the object to begin with
+        self.contains = None
+
+        # Initial and current position of the object, not known yet
+        self.init_pos = None
+        self.cur_pos = None
+
+    def can_overlap(self):
+        """Whether the agent may occupy the same cell (default: no)"""
+        return False
+
+    def can_pickup(self):
+        """Whether the agent may pick this object up (default: no)"""
+        return False
+
+    def can_contain(self):
+        """Whether this object may hold another object (default: no)"""
+        return False
+
+    def see_behind(self):
+        """Whether the agent can see past this object (default: yes)"""
+        return True
+
+    def toggle(self, env, pos):
+        """Hook for the object's toggle behaviour; the default returns False"""
+        return False
+
+    def encode(self):
+        """Encode a description of this object as a 3-tuple of integers"""
+        return (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], 0)
+
+    @staticmethod
+    def decode(type_idx, color_idx, state):
+        """Create an object from a 3-tuple state description"""
+
+        obj_type = IDX_TO_OBJECT[type_idx]
+        color = IDX_TO_COLOR[color_idx]
+
+        # The 'empty' and 'unseen' codes do not stand for an object
+        if obj_type in ('empty', 'unseen'):
+            return None
+
+        # Doors are the only objects built from the state code
+        # (0: open, 1: closed, 2: locked)
+        if obj_type == 'door':
+            return Door(color, state == 0, state == 2)
+
+        # Goal and Lava are constructed without a color argument
+        if obj_type == 'goal':
+            return Goal()
+        if obj_type == 'lava':
+            return Lava()
+
+        # Every remaining known type is built from its color alone
+        by_color = {
+            'wall': Wall,
+            'floor': Floor,
+            'ball': Ball,
+            'key': Key,
+            'box': Box,
+        }
+        # Any other type name (e.g. 'agent') cannot be decoded
+        assert obj_type in by_color, "unknown object type in decode '%s'" % obj_type
+
+        return by_color[obj_type](color)
+
+    def render(self, r):
+        """Draw this object with the given renderer; subclasses override this"""
+        raise NotImplementedError
+
+
+class Goal(WorldObj):  # fixed type 'goal' and fixed color 'green'
+    def __init__(self):
+        super().__init__('goal', 'green')
+
+    def can_overlap(self):
+        return True  # overrides the WorldObj default of False
+
+    def render(self, img):
+        fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color])  # rect spanning 0..1 on both axes
+        sm.Rectangle([], color=COLORS[self.color]).draw(img)  # NOTE(review): simenv call, purpose not clear from here - confirm
+
+
+class Floor(WorldObj):
+    """
+    Colored floor tile; can_overlap() returns True for it
+    """
+
+    def __init__(self, color='blue'):
+        super(Floor, self).__init__('floor', color)
+
+    def can_overlap(self):
+        return True
+
+    def render(self, img):
+        # Draw with every RGB channel of the named color halved
+        dimmed = COLORS[self.color] / 2
+        fill_coords(img, point_in_rect(0.031, 1, 0.031, 1), dimmed)
+
+
+class Lava(WorldObj):
+    def __init__(self):
+        super(Lava, self).__init__('lava', 'red')
+
+    def can_overlap(self):
+        return True
+
+    def render(self, img):
+        orange = (255, 128, 0)
+
+        # Background color
+        fill_coords(img, point_in_rect(0, 1, 0, 1), orange)
+
+        # Little waves: three rows of four connected black segments each
+        for i in range(3):
+            ylo = 0.3 + 0.2 * i
+            yhi = 0.4 + 0.2 * i
+            xs = (0.1, 0.3, 0.5, 0.7, 0.9)
+            ys = (ylo, yhi, ylo, yhi, ylo)
+            for k in range(4):
+                fill_coords(img, point_in_line(xs[k], ys[k], xs[k + 1], ys[k + 1], r=0.03), (0, 0, 0))
+
+
+class Wall(WorldObj):  # grey by default; see_behind() returns False for it
+    def __init__(self, color='grey'):
+        super(Wall, self).__init__('wall', color)
+
+    def see_behind(self):
+        return False
+
+    def render(self, img):
+        fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color])  # rect spanning 0..1 on both axes
+
+
+class Door(WorldObj):
+    """
+    Door that is either open, closed or locked
+    """
+
+    def __init__(self, color, is_open=False, is_locked=False):
+        super().__init__('door', color)
+        self.is_open = is_open
+        self.is_locked = is_locked
+
+    def can_overlap(self):
+        """The agent can only enter this cell while the door is open"""
+        return self.is_open
+
+    def see_behind(self):
+        """The agent can only see past the door while it is open"""
+        return self.is_open
+
+    def toggle(self, env, pos):
+        """
+        Open or close the door. A locked door is only unlocked (and opened)
+        when the agent carries a key of the door's color.
+
+        :return: False if the door is locked and could not be opened,
+                 True otherwise
+        """
+        if not self.is_locked:
+            # Unlocked doors simply flip between open and closed
+            self.is_open = not self.is_open
+            return True
+
+        held = env.carrying
+        has_matching_key = isinstance(held, Key) and held.color == self.color
+        if not has_matching_key:
+            return False
+
+        self.is_locked = False
+        self.is_open = True
+        return True
+
+    def encode(self):
+        """Encode the a description of this object as a 3-tuple of integers"""
+
+        # State, 0: open, 1: closed, 2: locked (open takes precedence)
+        if self.is_open:
+            state = 0
+        elif self.is_locked:
+            state = 2
+        else:
+            state = 1
+
+        return OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state
+
+    def render(self, img):
+        c = COLORS[self.color]
+        black = (0, 0, 0)
+
+        if self.is_open:
+            # An open door is a thin bar along the right edge of the tile
+            fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c)
+            fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), black)
+            return
+
+        # Both closed states start from a tile fully painted in the door color
+        fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
+
+        if self.is_locked:
+            # Darkened door panel with a key slot
+            fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))
+            fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c)
+            return
+
+        # Nested frames of the unlocked door
+        fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), black)
+        fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c)
+        fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), black)
+
+        # Door handle
+        fill_coords(img, point_in_circle(cx=0.75, cy=0.50, r=0.08), c)
+
+
+class Key(WorldObj):
+    """
+    Key that the agent can pick up
+    """
+
+    def __init__(self, color='blue'):
+        super().__init__('key', color)
+
+    def can_pickup(self):
+        return True
+
+    def render(self, img):
+        key_color = COLORS[self.color]
+
+        # Vertical shaft first, then the two teeth
+        rects = (
+            (0.50, 0.63, 0.31, 0.88),
+            (0.38, 0.50, 0.59, 0.66),
+            (0.38, 0.50, 0.81, 0.88),
+        )
+        for xmin, xmax, ymin, ymax in rects:
+            fill_coords(img, point_in_rect(xmin, xmax, ymin, ymax), key_color)
+
+        # Ring: a filled disc with a black hole in its middle
+        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.190), key_color)
+        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0, 0, 0))
+
+
+class Ball(WorldObj):
+    """
+    Ball that the agent can pick up
+    """
+
+    def __init__(self, color='blue'):
+        super().__init__('ball', color)
+
+    def can_pickup(self):
+        return True
+
+    def render(self, img):
+        # A single filled disc centred in the tile
+        disc = point_in_circle(cx=0.5, cy=0.5, r=0.31)
+        fill_coords(img, disc, COLORS[self.color])
+
+
+class Box(WorldObj):
+    """
+    Box that can be picked up; toggling it replaces it by its contents
+    """
+
+    def __init__(self, color, contains=None):
+        super().__init__('box', color)
+        self.contains = contains
+
+    def can_pickup(self):
+        return True
+
+    def render(self, img):
+        box_color = COLORS[self.color]
+        black = (0, 0, 0)
+
+        # Colored square with a black interior forms the outline
+        fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), box_color)
+        fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), black)
+
+        # Horizontal slit across the middle
+        slit = point_in_rect(0.16, 0.84, 0.47, 0.53)
+        fill_coords(img, slit, box_color)
+
+    def toggle(self, env, pos):
+        # The box's cell now holds whatever the box contained (possibly None)
+        contents = self.contains
+        env.grid.set(*pos, contents)
+        return True
+
+
+class Grid:
+    """
+    Represent a grid and operations on it
+    """
+
+    # Static cache of pre-renderer tiles
+    tile_cache = {}
+
+    def __init__(self, width, height):
+        """
+        Create an empty grid of `width` x `height` cells
+        """
+        # Grids smaller than 3x3 are rejected
+        assert width >= 3
+        assert height >= 3
+
+        self.width = width
+        self.height = height
+
+        # Cells live in one flat list, one row after the other; None = empty
+        self.grid = [None for _ in range(width * height)]
+
+    def __contains__(self, key):
+        """
+        Membership test. `key` is either a WorldObj, which is looked up by
+        identity, or a (color, type) tuple, where a color of None matches
+        any color. Any other kind of key is never contained.
+        """
+        if isinstance(key, WorldObj):
+            return any(cell is key for cell in self.grid)
+
+        if isinstance(key, tuple):
+            for cell in self.grid:
+                if cell is None:
+                    continue
+                exact_match = (cell.color, cell.type) == key
+                if exact_match or (key[0] is None and key[1] == cell.type):
+                    return True
+
+        return False
+
+    def __eq__(self, other):
+        """
+        Two grids are equal when their encodings are equal
+
+        Comparing with something that is not a Grid returns NotImplemented,
+        so that `grid == None` evaluates to False instead of failing with
+        an AttributeError on the missing encode() method.
+        """
+        if not isinstance(other, Grid):
+            return NotImplemented
+
+        grid1 = self.encode()
+        grid2 = other.encode()
+        return np.array_equal(grid2, grid1)
+
+    def __ne__(self, other):
+        """Negation of the equality comparison"""
+        same = self == other
+        return not same
+
+    def copy(self):
+        """Return a deep copy of this grid"""
+        import copy as copy_module
+        return copy_module.deepcopy(self)
+
+    def set(self, i, j, v):
+        """Store `v` in the cell at column `i`, row `j`"""
+        assert 0 <= i < self.width
+        assert 0 <= j < self.height
+        index = j * self.width + i
+        self.grid[index] = v
+
+    def get(self, i, j):
+        """Return the content of the cell at column `i`, row `j`"""
+        assert 0 <= i < self.width
+        assert 0 <= j < self.height
+        index = j * self.width + i
+        return self.grid[index]
+
+    def horz_wall(self, x, y, length=None, obj_type=Wall):
+        """
+        Fill a horizontal run of cells starting at (x, y) with new
+        `obj_type` instances; by default the run reaches the right edge
+        """
+        count = self.width - x if length is None else length
+        for col in range(x, x + count):
+            self.set(col, y, obj_type())
+
+    def vert_wall(self, x, y, length=None, obj_type=Wall):
+        """
+        Fill a vertical run of cells starting at (x, y) with new
+        `obj_type` instances; by default the run reaches the bottom edge
+        """
+        count = self.height - y if length is None else length
+        for row in range(y, y + count):
+            self.set(x, row, obj_type())
+
+    def wall_rect(self, x, y, w, h):
+        """Draw the outline of a `w` x `h` rectangle whose top-left cell is (x, y)"""
+        bottom = y + h - 1
+        right = x + w - 1
+
+        # Top and bottom edges, then left and right edges
+        self.horz_wall(x, y, w)
+        self.horz_wall(x, bottom, w)
+        self.vert_wall(x, y, h)
+        self.vert_wall(right, y, h)
+
+    def rotate_left(self):
+        """
+        Return a new grid holding this grid rotated counter-clockwise
+        (width and height are swapped; the cell objects are shared)
+        """
+
+        rotated = Grid(self.height, self.width)
+        last_row = rotated.height - 1
+
+        for col in range(self.width):
+            for row in range(self.height):
+                rotated.set(row, last_row - col, self.get(col, row))
+
+        return rotated
+
+    def slice(self, topX, topY, width, height):
+        """
+        Return a new `width` x `height` grid copied from the area whose
+        top-left cell is (topX, topY). Positions that fall outside of this
+        grid are filled with new Wall objects.
+        """
+
+        sub = Grid(width, height)
+
+        for row in range(height):
+            src_y = topY + row
+            for col in range(width):
+                src_x = topX + col
+                inside = 0 <= src_x < self.width and 0 <= src_y < self.height
+                cell = self.get(src_x, src_y) if inside else Wall()
+                sub.set(col, row, cell)
+
+        return sub
+
+    @classmethod
+    def render_tile(
+            cls,
+            obj,
+            agent_dir=None,
+            highlight=False,
+            tile_size=TILE_PIXELS,
+            subdivs=3
+    ):
+        """
+        Render a tile and cache the result
+
+        :param obj: object occupying the tile, or None for an empty tile
+        :param agent_dir: direction of the agent if it stands on this tile,
+                          None if the agent is not on it
+        :param highlight: whether to highlight the tile
+        :param tile_size: side length of the returned tile in pixels
+        :param subdivs: supersampling factor; the tile is drawn at
+                        tile_size * subdivs pixels and then downsampled
+        :return: the rendered tile image; the same array object is handed
+                 out again on later cache hits
+        """
+
+        # Hash map lookup key for the cache. `subdivs` is part of the key
+        # because it changes the pixel values of the downsampled tile.
+        key = (agent_dir, highlight, tile_size, subdivs)
+        key = obj.encode() + key if obj else key
+
+        if key in cls.tile_cache:
+            return cls.tile_cache[key]
+
+        img = np.zeros(shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8)
+
+        # Draw the grid lines (top and left edges)
+        fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100))
+        fill_coords(img, point_in_rect(0, 1, 0, 0.031), (100, 100, 100))
+
+        if obj is not None:
+            obj.render(img)
+
+        # Overlay the agent on top
+        if agent_dir is not None:
+            tri_fn = point_in_triangle(
+                (0.12, 0.19),
+                (0.87, 0.50),
+                (0.12, 0.81),
+            )
+
+            # Rotate the agent based on its direction (quarter turns)
+            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5 * math.pi * agent_dir)
+            fill_coords(img, tri_fn, (255, 0, 0))
+
+        # Highlight the cell if needed
+        if highlight:
+            highlight_img(img)
+
+        # Downsample the image to perform supersampling/anti-aliasing
+        img = downsample(img, subdivs)
+
+        # Cache the rendered tile
+        cls.tile_cache[key] = img
+
+        return img
+
+    def render(
+            self,
+            tile_size,
+            agent_pos=None,
+            agent_dir=None,
+            highlight_mask=None
+    ):
+        """
+        Render this grid at a given scale
+        :param tile_size: tile size in pixels
+        :param agent_pos: position of the agent
+        :param agent_dir: direction of the agent
+        :param highlight_mask: tiles to highlight, indexed [column, row];
+                               nothing is highlighted when omitted
+        :return: uint8 image of shape (height * tile_size, width * tile_size, 3)
+        """
+
+        if highlight_mask is None:
+            highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
+
+        canvas = np.zeros(
+            shape=(self.height * tile_size, self.width * tile_size, 3),
+            dtype=np.uint8
+        )
+
+        # Render every tile and paste it at its place in the canvas
+        for row in range(self.height):
+            top = row * tile_size
+            for col in range(self.width):
+                left = col * tile_size
+
+                # Only the agent's tile is rendered with a direction
+                is_agent_cell = np.array_equal(agent_pos, (col, row))
+                tile = Grid.render_tile(
+                    self.get(col, row),
+                    agent_dir=agent_dir if is_agent_cell else None,
+                    highlight=highlight_mask[col, row],
+                    tile_size=tile_size
+                )
+
+                canvas[top:top + tile_size, left:left + tile_size, :] = tile
+
+        return canvas
+
+    def encode(self, vis_mask=None):
+        """
+        Produce a compact numpy encoding of the grid
+        :param vis_mask: boolean array indexed [column, row]; cells where it
+                         is False are left as zeros. All cells are encoded
+                         when omitted.
+        :return: uint8 array of shape (width, height, 3)
+        """
+
+        if vis_mask is None:
+            vis_mask = np.ones((self.width, self.height), dtype=bool)
+
+        encoded = np.zeros((self.width, self.height, 3), dtype='uint8')
+
+        for i in range(self.width):
+            for j in range(self.height):
+                if not vis_mask[i, j]:
+                    continue
+
+                cell = self.get(i, j)
+                if cell is None:
+                    # Empty cells only carry the 'empty' type index
+                    encoded[i, j, 0] = OBJECT_TO_IDX['empty']
+                    encoded[i, j, 1] = 0
+                    encoded[i, j, 2] = 0
+                else:
+                    encoded[i, j, :] = cell.encode()
+
+        return encoded
+
+    @staticmethod
+    def decode(array):
+        """
+        Decode an array grid encoding back into a grid
+        :param array: array of shape (width, height, 3)
+        :return: (grid, vis_mask), where vis_mask is False for the cells
+                 whose type index is the 'unseen' one
+        """
+
+        n_cols, n_rows, channels = array.shape
+        assert channels == 3
+
+        decoded = Grid(n_cols, n_rows)
+        seen = np.ones(shape=(n_cols, n_rows), dtype=bool)
+
+        for col in range(n_cols):
+            for row in range(n_rows):
+                type_idx, color_idx, state = array[col, row]
+                decoded.set(col, row, WorldObj.decode(type_idx, color_idx, state))
+                seen[col, row] = (type_idx != OBJECT_TO_IDX['unseen'])
+
+        return decoded, seen
+
+    @staticmethod
+    def process_vis(grid, agent_pos):
+        """
+        Compute which cells of `grid` are visible from `agent_pos`, and
+        erase the cells that are not
+
+        NOTE(review): this is declared as a staticmethod, so the grid has
+        to be passed explicitly as the first argument - verify call sites.
+
+        :param grid: grid to process; cells found not visible are set to None
+        :param agent_pos: (x, y) cell the visibility propagation starts from
+        :return: boolean array of shape (grid.width, grid.height), True for
+                 the visible cells
+        """
+        mask = np.zeros(shape=(grid.width, grid.height), dtype=bool)
+
+        # The starting cell is always visible
+        mask[agent_pos[0], agent_pos[1]] = True
+
+        # Sweep the rows from the last one (j = height - 1) up to row 0
+        for j in reversed(range(0, grid.height)):
+            # Left-to-right pass: spread visibility to the right neighbour
+            # and to the row j - 1
+            for i in range(0, grid.width - 1):
+                if not mask[i, j]:
+                    continue
+
+                # A cell that cannot be seen through is visible itself,
+                # but does not pass visibility on
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i + 1, j] = True
+                if j > 0:
+                    mask[i + 1, j - 1] = True
+                    mask[i, j - 1] = True
+
+            # Right-to-left pass: same propagation towards the left neighbour
+            for i in reversed(range(1, grid.width)):
+                if not mask[i, j]:
+                    continue
+
+                cell = grid.get(i, j)
+                if cell and not cell.see_behind():
+                    continue
+
+                mask[i - 1, j] = True
+                if j > 0:
+                    mask[i - 1, j - 1] = True
+                    mask[i, j - 1] = True
+
+        # Erase everything that ended up not visible
+        for j in range(0, grid.height):
+            for i in range(0, grid.width):
+                if not mask[i, j]:
+                    grid.set(i, j, None)
+
+        return mask
+
+
+class MiniGridEnv(gym.Env):
+    """
+    2D grid world game environment
+    """
+
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 10
+    }
+
+    # Enumeration of possible actions
+    class Actions(IntEnum):
+        """
+        Actions understood by step(); the action space of the environment
+        is a Discrete space with one entry per member of this enumeration
+        """
+
+        # Turn left, turn right, move forward
+        left = 0
+        right = 1
+        forward = 2
+
+        # Pick up the object in front of the agent
+        pickup = 3
+
+        # Drop the carried object in front of the agent
+        drop = 4
+
+        # Toggle/activate the object in front of the agent
+        toggle = 5
+
+        # Done completing task (step() does nothing for this action)
+        done = 6
+
+    def __init__(
+            self,
+            grid_size=None,
+            width=None,
+            height=None,
+            max_steps=100,
+            see_through_walls=False,
+            seed=1337,
+            agent_view_size=7
+    ):
+        """
+        :param grid_size: side length of a square grid; cannot be combined
+                          with width/height
+        :param width: grid width, passed to _gen_grid
+        :param height: grid height, passed to _gen_grid
+        :param max_steps: number of steps after which step() reports done
+        :param see_through_walls: skip the visibility processing of the
+                                  agent's view when True
+        :param seed: seed handed to seed()
+        :param agent_view_size: side length of the agent's square view, an
+                                odd number of at least 3 cells
+        """
+        # grid_size is a shorthand for width == height
+        if grid_size:
+            assert width is None and height is None
+            width = height = grid_size
+
+        # Actions are the discrete integer values of the class-level enumeration
+        self.actions = MiniGridEnv.Actions
+        self.action_space = spaces.Discrete(len(self.actions))
+
+        # Number of cells (width and height) in the agent view
+        assert agent_view_size % 2 == 1
+        assert agent_view_size >= 3
+        self.agent_view_size = agent_view_size
+
+        # The declared observation space is a dictionary holding the
+        # encoding of the agent's view under the 'image' key
+        image_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(self.agent_view_size, self.agent_view_size, 3),
+            dtype='uint8'
+        )
+        self.observation_space = spaces.Dict({
+            'image': image_space
+        })
+
+        # Range of possible rewards
+        self.reward_range = (0, 1)
+
+        # Window to use for human rendering mode, created on demand
+        self.window = None
+
+        # Environment configuration
+        self.width = width
+        self.height = height
+        self.max_steps = max_steps
+        self.see_through_walls = see_through_walls
+
+        # Episode state, filled in by reset()
+        self.agent_pos = None
+        self.agent_dir = None
+        self.carrying = None
+        self.step_count = None
+
+        # The random number generator has to exist before the first reset
+        self.np_random = None
+        self.seed(seed=seed)
+
+        # Initialize the state
+        self.reset()
+
+    def reset(self):
+        """
+        Start a new episode: regenerate the grid, clear the carried object
+        and the step counter, and return the first observation
+        """
+        # Cleared so that the checks below can tell whether _gen_grid set them
+        self.agent_pos = None
+        self.agent_dir = None
+
+        # A new grid is generated for every episode. To get the same grid
+        # again, call env.seed() with the same seed before env.reset()
+        self._gen_grid(self.width, self.height)
+
+        assert self.agent_pos is not None
+        assert self.agent_dir is not None
+
+        # The agent must not start on top of an object it cannot overlap
+        occupant = self.grid.get(*self.agent_pos)
+        assert occupant is None or occupant.can_overlap()
+
+        # Nothing is carried and no step was taken yet
+        self.carrying = None
+        self.step_count = 0
+
+        return self.gen_obs()
+
+    def seed(self, seed=1337):
+        """
+        Replace the environment's random number generator by one created
+        from `seed`, and return the seed in a one-element list
+        """
+        rng, _ = seeding.np_random(seed)
+        self.np_random = rng
+        return [seed]
+
+    def hash(self, size=16):
+        """Compute a hash of the grid encoding, the agent position and the agent direction.
+        :param size: number of leading hex digits of the SHA-256 digest to return
+        """
+        digest = hashlib.sha256()
+
+        parts = (self.grid.encode().tolist(), self.agent_pos, self.agent_dir)
+        for part in parts:
+            digest.update(str(part).encode('utf8'))
+
+        return digest.hexdigest()[:size]
+
+    @property
+    def steps_remaining(self):
+        """Number of steps left before max_steps is reached"""
+        budget = self.max_steps
+        return budget - self.step_count
+
+    def __str__(self):
+        """
+        Produce a pretty string of the environment's grid along with the agent.
+        Every grid cell becomes a 2-character string: the first character
+        stands for the object and the second one for its color. Rows are
+        separated by newlines.
+        """
+
+        # Map of object types to short string
+        OBJECT_TO_STR = {
+            'wall': 'W',
+            'floor': 'F',
+            'door': 'D',
+            'key': 'K',
+            'ball': 'A',
+            'box': 'B',
+            'goal': 'G',
+            'lava': 'V',
+        }
+
+        # Map agent's direction to short string
+        AGENT_DIR_TO_STR = {
+            0: '>',
+            1: 'V',
+            2: '<',
+            3: '^'
+        }
+
+        rows = []
+
+        for j in range(self.grid.height):
+            cells = []
+
+            for i in range(self.grid.width):
+                # The agent hides whatever is in its cell
+                if i == self.agent_pos[0] and j == self.agent_pos[1]:
+                    cells.append(2 * AGENT_DIR_TO_STR[self.agent_dir])
+                    continue
+
+                c = self.grid.get(i, j)
+
+                if c is None:
+                    cells.append('  ')
+                elif c.type == 'door':
+                    # Doors show their state instead of a fixed letter
+                    if c.is_open:
+                        cells.append('__')
+                    elif c.is_locked:
+                        cells.append('L' + c.color[0].upper())
+                    else:
+                        cells.append('D' + c.color[0].upper())
+                else:
+                    cells.append(OBJECT_TO_STR[c.type] + c.color[0].upper())
+
+            rows.append(''.join(cells))
+
+        return '\n'.join(rows)
+
+    def _gen_grid(self, width, height):
+        """
+        Hook that builds the grid of a new episode; every environment has
+        to provide its own implementation
+
+        reset() calls it with the configured width and height, and expects
+        it to set self.grid, self.agent_pos and self.agent_dir.
+
+        :raises NotImplementedError: always, in this base class. An explicit
+            exception is used instead of `assert False`, which would
+            silently do nothing when assertions are disabled.
+        """
+        raise NotImplementedError("_gen_grid needs to be implemented by each environment")
+
+    def _reward(self):
+        """
+        Compute the reward to be given upon success: 1 minus a penalty that
+        grows linearly with the fraction of max_steps already used
+        """
+
+        fraction_used = self.step_count / self.max_steps
+        return 1 - 0.9 * fraction_used
+
+    def _rand_int(self, low, high):
+        """
+        Draw a random integer from the environment's generator,
+        in [low,high[
+        """
+
+        rng = self.np_random
+        return rng.randint(low, high)
+
+    def _rand_float(self, low, high):
+        """
+        Draw a random float from the environment's generator,
+        in [low,high[
+        """
+
+        rng = self.np_random
+        return rng.uniform(low, high)
+
+    def _rand_bool(self):
+        """
+        Draw a random boolean: True when a random integer from {0, 1} is 0
+        """
+
+        draw = self.np_random.randint(0, 2)
+        return draw == 0
+
+    def _rand_elem(self, iterable):
+        """
+        Pick a random element of an iterable (it is copied into a list first)
+        """
+
+        choices = list(iterable)
+        return choices[self._rand_int(0, len(choices))]
+
+    def _rand_subset(self, iterable, num_elems):
+        """
+        Sample `num_elems` elements of an iterable without replacement and
+        return them as a list, in the order they were drawn
+        """
+
+        pool = list(iterable)
+        assert num_elems <= len(pool)
+
+        picked = []
+
+        while len(picked) < num_elems:
+            # Take the drawn element out of the pool so it cannot come up again
+            choice = self._rand_elem(pool)
+            pool.remove(choice)
+            picked.append(choice)
+
+        return picked
+
+    def _rand_color(self):
+        """
+        Pick a random entry of COLOR_NAMES
+        """
+
+        color_name = self._rand_elem(COLOR_NAMES)
+        return color_name
+
+    def _rand_pos(self, xLow, xHigh, yLow, yHigh):
+        """
+        Draw a random (x,y) position tuple, x from [xLow,xHigh[ first and
+        then y from [yLow,yHigh[
+        """
+
+        x = self.np_random.randint(xLow, xHigh)
+        y = self.np_random.randint(yLow, yHigh)
+        return (x, y)
+
+    def place_obj(self,
+                  obj,
+                  top=None,
+                  size=None,
+                  reject_fn=None,
+                  max_tries=math.inf
+                  ):
+        """
+        Place an object at a random empty position in the grid
+
+        Positions are drawn by rejection sampling inside the given
+        rectangle (clipped to the grid) until one is found that is empty,
+        is not the agent's position and is not rejected by `reject_fn`.
+
+        :param obj: Object to place; None only picks a position
+        :param top: top-left position of the rectangle where to place
+                    (negative coordinates are clamped to 0, default (0, 0))
+        :param size: size of the rectangle where to place
+                     (default: the size of the whole grid)
+        :param reject_fn: function called as reject_fn(env, pos) that
+                          returns True for positions to filter out
+        :param max_tries: maximum number of positions to try
+        :return: the chosen position as a numpy array (x, y)
+        :raises RecursionError: when no position was accepted within
+                                `max_tries` tries
+        """
+
+        if top is None:
+            top = (0, 0)
+        else:
+            top = (max(top[0], 0), max(top[1], 0))
+
+        if size is None:
+            size = (self.grid.width, self.grid.height)
+
+        # Exclusive upper bounds of the sampling rectangle, clipped to the grid
+        x_end = min(top[0] + size[0], self.grid.width)
+        y_end = min(top[1] + size[1], self.grid.height)
+
+        num_tries = 0
+
+        while True:
+            # This is to handle with rare cases where rejection sampling
+            # gets stuck in an infinite loop. `>=` makes max_tries the exact
+            # number of attempts (`>` allowed one attempt too many).
+            if num_tries >= max_tries:
+                raise RecursionError('rejection sampling failed in place_obj')
+
+            num_tries += 1
+
+            pos = np.array((
+                self._rand_int(top[0], x_end),
+                self._rand_int(top[1], y_end)
+            ))
+
+            # Don't place the object on top of another object
+            if self.grid.get(*pos) is not None:
+                continue
+
+            # Don't place the object where the agent is
+            if np.array_equal(pos, self.agent_pos):
+                continue
+
+            # Check if there is a filtering criterion
+            if reject_fn and reject_fn(self, pos):
+                continue
+
+            break
+
+        self.grid.set(*pos, obj)
+
+        if obj is not None:
+            obj.init_pos = pos
+            obj.cur_pos = pos
+
+        return pos
+
+    def put_obj(self, obj, i, j):
+        """
+        Store an object in the grid cell (i, j) and record that cell as
+        both its initial and its current position
+        """
+
+        self.grid.set(i, j, obj)
+        obj.init_pos = obj.cur_pos = (i, j)
+
+    def place_agent(
+            self,
+            top=None,
+            size=None,
+            rand_dir=True,
+            max_tries=math.inf
+    ):
+        """
+        Set the agent's starting point at an empty position in the grid,
+        and give it a random direction unless `rand_dir` is False.
+        Returns the chosen position.
+        """
+
+        # Forget the old position first, so that place_obj does not
+        # exclude the agent's previous cell
+        self.agent_pos = None
+        chosen = self.place_obj(None, top, size, max_tries=max_tries)
+        self.agent_pos = chosen
+
+        if not rand_dir:
+            return chosen
+
+        self.agent_dir = self._rand_int(0, 4)
+        return chosen
+
+    @property
+    def dir_vec(self):
+        """
+        Direction vector of the agent's forward movement, looked up in
+        DIR_TO_VEC by the agent's direction (which must be in 0..3)
+        """
+
+        heading = self.agent_dir
+        assert 0 <= heading < 4
+        return DIR_TO_VEC[heading]
+
+    @property
+    def right_vec(self):
+        """
+        Vector pointing to the right of the agent: the forward vector
+        (dx, dy) turned into (-dy, dx)
+        """
+
+        forward_x, forward_y = self.dir_vec
+        return np.array((-forward_y, forward_x))
+
+    @property
+    def front_pos(self):
+        """
+        Position of the cell right in front of the agent: the agent's
+        position plus its direction vector
+        """
+
+        here = self.agent_pos
+        return here + self.dir_vec
+
+    def get_view_coords(self, i, j):
+        """
+        Translate and rotate absolute grid coordinates (i, j) into the
+        agent's partially observable view (sub-grid). Note that the resulting
+        coordinates may be negative or outside of the agent's view size.
+        """
+
+        agent_x, agent_y = self.agent_pos
+        fwd_x, fwd_y = self.dir_vec
+        right_x, right_y = self.right_vec
+
+        # Absolute coordinates of the view's top-left corner: all the way
+        # forward from the agent, and half a view to its left
+        view = self.agent_view_size
+        half = self.agent_view_size // 2
+        corner_x = agent_x + (fwd_x * (view - 1)) - (right_x * half)
+        corner_y = agent_y + (fwd_y * (view - 1)) - (right_y * half)
+
+        # Offset of the requested cell relative to that corner
+        off_x = i - corner_x
+        off_y = j - corner_y
+
+        # Project the offset onto the agent's own axes: the view's x axis
+        # is the agent's right, its y axis points backwards
+        vx = (right_x * off_x + right_y * off_y)
+        vy = -(fwd_x * off_x + fwd_y * off_y)
+
+        return vx, vy
+
+    def get_view_exts(self):
+        """
+        Get the extents (topX, topY, botX, botY) of the square set of tiles
+        visible to the agent
+        Note: the bottom extent indices are not included in the set
+        """
+
+        span = self.agent_view_size
+        half = self.agent_view_size // 2
+
+        # The agent sits in the middle of the view edge it has its back to
+        if self.agent_dir == 0:
+            # Facing right
+            topX = self.agent_pos[0]
+            topY = self.agent_pos[1] - half
+        elif self.agent_dir == 1:
+            # Facing down
+            topX = self.agent_pos[0] - half
+            topY = self.agent_pos[1]
+        elif self.agent_dir == 2:
+            # Facing left
+            topX = self.agent_pos[0] - span + 1
+            topY = self.agent_pos[1] - half
+        elif self.agent_dir == 3:
+            # Facing up
+            topX = self.agent_pos[0] - half
+            topY = self.agent_pos[1] - span + 1
+        else:
+            assert False, "invalid agent direction"
+
+        return topX, topY, topX + span, topY + span
+
+    def relative_coords(self, x, y):
+        """
+        Return the coordinates of a grid position inside the agent's field of view,
+        or None when the position lies outside of it
+        """
+
+        vx, vy = self.get_view_coords(x, y)
+        view = self.agent_view_size
+
+        outside = vx < 0 or vy < 0 or vx >= view or vy >= view
+        return None if outside else (vx, vy)
+
+    def in_view(self, x, y):
+        """
+        Check if a grid position lies inside the agent's field of view
+        """
+
+        coords = self.relative_coords(x, y)
+        return coords is not None
+
+    def agent_sees(self, x, y):
+        """
+        Check if a non-empty grid position is visible to the agent
+
+        :param x: absolute column of the position
+        :param y: absolute row of the position
+        :return: True when the position is inside the agent's view and the
+                 agent's observation shows an object of the same type as
+                 the one in the world at (x, y); False otherwise, including
+                 when the world cell is empty
+        """
+
+        coordinates = self.relative_coords(x, y)
+        if coordinates is None:
+            return False
+        vx, vy = coordinates
+
+        # There is nothing to see in an empty cell. Without this check the
+        # type comparison below fails with an AttributeError when the
+        # observation shows an object where the world has none (the carried
+        # object is displayed at the agent's own cell).
+        world_cell = self.grid.get(x, y)
+        if world_cell is None:
+            return False
+
+        obs = self.gen_obs()
+        obs_grid, _ = Grid.decode(obs['image'])
+        obs_cell = obs_grid.get(vx, vy)
+
+        return obs_cell is not None and obs_cell.type == world_cell.type
+
+    def step(self, action):
+        """
+        Apply one action and return (observation, reward, done, info)
+
+        The reward is non-zero only when the agent moves forward into a
+        goal cell. The episode is done when the agent moves forward into a
+        goal or lava cell, or when max_steps steps were taken. `info` is
+        always an empty dictionary.
+        """
+        self.step_count += 1
+
+        reward = 0
+        done = False
+
+        # Position and contents of the cell in front of the agent
+        fwd_pos = self.front_pos
+        fwd_cell = self.grid.get(*fwd_pos)
+
+        actions = self.actions
+
+        if action == actions.left:
+            # Rotate left, wrapping around from direction 0 to 3
+            self.agent_dir -= 1
+            if self.agent_dir < 0:
+                self.agent_dir += 4
+
+        elif action == actions.right:
+            # Rotate right
+            self.agent_dir = (self.agent_dir + 1) % 4
+
+        elif action == actions.forward:
+            if fwd_cell is None:
+                # Nothing in the way
+                self.agent_pos = fwd_pos
+            else:
+                if fwd_cell.can_overlap():
+                    self.agent_pos = fwd_pos
+                if fwd_cell.type == 'goal':
+                    done = True
+                    reward = self._reward()
+                if fwd_cell.type == 'lava':
+                    done = True
+
+        elif action == actions.pickup:
+            # Only one object can be carried at a time
+            if fwd_cell and fwd_cell.can_pickup():
+                if self.carrying is None:
+                    self.carrying = fwd_cell
+                    self.carrying.cur_pos = np.array([-1, -1])
+                    self.grid.set(*fwd_pos, None)
+
+        elif action == actions.drop:
+            # The carried object can only be dropped onto an empty cell
+            if not fwd_cell and self.carrying:
+                self.grid.set(*fwd_pos, self.carrying)
+                self.carrying.cur_pos = fwd_pos
+                self.carrying = None
+
+        elif action == actions.toggle:
+            # Toggle/activate the object in front of the agent
+            if fwd_cell:
+                fwd_cell.toggle(self, fwd_pos)
+
+        elif action == actions.done:
+            # Done action (not used by default)
+            pass
+
+        else:
+            assert False, "unknown action"
+
+        # Running out of steps also ends the episode
+        if self.step_count >= self.max_steps:
+            done = True
+
+        return self.gen_obs(), reward, done, {}
+
+    def gen_obs_grid(self):
+        """
+        Generate the sub-grid observed by the agent.
+        This method also outputs a visibility mask telling us which grid
+        cells the agent can actually see.
+
+        :return: (grid, vis_mask); the grid is agent_view_size cells wide
+                 and high, with the agent in the middle of its last row,
+                 and vis_mask is a boolean array indexed [column, row]
+        """
+
+        topX, topY, _, _ = self.get_view_exts()
+
+        grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size)
+
+        # Rotate the slice by agent_dir + 1 quarter turns to bring it into
+        # the agent's frame of reference
+        for _ in range(self.agent_dir + 1):
+            grid = grid.rotate_left()
+
+        # Process occluders and visibility
+        # Note that this incurs some performance cost
+        if not self.see_through_walls:
+            # Grid.process_vis is a staticmethod taking the grid as its first
+            # argument, so the grid is passed explicitly: calling it as
+            # grid.process_vis(agent_pos=...) leaves `grid` unfilled and
+            # raises a TypeError.
+            vis_mask = Grid.process_vis(
+                grid,
+                agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1)
+            )
+        else:
+            vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)
+
+        # Make it so the agent sees what it's carrying
+        # We do this by placing the carried object at the agent's position
+        # in the agent's partially observable view
+        agent_pos = grid.width // 2, grid.height - 1
+        if self.carrying:
+            grid.set(*agent_pos, self.carrying)
+        else:
+            grid.set(*agent_pos, None)
+
+        return grid, vis_mask
+
+    def gen_obs(self):
+        """
+        Generate the agent's view (partially observable, low-resolution encoding)
+
+        The observation is a dictionary with:
+        - 'image': encoding of the partially observable view
+        - 'direction': the agent's direction/orientation (acting as a compass)
+        - 'mission': the textual mission string (instructions for the agent)
+        """
+
+        view_grid, vis_mask = self.gen_obs_grid()
+
+        # Encode the partially observable view into a numpy array
+        image = view_grid.encode(vis_mask)
+
+        assert hasattr(self, 'mission'), "environments must define a textual mission string"
+
+        return {
+            'image': image,
+            'direction': self.agent_dir,
+            'mission': self.mission
+        }
+
+    def get_obs_render(self, obs, tile_size=TILE_PIXELS // 2):
+        """
+        Render an agent observation for visualization
+        :param obs: encoded view, in the format accepted by Grid.decode
+        :param tile_size: tile size in pixels
+        """
+
+        view_grid, vis_mask = Grid.decode(obs)
+
+        # In its own view the agent stands in the middle of the last row
+        # and is drawn with direction 3; visible cells are highlighted
+        view_agent_pos = (self.agent_view_size // 2, self.agent_view_size - 1)
+        return view_grid.render(
+            tile_size,
+            agent_pos=view_agent_pos,
+            agent_dir=3,
+            highlight_mask=vis_mask
+        )
+
+    def render(self, mode='human', close=False, highlight=True, tile_size=TILE_PIXELS):
+        """
+        Render the whole-grid human view
+        :param mode: 'human' also shows the image in a window
+        :param close: only close the window (if any) and return None
+        :param highlight: highlight the cells the agent can see
+        :param tile_size: tile size in pixels
+        :return: the rendered image of the whole grid
+        """
+
+        if close:
+            if self.window:
+                self.window.close()
+            return
+
+        human = mode == 'human'
+
+        # The window is created the first time it is needed
+        if human and not self.window:
+            import minigrid.window
+            self.window = minigrid.window.Window('minigrid')
+            self.window.show(block=False)
+
+        # Compute which cells are visible to the agent
+        _, vis_mask = self.gen_obs_grid()
+
+        # World coordinates of the cell that is the top-left corner of the
+        # agent's view: all the way forward, and half a view to the left
+        f_vec = self.dir_vec
+        r_vec = self.right_vec
+        view = self.agent_view_size
+        top_left = self.agent_pos + f_vec * (view - 1) - r_vec * (view // 2)
+
+        # Mask of which cells to highlight
+        highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)
+
+        for vis_j in range(view):
+            for vis_i in range(view):
+                # Cells that are not visible are not highlighted
+                if not vis_mask[vis_i, vis_j]:
+                    continue
+
+                # World coordinates of this view cell
+                abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i)
+
+                # Parts of the view that stick out of the grid are skipped
+                if abs_i < 0 or abs_i >= self.width:
+                    continue
+                if abs_j < 0 or abs_j >= self.height:
+                    continue
+
+                highlight_mask[abs_i, abs_j] = True
+
+        # Render the whole grid
+        img = self.grid.render(
+            tile_size,
+            self.agent_pos,
+            self.agent_dir,
+            highlight_mask=highlight_mask if highlight else None
+        )
+
+        if human:
+            self.window.set_caption(self.mission)
+            self.window.show_img(img)
+
+        return img
+
+    def close(self):
+        """Close the rendering window, if there is one"""
+        window = self.window
+        if window:
+            window.close()
diff --git a/environments/minigrid/src/minigrid/rendering.py b/environments/minigrid/src/minigrid/rendering.py
new file mode 100644
index 00000000..6198c92a
--- /dev/null
+++ b/environments/minigrid/src/minigrid/rendering.py
@@ -0,0 +1,126 @@
+import math
+import numpy as np
+
+
+def downsample(img, factor):
+    """
+    Downsample an image along both dimensions by averaging factor x factor
+    pixel blocks. Trailing axes (e.g. channels) are kept as they are.
+    """
+    assert img.shape[0] % factor == 0
+    assert img.shape[1] % factor == 0
+
+    # Split rows/columns into (block, offset) pairs, then average the offsets
+    img = img.reshape([img.shape[0]//factor, factor, img.shape[1]//factor, factor, *img.shape[2:]])
+    img = img.mean(axis=3)
+    img = img.mean(axis=1)
+    return img
+
+
+def fill_coords(img, fn, color):
+    """
+    Paint `color` into every pixel of `img` for which fn(xf, yf) is truthy,
+    where (xf, yf) is the pixel centre scaled by the image width and height
+    """
+    rows, cols = img.shape[0], img.shape[1]
+    for row in range(rows):
+        yf = (row + 0.5) / rows
+        for col in range(cols):
+            xf = (col + 0.5) / cols
+            if fn(xf, yf):
+                img[row, col] = color
+    return img
+
+
+def rotate_fn(fin, cx, cy, theta):
+    # Wrap `fin` so that its shape appears rotated by `theta` about (cx, cy)
+    def fout(x, y):
+        dx, dy = x - cx, y - cy
+        cos_t = math.cos(-theta)
+        sin_t = math.sin(-theta)
+
+        # Rotate the query point by -theta, then test the unrotated shape
+        return fin(cx + dx * cos_t - dy * sin_t, cy + dy * cos_t + dx * sin_t)
+
+    return fout
+
+
+def point_in_line(x0, y0, x1, y1, r):
+    # Predicate: True within distance r of the segment (x0, y0)-(x1, y1)
+    p0 = np.array([x0, y0])
+    p1 = np.array([x1, y1])
+    direction = p1 - p0
+    dist = np.linalg.norm(direction)
+    # A zero-length segment keeps a zero direction, so fn tests a disc around p0
+    direction = direction / dist if dist > 0 else direction
+
+    xmin = min(x0, x1) - r
+    xmax = max(x0, x1) + r
+    ymin = min(y0, y1) - r
+    ymax = max(y0, y1) + r
+
+    def fn(x, y):
+        # Fast, early escape test
+        if x < xmin or x > xmax or y < ymin or y > ymax:
+            return False
+
+        q = np.array([x, y])
+        pq = q - p0
+
+        # Closest point on the segment: projection clamped to [0, dist]
+        a = np.clip(np.dot(pq, direction), 0, dist)
+        p = p0 + a * direction
+        dist_to_line = np.linalg.norm(q - p)
+        return dist_to_line <= r
+
+    return fn
+
+
+def point_in_circle(cx, cy, r):
+    # Predicate factory: True within distance r of (cx, cy), boundary included
+    radius_sq = r * r
+    return lambda x, y: (x - cx) * (x - cx) + (y - cy) * (y - cy) <= radius_sq
+
+
+def point_in_rect(xmin, xmax, ymin, ymax):
+    # Predicate factory for an axis-aligned rectangle; points lying on an
+    # edge count as inside
+    return lambda x, y: (x >= xmin and x <= xmax) and (y >= ymin and y <= ymax)
+
+
+def point_in_triangle(a, b, c):
+    # Build a predicate that tests whether (x, y) lies inside triangle a-b-c
+    # using barycentric coordinates
+    a = np.array(a)
+    v0 = np.array(c) - a
+    v1 = np.array(b) - a
+
+    # Terms that depend only on the triangle are computed once, not per query
+    dot00 = np.dot(v0, v0)
+    dot01 = np.dot(v0, v1)
+    dot11 = np.dot(v1, v1)
+    denom = dot00 * dot11 - dot01 * dot01
+    # Zero-area triangle: nothing is inside; this avoids the division by zero
+    inv_denom = 1 / denom if denom != 0 else None
+
+    def fn(x, y):
+        if inv_denom is None:
+            return False
+        v2 = np.array((x, y)) - a
+        dot02 = np.dot(v0, v2)
+        dot12 = np.dot(v1, v2)
+        u = (dot11 * dot02 - dot01 * dot12) * inv_denom
+        v = (dot00 * dot12 - dot01 * dot02) * inv_denom
+        return (u >= 0) and (v >= 0) and (u + v) < 1
+
+    return fn
+
+
+def highlight_img(img, color=(255, 255, 255), alpha=0.30):
+    """
+    Blend `color` into `img` in place with weight `alpha`; returns nothing
+    """
+    # A float color keeps `color - img` from wrapping around in uint8 arithmetic
+    blend_img = img + alpha * (np.array(color, dtype=np.float64) - img)
+    blend_img = blend_img.clip(0, 255).astype(np.uint8)
+    img[:, :, :] = blend_img
diff --git a/environments/minigrid/src/minigrid/roomgrid.py b/environments/minigrid/src/minigrid/roomgrid.py
new file mode 100644
index 00000000..601678d3
--- /dev/null
+++ b/environments/minigrid/src/minigrid/roomgrid.py
@@ -0,0 +1,403 @@
+from .minigrid import *
+
+
+def reject_next_to(env, pos):
+    """
+    Placement filter: returns True (reject) for positions whose Manhattan
+    distance to the agent's current position is below 2
+    """
+
+    agent_x, agent_y = env.agent_pos
+    obj_x, obj_y = pos
+    # Distance 0 is the agent's own cell, distance 1 its four direct neighbours
+    return abs(agent_x - obj_x) + abs(agent_y - obj_y) < 2
+
+
+class Room:
+    """
+    One rectangular room of a RoomGrid: its bounds, doors, neighbors and objects
+    """
+
+    def __init__(self, top, size):
+        # Top-left corner and size (tuples)
+        self.top = top
+        self.size = size
+
+        # List of door objects and door positions
+        # Order of the doors is right, down, left, up
+        self.doors = [None] * 4
+        self.door_pos = [None] * 4
+
+        # List of rooms adjacent to this one
+        # Order of the neighbors is right, down, left, up
+        self.neighbors = [None] * 4
+
+        # Indicates if this room is behind a locked door
+        self.locked = False
+
+        # List of objects contained
+        self.objs = []
+
+    def rand_pos(self, env):
+        # Delegates to env._rand_pos (snake_case like the env's other _rand_*
+        # helpers) with bounds that start one cell inside the room's outline
+        topX, topY = self.top
+        sizeX, sizeY = self.size
+        return env._rand_pos(
+            topX + 1, topX + sizeX - 1,
+            topY + 1, topY + sizeY - 1
+        )
+
+    def pos_inside(self, x, y):
+        """
+        Check if a position is within the bounds of this room (walls included)
+        """
+        topX, topY = self.top
+        sizeX, sizeY = self.size
+
+        if x < topX or y < topY:
+            return False
+
+        if x >= topX + sizeX or y >= topY + sizeY:
+            return False
+
+        return True
+
+
+class RoomGrid(MiniGridEnv):
+    """
+    Environment with multiple rooms and random objects.
+    This is meant to serve as a base class for other environments.
+    """
+
+    def __init__(
+        self,
+        room_size=7,
+        num_rows=3,
+        num_cols=3,
+        max_steps=100,
+        seed=0,
+        agent_view_size=7
+    ):
+        # A room needs at least one interior cell between its two walls
+        assert room_size >= 3
+        assert num_rows > 0
+        assert num_cols > 0
+        self.room_size = room_size
+        self.num_rows = num_rows
+        self.num_cols = num_cols
+
+        height = (room_size - 1) * num_rows + 1
+        width = (room_size - 1) * num_cols + 1
+
+        # By default, this environment has no mission
+        self.mission = ''
+
+        super().__init__(
+            width=width,
+            height=height,
+            max_steps=max_steps,
+            see_through_walls=False,
+            seed=seed,
+            agent_view_size=agent_view_size
+        )
+
+    def room_from_pos(self, x, y):
+        """Get the room a given position maps to"""
+
+        assert x >= 0
+        assert y >= 0
+
+        i = x // (self.room_size-1)
+        j = y // (self.room_size-1)
+
+        assert i < self.num_cols
+        assert j < self.num_rows
+
+        return self.room_grid[j][i]
+
+    def get_room(self, i, j):
+        # Negative indices are rejected so they cannot wrap around silently
+        assert 0 <= i < self.num_cols and 0 <= j < self.num_rows
+        return self.room_grid[j][i]
+
+    def _gen_grid(self, width, height):
+        # Create the grid
+        self.grid = Grid(width, height)
+
+        self.room_grid = []
+
+        # For each row of rooms
+        for j in range(0, self.num_rows):
+            row = []
+
+            # For each column of rooms
+            for i in range(0, self.num_cols):
+                room = Room(
+                    (i * (self.room_size-1), j * (self.room_size-1)),
+                    (self.room_size, self.room_size)
+                )
+                row.append(room)
+
+                # Generate the walls for this room
+                self.grid.wall_rect(*room.top, *room.size)
+
+            self.room_grid.append(row)
+
+        # For each row of rooms
+        for j in range(0, self.num_rows):
+            # For each column of rooms
+            for i in range(0, self.num_cols):
+                room = self.room_grid[j][i]
+
+                x_l, y_l = (room.top[0] + 1, room.top[1] + 1)
+                x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1)
+
+                # Door positions, order is right, down, left, up
+                if i < self.num_cols - 1:
+                    room.neighbors[0] = self.room_grid[j][i+1]
+                    room.door_pos[0] = (x_m, self._rand_int(y_l, y_m))
+                if j < self.num_rows - 1:
+                    room.neighbors[1] = self.room_grid[j+1][i]
+                    room.door_pos[1] = (self._rand_int(x_l, x_m), y_m)
+                if i > 0:
+                    room.neighbors[2] = self.room_grid[j][i-1]
+                    room.door_pos[2] = room.neighbors[2].door_pos[0]
+                if j > 0:
+                    room.neighbors[3] = self.room_grid[j-1][i]
+                    room.door_pos[3] = room.neighbors[3].door_pos[1]
+
+        # The agent starts in the middle, facing right
+        self.agent_pos = (
+            (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2),
+            (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2)
+        )
+        self.agent_dir = 0
+
+    def place_in_room(self, i, j, obj):
+        """
+        Add an existing object to room (i, j)
+        """
+
+        room = self.get_room(i, j)
+
+        pos = self.place_obj(
+            obj,
+            room.top,
+            room.size,
+            reject_fn=reject_next_to,
+            max_tries=1000
+        )
+
+        room.objs.append(obj)
+
+        return obj, pos
+
+    def add_object(self, i, j, kind=None, color=None):
+        """
+        Add a new object to room (i, j)
+        """
+
+        if kind is None:
+            kind = self._rand_elem(['key', 'ball', 'box'])
+
+        if color is None:
+            color = self._rand_color()
+
+        # TODO: we probably want to add an Object.make helper function
+        assert kind in ['key', 'ball', 'box']
+        obj = None
+        if kind == 'key':
+            obj = Key(color)
+        elif kind == 'ball':
+            obj = Ball(color)
+        elif kind == 'box':
+            obj = Box(color)
+
+        return self.place_in_room(i, j, obj)
+
+    def add_door(self, i, j, door_idx=None, color=None, locked=None):
+        """
+        Add a door to a room, connecting it to a neighbor
+        """
+
+        room = self.get_room(i, j)
+
+        if door_idx is None:
+            # Need to make sure that there is a neighbor along this wall
+            # and that there is not already a door
+            while True:
+                door_idx = self._rand_int(0, 4)
+                if room.neighbors[door_idx] and room.doors[door_idx] is None:
+                    break
+
+        if color is None:
+            color = self._rand_color()
+
+        if locked is None:
+            locked = self._rand_bool()
+
+        assert room.doors[door_idx] is None, "door already exists"
+
+        room.locked = locked
+        door = Door(color, is_locked=locked)
+
+        pos = room.door_pos[door_idx]
+        self.grid.set(*pos, door)
+        door.cur_pos = pos
+
+        neighbor = room.neighbors[door_idx]
+        room.doors[door_idx] = door
+        neighbor.doors[(door_idx+2) % 4] = door
+
+        return door, pos
+
+    def remove_wall(self, i, j, wall_idx):
+        """
+        Remove a wall between two rooms
+        """
+
+        room = self.get_room(i, j)
+
+        assert 0 <= wall_idx < 4
+        assert room.doors[wall_idx] is None, "door exists on this wall"
+        assert room.neighbors[wall_idx], "invalid wall"
+
+        neighbor = room.neighbors[wall_idx]
+
+        tx, ty = room.top
+        w, h = room.size
+
+        # Ordering of walls is right, down, left, up
+        if wall_idx == 0:
+            for off in range(1, h - 1):
+                self.grid.set(tx + w - 1, ty + off, None)
+        elif wall_idx == 1:
+            for off in range(1, w - 1):
+                self.grid.set(tx + off, ty + h - 1, None)
+        elif wall_idx == 2:
+            for off in range(1, h - 1):
+                self.grid.set(tx, ty + off, None)
+        elif wall_idx == 3:
+            for off in range(1, w - 1):
+                self.grid.set(tx + off, ty, None)
+        else:
+            assert False, "invalid wall index"
+
+        # Mark the rooms as connected
+        room.doors[wall_idx] = True
+        neighbor.doors[(wall_idx+2) % 4] = True
+
+    def place_agent(self, i=None, j=None, rand_dir=True):
+        """
+        Place the agent in a room
+        """
+
+        if i is None:
+            i = self._rand_int(0, self.num_cols)
+        if j is None:
+            j = self._rand_int(0, self.num_rows)
+
+        room = self.room_grid[j][i]
+
+        # Find a position that is not right in front of an object
+        while True:
+            super().place_agent(room.top, room.size, rand_dir, max_tries=1000)
+            front_cell = self.grid.get(*self.front_pos)
+            if front_cell is None or front_cell.type == 'wall':
+                break
+
+        return self.agent_pos
+
+    def connect_all(self, door_colors=COLOR_NAMES, max_itrs=5000):
+        """
+        Make sure that all rooms are reachable by the agent from its
+        starting position
+        """
+
+        start_room = self.room_from_pos(*self.agent_pos)
+
+        added_doors = []
+
+        def find_reach():
+            reach = set()
+            stack = [start_room]
+            while len(stack) > 0:
+                room = stack.pop()
+                if room in reach:
+                    continue
+                reach.add(room)
+                for i in range(0, 4):
+                    if room.doors[i]:
+                        stack.append(room.neighbors[i])
+            return reach
+
+        num_itrs = 0
+
+        while True:
+            # This is to handle rare situations where random sampling produces
+            # a level that cannot be connected, resulting in an infinite loop
+            if num_itrs > max_itrs:
+                raise RecursionError('connect_all failed')
+            num_itrs += 1
+
+            # If all rooms are reachable, stop
+            reach = find_reach()
+            if len(reach) == self.num_rows * self.num_cols:
+                break
+
+            # Pick a random room and door position
+            i = self._rand_int(0, self.num_cols)
+            j = self._rand_int(0, self.num_rows)
+            k = self._rand_int(0, 4)
+            room = self.get_room(i, j)
+
+            # If there is already a door there, skip
+            if not room.door_pos[k] or room.doors[k]:
+                continue
+
+            if room.locked or room.neighbors[k].locked:
+                continue
+
+            color = self._rand_elem(door_colors)
+            door, _ = self.add_door(i, j, k, color, False)
+            added_doors.append(door)
+
+        return added_doors
+
+    def add_distractors(self, i=None, j=None, num_distractors=10, all_unique=True):
+        """
+        Add random objects that can potentially distract/confuse the agent.
+        """
+
+        # Collect a list of existing objects
+        objs = []
+        for row in self.room_grid:
+            for room in row:
+                for obj in room.objs:
+                    objs.append((obj.type, obj.color))
+
+        # List of distractors added
+        dists = []
+
+        while len(dists) < num_distractors:
+            color = self._rand_elem(COLOR_NAMES)
+            kind = self._rand_elem(['key', 'ball', 'box'])
+            obj = (kind, color)
+
+            if all_unique and obj in objs:
+                continue
+
+            # Add the object to a random room if no room specified
+            room_i = i
+            room_j = j
+            if room_i is None:
+                room_i = self._rand_int(0, self.num_cols)
+            if room_j is None:
+                room_j = self._rand_int(0, self.num_rows)
+
+            dist, pos = self.add_object(room_i, room_j, *obj)
+
+            objs.append(obj)
+            dists.append(dist)
+
+        return dists
diff --git a/environments/minigrid/src/minigrid/simenv_minigrid.py b/environments/minigrid/src/minigrid/simenv_minigrid.py
new file mode 100644
index 00000000..e4c14bdd
--- /dev/null
+++ b/environments/minigrid/src/minigrid/simenv_minigrid.py
@@ -0,0 +1,53 @@
+import simenv as sm
+
+
+class Goal:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Floor:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Lava:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Wall:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Door:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Key:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class Box:
+    def __init__(self):
+        """Placeholder; no state is set up yet."""
+
+
+class MiniGridEnv:
+    # Fills a simenv scene with a camera, a light and a width x height grid of boxes
+    def __init__(self, scene: sm.Scene, width, height):
+        self.scene, self.width, self.height = scene, width, height
+        self.tile_size = 32
+
+        scene += sm.Camera(camera_type="orthographic", width=800, height=800)
+        scene += sm.Light()
+
+        # One black box per grid cell, added column by column
+        for i in range(width):
+            for k in range(height):
+                floor_tile = sm.Box(name=f"floor{i * height + k}", position=[i, k, 0], material=sm.Material.BLACK)
+                self.scene += floor_tile
+
diff --git a/integrations/Blender/simenv_blender/__init__.py b/integrations/Blender/simenv_blender/__init__.py
index 2a5431f9..329cc5ef 100644
--- a/integrations/Blender/simenv_blender/__init__.py
+++ b/integrations/Blender/simenv_blender/__init__.py
@@ -11,27 +11,29 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-bl_info = {
-    "name" : "simenv",
-    "author" : "Hugging Face",
-    "description" : "",
-    "blender" : (3, 2, 0),
-    "version" : (0, 0, 1),
-    "location" : "View3D",
-    "warning" : "",
-    "category" : "Simulation"
-}
-
 import bpy
 from .simenv_op import SIMENV_OT_ImportScene
 from .simenv_pnl import SIMENV_PT_Panel
 
+bl_info = {
+    "name": "simenv",
+    "author": "Hugging Face",
+    "description": "",
+    "blender": (3, 2, 0),
+    "version": (0, 0, 1),
+    "location": "View3D",
+    "warning": "",
+    "category": "Simulation"
+}
+
 classes = (SIMENV_OT_ImportScene, SIMENV_PT_Panel)
 
+
 def register():
     for c in classes:
         bpy.utils.register_class(c)
 
+
 def unregister():
     for c in classes:
         bpy.utils.unregister_class(c)
diff --git a/integrations/Blender/simenv_blender/client.py b/integrations/Blender/simenv_blender/client.py
index 283b3a92..63ed9684 100644
--- a/integrations/Blender/simenv_blender/client.py
+++ b/integrations/Blender/simenv_blender/client.py
@@ -1,6 +1,4 @@
 import socket
-import json
-import base64
 
 
 class Client:
diff --git a/integrations/Blender/simenv_blender/simenv_op.py b/integrations/Blender/simenv_blender/simenv_op.py
index 746190bb..8ce88c59 100644
--- a/integrations/Blender/simenv_blender/simenv_op.py
+++ b/integrations/Blender/simenv_blender/simenv_op.py
@@ -1,4 +1,3 @@
-import bpy
 from bpy.types import Operator
 from .simulator import Simulator
 
diff --git a/integrations/Blender/simenv_blender/simenv_pnl.py b/integrations/Blender/simenv_blender/simenv_pnl.py
index f11df9a4..2361776d 100644
--- a/integrations/Blender/simenv_blender/simenv_pnl.py
+++ b/integrations/Blender/simenv_blender/simenv_pnl.py
@@ -1,6 +1,7 @@
 import bpy
 from bpy.types import Panel
 
+
 class SIMENV_PT_Panel(Panel):
     bl_space_type = "VIEW_3D"
     bl_region_type = "UI"
diff --git a/integrations/Blender/simenv_blender/simulator.py b/integrations/Blender/simenv_blender/simulator.py
index 55e06d23..3529872a 100644
--- a/integrations/Blender/simenv_blender/simulator.py
+++ b/integrations/Blender/simenv_blender/simulator.py
@@ -1,8 +1,6 @@
-from email import message
 import json
 import os
 import base64
-from pickle import BINPUT
 import bpy
 from .client import Client
 from pathlib import Path
diff --git a/integrations/Godot/simenv-godot/Scenes/scene.tscn b/integrations/Godot/simenv-godot/Scenes/scene.tscn
index 1bd497e3..705f7037 100644
--- a/integrations/Godot/simenv-godot/Scenes/scene.tscn
+++ b/integrations/Godot/simenv-godot/Scenes/scene.tscn
@@ -1,7 +1,7 @@
-[gd_scene load_steps=7 format=3 uid="uid://bpe4rf720wc7w"]
+[gd_scene load_steps=7 format=3 uid="uid://bmr58mk5sqro3"]
 
-[ext_resource type="Script" path="res://SimEnv/Simulator.gd" id="1_oo243"]
-[ext_resource type="Script" path="res://SimEnv/Camera.gd" id="2_wa3b4"]
+[ext_resource type="Script" path="res://SimEnv/simulator.gd" id="1_u0wd1"]
+[ext_resource type="Script" path="res://SimEnv/camera.gd" id="2_3niri"]
 
 [sub_resource type="PhysicalSkyMaterial" id="PhysicalSkyMaterial_tieyq"]
 ground_color = Color(0.247059, 0.172549, 0.0823529, 1)
@@ -21,7 +21,7 @@ glow_enabled = true
 [sub_resource type="CameraEffects" id="CameraEffects_noyl2"]
 
 [node name="Root" type="Node"]
-script = ExtResource("1_oo243")
+script = ExtResource("1_u0wd1")
 
 [node name="BaseWorld" type="Node3D" parent="."]
 
@@ -30,7 +30,7 @@ transform = Transform3D(-0.597625, 0.452977, -0.661556, -1.49012e-08, 0.825113,
 environment = SubResource("Environment_ss83i")
 effects = SubResource("CameraEffects_noyl2")
 current = true
-script = ExtResource("2_wa3b4")
+script = ExtResource("2_3niri")
 
 [node name="DirectionalLight3D" type="DirectionalLight3D" parent="BaseWorld"]
 transform = Transform3D(-0.812844, -0.527265, 0.247543, -0.0232084, 0.453958, 0.890721, -0.58202, 0.718271, -0.381234, 0, 0, 0)
diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd
index 973f25fd..ed1e8013 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Client.gd
@@ -1,5 +1,15 @@
-extends Node
 class_name Client
+extends Node
+# This class sets up the TCP client to receive data
+# from the Python SimEnv API through the TCP server
+#
+# Reading the stream is synchronized on the _physics_process
+# Physics will only step if the command received tells it to do so
+# (see SimEnv/Commands/step.gd)
+#
+# Data is received by chunks of _chunk_size
+# _warmed_up is a hacky bugfix to start the TCP stream before the physics sync
+
 
 signal connected
 signal data
@@ -11,17 +21,21 @@ var _stream: StreamPeerTCP = StreamPeerTCP.new()
 var _chunk_size: int = 1024
 var _warmed_up: bool = false
 
+
 func _ready() -> void:
 	_status = _stream.get_status()
 
+
 func _physics_process(_delta):
 	# this is called at a fixed rate
 	update_status()
 
 	if _status == _stream.STATUS_CONNECTED:
+		# to sync commands with the physics steps
 		get_tree().paused = true
 		read()
 
+
 func update_status():
 	_stream.poll()
 	var new_status: int = _stream.get_status()
@@ -40,6 +54,7 @@ func update_status():
 				print("Error with socket stream.")
 				emit_signal("error")
 
+
 func read():
 	update_status()
 	var available_bytes: int = _stream.get_available_bytes()
@@ -66,6 +81,7 @@ func read():
 		else:
 			get_tree().paused = false
 
+
 func connect_to_host(host: String, port: int) -> void:
 	print("Connecting to %s:%d" % [host, port])
 	if _status == _stream.STATUS_CONNECTED:
@@ -76,6 +92,7 @@ func connect_to_host(host: String, port: int) -> void:
 		_stream.disconnect_from_host()
 		emit_signal("error")
 
+
 func send(out_data: PackedByteArray) -> bool:
 	if _status != _stream.STATUS_CONNECTED:
 		print("Error: Stream is not currently connected.")
diff --git a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd
index 87bede67..bc2b4317 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Bridge/Command.gd
@@ -1,11 +1,13 @@
-extends Node
 class_name Command
+extends Node
+# Dispatches a command, by its type name, to the matching entry in _commands
 
 signal callback
 
 var content : Variant
 var _commands : Dictionary
 
+
 func load_commands():
 	var directory: Directory = Directory.new()
 	var com_path : String = "res://SimEnv/Commands"
@@ -24,11 +26,13 @@ func load_commands():
 
 	directory.list_dir_end()
 
+
 func execute(type: String) -> void:
 	if type in _commands:
 		_commands[type].execute(content)
 	else:
 		print("Unknown command.")
-		
+
+
 func _handle_callback(callback_data: PackedByteArray):
 	emit_signal("callback", callback_data)
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd
deleted file mode 100644
index e3b10e71..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/BuildScene.gd
+++ /dev/null
@@ -1,15 +0,0 @@
-extends Node
-
-signal callback
-
-func execute(content) -> void:
-	var content_bytes : PackedByteArray = Marshalls.base64_to_raw(content["b64bytes"])
-	
-	var gltf_state : GLTFState = GLTFState.new()
-	var gltf_doc : GLTFDocument = GLTFDocument.new()
-	
-	gltf_doc.append_from_buffer(content_bytes, "", gltf_state)
-	var gltf_scene = gltf_doc.generate_scene(gltf_state)
-	get_tree().current_scene.add_child(gltf_scene)
-	
-	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd
index 707cbdf8..b0b883d7 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Close.gd
@@ -1,7 +1,9 @@
 extends Node
+# Close the application
 
 signal callback
 
+
 func execute(_content):
 	get_tree().quit()
 	get_tree().paused = false
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd
deleted file mode 100644
index 66e85fc9..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetDone.gd
+++ /dev/null
@@ -1,6 +0,0 @@
-extends Node
-
-signal callback
-
-func execute(_content):
-	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd
deleted file mode 100644
index 66e85fc9..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetObservation.gd
+++ /dev/null
@@ -1,6 +0,0 @@
-extends Node
-
-signal callback
-
-func execute(_content):
-	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd
deleted file mode 100644
index 66e85fc9..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/GetReward.gd
+++ /dev/null
@@ -1,6 +0,0 @@
-extends Node
-
-signal callback
-
-func execute(_content):
-	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd
index a2f5651c..2e1c8a8d 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Reset.gd
@@ -2,6 +2,7 @@ extends Node
 
 signal callback
 
+
 func execute(_content):
 	get_tree().paused = false
 	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd
index a2f5651c..68c826a8 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Commands/Step.gd
@@ -1,7 +1,10 @@
 extends Node
+# Handles the stepping of the simulation
+# Unpause the application to run a step of _physics_process
 
 signal callback
 
+
 func execute(_content):
 	get_tree().paused = false
 	emit_signal("callback", PackedByteArray([97, 99, 107]))
diff --git a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd b/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd
deleted file mode 100644
index e3bfaaba..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Mods/ExampleMod.gd
+++ /dev/null
@@ -1,5 +0,0 @@
-extends Node
-
-
-func execute(content):
-	print(content)
diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd
deleted file mode 100644
index 87b29ae6..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Simulation/Agent.gd
+++ /dev/null
@@ -1,66 +0,0 @@
-extends Node
-class_name Agent
-
-
-class Actions:
-	var name: 		String
-	var dist: 		String
-	var available: 	Array = []
-	var forward: 	float = 0.0
-	var move_right: float = 0.0
-	var turn_right: float = 0.0
-	
-	func set_action(step_action: Array) -> void:
-		pass
-
-
-class DiscreteActions:
-	extends Actions
-	
-	func set_action(step_action: Array) -> void:
-		var istep_action: int = int(step_action[0])
-		forward = 0.0
-		move_right = 0.0
-		turn_right = 0.0
-		
-		match available[istep_action]:
-			"move_foward":
-				forward = 1.0
-			"move_backward":
-				forward = -1.0
-			"move_left":
-				move_right = 1.0
-			"move_right":
-				move_right = -1.0
-			"turn_right":
-				turn_right = 1.0
-			"turn_left":
-				turn_right = -1.0
-			_:
-				print("Invalid action.")
-
-
-class ContinuousActions:
-	extends Actions
-	
-	func set_action(step_action: Array) -> void:
-		for i in range(len(step_action)):
-			match available[i]:
-				"move_forward_backward":
-					forward = step_action[i]
-				"move_left_right":
-					move_right = step_action[i]
-				"turn_left_right":
-					turn_right = step_action[i]
-				_:
-					print("Invalid action.")
-
-
-# Called when the node enters the scene tree for the first time.
-func _ready():
-	pass # Replace with function body.
-
-
-# Called every frame. 'delta' is the elapsed time since the previous frame.
-func _process(delta):
-	pass
diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd
deleted file mode 100644
index e08925d4..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Simulation/RewardFunciton.gd
+++ /dev/null
@@ -1,11 +0,0 @@
-extends Node
-
-
-# Called when the node enters the scene tree for the first time.
-func _ready():
-	pass # Replace with function body.
-
-
-# Called every frame. 'delta' is the elapsed time since the previous frame.
-func _process(delta):
-	pass
diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd
deleted file mode 100644
index e08925d4..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimAgentBase.gd
+++ /dev/null
@@ -1,11 +0,0 @@
-extends Node
-
-
-# Called when the node enters the scene tree for the first time.
-func _ready():
-	pass # Replace with function body.
-
-
-# Called every frame. 'delta' is the elapsed time since the previous frame.
-func _process(delta):
-	pass
diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd b/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd
deleted file mode 100644
index e08925d4..00000000
--- a/integrations/Godot/simenv-godot/SimEnv/Simulation/SimObjectBase.gd
+++ /dev/null
@@ -1,11 +0,0 @@
-extends Node
-
-
-# Called when the node enters the scene tree for the first time.
-func _ready():
-	pass # Replace with function body.
-
-
-# Called every frame. 'delta' is the elapsed time since the previous frame.
-func _process(delta):
-	pass
diff --git a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd
index 74cc1a7b..ef11c5f6 100644
--- a/integrations/Godot/simenv-godot/SimEnv/Simulator.gd
+++ b/integrations/Godot/simenv-godot/SimEnv/Simulator.gd
@@ -1,13 +1,20 @@
+class_name Simulator
 extends Node
+# Manage the global simulation process
+# Get data from TCP client and call the command dispatch
+#
+# The core function of this class is to decode the data into json
+# and send this json data to the commands
 
 const HOST : String = "127.0.0.1"
 const PORT : int = 55000
 const RECONNECT_TIMEOUT: float = 3.0
 
+var agent
+
 var _client : Client = Client.new()
 var _command : Command = Command.new()
 
-var agent
 
 func _ready() -> void:
 	_client.connect("connected", _handle_client_connected)
@@ -22,13 +29,16 @@ func _ready() -> void:
 	_command.load_commands()
 	_client.connect_to_host(HOST, PORT)
 
+
 func _connect_after_timeout(timeout: float) -> void:
 	await get_tree().create_timer(timeout).timeout
 	_client.connect_to_host(HOST, PORT)
 
+
 func _handle_client_connected() -> void:
 	print("Client connected to server.")
 
+
 func _handle_client_data(data: PackedByteArray) -> void:
 	var str_data : String = data.get_string_from_utf8()
 	
@@ -44,14 +54,17 @@ func _handle_client_data(data: PackedByteArray) -> void:
 	else:
 		print("Error parsing data.")
 
+
 func _handle_client_disconnected() -> void:
 	print("Client disconnected from server.")
 	_connect_after_timeout(RECONNECT_TIMEOUT)
 
+
 func _handle_client_error() -> void:
 	print("Client error.")
 	_connect_after_timeout(RECONNECT_TIMEOUT)
 
+
 func _handle_callback(callback_data: PackedByteArray) -> void:
 	print("Sending callback.")
 	_client.send(callback_data)
diff --git a/integrations/Godot/simenv-godot/project.godot b/integrations/Godot/simenv-godot/project.godot
index efc1f3ac..ee265053 100644
--- a/integrations/Godot/simenv-godot/project.godot
+++ b/integrations/Godot/simenv-godot/project.godot
@@ -12,22 +12,34 @@ _global_script_classes=[{
 "base": "Node",
 "class": &"Agent",
 "language": &"GDScript",
-"path": "res://SimEnv/Simulation/Agent.gd"
+"path": "res://SimEnv/RLAgents/agent.gd"
+}, {
+"base": "Node",
+"class": &"AgentManager",
+"language": &"GDScript",
+"path": "res://SimEnv/RLAgents/agent_manager.gd"
 }, {
 "base": "Node",
 "class": &"Client",
 "language": &"GDScript",
-"path": "res://SimEnv/Bridge/Client.gd"
+"path": "res://SimEnv/Bridge/client.gd"
 }, {
 "base": "Node",
 "class": &"Command",
 "language": &"GDScript",
-"path": "res://SimEnv/Bridge/Command.gd"
+"path": "res://SimEnv/Bridge/command.gd"
+}, {
+"base": "Node",
+"class": &"Simulator",
+"language": &"GDScript",
+"path": "res://SimEnv/simulator.gd"
 }]
 _global_script_class_icons={
 "Agent": "",
+"AgentManager": "",
 "Client": "",
-"Command": ""
+"Command": "",
+"Simulator": ""
 }
 
 [application]