Export typing annotations when available.

PiperOrigin-RevId: 328527159
kabajau · Sep 11, 2020 · 7e7255e · 7e7255e
1 parent 85187de
commit 7e7255e
Show file tree

Hide file tree

Showing 29 changed files with 3,846 additions and 0 deletions.
diff --git a/physics_planning_games/README.md b/physics_planning_games/README.md
@@ -0,0 +1,108 @@
+# Physically Embedded Planning Environments
+
+This repository contains the three environments introduced in
+'Physically Embedded Planning Problems: New Challenges for Reinforcement
+Learning'
+
+If you use this package, please cite our accompanying [tech report]:
+
+```
+@misc{,
+    title={Physically Embedded Planning Problems: New Challenges for
+           Reinforcement Learning},
+    author={Mehdi Mirza and Andrew Jaegle and Jonathan J. Hunt and Arthur Guez and
+            Saran Tunyasuvunakool and Alistair Muldal and Théophane Weber and
+            Peter Karkus and Sébastien Racanière and Lars Buesing and
+            Timothy Lillicrap and Nicolas Heess},
+    year={2020},
+    eprint={},
+    archivePrefix={arXiv},
+    primaryClass={cs.RO}
+}
+```
+
+## Requirements and Installation
+
+This repository is divided into 'mujoban' and 'board_games' folders.
+Both of them are built on top of [dm_control] which requires MuJoCo. Please
+follow [these] instructions to install MuJoCo.
+Other dependencies can be installed by:
+
+```
+pip3 install -r requirements.txt
+```
+
+### Board games
+The game logic is based on [open_spiel]. Please install as instructed [here].
+[gnugo] is required to play the game of Go against a non-random opponent. [gnugo] can be installed in Ubuntu by:
+```
+apt install gnugo
+```
+Board game scripts expect the gnugo binary to be at `/usr/games/gnugo`.
+## Example usage
+
+The code snippets below show examples of instantiating each of the environments.
+
+### Mujoban
+
+```python
+from dm_control import composer
+from dm_control.locomotion import walkers
+from physics_planning_games.mujoban.mujoban import Mujoban
+from physics_planning_games.mujoban.mujoban_level import MujobanLevel
+from physics_planning_games.mujoban.boxoban import boxoban_level_generator
+
+walker = walkers.JumpingBallWithHead(add_ears=True, camera_height=0.25)
+maze = MujobanLevel(boxoban_level_generator)
+task = Mujoban(walker=walker,
+               maze=maze,
+               control_timestep=0.1,
+               top_camera_height=96,
+               top_camera_width=96)
+env = composer.Environment(time_limit=1000, task=task)
+```
+
+### Board games
+
+```python
+from  physics_planning_games  import  board_games
+
+environment_name = 'go_7x7'
+env = board_games.load(environment_name=environment_name)
+```
+
+### Stepping through the environment
+
+The returned environments are of type `dm_env.Environment` and can be stepped
+through as shown here with random actions:
+
+```python
+import numpy as np
+
+timestep = env.reset()
+action_spec = env.action_spec()
+while True:
+  action = np.stack([
+      np.random.uniform(low=minimum, high=maximum)
+      for minimum, maximum in zip(action_spec.minimum, action_spec.maximum)
+  ])
+  timestep = env.step(action)
+```
+
+### Visualization
+
+For visualization of the environments, `explore.py` loads them using the [viewer]
+from [dm_control].
+
+## More details
+
+For more details please refer to the [tech report], [dm_control] and [dm_env].
+
+[tech report]: https://arxiv.org/abs/
+[dm_control]: https://github.com/deepmind/dm_control
+[dm_env]: https://github.com/deepmind/dm_env
+[gnugo]: https://www.gnu.org/software/gnugo/
+[open_spiel]: https://github.com/deepmind/open_spiel
+[here]: https://github.com/deepmind/open_spiel/blob/master/docs/install.md
+[these]: https://github.com/deepmind/dm_control#requirements-and-installation
+[viewer]: https://github.com/deepmind/dm_control/tree/master/dm_control/viewer
diff --git a/physics_planning_games/board_games/__init__.py b/physics_planning_games/board_games/__init__.py
@@ -0,0 +1,73 @@
+# Copyright 2020 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Physically-grounded board game environments."""
+
+from dm_control import composer as _composer
+
+from physics_planning_games.board_games import go as _go
+from physics_planning_games.board_games import tic_tac_toe as _tic_tac_toe
+from physics_planning_games.board_games._internal import registry as _registry
+
+# Called only after the task modules above (`go`, `tic_tac_toe`) have been
+# imported.
+# NOTE(review): presumably this finalizes the registry so that no further
+# tasks can be registered -- confirm against `_internal/registry.py`.
+_registry.done_importing_tasks()
+
+# Names reported by the registry, frozen into a tuple. `load` expects its
+# `environment_name` argument to be one of these.
+ALL = tuple(_registry.get_all_names())
+# Tags reported by the registry, frozen into a tuple. These are the values
+# accepted by `get_environments_by_tag`.
+TAGS = tuple(_registry.get_tags())
+
+
+def get_environments_by_tag(tag):
+  """Returns the names of all environments matching a given tag.
+
+  Args:
+    tag: A string from `TAGS`.
+
+  Returns:
+    A tuple of environment names.
+  """
+  return tuple(_registry.get_names_by_tag(tag))
+
+
+def load(environment_name,
+         env_kwargs=None,
+         seed=None,
+         time_limit=float('inf'),
+         strip_singleton_obs_buffer_dim=False):
+  """Loads an environment from board_games.
+
+  Args:
+    environment_name: String, the name of the environment to load. Must be in
+      `ALL`.
+    env_kwargs: extra params to pass to task creation.
+    seed: Optional, either an int seed or an `np.random.RandomState`
+      object. If None (default), the random number generator will self-seed
+      from a platform-dependent source of entropy.
+    time_limit: (optional) A float, the time limit in seconds beyond which an
+      episode is forced to terminate.
+    strip_singleton_obs_buffer_dim: (optional) A boolean, if `True`,
+      the array shape of observations with `buffer_size == 1` will not have a
+      leading buffer dimension.
+
+  Returns:
+    An instance of `composer.Environment`.
+  """
+  if env_kwargs is not None:
+    task = _registry.get_constructor(environment_name)(**env_kwargs)
+  else:
+    task = _registry.get_constructor(environment_name)()
+  return _composer.Environment(
+      task=task,
+      time_limit=time_limit,
+      strip_singleton_obs_buffer_dim=strip_singleton_obs_buffer_dim,
+      random_state=seed)
diff --git a/physics_planning_games/board_games/_internal/arenas.py b/physics_planning_games/board_games/_internal/arenas.py
@@ -0,0 +1,164 @@
+# Copyright 2020 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Board game-specific arena classes."""
+
+
+from dm_control import composer
+from dm_control.composer.observation import observable
+from dm_control.mujoco import wrapper
+
+# Robot geoms will be assigned to this group in order to disable their
+# visibility to the top-down camera. The value is used as an index into
+# `MjvOption.geomgroup` by `ArenaObservables.top_down_camera_invisible_robot`.
+ROBOT_GEOM_GROUP = 1
+
+
+class Standard(composer.Arena):
+  """"Board game-specific arena class."""
+
+  def _build(self, name=None):
+    """Initializes this arena.
+
+    Args:
+      name: (optional) A string, the name of this arena. If `None`, use the
+        model name defined in the MJCF file.
+    """
+    super(Standard, self)._build(name=name)
+
+    # Add visual assets.
+    self.mjcf_model.asset.add(
+        'texture',
+        type='skybox',
+        builtin='gradient',
+        rgb1=(0.4, 0.6, 0.8),
+        rgb2=(0., 0., 0.),
+        width=100,
+        height=100)
+    groundplane_texture = self.mjcf_model.asset.add(
+        'texture',
+        name='groundplane',
+        type='2d',
+        builtin='checker',
+        rgb1=(0.2, 0.3, 0.4),
+        rgb2=(0.1, 0.2, 0.3),
+        width=300,
+        height=300,
+        mark='edge',
+        markrgb=(.8, .8, .8))
+    groundplane_material = self.mjcf_model.asset.add(
+        'material',
+        name='groundplane',
+        texture=groundplane_texture,
+        texrepeat=(5, 5),
+        texuniform='true',
+        reflectance=0.2)
+
+    # Add ground plane.
+    self.mjcf_model.worldbody.add(
+        'geom',
+        name='ground',
+        type='plane',
+        material=groundplane_material,
+        size=(1, 1, 0.1),
+        friction=(0.4,),
+        solimp=(0.95, 0.99, 0.001),
+        solref=(0.002, 1))
+
+    # Add lighting
+    self.mjcf_model.worldbody.add(
+        'light',
+        pos=(0, 0, 1.5),
+        dir=(0, 0, -1),
+        diffuse=(0.7, 0.7, 0.7),
+        specular=(.3, .3, .3),
+        directional='false',
+        castshadow='true')
+
+    # Add some fixed cameras to the arena.
+    self._front_camera = self.mjcf_model.worldbody.add(
+        'camera',
+        name='front',
+        pos=(0., -0.6, 0.75),
+        xyaxes=(1., 0., 0., 0., 0.7, 0.75))
+
+    # Ensures a 7x7 go board fits into the view from camera
+    self._front_camera_2 = self.mjcf_model.worldbody.add(
+        'camera',
+        name='front_2',
+        pos=(0., -0.65, 0.85),
+        xyaxes=(1., 0., 0., 0., 0.85, 0.6))
+
+    self._top_down_camera = self.mjcf_model.worldbody.add(
+        'camera',
+        name='top_down',
+        pos=(0., 0., 0.5),
+        xyaxes=(1., 0., 0., 0., 1., 0.))
+
+    # Always initialize the free camera so that it points at the origin.
+    self.mjcf_model.statistic.center = (0., 0., 0.)
+
+  def _build_observables(self):
+    return ArenaObservables(self)
+
+  @property
+  def front_camera(self):
+    return self._front_camera
+
+  @property
+  def front_camera_2(self):
+    return self._front_camera_2
+
+  @property
+  def top_down_camera(self):
+    return self._top_down_camera
+
+  def attach_offset(self, entity, offset, attach_site=None):
+    """Attaches another entity at a position offset from the attachment site.
+
+    Args:
+      entity: The `Entity` to attach.
+      offset: A length 3 array-like object representing the XYZ offset.
+      attach_site: (optional) The site to which to attach the entity's model.
+        If not set, defaults to self.attachment_site.
+    Returns:
+      The frame of the attached model.
+    """
+    frame = self.attach(entity, attach_site=attach_site)
+    frame.pos = offset
+    return frame
+
+
+class ArenaObservables(composer.Observables):
+  """Observables belonging to the arena."""
+
+  @composer.observable
+  def front_camera(self):
+    return observable.MJCFCamera(mjcf_element=self._entity.front_camera)
+
+  @composer.observable
+  def front_camera_2(self):
+    return observable.MJCFCamera(mjcf_element=self._entity.front_camera_2)
+
+  @composer.observable
+  def top_down_camera(self):
+    return observable.MJCFCamera(mjcf_element=self._entity.top_down_camera)
+
+  @composer.observable
+  def top_down_camera_invisible_robot(self):
+    # Custom scene options for making robot geoms invisible.
+    robot_geoms_invisible = wrapper.MjvOption()
+    robot_geoms_invisible.geomgroup[ROBOT_GEOM_GROUP] = 0
+    return observable.MJCFCamera(mjcf_element=self._entity.top_down_camera,
+                                 scene_option=robot_geoms_invisible)