meta-pytorch · VivekSil · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
@@ -79,7 +79,8 @@ jobs:
             dockerfile: src/envs/atari_env/server/Dockerfile
           - name: git-env
             dockerfile: src/envs/git_env/server/Dockerfile
-
+          - name: maze-env
+            dockerfile: src/envs/maze_env/server/Dockerfile
     steps:
       - name: Checkout code
         uses: actions/checkout@v4

diff --git a/examples/maze_human.py b/examples/maze_human.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Interactive (human-controlled) example of using the Maze environment with OpenEnv.
+
+This demonstrates:
+1. Connecting to the Maze environment server
+2. Resetting the environment
+3. Taking actions
+4. Observing rewards
+5. Inspecting environment state
+
+Usage:
+    python examples/maze_human.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    """Play one maze episode interactively, reading each move (an integer action id) from stdin."""
+    print("🧩 Simple Maze Environment Example")
+    print("=" * 60)
+
+    # Connect to the environment server (start it first: python -m envs.maze_env.server.app)
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+            [0, 1, 0, 0, 0, 0, 0, 0],
+            [0, 1, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 1, 0, 1, 0],
+            [0, 1, 0, 1, 0, 0, 0, 0],
+            [1, 0, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 0, 1, 1, 1],
+            [0, 1, 1, 0, 0, 0, 0, 0],
+            [0, 0, 0, 0, 0, 1, 0, 0]
+        ])
+    try:
+        # Reset environment
+        print("\n📍 Resetting environment...")
+        result = env.reset()
+
+        print(f"   Initial position: {result.observation.position}")
+        print(f"   Legal actions: {result.observation.legal_actions}")
+        # NOTE: expected to be 0, but it has been observed not to reset when this example is re-run within the same server session
+        print(f"   Initial Total reward: {result.observation.total_reward}")
+        # Run one episode; seed `reward` so the summary below still works if the loop body never runs
+        print("\n🚶 Navigating through maze...")
+        step = 0
+        reward = result.observation.total_reward or 0
+        while not result.done and step < 25:
+            # Show the current state, then ask the user for the next move
+            print(f"   Current position: {result.observation.position}")
+            print(f"   Legal actions: {result.observation.legal_actions}")
+            env.render_ascii_maze(maze, result.observation.position, [0, 0], [maze.shape[0], maze.shape[1]])
+            action_id = int(input("Make any move from the legal actions"))
+            # Take action; the server reports the cumulative reward in the observation
+            result = env.step(MazeAction(action=action_id))
+            reward = result.observation.total_reward or 0
+
+            print(f"   Step {step + 1}: action={action_id}, pos={result.observation.position}, reward={reward:.2f}, done={result.done}")
+            step += 1
+            print("-----------------------------------------------------")
+
+        print("\n✅ Episode finished!")
+        print(f"   Total steps: {step}")
+        print(f"   Total reward: {reward}")
+
+        # Get environment state
+        state = env.state()
+        print("\n📊 Environment State:")
+        print(f"   Episode ID: {state.episode_id}")
+        print(f"   Step count: {state.step_count}")
+        print(f"   Done: {state.done}")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\n👋 Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/maze_simple.py b/examples/maze_simple.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Simple example of using Maze environment with OpenEnv.
+
+This demonstrates:
+1. Connecting to the Maze environment server
+2. Resetting the environment
+3. Taking actions
+4. Observing rewards
+5. Inspecting environment state
+
+Usage:
+    python examples/maze_simple.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    """Drive one scripted maze episode, cycling through the reported legal actions."""
+    print("🧩 Simple Maze Environment Example")
+    print("=" * 60)
+
+    # The server must already be up (python -m envs.maze_env.server.app)
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+        [0, 1, 0, 0, 0, 0, 0, 0],
+        [0, 1, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 1, 0, 1, 0],
+        [0, 1, 0, 1, 0, 0, 0, 0],
+        [1, 0, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 0, 1, 1, 1],
+        [0, 1, 1, 0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0, 1, 0, 0],
+    ])
+    try:
+        # Begin a fresh episode
+        print("\n📍 Resetting environment...")
+        outcome = env.reset()
+
+        first_obs = outcome.observation
+        print(f"   Initial position: {first_obs.position}")
+        print(f"   Legal actions: {first_obs.legal_actions}")
+        # NOTE: expected to be 0, but it has been observed not to reset when this example is re-run within the same server session
+        print(f"   Initial Total reward: {first_obs.total_reward}")
+        # Play a single episode, capped at 20 moves
+        print("\n🚶 Navigating through maze...")
+        moves_made = 0
+        episode_return = 0
+
+        while not (outcome.done or moves_made >= 20):
+            # Pick the next move by indexing the legal actions with the move counter (wraps around)
+            current = outcome.observation
+            print(f"   Current position: {current.position}")
+            print(f"   Legal actions: {current.legal_actions}")
+            env.render_ascii_maze(maze, current.position, [0, 0], list(maze.shape))
+            action_id = current.legal_actions[moves_made % len(current.legal_actions)]
+            outcome = env.step(MazeAction(action=action_id))
+
+            step_reward = outcome.reward or 0
+            episode_return += step_reward
+            moves_made += 1
+
+            print(f"   Step {moves_made}: action={action_id}, pos={outcome.observation.position}, reward={step_reward:.2f}, done={outcome.done}")
+            print("-----------------------------------------------------")
+
+        print("\n✅ Episode finished!")
+        print(f"   Total steps: {moves_made}")
+        print(f"   Total reward: {episode_return}")
+
+        # Query the server for the episode state
+        env_state = env.state()
+        print("\n📊 Environment State:")
+        print(f"   Episode ID: {env_state.episode_id}")
+        print(f"   Step count: {env_state.step_count}")
+        print(f"   Done: {env_state.done}")
+
+    except Exception as err:
+        print(f"\n❌ Error: {err}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\n👋 Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/envs/maze_env/README.md b/src/envs/maze_env/README.md
@@ -0,0 +1,123 @@
+# Maze Environment
+
+Integration of Maze game with the OpenEnv framework.
+
+## Architecture
+
+```
+┌────────────────────────────────────┐
+│ RL Training Code (Client)          │
+│   MazeEnv.step(action)             │
+└──────────────┬─────────────────────┘
+               │ HTTP
+┌──────────────▼─────────────────────┐
+│ FastAPI Server (Docker)            │
+│   MazeEnvironment                  │
+│     ├─ Wraps Maze environment      │
+│     └─ Agent controls player       │
+└────────────────────────────────────┘
+```
+
+## Installation & Usage
+
+### Option 1: Local Development (without Docker)
+
+**Requirements:**
+- Python 3.11+
+- NumPy
+
+```python
+from envs.maze_env import MazeEnv, MazeAction
+
+# Start local server manually
+# python -m envs.maze_env.server.app
+
+# Connect to local server
+env = MazeEnv(base_url="http://localhost:8000")
+
+# Reset environment
+result = env.reset()
+print(f"Initial position: {result.observation.position}")
+print(f"Legal actions: {result.observation.legal_actions}")
+
+# Take actions
+for _ in range(10):
+    action_id = result.observation.legal_actions[0]  # Choose first legal action
+    result = env.step(MazeAction(action=action_id))
+    print(f"Reward: {result.reward}, Done: {result.done}")
+    if result.done:
+        break
+
+# Cleanup
+env.close()
+```
+
+### Option 2: Docker (Recommended)
+
+**Build Docker image:**
+
+```bash
+cd OpenEnv
+docker build -f src/envs/maze_env/server/Dockerfile -t maze-env:latest .
+```
+
+**Use with from_docker_image():**
+
+```python
+from envs.maze_env import MazeEnv, MazeAction
+
+# Automatically starts container
+env = MazeEnv.from_docker_image("maze-env:latest")
+
+result = env.reset()
+result = env.step(MazeAction(action=0))
+
+env.close()  # Stops container
+```
+
+## Configuration
+
+### Variables
+
+- `maze`: the maze layout, stored as a NumPy array in `mazearray.py`
+
+### Example
+
+```bash
+docker run -p 8000:8000 maze-env:latest
+```
+
+## API Reference
+
+### MazeAction
+
+```python
+@dataclass
+class MazeAction(Action):
+    action: int                        # Action to be taken
+```
+
+### MazeObservation
+
+```python
+@dataclass
+class MazeObservation(Observation):
+    position: List[int]  # [row, col]
+    total_reward: float  # Total reward
+    legal_actions: List[int] = field(default_factory=list)  # Legal actions at the current position
+```
+
+### MazeState
+
+```python
+@dataclass
+class MazeState(State):
+    episode_id: str     # Episode
+    step_count: int     # Number of steps
+    done: bool = False  # Solve status
+
+```
+
+## References
+
+- [Maze Environment](https://github.com/erikdelange/Reinforcement-Learning-Maze)
diff --git a/src/envs/maze_env/__init__.py b/src/envs/maze_env/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Maze Environment Integration.
+
+This module provides integration between Maze game and the OpenEnv framework.
+"""
+
+from .client import MazeEnv
+from .models import MazeAction, MazeObservation, MazeState
+
+__all__ = ["MazeEnv", "MazeAction", "MazeObservation", "MazeState"]