meta-pytorch · VivekSil · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/.github/workflows/deploy-hf-env.yml b/.github/workflows/deploy-hf-env.yml
@@ -15,6 +15,7 @@ on:
           - 'chat_env'
           - 'atari_env'
           - 'openspiel_env'
+          - 'maze_env'
       custom_environment:
         description: 'Custom environment to deploy (leave empty for none)'
         required: false
@@ -63,7 +64,7 @@ jobs:
             if [ "${{ github.event.inputs.environment }}" = "all" ]; then
               echo "deploy_all=true" >> $GITHUB_OUTPUT
               echo "use_matrix=true" >> $GITHUB_OUTPUT
-              echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT
+              echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,maze_env" >> $GITHUB_OUTPUT
               echo "Manual trigger - deploying all environments with matrix"
             else
               echo "deploy_all=false" >> $GITHUB_OUTPUT
@@ -78,14 +79,14 @@ jobs:
           if git diff --name-only HEAD~1 HEAD | grep -E '^src/core/' > /dev/null; then
             echo "deploy_all=true" >> $GITHUB_OUTPUT
             echo "use_matrix=true" >> $GITHUB_OUTPUT
-            echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT
+            echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,maze_env" >> $GITHUB_OUTPUT
             echo "Core files changed - deploying all environments with matrix"
             exit 0
           fi
 
           # Check which specific environments changed
           changed_envs=()
-          for env in echo_env coding_env chat_env atari_env openspiel_env; do
+          for env in echo_env coding_env chat_env atari_env openspiel_env maze_env; do
             if git diff --name-only HEAD~1 HEAD | grep -E "^src/envs/$env/" > /dev/null; then
               changed_envs+=("$env")
             fi
@@ -110,7 +111,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env]
+        environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env, maze_env]
     permissions:
       contents: read
 

diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
@@ -79,6 +79,8 @@ jobs:
             dockerfile: src/envs/atari_env/server/Dockerfile
           - name: git-env
             dockerfile: src/envs/git_env/server/Dockerfile
+          - name: maze-env
+            dockerfile: src/envs/maze_env/server/Dockerfile
           - name: my-env  # Add your environment here
             dockerfile: src/envs/connect4_env/server/Dockerfile
           - name: textarena-env

diff --git a/examples/maze_human.py b/examples/maze_human.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Interactive (human-played) example of using Maze environment with OpenEnv.
+
+This demonstrates:
+1. Connecting to the Maze environment server
+2. Resetting the environment
+3. Taking actions
+4. Observing rewards
+5. Inspecting environment state
+
+Usage:
+    python examples/maze_human.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    """Play one Maze episode interactively: reset, prompt for each move, then print the final state."""
+    print("🧩 Simple Maze Environment Example")
+    print("=" * 60)
+
+    # Connect to environment server
+    # Ensure server is running: python -m envs.maze_env.server.app
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+            [0, 1, 0, 0, 0, 0, 0, 0],
+            [0, 1, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 1, 0, 1, 0],
+            [0, 1, 0, 1, 0, 0, 0, 0],
+            [1, 0, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 0, 1, 1, 1],
+            [0, 1, 1, 0, 0, 0, 0, 0],
+            [0, 0, 0, 0, 0, 1, 0, 0]
+        ])
+    try:
+        # Reset environment
+        print("\n📍 Resetting environment...")
+        result = env.reset()
+
+        print(f"   Initial position: {result.observation.position}")
+        print(f"   Legal actions: {result.observation.legal_actions}")
+        # Note: the initial total reward should be 0, but it has been observed not to reset when this example is re-run within the same server app session
+        print(f"   Initial Total reward: {result.observation.total_reward}")
+        # Run one episode
+        print("\n🚶 Navigating through maze...")
+        step = 0
+        reward = result.observation.total_reward or 0  # defined even if the loop body never runs
+        while not result.done and step < 25:
+            # Show the current state and ask the human player for the next action
+            print(f"   Current position: {result.observation.position}")
+            print(f"   Legal actions: {result.observation.legal_actions}")
+            env.render_ascii_maze(maze,result.observation.position,[0,0],[maze.shape[0],maze.shape[1]])
+            action_id = int(input("Make any move from the legal actions"))
+            # Take action
+            result = env.step(MazeAction(action=action_id))
+            reward = result.observation.total_reward or 0
+            print(f"   Step {step + 1}: action={action_id}, pos={result.observation.position}, reward={reward:.2f}, done={result.done}")
+            step += 1
+            print("-----------------------------------------------------")
+
+        print("\n✅ Episode finished!")
+        print(f"   Total steps: {step}")
+        print(f"   Total reward: {reward}")
+
+        # Get environment state
+        state = env.state()
+        print("\n📊 Environment State:")
+        print(f"   Episode ID: {state.episode_id}")
+        print(f"   Step count: {state.step_count}")
+        print(f"   Done: {state.done}")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\n👋 Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/maze_simple.py b/examples/maze_simple.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Simple example of using Maze environment with OpenEnv.
+
+This demonstrates:
+1. Connecting to the Maze environment server
+2. Resetting the environment
+3. Taking actions
+4. Observing rewards
+5. Inspecting environment state
+
+Usage:
+    python examples/maze_simple.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    print("🧩 Simple Maze Environment Example")
+    print("=" * 60)
+
+    # Connect to environment server
+    # Ensure server is running: python -m envs.maze_env.server.app
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+            [0, 1, 0, 0, 0, 0, 0, 0],
+            [0, 1, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 1, 0, 1, 0],
+            [0, 1, 0, 1, 0, 0, 0, 0],
+            [1, 0, 0, 1, 0, 1, 0, 0],
+            [0, 0, 0, 1, 0, 1, 1, 1],
+            [0, 1, 1, 0, 0, 0, 0, 0],
+            [0, 0, 0, 0, 0, 1, 0, 0]
+        ])
+    try:
+        # Reset environment
+        print("\n📍 Resetting environment...")
+        result = env.reset()
+
+        print(f"   Initial position: {result.observation.position}")
+        print(f"   Legal actions: {result.observation.legal_actions}")
+        # Note: the initial total reward should be 0, but it has been observed not to reset when this example is re-run within the same server app session
+        print(f"   Initial Total reward: {result.observation.total_reward}")
+
+        # Run one episode
+        print("\n🚶 Navigating through maze...")
+        step = 0
+        total_reward = 0
+
+        while not result.done and step < 20:
+            # Cycle deterministically through the legal actions (index = step modulo their count); this is not a random choice
+            print(f"   Current position: {result.observation.position}")
+            print(f"   Legal actions: {result.observation.legal_actions}")
+            env.render_ascii_maze(maze,result.observation.position,[0,0],[maze.shape[0],maze.shape[1]])
+            action_id = result.observation.legal_actions[step % len(result.observation.legal_actions)]
+            # Take action
+            result = env.step(MazeAction(action=action_id))
+
+            reward = result.reward or 0
+            total_reward += reward
+
+            print(f"   Step {step + 1}: action={action_id}, pos={result.observation.position}, reward={reward:.2f}, done={result.done}")
+            step += 1
+            print("-----------------------------------------------------")
+
+        print("\n✅ Episode finished!")
+        print(f"   Total steps: {step}")
+        print(f"   Total reward: {total_reward}")
+
+        # Get environment state
+        state = env.state()
+        print("\n📊 Environment State:")
+        print(f"   Episode ID: {state.episode_id}")
+        print(f"   Step count: {state.step_count}")
+        print(f"   Done: {state.done}")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\n👋 Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/deploy_to_hf.sh b/scripts/deploy_to_hf.sh
@@ -290,6 +290,13 @@ DOCKERFILE_EOF
             echo "OpenSpiel builds can take 10-15 minutes due to C++ compilation"
             return  # Skip the common parts since OpenSpiel has its own complete Dockerfile
             ;;
+        "maze_env")
+         cat >> "$CURRENT_STAGING_DIR/Dockerfile" << 'DOCKERFILE_EOF'
+# Install additional dependencies for MazeEnvironment
+RUN pip install --no-cache-dir numpy
+DOCKERFILE_EOF
+            # Maze env requires numpy (installed above); the common parts are still appended below
+            ;;
     esac
 
     # Add common parts

diff --git a/scripts/prepare_hf_deployment.sh b/scripts/prepare_hf_deployment.sh
@@ -157,6 +157,7 @@ README_EOF
         "chat_env") ENV_CLASS="ChatEnv" ;;
         "atari_env") ENV_CLASS="AtariEnv" ;;
         "openspiel_env") ENV_CLASS="OpenSpielEnv" ;;
+        "maze_env") ENV_CLASS="MazeEnv" ;;
         *) ENV_CLASS="Env" ;;
     esac