bcollazo · bcollazo · Dec 21, 2025 · Dec 21, 2025 · Dec 21, 2025 · Dec 21, 2025
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,10 @@ data/
 profile.pstats
 catanatron-venv
 .DS_Store
+wandb
+videos
+models
+runs
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/README.md b/README.md
@@ -5,13 +5,14 @@
 ![Discord](https://img.shields.io/discord/1385302652014825552)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/bcollazo/catanatron/blob/master/examples/Overview.ipynb)
 
-Catanatron is a high-performance simulator and strong AI player for Settlers of Catan. You can run thousands of games in the order of seconds. The goal is to find the strongest Settlers of Catan bot possible. 
+Catanatron is a high-performance simulator and strong AI player for Settlers of Catan. You can run thousands of games on the order of seconds. The goal is to find the strongest Settlers of Catan bot possible.
 
 Get Started with the Full Documentation: https://docs.catanatron.com
 
 Join our Discord: https://discord.gg/FgFmb75TWd!
 
 ## Command Line Interface
+
 Catanatron provides a `catanatron-play` CLI tool to run large scale simulations.
 
 <p align="left">
@@ -22,26 +23,30 @@ Catanatron provides a `catanatron-play` CLI tool to run large scale simulations.
 
 1. Clone the repository:
 
-    ```bash
-    git clone git@github.com:bcollazo/catanatron.git
-    cd catanatron/
-    ```
-2. Create a virtual environment (requires Python 3.11 or higher) 
+   ```bash
+   git clone git@github.com:bcollazo/catanatron.git
+   cd catanatron/
+   ```
+
+2. Create a virtual environment (requires Python 3.11 or higher)
+
+   ```bash
+   python -m venv venv
+   source ./venv/bin/activate
+   # ./venv/Scripts/Activate.ps1 (on Windows)
+   ```
 
-    ```bash
-    python -m venv venv
-    source ./venv/bin/activate
-    ```
 3. Install dependencies
 
-    ```bash
-    pip install -e .
-    ```
-4. (Optional) Install developer and advanced dependencies 
+   ```bash
+   pip install -e .
+   ```
+
+4. (Optional) Install developer and advanced dependencies
 
-    ```bash
-    pip install -e ".[web,gym,dev]"
-    ```
+   ```bash
+   pip install -e ".[web,gym,dev]"
+   ```
 
 ### Usage
 
@@ -52,13 +57,13 @@ catanatron-play --players=R,R,R,W --num=100
 ```
 
 Generate datasets from the games to analyze:
+
 ```bash
 catanatron-play --num 100 --output my-data-path/ --output-format json
 ```
 
 See more examples at https://docs.catanatron.com.
 
-
 ## Graphical User Interface
 
 We provide Docker images so that you can watch, inspect, and play games against Catanatron via a web UI!
@@ -67,15 +72,15 @@ We provide Docker images so that you can watch, inspect, and play games against
  <img src="https://raw.githubusercontent.com/bcollazo/catanatron/master/docs/source/_static/CatanatronUI.png">
 </p>
 
-
 ### Installation
 
 1. Ensure you have Docker installed (https://docs.docker.com/engine/install/)
 2. Run the `docker-compose.yaml` in the root folder of the repo:
 
-    ```bash
-    docker compose up
-    ```
+   ```bash
+   docker compose up
+   ```
+
 3. Visit http://localhost:3000 in your browser!
 
 ## Python Library
@@ -100,14 +105,17 @@ print(game.play())  # returns winning color
 See more at http://docs.catanatron.com
 
 ## Gymnasium Interface
+
 For Reinforcement Learning, catanatron provides an Open AI / Gymnasium Environment.
 
 Install it with:
+
 ```bash
 pip install -e .[gym]
 ```
 
 and use it like:
+
 ```python
 import random
 import gymnasium
@@ -128,8 +136,8 @@ env.close()
 
 See more at: https://docs.catanatron.com
 
-
 ## Documentation
+
 Full documentation here: https://docs.catanatron.com
 
 ## Contributing
@@ -144,6 +152,5 @@ coverage run --source=catanatron -m pytest tests/ && coverage report
 See more at: https://docs.catanatron.com
 
 ## Appendix
-See the motivation of the project here: [5 Ways NOT to Build a Catan AI](https://medium.com/@bcollazo2010/5-ways-not-to-build-a-catan-ai-e01bc491af17).
-
 
+See the motivation of the project here: [5 Ways NOT to Build a Catan AI](https://medium.com/@bcollazo2010/5-ways-not-to-build-a-catan-ai-e01bc491af17).
diff --git a/catanatron/catanatron/cli/play.py b/catanatron/catanatron/cli/play.py
@@ -7,9 +7,8 @@
 from rich.console import Console
 from rich.table import Table
 from rich.progress import Progress
-from rich.progress import Progress, BarColumn, TimeRemainingColumn
+from rich.progress import BarColumn, TimeRemainingColumn
 from rich import box
-from rich.console import Console
 from rich.theme import Theme
 from rich.text import Text
 
@@ -207,7 +206,7 @@ class OutputOptions:
 class GameConfigOptions:
     discard_limit: int = 7
     vps_to_win: int = 10
-    catan_map: Literal["BASE", "TOURNAMENT", "MINI"] = "BASE"
+    map_type: Literal["BASE", "TOURNAMENT", "MINI"] = "BASE"
 
 
 COLOR_TO_RICH_STYLE = {
@@ -238,7 +237,7 @@ def play_batch_core(num_games, players, game_config, accumulators=[]):
     for _ in range(num_games):
         for player in players:
             player.reset_state()
-        catan_map = build_map(game_config.catan_map)
+        catan_map = build_map(game_config.map_type)
         game = Game(
             players,
             discard_limit=game_config.discard_limit,
@@ -275,7 +274,10 @@ def play_batch(
 
             accumulators.append(
                 CsvDataAccumulator(
-                    output_options.output, output_options.include_board_tensor
+                    tuple(p.color for p in players),
+                    game_config.map_type,
+                    output_options.output,
+                    output_options.include_board_tensor,
                 )
             )
         elif output_options.output_format == "parquet":
@@ -284,7 +286,10 @@ def play_batch(
 
             accumulators.append(
                 ParquetDataAccumulator(
-                    output_options.output, output_options.include_board_tensor
+                    tuple(p.color for p in players),
+                    game_config.map_type,
+                    output_options.output,
+                    output_options.include_board_tensor,
                 )
             )
         elif output_options.output_format == "json":

diff --git a/catanatron/catanatron/features.py b/catanatron/catanatron/features.py
@@ -12,7 +12,7 @@
 )
 from catanatron.models.board import STATIC_GRAPH, get_edges, get_node_distances
 from catanatron.models.map import NUM_TILES, CatanMap, build_map, number_probability
-from catanatron.models.player import Player, Color, SimplePlayer
+from catanatron.models.player import Color, SimplePlayer
 from catanatron.models.enums import (
     DEVELOPMENT_CARDS,
     RESOURCES,
@@ -106,7 +106,7 @@ def resource_hand_features(game: Game, p0_color: Color):
                 ]
             for card in DEVELOPMENT_CARDS:
                 features[f"P0_{card}_IN_HAND"] = player_state[key + f"_{card}_IN_HAND"]
-            features[f"P0_HAS_PLAYED_DEVELOPMENT_CARD_IN_TURN"] = player_state[
+            features["P0_HAS_PLAYED_DEVELOPMENT_CARD_IN_TURN"] = player_state[
                 key + "_HAS_PLAYED_DEVELOPMENT_CARD_IN_TURN"
             ]
 
@@ -132,7 +132,7 @@ def map_tile_features(catan_map: CatanMap, robber_coordinate):
     for tile_id, tile in catan_map.tiles_by_id.items():
         for resource in RESOURCES:
             features[f"TILE{tile_id}_IS_{resource}"] = tile.resource == resource
-        features[f"TILE{tile_id}_IS_DESERT"] = tile.resource == None
+        features[f"TILE{tile_id}_IS_DESERT"] = tile.resource is None
         features[f"TILE{tile_id}_PROBA"] = (
             0 if tile.resource is None else number_probability(tile.number)
         )

diff --git a/catanatron/catanatron/gym/accumulators.py b/catanatron/catanatron/gym/accumulators.py
@@ -1,34 +1,40 @@
 import os
-from collections import defaultdict
 import time
+from collections import defaultdict
+from typing import Tuple, Literal
 
-from catanatron.utils import format_secs
 import numpy as np
 import pandas as pd
 
+from catanatron import Action, Color, Game
 from catanatron.features import create_sample
 from catanatron.game import GameAccumulator
 from catanatron.gym.board_tensor_features import create_board_tensor
-from catanatron.gym.envs.catanatron_env import to_action_space, to_action_type_space
+from catanatron.gym.envs.action_space import to_action_space, to_action_type_space
 from catanatron.gym.utils import (
     DISCOUNT_FACTOR,
     get_tournament_total_return,
     get_victory_points_total_return,
     populate_matrices,
     simple_total_return,
 )
+from catanatron.utils import format_secs
 
 
 class ReinforcementLearningAccumulator(GameAccumulator):
     def __init__(
         self,
+        player_colors: Tuple[Color],
+        map_type: Literal["BASE", "TOURNAMENT", "MINI"] = "BASE",
         include_board_tensor=True,
         total_return_fns={
             "RETURN": simple_total_return,
             "TOURNAMENT_RETURN": get_tournament_total_return,
             "VICTORY_POINTS_RETURN": get_victory_points_total_return,
         },
     ):
+        self.player_colors = player_colors
+        self.map_type = map_type
         self.include_board_tensor = include_board_tensor
         # TODO: Generalize to "rewards_fn" that can yield intermediary rewards
         #   while still rewarding big on terminal states.
@@ -45,14 +51,17 @@ def before(self, game):
         if self.include_board_tensor:
             self.data["board_tensors"] = []
 
-    def step(self, game_before_action, action):
+    def step(self, game_before_action: Game, action: Action):
         self.data["color_action_indices"][action.color].append(
             len(self.data["samples"])
         )
         self.data["acting_color"].append(action.color)
         self.data["samples"].append(create_sample(game_before_action, action.color))
         self.data["actions"].append(
-            [to_action_space(action), to_action_type_space(action.action_type)]
+            [
+                to_action_space(action, self.player_colors, self.map_type),
+                to_action_type_space(action.action_type),
+            ]
         )
 
         if self.include_board_tensor:
@@ -130,8 +139,14 @@ def after(self, game):
 
 
 class CsvDataAccumulator(ReinforcementLearningAccumulator):
-    def __init__(self, output, include_board_tensor=True):
-        super().__init__(include_board_tensor)
+    def __init__(
+        self,
+        player_colors: Tuple[Color],
+        map_type: Literal["BASE", "TOURNAMENT", "MINI"],
+        output,
+        include_board_tensor=True,
+    ):
+        super().__init__(player_colors, map_type, include_board_tensor)
         self.output = output
 
     def after(self, game):
@@ -164,8 +179,14 @@ def after(self, game):
 
 
 class ParquetDataAccumulator(ReinforcementLearningAccumulator):
-    def __init__(self, output, include_board_tensor=True):
-        super().__init__(include_board_tensor)
+    def __init__(
+        self,
+        player_colors: Tuple[Color],
+        map_type: Literal["BASE", "TOURNAMENT", "MINI"],
+        output,
+        include_board_tensor=True,
+    ):
+        super().__init__(player_colors, map_type, include_board_tensor)
         self.output = output
 
     def after(self, game):