Changes from all commits (65 commits)
9143920
Add agent swarm for parallel behavior investigation
claude Jan 30, 2026
498d459
Stream Claude Code output to file in real-time
claude Jan 30, 2026
efe5928
Use stream-json output format and add max_turns limit
claude Jan 30, 2026
ef5b0fd
Fix stream-json output requiring --verbose flag
claude Jan 30, 2026
f40f02e
Add GPU lock to prevent concurrent GPU operations
claude-spd1 Jan 30, 2026
567fb19
Add research_log.md for human-readable agent progress
claude-spd1 Jan 30, 2026
4c4a843
Add full timestamps to research log examples
claude-spd1 Jan 30, 2026
dcb28f4
Merge remote-tracking branch 'origin/dev' into claude/slurm-agent-swa…
claude-spd1 Jan 31, 2026
cb6e6f0
wip: Integrate agent swarm with MCP for Claude Code tool access
claude-spd1 Jan 31, 2026
06cf2e8
Fix MCP JSON-RPC response format violating spec
claude-spd1 Jan 31, 2026
39b5acb
wip: Refactor agent swarm MCP configuration to require all swarm sett…
claude-spd1 Jan 31, 2026
ae88d53
Fix agent swarm hanging at ~80% optimization
claude-spd1 Feb 1, 2026
58129b0
Simplify agent swarm env vars: 4 → 2
claude-spd1 Feb 1, 2026
b47733f
wip: Add graph artifacts to investigation research logs
claude-spd1 Feb 2, 2026
6f957a0
Merge branch 'dev' into claude/slurm-agent-swarm-lIpTu
claude-spd1 Feb 13, 2026
13cf49a
Refactor agent_swarm → investigate: single-agent, researcher-directed
claude-spd1 Feb 13, 2026
1b30e81
Fix investigation wandb_path matching
claude-spd1 Feb 13, 2026
22f9971
UI improvements: run picker arch labels, artifact graph layout, inves…
claude-spd1 Feb 13, 2026
2625f34
Fix MCP canonical/concrete key translation
claude-spd1 Feb 13, 2026
474e2f3
Move app DB from repo-local .data/ to SPD_OUT_DIR/app/
claude-spd1 Feb 13, 2026
2e6dff1
Sandbox investigation agent to MCP-only, revert DB path move
claude-spd1 Feb 13, 2026
26ff2a7
Isolate investigation agent from global Claude Code config
claude-spd1 Feb 13, 2026
edc7c58
Add topological interpretation module
ocg-goodfire Feb 19, 2026
a41cfef
Remove output-label dependency from cofiring neighbors
ocg-goodfire Feb 19, 2026
54d5a7b
Rename neighbor → related component terminology
ocg-goodfire Feb 19, 2026
c7991a9
Skip components missing labels in unification pass
ocg-goodfire Feb 20, 2026
cb18c86
Use in-memory accumulator for scan state, DB is write-only
ocg-goodfire Feb 20, 2026
b5d3a02
Request 1 GPU for autointerp/eval/intruder SLURM jobs
ocg-goodfire Feb 19, 2026
09c8bd8
Fix YAML configs to use current schema and fix misleading error messa…
danbraunai-goodfire Feb 19, 2026
b95f6bf
Fix SQLite issues on NFS: remove WAL, separate read/write connections
ocg-goodfire Feb 20, 2026
64556ba
Fix attributions SLURM passing full config instead of inner config
ocg-goodfire Feb 20, 2026
9e7d3ef
add worktrees to ignore
ocg-goodfire Feb 23, 2026
e8cd454
Rewrite dataset attribution storage: dict-of-dicts, canonical names, …
ocg-goodfire Feb 23, 2026
a116ddd
Fix alive_targets iteration: use torch.where for indices, not bool to…
ocg-goodfire Feb 23, 2026
5f98d81
Fix KeyError for embed source: CI dict doesn't include embedding layer
ocg-goodfire Feb 23, 2026
01633c5
Fix scatter_add OOB: use embedding num_embeddings instead of tokenize…
ocg-goodfire Feb 23, 2026
9118c1e
Split run.py into run_worker.py and run_merge.py
ocg-goodfire Feb 23, 2026
d0166d0
Correct attr_abs via backprop through |target|, reorganise method sig…
ocg-goodfire Feb 23, 2026
fd42030
Add merge_mem config (default 200G) to prevent merge OOM
ocg-goodfire Feb 23, 2026
223afd4
Add 3-metric selection to dataset attributions in app
ocg-goodfire Feb 23, 2026
4fc7cf1
Allow bare s-prefixed run IDs everywhere (e.g. "s-17805b61")
ocg-goodfire Feb 23, 2026
6a5d0f6
Fix AttributionRepo.open skipping valid subruns due to old-format dirs
ocg-goodfire Feb 23, 2026
08b17c9
Fix 3s lag on attribution metric toggle: O(V) linear scan per pill
ocg-goodfire Feb 23, 2026
b0df7c0
Ship token strings from backend instead of resolving vocab IDs in fro…
ocg-goodfire Feb 23, 2026
2385a82
Hide negative attribution column for non-signed metrics
ocg-goodfire Feb 23, 2026
747991a
Narrow frontend types: SignedAttributions vs UnsignedAttributions
ocg-goodfire Feb 23, 2026
e36e187
Update dataset_attributions CLAUDE.md for new storage format and 3 me…
ocg-goodfire Feb 23, 2026
206dcf0
Integrate new dataset attributions storage, lazy harvest loading, emb…
ocg-goodfire Feb 23, 2026
42fca11
Separate output/input context in prompts, reduce examples, remove err…
ocg-goodfire Feb 23, 2026
c397a2c
Add activation examples to unification prompt
ocg-goodfire Feb 23, 2026
f12aedf
Clean up prompts: human-readable keys, normalized attributions, filte…
ocg-goodfire Feb 23, 2026
98a65ae
Tweak component display, tighten error threshold to 5%
ocg-goodfire Feb 23, 2026
17a25ba
wip.
ocg-goodfire Feb 24, 2026
4781853
wip: Refactor dataset attribution harvester to track abs attributions
ocg-goodfire Feb 24, 2026
a557576
Rewrite dataset attribution storage with explicit edge types
ocg-goodfire Feb 24, 2026
48d318c
Fix embed path not removed from unembed sources in harvester
ocg-goodfire Feb 24, 2026
b44115a
Rename topological_interp → graph_interp and integrate into SPD app
ocg-goodfire Feb 24, 2026
ef4ec4b
Store raw attribution sums, normalize at query time
ocg-goodfire Feb 24, 2026
7298cd7
Fix n_batches removal, detach tensors on save, handle output source q…
ocg-goodfire Feb 24, 2026
3ccc301
Add graph interp badge to components tab, prune model graph to 500 nodes
ocg-goodfire Feb 25, 2026
63c544e
Expand graph interp badge with detail, edges, token strings, and auto…
ocg-goodfire Feb 25, 2026
889a89e
Move graph interp detail fetch into useComponentData hooks
ocg-goodfire Feb 25, 2026
e183401
tiny tidy
ocg-goodfire Feb 25, 2026
5beaa66
wip: Add embed token count normalization for dataset attributions
ocg-goodfire Feb 26, 2026
52c275e
fold in the investigator work
ocg-goodfire Feb 26, 2026
12 changes: 12 additions & 0 deletions .claude/skills/gpudash.md
@@ -0,0 +1,12 @@
---
name: gpudash
description: Check GPU availability across the SLURM cluster
user_invocable: true
---

# gpudash

Run the `gpudash` command to show GPU availability across the cluster.

## Steps
1. Run `gpudash` and show the output to the user.
1 change: 1 addition & 0 deletions .claude/worktrees/bold-elm-8kpb
Submodule bold-elm-8kpb added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/bright-fox-a4i0
Submodule bright-fox-a4i0 added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/calm-owl-v4pj
Submodule calm-owl-v4pj added at dbe066
1 change: 1 addition & 0 deletions .claude/worktrees/cozy-frolicking-stream
Submodule cozy-frolicking-stream added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/stateless-dancing-blanket
Submodule stateless-dancing-blanket added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/swift-owl-yep9
Submodule swift-owl-yep9 added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/swift-ray-amfs
Submodule swift-ray-amfs added at 356f8c
1 change: 1 addition & 0 deletions .claude/worktrees/vectorized-wiggling-whisper
Submodule vectorized-wiggling-whisper added at cb18c8
1 change: 1 addition & 0 deletions .claude/worktrees/xenodochial-germain
Submodule xenodochial-germain added at 5c9f34
4 changes: 3 additions & 1 deletion .gitignore
@@ -177,4 +177,6 @@ cython_debug/
#.idea/

**/*.db
**/*.db*

.claude/worktrees
7 changes: 1 addition & 6 deletions .mcp.json
@@ -1,8 +1,3 @@
{
"mcpServers": {
"svelte-llm": {
"type": "http",
"url": "https://svelte-llm.stanislav.garden/mcp/mcp"
}
}
"mcpServers": {}
}
85 changes: 69 additions & 16 deletions CLAUDE.md

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
@@ -56,7 +56,9 @@ spd-clustering = "spd.clustering.scripts.run_pipeline:cli"
spd-harvest = "spd.harvest.scripts.run_slurm_cli:cli"
spd-autointerp = "spd.autointerp.scripts.run_slurm_cli:cli"
spd-attributions = "spd.dataset_attributions.scripts.run_slurm_cli:cli"
spd-investigate = "spd.investigate.scripts.run_slurm_cli:cli"
spd-postprocess = "spd.postprocess.cli:cli"
spd-graph-interp = "spd.graph_interp.scripts.run_slurm_cli:cli"

[build-system]
requires = ["setuptools", "wheel"]
@@ -69,7 +71,7 @@ include = ["spd*"]
[tool.ruff]
line-length = 100
fix = true
extend-exclude = ["spd/app/frontend"]
extend-exclude = ["spd/app/frontend", ".circuits-ref"]

[tool.ruff.lint]
ignore = [
4 changes: 4 additions & 0 deletions spd/app/CLAUDE.md
@@ -15,6 +15,7 @@ This is a **rapidly iterated research tool**. Key implications:
- **Database is disposable**: Delete `.data/app/prompt_attr.db` if schema changes break things
- **Prefer simplicity**: Avoid over-engineering for hypothetical future needs
- **Fail loud and fast**: The users are a small team of highly technical people. Errors are good. We want to know immediately if something is wrong. No soft failing, assert, assert, assert
- **Token display**: Always ship token strings rendered server-side via `AppTokenizer`, never raw token IDs. For embed/output layers, `component_idx` is a token ID — resolve it to a display string in the backend response.

## Running the App

@@ -50,6 +51,9 @@ backend/
├── intervention.py # Selective component activation
├── correlations.py # Component correlations + token stats + interpretations
├── clusters.py # Component clustering
├── dataset_search.py # SimpleStories dataset search
├── agents.py # Various useful endpoints that AI agents should look at when helping
├── mcp.py # MCP (Model Context Protocol) endpoint for Claude Code
├── dataset_search.py # Dataset search (reads dataset from run config)
└── agents.py # Various useful endpoints that AI agents should look at when helping
```
22 changes: 20 additions & 2 deletions spd/app/backend/database.py
@@ -9,6 +9,7 @@
import hashlib
import io
import json
import os
import sqlite3
from pathlib import Path
from typing import Literal
@@ -24,7 +25,24 @@

# Persistent data directories
_APP_DATA_DIR = REPO_ROOT / ".data" / "app"
DEFAULT_DB_PATH = _APP_DATA_DIR / "prompt_attr.db"
_DEFAULT_DB_PATH = _APP_DATA_DIR / "prompt_attr.db"


def get_default_db_path() -> Path:
"""Get the default database path.

Checks env vars in order:
1. SPD_INVESTIGATION_DIR - investigation mode, db at dir/app.db
2. SPD_APP_DB_PATH - explicit override
3. Default: .data/app/prompt_attr.db
"""
investigation_dir = os.environ.get("SPD_INVESTIGATION_DIR")
if investigation_dir:
return Path(investigation_dir) / "app.db"
env_path = os.environ.get("SPD_APP_DB_PATH")
if env_path:
return Path(env_path)
return _DEFAULT_DB_PATH


class Run(BaseModel):
@@ -111,7 +129,7 @@ class PromptAttrDB:
"""

def __init__(self, db_path: Path | None = None, check_same_thread: bool = True):
self.db_path = db_path or DEFAULT_DB_PATH
self.db_path = db_path or get_default_db_path()
self._check_same_thread = check_same_thread
self._conn: sqlite3.Connection | None = None

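The precedence added in `get_default_db_path` (`SPD_INVESTIGATION_DIR` first, then `SPD_APP_DB_PATH`, then the repo-local default) can be sketched in isolation. The `resolve_db_path` helper below is a hypothetical stand-in that takes the environment as an argument so the precedence is easy to exercise; it is not part of the diff.

```python
from pathlib import Path


def resolve_db_path(env: dict[str, str], default: Path) -> Path:
    """Hypothetical mirror of get_default_db_path's env-var precedence."""
    investigation_dir = env.get("SPD_INVESTIGATION_DIR")
    if investigation_dir:
        # Investigation mode wins: the app DB lives inside the investigation dir.
        return Path(investigation_dir) / "app.db"
    env_path = env.get("SPD_APP_DB_PATH")
    if env_path:
        # Explicit per-user override.
        return Path(env_path)
    return default


default = Path(".data/app/prompt_attr.db")
print(resolve_db_path({}, default))                                # .data/app/prompt_attr.db
print(resolve_db_path({"SPD_APP_DB_PATH": "/tmp/x.db"}, default))  # /tmp/x.db
print(resolve_db_path(
    {"SPD_INVESTIGATION_DIR": "/inv", "SPD_APP_DB_PATH": "/tmp/x.db"},
    default,
))                                                                 # /inv/app.db
```

Passing the environment explicitly (rather than reading `os.environ` inline, as the real function does) keeps the precedence testable without mutating process state.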
6 changes: 6 additions & 0 deletions spd/app/backend/routers/__init__.py
@@ -7,8 +7,11 @@
from spd.app.backend.routers.data_sources import router as data_sources_router
from spd.app.backend.routers.dataset_attributions import router as dataset_attributions_router
from spd.app.backend.routers.dataset_search import router as dataset_search_router
from spd.app.backend.routers.graph_interp import router as graph_interp_router
from spd.app.backend.routers.graphs import router as graphs_router
from spd.app.backend.routers.intervention import router as intervention_router
from spd.app.backend.routers.investigations import router as investigations_router
from spd.app.backend.routers.mcp import router as mcp_router
from spd.app.backend.routers.pretrain_info import router as pretrain_info_router
from spd.app.backend.routers.prompts import router as prompts_router
from spd.app.backend.routers.runs import router as runs_router
@@ -20,9 +23,12 @@
"correlations_router",
"data_sources_router",
"dataset_attributions_router",
"graph_interp_router",
"dataset_search_router",
"graphs_router",
"intervention_router",
"investigations_router",
"mcp_router",
"pretrain_info_router",
"prompts_router",
"runs_router",
18 changes: 16 additions & 2 deletions spd/app/backend/routers/data_sources.py
@@ -28,15 +28,21 @@ class AutointerpInfo(BaseModel):

class AttributionsInfo(BaseModel):
subrun_id: str
n_batches_processed: int
n_tokens_processed: int
ci_threshold: float


class GraphInterpInfo(BaseModel):
subrun_id: str
config: dict[str, Any] | None
label_counts: dict[str, int]


class DataSourcesResponse(BaseModel):
harvest: HarvestInfo | None
autointerp: AutointerpInfo | None
attributions: AttributionsInfo | None
graph_interp: GraphInterpInfo | None


router = APIRouter(prefix="/api/data_sources", tags=["data_sources"])
@@ -70,13 +76,21 @@ def get_data_sources(loaded: DepLoadedRun) -> DataSourcesResponse:
storage = loaded.attributions.get_attributions()
attributions_info = AttributionsInfo(
subrun_id=loaded.attributions.subrun_id,
n_batches_processed=storage.n_batches_processed,
n_tokens_processed=storage.n_tokens_processed,
ci_threshold=storage.ci_threshold,
)

graph_interp_info: GraphInterpInfo | None = None
if loaded.graph_interp is not None:
graph_interp_info = GraphInterpInfo(
subrun_id=loaded.graph_interp.subrun_id,
config=loaded.graph_interp.get_config(),
label_counts=loaded.graph_interp.get_label_counts(),
)

return DataSourcesResponse(
harvest=harvest_info,
autointerp=autointerp_info,
attributions=attributions_info,
graph_interp=graph_interp_info,
)