heidi-dang · heidi-dang · May 11, 2026 · gemini-code-assist · May 11, 2026 · gemini-code-assist
diff --git a/.jules/sentinel.md b/.jules/sentinel.md
@@ -4,3 +4,8 @@
 **Vulnerability:** Telemetry HTTP endpoints (`/status`, `/`) were completely unprotected, allowing any local user to view training state, usage, and costs.
 **Learning:** Initial implementation prioritized ease of use and local-only binding (`127.0.0.1`) but neglected defense-in-depth requirements for multi-user or shared environments.
 **Prevention:** Always implement at least Basic Authentication for any endpoint exposing state or metadata, even if restricted to loopback. Use random session-specific credentials if no configuration is provided.
+
+## 2025-01-24 - Environment Leakage in Unit Test Gate
+**Vulnerability:** The `scripts/03_unit_test_gate.py` script was executing generated code in a subprocess while passing the entire host environment, including sensitive API keys like `OPENAI_API_KEY`.
+**Learning:** Executing untrusted code, even for validation purposes, must be done in a highly restricted environment. Relying on simple regex filters for "dangerous code" is insufficient if the execution environment itself is over-privileged.
+**Prevention:** Always use a minimal allowlist for environment variables passed to subprocesses executing untrusted code. Use standard libraries like `textwrap` to ensure code is properly formatted (e.g., indented) when wrapped in security/logging templates to avoid functional failures that might bypass checks.
diff --git a/heidi_engine/telemetry.py b/heidi_engine/telemetry.py
@@ -732,11 +732,6 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]:
             "usage": get_default_usage(),
         }
 
-    # BOLT OPTIMIZATION: Check thread-safe state cache
-    cached = _state_cache.get(target_run_id, state_file)
-    if cached:
-        return cached
-
     try:
         with open(state_file) as f:
             state = json.load(f)

diff --git a/scripts/02_validate_clean.py b/scripts/02_validate_clean.py
@@ -406,7 +406,9 @@ def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None:
     """
     Save samples to JSONL file.
     """
-    os.makedirs(os.path.dirname(path), exist_ok=True)
+    dirname = os.path.dirname(path)
+    if dirname:
+        os.makedirs(dirname, exist_ok=True)
 
     with open(path, "w") as f:
         for sample in samples:

diff --git a/scripts/03_unit_test_gate.py b/scripts/03_unit_test_gate.py
@@ -40,6 +40,7 @@
 import subprocess
 import sys
 import tempfile
+import textwrap
 from typing import Any, Dict, List, Tuple
 
 # =============================================================================
@@ -56,9 +57,9 @@
 # TUNABLE: Adjust regex for different code formats
 CODE_BLOCK_PATTERNS = [
     # Markdown code blocks: ```python ... ```
-    r"```python\n(.*?)```",
+    r"```python\s*(.*?)\s*```",
-    r"```python\s*(.*?)\s*```",
+    r"```\s*python\s*(.*?)\s*```",
-    r"```python\s*(.*?)\s*```",
+    r"```\s*python\s*(.*?)\s*```",
     # Markdown code blocks without language: ``` ... ```
-    r"```\n(.*?)```",
+    r"```\s*(.*?)\s*```",
-    r"```\s*(.*?)\s*```",
+    r"```(?!\s*python)\s*(.*?)\s*```",
-    r"```\s*(.*?)\s*```",
+    r"```(?!\s*python)\s*(.*?)\s*```",
     # Inline code markers
     r"`([^`\n]+)`",
 ]
@@ -84,6 +85,10 @@
     r"\bshelve\.open\b",
     # File operations (specifically writing/appending)
     r"\bopen\s*\([^)]*,\s*(mode\s*=\s*)?['\"][^'\"r]*[wa+x]",
+    # Sandbox escapes
+    r"__subclasses__",
+    r"__globals__",
+    r"__builtins__",
-    r"__subclasses__",
-    r"__globals__",
-    r"__builtins__",
+    r"\b__subclasses__\b",
+    r"\b__globals__\b",
+    r"\b__builtins__\b",
-    r"__subclasses__",
-    r"__globals__",
-    r"__builtins__",
+    r"\b__subclasses__\b",
+    r"\b__globals__\b",
+    r"\b__builtins__\b",
 ]
 
 
@@ -213,6 +218,9 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
     # Write code to temp file
     test_file = os.path.join(temp_dir, "test_code.py")
 
+    # Indent the code to fit into the try block
+    indented_code = textwrap.indent(code, "    ")
+
     # Wrap code to capture output safely
     wrapped_code = f"""
 import sys
@@ -229,7 +237,7 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
     sys.stderr = stderr_capture
 
     # Execute the user's code
-{code}
+{indented_code}
-{indented_code}
+    exec({repr(code)}, {{'__builtins__': __builtins__}})
-{indented_code}
+    exec({repr(code)}, {{'__builtins__': __builtins__}})
 
     sys.stdout = original_stdout
     sys.stderr = original_stderr
@@ -255,6 +263,13 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
     except SyntaxError as e:
         return False, "", f"Syntax error: {e}"
 
+    # Filter environment to prevent secret leakage
+    allowed_env_vars = ["PATH", "PYTHONPATH", "LANG", "PYTHONIOENCODING"]
+    filtered_env = {k: v for k, v in os.environ.items() if k in allowed_env_vars}
+    filtered_env["PYTHONPATH"] = os.path.pathsep.join(
+        [temp_dir, filtered_env.get("PYTHONPATH", "")]
+    ).strip(os.path.pathsep)
+
     # Try to execute with timeout
     try:
         result = subprocess.run(
@@ -263,7 +278,7 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
             text=True,
             timeout=execution_timeout,
             cwd=temp_dir,
-            env={**os.environ, "PYTHONPATH": temp_dir},
+            env=filtered_env,
         )
 
         stdout = result.stdout
@@ -367,7 +382,9 @@ def load_jsonl(path: str) -> List[Dict[str, Any]]:
 
 def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None:
     """Save samples to JSONL file."""
-    os.makedirs(os.path.dirname(path), exist_ok=True)
+    dirname = os.path.dirname(path)
+    if dirname:
+        os.makedirs(dirname, exist_ok=True)
 
     with open(path, "w") as f:
         for sample in samples: