diff --git a/.jules/sentinel.md b/.jules/sentinel.md index c46ef11..438e2c4 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -4,3 +4,8 @@ **Vulnerability:** Telemetry HTTP endpoints (`/status`, `/`) were completely unprotected, allowing any local user to view training state, usage, and costs. **Learning:** Initial implementation prioritized ease of use and local-only binding (`127.0.0.1`) but neglected defense-in-depth requirements for multi-user or shared environments. **Prevention:** Always implement at least Basic Authentication for any endpoint exposing state or metadata, even if restricted to loopback. Use random session-specific credentials if no configuration is provided. + +## 2026-05-14 - Environment Leakage in Unit Test Gate +**Vulnerability:** The unit test gate script inherited all environment variables when executing generated code samples, potentially exposing sensitive API keys and tokens to untrusted code. +**Learning:** Python's `subprocess.run` inherits `os.environ` by default. In a pipeline that generates and tests code, this allows the generated code to exfiltrate secrets from the environment. +**Prevention:** Explicitly filter the `env` argument in `subprocess` calls to an allowlist of essential, non-sensitive variables when executing untrusted code. diff --git a/heidi_engine/telemetry.py b/heidi_engine/telemetry.py index bb89122..26d3351 100644 --- a/heidi_engine/telemetry.py +++ b/heidi_engine/telemetry.py @@ -732,11 +732,6 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]: "usage": get_default_usage(), } - # BOLT OPTIMIZATION: Check thread-safe state cache - cached = _state_cache.get(target_run_id, state_file) - if cached: - return cached - try: with open(state_file) as f: state = json.load(f) diff --git a/scripts/02_validate_clean.py b/scripts/02_validate_clean.py index 33ee636..3c61db3 100755 --- a/scripts/02_validate_clean.py +++ b/scripts/02_validate_clean.py @@ -406,7 +406,8 @@ def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None: """ Save samples to JSONL file. """ - os.makedirs(os.path.dirname(path), exist_ok=True) + if os.path.dirname(path): + os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: for sample in samples: diff --git a/scripts/03_unit_test_gate.py b/scripts/03_unit_test_gate.py index 7507b7e..8bcdb8e 100755 --- a/scripts/03_unit_test_gate.py +++ b/scripts/03_unit_test_gate.py @@ -40,6 +40,7 @@ import subprocess import sys import tempfile +import textwrap from typing import Any, Dict, List, Tuple # ============================================================================= @@ -67,17 +68,20 @@ # TUNABLE: Add more dangerous patterns to block DANGEROUS_PATTERNS = [ # Dangerous imports (including comma-separated and aliased) - r"\bimport\s+[^#\n]*\b(os|subprocess|sys|shutil|socket|requests|urllib|pathlib|pickle|pty|code|bdb|pdb|multiprocessing|threading|tempfile|ftplib|smtplib|telnetlib|http|xmlrpc)\b", - r"\bfrom\s+(os|subprocess|sys|shutil|socket|requests|urllib|pathlib|pickle|pty|code|bdb|pdb|multiprocessing|threading|tempfile|ftplib|smtplib|telnetlib|http|xmlrpc)\b", - # Dangerous built-ins + r"\bimport\s+[^#\n]*\b(os|subprocess|sys|shutil|socket|requests|urllib|pathlib|pickle|pty|code|bdb|pdb|multiprocessing|threading|tempfile|ftplib|smtplib|telnetlib|http|xmlrpc|importlib|pkgutil|pydoc)\b", + r"\bfrom\s+(os|subprocess|sys|shutil|socket|requests|urllib|pathlib|pickle|pty|code|bdb|pdb|multiprocessing|threading|tempfile|ftplib|smtplib|telnetlib|http|xmlrpc|importlib|pkgutil|pydoc)\b", + # Dangerous built-ins and internal attributes r"\beval\s*\(", r"\bexec\s*\(", r"\b__import__\s*\(", r"\bgetattr\s*\(", r"\bsetattr\s*\(", r"\bbreakpoint\s*\(", + r"\b__builtins__\b", + r"\b__globals__\b", + r"\b__subclasses__\b", # Dangerous module functions - r"\bos\.(system|popen|spawn|remove|unlink|rmdir|mkdir|chmod|chown|kill|exec|fork|pipe)\b", + r"\bos\.(system|popen|spawn|remove|unlink|rmdir|mkdir|chmod|chown|kill|exec|fork|pipe|environ)\b", r"\bsubprocess\.(run|call|check_call|check_output|Popen)\b", r"\bshutil\.(rmtree|move|copy|copy2|copyfile|copymode|copystat|chown)\b", r"\bpickle\.(load|loads)\b", @@ -214,6 +218,10 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu test_file = os.path.join(temp_dir, "test_code.py") # Wrap code to capture output safely + # SECURITY: Indent the user code correctly before inserting into template + # to avoid SyntaxError and ensure proper execution. + indented_code = textwrap.indent(code, " ") + wrapped_code = f""" import sys import io @@ -229,7 +237,7 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu sys.stderr = stderr_capture # Execute the user's code -{code} +{indented_code} sys.stdout = original_stdout sys.stderr = original_stderr @@ -257,13 +265,19 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu # Try to execute with timeout try: + # SECURITY: Filter environment to prevent secret leakage from parent process. + # Only allow essential variables for execution. + allowed_env_keys = {"PATH", "PYTHONPATH", "LANG", "PYTHONIOENCODING"} + allowed_env = {k: os.environ[k] for k in allowed_env_keys if k in os.environ} + allowed_env["PYTHONPATH"] = temp_dir + result = subprocess.run( [sys.executable, test_file], capture_output=True, text=True, timeout=execution_timeout, cwd=temp_dir, - env={**os.environ, "PYTHONPATH": temp_dir}, + env=allowed_env, ) stdout = result.stdout @@ -367,7 +381,8 @@ def load_jsonl(path: str) -> List[Dict[str, Any]]: def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None: """Save samples to JSONL file.""" - os.makedirs(os.path.dirname(path), exist_ok=True) + if os.path.dirname(path): + os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: for sample in samples: