Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@
**Vulnerability:** Telemetry HTTP endpoints (`/status`, `/`) were completely unprotected, allowing any local user to view training state, usage, and costs.
**Learning:** Initial implementation prioritized ease of use and local-only binding (`127.0.0.1`) but neglected defense-in-depth requirements for multi-user or shared environments.
**Prevention:** Always implement at least Basic Authentication for any endpoint exposing state or metadata, even if restricted to loopback. Use random session-specific credentials if no configuration is provided.

## 2025-01-24 - Environment Leakage in Unit Test Gate
**Vulnerability:** The `scripts/03_unit_test_gate.py` script was executing generated code in a subprocess while passing the entire host environment, including sensitive API keys like `OPENAI_API_KEY`.
**Learning:** Executing untrusted code, even for validation purposes, must be done in a highly restricted environment. Relying on simple regex filters for "dangerous code" is insufficient if the execution environment itself is over-privileged.
**Prevention:** Always use a minimal allowlist for environment variables passed to subprocesses executing untrusted code. Use standard libraries like `textwrap` to ensure code is properly formatted (e.g., indented) when wrapped in security/logging templates to avoid functional failures that might bypass checks.
5 changes: 0 additions & 5 deletions heidi_engine/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,11 +732,6 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]:
"usage": get_default_usage(),
}

# BOLT OPTIMIZATION: Check thread-safe state cache
cached = _state_cache.get(target_run_id, state_file)
if cached:
return cached

try:
with open(state_file) as f:
state = json.load(f)
Expand Down
4 changes: 3 additions & 1 deletion scripts/02_validate_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,9 @@ def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None:
"""
Save samples to JSONL file.
"""
os.makedirs(os.path.dirname(path), exist_ok=True)
dirname = os.path.dirname(path)
if dirname:
os.makedirs(dirname, exist_ok=True)

with open(path, "w") as f:
for sample in samples:
Expand Down
27 changes: 22 additions & 5 deletions scripts/03_unit_test_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import subprocess
import sys
import tempfile
import textwrap
from typing import Any, Dict, List, Tuple

# =============================================================================
Expand All @@ -56,9 +57,9 @@
# TUNABLE: Adjust regex for different code formats
CODE_BLOCK_PATTERNS = [
# Markdown code blocks: ```python ... ```
r"```python\n(.*?)```",
r"```python\s*(.*?)\s*```",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This regex can be improved to handle optional whitespace between the backticks and the python identifier, making it more robust against different markdown formatting styles.

Suggested change
r"```python\s*(.*?)\s*```",
r"```\s*python\s*(.*?)\s*```",

# Markdown code blocks without language: ``` ... ```
r"```\n(.*?)```",
r"```\s*(.*?)\s*```",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current generic code block pattern will also match blocks that start with python, but it will include the word python as part of the extracted code. This leads to redundant tests and syntax errors when the script tries to execute python\n... as Python code. Using a negative lookahead ensures this pattern only matches blocks that do not specify the language.

Suggested change
r"```\s*(.*?)\s*```",
r"```(?!\s*python)\s*(.*?)\s*```",

# Inline code markers
r"`([^`\n]+)`",
]
Expand All @@ -84,6 +85,10 @@
r"\bshelve\.open\b",
# File operations (specifically writing/appending)
r"\bopen\s*\([^)]*,\s*(mode\s*=\s*)?['\"][^'\"r]*[wa+x]",
# Sandbox escapes
r"__subclasses__",
r"__globals__",
r"__builtins__",
Comment on lines +89 to +91
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

Adding word boundaries (\b) to these patterns is recommended to prevent false positives (e.g., matching a variable named my__globals__) while still effectively catching sandbox escape attempts.

Suggested change
r"__subclasses__",
r"__globals__",
r"__builtins__",
r"\b__subclasses__\b",
r"\b__globals__\b",
r"\b__builtins__\b",

]


Expand Down Expand Up @@ -213,6 +218,9 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
# Write code to temp file
test_file = os.path.join(temp_dir, "test_code.py")

# Indent the code to fit into the try block
indented_code = textwrap.indent(code, " ")
Comment on lines +221 to +222
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Manual indentation of the user code is unnecessary if the code is executed via exec() using a string literal, which also provides better isolation.


# Wrap code to capture output safely
wrapped_code = f"""
import sys
Expand All @@ -229,7 +237,7 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
sys.stderr = stderr_capture

# Execute the user's code
{code}
{indented_code}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The current implementation executes untrusted code by directly injecting it into the wrapper script's source. This causes namespace leakage, allowing the generated code to access and potentially manipulate the wrapper's internal variables like sys, io, stdout_capture, and original_stdout.

A more secure approach is to execute the code in a restricted global namespace using exec(). By passing an empty dictionary (or one containing only __builtins__), you isolate the user code from the wrapper's scope.

Suggested change
{indented_code}
exec({repr(code)}, {{'__builtins__': __builtins__}})


sys.stdout = original_stdout
sys.stderr = original_stderr
Expand All @@ -255,6 +263,13 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
except SyntaxError as e:
return False, "", f"Syntax error: {e}"

# Filter environment to prevent secret leakage
allowed_env_vars = ["PATH", "PYTHONPATH", "LANG", "PYTHONIOENCODING"]
filtered_env = {k: v for k, v in os.environ.items() if k in allowed_env_vars}
filtered_env["PYTHONPATH"] = os.path.pathsep.join(
[temp_dir, filtered_env.get("PYTHONPATH", "")]
).strip(os.path.pathsep)

# Try to execute with timeout
try:
result = subprocess.run(
Expand All @@ -263,7 +278,7 @@ def test_python_code(code: str, temp_dir: str, execution_timeout: int = 5) -> Tu
text=True,
timeout=execution_timeout,
cwd=temp_dir,
env={**os.environ, "PYTHONPATH": temp_dir},
env=filtered_env,
)

stdout = result.stdout
Expand Down Expand Up @@ -367,7 +382,9 @@ def load_jsonl(path: str) -> List[Dict[str, Any]]:

def save_jsonl(samples: List[Dict[str, Any]], path: str) -> None:
"""Save samples to JSONL file."""
os.makedirs(os.path.dirname(path), exist_ok=True)
dirname = os.path.dirname(path)
if dirname:
os.makedirs(dirname, exist_ok=True)

with open(path, "w") as f:
for sample in samples:
Expand Down