Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ jobs:
python-version: "3.11"

      - name: Install dependencies
-       run: pip install anthropic python-dotenv pytest
+       run: pip install anthropic python-dotenv pyyaml pytest

-     - name: Run Python smoke tests
-       run: python -m pytest tests/test_agents_smoke.py -q
+     - name: Run Python tests
+       run: python -m pytest tests/ -q

web-build:
runs-on: ubuntu-latest
Expand Down
67 changes: 67 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Shared fixtures for loading agent modules with mocked dependencies."""
from __future__ import annotations

import importlib.util
import os
import sys
import types
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
AGENTS_DIR = REPO_ROOT / "agents"


def load_agent_module(module_file: str, temp_cwd: Path):
    """Load an agent module with mocked anthropic/dotenv so it doesn't need real API keys.

    While the module body executes, ``sys.modules["anthropic"]`` and
    ``sys.modules["dotenv"]`` are replaced by inert stand-ins and the process
    working directory is ``temp_cwd``. Both are put back afterwards, even if
    the import raises.

    Args:
        module_file: filename inside agents/, e.g. "s03_todo_write.py"
        temp_cwd: temporary working directory (avoids polluting real filesystem)

    Returns:
        The loaded module object.

    Raises:
        RuntimeError: if no import spec/loader can be created for the file.
    """
    module_path = AGENTS_DIR / module_file

    class FakeAnthropic:
        """Stand-in client: accepts any constructor arguments, makes no calls."""

        def __init__(self, *args, **kwargs):
            # ``create`` is only a None placeholder on the fake client.
            self.messages = types.SimpleNamespace(create=None)

    fake_anthropic = types.ModuleType("anthropic")
    fake_anthropic.Anthropic = FakeAnthropic

    fake_dotenv = types.ModuleType("dotenv")
    # Accept positional arguments too, so ``load_dotenv(".env")`` in an agent
    # does not blow up on the stub.
    fake_dotenv.load_dotenv = lambda *args, **kwargs: None

    stubs = {"anthropic": fake_anthropic, "dotenv": fake_dotenv}

    # Remember what was registered before so it can be restored afterwards.
    saved = {name: sys.modules.get(name) for name in stubs}
    prev_cwd = Path.cwd()

    # ``stem`` drops only the final suffix; ``str.replace`` would strip ".py"
    # wherever it appeared in the filename.
    spec = importlib.util.spec_from_file_location(
        f"agent_{Path(module_file).stem}", module_path
    )
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Unable to load {module_path}")
    module = importlib.util.module_from_spec(spec)

    sys.modules.update(stubs)
    try:
        os.chdir(temp_cwd)
        # setdefault keeps an existing MODEL_ID; the value is not removed
        # again after loading, so it stays set for the rest of the process.
        os.environ.setdefault("MODEL_ID", "test-model")
        spec.loader.exec_module(module)
        return module
    finally:
        os.chdir(prev_cwd)
        for name, previous in saved.items():
            if previous is None:
                sys.modules.pop(name, None)
            else:
                sys.modules[name] = previous
124 changes: 124 additions & 0 deletions tests/test_context_compact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Unit tests for micro_compact and estimate_tokens (s06_context_compact.py)."""
from __future__ import annotations

import tempfile
import types
from pathlib import Path

import pytest

from conftest import load_agent_module


@pytest.fixture()
def compact_module():
    """Yield the s06_context_compact.py agent module, loaded from a scratch cwd."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield load_agent_module("s06_context_compact.py", Path(scratch_dir))


# -- estimate_tokens --

class TestEstimateTokens:
    """estimate_tokens is checked against len(str(messages)) // 4."""

    def test_basic(self, compact_module):
        """A single user message with 400 characters of content."""
        conversation = [{"role": "user", "content": "a" * 400}]
        estimate = compact_module.estimate_tokens(conversation)
        expected = len(str(conversation)) // 4
        assert estimate == expected

    def test_empty_messages(self, compact_module):
        """An empty history is estimated from the string form of ``[]``."""
        estimate = compact_module.estimate_tokens([])
        assert estimate == len(str([])) // 4


# -- micro_compact --

def _tool_use_block(tool_id: str, name: str):
    """Return a SimpleNamespace imitating an SDK tool_use block with empty input."""
    attrs = {"type": "tool_use", "id": tool_id, "name": name, "input": {}}
    return types.SimpleNamespace(**attrs)


def _tool_result(tool_id: str, content: str):
    """Return a tool_result dict that answers the tool call ``tool_id``."""
    return dict(type="tool_result", tool_use_id=tool_id, content=content)


def _make_messages(n_results: int, tool_name: str = "bash"):
    """Build a history of ``n_results`` tool_use/tool_result pairs.

    Every pair is an assistant message holding one tool_use block followed by
    a user message holding the matching tool_result, so results sit at the
    odd indexes of the returned list. Tool ids are ``tool_0``, ``tool_1``, ...
    and each result carries a payload of more than 200 characters.
    """
    history = []
    for call_no in range(n_results):
        call_id = f"tool_{call_no}"
        output = f"Output line {'x' * 200} for call {call_no}"
        history.extend([
            {"role": "assistant", "content": [_tool_use_block(call_id, tool_name)]},
            {"role": "user", "content": [_tool_result(call_id, output)]},
        ])
    return history


class TestMicroCompact:
    """micro_compact applied in place to histories of tool_use/tool_result pairs."""

    def test_few_results_unchanged(self, compact_module):
        """With <= KEEP_RECENT results, nothing is compacted."""
        messages = _make_messages(3)  # exactly KEEP_RECENT
        # Tool results live at the odd indexes of the history.
        before = [msg["content"][0]["content"] for msg in messages[1::2]]
        compact_module.micro_compact(messages)
        for pos, msg in enumerate(messages[1::2]):
            assert msg["content"][0]["content"] == before[pos]

    def test_clears_old_results(self, compact_module):
        """With > KEEP_RECENT results, oldest are replaced with placeholder."""
        messages = _make_messages(5)
        compact_module.micro_compact(messages)
        # The two oldest results are replaced by the placeholder...
        for old_idx in (1, 3):
            assert messages[old_idx]["content"][0]["content"] == "[Previous: used bash]"
        # ...while the three most recent keep their original output.
        for recent_idx in (5, 7, 9):
            assert messages[recent_idx]["content"][0]["content"].startswith("Output line")

    def test_preserves_read_file(self, compact_module):
        """read_file results are never compacted."""
        messages = _make_messages(5, tool_name="read_file")
        compact_module.micro_compact(messages)
        for msg in messages[1::2]:
            assert msg["content"][0]["content"].startswith("Output line")

    def test_skips_short_content(self, compact_module):
        """Content <= 100 chars is not compacted."""
        messages = []
        for call_no in range(5):
            call_id = f"tool_{call_no}"
            messages += [
                {"role": "assistant", "content": [_tool_use_block(call_id, "bash")]},
                {"role": "user", "content": [_tool_result(call_id, "short")]},  # <= 100 chars
            ]
        compact_module.micro_compact(messages)
        for msg in messages[1::2]:
            assert msg["content"][0]["content"] == "short"

    def test_unknown_tool_name(self, compact_module):
        """When tool_use_id has no matching tool_use block, uses 'unknown'."""
        # Four results and no assistant message carrying a matching tool_use.
        messages = [
            {"role": "user", "content": [_tool_result(f"orphan_{n}", "x" * 200)]}
            for n in range(4)
        ]
        compact_module.micro_compact(messages)
        oldest = messages[0]["content"][0]["content"]
        assert oldest == "[Previous: used unknown]"
121 changes: 121 additions & 0 deletions tests/test_skill_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Unit tests for SkillLoader (s05_skill_loading.py)."""
from __future__ import annotations

import tempfile
from pathlib import Path

import pytest

from conftest import load_agent_module


@pytest.fixture()
def SkillLoader():
    """Yield the SkillLoader class from s05_skill_loading.py, loaded from a scratch cwd."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        agent = load_agent_module("s05_skill_loading.py", Path(scratch_dir))
        yield agent.SkillLoader


def _make_skill(base_dir: Path, name: str, frontmatter: str, body: str):
    """Helper: create a skills/<name>/SKILL.md file.

    The file consists of ``frontmatter`` wrapped in ``---`` fences followed
    directly by ``body``. Missing parent directories are created, and an
    existing SKILL.md is overwritten.

    Args:
        base_dir: directory that holds one sub-directory per skill.
        name: name of the skill sub-directory to create.
        frontmatter: text placed between the ``---`` fences.
        body: text placed after the closing fence.
    """
    skill_dir = base_dir / name
    skill_dir.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so the bytes on disk don't depend on the platform locale.
    (skill_dir / "SKILL.md").write_text(
        f"---\n{frontmatter}\n---\n{body}", encoding="utf-8"
    )


# -- _parse_frontmatter --

class TestParseFrontmatter:
    """_parse_frontmatter splits a document into a metadata dict and a body."""

    def test_valid_frontmatter(self, SkillLoader):
        """Fenced key/value pairs become metadata; the rest is the body."""
        document = "---\nname: test\ndescription: A test\n---\nBody text here"
        meta, body = SkillLoader(Path("/nonexistent"))._parse_frontmatter(document)
        assert meta["name"] == "test"
        assert meta["description"] == "A test"
        assert body == "Body text here"

    def test_no_frontmatter(self, SkillLoader):
        """Text without fences comes back unchanged with empty metadata."""
        document = "Just plain text without frontmatter"
        meta, body = SkillLoader(Path("/nonexistent"))._parse_frontmatter(document)
        assert meta == {}
        assert body == "Just plain text without frontmatter"

    def test_invalid_yaml(self, SkillLoader):
        """Unparseable frontmatter yields empty metadata but keeps the body."""
        document = "---\n: [invalid yaml\n---\nBody"
        meta, body = SkillLoader(Path("/nonexistent"))._parse_frontmatter(document)
        assert meta == {}
        assert body == "Body"


# -- _load_all --

class TestLoadAll:
    """Skill discovery performed when a SkillLoader is constructed."""

    def test_nonexistent_dir(self, SkillLoader):
        """A missing skills directory gives an empty skill table."""
        assert SkillLoader(Path("/does/not/exist")).skills == {}

    def test_empty_dir(self, SkillLoader, tmp_path):
        """An existing but empty directory gives an empty skill table."""
        assert SkillLoader(tmp_path).skills == {}

    def test_loads_skill(self, SkillLoader, tmp_path):
        """A single SKILL.md is registered under its frontmatter name."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: PDF tools", "Process PDFs here")
        loaded = SkillLoader(tmp_path).skills
        assert "pdf" in loaded
        assert loaded["pdf"]["body"] == "Process PDFs here"

    def test_name_from_directory(self, SkillLoader, tmp_path):
        """When frontmatter has no 'name', directory name is used."""
        _make_skill(tmp_path, "my-tool", "description: A tool", "Body")
        assert "my-tool" in SkillLoader(tmp_path).skills

    def test_multiple_skills(self, SkillLoader, tmp_path):
        """Every skill directory under the base is picked up."""
        for key, label in (("a", "A"), ("b", "B")):
            _make_skill(tmp_path, key, f"name: {key}\ndescription: Skill {label}", f"Body {label}")
        assert len(SkillLoader(tmp_path).skills) == 2


# -- get_descriptions --

class TestGetDescriptions:
    """Text summary of the loaded skills returned by get_descriptions."""

    def test_no_skills(self, SkillLoader):
        """With nothing loaded, a fixed placeholder string is returned."""
        summary = SkillLoader(Path("/nonexistent")).get_descriptions()
        assert summary == "(no skills available)"

    def test_with_description(self, SkillLoader, tmp_path):
        """Each skill is listed as '<name>: <description>'."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: Process PDFs", "Body")
        summary = SkillLoader(tmp_path).get_descriptions()
        assert "pdf: Process PDFs" in summary

    def test_with_tags(self, SkillLoader, tmp_path):
        """Tags from the frontmatter appear in square brackets."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: Process PDFs\ntags: utils", "Body")
        summary = SkillLoader(tmp_path).get_descriptions()
        assert "[utils]" in summary

    def test_without_tags(self, SkillLoader, tmp_path):
        """Without tags, no bracketed section is emitted."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: Process PDFs", "Body")
        summary = SkillLoader(tmp_path).get_descriptions()
        assert "[" not in summary


# -- get_content --

class TestGetContent:
    """Skill lookup by name through get_content."""

    def test_existing_skill(self, SkillLoader, tmp_path):
        """A known skill's body is wrapped in a <skill name="..."> element."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: PDF", "PDF instructions")
        rendered = SkillLoader(tmp_path).get_content("pdf")
        for fragment in ('<skill name="pdf">', "PDF instructions", "</skill>"):
            assert fragment in rendered

    def test_unknown_skill(self, SkillLoader, tmp_path):
        """An unknown name produces an error message that lists what exists."""
        _make_skill(tmp_path, "pdf", "name: pdf\ndescription: PDF", "Body")
        message = SkillLoader(tmp_path).get_content("unknown")
        assert "Error: Unknown skill 'unknown'" in message
        assert "pdf" in message  # lists available skills
Loading