From 7e044f5646fd4cbacf0926d3652cdbc9c61c5789 Mon Sep 17 00:00:00 2001 From: Noah Freedman Date: Tue, 31 Mar 2026 02:38:07 -0400 Subject: [PATCH 1/2] Add git-aware shelf integration --- src/shelfai/cli/main.py | 131 +++++++++++ src/shelfai/core/__init__.py | 15 +- src/shelfai/core/git_integration.py | 349 ++++++++++++++++++++++++++++ tests/test_git_integration.py | 148 ++++++++++++ 4 files changed, 629 insertions(+), 14 deletions(-) create mode 100644 src/shelfai/core/git_integration.py create mode 100644 tests/test_git_integration.py diff --git a/src/shelfai/cli/main.py b/src/shelfai/cli/main.py index 5dc14c0..87e0ca5 100644 --- a/src/shelfai/cli/main.py +++ b/src/shelfai/cli/main.py @@ -403,6 +403,24 @@ def _choose_template_name(registry, default: Optional[str] = None) -> str: ) +# ────────────────────────────────────────────── +# shelfai git (subcommand group) +# ────────────────────────────────────────────── + +git_app = typer.Typer( + name="git", + help="Inspect git history for ShelfAI chunks and manage git hooks.", + no_args_is_help=True, +) +app.add_typer(git_app, name="git") + + +def _git_integration(repo_path: str): + from shelfai.core.git_integration import GitIntegration + + return GitIntegration(repo_path=repo_path) + + # ────────────────────────────────────────────── # shelfai init # ────────────────────────────────────────────── @@ -3301,3 +3319,116 @@ def sync_configure( } config_path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8") console.print(f"[green]Updated sync settings in {config_path}[/green]") + + +@git_app.command("history") +def git_history( + chunk_id: Optional[str] = typer.Argument( + None, + help="Chunk file or chunk id to inspect. If omitted, shows all tracked chunk history.", + ), + limit: int = typer.Option(20, "--limit", "-n", help="Maximum number of commits to show"), + repo_path: str = typer.Option(".", "--repo", "-r", help="Path to the git repository"), +): + """Show git history for chunk(s).""" + git = _git_integration(repo_path) + if not git.is_git_repo(): + console.print(f"[red]Not a git repository: {Path(repo_path).resolve()}[/red]") + raise typer.Exit(1) + + if chunk_id: + history = git.get_chunk_history(chunk_id, limit=limit) + console.print(f"\n[bold]Git history for[/bold] {history.chunk_id}\n") + console.print(f"Total changes: {history.total_changes}") + console.print(f"Last changed: {history.last_changed or 'unknown'}") + console.print(f"Last author: {history.last_author or 'unknown'}\n") + for commit in history.commits: + console.print(f"- {commit['hash']} {commit['date']} {commit['author']}") + console.print(f" {commit['message']}") + return + + changelog = git.get_shelf_changelog(limit=limit) + if not changelog: + console.print("[dim]No tracked chunk changes found.[/dim]") + return + + for item in changelog: + console.print(f"- {item['commit']} {item['date']}") + console.print(f" {item['message']}") + console.print(f" chunks: {', '.join(item['chunks_changed'])}") + + +@git_app.command("changelog") +def git_changelog( + since: Optional[str] = typer.Option(None, "--since", help="Only include commits after this date or commit"), + limit: int = typer.Option(20, "--limit", "-n", help="Maximum number of commits to show"), + repo_path: str = typer.Option(".", "--repo", "-r", help="Path to the git repository"), +): + """Shelf changelog for chunk files.""" + git = _git_integration(repo_path) + if not git.is_git_repo(): + console.print(f"[red]Not a git repository: {Path(repo_path).resolve()}[/red]") + raise typer.Exit(1) + + changelog = git.get_shelf_changelog(since=since, limit=limit) + if not changelog: + console.print("[dim]No tracked chunk changes found.[/dim]") + return + + for item in changelog: + console.print(f"- {item['commit']} {item['date']}") + console.print(f" {item['message']}") + console.print(f" chunks: {', '.join(item['chunks_changed'])}") + + +@git_app.command("blame") +def git_blame( + chunk_id: str = typer.Argument(..., help="Chunk file or chunk id to blame"), + repo_path: str = typer.Option(".", "--repo", "-r", help="Path to the git repository"), +): + """Git blame for a chunk.""" + git = _git_integration(repo_path) + if not git.is_git_repo(): + console.print(f"[red]Not a git repository: {Path(repo_path).resolve()}[/red]") + raise typer.Exit(1) + + blame = git.get_chunk_blame(chunk_id) + if not blame: + console.print("[dim]No blame data available.[/dim]") + return + + for row in blame: + console.print( + f"{row['line_start']:>4}-{row['line_end']:<4} {row['commit'][:8]} {row['author']} {row['date']}" + ) + console.print(f" {row['content']}") + + +@git_app.command("install-hooks") +def git_install_hooks( + shelf_path: str = typer.Option("./shelf", "--shelf", "-s", help="Path to the shelf directory"), + repo_path: str = typer.Option(".", "--repo", "-r", help="Path to the git repository"), +): + """Install git hooks for ShelfAI validation and rechunking.""" + git = _git_integration(repo_path) + try: + git.install_hooks(shelf_path) + except RuntimeError as exc: + console.print(f"[red]{exc}[/red]") + raise typer.Exit(1) + + console.print(f"[green]Installed ShelfAI hooks in {Path(repo_path).resolve()}[/green]") + + +@git_app.command("uninstall-hooks") +def git_uninstall_hooks( + repo_path: str = typer.Option(".", "--repo", "-r", help="Path to the git repository"), +): + """Remove ShelfAI git hooks.""" + git = _git_integration(repo_path) + if not git.is_git_repo(): + console.print(f"[red]Not a git repository: {Path(repo_path).resolve()}[/red]") + raise typer.Exit(1) + + git.uninstall_hooks() + console.print(f"[green]Removed ShelfAI hooks from {Path(repo_path).resolve()}[/green]") diff --git a/src/shelfai/core/__init__.py b/src/shelfai/core/__init__.py index 3be40fa..adf5404 100644 --- a/src/shelfai/core/__init__.py +++ b/src/shelfai/core/__init__.py @@ -6,21 +6,8 @@ from shelfai.core.diff_report import ChunkDiff, ShelfDiffReport, compare_before_after, compare_shelves from shelfai.core.migrate import MigrationPlan, MigrationStep, ShelfMigrator from shelfai.core.priority import ChunkPriority, PriorityManager + "GitIntegration", -__all__ = [ - "AnnotationManager", - "ChunkAnnotation", - "ChunkCompiler", - "CompileConfig", - "CompiledContext", - "ChunkDiff", - "ChunkPriority", - "ConditionalLoader", - "LoadCondition", - "LoadContext", - "MigrationPlan", - "MigrationStep", - "PriorityManager", "ShelfDiffReport", "ShelfMigrator", "compare_before_after", diff --git a/src/shelfai/core/git_integration.py b/src/shelfai/core/git_integration.py new file mode 100644 index 0000000..c0dd244 --- /dev/null +++ b/src/shelfai/core/git_integration.py @@ -0,0 +1,349 @@ +"""Git-aware ShelfAI helpers for history, blame, changelogs, and hooks.""" + +from __future__ import annotations + +import subprocess +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path + + +@dataclass +class ChunkGitHistory: + chunk_id: str + commits: list[dict] + total_changes: int + last_changed: str + last_author: str + + +class GitIntegration: + """Git-aware shelf management.""" + + def __init__(self, repo_path: str = "."): + self.repo_path = Path(repo_path).resolve() + + def is_git_repo(self) -> bool: + """Check if we're in a git repo.""" + return self._run_git(["rev-parse", "--is-inside-work-tree"], check=False).returncode == 0 + + def get_chunk_history(self, chunk_path: str, limit: int = 20) -> ChunkGitHistory: + """Get git log for a specific chunk file.""" + self._ensure_git_repo() + rel_path = self._chunk_path_for_git(chunk_path) + result = self._run_git( + [ + "log", + "--follow", + f"--max-count={limit}", + "--date=iso-strict", + "--format=%H%x1f%ad%x1f%an%x1f%s%x1e", + "--", + rel_path, + ] + ) + + commits: list[dict] = [] + for record in result.stdout.split("\x1e"): + record = record.strip() + if not record: + continue + lines = record.splitlines() + header = lines[0].split("\x1f") + if len(header) != 4: + continue + commit_hash, date, author, message = header + commits.append( + { + "hash": commit_hash, + "date": date, + "author": author, + "message": message, + } + ) + + last_changed = commits[0]["date"] if commits else "" + last_author = commits[0]["author"] if commits else "" + return ChunkGitHistory( + chunk_id=rel_path, + commits=commits, + total_changes=len(commits), + last_changed=last_changed, + last_author=last_author, + ) + + def get_shelf_changelog(self, since: str = None, limit: int = 20) -> list[dict]: + """ + Get changes to any chunk file since a date or commit. + Returns: [{commit, date, chunks_changed: [...], message}] + """ + self._ensure_git_repo() + args = [ + "log", + f"--max-count={limit}", + "--date=iso-strict", + "--format=%H%x1f%ad%x1f%s", + "--name-only", + ] + if since: + if self._is_commitish(since): + args.insert(1, f"{since}..HEAD") + else: + args.extend(["--since", since]) + result = self._run_git(args) + + changelog: list[dict] = [] + current: dict | None = None + for line in result.stdout.splitlines(): + if "\x1f" in line: + if current and current["chunks_changed"]: + changelog.append(current) + commit_hash, date, message = line.split("\x1f", 2) + current = { + "commit": commit_hash, + "date": date, + "chunks_changed": [], + "message": message, + } + continue + + if current is None: + continue + + path = line.strip() + if path and self._is_chunk_file(path): + current["chunks_changed"].append(path) + + if current and current["chunks_changed"]: + changelog.append(current) + return changelog + + def get_chunk_blame(self, chunk_path: str) -> list[dict]: + """ + Git blame for a chunk file. + Returns: [{line_start, line_end, author, date, commit, content}] + """ + self._ensure_git_repo() + rel_path = self._chunk_path_for_git(chunk_path) + result = self._run_git( + ["blame", "--line-porcelain", "--date=iso-strict", "--", rel_path] + ) + + lines = result.stdout.splitlines() + blame_rows: list[dict] = [] + i = 0 + while i < len(lines): + header = lines[i].split() + if len(header) < 4: + i += 1 + continue + commit_hash = header[0] + if not all(part.isdigit() for part in header[1:4]): + i += 1 + continue + line_start = int(header[2]) + num_lines = int(header[3]) + i += 1 + + metadata: dict[str, str] = {} + while i < len(lines) and not lines[i].startswith("\t"): + key, _, value = lines[i].partition(" ") + metadata[key] = value + i += 1 + + content_lines: list[str] = [] + for _ in range(num_lines): + if i >= len(lines): + break + content_line = lines[i] + if content_line.startswith("\t"): + content_lines.append(content_line[1:]) + else: + content_lines.append(content_line) + i += 1 + + author_time = metadata.get("author-time") + if author_time and author_time.isdigit(): + date = datetime.fromtimestamp(int(author_time), tz=timezone.utc).isoformat() + else: + date = metadata.get("author-time", "") + + for offset, content in enumerate(content_lines): + blame_rows.append( + { + "line_start": line_start + offset, + "line_end": line_start + offset, + "author": metadata.get("author", ""), + "date": date, + "commit": commit_hash, + "content": content, + } + ) + + return blame_rows + + def detect_agent_md_changes(self, since_commit: str = "HEAD~1") -> bool: + """Check if AGENT.md changed since a given commit.""" + if not self.is_git_repo(): + return False + + result = self._run_git( + [ + "diff", + "--name-only", + since_commit, + "HEAD", + "--", + ], + check=False, + ) + if result.returncode != 0: + return False + return any(path.endswith("AGENT.md") for path in result.stdout.splitlines()) + + def auto_rechunk_hook(self) -> str: + """ + Generate a git hook script that auto-rechunks when AGENT.md changes. + Returns the hook script content. + """ + return ( + "#!/bin/sh\n" + "# ShelfAI git hook: auto-rechunk after commits that touch AGENT.md.\n" + "set -eu\n" + "\n" + "if git rev-parse --verify HEAD^ >/dev/null 2>&1; then\n" + " changed_files=$(git diff-tree --no-commit-id --name-only -r HEAD^ HEAD)\n" + "else\n" + " changed_files=$(git show --pretty='' --name-only HEAD)\n" + "fi\n" + "\n" + "if command -v shelfai >/dev/null 2>&1; then\n" + " for file in $changed_files; do\n" + " case \"$file\" in\n" + " *AGENT.md)\n" + " echo \"ShelfAI: AGENT.md changed in $file; rechunking...\"\n" + " shelfai chunk \"$file\" --write || exit $?\n" + " ;;\n" + " esac\n" + " done\n" + "else\n" + " echo \"ShelfAI: AGENT.md changed, but 'shelfai' is not on PATH.\"\n" + "fi\n" + ) + + def install_hooks(self, shelf_path: str): + """ + Install git hooks: + - pre-commit: run shelfai lint + - post-commit: if AGENT.md changed, suggest rechunking + """ + self._ensure_git_repo() + hooks_dir = self._git_dir("hooks") + hooks_dir.mkdir(parents=True, exist_ok=True) + + pre_commit = ( + "#!/bin/sh\n" + "# ShelfAI git hook: run shelf validation before commits.\n" + "set -eu\n" + f"shelfai lint --shelf {self._shell_quote(shelf_path)}\n" + ) + post_commit = self.auto_rechunk_hook() + + self._write_hook(hooks_dir / "pre-commit", pre_commit) + self._write_hook(hooks_dir / "post-commit", post_commit) + + def uninstall_hooks(self): + """Remove ShelfAI git hooks.""" + if not self.is_git_repo(): + return + hooks_dir = self._git_dir("hooks") + for hook_name in ("pre-commit", "post-commit"): + hook_path = hooks_dir / hook_name + if hook_path.exists(): + try: + content = hook_path.read_text(encoding="utf-8") + except OSError: + continue + if "ShelfAI git hook" in content: + hook_path.unlink() + + def _parse_numstat(self, lines: list[str]) -> tuple[int, int]: + additions = 0 + deletions = 0 + for line in lines: + if not line or line.startswith("\t"): + continue + parts = line.split("\t") + if len(parts) != 3: + continue + added, removed, _ = parts + if added.isdigit(): + additions += int(added) + if removed.isdigit(): + deletions += int(removed) + return additions, deletions + + def _is_chunk_file(self, path: str) -> bool: + normalized = path.replace("\\", "/") + name = Path(normalized).name + if name == "AGENT.md": + return True + if "/chunks/" in f"/{normalized.strip('/')}/": + return normalized.endswith(".md") + if normalized.startswith("chunks/"): + return normalized.endswith(".md") + return False + + def _chunk_path_for_git(self, chunk_path: str) -> str: + candidate = Path(chunk_path) + if candidate.is_absolute(): + try: + candidate = candidate.relative_to(self._repo_root()) + except ValueError: + candidate = Path(candidate.name) + + if candidate.suffix != ".md": + candidate = candidate.with_suffix(".md") + + rel = candidate.as_posix() + if (self._repo_root() / rel).exists(): + return rel + if (self._repo_root() / "chunks" / candidate.name).exists(): + return f"chunks/{candidate.name}" + if candidate.name == "AGENT.md" and (self._repo_root() / "AGENT.md").exists(): + return "AGENT.md" + return rel + + def _git_dir(self, relative: str) -> Path: + git_dir = self._run_git(["rev-parse", "--git-dir"]).stdout.strip() + path = Path(git_dir) + if not path.is_absolute(): + path = (self._repo_root() / path).resolve() + return path / relative + + def _repo_root(self) -> Path: + result = self._run_git(["rev-parse", "--show-toplevel"]) + return Path(result.stdout.strip()).resolve() + + def _is_commitish(self, value: str) -> bool: + result = self._run_git(["rev-parse", "--verify", f"{value}^{{commit}}"], check=False) + return result.returncode == 0 + + def _ensure_git_repo(self): + if not self.is_git_repo(): + raise RuntimeError(f"Not a git repository: {self.repo_path}") + + def _write_hook(self, hook_path: Path, content: str) -> None: + hook_path.write_text(content, encoding="utf-8") + hook_path.chmod(0o755) + + def _shell_quote(self, value: str) -> str: + return "'" + value.replace("'", "'\"'\"'") + "'" + + def _run_git(self, args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["git", "-C", str(self.repo_path), *args], + check=check, + capture_output=True, + text=True, + ) diff --git a/tests/test_git_integration.py b/tests/test_git_integration.py new file mode 100644 index 0000000..d10c919 --- /dev/null +++ b/tests/test_git_integration.py @@ -0,0 +1,148 @@ +"""Tests for ShelfAI git integration.""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from shelfai.cli.main import app +from shelfai.core.git_integration import GitIntegration + + +runner = CliRunner() + + +def _git(repo: Path, *args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["git", "-C", str(repo), *args], + check=True, + capture_output=True, + text=True, + ) + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + repo = tmp_path / "repo" + repo.mkdir() + _git(repo, "init") + _git(repo, "config", "user.email", "tests@example.com") + _git(repo, "config", "user.name", "Test User") + + (repo / "AGENT.md").write_text("# AGENT\n\nInitial guidance.\n", encoding="utf-8") + (repo / "chunks").mkdir() + (repo / "chunks" / "soul.md").write_text("# Soul\n\nIdentity.\n", encoding="utf-8") + (repo / "chunks" / "rules.md").write_text("# Rules\n\nBe kind.\n", encoding="utf-8") + _git(repo, "add", "AGENT.md", "chunks/soul.md", "chunks/rules.md") + _git(repo, "commit", "-m", "Initial chunk set") + + (repo / "chunks" / "rules.md").write_text("# Rules\n\nBe kind and clear.\n", encoding="utf-8") + _git(repo, "add", "chunks/rules.md") + _git(repo, "commit", "-m", "Refine rules chunk") + + (repo / "AGENT.md").write_text("# AGENT\n\nInitial guidance.\n\nExtra note.\n", encoding="utf-8") + _git(repo, "add", "AGENT.md") + _git(repo, "commit", "-m", "Update AGENT instructions") + + return repo + + +def test_is_git_repo(git_repo): + assert GitIntegration(str(git_repo)).is_git_repo() + + +def test_chunk_history(git_repo): + git = GitIntegration(str(git_repo)) + history = git.get_chunk_history("chunks/rules.md") + + assert history.chunk_id == "chunks/rules.md" + assert len(history.commits) == 2 + assert history.last_author == "Test User" + assert history.total_changes > 0 + + +def test_history_limit(git_repo): + git = GitIntegration(str(git_repo)) + history = git.get_chunk_history("chunks/rules.md", limit=1) + + assert len(history.commits) == 1 + + +def test_shelf_changelog(git_repo): + git = GitIntegration(str(git_repo)) + changelog = git.get_shelf_changelog() + + assert len(changelog) >= 2 + assert any("chunks/rules.md" in item["chunks_changed"] for item in changelog) + assert any("AGENT.md" in item["chunks_changed"] for item in changelog) + + +def test_changelog_since_date(git_repo): + git = GitIntegration(str(git_repo)) + changelog = git.get_shelf_changelog(since="2 days ago") + + assert changelog + assert all(item["date"] for item in changelog) + + +def test_chunk_blame(git_repo): + git = GitIntegration(str(git_repo)) + blame = git.get_chunk_blame("chunks/rules.md") + + assert blame + assert blame[0]["author"] == "Test User" + assert blame[0]["line_start"] == 1 + assert blame[0]["line_end"] == 1 + assert any("Be kind" in row["content"] for row in blame) + + +def test_detect_agent_md_changes(git_repo): + git = GitIntegration(str(git_repo)) + assert git.detect_agent_md_changes("HEAD~1") + + +def test_install_hooks(git_repo): + git = GitIntegration(str(git_repo)) + git.install_hooks(str(git_repo)) + + hooks_dir = git_repo / ".git" / "hooks" + assert (hooks_dir / "pre-commit").exists() + assert (hooks_dir / "post-commit").exists() + assert "ShelfAI git hook" in (hooks_dir / "pre-commit").read_text(encoding="utf-8") + + +def test_uninstall_hooks(git_repo): + git = GitIntegration(str(git_repo)) + git.install_hooks(str(git_repo)) + git.uninstall_hooks() + + hooks_dir = git_repo / ".git" / "hooks" + assert not (hooks_dir / "pre-commit").exists() + assert not (hooks_dir / "post-commit").exists() + + +def test_non_git_repo_graceful(tmp_path): + git = GitIntegration(str(tmp_path)) + assert not git.is_git_repo() + + with pytest.raises(RuntimeError, match="Not a git repository"): + git.get_chunk_history("chunks/rules.md") + + +def test_cli_git_history(git_repo): + result = runner.invoke(app, ["git", "history", "chunks/rules.md", "--repo", str(git_repo)]) + + assert result.exit_code == 0 + assert "Git history for" in result.output + assert "Refine rules chunk" in result.output + + +def test_cli_git_install_uninstall_hooks(git_repo): + install = runner.invoke(app, ["git", "install-hooks", "--repo", str(git_repo), "--shelf", str(git_repo)]) + assert install.exit_code == 0 + + uninstall = runner.invoke(app, ["git", "uninstall-hooks", "--repo", str(git_repo)]) + assert uninstall.exit_code == 0 From 5a33dc1009e61bd29b09ffe71d5642ccaa9e7ffb Mon Sep 17 00:00:00 2001 From: Noah Freedman Date: Tue, 31 Mar 2026 03:27:02 -0400 Subject: [PATCH 2/2] fix: catch CalledProcessError in get_chunk_blame for uncommitted files git blame on an uncommitted or untracked file raises CalledProcessError. Now it raises a descriptive ValueError with the git error message. Co-Authored-By: Claude Sonnet 4.6 --- src/shelfai/core/git_integration.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/shelfai/core/git_integration.py b/src/shelfai/core/git_integration.py index c0dd244..e4be630 100644 --- a/src/shelfai/core/git_integration.py +++ b/src/shelfai/core/git_integration.py @@ -125,9 +125,15 @@ def get_chunk_blame(self, chunk_path: str) -> list[dict]: """ self._ensure_git_repo() rel_path = self._chunk_path_for_git(chunk_path) - result = self._run_git( - ["blame", "--line-porcelain", "--date=iso-strict", "--", rel_path] - ) + try: + result = self._run_git( + ["blame", "--line-porcelain", "--date=iso-strict", "--", rel_path] + ) + except subprocess.CalledProcessError as exc: + raise ValueError( + f"Cannot blame '{chunk_path}': file may be uncommitted or not tracked by git. " + f"git error: {exc.stderr.strip()}" + ) from exc lines = result.stdout.splitlines() blame_rows: list[dict] = []