Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions code_review_graph/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,20 @@ def _cli_post_process(store: GraphStore) -> None:
print(f"Communities: {pp['communities_detected']}")


def _handle_data_dir_option(args, repo_root: Path) -> None:
    """Record a ``--data-dir`` choice for *repo_root* in the registry.

    Nothing happens when *args* has no ``data_dir`` attribute or when that
    attribute is falsy. Otherwise the directory is user-expanded, resolved
    and created (parents included), then stored through
    ``Registry().set_data_dir``.

    Args:
        args: Parsed CLI namespace; ``data_dir`` is optional on it.
        repo_root: Repository root the data directory is associated with.

    Any exception raised along the way is logged as an error and the
    process exits with status 1.
    """
    requested = getattr(args, "data_dir", None)
    if not requested:
        return

    try:
        # The import sits inside the ``try``, so a failure to import is
        # handled like any other setup error below.
        from .registry import Registry

        data_dir_path = Path(requested).expanduser().resolve()
        data_dir_path.mkdir(parents=True, exist_ok=True)
        Registry().set_data_dir(str(repo_root), str(data_dir_path))
        logging.info(f"Graph database will be stored at: {data_dir_path}")
    except Exception as exc:
        logging.error(f"Failed to set data directory: {exc}")
        sys.exit(1)


def main() -> None:
"""Main CLI entry point."""
ap = argparse.ArgumentParser(
Expand Down Expand Up @@ -414,6 +428,11 @@ def main() -> None:
action="store_true",
help="Skip all post-processing (raw parse only)",
)
build_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# update
update_cmd = sub.add_parser("update", help="Incremental update (only changed files)")
Expand All @@ -429,6 +448,11 @@ def main() -> None:
action="store_true",
help="Skip all post-processing (raw parse only)",
)
update_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# postprocess
pp_cmd = sub.add_parser(
Expand All @@ -439,14 +463,29 @@ def main() -> None:
pp_cmd.add_argument("--no-flows", action="store_true", help="Skip flow detection")
pp_cmd.add_argument("--no-communities", action="store_true", help="Skip community detection")
pp_cmd.add_argument("--no-fts", action="store_true", help="Skip FTS rebuild")
pp_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# watch
watch_cmd = sub.add_parser("watch", help="Watch for changes and auto-update")
watch_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
watch_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# status
status_cmd = sub.add_parser("status", help="Show graph statistics")
status_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
status_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# visualize
vis_cmd = sub.add_parser("visualize", help="Generate interactive HTML graph visualization")
Expand All @@ -468,6 +507,11 @@ def main() -> None:
default="html",
help="Export format (default: html)",
)
vis_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# wiki
wiki_cmd = sub.add_parser("wiki", help="Generate markdown wiki from community structure")
Expand All @@ -477,6 +521,11 @@ def main() -> None:
action="store_true",
help="Regenerate all pages even if content unchanged",
)
wiki_cmd.add_argument(
"--data-dir",
default=None,
help="External directory to store graph database (useful for network shares)"
)

# register
register_cmd = sub.add_parser(
Expand Down Expand Up @@ -766,6 +815,7 @@ def main() -> None:

if args.command == "postprocess":
repo_root = Path(args.repo) if args.repo else find_project_root()
_handle_data_dir_option(args, repo_root)
db_path = get_db_path(repo_root)
store = GraphStore(db_path)
try:
Expand Down Expand Up @@ -802,6 +852,10 @@ def main() -> None:
else:
repo_root = Path(args.repo) if args.repo else find_project_root()

# Handle --data-dir for commands that support it
if args.command in ("build", "update", "detect-changes", "status", "watch", "visualize", "wiki"):
_handle_data_dir_option(args, repo_root)

db_path = get_db_path(repo_root)
store = GraphStore(db_path)

Expand Down
62 changes: 44 additions & 18 deletions code_review_graph/incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,38 @@ def find_project_root(
return start or Path.cwd()


def _write_data_dir_gitignore(data_dir: Path) -> None:
    """Create a catch-all ``.gitignore`` in *data_dir* unless one exists.

    The generated file consists of two header comments followed by a single
    ``*`` pattern, so nothing inside the directory gets committed by
    accident. A ``.gitignore`` that is already present is left untouched,
    and an ``OSError`` raised while writing is ignored.
    """
    target = data_dir / ".gitignore"
    if target.exists():
        return

    contents = (
        "# Auto-generated by code-review-graph — do not commit database files.\n"
        "# The graph.db contains absolute paths and code structure metadata.\n"
        "*\n"
    )
    try:
        # The explicit UTF-8 encoding is mandatory: the header contains an
        # em-dash (U+2014), which cp1252 cannot represent. Without it,
        # Windows would fall back to the system codepage and produce a file
        # that later fails to decode as UTF-8 (see issue #239).
        target.write_text(contents, encoding="utf-8")
    except OSError:
        # Best-effort guard only; a read-only data dir (rare) is acceptable.
        pass


def get_data_dir(repo_root: Path) -> Path:
"""Return the directory where this project's graph data lives.

Resolution priority:
1. Registry entry for this repo (set via --data-dir)
2. CRG_DATA_DIR environment variable (global override)
3. Default: <repo>/.code-review-graph/

By default, ``<repo_root>/.code-review-graph``. If the
``CRG_DATA_DIR`` environment variable is set, it is used verbatim
instead — letting you keep graphs outside the working tree (useful
Expand All @@ -186,31 +215,28 @@ def get_data_dir(repo_root: Path) -> Path:
``.gitignore`` (with ``*``) is written so any accidentally-nested
files never get committed. Both are idempotent.
"""
# Check registry first
try:
from .registry import Registry
registry_data_dir = Registry().get_data_dir_for_repo(str(repo_root))
if registry_data_dir:
data_dir = Path(registry_data_dir).resolve()
data_dir.mkdir(parents=True, exist_ok=True)
_write_data_dir_gitignore(data_dir)
return data_dir
except Exception as exc:
# If registry lookup fails, log and fall through to other methods
logger.debug("Registry lookup failed for %s: %s", repo_root, exc)

# Check environment variable
env_override = os.environ.get("CRG_DATA_DIR", "").strip()
if env_override:
data_dir = Path(env_override).expanduser().resolve()
else:
data_dir = repo_root / ".code-review-graph"

data_dir.mkdir(parents=True, exist_ok=True)

inner_gitignore = data_dir / ".gitignore"
if not inner_gitignore.exists():
try:
# `encoding="utf-8"` is REQUIRED — the em-dash in the header is
# U+2014 which falls outside cp1252. On Windows, calling
# write_text without an encoding silently uses the system default
# codepage, producing a file that subsequently fails to decode as
# UTF-8 (see issue #239).
inner_gitignore.write_text(
"# Auto-generated by code-review-graph — do not commit database files.\n"
"# The graph.db contains absolute paths and code structure metadata.\n"
"*\n",
encoding="utf-8",
)
except OSError:
# Data dir might be read-only (rare); that's OK, it's a best-effort guard.
pass
_write_data_dir_gitignore(data_dir)

return data_dir

Expand Down
57 changes: 54 additions & 3 deletions code_review_graph/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _save(self) -> None:
json.dumps(data, indent=2) + "\n", encoding="utf-8"
)

def register(self, path: str, alias: str | None = None) -> dict[str, str]:
def register(self, path: str, alias: str | None = None, data_dir: str | None = None) -> dict[str, str]:
"""Register a repository path.

Validates that the path contains a ``.git`` or ``.code-review-graph``
Expand All @@ -63,6 +63,7 @@ def register(self, path: str, alias: str | None = None) -> dict[str, str]:
Args:
path: Absolute or relative path to the repository root.
alias: Optional short alias for the repository.
data_dir: Optional external directory for graph database.

Returns:
The registered entry dict.
Expand All @@ -84,15 +85,19 @@ def register(self, path: str, alias: str | None = None) -> dict[str, str]:
str_path = str(resolved)
for entry in self._repos:
if entry["path"] == str_path:
# Update alias if provided
# Update alias and/or data_dir if provided
if alias:
entry["alias"] = alias
self._save()
if data_dir:
entry["data_dir"] = str(Path(data_dir).resolve())
self._save()
return entry

new_entry: dict[str, str] = {"path": str_path}
if alias:
new_entry["alias"] = alias
if data_dir:
new_entry["data_dir"] = str(Path(data_dir).resolve())
self._repos.append(new_entry)
self._save()
return new_entry
Expand Down Expand Up @@ -159,6 +164,52 @@ def find_by_path(self, path: str) -> dict[str, str] | None:
return dict(entry)
return None

def set_data_dir(self, path: str, data_dir: str) -> dict[str, str]:
    """Set the external data directory for a repository.

    The repository path is resolved to an absolute path and matched
    against the ``"path"`` key of the stored entries. When a match exists
    its ``"data_dir"`` is overwritten; otherwise a new entry holding only
    ``"path"`` and ``"data_dir"`` is appended. The registry is saved in
    both cases.

    Args:
        path: Repository path (absolute or relative).
        data_dir: External directory path to store graph database. A
            leading ``~`` is expanded before the path is resolved.

    Returns:
        A copy of the updated or created registry entry. Mutating the
        returned dict does not affect the registry.
    """
    resolved = str(Path(path).resolve())
    # Expand ``~`` first; ``resolve()`` alone would treat it as a literal
    # directory name relative to the current working directory.
    data_resolved = str(Path(data_dir).expanduser().resolve())

    with self._lock:
        entry = next(
            (repo for repo in self._repos if repo["path"] == resolved),
            None,
        )
        if entry is None:
            # No entry for this repository yet: create one.
            entry = {"path": resolved}
            self._repos.append(entry)

        entry["data_dir"] = data_resolved
        self._save()
        # Always hand back a copy so callers never hold a live reference
        # to the registry's internal state, whichever branch was taken.
        return dict(entry)

def get_data_dir_for_repo(self, path: str) -> str | None:
"""Get the stored data directory for a repository.

Args:
path: Repository path (absolute or relative).

Returns:
The stored data_dir path, or None if not set.
"""
resolved = str(Path(path).resolve())
with self._lock:
for entry in self._repos:
if entry["path"] == resolved:
return entry.get("data_dir")
return None


class ConnectionPool:
"""LRU connection pool for SQLite graph databases.
Expand Down
3 changes: 2 additions & 1 deletion code_review_graph/tools/registry_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any

from ..graph import GraphStore
from ..incremental import get_db_path
from ..search import hybrid_search

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -83,7 +84,7 @@ def cross_repo_search_func(

for repo_entry in repos:
repo_path = Path(repo_entry["path"])
db_path = repo_path / ".code-review-graph" / "graph.db"
db_path = get_db_path(repo_path)
if not db_path.exists():
continue

Expand Down
Loading