diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md deleted file mode 100644 index 6d30d43..0000000 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: "[BUG]" -labels: bug -assignees: '' - ---- - -**Describe the bug** - - -**To reproduce** - - -**Expected behavior** - - -**Environment** -codesteward version: -Backend: -Transport: -OS: - -**Additional context** - diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..d3b0749 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,66 @@ +name: Bug Report +description: Report a bug or unexpected behavior +labels: ["bug"] +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to report a bug. Please fill out the sections below so we can reproduce and fix the issue. + + - type: textarea + id: description + attributes: + label: Description + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: steps + attributes: + label: Steps to Reproduce + description: How can we reproduce the issue? + placeholder: | + 1. Configure ... + 2. Run ... + 3. Observe ... + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: What did you expect to happen? + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual Behavior + description: What actually happened? Include error messages or logs if available. + validations: + required: true + + - type: textarea + id: environment + attributes: + label: Environment + description: Relevant environment details. 
+ placeholder: | + - Project version: v0.1.0 + - OS: Ubuntu 22.04 / macOS 15 / Windows 11 + - Runtime/language version (if applicable): Go 1.22 / Node 22 / Python 3.12 + - Deployment method: Docker Compose / Helm / binary / npm + render: text + validations: + required: false + + - type: textarea + id: additional + attributes: + label: Additional Context + description: Any other context, screenshots, or logs that might help. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..e97c97a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Security Vulnerability + url: mailto:security@bitkaio.com + about: Do not open a public issue for security vulnerabilities. Email us instead. diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index 5c5082e..0000000 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: "[FEATURE]" -labels: enhancement -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** - - -**Describe the solution you'd like** - - -**Describe alternatives you've considered** - - -**Additional context** - diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..9efe843 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,40 @@ +name: Feature Request +description: Suggest a new feature or improvement +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Thank you for suggesting an improvement. Please describe the feature and why it would be useful. + + - type: textarea + id: problem + attributes: + label: Problem + description: What problem does this feature solve? 
Is it related to a frustration or limitation? + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed Solution + description: Describe the solution you'd like. How should it work? + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: Any alternative solutions or workarounds you've considered. + validations: + required: false + + - type: textarea + id: additional + attributes: + label: Additional Context + description: Any other context, mockups, or references that might help. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml new file mode 100644 index 0000000..643fbbf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -0,0 +1,32 @@ +name: Question +description: Ask a question about usage, configuration, or behavior +labels: ["question"] +body: + - type: markdown + attributes: + value: | + Have a question? We're happy to help. Please provide enough detail so we can give a useful answer. + + - type: textarea + id: question + attributes: + label: Question + description: What would you like to know? + validations: + required: true + + - type: textarea + id: context + attributes: + label: Context + description: What are you trying to do? Include your setup, configuration, or use case. + validations: + required: false + + - type: textarea + id: attempted + attributes: + label: What I've Tried + description: Any documentation, searches, or approaches you've already tried. + validations: + required: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bade72..f1bfc66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,18 @@ Both packages share a version number and are always released together. 
## [Unreleased] +### Added — codesteward-graph + +- `.codestewardignore` support: users can place a `.codestewardignore` file in the root of the + repository being analyzed to exclude files and directories from graph construction using + gitignore-style patterns (`**/*.generated.ts`, `internal/`, `src/fixtures/`). The file is + optional — behavior is unchanged when it is absent. Powered by `pathspec` (new direct dependency). +- Extended the hardcoded `_IGNORED_DIRS` blocklist with additional build artifacts, framework + caches, and vendored-dependency directories: `out`, `.output`, `.svelte-kit`, `.solid`, `.turbo`, + `.parcel-cache`, `.vitepress`, `.docusaurus`, `storybook-static`, `htmlcov`, `.nyc_output`, + `site-packages`, `.ruff_cache`, `.hypothesis`, `.hg`, `.svn`, `target`, `bin`, `obj`, `vendor`, + `.gradle`, `.cache`; added `.dist-info` to `_IGNORED_DIR_SUFFIXES`. + --- ## [0.3.0] — 2026-03-20 diff --git a/CLA.md b/CLA.md index 10f64a5..b22fbf8 100644 --- a/CLA.md +++ b/CLA.md @@ -1,28 +1,65 @@ # Contributor License Agreement (CLA) -This Contributor License Agreement ("Agreement") is between you ("Contributor") and the project owner/maintainer ("Maintainer"). +**Version 1.0 — Effective April 2026** + +This Contributor License Agreement ("Agreement") is between you ("Contributor") and bitkaio LLC, a Wyoming limited liability company ("Company"). + +By submitting a Contribution to this project, you accept and agree to the terms of this Agreement. Acceptance is indicated by any of the following: signing via a CLA bot (e.g., CLA Assistant), adding your name to a signature file, or any other method designated by the Company. ## 1. Definitions -"Contribution" means any code, documentation, or other material you submit to this repository, including via pull requests, issues, or patches. -## 2. 
Grant of Rights -Contributor grants Maintainer a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to: -- use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute the Contribution and derivative works of the Contribution. +"Contribution" means any code, documentation, or other material you submit to this project, including via pull requests, issues, patches, or any other form of submission. + +"Project" means the software project to which the Contribution is made and all associated repositories maintained by the Company. + +## 2. Grant of Copyright License + +Contributor hereby grants to the Company a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to: + +- use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute the Contribution and derivative works thereof. -This license allows Maintainer to distribute the Contribution as part of the project under the project's current license **or any future license**, including proprietary licenses. +The Company may sublicense the Contribution under any terms it chooses, including terms that differ from or are more restrictive than the license under which the Project is currently distributed. This includes, without limitation, the right to distribute the Contribution as part of the Project under any open-source license, proprietary license, or commercial license, whether such license exists now or is created in the future. -## 3. Patent License (optional but recommended) -Contributor grants Maintainer a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated below) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Contribution, where such license applies only to patent claims licensable by Contributor that are necessarily infringed by the Contribution. +## 3. 
Grant of Patent License -If Contributor initiates patent litigation alleging that the Contribution (or the project) infringes a patent, then any patent license granted under this Agreement for that Contribution terminates as of the date such litigation is filed. +Contributor hereby grants to the Company a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated below) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Contribution, where such license applies only to patent claims licensable by Contributor that are necessarily infringed by the Contribution alone or by combination of the Contribution with the Project to which it was submitted. + +If Contributor institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Contribution or the Project constitutes direct or contributory patent infringement, then any patent license granted under this Agreement for that Contribution shall terminate as of the date such litigation is filed. ## 4. Contributor Representations + Contributor represents that: -- they have the legal right to grant the licenses in this Agreement; and -- each Contribution is their original work or they have the right to submit it under these terms. -## 5. No Obligation -Maintainer has no obligation to include a Contribution in the project. 
+(a) they have the legal right to grant the licenses in this Agreement; + +(b) each Contribution is their original work, or they have obtained sufficient rights from any third party to submit it under these terms; + +(c) if the Contribution is made in the course of employment, or the Contributor's employer has intellectual property rights over the Contribution, the Contributor has received permission from their employer to make the Contribution and grant the licenses in this Agreement, or the Contributor's employer has waived such rights for the Contribution; + +(d) they will promptly notify the Company if any of the above representations become inaccurate. + +## 5. Entity Contributions + +If the Contribution is submitted on behalf of a legal entity (e.g., an employer or organization), the person submitting the Contribution represents that they are authorized to do so and to bind that entity to this Agreement. In that case, "Contributor" refers to the entity, and the individual signing acts as its authorized representative. + +## 6. Assignment and Successors + +The Company may assign its rights and obligations under this Agreement to any entity that controls, is controlled by, or is under common control with the Company, or to any successor in interest whether by merger, acquisition, reorganization, or transfer of substantially all assets. This Agreement shall be binding upon and inure to the benefit of the parties and their respective successors and assigns. + +Contributor may not assign this Agreement without the prior written consent of the Company. + +## 7. No Obligation + +The Company has no obligation to include any Contribution in the Project. + +## 8. Trademarks + +This Agreement does not grant Contributor any rights in the Company's trademarks, service marks, trade names, or branding. + +## 9. Governing Law + +This Agreement shall be governed by and construed in accordance with the laws of the State of Wyoming, without regard to conflict of law principles. 
+ +## 10. Entire Agreement -## 6. Miscellaneous -This Agreement does not grant Contributor any rights in Maintainer trademarks or branding. \ No newline at end of file +This Agreement constitutes the entire agreement between the parties concerning the subject matter hereof and supersedes all prior agreements and understandings. This Agreement may only be modified by a written amendment signed by both parties. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eeb1fa4..8262065 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,25 +1,72 @@ -## Issue-first workflow (required) +# Contributing -All contributions **must be associated with a GitHub Issue**. +Thank you for your interest in contributing to this project. This guide applies to all repositories maintained by bitkaio LLC. -### Before you start -- Create a new issue describing the change, or comment on an existing issue to confirm you’re working on it. -- For small changes (typos, tiny refactors), you can create a lightweight issue labeled `chore`. +## Contributor License Agreement -### Branch naming -Create your branch from an issue: -- `issue/-short-description` - - Example: `issue/123-fix-typo` +All contributors must agree to the [Contributor License Agreement](CLA.md) before their first contribution can be merged. This is a one-time requirement per contributor. -### Pull Request requirements -Your Pull Request must: -- Reference the issue in the PR description using one of: - - `Closes #` (preferred when the PR fully resolves it) - - `Refs #` (when it’s partial or related) -- Include a short summary of what changed and why. +When you open your first pull request, a CLA bot will check whether you have signed. If not, it will guide you through the process. Your signature covers all repositories under bitkaio LLC. 
-### Exceptions (maintainers only) -Maintainers may merge urgent fixes without a prior issue (e.g., security, production breakages), -but must open an issue immediately after the merge and link the commit/PR. +## Getting Started -PRs that are not linked to an issue may be closed without review. \ No newline at end of file +1. Fork the repository +2. Create a feature branch from `main` (`git checkout -b feat/your-feature`) +3. Make your changes +4. Run the project's tests and linters (see the project README for commands) +5. Open a pull request against `main` + +## Pull Request Guidelines + +- Keep PRs focused — one feature, fix, or refactor per PR +- Write a clear title and description explaining **what** changed and **why** +- Include tests for new functionality and bug fixes +- Ensure existing tests pass before submitting +- Link related issues in the PR description (e.g., `Closes #42`) + +## Commit Messages + +Use [Conventional Commits](https://www.conventionalcommits.org/) format: + +```text +type(scope): short description + +Optional longer explanation. +``` + +Common types: `feat`, `fix`, `docs`, `test`, `refactor`, `chore`, `ci`. + +Examples: + +- `feat(search): add query expansion support` +- `fix(scraper): handle timeout on redirect chains` +- `docs: update configuration reference` + +## Code Style + +- Follow the conventions already established in the codebase +- Run the project's linter before submitting (formatting, imports, etc.) +- Prefer clarity over cleverness +- Add comments only where the intent isn't obvious from the code + +## Reporting Issues + +- Use GitHub Issues for bug reports and feature requests +- For bugs, include: steps to reproduce, expected behavior, actual behavior, and environment details +- Check existing issues before opening a new one + +## Security Vulnerabilities + +Do **not** open a public issue for security vulnerabilities. Instead, email security@bitkaio.com with a description of the vulnerability. 
We will respond within 72 hours. + +## Code of Conduct + +Be respectful and constructive. We expect all contributors to act professionally and treat others with courtesy. Harassment, discrimination, or disruptive behavior will not be tolerated. + +## Questions + +If you're unsure about anything, open a discussion or issue — we're happy to help. + +## License + +By contributing, you agree that your contributions will be licensed under the terms described in the [CLA](CLA.md). The project's current license is specified in the [LICENSE](LICENSE) file. diff --git a/packages/codesteward-graph/README.md b/packages/codesteward-graph/README.md index c0c06c1..eb1ce40 100644 --- a/packages/codesteward-graph/README.md +++ b/packages/codesteward-graph/README.md @@ -46,6 +46,27 @@ async def main(): asyncio.run(main()) ``` +## Ignoring files and directories + +By default, the graph builder skips common build artifacts and cache directories +(`node_modules`, `dist`, `.venv`, `target`, etc.). + +For project-specific exclusions, place a `.codestewardignore` file in the root of +the repository being analyzed. It uses the same pattern syntax as `.gitignore`: + +```gitignore +# Exclude generated files +**/*.generated.ts + +# Exclude an entire directory +internal/ + +# Exclude specific paths +src/fixtures/large-dataset.py +``` + +The file is optional — if absent, only the built-in blocklist applies. 
+ ## License BSD 3-Clause — Copyright (c) 2026, bitkaio LLC diff --git a/packages/codesteward-graph/pyproject.toml b/packages/codesteward-graph/pyproject.toml index 2711600..432b1ef 100644 --- a/packages/codesteward-graph/pyproject.toml +++ b/packages/codesteward-graph/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "structlog>=24.0", "pyyaml>=6.0", "neo4j>=5.0", + "pathspec>=1.0", ] [project.urls] diff --git a/packages/codesteward-graph/src/codesteward/engine/graph_builder.py b/packages/codesteward-graph/src/codesteward/engine/graph_builder.py index ad7574e..3676cb1 100644 --- a/packages/codesteward-graph/src/codesteward/engine/graph_builder.py +++ b/packages/codesteward-graph/src/codesteward/engine/graph_builder.py @@ -22,6 +22,7 @@ from pathlib import Path from typing import Any +import pathspec import structlog from codesteward.engine.parsers import ( # noqa: F401 all_source_extensions, @@ -48,9 +49,33 @@ # --------------------------------------------------------------------------- _IGNORED_DIRS = frozenset( - ["node_modules", "dist", "build", ".next", ".nuxt", "coverage", "__pycache__", ".git"] + [ + # JS/TS build outputs and framework caches + "node_modules", "dist", "build", "out", ".next", ".nuxt", ".output", + ".svelte-kit", ".solid", ".turbo", ".parcel-cache", ".vitepress", + ".docusaurus", "storybook-static", + # Test/coverage artifacts + "coverage", "htmlcov", ".nyc_output", + # Python runtime and tool caches + "__pycache__", ".venv", "venv", "site-packages", + ".mypy_cache", ".pytest_cache", ".ruff_cache", ".tox", ".eggs", + ".hypothesis", + # Version control + ".git", ".hg", ".svn", + # Compiled output (Rust, Java, .NET) + "target", "bin", "obj", + # Vendored dependencies + "vendor", + # Misc tool caches + ".gradle", ".cache", + ] ) +_IGNORED_DIR_SUFFIXES = (".egg-info", ".dist-info") + +# Name of the per-project ignore file that users place in their repo root +_CODESTEWARD_IGNORE_FILE = ".codestewardignore" + # 
=========================================================================== # Backward-compatibility shim for MultiLanguageParser @@ -582,6 +607,31 @@ def parse_file( # -- Private helpers ----------------------------------------------------- + def _load_ignore_spec(self, root: Path) -> pathspec.PathSpec: + """Load ignore patterns from .codestewardignore in the project root. + + Reads the .codestewardignore file (if present) from the root of the + repository being analyzed and returns a compiled PathSpec using + gitignore-style pattern matching. + + Args: + root: Root directory of the repository being analyzed. + + Returns: + A PathSpec that matches repo-relative paths to be excluded. + Returns an empty (no-op) PathSpec if the file does not exist. + """ + ignore_file = root / _CODESTEWARD_IGNORE_FILE + if not ignore_file.is_file(): + return pathspec.PathSpec.from_lines("gitignore", []) + try: + lines = ignore_file.read_text(encoding="utf-8").splitlines() + log.debug("codestewardignore_loaded", path=str(ignore_file), patterns=len(lines)) + return pathspec.PathSpec.from_lines("gitignore", lines) + except OSError as exc: + log.warning("codestewardignore_read_failed", path=str(ignore_file), error=str(exc)) + return pathspec.PathSpec.from_lines("gitignore", []) + def _collect_files(self, root: Path, language: str) -> list[Path]: """Walk the repository and collect all parseable source files. @@ -593,12 +643,20 @@ def _collect_files(self, root: Path, language: str) -> list[Path]: Sorted list of Path objects for parseable files. 
""" all_exts = all_source_extensions() + ignore_spec = self._load_ignore_spec(root) files: list[Path] = [] for path in root.rglob("*"): if not path.is_file(): continue - # Skip ignored directories - if any(part in _IGNORED_DIRS for part in path.parts): + # Skip hardcoded ignored directories + if any( + part in _IGNORED_DIRS or part.endswith(_IGNORED_DIR_SUFFIXES) + for part in path.parts + ): + continue + # Skip files matched by .codestewardignore + rel = path.relative_to(root) + if ignore_spec.match_file(str(rel)): continue if path.suffix in all_exts: files.append(path) diff --git a/templates/README.md b/templates/README.md new file mode 100644 index 0000000..52ca6a9 --- /dev/null +++ b/templates/README.md @@ -0,0 +1,310 @@ +# Codesteward — Setup Templates + +Codesteward parses your codebase into a structural graph stored in Neo4j and exposes it as an MCP server your AI agent can query. Instead of reading files one by one, the agent calls `codebase_graph_query` to get cross-file answers — call chains, auth guards, dependency trees — in a single round trip. + +This folder contains the config and instruction files you copy to connect it to your tool. + +--- + +## How to use this folder + +Every AI tool needs two things: + +| | What it does | Where it lives | +|---|---|---| +| **MCP config** | Tells the tool where the server is running | Tool-specific location (see each section) | +| **Instructions file** | Tells the agent to use graph tools instead of reading files | Copied into your repo root | + +Pick your tool below and follow its section. If you want the setup to work across all your repositories automatically, use the [Global setup](#global-setup) instead. 
+ +--- + +## Jump to your tool + +**Per-project** — copy files into the repo you want to analyse (do this once per repo) + +- [Claude Code](#claude-code) +- [Cursor](#cursor) +- [Windsurf](#windsurf) +- [VS Code — GitHub Copilot](#vs-code--github-copilot) +- [Gemini CLI](#gemini-cli) +- [OpenAI Codex CLI](#openai-codex-cli) + +**Global** — configure once, works in every repository without any per-project files + +- [Claude Code (Global)](#claude-code-global) +- [OpenAI Codex CLI (Global)](#openai-codex-cli-global) +- [Gemini CLI (Global)](#gemini-cli-global) + +--- + +## Step 0 — Start the server + +> **Skip this step if you are using stdio via uvx.** The client spawns the server automatically; nothing to start. + +If you are using the Docker + Neo4j setup (recommended for persistent graphs): + +```bash +# Run from the codesteward repository root +export REPO_PATH=/path/to/the/repo/you/want/to/analyse +docker compose up -d +``` + +The server starts at `http://localhost:3000/sse`. All MCP configs in this folder point to that address. + +**Which should I use?** + +| | Docker + Neo4j | stdio via uvx | +|---|---|---| +| Graph persists between sessions | Yes | No — rebuilt each time | +| Requires Docker | Yes | No | +| Requires [uv](https://docs.astral.sh/uv/) | No | Yes | +| Best for | Teams, daily use | Quick start, one-off analysis | + +--- + +## Per-project setup + +> **Note:** Run all `cp` commands from the root of the **codesteward repository**, not from the repo you are analysing. 
+ +--- + +### Claude Code + +```bash +cp templates/.mcp.json /path/to/your/repo/.mcp.json +cp templates/CLAUDE.md /path/to/your/repo/CLAUDE.md +``` + +| File | Purpose | +|------|---------| +| `.mcp.json` | Registers the MCP server with Claude Code (picked up automatically) | +| `CLAUDE.md` | Loaded at session start — instructs the agent to use graph tools for structural questions | + +> **Already have a `CLAUDE.md`?** Append instead of overwriting: +> ```bash +> cat templates/CLAUDE.md >> /path/to/your/repo/CLAUDE.md +> ``` + +> **Using stdio via uvx instead of Docker?** Replace the contents of `.mcp.json` with the stdio `mcpServers` block shown in Step 1 of [Claude Code (Global)](#claude-code-global). + +--- + +### Cursor + +```bash +mkdir -p /path/to/your/repo/.cursor +cp templates/cursor/mcp.json /path/to/your/repo/.cursor/mcp.json +cp templates/.cursorrules /path/to/your/repo/.cursorrules +``` + +| File | Purpose | +|------|---------| +| `.cursor/mcp.json` | Registers the MCP server with Cursor | +| `.cursorrules` | Instructs Cursor to use graph tools for structural questions | + +Reload the window after copying: `Ctrl+Shift+P` → *Reload Window*. + +> **Prefer the newer rules format?** Copy `.cursorrules` to `.cursor/rules/codesteward.md` instead — same content, different location. + +--- + +### Windsurf + +```bash +cp templates/.windsurfrules /path/to/your/repo/.windsurfrules +``` + +| File | Purpose | +|------|---------| +| `.windsurfrules` | Instructs Windsurf to use graph tools for structural questions | + +Register the MCP server via the Windsurf UI: + +1. Open **Windsurf Settings → MCP Servers → Add Server** +2. Fill in: + - **Name:** `codesteward-graph` + - **Type:** `HTTP` + - **URL:** `http://localhost:3000/sse` +3. Click **Save**, then reload the window. 
+ +--- + +### VS Code — GitHub Copilot + +```bash +mkdir -p /path/to/your/repo/.vscode /path/to/your/repo/.github +cp templates/vscode/mcp.json /path/to/your/repo/.vscode/mcp.json +cp templates/copilot-instructions.md /path/to/your/repo/.github/copilot-instructions.md +``` + +| File | Purpose | +|------|---------| +| `.vscode/mcp.json` | Registers the MCP server with VS Code | +| `.github/copilot-instructions.md` | Instructs Copilot to use graph tools for structural questions | + +VS Code will prompt you to enable the server when it detects the config file. Accept the prompt, then reload the window. + +--- + +### Gemini CLI + +```bash +cp templates/GEMINI.md /path/to/your/repo/GEMINI.md +``` + +| File | Purpose | +|------|---------| +| `GEMINI.md` | Loaded each session — instructs the agent to use graph tools | + +Gemini CLI has no project-level MCP config file. Register the server globally — see [Gemini CLI (Global)](#gemini-cli-global). + +--- + +### OpenAI Codex CLI + +```bash +cp templates/AGENTS.md /path/to/your/repo/AGENTS.md +``` + +| File | Purpose | +|------|---------| +| `AGENTS.md` | Loaded each session — instructs the agent to use graph tools | + +Codex has no project-level MCP config file. Register the server globally — see [OpenAI Codex CLI (Global)](#openai-codex-cli-global). + +--- + +## Global setup + +Configure once in your home directory. The agent picks up the server and instructions automatically in every repository you open — no per-project files needed. + +--- + +### Claude Code (Global) + +**Step 1 — Register the MCP server** + +Add the following block to `~/.claude/settings.json` (create the file if it does not exist). If the file already has other settings, add only the `mcpServers` object — do not overwrite the whole file. 
+ +```json +{ + "mcpServers": { + "codesteward": { + "command": "uvx", + "args": ["codesteward-mcp[graph-all]", "--transport", "stdio"], + "env": { + "NEO4J_URI": "bolt://localhost:7687", + "NEO4J_USER": "neo4j", + "NEO4J_PASSWORD": "your-neo4j-password" + } + } + } +} +``` + +Claude Code spawns the server as a subprocess — no Docker needed. `uvx` downloads and caches the package on first run. Omit the `env` block to run without Neo4j (graph held in memory per session). + +**Step 2 — Add the global instruction file** + +```bash +cp templates/global-claude-code/CLAUDE.md ~/.claude/CLAUDE.md +``` + +This file is loaded at the start of every Claude Code session. It derives `repo_id` from the current directory and tells the agent to check graph freshness before answering structural questions — works for any repository you open. + +**Step 3 — Add the `/codesteward` skill** *(optional)* + +```bash +mkdir -p ~/.claude/skills +cp templates/global-claude-code/codesteward-skill.md ~/.claude/skills/codesteward.md +``` + +Adds a `/codesteward` slash command you can invoke for an explicit guided workflow: status check → rebuild → query → taint scan. + +--- + +### OpenAI Codex CLI (Global) + +**Step 1 — Register the MCP server** + +Add the following to `~/.codex/config.yaml` (create the file if it does not exist). If the file already has other settings, add only the `mcp_servers` block — do not overwrite the whole file. + +```yaml +mcp_servers: + codesteward: + command: uvx + args: + - "codesteward-mcp[graph-all]" + - "--transport" + - "stdio" + env: + NEO4J_URI: "bolt://localhost:7687" + NEO4J_USER: "neo4j" + NEO4J_PASSWORD: "your-neo4j-password" +``` + +**Step 2 — Add the global instruction file** + +```bash +cp templates/global-codex/AGENTS.md ~/AGENTS.md +``` + +Codex reads `AGENTS.md` from `~/AGENTS.md`, the repo root, and the current directory (in that order). The global file covers every repository automatically. 
+ +--- + +### Gemini CLI (Global) + +Gemini CLI only supports global MCP configuration. Add the server to `~/.gemini/settings.json` (create the file if it does not exist): + +```json +{ + "mcpServers": { + "codesteward-graph": { + "url": "http://localhost:3000/sse" + } + } +} +``` + +Then copy `templates/GEMINI.md` into each repository you want to analyse (see [Gemini CLI](#gemini-cli) per-project setup above). + +--- + +## Quick reference + +| Tool | MCP config location | Instructions file | +|------|--------------------|--------------------| +| Claude Code | `<repo>/.mcp.json` | `<repo>/CLAUDE.md` | +| Claude Code (global) | `~/.claude/settings.json` | `~/.claude/CLAUDE.md` | +| Cursor | `<repo>/.cursor/mcp.json` | `<repo>/.cursorrules` | +| Windsurf | Settings UI | `<repo>/.windsurfrules` | +| VS Code / GitHub Copilot | `<repo>/.vscode/mcp.json` | `<repo>/.github/copilot-instructions.md` | +| Gemini CLI | `~/.gemini/settings.json` | `<repo>/GEMINI.md` | +| OpenAI Codex (per-project) | `~/.codex/config.yaml` | `<repo>/AGENTS.md` | +| OpenAI Codex (global) | `~/.codex/config.yaml` | `~/AGENTS.md` | + +--- + +## Verify the connection + +After completing setup, restart your tool and open a repository. Then ask the agent: + +> *"Use graph_status to check if the codebase graph has been built."* + +The agent should call `graph_status()` and return metadata including node and edge counts. If `last_build` is null, the graph has not been indexed yet — tell it to build: + +> *"Run graph_rebuild to index this codebase."* + +Once the graph is built, test a structural query: + +> *"Use codebase_graph_query to find all functions that call authenticate."* + +**If the agent reads files instead of calling graph tools**, check two things: +1. The instructions file (`CLAUDE.md`, `.cursorrules`, etc.) is in the **project root** of the repo you opened. +2. The MCP server config is in the **correct location** for your tool (see the quick reference table above). 
+ +--- + +For detailed troubleshooting and all supported languages, see [AGENT_SETUP.md](../AGENT_SETUP.md). diff --git a/tests/test_engine/test_graph_builder.py b/tests/test_engine/test_graph_builder.py index 4a0f267..4969ca8 100644 --- a/tests/test_engine/test_graph_builder.py +++ b/tests/test_engine/test_graph_builder.py @@ -724,6 +724,64 @@ async def test_javascript_files_parsed_alongside_typescript(self, repo_dir: Path # 2 .ts files + 1 .js file = 3 total assert summary["files_parsed"] == 3 + # -- .codestewardignore tests -------------------------------------------- + # These tests call _collect_files() directly to isolate ignore-file logic + # from the tree-sitter parser availability. + + def test_codestewardignore_excludes_exact_path(self, repo_dir: Path) -> None: + """An exact path in .codestewardignore removes that file from collection.""" + (repo_dir / ".codestewardignore").write_text("src/auth.ts\n") + builder = GraphBuilder() + collected = builder._collect_files(repo_dir, "typescript") + names = {p.name for p in collected} + assert "auth.ts" not in names + assert "router.ts" in names + assert "util.js" in names + + def test_codestewardignore_excludes_directory(self, repo_dir: Path) -> None: + """A trailing-slash pattern excludes all files inside that directory.""" + (repo_dir / ".codestewardignore").write_text("src/\n") + builder = GraphBuilder() + collected = builder._collect_files(repo_dir, "typescript") + assert collected == [] + + def test_codestewardignore_glob_pattern(self, repo_dir: Path) -> None: + """A glob pattern in .codestewardignore skips all matching files.""" + (repo_dir / "src" / "index.generated.ts").write_text("export {};") + (repo_dir / ".codestewardignore").write_text("**/*.generated.ts\n") + builder = GraphBuilder() + collected = builder._collect_files(repo_dir, "typescript") + names = {p.name for p in collected} + assert "index.generated.ts" not in names + # Original 3 source files are still collected + assert {"auth.ts", 
"router.ts", "util.js"}.issubset(names) + + def test_no_codestewardignore_is_noop(self, repo_dir: Path) -> None: + """Absence of .codestewardignore does not affect file collection.""" + assert not (repo_dir / ".codestewardignore").exists() + builder = GraphBuilder() + collected = builder._collect_files(repo_dir, "typescript") + assert len(collected) == 3 + + def test_codestewardignore_read_error_is_noop( + self, repo_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """An unreadable .codestewardignore logs a warning and collects all files.""" + (repo_dir / ".codestewardignore").write_text("src/auth.ts\n") + + original_read_text = Path.read_text + + def raise_on_ignore(self: Path, **kwargs: object) -> str: + if self.name == ".codestewardignore": + raise OSError("permission denied") + return original_read_text(self, **kwargs) + + monkeypatch.setattr(Path, "read_text", raise_on_ignore) + builder = GraphBuilder() + collected = builder._collect_files(repo_dir, "typescript") + # Ignore file had no effect; all 3 source files still collected + assert len(collected) == 3 + # =========================================================================== # C# parser tests diff --git a/uv.lock b/uv.lock index a9467d4..7cb0c14 100644 --- a/uv.lock +++ b/uv.lock @@ -129,10 +129,11 @@ wheels = [ [[package]] name = "codesteward-graph" -version = "0.2.2" +version = "0.3.0" source = { editable = "packages/codesteward-graph" } dependencies = [ { name = "neo4j" }, + { name = "pathspec" }, { name = "pyyaml" }, { name = "structlog" }, ] @@ -205,6 +206,7 @@ requires-dist = [ { name = "codesteward-graph", extras = ["graph-rust"], marker = "extra == 'graph-all'" }, { name = "codesteward-graph", extras = ["graph-scala"], marker = "extra == 'graph-all'" }, { name = "neo4j", specifier = ">=5.0" }, + { name = "pathspec", specifier = ">=1.0" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "structlog", specifier = ">=24.0" }, { name = "tree-sitter", marker = "extra == 'graph'", 
specifier = ">=0.24" }, @@ -233,7 +235,7 @@ provides-extras = ["graph", "graph-csharp", "graph-kotlin", "graph-scala", "grap [[package]] name = "codesteward-mcp" -version = "0.2.2" +version = "0.3.0" source = { editable = "packages/codesteward-mcp" } dependencies = [ { name = "codesteward-graph" },