diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..1114a8d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: "Spec questions, principle edits, or CLI grading" + url: "https://github.com/brettdavies/agentnative/issues/new/choose" + about: "For anything about the standard itself, file on the spec repo." + - name: "Site bugs (rendering, performance, deployment)" + url: "https://github.com/brettdavies/agentnative-site/issues/new/choose" + about: "For bugs on anc.dev, file on the site repo." diff --git a/.github/ISSUE_TEMPLATE/false-positive.yml b/.github/ISSUE_TEMPLATE/false-positive.yml new file mode 100644 index 0000000..31b1600 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/false-positive.yml @@ -0,0 +1,58 @@ +name: "False positive / false negative" +description: "Report an anc check that produces the wrong result." +labels: ["checker-bug"] +body: + - type: input + id: check-id + attributes: + label: Check ID + description: "The check ID from `anc check` output (e.g., `p4-process-exit`)." + placeholder: "p1-non-interactive" + validations: + required: true + - type: dropdown + id: bug-type + attributes: + label: Bug type + options: + - "False positive (check fails but the tool is compliant)" + - "False negative (check passes but the tool is non-compliant)" + validations: + required: true + - type: textarea + id: reproduction + attributes: + label: Reproduction + description: "The command you ran, the output, and what the correct result should be." + placeholder: | + $ anc check --command mytool --output json + Check p1-non-interactive: FAIL + Evidence: "No --no-interactive flag found" + + But mytool has --batch which serves the same purpose. 
+ validations: + required: true + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this report was AI-written, what was human-written?" + placeholder: "Entirely human-written." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative-cli "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. **Checker bugs only.** For spec questions or principle edits, file on [brettdavies/agentnative](https://github.com/brettdavies/agentnative). For site bugs, file on [brettdavies/agentnative-site](https://github.com/brettdavies/agentnative-site). + 4. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 0000000..ea94f32 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,44 @@ +name: "Feature request" +description: "Suggest a new feature or improvement for the anc checker." +labels: ["enhancement"] +body: + - type: textarea + id: description + attributes: + label: Feature description + description: "What should `anc` do that it doesn't today?" + placeholder: | + anc should support --format=sarif for integration with GitHub Code Scanning. + validations: + required: true + - type: textarea + id: use-case + attributes: + label: Use case + description: "Why do you need this? What workflow does it unblock?" + validations: + required: true + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this request was AI-written, what was human-written?" + placeholder: "Entirely human-written." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative-cli "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. **Checker features only.** For spec changes, file on [brettdavies/agentnative](https://github.com/brettdavies/agentnative). + 4. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/grade-a-cli.yml b/.github/ISSUE_TEMPLATE/grade-a-cli.yml new file mode 100644 index 0000000..3ee4709 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/grade-a-cli.yml @@ -0,0 +1,56 @@ +name: "Grade a CLI" +description: "Report findings from scoring a real CLI against the standard." +labels: ["cli-grading"] +body: + - type: input + id: tool + attributes: + label: CLI tool + description: "Name and version of the tool you scored." + placeholder: "ripgrep 14.1.0" + validations: + required: true + - type: textarea + id: findings + attributes: + label: Findings + description: | + Which principles did it pass, fail, or surprise you on? + Focus on cases where the spec got it wrong — a MUST that punishes good design, + or a gap where the spec is silent. + placeholder: | + P3 (Progressive Help): PASS — excellent `--help` hierarchy. + P4 (Actionable Errors): FAIL on exit codes — uses 1 for everything. + P7 (Bounded Responses): Spec is silent on streaming output — rg streams by default, which is fine for agents but the spec doesn't acknowledge it. + validations: + required: true + - type: textarea + id: spec-feedback + attributes: + label: What should the spec change? + description: "Optional. If your grading revealed a spec gap or mistake, describe it." + validations: + required: false + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this report was AI-written, what was human-written?" + placeholder: "Scoring done by hand; write-up drafted with AI assistance." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/pressure-test.yml b/.github/ISSUE_TEMPLATE/pressure-test.yml new file mode 100644 index 0000000..257e380 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/pressure-test.yml @@ -0,0 +1,87 @@ +name: "Pressure-test a principle" +description: "Propose a principle edit: merge, split, rewording, or requirement-tier change." +labels: ["pressure-test"] +body: + - type: dropdown + id: principle + attributes: + label: Principle + description: Which principle does this affect? + options: + - "P1 — Non-Interactive by Default" + - "P2 — Structured, Parseable Output" + - "P3 — Progressive Help Discovery" + - "P4 — Fail Fast with Actionable Errors" + - "P5 — Safe Retries and Explicit Mutation Boundaries" + - "P6 — Composable and Predictable Command Structure" + - "P7 — Bounded, High-Signal Responses" + - "Multiple / cross-cutting" + validations: + required: true + - type: dropdown + id: change-type + attributes: + label: Type of change + options: + - "Rewording (prose clarity, no requirement-tier change)" + - "Promotion (SHOULD to MUST, MAY to SHOULD)" + - "Demotion (MUST to SHOULD, SHOULD to MAY)" + - "Merge (combine two requirements)" + - "Split (break a requirement into two)" + - "New requirement" + - "Remove requirement" + validations: + required: true + - type: textarea + id: evidence + attributes: + label: Evidence + description: | + Name a real CLI, the specific MUST/SHOULD/MAY, and what happened. + The strongest proposals cite a tool where the current spec text produces the wrong outcome. + placeholder: | + Tool: gh (GitHub CLI) + Requirement: P4 MUST "distinct exit codes for distinct failure categories" + Observed: gh uses exit code 1 for everything — auth, network, not-found. + Problem: The current MUST is correct but underspecified — it doesn't say how many categories are enough. 
+ validations: + required: true + - type: textarea + id: proposal + attributes: + label: Proposed change + description: What should the spec say instead? Quote the current text and show the diff. + validations: + required: true + - type: input + id: human-reviewer + attributes: + label: Human reviewer + description: "GitHub handle of the human who reviewed and approved this submission." + placeholder: "@username" + validations: + required: true + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this submission was AI-written, what was human-written?" + placeholder: "Evidence gathered by hand; proposed wording drafted with Claude and edited." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. **Human reviewer is required.** A human must review and approve spec change proposals before submission. + 4. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/scoring-bug.yml b/.github/ISSUE_TEMPLATE/scoring-bug.yml new file mode 100644 index 0000000..9e2d8bd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/scoring-bug.yml @@ -0,0 +1,48 @@ +name: "Scoring bug" +description: "Report a bug in anc's scoring logic, output formatting, or CLI behavior." +labels: ["bug"] +body: + - type: textarea + id: description + attributes: + label: What happened? + description: "Describe the bug. Include the command, actual output, and expected output." + placeholder: | + $ anc check --command mytool --output json + Error: thread 'main' panicked at 'index out of bounds' + + Expected: valid JSON output or a clean error message. + validations: + required: true + - type: input + id: version + attributes: + label: anc version + description: "Output of `anc --version`." + placeholder: "agentnative 0.1.1" + validations: + required: true + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this report was AI-written, what was human-written?" + placeholder: "Entirely human-written." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative-cli "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. **Checker bugs only.** For false positives/negatives specifically, use the "False positive / false negative" template instead. + 4. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/spec-question.yml b/.github/ISSUE_TEMPLATE/spec-question.yml new file mode 100644 index 0000000..693acf0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/spec-question.yml @@ -0,0 +1,53 @@ +name: "Spec question" +description: "Ask a question about the agent-native CLI standard." +labels: ["question"] +body: + - type: dropdown + id: principle + attributes: + label: Principle (if applicable) + description: Which principle does this relate to? + options: + - "P1 — Non-Interactive by Default" + - "P2 — Structured, Parseable Output" + - "P3 — Progressive Help Discovery" + - "P4 — Fail Fast with Actionable Errors" + - "P5 — Safe Retries and Explicit Mutation Boundaries" + - "P6 — Composable and Predictable Command Structure" + - "P7 — Bounded, High-Signal Responses" + - "General / cross-cutting" + validations: + required: true + - type: textarea + id: question + attributes: + label: Question + description: "What would you like to know about the standard?" + placeholder: | + Does P4's MUST on exit codes apply to CLIs that use a single non-zero code + for all errors but include structured error details in stderr JSON? + validations: + required: true + - type: textarea + id: ai-disclosure + attributes: + label: AI disclosure + description: "One sentence: what part of this question was AI-written, what was human-written?" + placeholder: "Entirely human-written." + validations: + required: true + - type: textarea + id: agent-instructions + attributes: + label: For agents + description: "Agents: expand this section before filing." + value: | +
<details> + <summary>Agent filing instructions</summary> + + 1. **Search first.** Run `gh search issues --repo brettdavies/agentnative "<keywords>"` to check for duplicates. + 2. **AI disclosure is required.** Fill the field above honestly. + 3. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) for full guidelines. + </details>
+ validations: + required: false diff --git a/CLAUDE.md b/CLAUDE.md index 88a6d7f..d066abf 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -83,6 +83,64 @@ For cross-language pattern helpers, use `source::has_pattern_in()` / `source::fi `source::has_string_literal_in()` with a `Language` parameter — do not write private per-language helpers in individual check files. +## Principle Registry + +`src/principles/registry.rs` is the single source of truth linking spec requirements (MUSTs, SHOULDs, MAYs across P1–P7) +to the checks that verify them. IDs follow `p{N}-{level}-{key}` and are stable once published — scorecards and the +coverage matrix pin against them. + +- Add requirements by appending to the `REQUIREMENTS` static slice, grouped by principle then level (MUST → SHOULD → + MAY). +- Bumping `registry_size_matches_spec` or `level_counts_match_spec` is a deliberate act — the tests exist to flag + unintentional growth. Update both counter tests plus the summary prose in `docs/coverage-matrix.md` when the registry + grows. +- `Applicability::Universal` means every CLI; `Applicability::Conditional(reason)` names the gate in prose so the matrix + and the site `/coverage` page can render it. +- `ExceptionCategory` is reserved for v0.1.3 `audit_profile` suppression — do not consume before then. + +## covers() Declaration + +Each `Check` declares which requirements it evidences via `fn covers(&self) -> &'static [&'static str]`. The default +returns `&[]` — checks opt in explicitly. Return a static slice; never allocate. For a check that verifies multiple +requirements, list them all: + +```rust +fn covers(&self) -> &'static [&'static str] { + &["p1-must-no-interactive", "p1-should-tty-detection"] +} +``` + +The drift detector (`dangling_cover_ids` in `src/principles/matrix.rs`) fails the build if any ID returned by `covers()` +is missing from the registry — typos surface at test time, not at render time. 
+ +## Coverage Matrix Artifact Lifecycle + +`anc generate coverage-matrix` emits two committed artifacts: + +- `docs/coverage-matrix.md` — human-readable table, grouped by principle. +- `coverage/matrix.json` — machine-readable (`schema_version: "1.0"`), consumed by the `agentnative-site` `/coverage` + page. + +Both files are tracked in git, not `.gitignore`d. `anc generate coverage-matrix --check` exits non-zero when the +committed artifacts disagree with the current registry + `covers()` declarations. The integration test +`test_generate_coverage_matrix_drift_check_passes_on_committed_artifacts` mirrors this behavior so CI catches drift from +either source. + +Regenerate whenever you add a requirement, change a check's `covers()`, or rename a check ID. The regeneration is a +deliberate commit, not a build-time artifact — the matrix is citable from outside this repo. + +## Scorecard v1.1 Fields + +`src/scorecard.rs` emits `schema_version: "1.1"` with three additions over the v1.0 shape: + +- `coverage_summary` — three-way `{must, should, may} × {total, verified}` counts, computed from the checks that + actually ran. Populated every run. +- `audience` — `Option`, serialized `null` until v0.1.3 wires the audience classifier. Reserved. +- `audit_profile` — `Option`, serialized `null` until v0.1.3 wires `registry.yaml` suppression. Reserved. + +Consumers (notably the site's `/score/` page) must feature-detect the new fields — pre-v1.1 scorecards lack +them until handoff 3 regenerates. + ## Dogfooding Safety Behavioral checks spawn the target binary as a child process. 
When dogfooding (`anc check .`), the target IS diff --git a/Cargo.toml b/Cargo.toml index 5cf0b75..7d6c97a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,8 @@ version = "0.1.0" edition = "2024" description = "The agent-native CLI linter — check whether your CLI follows agent-readiness principles" license = "MIT OR Apache-2.0" -repository = "https://github.com/brettdavies/agentnative" -homepage = "https://github.com/brettdavies/agentnative" +repository = "https://github.com/brettdavies/agentnative-cli" +homepage = "https://anc.dev" documentation = "https://docs.rs/agentnative" keywords = ["cli", "linter", "agent", "ast-grep", "developer-tools"] categories = ["command-line-utilities", "development-tools"] diff --git a/README.md b/README.md index c3b203f..fc5ebe4 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ cargo install agentnative cargo binstall agentnative # Pre-built binaries from GitHub Releases -# https://github.com/brettdavies/agentnative/releases +# https://github.com/brettdavies/agentnative-cli/releases ``` ## Quick Start @@ -186,7 +186,7 @@ Produces a scorecard with results and summary: ## Contributing ```bash -git clone https://github.com/brettdavies/agentnative +git clone https://github.com/brettdavies/agentnative-cli -cd agentnative +cd agentnative-cli cargo test cargo run -- check . diff --git a/RELEASES.md b/RELEASES.md index 3bcb96e..99dc94d 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -133,7 +133,7 @@ Steps for `v0.1.0`: 1. Verify your email on crates.io (`https://crates.io/settings/profile`). 2. `cargo publish` locally with `CARGO_REGISTRY_TOKEN` set. 3. Configure Trusted Publishing on crates.io: `https://crates.io/settings/tokens/trusted-publishing` → add - `brettdavies/agentnative`, workflow `release.yml`. + `brettdavies/agentnative-cli`, workflow `release.yml`. 4. Enable "Enforce Trusted Publishing" to block token-based publishes. 5. Remove the `CARGO_REGISTRY_TOKEN` repository secret. 
@@ -171,10 +171,10 @@ Edit the JSON locally, then sync to the remote: ```bash # First apply (creating a ruleset): -gh api -X POST repos/brettdavies/agentnative/rulesets --input .github/rulesets/protect-dev.json +gh api -X POST repos/brettdavies/agentnative-cli/rulesets --input .github/rulesets/protect-dev.json -# Subsequent updates (replace by ID — find via `gh api repos/brettdavies/agentnative/rulesets`): -gh api -X PUT repos/brettdavies/agentnative/rulesets/<id> --input .github/rulesets/protect-main.json +# Subsequent updates (replace by ID — find via `gh api repos/brettdavies/agentnative-cli/rulesets`): +gh api -X PUT repos/brettdavies/agentnative-cli/rulesets/<id> --input .github/rulesets/protect-main.json ``` Committing the JSON alongside code means ruleset changes land via the same review process as workflow changes — a @@ -192,7 +192,7 @@ Mixing these produces a stuck-but-green PR: all actual checks report green, but that will never appear. Confirm the real contexts after a first CI run with: ```bash -gh api repos/brettdavies/agentnative/commits/<sha>/check-runs --jq '.check_runs[].name' +gh api repos/brettdavies/agentnative-cli/commits/<sha>/check-runs --jq '.check_runs[].name' ``` ## Required secrets diff --git a/coverage/matrix.json b/coverage/matrix.json new file mode 100644 index 0000000..ebd6263 --- /dev/null +++ b/coverage/matrix.json @@ -0,0 +1,618 @@ +{ + "schema_version": "1.0", + "generated_by": "anc generate coverage-matrix", + "rows": [ + { + "id": "p1-must-env-var", + "principle": 1, + "level": "must", + "summary": "Every flag settable via environment variable (falsey-value parser for booleans).", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p1-env-flags-source", + "layer": "source" + } + ] + }, + { + "id": "p1-must-no-interactive", + "principle": 1, + "level": "must", + "summary": "`--no-interactive` flag gates every prompt library call; when set or stdin is not a TTY, use defaults/stdin or exit with an actionable error.", 
+ "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p1-non-interactive", + "layer": "behavioral" + }, + { + "check_id": "p1-non-interactive-source", + "layer": "project" + } + ] + }, + { + "id": "p1-must-no-browser", + "principle": 1, + "level": "must", + "summary": "Headless authentication path (`--no-browser` / OAuth Device Authorization Grant).", + "applicability": { + "kind": "conditional", + "condition": "CLI authenticates against a remote service" + }, + "verifiers": [ + { + "check_id": "p1-headless-auth", + "layer": "source" + } + ] + }, + { + "id": "p1-should-tty-detection", + "principle": 1, + "level": "should", + "summary": "Auto-detect non-interactive context via TTY detection; suppress prompts when stderr is not a terminal.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p1-tty-detection-source", + "layer": "source" + } + ] + }, + { + "id": "p1-should-defaults-in-help", + "principle": 1, + "level": "should", + "summary": "Document default values for prompted inputs in `--help` output.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p1-may-rich-tui", + "principle": 1, + "level": "may", + "summary": "Rich interactive experiences (spinners, progress bars, menus) when TTY is detected and `--no-interactive` is not set.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p2-must-output-flag", + "principle": 2, + "level": "must", + "summary": "`--output text|json|jsonl` flag selects output format; `OutputFormat` enum threaded through output paths.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p2-json-output", + "layer": "behavioral" + }, + { + "check_id": "p2-structured-output", + "layer": "source" + } + ] + }, + { + "id": "p2-must-stdout-stderr-split", + "principle": 2, + "level": "must", + "summary": "Data goes to stdout; diagnostics/progress/warnings go to stderr — never 
interleaved.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p2-output-module", + "layer": "source" + } + ] + }, + { + "id": "p2-must-exit-codes", + "principle": 2, + "level": "must", + "summary": "Exit codes are structured and documented (0 success, 1 general, 2 usage, 77 auth, 78 config).", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p2-must-json-errors", + "principle": 2, + "level": "must", + "summary": "When `--output json` is active, errors are emitted as JSON (to stderr) with at least `error`, `kind`, and `message` fields.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p2-should-consistent-envelope", + "principle": 2, + "level": "should", + "summary": "JSON output uses a consistent envelope — a top-level object with predictable keys — across every command.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p2-may-more-formats", + "principle": 2, + "level": "may", + "summary": "Additional output formats (CSV, TSV, YAML) beyond the core three.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p2-may-raw-flag", + "principle": 2, + "level": "may", + "summary": "`--raw` flag for unformatted output suitable for piping to other tools.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p3-must-subcommand-examples", + "principle": 3, + "level": "must", + "summary": "Every subcommand ships at least one concrete invocation example (`after_help` in clap).", + "applicability": { + "kind": "conditional", + "condition": "CLI uses subcommands" + }, + "verifiers": [] + }, + { + "id": "p3-must-top-level-examples", + "principle": 3, + "level": "must", + "summary": "The top-level command ships 2–3 examples covering the primary use cases.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p3-help", + "layer": "behavioral" + } 
+ ] + }, + { + "id": "p3-should-paired-examples", + "principle": 3, + "level": "should", + "summary": "Examples show human and agent invocations side by side (text then `--output json` equivalent).", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p3-should-about-long-about", + "principle": 3, + "level": "should", + "summary": "Short `about` for command-list summaries; `long_about` reserved for detailed descriptions visible with `--help`.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p3-may-examples-subcommand", + "principle": 3, + "level": "may", + "summary": "Dedicated `examples` subcommand or `--examples` flag for curated usage patterns.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p4-must-try-parse", + "principle": 4, + "level": "must", + "summary": "Parse arguments with `try_parse()` instead of `parse()` so `--output json` can emit JSON parse errors.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p4-try-parse", + "layer": "source" + } + ] + }, + { + "id": "p4-must-exit-code-mapping", + "principle": 4, + "level": "must", + "summary": "Error types map to distinct exit codes (0, 1, 2, 77, 78).", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p4-bad-args", + "layer": "behavioral" + }, + { + "check_id": "p4-exit-codes", + "layer": "source" + } + ] + }, + { + "id": "p4-must-actionable-errors", + "principle": 4, + "level": "must", + "summary": "Every error message contains what failed, why, and what to do next.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p4-should-structured-enum", + "principle": 4, + "level": "should", + "summary": "Error types use a structured enum (via `thiserror` in Rust) with variant-to-kind mapping for JSON serialization.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": 
"p4-error-module", + "layer": "project" + }, + { + "check_id": "p4-error-types", + "layer": "source" + } + ] + }, + { + "id": "p4-should-gating-before-network", + "principle": 4, + "level": "should", + "summary": "Config and auth validation happen before any network call (three-tier dependency gating).", + "applicability": { + "kind": "conditional", + "condition": "CLI makes network calls" + }, + "verifiers": [] + }, + { + "id": "p4-should-json-error-output", + "principle": 4, + "level": "should", + "summary": "Error output respects `--output json`: JSON-formatted errors go to stderr when JSON output is selected.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p5-must-force-yes", + "principle": 5, + "level": "must", + "summary": "Destructive operations (delete, overwrite, bulk modify) require an explicit `--force` or `--yes` flag.", + "applicability": { + "kind": "conditional", + "condition": "CLI has destructive operations" + }, + "verifiers": [] + }, + { + "id": "p5-must-read-write-distinction", + "principle": 5, + "level": "must", + "summary": "The distinction between read and write commands is clear from the command name and help text alone.", + "applicability": { + "kind": "conditional", + "condition": "CLI has both read and write operations" + }, + "verifiers": [] + }, + { + "id": "p5-must-dry-run", + "principle": 5, + "level": "must", + "summary": "A `--dry-run` flag is present on every write command; dry-run output respects `--output json`.", + "applicability": { + "kind": "conditional", + "condition": "CLI has write operations" + }, + "verifiers": [ + { + "check_id": "p5-dry-run", + "layer": "project" + } + ] + }, + { + "id": "p5-should-idempotency", + "principle": 5, + "level": "should", + "summary": "Write operations are idempotent where the domain allows it — running the same command twice produces the same result.", + "applicability": { + "kind": "conditional", + "condition": "CLI has write operations" + }, + 
"verifiers": [] + }, + { + "id": "p6-must-sigpipe", + "principle": 6, + "level": "must", + "summary": "SIGPIPE fix is the first executable statement in `main()` — piping output to `head`/`tail` must not panic.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p6-sigpipe", + "layer": "behavioral" + } + ] + }, + { + "id": "p6-must-no-color", + "principle": 6, + "level": "must", + "summary": "TTY detection plus support for `NO_COLOR` and `TERM=dumb` — color codes suppressed when stdout/stderr is not a terminal.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p6-no-color-behavioral", + "layer": "behavioral" + }, + { + "check_id": "p6-no-color", + "layer": "source" + }, + { + "check_id": "p6-no-color", + "layer": "source" + } + ] + }, + { + "id": "p6-must-completions", + "principle": 6, + "level": "must", + "summary": "Shell completions available via a `completions` subcommand (Tier 1 meta-command — needs no config/auth/network).", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": "p6-completions", + "layer": "project" + } + ] + }, + { + "id": "p6-must-timeout-network", + "principle": 6, + "level": "must", + "summary": "Network CLIs ship a `--timeout` flag with a sensible default (e.g., 30 seconds).", + "applicability": { + "kind": "conditional", + "condition": "CLI makes network calls" + }, + "verifiers": [ + { + "check_id": "p6-timeout", + "layer": "source" + } + ] + }, + { + "id": "p6-must-no-pager", + "principle": 6, + "level": "must", + "summary": "If the CLI uses a pager (`less`, `more`, `$PAGER`), it supports `--no-pager` or respects `PAGER=\"\"`.", + "applicability": { + "kind": "conditional", + "condition": "CLI invokes a pager for output" + }, + "verifiers": [ + { + "check_id": "p6-no-pager", + "layer": "source" + } + ] + }, + { + "id": "p6-must-global-flags", + "principle": 6, + "level": "must", + "summary": "Agentic flags (`--output`, `--quiet`, 
`--no-interactive`, `--timeout`) are `global = true` so they propagate to every subcommand.", + "applicability": { + "kind": "conditional", + "condition": "CLI uses subcommands" + }, + "verifiers": [ + { + "check_id": "p6-global-flags", + "layer": "source" + } + ] + }, + { + "id": "p6-should-stdin-input", + "principle": 6, + "level": "should", + "summary": "Commands that accept input read from stdin when no file argument is provided.", + "applicability": { + "kind": "conditional", + "condition": "CLI has commands that accept input data" + }, + "verifiers": [] + }, + { + "id": "p6-should-consistent-naming", + "principle": 6, + "level": "should", + "summary": "Subcommand naming follows a consistent `noun verb` or `verb noun` convention throughout the tool.", + "applicability": { + "kind": "conditional", + "condition": "CLI uses subcommands" + }, + "verifiers": [] + }, + { + "id": "p6-should-tier-gating", + "principle": 6, + "level": "should", + "summary": "Three-tier dependency gating: Tier 1 (meta) needs nothing, Tier 2 (local) needs config, Tier 3 (network) needs config + auth.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p6-should-subcommand-operations", + "principle": 6, + "level": "should", + "summary": "Operations are modeled as subcommands, not flags (`tool search \"q\"`, not `tool --search \"q\"`).", + "applicability": { + "kind": "conditional", + "condition": "CLI performs multiple distinct operations" + }, + "verifiers": [] + }, + { + "id": "p6-may-color-flag", + "principle": 6, + "level": "may", + "summary": "`--color auto|always|never` flag for explicit color control beyond TTY auto-detection.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p7-must-quiet", + "principle": 7, + "level": "must", + "summary": "A `--quiet` flag suppresses non-essential output; only requested data and errors appear.", + "applicability": { + "kind": "universal" + }, + "verifiers": [ + { + "check_id": 
"p7-quiet", + "layer": "behavioral" + } + ] + }, + { + "id": "p7-must-list-clamping", + "principle": 7, + "level": "must", + "summary": "List operations clamp to a sensible default maximum; when truncated, indicate it (`\"truncated\": true` in JSON, stderr note in text).", + "applicability": { + "kind": "conditional", + "condition": "CLI has list-style commands" + }, + "verifiers": [ + { + "check_id": "p7-output-clamping", + "layer": "source" + } + ] + }, + { + "id": "p7-should-verbose", + "principle": 7, + "level": "should", + "summary": "A `--verbose` flag (or `-v` / `-vv`) escalates diagnostic detail when agents need to debug failures.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p7-should-limit", + "principle": 7, + "level": "should", + "summary": "A `--limit` or `--max-results` flag lets callers request exactly the number of items they want.", + "applicability": { + "kind": "conditional", + "condition": "CLI has list-style commands" + }, + "verifiers": [] + }, + { + "id": "p7-should-timeout", + "principle": 7, + "level": "should", + "summary": "A `--timeout` flag bounds execution time so agents are not blocked indefinitely.", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + }, + { + "id": "p7-may-cursor-pagination", + "principle": 7, + "level": "may", + "summary": "Cursor-based pagination flags (`--after`, `--before`) for efficient traversal of large result sets.", + "applicability": { + "kind": "conditional", + "condition": "CLI returns paginated results" + }, + "verifiers": [] + }, + { + "id": "p7-may-auto-verbosity", + "principle": 7, + "level": "may", + "summary": "Automatic verbosity reduction in non-TTY contexts (same behavior `--quiet` explicitly requests).", + "applicability": { + "kind": "universal" + }, + "verifiers": [] + } + ], + "summary": { + "total": 46, + "covered": 19, + "uncovered": 27, + "must": { + "total": 23, + "covered": 17 + }, + "should": { + "total": 16, + "covered": 2 + }, + 
"may": { + "total": 7, + "covered": 0 + } + } +} \ No newline at end of file diff --git a/docs/coverage-matrix.md b/docs/coverage-matrix.md new file mode 100644 index 0000000..98a54d0 --- /dev/null +++ b/docs/coverage-matrix.md @@ -0,0 +1,95 @@ +# Coverage Matrix + + + +This table maps every MUST, SHOULD, and MAY in the agent-native CLI spec to the `anc` checks that verify it. +When a requirement has no verifier, the cell reads **UNCOVERED** and the reader knows the scorecard cannot speak to it. + +## Summary + +- **Total**: 46 requirements (19 covered / 27 uncovered) +- **MUST**: 17 of 23 covered +- **SHOULD**: 2 of 16 covered +- **MAY**: 0 of 7 covered + +## P1: Non-Interactive by Default + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p1-must-env-var` | MUST | Universal | `p1-env-flags-source` (source) | Every flag settable via environment variable (falsey-value parser for booleans). | +| `p1-must-no-interactive` | MUST | Universal | `p1-non-interactive` (behavioral)
`p1-non-interactive-source` (project) | `--no-interactive` flag gates every prompt library call; when set or stdin is not a TTY, use defaults/stdin or exit with an actionable error. | +| `p1-must-no-browser` | MUST | If: CLI authenticates against a remote service | `p1-headless-auth` (source) | Headless authentication path (`--no-browser` / OAuth Device Authorization Grant). | +| `p1-should-tty-detection` | SHOULD | Universal | `p1-tty-detection-source` (source) | Auto-detect non-interactive context via TTY detection; suppress prompts when stderr is not a terminal. | +| `p1-should-defaults-in-help` | SHOULD | Universal | **UNCOVERED** | Document default values for prompted inputs in `--help` output. | +| `p1-may-rich-tui` | MAY | Universal | **UNCOVERED** | Rich interactive experiences (spinners, progress bars, menus) when TTY is detected and `--no-interactive` is not set. | + +## P2: Structured, Parseable Output + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p2-must-output-flag` | MUST | Universal | `p2-json-output` (behavioral)
`p2-structured-output` (source) | `--output text\|json\|jsonl` flag selects output format; `OutputFormat` enum threaded through output paths. | +| `p2-must-stdout-stderr-split` | MUST | Universal | `p2-output-module` (source) | Data goes to stdout; diagnostics/progress/warnings go to stderr — never interleaved. | +| `p2-must-exit-codes` | MUST | Universal | **UNCOVERED** | Exit codes are structured and documented (0 success, 1 general, 2 usage, 77 auth, 78 config). | +| `p2-must-json-errors` | MUST | Universal | **UNCOVERED** | When `--output json` is active, errors are emitted as JSON (to stderr) with at least `error`, `kind`, and `message` fields. | +| `p2-should-consistent-envelope` | SHOULD | Universal | **UNCOVERED** | JSON output uses a consistent envelope — a top-level object with predictable keys — across every command. | +| `p2-may-more-formats` | MAY | Universal | **UNCOVERED** | Additional output formats (CSV, TSV, YAML) beyond the core three. | +| `p2-may-raw-flag` | MAY | Universal | **UNCOVERED** | `--raw` flag for unformatted output suitable for piping to other tools. | + +## P3: Progressive Help Discovery + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p3-must-subcommand-examples` | MUST | If: CLI uses subcommands | **UNCOVERED** | Every subcommand ships at least one concrete invocation example (`after_help` in clap). | +| `p3-must-top-level-examples` | MUST | Universal | `p3-help` (behavioral) | The top-level command ships 2–3 examples covering the primary use cases. | +| `p3-should-paired-examples` | SHOULD | Universal | **UNCOVERED** | Examples show human and agent invocations side by side (text then `--output json` equivalent). | +| `p3-should-about-long-about` | SHOULD | Universal | **UNCOVERED** | Short `about` for command-list summaries; `long_about` reserved for detailed descriptions visible with `--help`. 
| +| `p3-may-examples-subcommand` | MAY | Universal | **UNCOVERED** | Dedicated `examples` subcommand or `--examples` flag for curated usage patterns. | + +## P4: Fail Fast, Actionable Errors + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p4-must-try-parse` | MUST | Universal | `p4-try-parse` (source) | Parse arguments with `try_parse()` instead of `parse()` so `--output json` can emit JSON parse errors. | +| `p4-must-exit-code-mapping` | MUST | Universal | `p4-bad-args` (behavioral)
`p4-exit-codes` (source) | Error types map to distinct exit codes (0, 1, 2, 77, 78). | +| `p4-must-actionable-errors` | MUST | Universal | **UNCOVERED** | Every error message contains what failed, why, and what to do next. | +| `p4-should-structured-enum` | SHOULD | Universal | `p4-error-module` (project)
`p4-error-types` (source) | Error types use a structured enum (via `thiserror` in Rust) with variant-to-kind mapping for JSON serialization. | +| `p4-should-gating-before-network` | SHOULD | If: CLI makes network calls | **UNCOVERED** | Config and auth validation happen before any network call (three-tier dependency gating). | +| `p4-should-json-error-output` | SHOULD | Universal | **UNCOVERED** | Error output respects `--output json`: JSON-formatted errors go to stderr when JSON output is selected. | + +## P5: Safe Retries, Mutation Boundaries + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p5-must-force-yes` | MUST | If: CLI has destructive operations | **UNCOVERED** | Destructive operations (delete, overwrite, bulk modify) require an explicit `--force` or `--yes` flag. | +| `p5-must-read-write-distinction` | MUST | If: CLI has both read and write operations | **UNCOVERED** | The distinction between read and write commands is clear from the command name and help text alone. | +| `p5-must-dry-run` | MUST | If: CLI has write operations | `p5-dry-run` (project) | A `--dry-run` flag is present on every write command; dry-run output respects `--output json`. | +| `p5-should-idempotency` | SHOULD | If: CLI has write operations | **UNCOVERED** | Write operations are idempotent where the domain allows it — running the same command twice produces the same result. | + +## P6: Composable, Predictable Command Structure + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p6-must-sigpipe` | MUST | Universal | `p6-sigpipe` (behavioral) | SIGPIPE fix is the first executable statement in `main()` — piping output to `head`/`tail` must not panic. | +| `p6-must-no-color` | MUST | Universal | `p6-no-color-behavioral` (behavioral)
`p6-no-color` (source)
`p6-no-color` (source) | TTY detection plus support for `NO_COLOR` and `TERM=dumb` — color codes suppressed when stdout/stderr is not a terminal. | +| `p6-must-completions` | MUST | Universal | `p6-completions` (project) | Shell completions available via a `completions` subcommand (Tier 1 meta-command — needs no config/auth/network). | +| `p6-must-timeout-network` | MUST | If: CLI makes network calls | `p6-timeout` (source) | Network CLIs ship a `--timeout` flag with a sensible default (e.g., 30 seconds). | +| `p6-must-no-pager` | MUST | If: CLI invokes a pager for output | `p6-no-pager` (source) | If the CLI uses a pager (`less`, `more`, `$PAGER`), it supports `--no-pager` or respects `PAGER=""`. | +| `p6-must-global-flags` | MUST | If: CLI uses subcommands | `p6-global-flags` (source) | Agentic flags (`--output`, `--quiet`, `--no-interactive`, `--timeout`) are `global = true` so they propagate to every subcommand. | +| `p6-should-stdin-input` | SHOULD | If: CLI has commands that accept input data | **UNCOVERED** | Commands that accept input read from stdin when no file argument is provided. | +| `p6-should-consistent-naming` | SHOULD | If: CLI uses subcommands | **UNCOVERED** | Subcommand naming follows a consistent `noun verb` or `verb noun` convention throughout the tool. | +| `p6-should-tier-gating` | SHOULD | Universal | **UNCOVERED** | Three-tier dependency gating: Tier 1 (meta) needs nothing, Tier 2 (local) needs config, Tier 3 (network) needs config + auth. | +| `p6-should-subcommand-operations` | SHOULD | If: CLI performs multiple distinct operations | **UNCOVERED** | Operations are modeled as subcommands, not flags (`tool search "q"`, not `tool --search "q"`). | +| `p6-may-color-flag` | MAY | Universal | **UNCOVERED** | `--color auto\|always\|never` flag for explicit color control beyond TTY auto-detection. 
| + +## P7: Bounded, High-Signal Responses + +| ID | Level | Applicability | Verifier(s) | Summary | +| --- | --- | --- | --- | --- | +| `p7-must-quiet` | MUST | Universal | `p7-quiet` (behavioral) | A `--quiet` flag suppresses non-essential output; only requested data and errors appear. | +| `p7-must-list-clamping` | MUST | If: CLI has list-style commands | `p7-output-clamping` (source) | List operations clamp to a sensible default maximum; when truncated, indicate it (`"truncated": true` in JSON, stderr note in text). | +| `p7-should-verbose` | SHOULD | Universal | **UNCOVERED** | A `--verbose` flag (or `-v` / `-vv`) escalates diagnostic detail when agents need to debug failures. | +| `p7-should-limit` | SHOULD | If: CLI has list-style commands | **UNCOVERED** | A `--limit` or `--max-results` flag lets callers request exactly the number of items they want. | +| `p7-should-timeout` | SHOULD | Universal | **UNCOVERED** | A `--timeout` flag bounds execution time so agents are not blocked indefinitely. | +| `p7-may-cursor-pagination` | MAY | If: CLI returns paginated results | **UNCOVERED** | Cursor-based pagination flags (`--after`, `--before`) for efficient traversal of large result sets. | +| `p7-may-auto-verbosity` | MAY | Universal | **UNCOVERED** | Automatic verbosity reduction in non-TTY contexts (same behavior `--quiet` explicitly requests). 
| + diff --git a/docs/plans/2026-04-02-002-feat-release-infrastructure-plan.md b/docs/plans/2026-04-02-002-feat-release-infrastructure-plan.md index 84c318a..2e73d22 100644 --- a/docs/plans/2026-04-02-002-feat-release-infrastructure-plan.md +++ b/docs/plans/2026-04-02-002-feat-release-infrastructure-plan.md @@ -1,7 +1,7 @@ --- title: "feat: Release infrastructure — completions, RELEASING.md, changelog, Homebrew formula" type: feat -status: active +status: complete date: 2026-04-02 origin: ~/.gstack/projects/brettdavies-agentnative/brett-main-design-20260327-214808.md --- diff --git a/docs/plans/2026-04-20-v011-handoff-1-agentnative-impl.md b/docs/plans/2026-04-20-v011-handoff-1-agentnative-impl.md new file mode 100644 index 0000000..3d318a1 --- /dev/null +++ b/docs/plans/2026-04-20-v011-handoff-1-agentnative-impl.md @@ -0,0 +1,115 @@ +--- +title: "Handoff 1 of 5: v0.1.1 agentnative implementation" +type: handoff +order: 1 +phase: v0.1.1 +status: in-progress +depends_on: [] +blocks: [2, 3] +--- + +# Handoff 1: v0.1.1 agentnative implementation + +**Written for**: the session that picks up the Rust implementation of v0.1.1 after the doctrine review closed on +2026-04-20. This is the first and largest handoff. 
+ +## Sibling handoffs + +| # | Phase | Repo | Doc | +|---|--------|--------------------|----------------------------------------------------------------------------------| +| 1 | v0.1.1 | `agentnative` | `docs/plans/2026-04-20-v011-handoff-1-agentnative-impl.md` *(this doc)* | +| 2 | v0.1.1 | `agentnative-site` | `docs/plans/2026-04-20-v011-handoff-2-site-spec-coverage.md` (+ session brief) | +| 3 | v0.1.1 | `agentnative-site` | `docs/plans/2026-04-20-v011-handoff-3-scorecard-regen.md` | +| 4 | v0.1.2 | `agentnative` | `docs/plans/2026-04-20-v012-handoff-4-behavioral-checks.md` | +| 5 | v0.1.3 | `agentnative-site` | `docs/plans/2026-04-20-v013-handoff-5-audience-leaderboard.md` | + +## The job, in one sentence + +Build the `PrincipleRegistry` + matrix generator + Check trait `covers()` addition + miscategorized-check renames + P1 +applicability fix, all in `agentnative` (Rust), on a new feature branch. + +## Read these first (authoritative sources) + +1. `~/.gstack/projects/brettdavies-agentnative/ceo-plans/2026-04-20-p1-doctrine-spec-coverage.md` — the plan. Read the + "Eng Review Amendments" section at the bottom FIRST; it corrects several claims in the main body. +2. `~/.gstack/projects/brettdavies-agentnative/brett-dev-eng-review-test-plan-20260420-132817.md` — test plan. +3. `docs/plans/2026-04-17-p1-non-interactive-check-gap.md` — context on why this work exists. + +Do not re-read the CEO-review transcripts or the pre-doctrine spike; everything actionable is in the two files above. + +## Scope (what ships in this PR) + +1. **New module `src/principles/`**: + +- `registry.rs` — flat `&'static [Requirement]` array covering MUSTs + SHOULDs + MAYs (~46 entries). Types: + `Requirement`, `Level { Must, Should, May }`, `Applicability { Universal, Conditional(&'static str) }`, + `ExceptionCategory`. +- `matrix.rs` — generator emitting `docs/coverage-matrix.md` + `coverage/matrix.json`. +- `mod.rs` — public API surface. + +1. 
**`Check` trait gains one method**: `fn covers(&self) -> &'static [&'static str]` (requirement IDs, empty by + default). +2. **Miscategorized check renames** (identify during implementation; at minimum): + +- `p6-tty-detection` → `p1-tty-detection-source` (verifies P1 SHOULD) +- `p6-env-flags` → `p1-env-flags-source` (verifies P1 MUST) +- Audit every existing check ID; rename any whose `group()` contradicts what the check actually verifies. + +1. **New CLI subcommand** `anc generate coverage-matrix` with `--out`, `--json-out`, `--check` (drift check for CI). +2. **P1 applicability gate fix**: update `src/checks/behavioral/non_interactive.rs` to pass when any of: + help-on-bare-invocation, agentic flag present, stdin-as-primary-input. Blocks the dogfood break. +3. **Scorecard JSON v1.1 fields**: add `coverage_summary { must, should, may }`, `audience`, `audit_profile` to the + top-level scorecard output. Do NOT touch the existing `layer` field — it already exists. +4. **Tests**: unit + golden-file per the test plan. Registry validation tests (every covers() ID resolves; every MUST + has a check or exception; IDs unique). + +## Out of scope (do NOT touch in this PR) + +- New behavioral checks (`p1-flag-existence`, `p1-env-hints`, `p6-no-pager-behavioral`) — that's handoff 4, v0.1.2. +- Audience classifier logic beyond the JSON field stub — that's handoff 5, v0.1.3. +- Spec text edits in `agentnative-site` — that's handoff 2. +- Scorecard regeneration for the 10 existing tools — that's handoff 3. +- Python source coverage expansion — explicitly de-scoped. + +## Branch + workflow + +- Branch off `dev`: `feat/v011-principle-registry-and-coverage`. +- User's global rule: never commit to `dev`/`main` directly; always via PR. This is a hard rule. +- Pre-push hook mirrors CI (fmt, clippy -Dwarnings, test, cargo-deny, Windows compat). Run `git config core.hooksPath + scripts/hooks` if not already set. +- PR target: `dev`. 
After merge, tag `v0.1.1` only when handoff 2 (spec text) has also merged. + +## Definition of done + +- [x] All existing tests still pass +- [x] New unit tests per test plan (+17 unit, +2 integration vs. 304-test baseline) +- [x] `cargo run -- check .` on the agentnative repo itself passes all checks (dogfood: 26 pass / 2 warn / 0 fail / 2 + skip) +- [x] `cargo run -- generate coverage-matrix --check` passes (no drift — exit 0) +- [x] `docs/coverage-matrix.md` committed +- [x] `coverage/matrix.json` committed (`schema_version: "1.0"`, 46 rows, 19 covered / 27 uncovered) +- [x] Scorecard JSON shape matches v1.1 (`coverage_summary` populated, `audience` + `audit_profile` null until v0.1.3) +- [x] CLAUDE.md updated with registry + `covers()` + matrix-lifecycle + scorecard-v1.1 conventions (commit `1509331`) + +## Known gotchas + +- `Check` trait `layer()`, `CheckLayer` enum, and scorecard `layer` field **already exist**. Do NOT add them — the CEO + plan mistakenly claimed they were new. +- Rust toolchain pinned via `rust-toolchain.toml`. Do NOT `rustup update` during this work. +- `ast-grep-core` and `ast-grep-language` pinned to `=0.42.0`. Do NOT bump. +- Pre-1.0 dependency pins are load-bearing; respect them. +- Check ID renames break the 10 existing scorecards in `agentnative-site/scorecards/*.json`. That's handoff 3's problem; + document the rename map in this PR's description for handoff 3 to consume. + +## Progress + +Scope complete; PR #21 open against `dev` with CI green. All DoD items checked against commit `1509331` (debug +build smoke test — see todo 011 for record). Final pre-merge validation (release build + re-run after any subsequent +commits) still pending before merge. + +Flip this plan's `status` to `complete` when PR #21 merges. + +## After this PR merges + +Handoff 2 (site spec text + `/coverage` page) can begin. Handoff 3 (scorecard regeneration) waits for both handoff 1 and +handoff 2 to merge. 
diff --git a/docs/plans/2026-04-20-v012-handoff-4-behavioral-checks.md b/docs/plans/2026-04-20-v012-handoff-4-behavioral-checks.md new file mode 100644 index 0000000..6d90f8d --- /dev/null +++ b/docs/plans/2026-04-20-v012-handoff-4-behavioral-checks.md @@ -0,0 +1,103 @@ +--- +title: "Handoff 4 of 5: v0.1.2 new behavioral checks + HelpOutput cache" +type: handoff +order: 4 +phase: v0.1.2 +depends_on: [1, 2, 3] +blocks: [5] +--- + +# Handoff 4: v0.1.2 new behavioral checks + +**Written for**: the session building the three new behavioral checks that land after v0.1.1 is fully shipped and +stable. This is net-new verification code; the registry + coverage infrastructure already exists. + +## Sibling handoffs + +| # | Phase | Repo | Doc | +|---|--------|--------------------|----------------------------------------------------------------------------------| +| 1 | v0.1.1 | `agentnative` | `docs/plans/2026-04-20-v011-handoff-1-agentnative-impl.md` | +| 2 | v0.1.1 | `agentnative-site` | `docs/plans/2026-04-20-v011-handoff-2-site-spec-coverage.md` (+ session brief) | +| 3 | v0.1.1 | `agentnative-site` | `docs/plans/2026-04-20-v011-handoff-3-scorecard-regen.md` | +| 4 | v0.1.2 | `agentnative` | `docs/plans/2026-04-20-v012-handoff-4-behavioral-checks.md` *(this doc)* | +| 5 | v0.1.3 | `agentnative-site` | `docs/plans/2026-04-20-v013-handoff-5-audience-leaderboard.md` | + +## The job, in one sentence + +Build `p1-flag-existence`, `p1-env-hints`, `p6-no-pager-behavioral` as behavioral checks, and introduce a shared +`HelpOutput` cache so all three share a single `<tool> --help` probe per tool. + +## Read these first + +1. `~/.gstack/projects/brettdavies-agentnative/ceo-plans/2026-04-20-p1-doctrine-spec-coverage.md` — "Accepted Scope + (v0.1.2)" section and the "Eng Review Amendments" section (for why this set shrunk to 3 checks). +2. `src/principles/registry.rs` (will exist after handoff 1) — the MUSTs these checks verify. +3. 
`src/checks/behavioral/non_interactive.rs` — example of the existing behavioral check pattern. + +Do NOT re-read doctrine or review transcripts. + +## Scope + +- **`src/runner/help_probe.rs` (new)** — `HelpOutput` struct that spawns `<tool> --help` once and exposes lazy cached + parse views: `flags()`, `env_hints()`, `subcommands()`. Runner passes a shared `Arc<HelpOutput>` into each behavioral + check that needs it. +- **`src/checks/behavioral/flag_existence.rs`** — new check `p1-flag-existence`. Scans parsed flags for any of: + `--no-interactive`, `-p`, `--print`, `--no-input`, `--batch`, `--headless`, `-y`, `--yes`, `--assume-yes`. Pass if at + least one exists. Skip (applicability false) if the target satisfies P1's alternative gates (stdin-primary, + help-on-bare). Warn otherwise with documented false-positive/negative conditions. +- **`src/checks/behavioral/env_hints.rs`** — new check `p1-env-hints`. Scans `--help` for clap-style `[env: FOO]` hints + OR bash-style `$FOO` / `TOOL_FOO` mentions near flag definitions. Pass if present; Warn if flags exist but no env + hints; Skip if no flags exist. +- **`src/checks/behavioral/no_pager_behavioral.rs`** — new check `p6-no-pager-behavioral`. Pass if `--no-pager` flag + detected in `--help`. Skip if no `pager` / `less` / `$PAGER` mentions. Warn if pager is mentioned but no `--no-pager` + escape hatch. +- **Confidence field**: each check emits `confidence: "high" | "medium" | "low"` in its `CheckResult`. Regex-based + probes on short flag lists = high; heuristic mentions = medium; inferences = low. +- **Register all three checks in the registry** (add entries to `src/principles/registry.rs` linking the requirement IDs + they cover). +- **Update `docs/coverage-matrix.md` via `anc generate coverage-matrix`** — it should reflect the new coverage (these + requirements move from "source-only" to "verified at both layers" or "newly verified behaviorally"). 
+- **Tests per the test plan artifact** at + `~/.gstack/projects/brettdavies-agentnative/brett-dev-eng-review-test-plan-20260420-132817.md`. Happy path, + Skip-applicability, Warn-missing, and one non-English-help exception test per check. + +## Out of scope (explicitly cut from v0.1.2) + +- `p1-headless-auth-behavioral` — cut. Source-layer `p1-headless-auth` is authoritative; binary-only targets get + "source-only verification" disclaimer. +- `p5-dry-run-behavioral` — cut. Write-verb heuristic is too fragile. +- `p6-timeout-behavioral` — cut. Network-touching classification too fragile. +- Audience classifier / banner — that's handoff 5 (v0.1.3). + +If a future PR revisits any of these cuts, do it as its own plan, not here. + +## Branch + workflow + +- Branch off `dev` in `/home/brett/dev/agentnative`: `feat/v012-behavioral-check-expansion`. +- PR target: `dev`. Tag `v0.1.2` after merge. +- Pre-push hook runs CI-equivalent; respect its output. + +## Definition of done + +- [ ] All three checks have full unit-test coverage (happy + Skip + Warn + exception) +- [ ] `HelpOutput` has its own unit tests for each lazy parser +- [ ] `anc check <tool>` on the existing validation targets (`ripgrep`, `bird`, `xurl-rs`) produces sensible verdicts + for the new checks +- [ ] Coverage matrix regenerated; committed diff shows the new checks picking up their requirements +- [ ] Dogfood: `anc check .` on the agentnative repo itself passes all new checks +- [ ] Regenerate the 10 committed scorecards in `agentnative-site` (small follow-on PR there — or bundle into this + release's landing sequence) + +## Known gotchas + +- `HelpOutput` must be cached per-tool-per-invocation, NOT globally. State leak between different target tools would + produce wrong verdicts. +- Heuristic false-positive patterns documented in `docs/coverage-matrix.md` exceptions section. If a new false-positive + surfaces during validation, add it to the doc in this PR — don't leave for later. 
+- Non-English `--help` output: regexes are English-only. This is a named exception in the coverage matrix. Do NOT try to + handle localized help in this PR. + +## After this PR merges + +v0.1.2 is done. Handoff 5 (v0.1.3 audience detector + leaderboard) can begin once the 100-tool registry baseline +prerequisite is met. diff --git a/src/check.rs b/src/check.rs index b616f15..7cb9aa0 100644 --- a/src/check.rs +++ b/src/check.rs @@ -17,4 +17,11 @@ pub trait Check { /// Run the check against the project. fn run(&self, project: &Project) -> anyhow::Result; + + /// Requirement IDs (from `crate::principles::REQUIREMENTS`) that this + /// check verifies. Empty by default so checks opt in explicitly. + /// The registry validator fails if an ID here is not registered. + fn covers(&self) -> &'static [&'static str] { + &[] + } } diff --git a/src/checks/behavioral/bad_args.rs b/src/checks/behavioral/bad_args.rs index 25cfec8..0b6c983 100644 --- a/src/checks/behavioral/bad_args.rs +++ b/src/checks/behavioral/bad_args.rs @@ -18,6 +18,10 @@ impl Check for BadArgsCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p4-must-exit-code-mapping"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/behavioral/help.rs b/src/checks/behavioral/help.rs index 37a3fc0..41da62e 100644 --- a/src/checks/behavioral/help.rs +++ b/src/checks/behavioral/help.rs @@ -18,6 +18,10 @@ impl Check for HelpCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p3-must-top-level-examples"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/behavioral/json_output.rs b/src/checks/behavioral/json_output.rs index c324246..26246c3 100644 --- a/src/checks/behavioral/json_output.rs +++ b/src/checks/behavioral/json_output.rs @@ -18,6 +18,10 @@ impl Check for JsonOutputCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static 
str] { + &["p2-must-output-flag"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/behavioral/no_color.rs b/src/checks/behavioral/no_color.rs index 97a1498..0724d0b 100644 --- a/src/checks/behavioral/no_color.rs +++ b/src/checks/behavioral/no_color.rs @@ -18,6 +18,10 @@ impl Check for NoColorBehavioralCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-no-color"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/behavioral/non_interactive.rs b/src/checks/behavioral/non_interactive.rs index f9fe6c6..59fc6c4 100644 --- a/src/checks/behavioral/non_interactive.rs +++ b/src/checks/behavioral/non_interactive.rs @@ -3,6 +3,30 @@ use crate::project::Project; use crate::runner::RunStatus; use crate::types::{CheckGroup, CheckLayer, CheckResult, CheckStatus}; +/// Agentic flag markers in `--help` output that signal the tool exposes a +/// headless path. Matching any one satisfies P1's "no blocking-interactive +/// surface" requirement even if bare invocation doesn't itself exit cleanly. +const AGENTIC_FLAG_MARKERS: &[&str] = &[ + "--no-interactive", + "--non-interactive", + "--batch", + "--headless", + "--yes", + "--no-input", + "--no-browser", + "--device-code", + "-y,", + "-y ", + " -p,", + " -p ", + "--print", +]; + +/// Help-output markers on the bare invocation. `arg_required_else_help` +/// in clap prints a "Usage:" block and exits non-zero — this is the +/// canonical non-interactive-by-default CLI shape. 
+const HELP_ON_BARE_MARKERS: &[&str] = &["Usage:", "USAGE:", "usage:"]; + pub struct NonInteractiveCheck; impl Check for NonInteractiveCheck { @@ -18,6 +42,10 @@ impl Check for NonInteractiveCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p1-must-no-interactive"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } @@ -25,23 +53,47 @@ impl Check for NonInteractiveCheck { fn run(&self, project: &Project) -> anyhow::Result { let runner = project.runner_ref(); - // Test P1: binary must not block waiting for interactive input. + // P1 Option-ε gate: the check passes when ANY of three conditions + // evidences agent-safe behavior: + // 1. help-on-bare-invocation — binary prints Usage and exits + // (clap `arg_required_else_help`). This is what `anc` itself + // does; without this clause the linter warns itself. + // 2. agentic-flag-present — `--help` advertises `--no-interactive` + // (or equivalent). The tool honors non-interactive callers + // even if bare invocation does something else. + // 3. stdin-as-primary-input — binary exits cleanly when stdin is + // /dev/null. POSIX utilities (jq, sed) satisfy P1 vacuously. // - // BinaryRunner sets stdin to /dev/null, so binaries that read stdin - // (like cat) get EOF immediately and exit — no blocking. Well-behaved - // CLIs print help on bare invocation (arg_required_else_help), so - // this probe is safe even when the target is agentnative itself. - let result = runner.run(&[], &[]); - - let status = match result.status { - RunStatus::Timeout => { + // BinaryRunner already pipes /dev/null as stdin, so the probe is + // safe even when the target is agentnative itself. 
+ let bare = runner.run(&[], &[]); + let bare_output = format!("{}{}", bare.stdout, bare.stderr); + let help_on_bare = matches_any(&bare_output, HELP_ON_BARE_MARKERS); + + let help = runner.run(&["--help"], &[]); + let help_output = format!("{}{}", help.stdout, help.stderr); + let agentic_flag = matches_any(&help_output, AGENTIC_FLAG_MARKERS); + + let stdin_clean_exit = matches!(bare.status, RunStatus::Ok); + + let status = match bare.status { + RunStatus::Timeout if !agentic_flag => { CheckStatus::Warn("binary may be waiting for interactive input".into()) } - RunStatus::Ok => CheckStatus::Pass, - RunStatus::Crash { signal } => CheckStatus::Warn(format!( + RunStatus::Crash { signal } if !agentic_flag => CheckStatus::Warn(format!( "binary crashed on bare invocation (signal {signal})" )), - _ => CheckStatus::Pass, + _ => { + if help_on_bare || agentic_flag || stdin_clean_exit { + CheckStatus::Pass + } else { + // Bare exited without a status that clearly evidences + // agent-safety. Surface as Warn so the operator sees it. 
+ CheckStatus::Warn( + "no help-on-bare, agentic flag, or clean-exit signal detected".into(), + ) + } + } }; Ok(CheckResult { @@ -54,10 +106,14 @@ impl Check for NonInteractiveCheck { } } +fn matches_any(haystack: &str, needles: &[&str]) -> bool { + needles.iter().any(|n| haystack.contains(n)) +} + #[cfg(test)] mod tests { use super::*; - use crate::checks::behavioral::tests::test_project_with_runner; + use crate::checks::behavioral::tests::{test_project_with_runner, test_project_with_sh_script}; use crate::types::CheckStatus; #[test] @@ -75,11 +131,54 @@ mod tests { } #[test] - fn non_interactive_handles_crash() { - let project = crate::checks::behavioral::tests::test_project_with_sh_script("kill -11 $$"); + fn non_interactive_handles_crash_without_agentic_flag() { + let project = test_project_with_sh_script("kill -11 $$"); let result = NonInteractiveCheck .run(&project) .expect("check should not panic on crash"); assert!(matches!(result.status, CheckStatus::Warn(_))); } + + #[test] + fn non_interactive_passes_when_bare_prints_usage() { + // Simulates a clap-style `arg_required_else_help` binary: exits + // non-zero and writes Usage to stderr. This is the dogfood shape. + let script = r#" +if [ "$1" = "--help" ]; then + echo "Usage: myapp [OPTIONS]" + exit 0 +fi +echo "Usage: myapp [OPTIONS]" >&2 +exit 2 +"#; + let project = test_project_with_sh_script(script); + let result = NonInteractiveCheck.run(&project).expect("check should run"); + assert_eq!(result.status, CheckStatus::Pass); + } + + #[test] + fn non_interactive_passes_when_help_advertises_agentic_flag() { + // Simulates a tool where bare invocation does something non-obvious + // but `--help` advertises `--no-interactive` — that's the contract. 
+ let script = r#" +if [ "$1" = "--help" ]; then + echo "Usage: foo [--no-interactive]" + exit 0 +fi +echo "running default action" +"#; + let project = test_project_with_sh_script(script); + let result = NonInteractiveCheck.run(&project).expect("check should run"); + assert_eq!(result.status, CheckStatus::Pass); + } + + #[test] + fn matches_any_finds_marker() { + assert!(matches_any("Usage: foo [OPTIONS]", HELP_ON_BARE_MARKERS)); + assert!(matches_any( + " --no-interactive skip prompts", + AGENTIC_FLAG_MARKERS + )); + assert!(!matches_any("just some text", AGENTIC_FLAG_MARKERS)); + } } diff --git a/src/checks/behavioral/quiet.rs b/src/checks/behavioral/quiet.rs index 1537969..2058f8f 100644 --- a/src/checks/behavioral/quiet.rs +++ b/src/checks/behavioral/quiet.rs @@ -18,6 +18,10 @@ impl Check for QuietCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p7-must-quiet"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/behavioral/sigpipe.rs b/src/checks/behavioral/sigpipe.rs index 399e32e..cf42351 100644 --- a/src/checks/behavioral/sigpipe.rs +++ b/src/checks/behavioral/sigpipe.rs @@ -18,6 +18,10 @@ impl Check for SigpipeCheck { CheckLayer::Behavioral } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-sigpipe"] + } + fn applicable(&self, project: &Project) -> bool { project.runner.is_some() } diff --git a/src/checks/mod.rs b/src/checks/mod.rs index 0ab8c57..a370a28 100644 --- a/src/checks/mod.rs +++ b/src/checks/mod.rs @@ -1,3 +1,18 @@ pub mod behavioral; pub mod project; pub mod source; + +use crate::check::Check; +use crate::project::Language; + +/// Every check the linter can run, across every language dispatch. Used by +/// the matrix generator so the coverage artifact reflects the full catalog +/// regardless of what project `anc` currently has in hand. 
+pub fn all_checks_catalog() -> Vec<Box<dyn Check>> { + let mut all: Vec<Box<dyn Check>> = Vec::new(); + all.extend(behavioral::all_behavioral_checks()); + all.extend(project::all_project_checks()); + all.extend(source::all_source_checks(Language::Rust)); + all.extend(source::all_source_checks(Language::Python)); + all +} diff --git a/src/checks/project/completions.rs b/src/checks/project/completions.rs index fa8d806..6873b34 100644 --- a/src/checks/project/completions.rs +++ b/src/checks/project/completions.rs @@ -24,6 +24,10 @@ impl Check for CompletionsCheck { CheckLayer::Project } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-completions"] + } + fn applicable(&self, project: &Project) -> bool { project.path.is_dir() && project.language == Some(Language::Rust) diff --git a/src/checks/project/dry_run.rs b/src/checks/project/dry_run.rs index 50935e5..a1dbe5f 100644 --- a/src/checks/project/dry_run.rs +++ b/src/checks/project/dry_run.rs @@ -53,6 +53,10 @@ impl Check for DryRunCheck { CheckLayer::Project } + fn covers(&self) -> &'static [&'static str] { + &["p5-must-dry-run"] + } + fn applicable(&self, project: &Project) -> bool { project.path.is_dir() && project.language.is_some() } diff --git a/src/checks/project/error_module.rs b/src/checks/project/error_module.rs index 20bc21d..aaac36e 100644 --- a/src/checks/project/error_module.rs +++ b/src/checks/project/error_module.rs @@ -24,6 +24,10 @@ impl Check for ErrorModuleCheck { CheckLayer::Project } + fn covers(&self) -> &'static [&'static str] { + &["p4-should-structured-enum"] + } + fn applicable(&self, project: &Project) -> bool { project.path.is_dir() && project.language.is_some() } diff --git a/src/checks/project/non_interactive.rs b/src/checks/project/non_interactive.rs index 40b92ae..bef1849 100644 --- a/src/checks/project/non_interactive.rs +++ b/src/checks/project/non_interactive.rs @@ -27,6 +27,10 @@ impl Check for NonInteractiveSourceCheck { CheckLayer::Project } + fn covers(&self) -> &'static 
[&'static str] { + 
&["p1-must-no-interactive"] + } + fn applicable(&self, project: &Project) -> bool { project.path.is_dir() && project.language == Some(Language::Rust) diff --git a/src/checks/source/python/no_color.rs b/src/checks/source/python/no_color.rs index 2d8d732..319ead8 100644 --- a/src/checks/source/python/no_color.rs +++ b/src/checks/source/python/no_color.rs @@ -32,6 +32,10 @@ impl Check for NoColorPythonCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-no-color"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Python) } diff --git a/src/checks/source/rust/env_flags.rs b/src/checks/source/rust/env_flags.rs index cfc2bf4..718cc29 100644 --- a/src/checks/source/rust/env_flags.rs +++ b/src/checks/source/rust/env_flags.rs @@ -1,8 +1,10 @@ //! Check: Detect agentic clap flags missing `env = "..."` attribute. //! -//! Principle: P6 (Composable Structure) — Agentic flags (output, quiet, verbose, -//! timeout, no-color, format) should be configurable via environment variables -//! so agents can set defaults without passing flags every invocation. +//! Principle: P1 (Non-Interactive by Default) MUST — "Every flag settable +//! via environment variable." Agentic flags (output, quiet, verbose, timeout, +//! no-color, format) should all have env-var bindings so agents can set +//! defaults without passing flags every invocation. Renamed from +//! `p6-env-flags` in v0.1.1 — the spec requirement lives in P1, not P6. 
use ast_grep_core::Pattern; use ast_grep_core::tree_sitter::LanguageExt; @@ -22,17 +24,21 @@ pub struct EnvFlagsCheck; impl Check for EnvFlagsCheck { fn id(&self) -> &str { - "p6-env-flags" + "p1-env-flags-source" } fn group(&self) -> CheckGroup { - CheckGroup::P6 + CheckGroup::P1 } fn layer(&self) -> CheckLayer { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p1-must-env-var"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/error_types.rs b/src/checks/source/rust/error_types.rs index 8221f0f..8d45ae9 100644 --- a/src/checks/source/rust/error_types.rs +++ b/src/checks/source/rust/error_types.rs @@ -29,6 +29,10 @@ impl Check for ErrorTypesCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p4-should-structured-enum"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/exit_codes.rs b/src/checks/source/rust/exit_codes.rs index abf405c..62d5f9c 100644 --- a/src/checks/source/rust/exit_codes.rs +++ b/src/checks/source/rust/exit_codes.rs @@ -30,6 +30,10 @@ impl Check for ExitCodesCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p4-must-exit-code-mapping"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/global_flags.rs b/src/checks/source/rust/global_flags.rs index d6f103f..fb0ce49 100644 --- a/src/checks/source/rust/global_flags.rs +++ b/src/checks/source/rust/global_flags.rs @@ -36,6 +36,10 @@ impl Check for GlobalFlagsCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-global-flags"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/headless_auth.rs b/src/checks/source/rust/headless_auth.rs index 
4a50d18..f489e70 100644 --- a/src/checks/source/rust/headless_auth.rs +++ b/src/checks/source/rust/headless_auth.rs @@ -42,6 +42,10 @@ impl Check for HeadlessAuthCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p1-must-no-browser"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/no_color.rs b/src/checks/source/rust/no_color.rs index 55f3b73..2cef2d6 100644 --- a/src/checks/source/rust/no_color.rs +++ b/src/checks/source/rust/no_color.rs @@ -32,6 +32,10 @@ impl Check for NoColorSourceCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-no-color"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/no_pager.rs b/src/checks/source/rust/no_pager.rs index a020c23..a965184 100644 --- a/src/checks/source/rust/no_pager.rs +++ b/src/checks/source/rust/no_pager.rs @@ -31,6 +31,10 @@ impl Check for NoPagerCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-no-pager"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/output_clamping.rs b/src/checks/source/rust/output_clamping.rs index 0d7a85d..b9ef7d7 100644 --- a/src/checks/source/rust/output_clamping.rs +++ b/src/checks/source/rust/output_clamping.rs @@ -40,6 +40,10 @@ impl Check for OutputClampingCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p7-must-list-clamping"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/output_module.rs b/src/checks/source/rust/output_module.rs index 2dba223..c3b231c 100644 --- a/src/checks/source/rust/output_module.rs +++ b/src/checks/source/rust/output_module.rs @@ -31,6 +31,10 @@ impl Check for OutputModuleCheck { 
CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p2-must-stdout-stderr-split"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/structured_output.rs b/src/checks/source/rust/structured_output.rs index ffc38db..396aea9 100644 --- a/src/checks/source/rust/structured_output.rs +++ b/src/checks/source/rust/structured_output.rs @@ -27,6 +27,10 @@ impl Check for StructuredOutputCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p2-must-output-flag"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/timeout_flag.rs b/src/checks/source/rust/timeout_flag.rs index dea6285..4e02849 100644 --- a/src/checks/source/rust/timeout_flag.rs +++ b/src/checks/source/rust/timeout_flag.rs @@ -30,6 +30,10 @@ impl Check for TimeoutFlagCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p6-must-timeout-network"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/try_parse.rs b/src/checks/source/rust/try_parse.rs index 55cc3e6..5e2af44 100644 --- a/src/checks/source/rust/try_parse.rs +++ b/src/checks/source/rust/try_parse.rs @@ -29,6 +29,10 @@ impl Check for TryParseCheck { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p4-must-try-parse"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/checks/source/rust/tty_detection.rs b/src/checks/source/rust/tty_detection.rs index 64895d6..98a4639 100644 --- a/src/checks/source/rust/tty_detection.rs +++ b/src/checks/source/rust/tty_detection.rs @@ -1,7 +1,11 @@ -//! Check: Detect TTY/terminal detection for color output. +//! Check: Detect TTY/terminal detection in source. //! -//! 
Principle: P6 (Composable Structure) — CLIs that emit color/ANSI codes -//! should detect whether stdout is a terminal to avoid corrupting piped output. +//! Principle: P1 (Non-Interactive by Default) SHOULD — "Auto-detect +//! non-interactive context via TTY detection and suppress prompts when +//! stderr is not a terminal, even without an explicit `--no-interactive` +//! flag." The same `IsTerminal` machinery also satisfies P6's color +//! suppression MUST, but semantically this check verifies the P1 SHOULD +//! (renamed from `p6-tty-detection` in v0.1.1). //! //! This is a conditional check: //! Trigger: the source uses color/ANSI/style libraries @@ -34,17 +38,21 @@ pub struct TtyDetectionCheck; impl Check for TtyDetectionCheck { fn id(&self) -> &str { - "p6-tty-detection" + "p1-tty-detection-source" } fn group(&self) -> CheckGroup { - CheckGroup::P6 + CheckGroup::P1 } fn layer(&self) -> CheckLayer { CheckLayer::Source } + fn covers(&self) -> &'static [&'static str] { + &["p1-should-tty-detection"] + } + fn applicable(&self, project: &Project) -> bool { project.language == Some(Language::Rust) } diff --git a/src/cli.rs b/src/cli.rs index 0758f0a..a831987 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -64,6 +64,33 @@ pub enum Commands { /// Shell to generate for shell: Shell, }, + /// Generate build artifacts (coverage matrix, etc.) + Generate { + #[command(subcommand)] + artifact: GenerateKind, + }, +} + +#[derive(Subcommand)] +pub enum GenerateKind { + /// Render the spec coverage matrix (registry → checks → artifact). + CoverageMatrix { + /// Path for the Markdown artifact. Defaults to `docs/coverage-matrix.md`. + #[arg(long, value_name = "PATH", default_value = "docs/coverage-matrix.md")] + out: std::path::PathBuf, + + /// Path for the JSON artifact. Defaults to `coverage/matrix.json`. 
+ #[arg( + long = "json-out", + value_name = "PATH", + default_value = "coverage/matrix.json" + )] + json_out: std::path::PathBuf, + + /// Exit non-zero when committed artifacts differ from generated output. CI drift guard. + #[arg(long)] + check: bool, + }, } #[derive(Clone, ValueEnum)] diff --git a/src/main.rs b/src/main.rs index 8c5eb1f..bf02323 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod check; mod checks; mod cli; mod error; +mod principles; mod project; mod runner; mod scorecard; @@ -17,8 +18,9 @@ use check::Check; use checks::behavioral::all_behavioral_checks; use checks::project::all_project_checks; use checks::source::all_source_checks; -use cli::{Cli, Commands, OutputFormat}; +use cli::{Cli, Commands, GenerateKind, OutputFormat}; use error::AppError; +use principles::matrix; use project::Project; use scorecard::{exit_code, format_json, format_text}; use types::{CheckGroup, CheckResult, CheckStatus}; @@ -73,6 +75,9 @@ fn run() -> Result { generate(shell, &mut cmd, "anc", &mut std::io::stdout()); return Ok(0); } + Some(Commands::Generate { artifact }) => { + return run_generate(artifact); + } None => { let mut cmd = ::command(); eprintln!("{}", cmd.render_help()); @@ -143,10 +148,11 @@ fn run() -> Result { results.retain(|r| matches_principle(&r.group, p)); } - // Format output + // Format output. `format_json` needs the check catalog so it can map + // result IDs back to the requirements each check covers. 
let output_str = match output { OutputFormat::Text => format_text(&results, quiet), - OutputFormat::Json => format_json(&results), + OutputFormat::Json => format_json(&results, &all_checks), }; print!("{output_str}"); @@ -185,6 +191,99 @@ fn resolve_command_on_path(name: &str) -> Result { Ok(std::path::PathBuf::from(first)) } +fn run_generate(artifact: GenerateKind) -> Result { + match artifact { + GenerateKind::CoverageMatrix { + out, + json_out, + check, + } => { + let catalog = checks::all_checks_catalog(); + + // Dangling `covers()` references are a registry bug — surface + // them before writing artifacts so CI catches the regression + // at `generate --check` time too. + let dangling = matrix::dangling_cover_ids(&catalog); + if !dangling.is_empty() { + for (check_id, req_id) in &dangling { + eprintln!("error: check `{check_id}` covers unknown requirement `{req_id}`"); + } + return Err(AppError::ProjectDetection(anyhow::anyhow!( + "registry drift: {} dangling requirement reference(s)", + dangling.len() + ))); + } + + let m = matrix::build(&catalog); + let rendered_md = matrix::render_markdown(&m); + let rendered_json = matrix::render_json(&m); + + if check { + // Drift mode: compare generated output to committed artifacts. + // Fail with actionable evidence so CI points the operator at + // `anc generate coverage-matrix` as the fix. 
+ let existing_md = std::fs::read_to_string(&out).unwrap_or_default(); + let existing_json = std::fs::read_to_string(&json_out).unwrap_or_default(); + let md_matches = normalize_trailing_newline(&existing_md) + == normalize_trailing_newline(&rendered_md); + let json_matches = normalize_trailing_newline(&existing_json) + == normalize_trailing_newline(&rendered_json); + if !md_matches { + eprintln!( + "error: {} is out of date — run `anc generate coverage-matrix`", + out.display() + ); + } + if !json_matches { + eprintln!( + "error: {} is out of date — run `anc generate coverage-matrix`", + json_out.display() + ); + } + return Ok(if md_matches && json_matches { 0 } else { 2 }); + } + + if let Some(parent) = out.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(|e| { + AppError::ProjectDetection(anyhow::anyhow!( + "creating parent dir for {}: {e}", + out.display() + )) + })?; + } + } + if let Some(parent) = json_out.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(|e| { + AppError::ProjectDetection(anyhow::anyhow!( + "creating parent dir for {}: {e}", + json_out.display() + )) + })?; + } + } + std::fs::write(&out, &rendered_md).map_err(|e| { + AppError::ProjectDetection(anyhow::anyhow!("writing {}: {e}", out.display())) + })?; + std::fs::write(&json_out, &rendered_json).map_err(|e| { + AppError::ProjectDetection(anyhow::anyhow!("writing {}: {e}", json_out.display())) + })?; + eprintln!( + "wrote {} ({} rows) and {}", + out.display(), + m.rows.len(), + json_out.display() + ); + Ok(0) + } + } +} + +fn normalize_trailing_newline(s: &str) -> &str { + s.trim_end_matches('\n') +} + fn matches_principle(group: &CheckGroup, principle: u8) -> bool { // CodeQuality and ProjectStructure checks are cross-cutting — always include them. 
 matches!( diff --git a/src/principles/matrix.rs b/src/principles/matrix.rs new file mode 100644 index 0000000..2924404 --- /dev/null +++ b/src/principles/matrix.rs @@ -0,0 +1,387 @@ +//! Coverage matrix generator. Cross-references the requirement registry +//! against the checks discovered at runtime (behavioral + source + project). +//! +//! Output artifacts: +//! - `docs/coverage-matrix.md` — human-readable table grouped by principle. +//! - `coverage/matrix.json` — machine-readable, consumed by the site's +//! `/coverage` page. +//! +//! The CLI surfaces this as `anc generate coverage-matrix` with `--check` +//! to fail CI when committed artifacts drift from the registry + checks. + +use std::collections::BTreeMap; +use std::fmt::Write as _; + +use serde::Serialize; + +use crate::check::Check; +use crate::principles::registry::{Applicability, Level, REQUIREMENTS}; +use crate::types::CheckLayer; + +/// A check that covers a given requirement. +#[derive(Debug, Clone, Serialize)] +pub struct Verifier { + pub check_id: String, + pub layer: CheckLayer, +} + +/// One row of the coverage matrix. +#[derive(Debug, Serialize)] +pub struct MatrixRow { + pub id: &'static str, + pub principle: u8, + pub level: Level, + pub summary: &'static str, + pub applicability: Applicability, + pub verifiers: Vec<Verifier>, +} + +/// The rendered matrix, suitable for JSON serialization. 
+#[derive(Debug, Serialize)] +pub struct Matrix { + pub schema_version: &'static str, + pub generated_by: &'static str, + pub rows: Vec<MatrixRow>, + pub summary: MatrixSummary, +} + +#[derive(Debug, Serialize)] +pub struct MatrixSummary { + pub total: usize, + pub covered: usize, + pub uncovered: usize, + pub must: LevelSummary, + pub should: LevelSummary, + pub may: LevelSummary, +} + +#[derive(Debug, Serialize)] +pub struct LevelSummary { + pub total: usize, + pub covered: usize, +} + +const SCHEMA_VERSION: &str = "1.0"; +const GENERATED_BY: &str = "anc generate coverage-matrix"; + +/// Build the matrix from the requirement registry + a slice of checks. +/// Ownership stays with the caller; this reads `check.covers()` references. +pub fn build(checks: &[Box<dyn Check>]) -> Matrix { + // Inverse map: requirement ID -> Vec<Verifier>. + let mut coverage: BTreeMap<&'static str, Vec<Verifier>> = BTreeMap::new(); + for check in checks { + for req_id in check.covers() { + coverage.entry(req_id).or_default().push(Verifier { + check_id: check.id().to_string(), + layer: check.layer(), + }); + } + } + + let rows: Vec<MatrixRow> = REQUIREMENTS + .iter() + .map(|r| MatrixRow { + id: r.id, + principle: r.principle, + level: r.level, + summary: r.summary, + applicability: r.applicability, + verifiers: coverage.get(r.id).cloned().unwrap_or_default(), + }) + .collect(); + + let summary = summarize(&rows); + + Matrix { + schema_version: SCHEMA_VERSION, + generated_by: GENERATED_BY, + rows, + summary, + } +} + +fn summarize(rows: &[MatrixRow]) -> MatrixSummary { + let mut must = LevelSummary { + total: 0, + covered: 0, + }; + let mut should = LevelSummary { + total: 0, + covered: 0, + }; + let mut may = LevelSummary { + total: 0, + covered: 0, + }; + let mut covered = 0; + + for row in rows { + let bucket = match row.level { + Level::Must => &mut must, + Level::Should => &mut should, + Level::May => &mut may, + }; + bucket.total += 1; + if !row.verifiers.is_empty() { + bucket.covered 
+= 1; + covered += 1; + } + } + + MatrixSummary { 
+ total: rows.len(), + covered, + uncovered: rows.len() - covered, + must, + should, + may, + } +} + +/// Render the matrix as Markdown. Stable format — a small change in +/// structure will break golden-file tests on purpose. +pub fn render_markdown(matrix: &Matrix) -> String { + let mut out = String::new(); + let _ = writeln!(out, "# Coverage Matrix"); + let _ = writeln!(out); + let _ = writeln!( + out, + "", + GENERATED_BY + ); + let _ = writeln!(out); + let _ = writeln!( + out, + "This table maps every MUST, SHOULD, and MAY in the agent-native CLI spec to the `anc` checks that verify it." + ); + let _ = writeln!( + out, + "When a requirement has no verifier, the cell reads **UNCOVERED** and the reader knows the scorecard cannot speak to it." + ); + let _ = writeln!(out); + + let s = &matrix.summary; + let _ = writeln!(out, "## Summary"); + let _ = writeln!(out); + let _ = writeln!( + out, + "- **Total**: {} requirements ({} covered / {} uncovered)", + s.total, s.covered, s.uncovered + ); + let _ = writeln!( + out, + "- **MUST**: {} of {} covered", + s.must.covered, s.must.total + ); + let _ = writeln!( + out, + "- **SHOULD**: {} of {} covered", + s.should.covered, s.should.total + ); + let _ = writeln!( + out, + "- **MAY**: {} of {} covered", + s.may.covered, s.may.total + ); + let _ = writeln!(out); + + // Group rows by principle for readability. 
+ let mut by_principle: BTreeMap<u8, Vec<&MatrixRow>> = BTreeMap::new(); + for row in &matrix.rows { + by_principle.entry(row.principle).or_default().push(row); + } + + for (principle, rows) in &by_principle { + let _ = writeln!(out, "## P{}: {}", principle, principle_name(*principle)); + let _ = writeln!(out); + let _ = writeln!( + out, + "| ID | Level | Applicability | Verifier(s) | Summary |" + ); + let _ = writeln!(out, "| --- | --- | --- | --- | --- |"); + for row in rows { + let level = match row.level { + Level::Must => "MUST", + Level::Should => "SHOULD", + Level::May => "MAY", + }; + let applicability = match row.applicability { + Applicability::Universal => "Universal".to_string(), + Applicability::Conditional(cond) => format!("If: {cond}"), + }; + let verifiers = if row.verifiers.is_empty() { + "**UNCOVERED**".to_string() + } else { + row.verifiers + .iter() + .map(|v| format!("`{}` ({})", v.check_id, layer_label(v.layer))) + .collect::<Vec<_>>() + .join("<br>")
+ }; + let _ = writeln!( + out, + "| `{}` | {} | {} | {} | {} |", + row.id, + level, + applicability, + verifiers, + escape_pipes(row.summary) + ); + } + let _ = writeln!(out); + } + + out +} + +fn layer_label(layer: CheckLayer) -> &'static str { + match layer { + CheckLayer::Behavioral => "behavioral", + CheckLayer::Source => "source", + CheckLayer::Project => "project", + } +} + +fn principle_name(principle: u8) -> &'static str { + match principle { + 1 => "Non-Interactive by Default", + 2 => "Structured, Parseable Output", + 3 => "Progressive Help Discovery", + 4 => "Fail Fast, Actionable Errors", + 5 => "Safe Retries, Mutation Boundaries", + 6 => "Composable, Predictable Command Structure", + 7 => "Bounded, High-Signal Responses", + _ => "Unknown", + } +} + +/// Replace pipe characters so markdown table rows stay well-formed. +fn escape_pipes(s: &str) -> String { + s.replace('|', "\\|") +} + +/// Render the matrix as pretty JSON. +pub fn render_json(matrix: &Matrix) -> String { + serde_json::to_string_pretty(matrix).unwrap_or_else(|e| format!("{{\"error\":\"{e}\"}}")) +} + +/// `(check_id, requirement_id)` pairs for each `Check::covers()` entry that names a +/// requirement missing from the registry; used to catch dangling references. 
+pub fn dangling_cover_ids(checks: &[Box<dyn Check>]) -> Vec<(String, String)> { + let mut dangling = Vec::new(); + for check in checks { + for req_id in check.covers() { + if crate::principles::registry::find(req_id).is_none() { + dangling.push((check.id().to_string(), (*req_id).to_string())); + } + } + } + dangling +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::check::Check; + use crate::project::Project; + use crate::types::{CheckGroup, CheckLayer, CheckResult, CheckStatus}; + + struct FakeCheck { + id: &'static str, + covers: &'static [&'static str], + } + + impl Check for FakeCheck { + fn id(&self) -> &str { + self.id + } + fn group(&self) -> CheckGroup { + CheckGroup::P1 + } + fn layer(&self) -> CheckLayer { + CheckLayer::Behavioral + } + fn applicable(&self, _project: &Project) -> bool { + true + } + fn run(&self, _project: &Project) -> anyhow::Result<CheckResult> { + Ok(CheckResult { + id: self.id.to_string(), + label: self.id.to_string(), + group: CheckGroup::P1, + layer: CheckLayer::Behavioral, + status: CheckStatus::Pass, + }) + } + fn covers(&self) -> &'static [&'static str] { + self.covers + } + } + + #[test] + fn build_marks_uncovered_rows_when_no_checks() { + let checks: Vec<Box<dyn Check>> = vec![]; + let matrix = build(&checks); + assert_eq!(matrix.rows.len(), REQUIREMENTS.len()); + assert!(matrix.rows.iter().all(|r| r.verifiers.is_empty())); + assert_eq!(matrix.summary.covered, 0); + assert_eq!(matrix.summary.uncovered, REQUIREMENTS.len()); + } + + #[test] + fn build_links_check_to_requirement() { + let checks: Vec<Box<dyn Check>> = vec![Box::new(FakeCheck { + id: "fake-check", + covers: &["p1-must-no-interactive"], + })]; + let matrix = build(&checks); + let row = matrix + .rows + .iter() + .find(|r| r.id == "p1-must-no-interactive") + .expect("requirement row"); + assert_eq!(row.verifiers.len(), 1); + assert_eq!(row.verifiers[0].check_id, "fake-check"); + } + + #[test] + fn render_markdown_contains_summary_and_uncovered_marker() { + 
let checks: Vec<Box<dyn Check>> = vec![]; + let matrix = 
 build(&checks); + let md = render_markdown(&matrix); + assert!(md.contains("# Coverage Matrix")); + assert!(md.contains("## Summary")); + assert!(md.contains("**UNCOVERED**")); + assert!(md.contains("P1: Non-Interactive by Default")); + } + + #[test] + fn render_json_is_valid_json() { + let checks: Vec<Box<dyn Check>> = vec![]; + let matrix = build(&checks); + let json = render_json(&matrix); + let parsed: serde_json::Value = serde_json::from_str(&json).expect("valid JSON"); + assert_eq!(parsed["schema_version"], SCHEMA_VERSION); + assert!(parsed["rows"].is_array()); + } + + #[test] + fn dangling_cover_ids_detects_typo() { + let checks: Vec<Box<dyn Check>> = vec![Box::new(FakeCheck { + id: "typo-check", + covers: &["p1-must-no-interactivx"], // typo on purpose + })]; + let dangling = dangling_cover_ids(&checks); + assert_eq!(dangling.len(), 1); + assert_eq!(dangling[0].0, "typo-check"); + } + + #[test] + fn dangling_cover_ids_empty_for_valid_refs() { + let checks: Vec<Box<dyn Check>> = vec![Box::new(FakeCheck { + id: "valid-check", + covers: &["p1-must-no-interactive", "p1-should-tty-detection"], + })]; + assert!(dangling_cover_ids(&checks).is_empty()); + } +} diff --git a/src/principles/mod.rs b/src/principles/mod.rs new file mode 100644 index 0000000..5a7819c --- /dev/null +++ b/src/principles/mod.rs @@ -0,0 +1,12 @@ +//! Principle registry + matrix generator. +//! +//! The registry is the single source of truth linking spec requirements +//! (MUSTs, SHOULDs, MAYs across P1–P7) to the checks that verify them. +//! `Check::covers()` declares which requirement IDs a check evidences; the +//! matrix generator inverts that mapping to produce coverage artifacts. + +pub mod matrix; +pub mod registry; + +#[allow(unused_imports)] // Re-exports used by downstream code + tests. 
+pub use registry::{Applicability, ExceptionCategory, Level, REQUIREMENTS, Requirement}; diff --git a/src/principles/registry.rs b/src/principles/registry.rs new file mode 100644 index 0000000..8ea15d7 --- /dev/null +++ b/src/principles/registry.rs @@ -0,0 +1,484 @@ +//! Flat `&'static [Requirement]` registry covering every MUST, SHOULD, and +//! MAY across P1–P7. The registry is the single source of truth linking +//! spec requirements to the checks that verify them via `Check::covers()`. +//! +//! IDs follow the pattern `p{N}-{level}-{key}`. They are stable and must +//! not change once published — scorecards and the coverage matrix pin +//! against them. + +use serde::Serialize; + +/// Severity level of a spec requirement. +#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum Level { + Must, + Should, + May, +} + +/// Whether a requirement applies to every CLI or only when a condition holds. +#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)] +#[serde(tag = "kind", content = "condition", rename_all = "lowercase")] +pub enum Applicability { + Universal, + Conditional(&'static str), +} + +/// Categories under which a tool may be exempt from specific requirements. +/// Referenced by scorecard `audit_profile`. +#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +#[allow(dead_code)] // Reserved for v0.1.3 audit_profile consumption. +pub enum ExceptionCategory { + /// TUI-by-design tools (lazygit, k9s, btop). Interactive-prompt MUSTs + /// suppressed; TTY-driving-agent access is out-of-scope for verification. + HumanTui, + /// File-traversal utilities (fd, find). Subcommand-structure SHOULDs + /// relaxed; these tools have no subcommands by design. + FileTraversal, + /// POSIX utilities (cat, sed, awk). Stdin-as-primary-input is their + /// contract; P1 interactive-prompt MUSTs satisfied vacuously. + PosixUtility, + /// Diagnostic tools (nvidia-smi, vmstat). 
No write operations, so P5 + /// MUSTs do not apply. + Diagnostic, +} + +/// A single spec requirement. The flat registry below is iterated by the +/// matrix generator and cross-referenced against `Check::covers()`. +#[derive(Debug, Clone, Serialize)] +pub struct Requirement { + pub id: &'static str, + pub principle: u8, + pub level: Level, + pub summary: &'static str, + pub applicability: Applicability, +} + +/// Every MUST/SHOULD/MAY in the spec. Order groups by principle, then level +/// (MUST → SHOULD → MAY) so readers can scan down a principle cleanly. +pub static REQUIREMENTS: &[Requirement] = &[ + // --- P1: Non-Interactive by Default --- + Requirement { + id: "p1-must-env-var", + principle: 1, + level: Level::Must, + summary: "Every flag settable via environment variable (falsey-value parser for booleans).", + applicability: Applicability::Universal, + }, + Requirement { + id: "p1-must-no-interactive", + principle: 1, + level: Level::Must, + summary: "`--no-interactive` flag gates every prompt library call; when set or stdin is not a TTY, use defaults/stdin or exit with an actionable error.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p1-must-no-browser", + principle: 1, + level: Level::Must, + summary: "Headless authentication path (`--no-browser` / OAuth Device Authorization Grant).", + applicability: Applicability::Conditional("CLI authenticates against a remote service"), + }, + Requirement { + id: "p1-should-tty-detection", + principle: 1, + level: Level::Should, + summary: "Auto-detect non-interactive context via TTY detection; suppress prompts when stderr is not a terminal.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p1-should-defaults-in-help", + principle: 1, + level: Level::Should, + summary: "Document default values for prompted inputs in `--help` output.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p1-may-rich-tui", + principle: 1, + level: Level::May, + 
summary: "Rich interactive experiences (spinners, progress bars, menus) when TTY is detected and `--no-interactive` is not set.", + applicability: Applicability::Universal, + }, + // --- P2: Structured Output --- + Requirement { + id: "p2-must-output-flag", + principle: 2, + level: Level::Must, + summary: "`--output text|json|jsonl` flag selects output format; `OutputFormat` enum threaded through output paths.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-must-stdout-stderr-split", + principle: 2, + level: Level::Must, + summary: "Data goes to stdout; diagnostics/progress/warnings go to stderr — never interleaved.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-must-exit-codes", + principle: 2, + level: Level::Must, + summary: "Exit codes are structured and documented (0 success, 1 general, 2 usage, 77 auth, 78 config).", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-must-json-errors", + principle: 2, + level: Level::Must, + summary: "When `--output json` is active, errors are emitted as JSON (to stderr) with at least `error`, `kind`, and `message` fields.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-should-consistent-envelope", + principle: 2, + level: Level::Should, + summary: "JSON output uses a consistent envelope — a top-level object with predictable keys — across every command.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-may-more-formats", + principle: 2, + level: Level::May, + summary: "Additional output formats (CSV, TSV, YAML) beyond the core three.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p2-may-raw-flag", + principle: 2, + level: Level::May, + summary: "`--raw` flag for unformatted output suitable for piping to other tools.", + applicability: Applicability::Universal, + }, + // --- P3: Progressive Help Discovery --- + Requirement { + id: "p3-must-subcommand-examples", + 
principle: 3, + level: Level::Must, + summary: "Every subcommand ships at least one concrete invocation example (`after_help` in clap).", + applicability: Applicability::Conditional("CLI uses subcommands"), + }, + Requirement { + id: "p3-must-top-level-examples", + principle: 3, + level: Level::Must, + summary: "The top-level command ships 2–3 examples covering the primary use cases.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p3-should-paired-examples", + principle: 3, + level: Level::Should, + summary: "Examples show human and agent invocations side by side (text then `--output json` equivalent).", + applicability: Applicability::Universal, + }, + Requirement { + id: "p3-should-about-long-about", + principle: 3, + level: Level::Should, + summary: "Short `about` for command-list summaries; `long_about` reserved for detailed descriptions visible with `--help`.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p3-may-examples-subcommand", + principle: 3, + level: Level::May, + summary: "Dedicated `examples` subcommand or `--examples` flag for curated usage patterns.", + applicability: Applicability::Universal, + }, + // --- P4: Fail Fast, Actionable Errors --- + Requirement { + id: "p4-must-try-parse", + principle: 4, + level: Level::Must, + summary: "Parse arguments with `try_parse()` instead of `parse()` so `--output json` can emit JSON parse errors.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p4-must-exit-code-mapping", + principle: 4, + level: Level::Must, + summary: "Error types map to distinct exit codes (0, 1, 2, 77, 78).", + applicability: Applicability::Universal, + }, + Requirement { + id: "p4-must-actionable-errors", + principle: 4, + level: Level::Must, + summary: "Every error message contains what failed, why, and what to do next.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p4-should-structured-enum", + principle: 4, + level: Level::Should, + 
summary: "Error types use a structured enum (via `thiserror` in Rust) with variant-to-kind mapping for JSON serialization.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p4-should-gating-before-network", + principle: 4, + level: Level::Should, + summary: "Config and auth validation happen before any network call (three-tier dependency gating).", + applicability: Applicability::Conditional("CLI makes network calls"), + }, + Requirement { + id: "p4-should-json-error-output", + principle: 4, + level: Level::Should, + summary: "Error output respects `--output json`: JSON-formatted errors go to stderr when JSON output is selected.", + applicability: Applicability::Universal, + }, + // --- P5: Safe Retries, Mutation Boundaries --- + Requirement { + id: "p5-must-force-yes", + principle: 5, + level: Level::Must, + summary: "Destructive operations (delete, overwrite, bulk modify) require an explicit `--force` or `--yes` flag.", + applicability: Applicability::Conditional("CLI has destructive operations"), + }, + Requirement { + id: "p5-must-read-write-distinction", + principle: 5, + level: Level::Must, + summary: "The distinction between read and write commands is clear from the command name and help text alone.", + applicability: Applicability::Conditional("CLI has both read and write operations"), + }, + Requirement { + id: "p5-must-dry-run", + principle: 5, + level: Level::Must, + summary: "A `--dry-run` flag is present on every write command; dry-run output respects `--output json`.", + applicability: Applicability::Conditional("CLI has write operations"), + }, + Requirement { + id: "p5-should-idempotency", + principle: 5, + level: Level::Should, + summary: "Write operations are idempotent where the domain allows it — running the same command twice produces the same result.", + applicability: Applicability::Conditional("CLI has write operations"), + }, + // --- P6: Composable, Predictable Command Structure --- + Requirement { + id: 
"p6-must-sigpipe", + principle: 6, + level: Level::Must, + summary: "SIGPIPE fix is the first executable statement in `main()` — piping output to `head`/`tail` must not panic.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p6-must-no-color", + principle: 6, + level: Level::Must, + summary: "TTY detection plus support for `NO_COLOR` and `TERM=dumb` — color codes suppressed when stdout/stderr is not a terminal.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p6-must-completions", + principle: 6, + level: Level::Must, + summary: "Shell completions available via a `completions` subcommand (Tier 1 meta-command — needs no config/auth/network).", + applicability: Applicability::Universal, + }, + Requirement { + id: "p6-must-timeout-network", + principle: 6, + level: Level::Must, + summary: "Network CLIs ship a `--timeout` flag with a sensible default (e.g., 30 seconds).", + applicability: Applicability::Conditional("CLI makes network calls"), + }, + Requirement { + id: "p6-must-no-pager", + principle: 6, + level: Level::Must, + summary: "If the CLI uses a pager (`less`, `more`, `$PAGER`), it supports `--no-pager` or respects `PAGER=\"\"`.", + applicability: Applicability::Conditional("CLI invokes a pager for output"), + }, + Requirement { + id: "p6-must-global-flags", + principle: 6, + level: Level::Must, + summary: "Agentic flags (`--output`, `--quiet`, `--no-interactive`, `--timeout`) are `global = true` so they propagate to every subcommand.", + applicability: Applicability::Conditional("CLI uses subcommands"), + }, + Requirement { + id: "p6-should-stdin-input", + principle: 6, + level: Level::Should, + summary: "Commands that accept input read from stdin when no file argument is provided.", + applicability: Applicability::Conditional("CLI has commands that accept input data"), + }, + Requirement { + id: "p6-should-consistent-naming", + principle: 6, + level: Level::Should, + summary: "Subcommand naming follows a 
consistent `noun verb` or `verb noun` convention throughout the tool.", + applicability: Applicability::Conditional("CLI uses subcommands"), + }, + Requirement { + id: "p6-should-tier-gating", + principle: 6, + level: Level::Should, + summary: "Three-tier dependency gating: Tier 1 (meta) needs nothing, Tier 2 (local) needs config, Tier 3 (network) needs config + auth.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p6-should-subcommand-operations", + principle: 6, + level: Level::Should, + summary: "Operations are modeled as subcommands, not flags (`tool search \"q\"`, not `tool --search \"q\"`).", + applicability: Applicability::Conditional("CLI performs multiple distinct operations"), + }, + Requirement { + id: "p6-may-color-flag", + principle: 6, + level: Level::May, + summary: "`--color auto|always|never` flag for explicit color control beyond TTY auto-detection.", + applicability: Applicability::Universal, + }, + // --- P7: Bounded, High-Signal Responses --- + Requirement { + id: "p7-must-quiet", + principle: 7, + level: Level::Must, + summary: "A `--quiet` flag suppresses non-essential output; only requested data and errors appear.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p7-must-list-clamping", + principle: 7, + level: Level::Must, + summary: "List operations clamp to a sensible default maximum; when truncated, indicate it (`\"truncated\": true` in JSON, stderr note in text).", + applicability: Applicability::Conditional("CLI has list-style commands"), + }, + Requirement { + id: "p7-should-verbose", + principle: 7, + level: Level::Should, + summary: "A `--verbose` flag (or `-v` / `-vv`) escalates diagnostic detail when agents need to debug failures.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p7-should-limit", + principle: 7, + level: Level::Should, + summary: "A `--limit` or `--max-results` flag lets callers request exactly the number of items they want.", + applicability: 
Applicability::Conditional("CLI has list-style commands"), + }, + Requirement { + id: "p7-should-timeout", + principle: 7, + level: Level::Should, + summary: "A `--timeout` flag bounds execution time so agents are not blocked indefinitely.", + applicability: Applicability::Universal, + }, + Requirement { + id: "p7-may-cursor-pagination", + principle: 7, + level: Level::May, + summary: "Cursor-based pagination flags (`--after`, `--before`) for efficient traversal of large result sets.", + applicability: Applicability::Conditional("CLI returns paginated results"), + }, + Requirement { + id: "p7-may-auto-verbosity", + principle: 7, + level: Level::May, + summary: "Automatic verbosity reduction in non-TTY contexts (same behavior `--quiet` explicitly requests).", + applicability: Applicability::Universal, + }, +]; + +/// Look up a requirement by ID. Returns `None` if the ID is not registered. +pub fn find(id: &str) -> Option<&'static Requirement> { + REQUIREMENTS.iter().find(|r| r.id == id) +} + +/// Count requirements at a given level. Test helper + doc convenience. 
+#[allow(dead_code)] +pub fn count_at_level(level: Level) -> usize { + REQUIREMENTS.iter().filter(|r| r.level == level).count() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[test] + fn ids_are_unique() { + let mut seen = HashSet::new(); + for r in REQUIREMENTS { + assert!(seen.insert(r.id), "duplicate requirement ID: {}", r.id); + } + } + + #[test] + fn ids_follow_naming_convention() { + for r in REQUIREMENTS { + let prefix = format!("p{}-", r.principle); + assert!( + r.id.starts_with(&prefix), + "requirement {} does not start with {}", + r.id, + prefix + ); + let level_token = match r.level { + Level::Must => "-must-", + Level::Should => "-should-", + Level::May => "-may-", + }; + assert!( + r.id.contains(level_token), + "requirement {} level token {} missing", + r.id, + level_token + ); + } + } + + #[test] + fn principle_range_is_valid() { + for r in REQUIREMENTS { + assert!( + (1..=7).contains(&r.principle), + "requirement {} has invalid principle {}", + r.id, + r.principle + ); + } + } + + #[test] + fn summary_is_non_empty() { + for r in REQUIREMENTS { + assert!( + !r.summary.trim().is_empty(), + "requirement {} has empty summary", + r.id + ); + } + } + + #[test] + fn find_returns_registered_ids() { + assert!(find("p1-must-no-interactive").is_some()); + assert!(find("p6-must-sigpipe").is_some()); + assert!(find("nonexistent-id").is_none()); + } + + #[test] + fn registry_size_matches_spec() { + // Spec snapshot 2026-04-20: 46 requirements across P1-P7. + // Bumping this counter is a deliberate act; it means the spec grew. 
+ assert_eq!(REQUIREMENTS.len(), 46); + } + + #[test] + fn level_counts_match_spec() { + assert_eq!(count_at_level(Level::Must), 23); + assert_eq!(count_at_level(Level::Should), 16); + assert_eq!(count_at_level(Level::May), 7); + } +} diff --git a/src/scorecard.rs b/src/scorecard.rs index 919e1fc..e3e02c5 100644 --- a/src/scorecard.rs +++ b/src/scorecard.rs @@ -1,14 +1,46 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Write as _; use serde::Serialize; +use crate::check::Check; +use crate::principles::registry::{Level, REQUIREMENTS}; use crate::types::{CheckGroup, CheckResult, CheckStatus}; +/// Current scorecard JSON schema version. Consumers (site rendering, +/// leaderboard pipeline) pin against this to detect shape changes. +pub const SCHEMA_VERSION: &str = "1.1"; + #[derive(Serialize)] pub struct Scorecard { + pub schema_version: &'static str, pub results: Vec, pub summary: Summary, + pub coverage_summary: CoverageSummary, + /// Derived audience classification (human-primary, agent-primary, mixed). + /// Reserved for v0.1.3; emitted as `null` in v0.1.1 / v0.1.2. + pub audience: Option, + /// Registry-sourced exemption category (human-tui, file-traversal, etc.). + /// Reserved for v0.1.3; emitted as `null` in v0.1.1 / v0.1.2. + pub audit_profile: Option, +} + +/// Per-level verification counts: how many requirements at this level had +/// at least one check in this run that declared `covers()` against them. +/// A requirement is "verified" regardless of pass/fail — the status tells +/// the consumer whether verification succeeded, this counter tells them +/// whether it was attempted at all. 
+#[derive(Serialize)] +pub struct LevelCounts { + pub total: usize, + pub verified: usize, +} + +#[derive(Serialize)] +pub struct CoverageSummary { + pub must: LevelCounts, + pub should: LevelCounts, + pub may: LevelCounts, } #[derive(Serialize)] @@ -177,15 +209,74 @@ pub fn format_text(results: &[CheckResult], quiet: bool) -> String { out } -pub fn format_json(results: &[CheckResult]) -> String { - let scorecard = Scorecard { +/// Build a v1.1 scorecard. The `ran_checks` slice is the catalog of checks +/// that produced `results` — needed to translate check IDs back to the +/// requirement IDs they cover for `coverage_summary`. +pub fn build_scorecard( + results: &[CheckResult], + ran_checks: &[Box], + audience: Option, + audit_profile: Option, +) -> Scorecard { + Scorecard { + schema_version: SCHEMA_VERSION, results: results.iter().map(CheckResultView::from_result).collect(), summary: build_summary(results), - }; - // serde_json::to_string_pretty should not fail on this struct + coverage_summary: build_coverage_summary(results, ran_checks), + audience, + audit_profile, + } +} + +pub fn format_json(results: &[CheckResult], ran_checks: &[Box]) -> String { + let scorecard = build_scorecard(results, ran_checks, None, None); serde_json::to_string_pretty(&scorecard).unwrap_or_else(|e| format!("{{\"error\": \"{e}\"}}")) } +fn build_coverage_summary( + results: &[CheckResult], + ran_checks: &[Box], +) -> CoverageSummary { + // Map each ran check to its covers() so we can turn the set of ran + // check IDs into a set of covered requirement IDs. 
+ let covers_by_id: HashMap<&str, &'static [&'static str]> = + ran_checks.iter().map(|c| (c.id(), c.covers())).collect(); + + let mut verified: HashSet<&'static str> = HashSet::new(); + for r in results { + if let Some(ids) = covers_by_id.get(r.id.as_str()) { + verified.extend(ids.iter().copied()); + } + } + + let mut must = LevelCounts { + total: 0, + verified: 0, + }; + let mut should = LevelCounts { + total: 0, + verified: 0, + }; + let mut may = LevelCounts { + total: 0, + verified: 0, + }; + + for req in REQUIREMENTS { + let bucket = match req.level { + Level::Must => &mut must, + Level::Should => &mut should, + Level::May => &mut may, + }; + bucket.total += 1; + if verified.contains(req.id) { + bucket.verified += 1; + } + } + + CoverageSummary { must, should, may } +} + pub fn exit_code(results: &[CheckResult]) -> i32 { let has_fail_or_error = results .iter() @@ -224,8 +315,9 @@ mod tests { make_result("c1", CheckStatus::Pass, CheckGroup::P1), make_result("c2", CheckStatus::Fail("bad".into()), CheckGroup::P2), ]; - let json = format_json(&results); + let json = format_json(&results, &[]); let parsed: serde_json::Value = serde_json::from_str(&json).expect("valid JSON"); + assert_eq!(parsed["schema_version"], "1.1"); assert_eq!(parsed["summary"]["total"], 2); assert_eq!(parsed["summary"]["pass"], 1); assert_eq!(parsed["summary"]["fail"], 1); @@ -233,6 +325,58 @@ mod tests { assert!(parsed["results"][0]["evidence"].is_null()); assert_eq!(parsed["results"][1]["status"], "fail"); assert_eq!(parsed["results"][1]["evidence"], "bad"); + // v1.1 additions: coverage_summary present with three levels, audience + audit_profile null. 
+ assert!(parsed["coverage_summary"]["must"]["total"].is_number()); + assert!(parsed["coverage_summary"]["should"]["total"].is_number()); + assert!(parsed["coverage_summary"]["may"]["total"].is_number()); + assert!(parsed["audience"].is_null()); + assert!(parsed["audit_profile"].is_null()); + } + + #[test] + fn coverage_summary_counts_verified_requirements() { + use crate::check::Check; + use crate::project::Project; + use crate::types::CheckLayer; + + struct FakeCheck { + id: &'static str, + covers: &'static [&'static str], + } + + impl Check for FakeCheck { + fn id(&self) -> &str { + self.id + } + fn group(&self) -> CheckGroup { + CheckGroup::P1 + } + fn layer(&self) -> CheckLayer { + CheckLayer::Behavioral + } + fn applicable(&self, _p: &Project) -> bool { + true + } + fn run(&self, _p: &Project) -> anyhow::Result { + unreachable!() + } + fn covers(&self) -> &'static [&'static str] { + self.covers + } + } + + let results = vec![make_result("verifier-a", CheckStatus::Pass, CheckGroup::P1)]; + let checks: Vec> = vec![Box::new(FakeCheck { + id: "verifier-a", + covers: &["p1-must-no-interactive"], + })]; + + let summary = build_coverage_summary(&results, &checks); + assert_eq!(summary.must.verified, 1); + assert_eq!(summary.should.verified, 0); + assert_eq!(summary.may.verified, 0); + // Totals match the registry snapshot baked into registry.rs tests. 
+ assert!(summary.must.total >= 1); } #[test] diff --git a/tests/integration.rs b/tests/integration.rs index 77dfb7a..91802d3 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -32,6 +32,61 @@ fn test_help() { .stdout(predicate::str::contains("Usage")); } +// ── Generate subcommand ──────────────────────────────────────────── + +#[test] +fn test_generate_coverage_matrix_writes_artifacts() { + let dir = integration_tempdir(); + let md = dir.join("matrix.md"); + let json = dir.join("matrix.json"); + + cmd() + .args([ + "generate", + "coverage-matrix", + "--out", + md.to_str().expect("utf8 path"), + "--json-out", + json.to_str().expect("utf8 path"), + ]) + .assert() + .success(); + + let md_content = std::fs::read_to_string(&md).expect("matrix.md written"); + assert!(md_content.contains("# Coverage Matrix")); + assert!(md_content.contains("P1: Non-Interactive by Default")); + + let json_content = std::fs::read_to_string(&json).expect("matrix.json written"); + let parsed: serde_json::Value = serde_json::from_str(&json_content).expect("valid JSON"); + assert_eq!(parsed["schema_version"], "1.0"); + assert!(parsed["rows"].as_array().expect("rows array").len() >= 40); +} + +#[test] +fn test_generate_coverage_matrix_drift_check_passes_on_committed_artifacts() { + // Running --check against the committed docs/coverage-matrix.md + + // coverage/matrix.json must pass. If this fails, the registry or a + // check's covers() drifted without the artifacts being regenerated. 
+ cmd() + .args(["generate", "coverage-matrix", "--check"]) + .current_dir(env!("CARGO_MANIFEST_DIR")) + .assert() + .success(); +} + +fn integration_tempdir() -> std::path::PathBuf { + let root = std::env::temp_dir().join(format!( + "anc-integration-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("after epoch") + .as_nanos() + )); + std::fs::create_dir_all(&root).expect("create tempdir"); + root +} + // ── Check subcommand tests ───────────────────────────────────────── #[test]