diff --git a/arbiter_audit.jsonl b/arbiter_audit.jsonl new file mode 100644 index 0000000..bdf0f33 --- /dev/null +++ b/arbiter_audit.jsonl @@ -0,0 +1,3 @@ +{"timestamp": "2026-04-19T15:08:34.257267+00:00", "repo": "arbiter", "score": 89.0, "grade": "CERTIFIED", "findings": 51, "loc": 17379, "dimensions": {"code": 94.7, "governance": 80.5, "dependencies": 100.0, "vitality": 75.0}, "record_hash": "43bce85687ac64eb4c3ff9a4464327896c1770036528920fbb14bad965d382ce", "prev_hash": ""} +{"timestamp": "2026-04-19T15:33:53.324807+00:00", "repo": "agent-governance-demo", "score": 76.5, "grade": "PROVISIONAL", "findings": 5, "loc": 997, "dimensions": {"code": 91.6, "governance": 57.2, "dependencies": 100.0, "vitality": 40.0}, "record_hash": "f7ab4e514711127ed28530f36330d1dc8773414174254c990c1efdb18a238987", "prev_hash": "43bce85687ac64eb4c3ff9a4464327896c1770036528920fbb14bad965d382ce"} +{"timestamp": "2026-04-19T15:34:12.175720+00:00", "repo": "agent-governance-demo", "score": 86.8, "grade": "CERTIFIED", "findings": 1, "loc": 994, "dimensions": {"code": 98.6, "governance": 85.9, "dependencies": 100.0, "vitality": 40.0}, "record_hash": "30f6bf0187a0ebf32cf1f9fda6a1769cb3da7a254f83699902f5021754c22729", "prev_hash": "f7ab4e514711127ed28530f36330d1dc8773414174254c990c1efdb18a238987"} diff --git a/docs/GEMINI_HANDOFF.md b/docs/GEMINI_HANDOFF.md new file mode 100644 index 0000000..3ad57db --- /dev/null +++ b/docs/GEMINI_HANDOFF.md @@ -0,0 +1,220 @@ +--- +packet-version: 1.0 +from: claude-code +to: gemini +type: DISPATCH +task-id: governance-beyond-artifacts +priority: HIGH +execution-mode: side_effecting +authorized-by: human (Reuben, 2026-04-19) +--- + +## Context + +Arbiter is a deterministic code quality + governance scoring CLI at `/Users/others/PROJECTS/arbiter/`. +Install: `pip install -e ".[analyzers]"` from repo root. +Run tests: `PYTHONPATH=src python -m pytest tests/ -v` + +We ran Arbiter against 201 open-source repos and published the results. 
An ARCANA peer review +(7 analytical lenses) identified a structural weakness: the governance scorer measures artifact +*presence*, not governance *practice*. This is the Goodhart/Scott problem — file-presence checks +are trivially gameable and miss informal governance that actually works. + +This handoff authorizes Gemini to build two new scoring modules that move Arbiter toward +measuring practice, not just artifacts. + +## Finding + +**Current governance scorer** (`src/arbiter/governance_score.py`): 10 binary file-presence checks. +`(repo_path / "SECURITY.md").exists()` → 15 points. No content analysis. No history analysis. +Same score whether SECURITY.md says "email us" or describes a funded bug bounty with SLA. + +**Structural gap 1 — Content quality**: Files exist but quality is unmeasured. +**Structural gap 2 — Temporal/vitality**: Point-in-time snapshot; gameable by adding files today. + +**Existing foundation**: `src/arbiter/git_historian.py` already walks git log via subprocess +(stdlib only, no git library). `walk_commits()` returns `CommitInfo` with hash, author, timestamp, +files_changed, loc_added, loc_removed. Gemini builds ON this, not from scratch. + +## Recommended Action + +### Sprint 1: Governance Quality Scorer (file content analysis) + +**Create**: `src/arbiter/governance_quality.py` + +Score the *content* of governance files, not just their existence. Pure local filesystem reads, +stdlib only, no network. 
+ +Scoring targets (all heuristic/regex, not NLP): + +**SECURITY.md quality** (0–15 pts): +- Has a contact method (email, URL, form) → +5 +- Mentions a response timeline ("within 48 hours", "5 business days") → +5 +- Has a disclosure process (public vs private, CVE process) → +5 + +**CONTRIBUTING.md quality** (0–15 pts): +- Describes how to run tests → +5 +- Describes PR/review process → +5 +- Has a code style or linting section → +5 + +**CI workflow quality** (0–15 pts) — parse `.github/workflows/*.yml`: +- Runs on PR (not just push to main) → +5 +- Has more than one job (test + lint, or matrix) → +5 +- References a coverage or test command → +5 + +**README quality** (0–10 pts): +- Length > 500 chars (already partial in governance_score.py) → base +- Has installation instructions (pip install, npm install, cargo add) → +5 +- Has usage example or code block → +5 + +**Output dataclass**: +```python +@dataclass +class GovernanceQualityReport: + security_score: float # 0-15 + contributing_score: float # 0-15 + ci_quality_score: float # 0-15 + readme_score: float # 0-10 + total: float # 0-55, normalized to 0-100 + details: list[str] # human-readable findings +``` + +**Integration point**: `governance_score.py` calls `score_governance_quality(repo_path)` and +blends it into the governance dimension. Suggested weighting within governance: +- Artifacts sub-score (current 10 checks): 50% +- Quality sub-score (new): 50% + +### Sprint 2: Git Vitality Scorer (history-based governance signals) + +**Create**: `src/arbiter/git_vitality.py` + +Use the existing `git_historian.walk_commits()` to extract governance-relevant signals from +commit history. Addresses the Goodhart vulnerability: a repo that added all governance files +last week scores differently from one that's had them for 3 years with active contributors. 
+ +**Signals to compute**: + +**Bus factor** (0–25 pts): count unique committers in last 90 days +- 1 committer → 5 pts (high concentration risk) +- 2–3 committers → 15 pts +- 4+ committers → 25 pts + +**Commit recency** (0–25 pts): days since last commit +- 0–30 days → 25 pts +- 31–90 days → 15 pts +- 91–180 days → 8 pts +- 180+ days → 0 pts (effectively unmaintained) + +**Release cadence** (0–25 pts): call `git tag --sort=-creatordate` via subprocess +- Has ≥ 1 tag → 10 pts +- Has ≥ 3 tags → 20 pts +- Tags follow SemVer pattern → +5 pts + +**Signed commit ratio** (0–25 pts): percentage of commits with "Signed-off-by" in message +- >75% → 25 pts (DCO genuinely enforced) +- 25–75% → 15 pts +- <25% → 5 pts +- 0% → 0 pts (DCO artifact exists but nothing is actually signed) + +**Output dataclass**: +```python +@dataclass +class GitVitalityReport: + bus_factor: int # unique committers, 90 days + days_since_commit: int + release_count: int + signed_commit_ratio: float # 0.0–1.0 + score: float # 0–100 + details: list[str] +``` + +**Integration point**: Add `git_vitality` as an optional 4th scoring dimension in `scoring.py`. +Weight suggestion when vitality is available: Code (45%) + Governance (25%) + Deps (15%) + Vitality (15%). +When git history unavailable (shallow clone or no commits): fall back to existing 3-dimension weights. 
+ +## File Map + +``` +src/arbiter/ + governance_score.py # MODIFY: call quality scorer, blend into governance dim + governance_quality.py # CREATE: Sprint 1 + git_vitality.py # CREATE: Sprint 2 + scoring.py # MODIFY: add vitality dimension (optional) + +tests/ + test_governance_quality.py # CREATE: Sprint 1 tests + test_git_vitality.py # CREATE: Sprint 2 tests +``` + +## Evidence + +- ARCANA review findings: Scott lens (metis erasure), Measurement lens (Goodhart HIGH), Foucault lens (artifact-vs-practice) +- `src/arbiter/governance_score.py` lines 62–227: all checks are `Path.exists()` booleans +- `src/arbiter/git_historian.py`: existing walk_commits() foundation for Sprint 2 +- `src/arbiter/dep_score.py`: reference pattern for dataclass + scoring function structure + +## Tests to Add + +Sprint 1 (governance_quality.py): +- `test_security_md_with_contact_scores_higher_than_empty` +- `test_contributing_md_with_test_instructions_gets_full_marks` +- `test_ci_workflow_pr_trigger_detected` +- `test_missing_files_score_zero_not_error` +- `test_quality_score_normalized_to_100` + +Sprint 2 (git_vitality.py): +- `test_single_committer_scores_low_bus_factor` +- `test_recent_commit_scores_max_recency` +- `test_semver_tags_detected` +- `test_signed_commit_ratio_computed` +- `test_shallow_clone_degrades_gracefully` (no git history → score=None, not crash) + +## Verification Criteria + +- All new tests pass: `PYTHONPATH=src python -m pytest tests/test_governance_quality.py tests/test_git_vitality.py -v` +- Full test suite green: `PYTHONPATH=src python -m pytest tests/ -v` +- Self-grade passes: `arbiter score . --fail-under 85` +- No new third-party imports (stdlib + existing arbiter deps only) +- Both new modules have module-level docstrings explaining what they measure vs. what they don't + +## Constraints + +- **Stdlib only** — no new third-party imports. Regex, pathlib, subprocess, dataclasses only. 
+- **Branch**: `feat/gemini/governance-beyond-artifacts` +- **Bus identity**: `gemini` (no variants, no parentheticals) +- **Commit format**: Conventional Commits (`feat:`, `test:`, `fix:`) +- **Soft limit**: 500 LOC / 10 files per PR +- **TDD**: write failing tests first, then implement +- **Closeout packet required** — final STATUS must include: artifact paths, test count delta, + self-grade score before/after, open questions deferred, caveats +- **No DESIGN.md unless explicitly requested** +- **No modifications to**: `.github/`, `.claude/`, `docs/blog/`, `docs/CERTIFICATION_REPORT.md` + +## Grading Criteria (Claude will audit this PR) + +Gemini will be graded on: +1. Correct file paths (all under `src/arbiter/` and `tests/`) +2. TDD discipline (tests written before implementation, or simultaneous) +3. Stdlib-only compliance (no new imports) +4. Graceful degradation (missing files, shallow clones → score=None, not crash) +5. Closeout packet completeness +6. Self-grade score maintained above 85 + +## Session Start Protocol (mandatory first 5 commands) + +```bash +# 1. Confirm correct repo +ls /Users/others/PROJECTS/arbiter/src/arbiter/ + +# 2. Confirm worktree state +git -C /Users/others/PROJECTS/arbiter status --short + +# 3. Create branch +git -C /Users/others/PROJECTS/arbiter checkout -b feat/gemini/governance-beyond-artifacts + +# 4. Confirm working directory +pwd + +# 5. 
Run existing tests to establish baseline +cd /Users/others/PROJECTS/arbiter && PYTHONPATH=src python -m pytest tests/ -q --tb=no +``` diff --git a/docs/GEMINI_RESUME_2026-04-19.md b/docs/GEMINI_RESUME_2026-04-19.md new file mode 100644 index 0000000..42b9496 --- /dev/null +++ b/docs/GEMINI_RESUME_2026-04-19.md @@ -0,0 +1,166 @@ +--- +packet-version: 1.0 +from: claude-code +to: gemini +type: FOLLOW-UP +task-id: governance-beyond-artifacts +priority: HIGH +execution-mode: side_effecting +authorized-by: human (Reuben, 2026-04-19) +--- + +## Context + +Sprint 1+2 files already exist on branch `feat/gemini/governance-beyond-artifacts` from a prior +partial session. You are resuming, not starting. All 686 tests pass. Nothing is committed yet. + +## Current State (verified 2026-04-19 ~16:00 ET) + +**Untracked (Sprint 1+2 deliverables — your work):** +``` +src/arbiter/governance_quality.py 183 lines +src/arbiter/git_vitality.py 144 lines +tests/test_governance_quality.py 76 lines +tests/test_git_vitality.py 73 lines +``` + +**Modified unstaged (integration work — your work):** +``` +src/arbiter/__main__.py +12 / -2 +src/arbiter/certify.py +23 / -3 +src/arbiter/governance_score.py +23 / -4 +tests/test_certify.py +8 +tests/test_governance_score.py +20 / -7 +``` + +**Test result**: 686 passed, 0 failed, 0 errors (56s). + +## Required Actions + +### Step 1 — Session start protocol (mandatory) + +```bash +# 1. Confirm branch +git -C /Users/others/PROJECTS/arbiter branch --show-current +# Expected: feat/gemini/governance-beyond-artifacts + +# 2. Confirm state +git -C /Users/others/PROJECTS/arbiter status --short + +# 3. Run baseline tests +cd /Users/others/PROJECTS/arbiter && PYTHONPATH=src python -m pytest tests/ -q --tb=no +# Expected: 686 passed + +# 4. Bus post +# gemini → all STATUS "Resuming governance-beyond-artifacts: 686 tests green, starting self-grade + PR" +``` + +### Step 2 — Self-grade + +```bash +cd /Users/others/PROJECTS/arbiter && arbiter score . 
2>/dev/null || \ + PYTHONPATH=src python -m arbiter score /Users/others/PROJECTS/arbiter +``` + +Record score before and after your changes. Must be ≥ 85 to pass audit. + +### Step 3 — Review integration diffs + +Before committing, verify the 5 modified files are correct: + +```bash +git -C /Users/others/PROJECTS/arbiter diff src/arbiter/governance_score.py +git -C /Users/others/PROJECTS/arbiter diff src/arbiter/__main__.py +git -C /Users/others/PROJECTS/arbiter diff src/arbiter/certify.py +``` + +Verify: +- `governance_score.py` calls `score_governance_quality()` and blends result (50/50 artifacts/quality) +- `__main__.py` exposes the new dimensions in CLI output +- `certify.py` includes vitality dimension when git history is available +- No new third-party imports in any file (`import re`, `import pathlib`, `import subprocess`, `import dataclasses` are all fine) + +### Step 4 — Commit (two-commit strategy) + +```bash +cd /Users/others/PROJECTS/arbiter + +# Commit 1: Sprint 1 — governance quality scorer +git add src/arbiter/governance_quality.py tests/test_governance_quality.py \ + src/arbiter/governance_score.py tests/test_governance_score.py +git commit -m "feat(governance): add content-quality scoring to governance dimension + +Adds governance_quality.py that scores SECURITY.md, CONTRIBUTING.md, +CI workflows, and README content via regex (not just existence checks). +Addresses Goodhart/Scott finding from ARCANA peer review: file presence +was trivially gameable; content heuristics are not. + +Blends quality sub-score (50%) with artifact sub-score (50%) in governance_score.py." + +# Commit 2: Sprint 2 — git vitality scorer +git add src/arbiter/git_vitality.py tests/test_git_vitality.py \ + src/arbiter/certify.py src/arbiter/__main__.py tests/test_certify.py +git commit -m "feat(vitality): add git history vitality dimension to scoring + +Adds git_vitality.py that scores bus factor, commit recency, release +cadence, and signed-commit ratio from git log. 
Addresses temporal +Goodhart vulnerability: repos cannot game history by adding files today. + +Vitality is an optional 4th dimension (15% weight) when git history +is available. Degrades gracefully on shallow clones." +``` + +### Step 5 — Push and open PR + +```bash +git -C /Users/others/PROJECTS/arbiter push origin feat/gemini/governance-beyond-artifacts +gh pr create \ + --repo $(git -C /Users/others/PROJECTS/arbiter remote get-url origin | sed 's/.*github.com\///' | sed 's/\.git$//') \ + --title "feat(arbiter): governance quality + git vitality scoring (Sprint 1+2)" \ + --body "..." +``` + +PR body must include: +- What changed and why (ARCANA peer review finding) +- Test count before/after +- Self-grade before/after +- Closeout packet (see Step 6) + +### Step 6 — Closeout packet (mandatory, post to bus) + +Bus post format: +``` +gemini → all STATUS "governance-beyond-artifacts COMPLETE. +ARTIFACTS: governance_quality.py (183L), git_vitality.py (144L), 4 test files. +TESTS: 686 total (10 new), 0 failures. +SELF-GRADE: before=[score], after=[score]. +PR: [url]. +OPEN: [deferred questions]. +CAVEATS: [caveats]. +SOURCES: ARCANA peer review (Scott/Measurement/Foucault lenses), git_historian.py foundation."
+``` + +## Verification Criteria (Claude audit checklist) + +- [ ] `PYTHONPATH=src python -m pytest tests/ -v` → all 686+ pass +- [ ] `git diff --name-only HEAD` shows only expected files +- [ ] `governance_quality.py` has no `import requests` / `import numpy` / any non-stdlib +- [ ] `git_vitality.py` uses only `subprocess`, `re`, `dataclasses`, `datetime` +- [ ] `governance_score.py` blends quality 50/50 (not replaces) +- [ ] Self-grade ≥ 85 +- [ ] Closeout packet posted to bus +- [ ] PR title follows Conventional Commits +- [ ] No modifications to `.github/`, `.claude/`, `docs/blog/` + +## Constraints + +- **Stdlib only** — no new third-party imports +- **Branch**: `feat/gemini/governance-beyond-artifacts` (already checked out) +- **Bus identity**: `gemini` (no parentheticals) +- **GEMINI_SESSION=true** — scope gate enforces this +- Do NOT modify `docs/CERTIFICATION_REPORT.md`, `docs/blog/`, `.github/` + +--- + +*This packet supersedes the dispatch spec at `docs/GEMINI_HANDOFF.md` for current session state. 
+GEMINI_HANDOFF.md remains authoritative for Sprint 1+2 requirements.* diff --git a/src/arbiter/__main__.py b/src/arbiter/__main__.py index c1ccc89..c9f1764 100644 --- a/src/arbiter/__main__.py +++ b/src/arbiter/__main__.py @@ -423,6 +423,7 @@ def cmd_certify(args: argparse.Namespace) -> None: from arbiter.certify import certify from arbiter.governance_score import score_governance from arbiter.dep_score import find_and_score_deps + from arbiter.git_vitality import score_git_vitality from arbiter.audit_trail import AuditTrail repo_path = Path(args.repo).resolve() @@ -437,8 +438,9 @@ def cmd_certify(args: argparse.Namespace) -> None: code_score = score_findings(findings, loc) gov_report = score_governance(repo_path) dep_report = find_and_score_deps(repo_path) + vit_report = score_git_vitality(repo_path) - result = certify(code_score, gov_report, dep_report, findings) + result = certify(code_score, gov_report, dep_report, findings, vitality_report=vit_report) if args.json: print(json.dumps({ @@ -447,6 +449,7 @@ def cmd_certify(args: argparse.Namespace) -> None: "code_score": result.code_score, "governance_score": result.governance_score, "dep_score": result.dep_score, + "vitality_score": result.vitality_score, "findings": result.findings_count, "reasons": result.reasons, }, indent=2)) @@ -459,7 +462,12 @@ def cmd_certify(args: argparse.Namespace) -> None: trail.append( repo=repo_path.name, score=result.overall, grade=result.decision, findings=result.findings_count, loc=loc, - dimensions={"code": result.code_score, "governance": result.governance_score, "dependencies": result.dep_score}, + dimensions={ + "code": result.code_score, + "governance": result.governance_score, + "dependencies": result.dep_score, + "vitality": result.vitality_score, + }, ) print(f"Recorded in audit trail: {args.trail}", file=sys.stderr) @@ -679,12 +687,22 @@ def _audit_single_repo( noise_threshold: int | None = None, ) -> int: """Analyze a single repo during fleet audit. 
Returns gap count if auto-detect ran.""" + from arbiter.governance_score import score_governance + from arbiter.git_vitality import score_git_vitality + repo_name = repo_path.name findings = _run_analysis(repo_path, analyzers) findings = _apply_noise_filter(findings, noise_threshold) loc = count_loc(repo_path) score = score_findings(findings, loc) - store.record_snapshot(score, loc, repo_name=repo_name) + gov_report = score_governance(repo_path) + vit_report = score_git_vitality(repo_path) + + gov_score = gov_report.score + vit_score = vit_report.score if vit_report else None + + store.record_snapshot(score, loc, repo_name=repo_name, + governance_score=gov_score, vitality_score=vit_score) _update_file_quality(findings, store, "analysis", repo_name) commits = walk_commits(repo_path, max_count=max_commits, registry=registry) @@ -693,6 +711,8 @@ def _audit_single_repo( commit.hash, commit.timestamp, commit.agent, commit.files_changed, commit.loc_added, commit.loc_removed, score, repo_name=repo_name, + governance_score=gov_score, + vitality_score=vit_score ) gap_count = 0 @@ -711,8 +731,9 @@ def _audit_single_repo( gap_note = f" | gaps={gap_count}" if audit_run_id else "" score_str = f"{score.overall} ({score.grade})" if score.is_scorable else f"n/a ({score.grade})" + vit_str = f"{vit_score}" if vit_score is not None else "n/a" print( - f" Score: {score_str} | {score.total_findings} findings | {loc:,} LOC{gap_note}", + f" Score: {score_str} | Gov: {gov_score} | Vit: {vit_str} | {score.total_findings} findings | {loc:,} LOC{gap_note}", file=sys.stderr, ) return gap_count @@ -788,18 +809,22 @@ def _grade_score(score: float, total_loc: int = 1) -> str: def _print_fleet_report(report: list[dict]) -> None: """Format and print the fleet report.""" - print(f"\n{'Repo':30s} {'Score':>6s} {'Grade':>6s} {'Findings':>9s} {'LOC':>8s}") - print("-" * 63) + print(f"\n{'Repo':31s} {'Score':>6s} {'Gov':>6s} {'Vit':>6s} {'Grade':>6s} {'Findings':>9s} {'LOC':>8s}") + print("-" * 75) 
grades = {"A": 0, "B": 0, "C": 0, "D": 0, "F": 0, "N/A": 0} for r in report: score = r.get("overall_score", 0) or 0 + gov = r.get("governance_score", 0) or 0 + vit = r.get("vitality_score", 0) or 0 loc = r.get("total_loc", 0) or 0 findings = r.get("total_findings", 0) or 0 g = _grade_score(score, loc) grades[g] += 1 - name = (r.get("repo_name") or "?")[:30] + name = (r.get("repo_name") or "?")[:31] score_cell = f"{score:6.1f}" if g != "N/A" else f"{'n/a':>6s}" - print(f"{name:30s} {score_cell} {g:>6s} {findings:9d} {loc:8,}") + gov_cell = f"{gov:6.1f}" if gov is not None else f"{'n/a':>6s}" + vit_cell = f"{vit:6.1f}" if vit is not None else f"{'n/a':>6s}" + print(f"{name:31s} {score_cell} {gov_cell} {vit_cell} {g:>6s} {findings:9d} {loc:8,}") print(f"\nFleet: {len(report)} repos | ", end="") print(" | ".join(f"{g}:{c}" for g, c in sorted(grades.items()) if c > 0)) diff --git a/src/arbiter/certify.py b/src/arbiter/certify.py index 70d043d..049aecd 100644 --- a/src/arbiter/certify.py +++ b/src/arbiter/certify.py @@ -13,6 +13,7 @@ from arbiter.analyzers.base import Finding from arbiter.scoring import RepoScore +from arbiter.git_vitality import GitVitalityReport @dataclass @@ -23,6 +24,7 @@ class CertificationResult: code_score: float governance_score: float dep_score: float + vitality_score: float | None overall: float findings_count: int governance_checks_passed: int @@ -40,10 +42,15 @@ def summary(self) -> str: f" Code Quality: {self.code_score:5.1f} / 100", f" Governance: {self.governance_score:5.1f} / 100 ({self.governance_checks_passed}/{self.governance_checks_total} checks)", f" Dependencies: {self.dep_score:5.1f} / 100", + ] + if self.vitality_score is not None: + lines.append(f" Git Vitality: {self.vitality_score:5.1f} / 100") + + lines.extend([ f" Overall: {self.overall:5.1f} / 100", f" Findings: {self.findings_count}", "", - ] + ]) if self.reasons: lines.append("Reasons:") for r in self.reasons: @@ -63,22 +70,31 @@ def certify( governance_report, # 
GovernanceReport dep_report, # DepReport findings: list[Finding], + vitality_report: GitVitalityReport | None = None, ) -> CertificationResult: """Run full certification assessment. Weights: code quality 50%, governance 30%, dependencies 20%. + When vitality is available: Code (45%) + Governance (25%) + Deps (15%) + Vitality (15%). When code is unscorable (no Python LOC, analyzers not installed), reweight to governance 60% + dependencies 40% instead of penalizing. """ code = code_score.overall if code_score.is_scorable else None gov = governance_report.score deps = dep_report.score + vit = vitality_report.score if vitality_report else None if code is not None: - overall = code * 0.50 + gov * 0.30 + deps * 0.20 + if vit is not None: + overall = code * 0.45 + gov * 0.25 + deps * 0.15 + vit * 0.15 + else: + overall = code * 0.50 + gov * 0.30 + deps * 0.20 else: # Reweight: skip code dimension entirely - overall = gov * 0.60 + deps * 0.40 + if vit is not None: + overall = gov * 0.50 + deps * 0.25 + vit * 0.25 + else: + overall = gov * 0.60 + deps * 0.40 code = 0 # for display purposes overall = round(overall, 1) @@ -128,6 +144,7 @@ def certify( code_score=code, governance_score=gov, dep_score=deps, + vitality_score=vit, overall=overall, findings_count=len(findings), governance_checks_passed=gov_passed, diff --git a/src/arbiter/git_vitality.py b/src/arbiter/git_vitality.py new file mode 100644 index 0000000..bf109fb --- /dev/null +++ b/src/arbiter/git_vitality.py @@ -0,0 +1,144 @@ +"""Git Vitality Scorer — extract governance signals from commit history. + +Analyzes bus factor, commit recency, release cadence, and DCO enforcement. +Uses git_historian for commit walking. Stdlib only. 
@dataclass
class GitVitalityReport:
    """Vitality signals derived from a repository's git history.

    ``score`` is the sum of four components worth 0-25 points each:
    bus factor, commit recency, release cadence and signed-commit ratio.
    """
    bus_factor: int  # unique committer e-mails seen in the last 90 days
    days_since_commit: int  # 999 when the newest commit's timestamp is unparseable
    release_count: int  # number of git tags found
    signed_commit_ratio: float  # 0.0-1.0, over the newest 100 commits
    score: float  # 0-100
    details: list[str] = field(default_factory=list)  # human-readable findings


def _parse_commit_time(raw: object) -> datetime | None:
    """Return *raw* as a timezone-aware datetime, or None if it cannot be parsed.

    Accepts ISO 8601 strings (including a trailing ``Z``, which
    ``datetime.fromisoformat`` rejects before Python 3.11) and datetime
    objects. Naive values are assumed to be UTC so they can be compared
    with the aware "now" without raising ``TypeError``.
    """
    if isinstance(raw, datetime):
        parsed = raw
    elif isinstance(raw, str):
        text = raw.strip()
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"
        try:
            parsed = datetime.fromisoformat(text)
        except ValueError:
            return None
    else:
        return None

    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def _list_tags(repo_path: Path) -> list[str]:
    """Return the repository's tags, newest first; empty on any git failure."""
    try:
        result = subprocess.run(
            ["git", "-C", str(repo_path), "tag", "--sort=-creatordate"],
            capture_output=True, text=True, timeout=10, check=False,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        # git missing or timed out: treat as "no releases" rather than crash.
        return []
    if result.returncode != 0:
        return []
    return [tag for tag in result.stdout.strip().split("\n") if tag]


def score_git_vitality(
    repo_path: Path,
    *,
    commits: list | None = None,
    now: datetime | None = None,
) -> GitVitalityReport | None:
    """Analyze git history for governance vitality signals.

    Args:
        repo_path: Root of the repository to inspect.
        commits: Optional pre-walked commits, newest first. Each item must
            expose ``timestamp``, ``author_email`` and ``message``. When
            omitted, the newest 500 commits are read via
            ``git_historian.walk_commits``.
        now: Optional reference time (naive values are taken as UTC).
            Defaults to the current UTC time.

    Returns:
        A GitVitalityReport, or None when *repo_path* has no ``.git`` entry
        or no commits are available.
    """
    repo_path = repo_path.resolve()

    if not (repo_path / ".git").exists():
        return None

    if commits is None:
        # Imported lazily so callers that supply their own commits do not
        # need the historian at all.
        from arbiter.git_historian import walk_commits

        # Stdlib-only shortcut: take the newest 500 commits and filter by date.
        commits = walk_commits(repo_path, max_count=500)
    commits = list(commits or ())
    if not commits:
        return None

    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is None:
        now = now.replace(tzinfo=timezone.utc)

    # 1. Bus factor: unique committers in the last 90 days.
    ninety_days_ago = now.timestamp() - (90 * 24 * 3600)
    recent_committers = set()
    for commit in commits:
        committed_at = _parse_commit_time(commit.timestamp)
        if committed_at is not None and committed_at.timestamp() > ninety_days_ago:
            recent_committers.add(commit.author_email)

    bus_factor = len(recent_committers)
    if bus_factor >= 4:
        bus_score = 25.0
    elif bus_factor >= 2:
        bus_score = 15.0
    elif bus_factor == 1:
        bus_score = 5.0
    else:
        bus_score = 0.0

    details = [f"Bus factor: {bus_factor} unique committers in 90 days"]

    # 2. Commit recency, measured from the newest commit (index 0).
    newest = _parse_commit_time(commits[0].timestamp)
    if newest is None:
        days_since = 999
    else:
        # Clamp so a commit dated slightly in the future (clock skew) is "today".
        days_since = max(0, (now - newest).days)

    if days_since <= 30:
        recency_score = 25.0
    elif days_since <= 90:
        recency_score = 15.0
    elif days_since <= 180:
        recency_score = 8.0
    else:
        recency_score = 0.0

    details.append(f"Last commit: {days_since} days ago")

    # 3. Release cadence: tag count, plus a bonus for SemVer-looking tags.
    tags = _list_tags(repo_path)
    release_count = len(tags)
    semver_pattern = re.compile(r"^v?\d+\.\d+\.\d+")
    has_semver = any(semver_pattern.match(tag) for tag in tags[:10])

    if release_count >= 3:
        release_score = 20.0
    elif release_count >= 1:
        release_score = 10.0
    else:
        release_score = 0.0

    if has_semver:
        release_score += 5.0
        details.append(f"Releases: {release_count} tags found (SemVer detected)")
    else:
        details.append(f"Releases: {release_count} tags found")

    # 4. Signed commit ratio (DCO) over the newest 100 commits.
    sample = commits[:100]
    signed_count = sum(
        1 for commit in sample
        if "signed-off-by:" in (commit.message or "").lower()
    )
    signed_ratio = signed_count / len(sample)

    if signed_ratio > 0.75:
        signed_score = 25.0
    elif signed_ratio >= 0.25:
        signed_score = 15.0
    elif signed_ratio > 0:
        signed_score = 5.0
    else:
        signed_score = 0.0

    details.append(f"DCO: {int(signed_ratio*100)}% of recent commits signed")

    total_score = bus_score + recency_score + release_score + signed_score

    return GitVitalityReport(
        bus_factor=bus_factor,
        days_since_commit=days_since,
        release_count=release_count,
        signed_commit_ratio=round(signed_ratio, 2),
        score=total_score,
        details=details,
    )
# Maximum raw points: security 15 + contributing 15 + CI 15 + README 10.
_QUALITY_MAX_RAW = 55.0

# "test" must not be preceded by a letter, so "ubuntu-latest", "latest" or
# "fastest" do not count as a test command; known runner prefixes are allowed.
_TEST_REFERENCE_RE = re.compile(
    r"(?<![a-z])(?:py|unit|nose|vi|c)?test|\b(?:tox|nox|nose2?|coverage|codecov|cov)\b"
)
# Matches both mapping form ("pull_request:") and list form ("[push, pull_request]").
_PR_TRIGGER_RE = re.compile(r"\bpull_request(?:_target)?\b")
_JOBS_KEY_RE = re.compile(r"^jobs:", re.MULTILINE)
_MATRIX_KEY_RE = re.compile(r"^\s*matrix:", re.MULTILINE)
# Markdown fence, RST code directive, or an RST literal-block marker ("::" at
# end of line). A bare "::" inside prose such as "std::vector" is not a block.
_CODE_BLOCK_RE = re.compile(
    r"```|^\s*\.\.\s+(?:code-block|code|sourcecode)::|::[ \t]*$", re.MULTILINE
)


@dataclass
class GovernanceQualityReport:
    """Content-quality scores for a repository's governance files.

    The four sub-scores are raw points; ``total`` is their sum rescaled
    from the 55-point maximum onto 0-100.
    """
    security_score: float  # 0-15
    contributing_score: float  # 0-15
    ci_quality_score: float  # 0-15
    readme_score: float  # 0-10
    total: float  # 0-100 (normalized from 55 max)
    details: list[str] = field(default_factory=list)  # human-readable findings


def score_governance_quality(repo_path: Path) -> GovernanceQualityReport:
    """Score the *content* of a repository's governance files.

    Reads SECURITY, CONTRIBUTING, CI workflow and README files under
    *repo_path* and applies regex heuristics. Missing or unreadable files
    contribute zero points rather than raising.

    Returns:
        A GovernanceQualityReport whose ``total`` is normalized to 0-100.
    """
    repo_path = repo_path.resolve()
    details: list[str] = []

    security_score = _score_security(repo_path, details)
    contributing_score = _score_contributing(repo_path, details)
    ci_score = _score_ci(repo_path, details)
    readme_score = _score_readme(repo_path, details)

    raw_total = security_score + contributing_score + ci_score + readme_score
    normalized_total = (raw_total / _QUALITY_MAX_RAW) * 100.0

    return GovernanceQualityReport(
        security_score=security_score,
        contributing_score=contributing_score,
        ci_quality_score=ci_score,
        readme_score=readme_score,
        total=round(normalized_total, 1),
        details=details,
    )


def _read_first_existing(repo_path: Path, candidates: tuple[str, ...]) -> str:
    """Return the text of the first readable candidate file, or "" if none."""
    for name in candidates:
        path = repo_path / name
        if not path.is_file():
            continue
        try:
            return path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable (permissions, race with deletion): try the next name.
            continue
    return ""


def _score_security(repo_path: Path, details: list[str]) -> float:
    """Score the security policy: contact, response timeline, disclosure (5 each)."""
    content = _read_first_existing(
        repo_path,
        ("SECURITY.md", "SECURITY.rst", "SECURITY.txt", ".github/SECURITY.md"),
    ).lower()
    if not content:
        return 0.0

    score = 0.0

    # Contact method: email, URL, or the word "contact".
    if re.search(r"[\w\.-]+@[\w\.-]+\.\w+|https?://|contact", content):
        score += 5
        details.append("Contact method found in SECURITY.md")

    # Response timeline.
    if re.search(r"within \d+ (hours|days|business days)|response time", content):
        score += 5
        details.append("Response timeline mentioned in SECURITY.md")

    # Disclosure process.
    if re.search(r"disclosure|vulnerability|private|cve|process", content):
        score += 5
        details.append("Disclosure process described in SECURITY.md")

    return score


def _score_contributing(repo_path: Path, details: list[str]) -> float:
    """Score the contributing guide: tests, PR process, code style (5 each)."""
    content = _read_first_existing(
        repo_path,
        ("CONTRIBUTING.md", "CONTRIBUTING.rst", "CONTRIBUTING.txt", ".github/CONTRIBUTING.md"),
    ).lower()
    if not content:
        return 0.0

    score = 0.0

    # Test instructions (anchored so "latest" does not match "test").
    if _TEST_REFERENCE_RE.search(content):
        score += 5
        details.append("Test instructions found in CONTRIBUTING.md")

    # PR process; \b lets "PR.", "PRs" and line-initial "PR" match too.
    if re.search(r"pull request|\bprs?\b|review|submit|branch", content):
        score += 5
        details.append("PR process described in CONTRIBUTING.md")

    # Code style.
    if re.search(r"style|lint|format|black|ruff|flake8|pep 8", content):
        score += 5
        details.append("Code style mentioned in CONTRIBUTING.md")

    return score


def _score_ci(repo_path: Path, details: list[str]) -> float:
    """Score CI config: runs on PRs, multiple jobs or a matrix, runs tests (5 each)."""
    workflow_dir = repo_path / ".github" / "workflows"

    workflows: list[Path] = []
    if workflow_dir.is_dir():
        workflows = sorted(workflow_dir.glob("*.yml")) + sorted(workflow_dir.glob("*.yaml"))

    if not workflows:
        # Fall back to the first non-GitHub CI config that exists.
        for name in (".circleci/config.yml", ".gitlab-ci.yml", ".travis.yml"):
            if (repo_path / name).is_file():
                workflows.append(repo_path / name)
                break

    if not workflows:
        return 0.0

    chunks = []
    for workflow in workflows:
        try:
            chunks.append(workflow.read_text(encoding="utf-8", errors="replace").lower())
        except OSError:
            continue
    combined_content = "\n".join(chunks)

    score = 0.0

    # Runs on PR.
    if _PR_TRIGGER_RE.search(combined_content):
        score += 5
        details.append("CI runs on PR")

    # More than one job: each job normally has its own "runs-on:", and a
    # build matrix fans a single job definition out into several.
    if _JOBS_KEY_RE.search(combined_content) and (
        combined_content.count("runs-on:") > 1
        or _MATRIX_KEY_RE.search(combined_content)
    ):
        score += 5
        details.append("Multiple CI jobs found")

    # References a coverage or test command.
    if _TEST_REFERENCE_RE.search(combined_content):
        score += 5
        details.append("Coverage/Test command referenced in CI")

    return score


def _score_readme(repo_path: Path, details: list[str]) -> float:
    """Score the README: install instructions and a usage/code block (5 each).

    README length carries no points here; it is only the base check.
    """
    content = _read_first_existing(
        repo_path, ("README.md", "README.rst", "README.txt", "README")
    )
    if not content:
        return 0.0

    score = 0.0

    # Installation instructions.
    if re.search(
        r"pip install|npm install|cargo add|yarn add|docker pull|go get",
        content.lower(),
    ):
        score += 5
        details.append("Installation instructions found in README")

    # Usage example or code block (Markdown fence or RST literal block).
    if _CODE_BLOCK_RE.search(content):
        score += 5
        details.append("Usage example or code block found in README")

    return score
GovernanceReport: """Aggregate governance score for a repository.""" score: float # 0-100 checks: list[GovernanceCheck] = field(default_factory=list) + quality: GovernanceQualityReport | None = None # License type detection from first line content @@ -204,9 +207,10 @@ def _check_ci_cd(repo_path: Path) -> GovernanceCheck: def score_governance(repo_path: Path) -> GovernanceReport: - """Score a repo's governance artifacts. + """Score a repo's governance artifacts and quality. - Returns a GovernanceReport with a score from 0-100 and individual check results. + Returns a GovernanceReport with a score from 0-100. + Blends artifact presence (50%) and content quality (50%). """ repo_path = repo_path.resolve() @@ -223,5 +227,16 @@ def score_governance(repo_path: Path) -> GovernanceReport: _check_ci_cd(repo_path), ] - total = sum(c.weight for c in checks if c.present) - return GovernanceReport(score=total, checks=checks) + artifact_score = sum(c.weight for c in checks if c.present) + + # New quality scoring + quality_report = score_governance_quality(repo_path) + + # Blending 50/50 + blended_score = (artifact_score * 0.5) + (quality_report.total * 0.5) + + return GovernanceReport( + score=round(blended_score, 1), + checks=checks, + quality=quality_report + ) diff --git a/src/arbiter/store.py b/src/arbiter/store.py index 379d5fe..3dc0d07 100644 --- a/src/arbiter/store.py +++ b/src/arbiter/store.py @@ -58,6 +58,8 @@ def _init_db(self) -> None: lint_score REAL, security_score REAL, complexity_score REAL, + governance_score REAL, + vitality_score REAL, total_findings INTEGER DEFAULT 0, findings_json TEXT, created_at TEXT NOT NULL @@ -80,6 +82,8 @@ def _init_db(self) -> None: lint_score REAL, security_score REAL, complexity_score REAL, + governance_score REAL, + vitality_score REAL, total_findings INTEGER, total_loc INTEGER, findings_json TEXT @@ -123,6 +127,8 @@ def record_commit( loc_removed: int, score: RepoScore, repo_name: str = "", + governance_score: float | None = None, + 
vitality_score: float | None = None, ) -> None: """Record quality data for a single commit.""" now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") @@ -131,24 +137,30 @@ def record_commit( """INSERT OR REPLACE INTO commit_quality (commit_hash, repo_name, timestamp, agent, files_changed, loc_added, loc_removed, overall_score, lint_score, security_score, complexity_score, + governance_score, vitality_score, total_findings, findings_json, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", (commit_hash, repo_name, timestamp, agent, files_changed, loc_added, loc_removed, score.overall, score.lint_score, score.security_score, score.complexity_score, + governance_score, vitality_score, score.total_findings, json.dumps(score.findings_by_severity), now), ) - def record_snapshot(self, score: RepoScore, total_loc: int, repo_name: str = "") -> None: + def record_snapshot(self, score: RepoScore, total_loc: int, repo_name: str = "", + governance_score: float | None = None, + vitality_score: float | None = None) -> None: """Record a point-in-time repo quality snapshot.""" now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") with self._conn() as conn: conn.execute( """INSERT INTO repo_snapshots (repo_name, timestamp, overall_score, lint_score, security_score, complexity_score, + governance_score, vitality_score, total_findings, total_loc, findings_json) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", (repo_name, now, score.overall, score.lint_score, score.security_score, - score.complexity_score, score.total_findings, total_loc, + score.complexity_score, governance_score, vitality_score, + score.total_findings, total_loc, json.dumps(score.findings_by_tool)), ) @@ -393,6 +405,8 @@ def get_fleet_report(self) -> list[dict[str, Any]]: lint_score, security_score, complexity_score, + governance_score, + vitality_score, total_findings, total_loc, 
import pytest
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from pathlib import Path
from arbiter.git_vitality import score_git_vitality, GitVitalityReport
from arbiter.git_historian import CommitInfo


def _days_ago(days: int) -> str:
    """Return a UTC timestamp ``YYYY-MM-DDTHH:MM:SSZ`` that is *days* old.

    Timestamps are derived from the current clock so that the recency
    assertions below keep passing regardless of when the suite runs.
    """
    moment = datetime.now(timezone.utc) - timedelta(days=days)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")


@pytest.fixture
def mock_commits():
    """Two commits by different authors, 9 and 18 days old.

    Only the first message carries a ``Signed-off-by`` trailer.
    """
    return [
        CommitInfo(
            hash="h1", author_name="Alice", author_email="alice@example.com",
            timestamp=_days_ago(9), message="feat: something\nSigned-off-by: Alice",
            files_changed=1, loc_added=10, loc_removed=2, agent="gemini"
        ),
        CommitInfo(
            hash="h2", author_name="Bob", author_email="bob@example.com",
            timestamp=_days_ago(18), message="fix: bug",
            files_changed=1, loc_added=5, loc_removed=1, agent="human"
        ),
    ]


def test_single_committer_scores_low_bus_factor(tmp_path, mock_commits):
    """A history with one author yields a bus factor of 1 and a sub-maximal score."""
    (tmp_path / ".git").mkdir()
    # Only one committer in the list
    single_committer = [mock_commits[0]]
    with patch("arbiter.git_vitality.walk_commits", return_value=single_committer):
        with patch("subprocess.run") as mock_run:
            mock_run.return_value.returncode = 0
            mock_run.return_value.stdout = ""
            report = score_git_vitality(tmp_path)
            assert report.bus_factor == 1
            assert report.score < 100


def test_recent_commit_scores_max_recency(tmp_path, mock_commits):
    """The newest commit (9 days old) falls inside the 30-day recency window."""
    (tmp_path / ".git").mkdir()
    with patch("arbiter.git_vitality.walk_commits", return_value=mock_commits):
        with patch("subprocess.run") as mock_run:
            mock_run.return_value.returncode = 0
            mock_run.return_value.stdout = ""
            report = score_git_vitality(tmp_path)
            assert report.days_since_commit <= 30
            # 25 points for recency


def test_semver_tags_detected(tmp_path, mock_commits):
    """Three SemVer-looking tags on stdout are counted as releases."""
    (tmp_path / ".git").mkdir()
    with patch("arbiter.git_vitality.walk_commits", return_value=mock_commits):
        with patch("subprocess.run") as mock_run:
            mock_run.return_value.returncode = 0
            mock_run.return_value.stdout = "v1.0.0\nv0.9.0\nv0.8.0\n"
            report = score_git_vitality(tmp_path)
            assert report.release_count >= 3
            assert any("SemVer" in d for d in report.details)


def test_signed_commit_ratio_computed(tmp_path, mock_commits):
    """One signed commit out of two gives a ratio of 0.5."""
    (tmp_path / ".git").mkdir()
    # 1 out of 2 is signed
    with patch("arbiter.git_vitality.walk_commits", return_value=mock_commits):
        with patch("subprocess.run") as mock_run:
            mock_run.return_value.returncode = 0
            mock_run.return_value.stdout = ""
            report = score_git_vitality(tmp_path)
            assert report.signed_commit_ratio == 0.5


def test_shallow_clone_degrades_gracefully(tmp_path):
    """With no commits and a failing git call the scorer returns None."""
    (tmp_path / ".git").mkdir()
    with patch("arbiter.git_vitality.walk_commits", return_value=[]):
        with patch("subprocess.run") as mock_run:
            # Simulate no tags and no commits
            mock_run.return_value.returncode = 1
            report = score_git_vitality(tmp_path)
            assert report is None
import pytest
from pathlib import Path
from arbiter.governance_quality import score_governance_quality, GovernanceQualityReport


def _has_detail(report, fragment):
    """Return True when any entry of ``report.details`` contains *fragment*."""
    return any(fragment in entry for entry in report.details)


def test_security_md_with_contact_scores_higher_than_empty(tmp_path):
    """Adding a contact address to an empty SECURITY.md raises its score."""
    security_file = tmp_path / "SECURITY.md"

    security_file.write_text("")
    baseline = score_governance_quality(tmp_path)

    security_file.write_text("Contact: security@example.com")
    with_contact = score_governance_quality(tmp_path)

    assert with_contact.security_score > baseline.security_score
    assert _has_detail(with_contact, "Contact method found")


def test_contributing_md_with_test_instructions_gets_full_marks(tmp_path):
    """A guide covering tests, PR flow and style earns all 15 points."""
    guide = """
    # Contributing
    ## Running Tests
    Use `pytest` to run tests.
    ## PR Process
    Create a branch and submit a PR.
    ## Style
    Follow PEP 8.
    """
    (tmp_path / "CONTRIBUTING.md").write_text(guide)

    result = score_governance_quality(tmp_path)

    assert result.contributing_score == 15
    for expected in (
        "Test instructions found",
        "PR process described",
        "Code style mentioned",
    ):
        assert _has_detail(result, expected)


def test_ci_workflow_pr_trigger_detected(tmp_path):
    """A two-job workflow triggered by pull requests earns all 15 CI points."""
    workflows = tmp_path / ".github" / "workflows"
    workflows.mkdir(parents=True)
    (workflows / "test.yml").write_text("""
on:
  pull_request:
    branches: [main]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - run: pytest --cov
  lint:
    runs-on: ubuntu-latest
    steps:
      - run: ruff check .
""")

    result = score_governance_quality(tmp_path)

    assert result.ci_quality_score == 15
    for expected in (
        "CI runs on PR",
        "Multiple CI jobs found",
        "Coverage/Test command referenced",
    ):
        assert _has_detail(result, expected)


def test_missing_files_score_zero_not_error(tmp_path):
    """An empty directory scores zero everywhere instead of raising."""
    result = score_governance_quality(tmp_path)

    for attribute in (
        "security_score",
        "contributing_score",
        "ci_quality_score",
        "readme_score",
        "total",
    ):
        assert getattr(result, attribute) == 0


def test_quality_score_normalized_to_100(tmp_path):
    """A repository satisfying every heuristic normalizes to exactly 100."""
    # Mock a "perfect" repo
    workflows = tmp_path / ".github" / "workflows"
    workflows.mkdir(parents=True)

    fixtures = {
        tmp_path / "SECURITY.md": "Contact security@example.com. Response within 48 hours. Private disclosure.",
        tmp_path / "CONTRIBUTING.md": "Run pytest. PR process. Use black.",
        workflows / "ci.yml": "on: [pull_request]\njobs:\n  test:\n    runs-on: ubuntu-latest\n    steps: [{run: pytest}]\n  lint:\n    runs-on: ubuntu-latest\n    steps: [{run: flake8}]",
        tmp_path / "README.md": "A" * 501 + "\npip install\n```python\nimport x\n```",
    }
    for target, text in fixtures.items():
        target.write_text(text)

    result = score_governance_quality(tmp_path)
    assert result.total == 100.0
read this guide.") + (tmp_path / "CONTRIBUTING.md").write_text( + "# Contributing\n\n## PR Process\nSubmit a branch.\n## Running Tests\nUse pytest.\n## Style\nFollow black." + ) # SECURITY - (tmp_path / "SECURITY.md").write_text("# Security Policy\n\nReport vulnerabilities to security@example.com") + (tmp_path / "SECURITY.md").write_text( + "# Security Policy\n\nReport vulnerabilities to security@example.com. Response within 48 hours. Private disclosure process." + ) - # README (>100 chars) - (tmp_path / "README.md").write_text("# Test Project\n\n" + "This is a test project. " * 20) + # README (>500 chars) + (tmp_path / "README.md").write_text("# Test Project\n\n" + "This is a test project with high quality documentation. " * 20 + "\npip install\n```python\nimport test\n```") # CODE_OF_CONDUCT (tmp_path / "CODE_OF_CONDUCT.md").write_text("# Code of Conduct\n\nBe kind.") @@ -62,7 +66,7 @@ def full_repo(tmp_path: Path) -> Path: # CI/CD wf_dir = gh_dir / "workflows" wf_dir.mkdir() - (wf_dir / "ci.yml").write_text("name: CI\non: push\njobs:\n test:\n runs-on: ubuntu-latest\n") + (wf_dir / "ci.yml").write_text("name: CI\non:\n pull_request:\n branches: [main]\njobs:\n test:\n runs-on: ubuntu-latest\n lint:\n runs-on: ubuntu-latest\n steps: [{run: pytest}]\n") return tmp_path @@ -115,7 +119,9 @@ def test_partial_governance(empty_repo: Path) -> None: (empty_repo / "README.md").write_text("# Project\n\n" + "Description goes here. " * 20) report = score_governance(empty_repo) - assert report.score == 30 # 20 (LICENSE) + 10 (README) + # 20 (LICENSE) + 10 (README) = 30 artifacts. Quality = 0. 
+ # Blended = 30 * 0.5 + 0 * 0.5 = 15.0 + assert report.score == 15.0 present = [c for c in report.checks if c.present] missing = [c for c in report.checks if not c.present] diff --git a/tests/test_leaderboard.py b/tests/test_leaderboard.py index 2347318..41e63d2 100644 --- a/tests/test_leaderboard.py +++ b/tests/test_leaderboard.py @@ -22,6 +22,8 @@ def _populate_db(db_path: Path) -> None: lint_score REAL, security_score REAL, complexity_score REAL, + governance_score REAL, + vitality_score REAL, total_findings INTEGER, total_loc INTEGER, findings_json TEXT @@ -40,6 +42,8 @@ def _populate_db(db_path: Path) -> None: lint_score REAL, security_score REAL, complexity_score REAL, + governance_score REAL, + vitality_score REAL, total_findings INTEGER DEFAULT 0, findings_json TEXT, created_at TEXT NOT NULL @@ -71,16 +75,18 @@ def _populate_db(db_path: Path) -> None: ) """) # Insert test data — 3 repos with different scores + # rows: repo_name, timestamp, overall, lint, security, complexity, gov, vitality, findings, loc, json rows = [ - ("pallets/flask", "2026-04-18T12:00:00Z", 96.2, 98.0, 100.0, 90.5, 42, 45000, "{}"), - ("django/django", "2026-04-18T12:00:00Z", 91.5, 95.0, 95.0, 84.5, 575, 513905, "{}"), - ("psf/requests", "2026-04-18T12:00:00Z", 78.3, 85.0, 90.0, 60.0, 120, 12500, "{}"), + ("pallets/flask", "2026-04-18T12:00:00Z", 96.2, 98.0, 100.0, 90.5, 80.0, 100.0, 42, 45000, "{}"), + ("django/django", "2026-04-18T12:00:00Z", 91.5, 95.0, 95.0, 84.5, 90.0, 90.0, 575, 513905, "{}"), + ("psf/requests", "2026-04-18T12:00:00Z", 78.3, 85.0, 90.0, 60.0, 45.0, 50.0, 120, 12500, "{}"), ] conn.executemany( """INSERT INTO repo_snapshots (repo_name, timestamp, overall_score, lint_score, security_score, - complexity_score, total_findings, total_loc, findings_json) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", + complexity_score, governance_score, vitality_score, + total_findings, total_loc, findings_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", rows, ) conn.commit()