From b5231d765a8dd7bbc71bb47d64fbc3dfb184153b Mon Sep 17 00:00:00 2001
From: Greg Jackson <gregario@users.noreply.github.com>
Date: Mon, 30 Mar 2026 01:56:31 +0100
Subject: [PATCH 1/2] feat(review): add PR type triage and non-application
 review categories

Add Step 3.5 to classify PRs by type (application, CI/infra, scripts,
config, docs, tests, mixed) and guide which review categories to
prioritize. Prevents false-confidence N/A results on non-application PRs.

Add three new Pass 2 checklist categories:
- Script & Shell Quality: shell options, shebangs, quoting, portability
- Platform & Convention Consistency: toolchain matching, naming conventions
- Configuration & Infrastructure Safety: hardcoded secrets, CI triggers

Closes #356

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 review/SKILL.md      | 38 ++++++++++++++++++++++++++++++++++++++
 review/SKILL.md.tmpl | 38 ++++++++++++++++++++++++++++++++++++++
 review/checklist.md  | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/review/SKILL.md b/review/SKILL.md
index 52560d774..305e852d9 100644
--- a/review/SKILL.md
+++ b/review/SKILL.md
@@ -591,6 +591,44 @@ Run `git diff origin/<base>` to get the full diff. This includes both committed
 
 ---
 
+## Step 3.5: Classify PR type
+
+Before applying the checklist, classify the PR based on the files changed in the diff stat:
+
+1. Count files by category from the `git diff --stat` output:
+   - **Application code:** `.ts`, `.js`, `.py`, `.rb`, `.go`, `.rs`, `.java`, `.tsx`, `.jsx`, `.vue`, `.svelte` (source files in `src/`, `lib/`, `app/`, or similar)
+   - **CI/Infra:** `.yml`, `.yaml` in `.github/`, `.circleci/`, `.gitlab-ci/`; `Dockerfile*`, `docker-compose*`, `Makefile`, `Justfile`
+   - **Scripts:** `.sh`, `.bash`, `.zsh`, `.bat`, `.ps1`, `.cmd` files; files in `bin/`, `scripts/` directories
+   - **Config:** `.json`, `.yaml`, `.yml`, `.toml`, `.ini`, `.env*` config files (not in CI dirs)
+   - **Docs:** `.md`, `.txt`, `.rst` files
+   - **Tests:** files in `test/`, `tests/`, `spec/`, `__tests__/` directories; files matching `*.test.*`, `*.spec.*`
+
+2. Determine the **primary PR type** based on which category has the most changed files:
+   - `APPLICATION` — majority application code
+   - `CI_INFRA` — majority CI/infra files
+   - `SCRIPTS` — majority scripts
+   - `CONFIG` — majority config files
+   - `DOCS` — majority documentation
+   - `TESTS` — majority test files
+   - `MIXED` — no single category >50% of changed files
+
+3. Output (before the two-pass review):
+   ```
+   PR Type: [TYPE] ([N] app, [N] ci, [N] scripts, [N] config, [N] docs, [N] tests)
+   ```
+
+4. **Category relevance guide:**
+   - `APPLICATION` or `MIXED`: Run ALL checklist categories (current behavior)
+   - `CI_INFRA`: Prioritize Distribution & CI/CD Pipeline, Configuration & Infrastructure Safety, Script & Shell Quality. Skip SQL, LLM, View/Frontend, Type Coercion categories.
+   - `SCRIPTS`: Prioritize Script & Shell Quality, Platform & Convention Consistency, Shell Injection. Skip SQL, LLM, View/Frontend categories.
+   - `CONFIG`: Prioritize Configuration & Infrastructure Safety, Platform & Convention Consistency. Skip most application categories.
+   - `DOCS`: Check Dead Code & Consistency (stale docs), Platform & Convention Consistency (naming). Skip all code categories.
+   - `TESTS`: Run Test Gaps, Completeness Gaps, and application categories relevant to what's being tested.
+
+   **Important:** "Skip" means "don't spend time on categories that will obviously return N/A." If a skipped category is unexpectedly relevant (e.g., a CI file that also contains SQL), still flag it. Use judgment — the classification is a guide, not a hard rule.
+
+---
+
 ## Prior Learnings
 
 Search for relevant learnings from previous sessions:
diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl
index fa14f26a7..8618f2e03 100644
--- a/review/SKILL.md.tmpl
+++ b/review/SKILL.md.tmpl
@@ -104,6 +104,44 @@ Run `git diff origin/<base>` to get the full diff. This includes both committed
 
 ---
 
+## Step 3.5: Classify PR type
+
+Before applying the checklist, classify the PR based on the files changed in the diff stat:
+
+1. Count files by category from the `git diff --stat` output:
+   - **Application code:** `.ts`, `.js`, `.py`, `.rb`, `.go`, `.rs`, `.java`, `.tsx`, `.jsx`, `.vue`, `.svelte` (source files in `src/`, `lib/`, `app/`, or similar)
+   - **CI/Infra:** `.yml`, `.yaml` in `.github/`, `.circleci/`, `.gitlab-ci/`; `Dockerfile*`, `docker-compose*`, `Makefile`, `Justfile`
+   - **Scripts:** `.sh`, `.bash`, `.zsh`, `.bat`, `.ps1`, `.cmd` files; files in `bin/`, `scripts/` directories
+   - **Config:** `.json`, `.yaml`, `.yml`, `.toml`, `.ini`, `.env*` config files (not in CI dirs)
+   - **Docs:** `.md`, `.txt`, `.rst` files
+   - **Tests:** files in `test/`, `tests/`, `spec/`, `__tests__/` directories; files matching `*.test.*`, `*.spec.*`
+
+2. Determine the **primary PR type** based on which category has the most changed files:
+   - `APPLICATION` — majority application code
+   - `CI_INFRA` — majority CI/infra files
+   - `SCRIPTS` — majority scripts
+   - `CONFIG` — majority config files
+   - `DOCS` — majority documentation
+   - `TESTS` — majority test files
+   - `MIXED` — no single category >50% of changed files
+
+3. Output (before the two-pass review):
+   ```
+   PR Type: [TYPE] ([N] app, [N] ci, [N] scripts, [N] config, [N] docs, [N] tests)
+   ```
+
+4. **Category relevance guide:**
+   - `APPLICATION` or `MIXED`: Run ALL checklist categories (current behavior)
+   - `CI_INFRA`: Prioritize Distribution & CI/CD Pipeline, Configuration & Infrastructure Safety, Script & Shell Quality. Skip SQL, LLM, View/Frontend, Type Coercion categories.
+   - `SCRIPTS`: Prioritize Script & Shell Quality, Platform & Convention Consistency, Shell Injection. Skip SQL, LLM, View/Frontend categories.
+   - `CONFIG`: Prioritize Configuration & Infrastructure Safety, Platform & Convention Consistency. Skip most application categories.
+   - `DOCS`: Check Dead Code & Consistency (stale docs), Platform & Convention Consistency (naming). Skip all code categories.
+   - `TESTS`: Run Test Gaps, Completeness Gaps, and application categories relevant to what's being tested.
+
+   **Important:** "Skip" means "don't spend time on categories that will obviously return N/A." If a skipped category is unexpectedly relevant (e.g., a CI file that also contains SQL), still flag it. Use judgment — the classification is a guide, not a hard rule.
+
+---
+
 {{LEARNINGS_SEARCH}}
 
 ## Step 4: Two-pass review
diff --git a/review/checklist.md b/review/checklist.md
index cfedcf81f..93da21953 100644
--- a/review/checklist.md
+++ b/review/checklist.md
@@ -154,6 +154,40 @@ To do this: use Grep to find all references to the sibling values (e.g., grep fo
 - Internal tools not distributed outside the team
 - Test-only CI changes (adding test steps, not publish steps)
 
+#### Script & Shell Quality
+- Shell scripts without `set -euo pipefail` (or individual `set -e`, `set -u`) — failures silently ignored
+- Missing or incorrect shebang lines (`#!/usr/bin/env bash` preferred over `#!/bin/bash` for portability)
+- Unquoted variables in shell scripts (`$VAR` instead of `"$VAR"`) — word splitting and globbing bugs
+- `eval` usage with variable interpolation — injection risk
+- Platform-specific commands without guards (e.g., `sed -i ''` is macOS-only, Linux needs `sed -i`)
+- Scripts missing `chmod +x` or not matching the executable permissions of sibling scripts in the same directory
+
+**DO NOT flag:**
+- Scripts that are explicitly single-platform (documented in comments or filename like `*.bat`)
+- Helper scripts only run via a parent script that already sets shell options
+
+#### Platform & Convention Consistency
+- New files using a different toolchain than existing files in the same directory (e.g., `.bat` script added to a directory of `.sh` scripts, `Makefile` added alongside existing `package.json` scripts)
+- CI workflow targeting an OS not covered by the project's test matrix or existing CI jobs (e.g., Windows step in a Linux-only CI)
+- New config files duplicating settings already managed by an existing config (e.g., adding `.prettierrc` when `package.json` already has prettier config)
+- Naming convention violations: new files not matching the case/separator pattern of sibling files (e.g., `myScript.sh` among `kebab-case.sh` files)
+
+**DO NOT flag:**
+- Cross-platform additions that are clearly intentional (documented in PR or commit message)
+- Files in new directories that establish their own conventions
+
+#### Configuration & Infrastructure Safety
+- Secrets or tokens hardcoded in CI workflows — must use `${{ secrets.X }}` or environment variable references
+- CI workflows with `pull_request_target` trigger without explicit commit SHA pinning — allows arbitrary code execution from forks
+- Docker images pinned by mutable tag (`:latest`, `:main`) instead of SHA digest in CI — supply chain risk
+- Overly broad file permissions in scripts (`chmod 777` or world-writable)
+- Missing `.gitignore` entries for new generated/build artifact directories
+- Environment-specific paths hardcoded (e.g., `/home/runner/`, `/Users/`) instead of using variables
+
+**DO NOT flag:**
+- Development-only Docker compose files using `:latest`
+- Local development scripts with relaxed permissions
+
 ---
 
 ## Severity Classification
@@ -171,7 +205,10 @@ CRITICAL (highest severity):      INFORMATIONAL (lower severity):
                                    ├─ Type Coercion at Boundaries
                                    ├─ View/Frontend
                                    ├─ Performance & Bundle Impact
-                                   └─ Distribution & CI/CD Pipeline
+                                   ├─ Distribution & CI/CD Pipeline
+                                   ├─ Script & Shell Quality
+                                   ├─ Platform & Convention Consistency
+                                   └─ Configuration & Infrastructure Safety
 
 All findings are actioned via Fix-First Review. Severity determines
 presentation order and classification of AUTO-FIX vs ASK — critical

From 63b078f4a0b6624856cad55d2ed7e08d3225e4e7 Mon Sep 17 00:00:00 2001
From: Greg Jackson <gregario@users.noreply.github.com>
Date: Mon, 30 Mar 2026 01:58:11 +0100
Subject: [PATCH 2/2] test(review): add E2E test for CI/infra PR blindspot
 detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Plants 3 issues in a CI workflow (pull_request_target without SHA
pinning, hardcoded token, chmod 777 on Windows runner) and verifies
the review catches at least 2 of 3.

Registered as gate tier — CI security issues should always be caught.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 test/fixtures/review-eval-ci-baseline.yml  | 17 +++++
 test/fixtures/review-eval-ci-blindspot.yml | 27 +++++++
 test/helpers/touchfiles.ts                 |  2 +
 test/skill-e2e-review.test.ts              | 85 ++++++++++++++++++++++
 4 files changed, 131 insertions(+)
 create mode 100644 test/fixtures/review-eval-ci-baseline.yml
 create mode 100644 test/fixtures/review-eval-ci-blindspot.yml

diff --git a/test/fixtures/review-eval-ci-baseline.yml b/test/fixtures/review-eval-ci-baseline.yml
new file mode 100644
index 000000000..f19b70303
--- /dev/null
+++ b/test/fixtures/review-eval-ci-baseline.yml
@@ -0,0 +1,17 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      - run: npm ci
+      - run: npm test
diff --git a/test/fixtures/review-eval-ci-blindspot.yml b/test/fixtures/review-eval-ci-blindspot.yml
new file mode 100644
index 000000000..0d28dbc6b
--- /dev/null
+++ b/test/fixtures/review-eval-ci-blindspot.yml
@@ -0,0 +1,27 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request_target:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      - run: npm ci
+      - run: npm test
+
+  deploy:
+    runs-on: windows-latest
+    needs: test
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          TOKEN="ghp_abc123secrettoken"
+          curl -H "Authorization: token $TOKEN" https://api.github.com/repos
+      - run: chmod 777 deploy.sh && ./deploy.sh
diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts
index b475daad7..b4620356d 100644
--- a/test/helpers/touchfiles.ts
+++ b/test/helpers/touchfiles.ts
@@ -58,6 +58,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
   'review-enum-completeness': ['review/**', 'test/fixtures/review-eval-enum*.rb'],
   'review-base-branch':       ['review/**'],
   'review-design-lite':       ['review/**', 'test/fixtures/review-eval-design-slop.*'],
+  'review-ci-blindspot':      ['review/**', 'test/fixtures/review-eval-ci-*'],
 
   // Office Hours
   'office-hours-spec-review':  ['office-hours/**', 'scripts/gen-skill-docs.ts'],
@@ -199,6 +200,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
   'review-enum-completeness': 'gate',
   'review-base-branch': 'gate',
   'review-design-lite': 'periodic',   // 4/7 threshold is subjective
+  'review-ci-blindspot': 'gate',     // Security guardrail: CI blindspot detection
   'review-coverage-audit': 'gate',
   'review-plan-completion': 'gate',
   'review-dashboard-via': 'gate',
diff --git a/test/skill-e2e-review.test.ts b/test/skill-e2e-review.test.ts
index dacd4b166..e9714cd51 100644
--- a/test/skill-e2e-review.test.ts
+++ b/test/skill-e2e-review.test.ts
@@ -252,6 +252,91 @@ Important: The design checklist should catch issues like blacklisted fonts, smal
   }, 300_000);
 });
 
+// --- Review: CI/infra blindspot E2E ---
+
+describeIfSelected('Review CI blindspot E2E', ['review-ci-blindspot'], () => {
+  let ciDir: string;
+
+  beforeAll(() => {
+    ciDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-ci-blindspot-'));
+
+    const run = (cmd: string, args: string[]) =>
+      spawnSync(cmd, args, { cwd: ciDir, stdio: 'pipe', timeout: 5000 });
+
+    run('git', ['init', '-b', 'main']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+
+    // Commit clean CI baseline on main
+    fs.mkdirSync(path.join(ciDir, '.github', 'workflows'), { recursive: true });
+    const baseline = fs.readFileSync(path.join(ROOT, 'test', 'fixtures', 'review-eval-ci-baseline.yml'), 'utf-8');
+    fs.writeFileSync(path.join(ciDir, '.github', 'workflows', 'ci.yml'), baseline);
+
+    // Add a few bash scripts to establish repo conventions
+    fs.mkdirSync(path.join(ciDir, 'bin'), { recursive: true });
+    fs.writeFileSync(path.join(ciDir, 'bin', 'test.sh'), '#!/usr/bin/env bash\nset -euo pipefail\nnpm test\n');
+    fs.writeFileSync(path.join(ciDir, 'bin', 'build.sh'), '#!/usr/bin/env bash\nset -euo pipefail\nnpm run build\n');
+
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'initial CI and scripts']);
+
+    // Feature branch with planted CI issues
+    run('git', ['checkout', '-b', 'feature/update-ci']);
+    const blindspot = fs.readFileSync(path.join(ROOT, 'test', 'fixtures', 'review-eval-ci-blindspot.yml'), 'utf-8');
+    fs.writeFileSync(path.join(ciDir, '.github', 'workflows', 'ci.yml'), blindspot);
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'update CI workflow']);
+
+    // Copy review skill files
+    fs.copyFileSync(path.join(ROOT, 'review', 'SKILL.md'), path.join(ciDir, 'review-SKILL.md'));
+    fs.copyFileSync(path.join(ROOT, 'review', 'checklist.md'), path.join(ciDir, 'review-checklist.md'));
+    fs.copyFileSync(path.join(ROOT, 'review', 'greptile-triage.md'), path.join(ciDir, 'review-greptile-triage.md'));
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(ciDir, { recursive: true, force: true }); } catch {}
+  });
+
+  testConcurrentIfSelected('review-ci-blindspot', async () => {
+    const result = await runSkillTest({
+      prompt: `You are in a git repo on branch feature/update-ci with changes against main.
+Read review-SKILL.md for the review workflow instructions.
+Also read review-checklist.md and apply it.
+Skip the preamble bash block, lake intro, telemetry, and contributor mode sections — go straight to the review.
+Run /review on the current diff (git diff main...HEAD).
+
+The diff modifies a CI workflow file. Pay attention to PR type classification and CI-specific review categories.
+Write your review findings to ${ciDir}/review-output.md`,
+      workingDirectory: ciDir,
+      maxTurns: 20,
+      timeout: 180_000,
+      testName: 'review-ci-blindspot',
+      runId,
+    });
+
+    logCost('/review ci-blindspot', result);
+    recordE2E(evalCollector, '/review CI blindspot', 'Review CI blindspot E2E', result);
+    expect(result.exitReason).toBe('success');
+
+    // Verify the review caught at least 2 of 3 planted issues
+    const reviewPath = path.join(ciDir, 'review-output.md');
+    if (fs.existsSync(reviewPath)) {
+      const review = fs.readFileSync(reviewPath, 'utf-8').toLowerCase();
+      let detected = 0;
+
+      // Issue 1: pull_request_target security risk
+      if (review.includes('pull_request_target') || review.includes('fork') || review.includes('arbitrary code')) detected++;
+      // Issue 2: Hardcoded secret/token
+      if (review.includes('hardcoded') || review.includes('secret') || review.includes('token') || review.includes('ghp_')) detected++;
+      // Issue 3: chmod 777 or Windows runner inconsistency
+      if (review.includes('777') || review.includes('windows') || review.includes('permission') || review.includes('platform')) detected++;
+
+      console.log(`CI blindspot review detected ${detected}/3 planted issues`);
+      expect(detected).toBeGreaterThanOrEqual(2);
+    }
+  }, 210_000);
+});
+
 // --- Base branch detection smoke tests ---
 
 describeIfSelected('Base branch detection', ['review-base-branch', 'ship-base-branch', 'retro-base-branch'], () => {