diff --git a/docs/SCRIPTS.md b/docs/SCRIPTS.md index ac0c40371d..8a7671bd7f 100644 --- a/docs/SCRIPTS.md +++ b/docs/SCRIPTS.md @@ -18,10 +18,39 @@ All scripts live in the project root as `.mjs` modules and are exposed via `npm | `npm run update` | `update-system.mjs apply` | Apply upstream update | | `npm run rollback` | `update-system.mjs rollback` | Rollback last update | | `npm run liveness` | `check-liveness.mjs` | Test if job URLs are still active | +| `npm run evidence` | `evidence-manifest.mjs` | Validate evaluation evidence manifests | | `npm run scan` | `scan.mjs` | Zero-token portal scanner | --- +## evidence + +Validates lightweight evidence manifests beside evaluation reports. A report manifest lives at `reports/{num}-{slug}-{date}.evidence.json`. + +Required fields: + +- `report_number` +- `company` +- `role` +- `source` (`https://...`, `local:jds/...`, or pasted/source label) +- `fetched_at` ISO timestamp +- `source_path` (`Playwright`, `WebFetch`, `WebSearch`, `local`, `pasted`) +- `liveness_result` (`active`, `expired`, `uncertain`, `unverified`, `not_applicable`) +- `jd_text_hash` +- `report_path` +- `pdf_path` string or null + +Missing manifests for legacy reports are warning-only. Invalid manifests fail the command. + +```bash +npm run evidence +node evidence-manifest.mjs --self-test +``` + +**Exit codes:** `0` no invalid manifests, `1` one or more manifest files are invalid. + +--- + ## doctor Validates that all prerequisites are in place: Node.js >= 18, dependencies installed, Playwright chromium, required files (`cv.md`, `config/profile.yml`, `portals.yml`), fonts directory, and auto-creates `data/`, `output/`, `reports/` if missing. diff --git a/evidence-manifest.mjs b/evidence-manifest.mjs new file mode 100644 index 0000000000..488f5462b1 --- /dev/null +++ b/evidence-manifest.mjs @@ -0,0 +1,135 @@ +#!/usr/bin/env node +/** + * evidence-manifest.mjs — validate lightweight evaluation evidence manifests. 
import { existsSync, mkdtempSync, readdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
import { tmpdir } from 'os';
import { basename, dirname, join } from 'path';
import { fileURLToPath, pathToFileURL } from 'url';

// Directory containing this module; the default reports directory is resolved against it.
const ROOT = dirname(fileURLToPath(import.meta.url));
// CAREER_OPS_REPORTS overrides the default `<ROOT>/reports` location.
const REPORTS_DIR = process.env.CAREER_OPS_REPORTS || join(ROOT, 'reports');
const VALID_SOURCE_PATHS = new Set(['Playwright', 'WebFetch', 'WebSearch', 'local', 'pasted']);
const VALID_LIVENESS = new Set(['active', 'expired', 'uncertain', 'unverified', 'not_applicable']);

/**
 * @param {unknown} value
 * @returns {boolean} true for non-null, non-array objects.
 */
function isObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * @param {unknown} value
 * @returns {boolean} true when `value` is a string with at least one non-whitespace character.
 */
function nonEmptyString(value) {
  return typeof value === 'string' && value.trim().length > 0;
}

/**
 * @param {unknown} value
 * @returns {boolean} true when `value` is a non-empty string that `Date.parse` can parse.
 */
function validDateTime(value) {
  return nonEmptyString(value) && !Number.isNaN(Date.parse(value));
}

/**
 * Validate a parsed evidence manifest.
 *
 * Never throws: every problem is reported through the returned arrays, so callers
 * can rely on `errors` being complete even for badly malformed manifests.
 *
 * @param {unknown} manifest - Parsed JSON value of an `.evidence.json` file.
 * @returns {{ errors: string[], warnings: string[] }} Validation messages; an empty
 *   `errors` array means the manifest is valid.
 */
export function validateManifest(manifest) {
  const errors = [];
  const warnings = [];

  if (!isObject(manifest)) return { errors: ['manifest must be a JSON object'], warnings };

  if (!Number.isInteger(manifest.report_number) || manifest.report_number < 1) {
    errors.push('report_number must be a positive integer');
  }
  for (const key of ['company', 'role', 'source', 'jd_text_hash', 'report_path']) {
    if (!nonEmptyString(manifest[key])) errors.push(`${key} must be a non-empty string`);
  }
  if (!validDateTime(manifest.fetched_at)) errors.push('fetched_at must be an ISO-compatible timestamp');
  if (!VALID_SOURCE_PATHS.has(manifest.source_path)) {
    errors.push(`source_path must be one of: ${Array.from(VALID_SOURCE_PATHS).join(', ')}`);
  }
  if (!VALID_LIVENESS.has(manifest.liveness_result)) {
    errors.push(`liveness_result must be one of: ${Array.from(VALID_LIVENESS).join(', ')}`);
  }
  // pdf_path may be omitted; when present it must be a string or null.
  if ('pdf_path' in manifest && manifest.pdf_path !== null && typeof manifest.pdf_path !== 'string') {
    errors.push('pdf_path must be a string or null');
  }
  // Guard the type first: a missing or non-string `source` has already been reported
  // above, and calling startsWith on it would throw and discard the collected errors.
  const isLocalSource = typeof manifest.source === 'string' && manifest.source.startsWith('local:');
  if (isLocalSource && manifest.liveness_result !== 'not_applicable') {
    warnings.push('local sources should normally use liveness_result=not_applicable');
  }
  return { errors, warnings };
}

/**
 * List report files in `dir`.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {string[]} Paths of entries named `NNN-<anything>.md` (three leading digits);
 *   an empty array when `dir` does not exist.
 */
function reportFiles(dir) {
  if (!existsSync(dir)) return [];
  return readdirSync(dir)
    .filter((file) => /^\d{3}-.+\.md$/.test(file))
    .map((file) => join(dir, file));
}

/**
 * Read, parse and validate one manifest file.
 *
 * @param {string} path - Path to an `.evidence.json` file.
 * @returns {{ errors: string[], warnings: string[] }} Validation result. A file that
 *   cannot be read or parsed yields a single `invalid JSON: …` error.
 */
function validateFile(path) {
  let parsed;
  // Only reading and parsing sit inside the try, so the "invalid JSON" label is never
  // attached to a failure that happened during validation.
  try {
    parsed = JSON.parse(readFileSync(path, 'utf-8'));
  } catch (err) {
    return { errors: [`invalid JSON: ${err.message}`], warnings: [] };
  }
  return validateManifest(parsed);
}

/**
 * Check the evidence manifest that sits beside every report in a directory.
 *
 * For each report `X.md` the manifest is expected at `X.evidence.json`. A missing
 * manifest produces a warning; an unreadable or invalid one produces errors.
 *
 * @param {{ reportsDir?: string }} [options] - `reportsDir` defaults to REPORTS_DIR.
 * @returns {Array<{ report: string, manifest: string, status: 'ok' | 'warning' | 'error',
 *   errors: string[], warnings: string[] }>} One entry per report file found.
 */
export function verifyEvidenceManifests({ reportsDir = REPORTS_DIR } = {}) {
  const results = [];
  for (const report of reportFiles(reportsDir)) {
    const manifest = report.replace(/\.md$/, '.evidence.json');
    if (!existsSync(manifest)) {
      results.push({
        report: basename(report),
        manifest: basename(manifest),
        status: 'warning',
        errors: [],
        warnings: ['missing evidence manifest (legacy reports are warning-only)'],
      });
      continue;
    }
    const { errors, warnings } = validateFile(manifest);
    let status = 'ok';
    if (errors.length) status = 'error';
    else if (warnings.length) status = 'warning';
    results.push({
      report: basename(report),
      manifest: basename(manifest),
      status,
      errors,
      warnings,
    });
  }
  return results;
}

/**
 * Run the validator against fixtures in a throwaway temp directory.
 *
 * Covers a valid manifest, a report without a manifest, and a manifest with no
 * `source` field. Throws when any result differs from the expectation; the temp
 * directory is removed either way.
 */
function selfTest() {
  const dir = mkdtempSync(join(tmpdir(), 'co-evidence-'));
  try {
    const validManifest = {
      report_number: 1,
      company: 'Acme',
      role: 'AI Engineer',
      source: 'https://jobs.example/acme',
      fetched_at: '2026-06-10T00:00:00.000Z',
      source_path: 'Playwright',
      liveness_result: 'active',
      jd_text_hash: 'sha256:abc123',
      report_path: 'reports/001-acme-2026-06-10.md',
      pdf_path: null,
    };
    writeFileSync(join(dir, '001-acme-2026-06-10.md'), '# Report\n');
    writeFileSync(join(dir, '001-acme-2026-06-10.evidence.json'), JSON.stringify(validManifest, null, 2));
    writeFileSync(join(dir, '002-legacy-2026-06-10.md'), '# Legacy\n');

    // Regression fixture: a manifest without `source` must be reported as a validation
    // error, not as a JSON parse failure.
    const { source: _omitted, ...withoutSource } = validManifest;
    writeFileSync(join(dir, '003-nosource-2026-06-10.md'), '# No source\n');
    writeFileSync(
      join(dir, '003-nosource-2026-06-10.evidence.json'),
      JSON.stringify({ ...withoutSource, report_number: 3 }, null, 2),
    );

    const results = verifyEvidenceManifests({ reportsDir: dir });
    const ok = results.find((result) => result.report.startsWith('001-'))?.status === 'ok';
    const legacyWarning = results.find((result) => result.report.startsWith('002-'))?.status === 'warning';
    const noSource = results.find((result) => result.report.startsWith('003-'));
    const noSourceReported =
      noSource?.status === 'error' &&
      noSource.errors.length === 1 &&
      noSource.errors[0] === 'source must be a non-empty string';
    if (!ok || !legacyWarning || !noSourceReported) {
      throw new Error(`unexpected self-test result: ${JSON.stringify(results)}`);
    }
    console.log('evidence-manifest self-test passed');
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * CLI entry point.
 *
 * With `--self-test`, runs selfTest(). Otherwise validates REPORTS_DIR, prints a JSON
 * summary to stdout and finishes with exit code 1 when any manifest has errors.
 */
function main() {
  if (process.argv.includes('--self-test')) return selfTest();
  const results = verifyEvidenceManifests();
  const errors = results.reduce((sum, result) => sum + result.errors.length, 0);
  const warnings = results.reduce((sum, result) => sum + result.warnings.length, 0);
  console.log(JSON.stringify({ errors, warnings, results }, null, 2));
  // Set the exit code instead of calling process.exit(): the process still ends with
  // status 1, but pending stdout writes are not cut short when output is piped.
  if (errors > 0) process.exitCode = 1;
}

// Run only when executed directly, not when imported as a module.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}
`reports/{###}-{company-slug}-{YYYY-MM-DD}.md` (see format in `modes/oferta.md`). Include Block G in the saved report. Add **URL:** {url} and **Legitimacy:** {tier} to the report header. +Also save a lightweight evidence manifest beside the report: + +`reports/{###}-{company-slug}-{YYYY-MM-DD}.evidence.json` + +Include report number, company, role, original URL or `local:jds/...`, fetched timestamp, extraction source path (`Playwright`, `WebFetch`, `WebSearch`, `local`, or `pasted`), liveness result, JD text hash, report path, and PDF path if generated. For cases where liveness was not checked: use `liveness_result: "not_applicable"` for local file sources (`local:jds/...`), or `"unverified"` for pasted/manual text or URL inputs where liveness checking was skipped. + ## Step 3 — Generate PDF Read `config/profile.yml`. Check `cv.output_format`: diff --git a/modes/pipeline.md b/modes/pipeline.md index 3bbf4b781f..173f74b120 100644 --- a/modes/pipeline.md +++ b/modes/pipeline.md @@ -10,6 +10,7 @@ Process job URLs stored in `data/pipeline.md`. The user adds URLs at any time an b. **Extract JD** using Playwright (browser_navigate + browser_snapshot) → WebFetch → WebSearch c. If the URL is not accessible → mark as `- [!]` with a note and continue d. **Execute full auto-pipeline**: Evaluation A-F → Report .md → PDF (if score >= `auto_pdf_score_threshold`) → Tracker + - Save `reports/{###}-{company-slug}-{YYYY-MM-DD}.evidence.json` beside the report with report number, company, role, original URL or `local:jds/...` reference, fetched timestamp, source path (`Playwright`, `WebFetch`, `WebSearch`, `local`, or `pasted`), liveness result, JD hash, report path, and PDF path/null. e. **Move from "Pending" to "Processed"**: `- [x] #NNN | URL | Company | Role | Score/5 | PDF ✅/❌` **About the PDF gate (configurable):** Read `config/profile.yml` → `auto_pdf_score_threshold`. If the key does not exist, default to `3.0` (this mode's original gate). 
If the evaluation score is less than the threshold, skip PDF generation: write the report normally, show in the header `**PDF:** not generated — run /career-ops pdf {company-slug} to create on demand`, and mark PDF ❌ in the tracker. If the score is ≥ threshold, generate the PDF as usual. diff --git a/package.json b/package.json index 6d02cc32a9..2cc1aafaed 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "update": "node update-system.mjs apply", "rollback": "node update-system.mjs rollback", "liveness": "node check-liveness.mjs", + "evidence": "node evidence-manifest.mjs", "scan": "node scan.mjs", "patterns": "node analyze-patterns.mjs", "gemini:eval": "node gemini-eval.mjs" diff --git a/test-all.mjs b/test-all.mjs index 98842f207b..a27a4de476 100644 --- a/test-all.mjs +++ b/test-all.mjs @@ -71,6 +71,7 @@ const scripts = [ { name: 'dedup-tracker.mjs', expectExit: 0 }, { name: 'merge-tracker.mjs', expectExit: 0 }, { name: 'analyze-patterns.mjs --self-test', expectExit: 0 }, + { name: 'evidence-manifest.mjs --self-test', expectExit: 0 }, { name: 'update-system.mjs check', expectExit: 0 }, ];