From 93d94ed54a0aa778deab0ade82e8d9f524fe2e9b Mon Sep 17 00:00:00 2001 From: Siddharth Pandalai Date: Thu, 11 Jun 2026 11:47:00 +0530 Subject: [PATCH] fix(merge-tracker): company-guard the report-number dedup match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The report-number duplicate check matched on the bare number, but TSV numbers carry the report sequence while the tracker # column is its own row sequence (#867), so a TSV whose report number collides with the report number on an unrelated company's row was treated as a duplicate — and the update-in-place path overwrote that row's company, role, report link, and notes. Guard the report-number match with the same normalized-company check that #867 added for the entry-number match, hoisting a single normCompany/sameCompany shared by all three duplicate checks. Colliding TSVs fall through to the new-entry path and are appended with the next free number. Adds a regression section to test-all.mjs: the collision scenario corrupts row #1 on current main (verified) and leaves it byte-identical with the guard, plus a positive control proving same-company re-evals still update in place through the guarded path. Fixes #912 Co-Authored-By: Claude Fable 5 --- merge-tracker.mjs | 37 +++++++++++++---------- test-all.mjs | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/merge-tracker.mjs b/merge-tracker.mjs index 1ccc0dc2b..cfd0ec2e0 100644 --- a/merge-tracker.mjs +++ b/merge-tracker.mjs @@ -7,7 +7,8 @@ * - 8-col: num\tdate\tcompany\trole\tstatus\tscore\tpdf\treport (no notes) * - Pipe-delimited (markdown table row): | col | col | ... | * - * Dedup: company normalized + role fuzzy match + report number match + * Dedup: report-number or entry-number match (company-guarded — a bare + * number collision is never a duplicate), or company + role fuzzy match * If duplicate with higher score → update in-place, update report link * Validates status against states.yml (rejects non-canonical, logs warning) * @@ -354,17 +355,23 @@ for (const file of tsvFiles) { addition.report = normalizeReportLink(addition.report); // Check for duplicate by: - // 1. Exact report number match - // 2. Company + role fuzzy match + // 1. Report number match (same company only) + // 2. Entry number match (same company only) + // 3. Company + role fuzzy match const reportNum = extractReportNum(addition.report); + const normCompany = normalizeCompany(addition.company); + const sameCompany = (app) => normalizeCompany(app.company) === normCompany; let duplicate = null; if (reportNum) { - // Check if this report number already exists - duplicate = existingApps.find(app => { - const existingReportNum = extractReportNum(app.report); - return existingReportNum === reportNum; - }); + // Report number already used — but only when the company also matches. + // The same drift described for entry numbers below applies here: report + // numbering is its own sequence, so a tracker row whose link happens to + // carry the same number (manually added rows, renumbering after drift) + // is unrelated unless the company matches too. An unguarded hit here + // overwrote other companies' rows (#912). + duplicate = existingApps.find(app => + sameCompany(app) && extractReportNum(app.report) === reportNum); } if (!duplicate) { @@ -374,19 +381,15 @@ for (const file of tsvFiles) { // 067 while the tracker was already at #69). A bare num collision across // *different* companies is that drift, not a duplicate — matching on num // alone silently merges a brand-new role into an unrelated existing row. - const normCompany = normalizeCompany(addition.company); duplicate = existingApps.find(app => - app.num === addition.num && normalizeCompany(app.company) === normCompany + app.num === addition.num && sameCompany(app) ); } if (!duplicate) { // Company + role fuzzy match - const normCompany = normalizeCompany(addition.company); - duplicate = existingApps.find(app => { - if (normalizeCompany(app.company) !== normCompany) return false; - return roleFuzzyMatch(addition.role, app.role); - }); + duplicate = existingApps.find(app => + sameCompany(app) && roleFuzzyMatch(addition.role, app.role)); } if (duplicate) { @@ -406,7 +409,9 @@ for (const file of tsvFiles) { skipped++; } } else { - // New entry — use the number from the TSV + // New entry — use the TSV's number if it extends the sequence; otherwise + // (number already taken, e.g. a report number colliding with another + // company's row) append with the next free number instead of reusing it. const entryNum = addition.num > maxNum ? addition.num : ++maxNum; if (addition.num > maxNum) maxNum = addition.num; diff --git a/test-all.mjs b/test-all.mjs index 0bb06b5d7..15654f1f6 100644 --- a/test-all.mjs +++ b/test-all.mjs @@ -1379,6 +1379,82 @@ try { fail(`merge-tracker fuzzy dedup tests crashed: ${e.message}`); } +// ── MERGE-TRACKER NUMBER-COLLISION GUARD (#912) ───────────────── +// TSV numbers carry the REPORT sequence while the tracker `#` column is its +// own row sequence, and the two drift (#867). A TSV whose report number +// collides with the report number on an unrelated company's row must NOT be +// treated as a duplicate — the unguarded report-number match overwrote that +// row's company/role/notes. The colliding TSV has to be appended as a new +// row with the next free number, leaving the existing row byte-identical. +console.log('\n🧪 Testing merge-tracker number-collision guard (#912)...'); +try { + const collTmp = mkdtempSync(join(tmpdir(), 'career-ops-collision-')); + try { + mkdirSync(join(collTmp, 'data')); + mkdirSync(join(collTmp, 'reports')); + const additionsDir = join(collTmp, 'additions'); + mkdirSync(additionsDir); + const tracker = join(collTmp, 'data', 'applications.md'); + const mergeEnv = { ...process.env, CAREER_OPS_TRACKER: tracker, CAREER_OPS_ADDITIONS: additionsDir }; + + // Row #1 carries report number 1 — the same number the new TSV brings. + const oldcoRow = '| 1 | 2026-01-09 | OldCo | Senior Widget Engineer | 4.1/5 | Interview | ❌ | [1](../reports/001-oldco-2026-01-09.md) | precious row-one notes |'; + writeFileSync(tracker, + '# Applications Tracker\n\n' + + '| # | Date | Company | Role | Score | Status | PDF | Report | Notes |\n' + + '|---|------|---------|------|-------|--------|-----|--------|-------|\n' + + oldcoRow + '\n' + + '| 2 | 2026-01-01 | OtherCo | Widget Engineer | N/A | Applied | ❌ | — | row two |\n'); + for (const n of ['001-oldco-2026-01-09', '001-newco-2026-01-10', '001-newco-2026-01-11']) { + writeFileSync(join(collTmp, 'reports', `${n}.md`), '# fixture\n'); + } + + // Colliding TSV: report number 1 (= row #1's report number), DIFFERENT company, + // higher score than row #1 so the update-in-place branch would fire if matched. + writeFileSync(join(additionsDir, '001-newco.tsv'), + '1\t2026-01-10\tNewCo\tPlatform Reliability Lead\tEvaluated\t4.7/5\t❌\t[1](reports/001-newco-2026-01-10.md)\tfresh eval\n'); + + run(NODE, ['merge-tracker.mjs'], { env: mergeEnv }); + const after = readFileSync(tracker, 'utf-8'); + + if (after.includes(oldcoRow)) { + pass('report-number collision: existing row #1 (OldCo) left byte-identical'); + } else { + fail('report-number collision OVERWROTE row #1 (#912 regression — bare number match must not dedup)'); + } + + const newcoRows = after.split('\n').filter(l => l.includes('NewCo')); + if (newcoRows.length === 1 && /^\| 3 \| 2026-01-10 \| NewCo \| Platform Reliability Lead \|/.test(newcoRows[0])) { + pass('colliding TSV appended as a new row with the next free number (#3)'); + } else { + fail(`colliding TSV not appended as #3: ${JSON.stringify(newcoRows)}`); + } + + // Positive control: SAME company + same report number with a higher score + // must still UPDATE in place via the (now company-guarded) report-number path. + writeFileSync(join(additionsDir, '001-newco.tsv'), + '1\t2026-01-11\tNewCo\tPlatform Reliability Lead\tEvaluated\t4.9/5\t❌\t[1](reports/001-newco-2026-01-11.md)\tre-evaluated\n'); + + run(NODE, ['merge-tracker.mjs'], { env: mergeEnv }); + const after2 = readFileSync(tracker, 'utf-8'); + const newcoRows2 = after2.split('\n').filter(l => l.includes('NewCo')); + if (newcoRows2.length === 1 && newcoRows2[0].startsWith('| 3 |') && newcoRows2[0].includes('4.9/5')) { + pass('same-company report-number match still updates in place (no duplicate row)'); + } else { + fail(`same-company re-eval did not update #3 in place: ${JSON.stringify(newcoRows2)}`); + } + if (after2.includes(oldcoRow)) { + pass('row #1 still intact after the same-company re-eval merge'); + } else { + fail('row #1 was modified by the same-company re-eval merge'); + } + } finally { + rmSync(collTmp, { recursive: true, force: true }); + } +} catch (e) { + fail(`merge-tracker number-collision tests crashed: ${e.message}`); +} + // ── 12. COLD-START TRIGGER ────────────────────────────────────── console.log('\n12. Cold-start trigger (deterministic onboarding state)');