Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions merge-tracker.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
* - 8-col: num\tdate\tcompany\trole\tstatus\tscore\tpdf\treport (no notes)
* - Pipe-delimited (markdown table row): | col | col | ... |
*
* Dedup: company normalized + role fuzzy match + report number match
* Dedup: report-number or entry-number match (company-guarded — a bare
* number collision is never a duplicate), or company + role fuzzy match
* If duplicate with higher score → update in-place, update report link
* Validates status against states.yml (rejects non-canonical, logs warning)
*
Expand Down Expand Up @@ -354,17 +355,23 @@ for (const file of tsvFiles) {
addition.report = normalizeReportLink(addition.report);

// Check for duplicate by:
// 1. Exact report number match
// 2. Company + role fuzzy match
// 1. Report number match (same company only)
// 2. Entry number match (same company only)
// 3. Company + role fuzzy match
const reportNum = extractReportNum(addition.report);
const normCompany = normalizeCompany(addition.company);
const sameCompany = (app) => normalizeCompany(app.company) === normCompany;
let duplicate = null;

if (reportNum) {
// Check if this report number already exists
duplicate = existingApps.find(app => {
const existingReportNum = extractReportNum(app.report);
return existingReportNum === reportNum;
});
// Report number already used — but only when the company also matches.
// The same drift described for entry numbers below applies here: report
// numbering is its own sequence, so a tracker row whose link happens to
// carry the same number (manually added rows, renumbering after drift)
// is unrelated unless the company matches too. An unguarded hit here
// overwrote other companies' rows (#912).
duplicate = existingApps.find(app =>
sameCompany(app) && extractReportNum(app.report) === reportNum);
}

if (!duplicate) {
Expand All @@ -374,19 +381,15 @@ for (const file of tsvFiles) {
// 067 while the tracker was already at #69). A bare num collision across
// *different* companies is that drift, not a duplicate — matching on num
// alone silently merges a brand-new role into an unrelated existing row.
const normCompany = normalizeCompany(addition.company);
duplicate = existingApps.find(app =>
app.num === addition.num && normalizeCompany(app.company) === normCompany
app.num === addition.num && sameCompany(app)
);
}

if (!duplicate) {
// Company + role fuzzy match
const normCompany = normalizeCompany(addition.company);
duplicate = existingApps.find(app => {
if (normalizeCompany(app.company) !== normCompany) return false;
return roleFuzzyMatch(addition.role, app.role);
});
duplicate = existingApps.find(app =>
sameCompany(app) && roleFuzzyMatch(addition.role, app.role));
}

if (duplicate) {
Expand All @@ -406,7 +409,9 @@ for (const file of tsvFiles) {
skipped++;
}
} else {
// New entry — use the number from the TSV
// New entry — use the TSV's number if it extends the sequence; otherwise
// (number already taken, e.g. a report number colliding with another
// company's row) append with the next free number instead of reusing it.
const entryNum = addition.num > maxNum ? addition.num : ++maxNum;
if (addition.num > maxNum) maxNum = addition.num;

Expand Down
76 changes: 76 additions & 0 deletions test-all.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,82 @@ try {
fail(`merge-tracker fuzzy dedup tests crashed: ${e.message}`);
}

// ── MERGE-TRACKER NUMBER-COLLISION GUARD (#912) ─────────────────
// TSV numbers carry the REPORT sequence while the tracker `#` column is its
// own row sequence, and the two drift (#867). A TSV whose report number
// collides with the report number on an unrelated company's row must NOT be
// treated as a duplicate — the unguarded report-number match overwrote that
// row's company/role/notes. The colliding TSV has to be appended as a new
// row with the next free number, leaving the existing row byte-identical.
console.log('\n🧪 Testing merge-tracker number-collision guard (#912)...');
try {
const collTmp = mkdtempSync(join(tmpdir(), 'career-ops-collision-'));
try {
mkdirSync(join(collTmp, 'data'));
mkdirSync(join(collTmp, 'reports'));
const additionsDir = join(collTmp, 'additions');
mkdirSync(additionsDir);
const tracker = join(collTmp, 'data', 'applications.md');
const mergeEnv = { ...process.env, CAREER_OPS_TRACKER: tracker, CAREER_OPS_ADDITIONS: additionsDir };

// Row #1 carries report number 1 — the same number the new TSV brings.
const oldcoRow = '| 1 | 2026-01-09 | OldCo | Senior Widget Engineer | 4.1/5 | Interview | ❌ | [1](../reports/001-oldco-2026-01-09.md) | precious row-one notes |';
writeFileSync(tracker,
'# Applications Tracker\n\n' +
'| # | Date | Company | Role | Score | Status | PDF | Report | Notes |\n' +
'|---|------|---------|------|-------|--------|-----|--------|-------|\n' +
oldcoRow + '\n' +
'| 2 | 2026-01-01 | OtherCo | Widget Engineer | N/A | Applied | ❌ | — | row two |\n');
for (const n of ['001-oldco-2026-01-09', '001-newco-2026-01-10', '001-newco-2026-01-11']) {
writeFileSync(join(collTmp, 'reports', `${n}.md`), '# fixture\n');
}

// Colliding TSV: report number 1 (= row #1's report number), DIFFERENT company,
// higher score than row #1 so the update-in-place branch would fire if matched.
writeFileSync(join(additionsDir, '001-newco.tsv'),
'1\t2026-01-10\tNewCo\tPlatform Reliability Lead\tEvaluated\t4.7/5\t❌\t[1](reports/001-newco-2026-01-10.md)\tfresh eval\n');

run(NODE, ['merge-tracker.mjs'], { env: mergeEnv });
const after = readFileSync(tracker, 'utf-8');

if (after.includes(oldcoRow)) {
pass('report-number collision: existing row #1 (OldCo) left byte-identical');
} else {
fail('report-number collision OVERWROTE row #1 (#912 regression — bare number match must not dedup)');
}

const newcoRows = after.split('\n').filter(l => l.includes('NewCo'));
if (newcoRows.length === 1 && /^\| 3 \| 2026-01-10 \| NewCo \| Platform Reliability Lead \|/.test(newcoRows[0])) {
pass('colliding TSV appended as a new row with the next free number (#3)');
} else {
fail(`colliding TSV not appended as #3: ${JSON.stringify(newcoRows)}`);
}

// Positive control: SAME company + same report number with a higher score
// must still UPDATE in place via the (now company-guarded) report-number path.
writeFileSync(join(additionsDir, '001-newco.tsv'),
'1\t2026-01-11\tNewCo\tPlatform Reliability Lead\tEvaluated\t4.9/5\t❌\t[1](reports/001-newco-2026-01-11.md)\tre-evaluated\n');

run(NODE, ['merge-tracker.mjs'], { env: mergeEnv });
const after2 = readFileSync(tracker, 'utf-8');
const newcoRows2 = after2.split('\n').filter(l => l.includes('NewCo'));
if (newcoRows2.length === 1 && newcoRows2[0].startsWith('| 3 |') && newcoRows2[0].includes('4.9/5')) {
pass('same-company report-number match still updates in place (no duplicate row)');
} else {
fail(`same-company re-eval did not update #3 in place: ${JSON.stringify(newcoRows2)}`);
}
if (after2.includes(oldcoRow)) {
pass('row #1 still intact after the same-company re-eval merge');
} else {
fail('row #1 was modified by the same-company re-eval merge');
}
} finally {
rmSync(collTmp, { recursive: true, force: true });
}
} catch (e) {
fail(`merge-tracker number-collision tests crashed: ${e.message}`);
}

// ── 12. COLD-START TRIGGER ──────────────────────────────────────

console.log('\n12. Cold-start trigger (deterministic onboarding state)');
Expand Down