// ---------------------------------------------------------------------------
// build-cv-latex.mjs
//
// Patch header this module was carried in (kept for reference):
//   diff --git a/build-cv-latex.mjs b/build-cv-latex.mjs
//   new file mode 100644
//   index 000000000..ea44bb2f7
//   --- /dev/null
//   +++ b/build-cv-latex.mjs
//   @@ -0,0 +1,336 @@
//
// Purpose: merge a structured JSON CV payload into templates/cv-template.tex,
// escaping all user-supplied text for LaTeX, and print a JSON report.
//
// Usage:
//   node build-cv-latex.mjs <input.json> <output.tex>
//   node build-cv-latex.mjs --test
//
// NOTE: when this module is saved as an executable script, its very first line
// must be the hashbang `#!/usr/bin/env node` (a hashbang is only valid at the
// start of the file, so it is recorded here instead of being emitted mid-file).
// ---------------------------------------------------------------------------

import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'fs/promises';
import { existsSync } from 'fs';
import { tmpdir } from 'os';
import { resolve, dirname, basename, join } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const TEMPLATE_PATH = resolve(__dirname, 'templates', 'cv-template.tex');
const PLACEHOLDER_RE = /\{\{[A-Z_]+\}\}/g;

/** Characters that must be rewritten before text is inserted into LaTeX. */
const LATEX_ESCAPES = new Map([
  ['\\', '\\textbackslash{}'],
  ['{', '\\{'],
  ['}', '\\}'],
  ['^', '\\textasciicircum{}'],
  ['~', '\\textasciitilde{}'],
  ['_', '\\_'],
  ['&', '\\&'],
  ['%', '\\%'],
  ['$', '\\$'],
  ['#', '\\#'],
  ['\u00B1', '$\\pm$'],
  ['\u2192', '$\\rightarrow$'],
]);

/** URL schemes that may appear in an `\href{}` target. */
const ALLOWED_URL_SCHEMES = ['mailto:', 'http:', 'https:'];

/**
 * Expected, user-facing failure (bad input, missing template, ...).
 * `code` lets callers choose their own wording; `detail` carries extra data.
 */
class BuildError extends Error {
  constructor(code, message, { detail, cause } = {}) {
    super(message, cause === undefined ? undefined : { cause });
    this.name = 'BuildError';
    this.code = code;
    this.detail = detail;
  }
}

/**
 * Escape text for safe insertion into LaTeX source.
 *
 * @param {unknown} text - Value to escape. Strings are escaped; finite numbers
 *   are stringified first (JSON payloads often carry e.g. `"dates": 2024`);
 *   anything else yields ''.
 * @param {'text'|'url'} [mode='text'] - 'url' returns the value untouched.
 * @returns {string} The escaped text.
 */
function escapeLatex(text, mode = 'text') {
  const value = typeof text === 'number' && Number.isFinite(text) ? String(text) : text;
  if (typeof value !== 'string') return '';
  if (mode === 'url') return value;
  // Array.from iterates by code point, so astral characters stay intact.
  return Array.from(value, (ch) => LATEX_ESCAPES.get(ch) ?? ch).join('');
}

/**
 * Normalise a URL for use as the first argument of `\href{}`.
 *
 * Ensures an allowed scheme is present (bare e-mail addresses get `mailto:`,
 * everything else `https://`) and strips the LaTeX-special characters
 * `{ } % $ # \ ~ ^`.
 *
 * NOTE(review): stripping `%`, `#` and `~` alters percent-encoded URLs,
 * fragments and tilde paths. Kept as-is because the project docs describe this
 * removal as the injection defence; confirm before relaxing.
 *
 * @param {unknown} url - Raw URL or e-mail address.
 * @returns {string} Sanitised URL, or '' for empty/non-string input.
 */
function sanitizeUrl(url) {
  if (typeof url !== 'string') return '';
  const trimmed = url.trim();
  if (!trimmed) return '';
  const lower = trimmed.toLowerCase();
  let withScheme = trimmed;
  if (!ALLOWED_URL_SCHEMES.some((scheme) => lower.startsWith(scheme))) {
    const looksLikeEmail = trimmed.includes('@') && !trimmed.includes('/');
    withScheme = `${looksLikeEmail ? 'mailto:' : 'https://'}${trimmed}`;
  }
  return withScheme.replace(/[{}%$#\\~^]/g, '');
}

/**
 * Render bullets as a `\resumeItemListStart ... \resumeItemListEnd` block.
 * Returns '' when there is nothing to render: an empty list environment makes
 * LaTeX fail with "perhaps a missing \item".
 *
 * @param {unknown} items - Array of bullet strings (other values are ignored).
 * @returns {string} The list block, or ''.
 */
function renderItemList(items) {
  if (!Array.isArray(items)) return '';
  const rendered = items
    .map((item) => escapeLatex(item))
    .filter((text) => text.trim() !== '')
    .map((text) => `        \\resumeItem{${text}}`);
  if (rendered.length === 0) return '';
  return ['      \\resumeItemListStart', ...rendered, '      \\resumeItemListEnd'].join('\n');
}

/**
 * Build the EDUCATION section.
 *
 * @param {unknown} entries - Array of { institution, location, degree, dates, coursework? }.
 * @returns {string} LaTeX blocks separated by blank lines, or ''.
 */
function buildEducation(entries) {
  if (!Array.isArray(entries)) return '';
  return entries
    .filter(Boolean)
    .map((e) => {
      const heading = [
        '    \\resumeSubheading',
        `      {${escapeLatex(e.institution)}}{${escapeLatex(e.location)}}`,
        `      {${escapeLatex(e.degree)}}{${escapeLatex(e.dates)}}`,
      ].join('\n');
      const courses = Array.isArray(e.coursework)
        ? e.coursework.map((c) => escapeLatex(c)).filter((c) => c.trim() !== '')
        : [];
      if (courses.length === 0) return heading;
      return [
        heading,
        '      \\resumeItemListStart',
        `        \\resumeItem{\\textbf{Coursework:} ${courses.join(', ')}}`,
        '      \\resumeItemListEnd',
      ].join('\n');
    })
    .join('\n\n');
}

/**
 * Build the EXPERIENCE section.
 *
 * @param {unknown} entries - Array of { company, role, location, dates, bullets }.
 * @returns {string} LaTeX blocks separated by blank lines, or ''.
 */
function buildExperience(entries) {
  if (!Array.isArray(entries)) return '';
  return entries
    .filter(Boolean)
    .map((e) => {
      const heading = [
        '    \\resumeSubheading',
        `      {${escapeLatex(e.company)}}{${escapeLatex(e.dates)}}`,
        `      {${escapeLatex(e.role)}}{${escapeLatex(e.location)}}`,
      ].join('\n');
      const list = renderItemList(e.bullets);
      return list ? `${heading}\n${list}` : heading;
    })
    .join('\n\n');
}

/**
 * Build the PROJECTS section.
 *
 * @param {unknown} entries - Array of { name, context?, dates, bullets }.
 * @returns {string} LaTeX blocks separated by blank lines, or ''.
 */
function buildProjects(entries) {
  if (!Array.isArray(entries)) return '';
  return entries
    .filter(Boolean)
    .map((e) => {
      const context = e.context ? ` \\emph{$|$ ${escapeLatex(e.context)}}` : '';
      const heading = [
        '    \\resumeProjectHeading',
        `      {\\textbf{${escapeLatex(e.name)}}${context}}{${escapeLatex(e.dates)}}`,
      ].join('\n');
      const list = renderItemList(e.bullets);
      return list ? `${heading}\n${list}` : heading;
    })
    .join('\n\n');
}

/**
 * Build the SKILLS section.
 *
 * @param {unknown} categories - Array of { category, items } where `items` is
 *   a comma-separated string or an array of strings.
 * @returns {string} One `\textbf{Category}{: items} \\` line per category, or ''.
 */
function buildSkills(categories) {
  if (!Array.isArray(categories)) return '';
  return categories
    .filter(Boolean)
    .map((c) => {
      const items = Array.isArray(c.items) ? c.items.join(', ') : (c.items || '');
      return `     \\textbf{${escapeLatex(c.category)}}{: ${escapeLatex(items)}} \\\\`;
    })
    .join('\n');
}

/**
 * Map every template placeholder name to its final LaTeX replacement.
 *
 * @param {object} payload - Parsed CV payload.
 * @returns {Record<string, string>} Placeholder name -> replacement text.
 */
function buildSubstitutions(payload) {
  const emailUrl = sanitizeUrl(payload.email?.url);
  // Fall back to the bare address: the sanitised URL carries a `mailto:` scheme
  // that should not be shown as display text.
  const emailDisplay = payload.email?.display || emailUrl.replace(/^mailto:/i, '');
  return {
    NAME: escapeLatex(payload.name),
    CONTACT_LINE: escapeLatex(payload.contact_line),
    EMAIL_URL: emailUrl,
    EMAIL_DISPLAY: escapeLatex(emailDisplay),
    LINKEDIN_URL: sanitizeUrl(payload.linkedin?.url),
    LINKEDIN_DISPLAY: escapeLatex(payload.linkedin?.display),
    GITHUB_URL: sanitizeUrl(payload.github?.url),
    GITHUB_DISPLAY: escapeLatex(payload.github?.display),
    EDUCATION: buildEducation(payload.education),
    EXPERIENCE: buildExperience(payload.experience),
    PROJECTS: buildProjects(payload.projects),
    SKILLS: buildSkills(payload.skills),
  };
}

/**
 * Replace `{{KEY}}` placeholders in a single pass.
 *
 * A replacer function is used on purpose: with a string replacement,
 * `String.prototype.replace` would interpret `$'`, `` $` ``, `$&` and `$$`
 * inside user content (e.g. the escaped text `\$'s`). Unknown placeholders are
 * left in place so they can be reported.
 *
 * @param {string} template - Template source.
 * @param {Record<string, string>} substitutions - Placeholder name -> text.
 * @returns {string} Template with known placeholders replaced.
 */
function fillTemplate(template, substitutions) {
  return template.replace(/\{\{([A-Z_]+)\}\}/g, (placeholder, key) => (
    Object.prototype.hasOwnProperty.call(substitutions, key) ? substitutions[key] : placeholder
  ));
}

/**
 * @param {string} text - Rendered template.
 * @returns {string[]} Distinct `{{PLACEHOLDER}}` tokens still present.
 */
function findUnresolvedPlaceholders(text) {
  return [...new Set(text.match(PLACEHOLDER_RE) ?? [])];
}

/** @returns {number} Length of `value` if it is an array, else 0. */
function countEntries(value) {
  return Array.isArray(value) ? value.length : 0;
}

/** @returns {number} Total number of `bullets` across all entries. */
function countBullets(entries) {
  if (!Array.isArray(entries)) return 0;
  return entries.reduce((total, entry) => total + countEntries(entry?.bullets), 0);
}

/**
 * @param {object} payload - Parsed CV payload.
 * @returns {object} Section and bullet counts for the report.
 */
function summarizeCounts(payload) {
  return {
    educationEntries: countEntries(payload.education),
    experienceEntries: countEntries(payload.experience),
    projectEntries: countEntries(payload.projects),
    skillCategories: countEntries(payload.skills),
    totalBullets: countBullets(payload.experience) + countBullets(payload.projects),
  };
}

/**
 * Read and parse the JSON payload.
 *
 * @param {string} absInput - Absolute path of the input JSON file.
 * @returns {Promise<object>} The parsed payload (always a plain object).
 * @throws {BuildError} INPUT_NOT_FOUND or INVALID_JSON.
 */
async function loadPayload(absInput) {
  if (!existsSync(absInput)) {
    throw new BuildError('INPUT_NOT_FOUND', `Input file not found: ${absInput}`);
  }
  let payload;
  try {
    payload = JSON.parse(await readFile(absInput, 'utf-8'));
  } catch (err) {
    throw new BuildError('INVALID_JSON', `Failed to parse input JSON: ${err.message}`, { cause: err });
  }
  // `null`, arrays and primitives are valid JSON but not a usable payload.
  if (payload === null || typeof payload !== 'object' || Array.isArray(payload)) {
    throw new BuildError('INVALID_JSON', 'Failed to parse input JSON: top-level value must be an object');
  }
  return payload;
}

/**
 * Render the template with `payload` and write the result to `absOutput`.
 *
 * @param {object} payload - Parsed CV payload.
 * @param {string} absOutput - Absolute path of the .tex file to write.
 * @returns {Promise<object>} Report: { file, path, sizeKB, counts }.
 * @throws {BuildError} TEMPLATE_NOT_FOUND or UNRESOLVED_PLACEHOLDERS.
 */
async function generateTex(payload, absOutput) {
  if (!existsSync(TEMPLATE_PATH)) {
    throw new BuildError('TEMPLATE_NOT_FOUND', `Template not found: ${TEMPLATE_PATH}`);
  }
  const template = await readFile(TEMPLATE_PATH, 'utf-8');
  const rendered = fillTemplate(template, buildSubstitutions(payload));

  const unresolved = findUnresolvedPlaceholders(rendered);
  if (unresolved.length > 0) {
    throw new BuildError(
      'UNRESOLVED_PLACEHOLDERS',
      `Unresolved placeholders: ${unresolved.join(', ')}`,
      { detail: unresolved },
    );
  }

  // `recursive: true` is a no-op when the directory already exists.
  await mkdir(dirname(absOutput), { recursive: true });
  await writeFile(absOutput, rendered, 'utf-8');
  const { size } = await stat(absOutput);

  return {
    file: basename(absOutput),
    path: absOutput,
    sizeKB: Number.parseFloat((size / 1024).toFixed(1)),
    counts: summarizeCounts(payload),
  };
}

/**
 * Report an expected failure. Sets `process.exitCode` instead of calling
 * `process.exit()` so pending stdout/stderr writes are flushed before exit.
 */
function fail(message) {
  console.error(message);
  process.exitCode = 1;
}

/** CLI entry point: parse arguments, build the .tex file, print the report. */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help')) {
    console.error('Usage:');
    console.error('  node build-cv-latex.mjs <input.json> <output.tex>');
    console.error('  node build-cv-latex.mjs --test');
    process.exitCode = 1;
    return;
  }

  if (args.includes('--test')) {
    await runSelfTest();
    return;
  }

  const [inputPath, outputPath] = args;
  if (!inputPath || !outputPath) {
    fail('Usage: node build-cv-latex.mjs <input.json> <output.tex>');
    return;
  }

  try {
    const payload = await loadPayload(resolve(inputPath));
    const report = await generateTex(payload, resolve(outputPath));
    console.log(JSON.stringify({ ...report, valid: true }, null, 2));
  } catch (err) {
    if (!(err instanceof BuildError)) throw err;
    fail(err.message);
  }
}

/** Word a BuildError the way the self-test reports it. */
function selfTestMessage(err) {
  switch (err.code) {
    case 'TEMPLATE_NOT_FOUND':
      return `Self-test failed: template not found at ${TEMPLATE_PATH}`;
    case 'UNRESOLVED_PLACEHOLDERS':
      return `Self-test failed: unresolved placeholders: ${err.detail.join(', ')}`;
    default:
      return `Self-test failed: ${err.message}`;
  }
}

/**
 * End-to-end self-test: writes a sample payload to a private temp directory,
 * runs it through the same load/render/write path as a real build, prints the
 * report, and always removes the temp directory afterwards.
 */
async function runSelfTest() {
  const sample = {
    name: 'Test Candidate',
    contact_line: 'City, State | +1 234 567 8900',
    email: { url: 'test@example.com', display: 'test@example.com' },
    linkedin: { url: 'https://linkedin.com/in/test', display: 'linkedin.com/in/test' },
    github: { url: 'https://github.com/test', display: 'github.com/test' },
    education: [{
      institution: 'Test University',
      location: 'City, State',
      degree: 'Bachelor of Science in Testing',
      dates: '2020 - 2024',
      coursework: ['Data Structures', 'Algorithms', 'Machine Learning'],
    }],
    experience: [{
      company: 'Test Corp',
      role: 'Test Engineer',
      location: 'Remote',
      dates: 'June 2024 - Present',
      bullets: [
        'Built automated testing pipelines with CI/CD integration',
        'Reduced regression test time by 60% through parallel execution',
      ],
    }],
    projects: [{
      name: 'Test Project',
      context: 'Python, FastAPI, Docker',
      dates: '2024',
      bullets: [
        'Built a REST API with automated test coverage exceeding 90%',
      ],
    }],
    skills: [
      { category: 'Languages', items: 'Python, JavaScript, TypeScript' },
      { category: 'Frameworks', items: 'FastAPI, React, PyTorch' },
    ],
  };

  // A unique directory under the OS temp dir: portable, and avoids the
  // predictable fixed file names of a hard-coded /tmp path.
  const workDir = await mkdtemp(join(tmpdir(), 'build-cv-latex-test-'));
  const tmpInput = join(workDir, 'build-cv-latex-test-input.json');
  const testOutput = join(workDir, 'build-cv-latex-test.tex');

  try {
    await writeFile(tmpInput, JSON.stringify(sample, null, 2), 'utf-8');
    const payload = await loadPayload(tmpInput);
    const report = await generateTex(payload, testOutput);
    console.log(JSON.stringify({ status: 'self-test-passed', ...report }, null, 2));
  } catch (err) {
    if (!(err instanceof BuildError)) throw err;
    fail(selfTestMessage(err));
  } finally {
    // Best-effort cleanup: a failure to delete temp files must not mask the result.
    await rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
}

main().catch((err) => {
  // Unexpected failure (I/O error, programming bug): show it and exit non-zero.
  console.error(err?.stack ?? String(err));
  process.exitCode = 1;
});

// ---------------------------------------------------------------------------
// Remainder of the patch that shared these source lines (docs/SCRIPTS.md hunk),
// preserved verbatim as a comment so no patch content is lost:
//
// diff --git a/docs/SCRIPTS.md b/docs/SCRIPTS.md
// index 149af5785..df22ec846 100644
// --- a/docs/SCRIPTS.md
// +++ b/docs/SCRIPTS.md
// @@ -12,6 +12,7 @@ All scripts live in the project root as `.mjs` modules and are exposed via `npm
//  | `npm run dedup` | `dedup-tracker.mjs` | Remove duplicate tracker entries |
//  | `npm run merge` | `merge-tracker.mjs` | Merge batch TSVs into applications.md |
//  | `npm run pdf` | `generate-pdf.mjs` | Convert HTML to ATS-optimized PDF |
// +| `npm run build:latex` | `build-cv-latex.mjs` | Build .tex from structured JSON payload |
//  | `npm run sync-check` | `cv-sync-check.mjs` | Validate CV/profile consistency |
//  | `npm run patterns` | `analyze-patterns.mjs` | Analyze tracker outcomes and report patterns |
//  | `npm run update:check` | `update-system.mjs check` | Check for upstream updates |
// @@ -124,6 +125,19 @@ npm run pdf -- input.html output.pdf --format=a4 # A4 (default)
//
//  ---
//
// +## build:latex
// +
// +Builds a `.tex` file from a structured JSON payload, handling template merge and LaTeX escaping automatically. The JSON is produced by the agent during evaluation — this script replaces the manual LaTeX generation step in `modes/latex.md`.
// +
// +```bash
// +node build-cv-latex.mjs input.json output.tex
// +node build-cv-latex.mjs --test
// +```
// +
// +**Exit codes:** `0` file generated, `1` missing inputs, invalid JSON, unresolved placeholders, or template not found.
// +
// +---
// +
//  ## sync-check
//
//  Validates that the career-ops setup is internally consistent: `cv.md` exists and is not too short, `config/profile.yml` exists with required fields, no hardcoded metrics in `modes/_shared.md` or `batch/batch-prompt.md`, and `article-digest.md` freshness (warns if older than 30 days).
// ---------------------------------------------------------------------------
diff --git a/modes/latex.md b/modes/latex.md index 02c0887bc..c003971e0 100644 --- a/modes/latex.md +++ b/modes/latex.md @@ -14,89 +14,95 @@ Export a tailored, ATS-optimized CV as a `.tex` file and compile it to PDF via ` 8. Select top 3-4 most relevant projects for the offer 9. Reorder experience bullets by JD relevance 10. Inject keywords naturally into existing achievements -11. Generate the `.tex` file using `templates/cv-template.tex` -12. Write to `output/cv-{candidate}-{company}-{YYYY-MM-DD}.tex` +11. Build a JSON payload (see schema below) and write to `/tmp/cv-{candidate}-{company}.json` +12. Run: `node build-cv-latex.mjs /tmp/cv-{candidate}-{company}.json output/cv-{candidate}-{company}-{YYYY-MM-DD}.tex` 13. Run: `node generate-latex.mjs output/cv-{candidate}-{company}-{YYYY-MM-DD}.tex output/cv-{candidate}-{company}-{YYYY-MM-DD}.pdf` + *(Replace `{candidate}`, `{company}`, `{YYYY-MM-DD}` with actual values.)* 14. Report: .tex path, .pdf path, file sizes, section count, keyword coverage % **Requires:** `tectonic` (preferred — `brew install tectonic`, auto-downloads packages) or `pdflatex` (MiKTeX / TeX Live) on PATH. -## Template Placeholders - -The template at `templates/cv-template.tex` uses `{{PLACEHOLDER}}` syntax: - -| Placeholder | Source | -|-------------|--------| -| `{{NAME}}` | `profile.yml → candidate.full_name` | -| `{{CONTACT_LINE}}` | Phone / City, State / Visa status — built from profile.yml | -| `{{EMAIL_URL}}` | Raw email for `mailto:` URL — must not be LaTeX-escaped (from profile.yml) | -| `{{EMAIL_DISPLAY}}` | Escaped email for display text — LaTeX-special chars like `_` must be escaped, e.g. `first\_name@example.com` | -| `{{LINKEDIN_URL}}` | Full URL with scheme for `\href{}`: e.g. `https://linkedin.com/in/username`. If `profile.yml` stores a bare host+path (no scheme), prepend `https://` before substitution. 
| -| `{{LINKEDIN_DISPLAY}}` | Display text only (no scheme): `linkedin.com/in/username` | -| `{{GITHUB_URL}}` | Full URL with scheme for `\href{}`: e.g. `https://github.com/username`. If `profile.yml` stores a bare host+path, prepend `https://`. | -| `{{GITHUB_DISPLAY}}` | Display text only (no scheme): `github.com/username` | -| `{{EDUCATION}}` | LaTeX `\resumeSubheading` blocks from cv.md Education section | -| `{{EXPERIENCE}}` | LaTeX `\resumeSubheading` + `\resumeItem` blocks — reordered bullets | -| `{{PROJECTS}}` | LaTeX `\resumeProjectHeading` + `\resumeItem` blocks — top 3-4 selected | -| `{{SKILLS}}` | LaTeX `\textbf{Category}{: items}` lines from cv.md Technical Skills | - -## LaTeX Content Generation Rules - -### Education - -Each entry becomes: - -```latex - \resumeSubheading - {Institution}{City, State} - {Degree}{Date Range} +## JSON Input Schema + +Write a JSON file with this structure. `build-cv-latex.mjs` handles template merge and LaTeX escaping — no need to escape special characters yourself. 
+ +```json +{ + "name": "Jane Smith", + "contact_line": "San Francisco, CA | +1 415 555 0100", + "email": { "url": "jane@example.com", "display": "jane@example.com" }, + "linkedin": { "url": "https://linkedin.com/in/janesmith", "display": "linkedin.com/in/janesmith" }, + "github": { "url": "https://github.com/janesmith", "display": "github.com/janesmith" }, + "education": [ + { + "institution": "University Name", + "location": "City, State", + "degree": "Bachelor of Science in Computer Science", + "dates": "2018 - 2022", + "coursework": ["Data Structures", "Algorithms", "Machine Learning"] + } + ], + "experience": [ + { + "company": "Company Name", + "role": "Job Title", + "location": "Remote", + "dates": "June 2022 - Present", + "bullets": [ + "Achievement bullet with JD keywords injected", + "Another bullet with quantified impact" + ] + } + ], + "projects": [ + { + "name": "Project Name", + "context": "Tech stack summary for the project line", + "dates": "", + "bullets": [ + "What you built and what it does" + ] + } + ], + "skills": [ + { "category": "Languages", "items": "Python, JavaScript, C++" }, + { "category": "Frameworks", "items": "FastAPI, React, PyTorch" } + ] +} ``` -If coursework exists, add: - -```latex - \resumeItemListStart - \resumeItem{\textbf{Coursework:} Course1, Course2, ...} - \resumeItemListEnd -``` - -### Experience - -Each role becomes: - -```latex - \resumeSubheading - {Company}{Date Range} - {Role Title}{Location} - \resumeItemListStart - \resumeItem{Bullet text with JD keywords injected} - ... - \resumeItemListEnd -``` - -### Projects - -Each project becomes: - -```latex -\resumeProjectHeading{Project Name \emph{$|$ Affiliation/Context}}{Date} -\resumeItemListStart - \resumeItem{Bullet text} - ... 
-\resumeItemListEnd -``` - -### Skills - -```latex - \textbf{Languages}{: C, C++, Java, ...} \\ - \textbf{Frameworks \& ML}{: PyTorch, LangChain, ...} \\ - \textbf{Tools \& Cloud}{: Docker, Kubernetes, ...} -``` - -## LaTeX Escaping (CRITICAL) - -All text content MUST be escaped for LaTeX before insertion: +### Field reference + +| Field | Type | Source | +|-------|------|--------| +| `name` | string | `profile.yml → candidate.full_name` | +| `contact_line` | string | Phone / City, State / Visa — built from profile.yml | +| `email.url` | string | Email for `\href{mailto:...}` (sanitized via sanitizeUrl, not LaTeX-escaped) | +| `email.display` | string | Display text for the email link | +| `linkedin.url` | string | Full URL with scheme for `\href{}` (sanitized via sanitizeUrl, not LaTeX-escaped) | +| `linkedin.display` | string | Display text only (no scheme) | +| `github.url` | string | Full URL with scheme for `\href{}` (sanitized via sanitizeUrl, not LaTeX-escaped) | +| `github.display` | string | Display text only (no scheme) | +| `education[].institution` | string | From cv.md Education | +| `education[].location` | string | Institution location | +| `education[].degree` | string | Degree name | +| `education[].dates` | string | Date range | +| `education[].coursework` | string[] | Optional — generates a coursework line if present | +| `experience[].company` | string | From cv.md Experience | +| `experience[].role` | string | Job title | +| `experience[].location` | string | Work location | +| `experience[].dates` | string | Date range | +| `experience[].bullets` | string[] | Reordered and keyword-injected achievement bullets | +| `projects[].name` | string | From cv.md Projects | +| `projects[].context` | string | Tech stack — appears next to project name | +| `projects[].dates` | string | Date range (or empty) | +| `projects[].bullets` | string[] | Selected project achievements | +| `skills[].category` | string | Skill category name (e.g. 
"Languages", "Frameworks") | +| `skills[].items` | string | Comma-separated skills in that category | + +## LaTeX Escaping (handled by the script) + +`build-cv-latex.mjs` automatically escapes all user-supplied text before insertion: | Character | Escape | |-----------|--------| @@ -113,12 +119,7 @@ All text content MUST be escaped for LaTeX before insertion: | `±` | `$\pm$` | | `→` | `$\rightarrow$` | -**Exception:** Do NOT escape LaTeX commands themselves (`\resumeItem`, `\textbf`, etc.) — only user-supplied text content. - -**Exception for URLs:** Do NOT escape text inside `\href{URL}{...}` first arguments. The URL must remain raw (or RFC 3986 percent-encoded). Only escape the *display text* (second argument). For example: -```latex -\href{https://example.com/path_with_underscores}{Example\_Display} -``` +**Exception:** URLs inside `\href{}` are NOT escaped by the LaTeX escaper. Instead, `sanitizeUrl()` ensures each URL starts with an allowed scheme (`mailto:`, `http:`, or `https:`) — prepending `mailto:` to bare email addresses and `https://` to anything else that lacks one — and strips the LaTeX-special characters `{ } % $ # \ ~ ^` to prevent injection. Because `%`, `#`, and `~` are removed, percent-encoded URLs and URLs containing fragments or tildes will be altered. ## ATS Rules (same as pdf mode) diff --git a/update-system.mjs b/update-system.mjs index 6f53cbb6d..4db81a1f3 100644 --- a/update-system.mjs +++ b/update-system.mjs @@ -127,6 +127,7 @@ const SYSTEM_PATHS = [ 'CITATION.cff', '.github/', 'package.json', + 'build-cv-latex.mjs', 'scaffolder/', 'Dockerfile', 'docker-compose.yml',