Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 49 additions & 17 deletions generate-pdf.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
*/

import { chromium } from 'playwright';
import { resolve, dirname } from 'path';
import { resolve, dirname, relative, isAbsolute } from 'path';
import { readFile } from 'fs/promises';
import { mkdirSync } from 'fs';
import { fileURLToPath } from 'url';
import { fileURLToPath, pathToFileURL } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));

Expand Down Expand Up @@ -207,7 +207,6 @@ async function generatePDF() {
console.log(`📁 Output: ${outputPath}`);
console.log(`📏 Format: ${format.toUpperCase()}`);

// Read HTML to inject font paths as absolute file:// URLs
let html = await readFile(inputPath, 'utf-8');
let cvMarkdown = '';
try {
Expand All @@ -217,18 +216,6 @@ async function generatePDF() {
}
validateCvSectionOrder(html, cvMarkdown);

// Resolve font paths relative to career-ops/fonts/
const fontsDir = resolve(__dirname, 'fonts');
html = html.replace(
/url\(['"]?\.\/fonts\//g,
`url('file://${fontsDir}/`
);
// Close any unclosed quotes from the replacement (handles all font formats)
html = html.replace(
/file:\/\/([^'")]+)\.(woff2?|ttf|otf)['"]?\)/g,
`file://$1.$2')`
);

// Normalize text for ATS compatibility (issue #1)
const normalized = normalizeTextForATS(html);
html = normalized.html;
Expand All @@ -241,9 +228,52 @@ async function generatePDF() {
return renderHtmlToPdf(html, outputPath, { format, baseDir: dirname(inputPath) });
}

/**
 * Inline url('./fonts/...') references as base64 data: URLs.
 *
 * Chromium refuses to load file:// subresources from a setContent() page
 * (the document stays at about:blank), so fonts referenced by path are
 * silently dropped and PDFs fall back to system fonts. data: URLs carry
 * no origin restriction, so they load from any page. See #951.
 *
 * A reference keeps its original text (and a warning is logged) when it
 *   - resolves outside the fonts/ directory,
 *   - names a file that does not exist, or
 *   - cannot be a file at all (it is a directory, or one of its parent
 *     path segments is a regular file).
 * Any other read failure (e.g. permissions) is rethrown to the caller.
 *
 * @param {string} html - HTML that may reference url('./fonts/<file>').
 * @returns {Promise<string>} HTML with local font references inlined.
 * @throws {Error} When a referenced font exists but cannot be read.
 */
export async function inlineLocalFonts(html) {
  // Group 1 = optional quote (must be matched by the closing quote),
  // group 2 = everything after "./fonts/" up to the quote / paren / whitespace.
  const FONT_REF = /url\(\s*(['"]?)\.\/fonts\/([^'")\s]+)\1\s*\)/g;
  const FALLBACK_MIME = 'application/octet-stream';
  // A Map (not an object literal) so extensions such as "constructor" or
  // "__proto__" cannot resolve to inherited Object.prototype members.
  const MIME = new Map([
    ['woff2', 'font/woff2'],
    ['woff', 'font/woff'],
    ['otf', 'font/otf'],
    ['ttf', 'font/ttf'],
  ]);
  const fontsDir = resolve(__dirname, 'fonts');

  // Each distinct file is read and encoded once, however often it is referenced.
  const names = new Set(Array.from(html.matchAll(FONT_REF), (m) => m[2]));
  const dataUrls = new Map();

  for (const name of names) {
    // Containment check: ".." segments and absolute names (./fonts//etc/passwd)
    // would otherwise resolve outside fonts/. Compare the first path *segment*
    // rather than a string prefix so a file literally named "..foo.woff2"
    // inside fonts/ is still accepted.
    const fontPath = resolve(fontsDir, name);
    const rel = relative(fontsDir, fontPath);
    const [firstSegment] = rel.split(/[\\/]/);
    if (firstSegment === '..' || isAbsolute(rel)) {
      console.warn(`⚠️ Font reference escapes fonts/, keeping original reference: ${name}`);
      continue;
    }

    try {
      const buf = await readFile(fontPath);
      const ext = name.slice(name.lastIndexOf('.') + 1).toLowerCase();
      const mime = MIME.get(ext) ?? FALLBACK_MIME;
      dataUrls.set(name, `url('data:${mime};base64,${buf.toString('base64')}')`);
    } catch (err) {
      if (err?.code === 'ENOENT') {
        console.warn(`⚠️ Font file not found, keeping original reference: fonts/${name}`);
      } else if (err?.code === 'EISDIR' || err?.code === 'ENOTDIR') {
        // e.g. url('./fonts/subdir') or url('./fonts/a.woff2/x'): a bad
        // reference, not an I/O failure, so do not abort the whole render.
        console.warn(`⚠️ Font reference is not a file, keeping original reference: fonts/${name}`);
      } else {
        throw err;
      }
    }
  }

  // References without an entry in dataUrls (skipped above) are left verbatim.
  return html.replace(FONT_REF, (match, _quote, name) => dataUrls.get(name) ?? match);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
* Render an HTML string to a PDF file via headless Chromium.
*
* Local url('./fonts/...') references are inlined as data: URLs first so
* fonts render regardless of page origin (see inlineLocalFonts).
*
* @param {string} html - Full HTML document to render.
* @param {string} outputPath - Absolute path to write the PDF to.
* @param {{format?: 'a4'|'letter', baseDir?: string}} [opts]
Expand All @@ -255,14 +285,16 @@ export async function renderHtmlToPdf(html, outputPath, opts = {}) {

mkdirSync(dirname(outputPath), { recursive: true });

html = await inlineLocalFonts(html);

const browser = await chromium.launch({ headless: true });
try {
const page = await browser.newPage();

// Set content with file base URL for any relative resources
await page.setContent(html, {
waitUntil: 'load',
baseURL: `file://${baseDir}/`,
baseURL: `${pathToFileURL(baseDir).href}/`,
});

// Wait for fonts to load
Expand Down Expand Up @@ -299,7 +331,7 @@ export async function renderHtmlToPdf(html, outputPath, opts = {}) {
}
}

const isMain = process.argv[1] && import.meta.url === `file://${resolve(process.argv[1])}`;
const isMain = process.argv[1] && import.meta.url === pathToFileURL(resolve(process.argv[1])).href;
if (isMain) {
generatePDF().catch((err) => {
console.error('❌ PDF generation failed:', err.message);
Expand Down
47 changes: 47 additions & 0 deletions test-all.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2200,6 +2200,53 @@ try {
fail(`update-system SEMVER_RE test crashed: ${e.message}`);
}

// ── 17. FONT INLINING (#951) ────────────────────────────────────

console.log('\n17. Font inlining (data: URLs, #951)');

try {
  // Importing must not trigger the CLI (the import.meta.url guard); it
  // exposes inlineLocalFonts, which renderHtmlToPdf runs before setContent.
  const { inlineLocalFonts } = await import(pathToFileURL(join(ROOT, 'generate-pdf.mjs')).href);

  // Chromium blocks file:// subresources from setContent() pages (the page
  // stays at about:blank), so ./fonts refs must become data: URLs (#951).
  const fontFile = readdirSync(join(ROOT, 'fonts')).find(f => f.endsWith('.woff2'));
  if (!fontFile) {
    // Without a fixture the check below would inline "./fonts/undefined" and
    // report a misleading "not inlined" failure; name the real problem instead.
    fail('no .woff2 file found in fonts/ — cannot verify font inlining (#951)');
  } else {
    const inlined = await inlineLocalFonts(
      `<style>@font-face { src: url('./fonts/${fontFile}') format('woff2'); }</style>`
    );
    if (inlined.includes('data:font/woff2;base64,') && !inlined.includes('./fonts/')) {
      pass('local ./fonts references are inlined as data: URLs');
    } else {
      fail('./fonts reference was not inlined as a data: URL — fonts will silently fall back (#951)');
    }
  }

  // Shared check: the given reference must come back byte-for-byte unchanged.
  const expectUntouched = async (ref, okMessage, failMessage) => {
    const cssUrl = `url('${ref}')`;
    const output = await inlineLocalFonts(`<style>src: ${cssUrl};</style>`);
    if (output.includes(cssUrl)) {
      pass(okMessage);
    } else {
      fail(failMessage);
    }
  };

  // A missing font file must not corrupt the HTML or throw.
  await expectUntouched(
    './fonts/does-not-exist.woff2',
    'missing font files keep their original reference',
    'missing font file mangled the url() reference'
  );

  // Traversal outside fonts/ must never be inlined — neither via ".."
  // segments nor via absolute names (resolve() returns those verbatim).
  await expectUntouched(
    './fonts/../cv.md',
    'path traversal outside fonts/ is not inlined',
    'path traversal escaped the fonts/ directory'
  );
  await expectUntouched(
    './fonts//etc/passwd',
    'absolute-path escape (./fonts//etc/passwd) is not inlined',
    'absolute-path reference escaped the fonts/ directory'
  );
} catch (e) {
  fail(`font inlining test crashed: ${e.message}`);
}

// ── SUMMARY ─────────────────────────────────────────────────────

console.log('\n' + '='.repeat(50));
Expand Down