From 43379452d733373f6cd4584309b8d62d47d19923 Mon Sep 17 00:00:00 2001 From: Jonathan Popham Date: Wed, 15 Apr 2026 13:40:15 -0400 Subject: [PATCH] fix(npm): use native Windows bsdtar, fail loud on empty extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real failure chain on Windows was two-layered: 1. GNU tar (resolved from PATH under Git Bash, npm's postinstall shell) parses `C:\...` as rsh host:path, prints "Cannot connect to C: resolve failed", and exits 0 with nothing extracted. The try/catch PowerShell fallback never fired because tar didn't throw. 2. Even when the fallback did run, Windows Defender held a scan-lock on the freshly-downloaded .exe-containing zip; Expand-Archive kept failing with "The process cannot access the file ... because it is being used by another process". The existing retry loop silently swallowed all 10 retries and returned, so the caller never knew extraction failed. Fix: - Call the native Windows bsdtar directly at %SystemRoot%\System32\tar.exe (Windows 10+). bsdtar handles both zip format and `C:\` paths natively and isn't blocked by Defender the way Expand-Archive is. - Keep PowerShell Expand-Archive as a fallback for older Windows, with the retry loop bumped from 10 → 20 attempts and — critically — a `throw $lastErr` so exhausted retries surface instead of silently returning. - Always verify `tmpDir` is non-empty after extraction; if a tool exits 0 without writing anything, treat that as failure and try the next one. - Wait for the download stream's `close` event, not just `finish`, so the fd is truly released before any extractor runs. Tests rewritten to cover: first-extractor success, fallback on throw, fallback on silent-empty success, throw-on-exhaustion for both failure modes, PS retry-loop shape, and path propagation. Closes #133 --- npm/install.js | 56 +++++++++--- npm/install.test.js | 206 ++++++++++++++++++++++---------------------- 2 files changed, 150 insertions(+), 112 deletions(-) diff --git a/npm/install.js b/npm/install.js index 652daec..c3b6b43 100644 --- a/npm/install.js +++ b/npm/install.js @@ -39,25 +39,59 @@ function download(url, dest, cb) { return fail(`HTTP ${res.statusCode} downloading ${url}`); } res.pipe(file); - file.on("finish", () => file.close(cb)); + // Wait for 'close' (not just 'finish'): on Windows the fd is not actually + // released until close completes, and subsequent extractors (PowerShell + // Expand-Archive in particular) will fail with a file-in-use error. + file.on("finish", () => file.close()); + file.on("close", () => cb()); }).on("error", (err) => fail(err.message)); } // extractZip extracts a .zip archive into tmpDir. -// Tries native tar first (Windows 10+); falls back to PowerShell Expand-Archive -// with a retry loop to handle transient Antivirus file locks. +// +// On Windows, PATH under Git Bash (npm's postinstall shell) resolves `tar` +// to GNU tar, which parses `C:\...` as `host:path` (rsh syntax), prints +// "Cannot connect to C: resolve failed", and exits 0 with nothing extracted. +// We explicitly call the native Windows bsdtar at %SystemRoot%\System32\tar.exe +// (present on Windows 10+), which handles both Windows paths and the zip +// format. On older Windows we fall back to PowerShell Expand-Archive with +// retries for Defender/indexer file locks. +// +// After extraction we always verify tmpDir is non-empty — some extractors +// (notably GNU tar in the failure mode above) exit 0 without writing anything. // Accepts an optional execFn for testing (defaults to execSync). function extractZip(archive, tmpDir, execFn) { const exec = execFn || execSync; - try { - exec(`tar -xf "${archive}" -C "${tmpDir}"`); - } catch { - const psCommand = - `$RetryCount = 0; while ($RetryCount -lt 10) { try { Expand-Archive` + - ` -Force -Path '${archive}' -DestinationPath '${tmpDir}'; break }` + - ` catch { Start-Sleep -Seconds 1; $RetryCount++ } }`; - exec(`powershell -NoProfile -Command "${psCommand}"`); + const attempts = []; + + const bsdtar = path.join(process.env.SystemRoot || "C:\\Windows", "System32", "tar.exe"); + if (process.platform === "win32" && fs.existsSync(bsdtar)) { + attempts.push({ cmd: `"${bsdtar}" -xf "${archive}" -C "${tmpDir}"`, label: "bsdtar" }); + } + // PowerShell fallback. Retries handle transient locks from Windows Defender + // / Search Indexer on the freshly-written zip. Throws if all retries fail. + const psCommand = + `$ErrorActionPreference='Stop'; ` + + `$lastErr = $null; ` + + `for ($i=0; $i -lt 20; $i++) { ` + + ` try { Expand-Archive -Force -Path '${archive}' -DestinationPath '${tmpDir}'; exit 0 } ` + + ` catch { $lastErr = $_; Start-Sleep -Seconds 1 } ` + + `}; ` + + `throw $lastErr`; + attempts.push({ cmd: `powershell -NoProfile -Command "${psCommand}"`, label: "powershell" }); + + let lastErr = null; + for (const { cmd } of attempts) { + try { + exec(cmd); + let entries = []; + try { entries = fs.readdirSync(tmpDir); } catch { /* missing dir => failure */ } + if (entries.length > 0) return; + } catch (e) { + lastErr = e; + } } + throw lastErr || new Error(`[supermodel] extraction produced no files in ${tmpDir}`); } if (require.main === module) { diff --git a/npm/install.test.js b/npm/install.test.js index 6d29a02..8893f3f 100644 --- a/npm/install.test.js +++ b/npm/install.test.js @@ -11,123 +11,127 @@ const path = require("path"); const { execSync } = require("child_process"); const { extractZip } = require("./install"); -// createTestZip builds a real .zip containing a single file named "supermodel.exe" -// using the system zip/tar command. Skips on platforms where neither is available. -function createTestZip(t) { - const src = fs.mkdtempSync(path.join(os.tmpdir(), "supermodel-test-src-")); - const binary = path.join(src, "supermodel.exe"); - fs.writeFileSync(binary, "fake binary"); +const isWindows = process.platform === "win32"; +const isTarCmd = (c) => /tar(\.exe)?["'\s]/.test(c.split(" ")[0]); +const isPsCmd = (c) => c.includes("powershell"); - const archive = path.join(os.tmpdir(), `supermodel-test-${process.pid}.zip`); +// Make a fake extraction succeed by dropping a file into tmpDir so the +// post-extract verification (readdirSync(tmpDir).length > 0) passes. +const fakeExtract = (tmpDir) => () => { + fs.writeFileSync(path.join(tmpDir, "supermodel.exe"), "fake"); +}; + +test("extractZip succeeds when the first extractor produces files", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-ok-")); try { - // Use system zip or tar to build the archive. - try { - execSync(`zip -j "${archive}" "${binary}"`, { stdio: "pipe" }); - } catch { - execSync(`tar -cf "${archive}" --format=zip -C "${src}" supermodel.exe`, { stdio: "pipe" }); - } - } catch { - fs.rmSync(src, { recursive: true, force: true }); - return null; // zip tooling not available — caller should skip + const commands = []; + extractZip("/fake/archive.zip", tmpDir, (cmd) => { + commands.push(cmd); + fakeExtract(tmpDir)(); + }); + assert.equal(commands.length, 1, "stops at first successful extractor"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); } - fs.rmSync(src, { recursive: true, force: true }); - return archive; -} +}); -test("extractZip extracts via tar when tar succeeds", () => { - const archive = createTestZip(); - if (!archive) { - // Skip gracefully if zip tooling unavailable (e.g. minimal CI image). - console.log(" skipped: zip tooling not available"); - return; - } +test("extractZip falls back when first extractor throws", { skip: !isWindows }, () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-throw-")); try { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "supermodel-test-out-")); - try { - let called = null; - extractZip(archive, tmpDir, (cmd) => { - called = cmd; - // Only simulate tar; let the actual extraction happen via real execSync - // if this is the tar command. - if (cmd.startsWith("tar")) { - execSync(cmd, { stdio: "pipe" }); - } else { - throw new Error("should not reach PowerShell"); - } - }); - assert.ok(called.startsWith("tar"), "should have called tar first"); - const extracted = fs.readdirSync(tmpDir); - assert.ok(extracted.length > 0, "tmpDir should contain extracted files"); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } + const commands = []; + extractZip("/fake/archive.zip", tmpDir, (cmd) => { + commands.push(cmd); + if (isTarCmd(cmd)) throw new Error("tar unavailable"); + fakeExtract(tmpDir)(); + }); + assert.equal(commands.length, 2, "tries tar then PowerShell"); + assert.ok(isTarCmd(commands[0]), "first attempt is tar"); + assert.ok(isPsCmd(commands[1]), "second attempt is PowerShell"); } finally { - fs.unlinkSync(archive); + fs.rmSync(tmpDir, { recursive: true, force: true }); } }); -test("extractZip falls back to PowerShell when tar fails", () => { - const commands = []; - // Simulate tar failing; PowerShell "succeeds" (no-op). - extractZip("/fake/archive.zip", "/fake/tmpdir", (cmd) => { - commands.push(cmd); - if (cmd.startsWith("tar")) throw new Error("tar not available"); - // PowerShell call — just record it, don't execute. - }); - - assert.equal(commands.length, 2, "should have attempted tar then PowerShell"); - assert.ok(commands[0].startsWith("tar"), "first call should be tar"); - assert.ok(commands[1].includes("powershell"), "second call should be PowerShell"); - assert.ok(commands[1].includes("Expand-Archive"), "PowerShell command should use Expand-Archive"); +test("extractZip falls back when first extractor exits 0 but writes nothing", { skip: !isWindows }, () => { + // Reproduces the GNU-tar-in-Git-Bash bug: tar prints "Cannot connect to C:" + // but still exits 0 without extracting anything. + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-silent-")); + try { + const commands = []; + extractZip("/fake/archive.zip", tmpDir, (cmd) => { + commands.push(cmd); + if (isTarCmd(cmd)) return; // silent no-op "success" + fakeExtract(tmpDir)(); + }); + assert.equal(commands.length, 2, "falls back when tar produced no files"); + assert.ok(isPsCmd(commands[1]), "fallback is PowerShell Expand-Archive"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } }); -test("extractZip PowerShell fallback includes retry loop", () => { - const commands = []; - extractZip("/fake/archive.zip", "/fake/tmpdir", (cmd) => { - commands.push(cmd); - if (cmd.startsWith("tar")) throw new Error("tar not available"); - }); - - const psCmd = commands.find((c) => c.includes("powershell")); - assert.ok(psCmd, "PowerShell command should be present"); - assert.ok(psCmd.includes("$RetryCount"), "should include retry counter"); - assert.ok(psCmd.includes("Start-Sleep"), "should include sleep between retries"); - assert.ok(psCmd.includes("-lt 10"), "should retry up to 10 times"); +test("extractZip throws when every extractor fails", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-fail-")); + try { + assert.throws(() => { + extractZip("/fake/archive.zip", tmpDir, () => { + throw new Error("boom"); + }); + }, /boom/); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } }); -test("extractZip uses tar when both succeed — tar wins", () => { - const commands = []; - extractZip("/fake/archive.zip", "/fake/tmpdir", (cmd) => { - commands.push(cmd); - // Both would succeed; tar is tried first and doesn't throw. - }); - - assert.equal(commands.length, 1, "should only call tar when it succeeds"); - assert.ok(commands[0].startsWith("tar"), "the single call should be tar"); +test("extractZip throws when every extractor silently produces nothing", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-empty-")); + try { + assert.throws(() => { + extractZip("/fake/archive.zip", tmpDir, () => { + // no-op: exit 0, no files — the exact silent-failure mode we must catch + }); + }); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } }); -test("extractZip passes archive and tmpDir paths into tar command", () => { - const archive = "/tmp/test.zip"; - const tmpDir = "/tmp/extract-dir"; - let tarCmd = null; - extractZip(archive, tmpDir, (cmd) => { - tarCmd = cmd; - }); - - assert.ok(tarCmd.includes(archive), "tar command should include archive path"); - assert.ok(tarCmd.includes(tmpDir), "tar command should include tmpDir path"); +test("extractZip PowerShell fallback uses Expand-Archive with a retry loop", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-ps-")); + try { + const commands = []; + try { + extractZip("/fake/archive.zip", tmpDir, (cmd) => { + commands.push(cmd); + throw new Error("all fail"); + }); + } catch {} + const psCmd = commands.find(isPsCmd); + assert.ok(psCmd, "PowerShell command should be attempted"); + assert.ok(psCmd.includes("Expand-Archive"), "uses Expand-Archive"); + assert.ok(psCmd.includes("Start-Sleep"), "sleeps between retries"); + assert.match(psCmd, /-lt\s+\d+/, "has a retry counter"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } }); -test("extractZip passes archive and tmpDir paths into PowerShell fallback", () => { +test("extractZip passes archive and tmpDir paths into every command", () => { const archive = "/tmp/test.zip"; - const tmpDir = "/tmp/extract-dir"; - const commands = []; - extractZip(archive, tmpDir, (cmd) => { - commands.push(cmd); - if (cmd.startsWith("tar")) throw new Error("tar failed"); - }); - - const psCmd = commands[1]; - assert.ok(psCmd.includes(archive), "PowerShell command should include archive path"); - assert.ok(psCmd.includes(tmpDir), "PowerShell command should include tmpDir path"); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "smt-paths-")); + try { + const commands = []; + try { + extractZip(archive, tmpDir, (cmd) => { + commands.push(cmd); + throw new Error("fail all"); + }); + } catch {} + for (const cmd of commands) { + assert.ok(cmd.includes(archive), `archive path present: ${cmd}`); + assert.ok(cmd.includes(tmpDir), `tmpDir path present: ${cmd}`); + } + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } });