|
#!/usr/bin/env node
/**
 * Embedding-backend smoke test — verifies the local @huggingface/transformers
 * feature-extraction path loads and runs the native ONNX runtime without
 * crashing on the host platform.
 *
 * This is the reproducible form of the manual checks requested when migrating
 * off @xenova/transformers: the prior revert (8fb0836) was motivated by native
 * ONNX runtime crashes under Windows + Bun, so this runs in CI across
 * ubuntu / macOS / windows to catch a regression before merge.
 *
 * Uses a tiny model (all-MiniLM-L6-v2, ~25 MB) — the goal is to exercise the
 * runtime load + a real embedding call, not to validate any specific model.
 *
 * Run with either `bun scripts/verify-embedding-backend.mjs` or
 * `node scripts/verify-embedding-backend.mjs`.
 *
 * Exit status: 0 when every sample yields a valid embedding; non-zero when a
 * check fails (a `FAIL:` line is written to stderr) or when loading/running
 * the pipeline throws.
 */

const MODEL = "Xenova/all-MiniLM-L6-v2";
const EXPECTED_DIMS = 384;

// Accepted (exclusive) window for the L2 norm of a vector that was requested
// with `normalize: true`; anything outside it is reported as a bad vector.
const NORM_MIN = 0.9;
const NORM_MAX = 1.1;

/**
 * Validates a single feature-extraction result.
 *
 * @param {{ dims?: number[], data: Iterable<number> }} out - Value returned by
 *   the extractor; only `dims` and `data` are read.
 * @returns {{ dims: number | undefined, norm: number, failure: string | null }}
 *   `failure` is `null` when the vector passes, otherwise the text to print
 *   after `FAIL: `. `norm` is `NaN` when the dimension check already failed
 *   (the vector is not inspected in that case).
 */
function inspectEmbedding(out) {
  // The last entry of `dims` is the embedding width.
  const dims = out.dims?.[out.dims.length - 1];
  if (dims !== EXPECTED_DIMS) {
    return {
      dims,
      norm: Number.NaN,
      failure: `expected ${EXPECTED_DIMS} dims, got ${dims}`,
    };
  }

  const vec = Array.from(out.data);
  const allFinite = vec.every((x) => Number.isFinite(x));
  const norm = Math.sqrt(vec.reduce((sum, x) => sum + x * x, 0));
  // Written as a negated range check so that a NaN norm also fails.
  if (!allFinite || !(norm > NORM_MIN && norm < NORM_MAX)) {
    return {
      dims,
      norm,
      failure: `bad vector (finite=${allFinite}, norm=${norm.toFixed(4)})`,
    };
  }

  return { dims, norm, failure: null };
}

const runtime = typeof globalThis.Bun !== "undefined" ? "bun" : "node";
console.log(
  `[verify-embedding] runtime=${runtime} platform=${process.platform} arch=${process.arch}`
);

const { pipeline, env } = await import("@huggingface/transformers");

// Mirror the plugin's runtime configuration.
env.allowLocalModels = true;
env.allowRemoteModels = true;
try {
  env.backends.onnx.wasm.numThreads = 1;
} catch {
  /* not fatal — only relevant for the wasm backend */
}

console.log(`[verify-embedding] loading feature-extraction pipeline for ${MODEL} ...`);
const extractor = await pipeline("feature-extraction", MODEL);

const samples = [
  "Hello world, this is a test.",
  "Hallo Welt, dies ist ein Test.", // German sample: non-English text must also tokenize and embed
];

let failed = false;
for (const text of samples) {
  const out = await extractor(text, { pooling: "mean", normalize: true });
  const { dims, norm, failure } = inspectEmbedding(out);
  if (failure !== null) {
    console.error(`[verify-embedding] FAIL: ${failure}`);
    // Set the exit code and stop instead of calling process.exit(1):
    // process.exit() can end the process before the stderr write above has
    // been flushed when output is piped (as it is on CI), which would drop the
    // one diagnostic this script exists to produce. The success path already
    // ends by running off the end of the module, so failure now does the same.
    process.exitCode = 1;
    failed = true;
    break;
  }
  console.log(
    `[verify-embedding] ok: "${text.slice(0, 24)}..." -> ${dims} dims, L2=${norm.toFixed(4)}`
  );
}

if (!failed) {
  console.log("[verify-embedding] PASS — ONNX runtime loaded and embeddings produced.");
}
0 commit comments