Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion packages/core/scripts/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ async function testSemanticSidecarRerankAndValidation() {
queryExpansion: { enabled: false },
semantic: {
enabled: true,
sidecar,
sidecarPath: serializeSidecar(sidecar),
provider: { type: 'ollama', modelId: 'qwen3-embedding:4b' },
queryEmbedding: new Float32Array([0, 1]),
force: true,
Expand All @@ -668,6 +668,75 @@ async function testSemanticSidecarRerankAndValidation() {
assert.deepEqual(loaded, sidecar, 'expected semantic sidecar round trip to remain stable');
}

async function testSemanticEvidenceScoresRemainCorrectAfterRerank() {
  // Two docs: heavy lexical overlap favors doc A, while the forced query
  // embedding [0, 1] points squarely at doc B's semantic vector.
  const documents = [
    { id: 'lex-a', text: 'alpha beta alpha beta alpha beta river stone' },
    { id: 'lex-b', text: 'alpha beta solar wind' },
  ];
  const built = await buildPack(documents, {
    semantic: {
      enabled: true,
      modelId: 'test-model',
      embeddings: [new Float32Array([1, 0]), new Float32Array([0, 1])],
      quantization: { type: 'int8_l2norm', perVectorScale: true },
    },
  });
  const pack = await mountPack({ src: built });

  // Baseline lexical-only run; capture each block's pre-rerank lexical score.
  const lexicalHits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
  });
  const baselineScores = new Map();
  for (const hit of lexicalHits) {
    baselineScores.set(hit.blockId, hit.evidence?.lexicalScore ?? hit.score);
  }

  // Forced semantic rerank with a 50/50 lexical/semantic blend.
  const rerankedHits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
    semantic: {
      enabled: true,
      queryEmbedding: new Float32Array([0, 1]),
      force: true,
      blend: { enabled: true, wLex: 0.5, wSem: 0.5 },
    },
  });

  // Ordering must flip, yet evidence must still report the original lexical
  // score plus numeric semantic/blended scores under hybrid retrieval.
  assert.notEqual(
    rerankedHits[0]?.source,
    lexicalHits[0]?.source,
    'expected semantic rerank to change ordering'
  );
  for (const hit of rerankedHits) {
    assert.equal(
      hit.evidence?.lexicalScore,
      baselineScores.get(hit.blockId),
      'expected evidence.lexicalScore to preserve pre-rerank lexical score'
    );
    assert.equal(hit.evidence?.retrieval, 'hybrid');
    assert.equal(typeof hit.evidence?.semanticScore, 'number');
    assert.equal(typeof hit.evidence?.blendedScore, 'number');
  }
}

async function testLexicalOnlyEvidenceRemainsUnchanged() {
  // Lexical-only pack: no semantic index is built, so evidence must stay
  // purely lexical with no semantic/blended fields populated.
  const pack = await mountPack({
    src: await buildPack([
      { id: 'a', text: 'alpha beta gamma' },
      { id: 'b', text: 'alpha beta delta' },
    ]),
  });

  const hits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
  });
  assert.ok(hits.length > 0, 'expected lexical query to return hits');

  for (const hit of hits) {
    assert.equal(hit.evidence?.retrieval, 'lexical');
    assert.equal(typeof hit.evidence?.lexicalScore, 'number');
    assert.equal(hit.evidence?.semanticScore, undefined);
    assert.equal(hit.evidence?.blendedScore, undefined);
  }
}

async function testCosineHelpers() {
const a = normalizeVector(new Float32Array([3, 4]));
const b = normalizeVector(new Float32Array([3, 4]));
Expand Down Expand Up @@ -1702,6 +1771,8 @@ await testSemanticRerankLowConfidence();
await testSemanticRerankRespectsConfidenceAndForce();
await testSemanticRerankErrorAndDefaults();
await testSemanticSidecarRerankAndValidation();
await testSemanticEvidenceScoresRemainCorrectAfterRerank();
await testLexicalOnlyEvidenceRemainsUnchanged();
await testCosineHelpers();
await testSmartQuotePhrase();
await testFirstBlockRetrieval();
Expand Down
45 changes: 43 additions & 2 deletions packages/core/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
import { expandQueryWithGraph } from "./graph/query_expand.js";
import type { RetrievalEvidence, SemanticSidecar } from "./semantic/types.js";
import { rerankCandidates } from "./semantic/rerank.js";
import { parseSidecar } from "./semantic/sidecar.js";

export type QueryOptions = {
topK?: number;
Expand Down Expand Up @@ -123,6 +124,9 @@ export function validateSemanticQueryOptions(options?: QueryOptions["semantic"])
if (options.queryEmbedding !== undefined && !(options.queryEmbedding instanceof Float32Array)) {
throw new Error("query(...): semantic.queryEmbedding must be a Float32Array.");
}
if (options.sidecarPath !== undefined && typeof options.sidecarPath !== "string") {
throw new Error("query(...): semantic.sidecarPath must be a string when provided.");
}
if (options.minSemanticScore !== undefined && (!Number.isFinite(options.minSemanticScore) || options.minSemanticScore < 0 || options.minSemanticScore > 1)) {
throw new Error("query(...): semantic.minSemanticScore must be a finite number between 0 and 1.");
}
Expand Down Expand Up @@ -179,7 +183,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
wSem: Math.max(0, opts.semantic?.blend?.wSem ?? 0.25),
},
queryEmbedding: opts.semantic?.queryEmbedding,
sidecar: opts.semantic?.sidecar,
sidecar: resolveSemanticSidecar(opts.semantic?.sidecar, opts.semantic?.sidecarPath),
provider: opts.semantic?.provider,
minSemanticScore: opts.semantic?.minSemanticScore,
force: opts.semantic?.force ?? false,
Expand Down Expand Up @@ -378,6 +382,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
const confidence = lexConfidence(prelim);
let semanticScores: Map<number, number> | undefined;
let blendedScores: Map<number, number> | undefined;
const originalLexicalScores = new Map(prelim.map((item) => [item.blockId, item.score]));
if (shouldRerankWithSemantic(pack, semanticOpts, confidence)) {
const semanticResult = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
prelim = semanticResult.hits;
Expand All @@ -400,7 +405,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
namespace: pack.namespaces?.[r.blockId] ?? undefined,
evidence: {
retrieval: retrievalMode,
lexicalScore: r.score,
lexicalScore: originalLexicalScores.get(r.blockId) ?? r.score,
semanticScore: semanticScores?.get(r.blockId),
blendedScore: blendedScores?.get(r.blockId),
modelId: semanticOpts.provider?.modelId ?? semanticOpts.sidecar?.modelId,
Expand Down Expand Up @@ -443,6 +448,42 @@ function shouldRerankWithSemantic(pack: Pack, opts: ResolvedSemanticOpts, confid
return opts.force || confidence < opts.minLexConfidence;
}

function resolveSemanticSidecar(sidecar?: SemanticSidecar, sidecarPath?: string): SemanticSidecar | undefined {
if (sidecar) return sidecar;
if (!sidecarPath) return undefined;
const raw = sidecarPath.trim();
if (!raw) return undefined;

if (raw.startsWith("{")) {
return parseSidecar(raw);
Comment on lines +457 to +458
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Validate parsed sidecar payload before reranking

If semantic.sidecarPath contains JSON that passes parseSidecar's minimal checks but is missing required fields (for example, no blocks array), this function still returns it and semantic reranking later crashes with an opaque Cannot read properties of undefined (reading 'find') from rerankCandidates. This is reproducible whenever semantic rerank is enabled/forced with such input, and it should fail fast here with a clear query(...) validation error instead of a downstream TypeError.

Useful? React with 👍 / 👎.

}

if (raw.startsWith("data:")) {
const comma = raw.indexOf(",");
if (comma <= 0) return undefined;
const meta = raw.slice(5, comma).toLowerCase();
const payload = raw.slice(comma + 1);
const decoded = meta.includes(";base64")
? decodeBase64(payload)
: decodeURIComponent(payload);
if (!decoded.trim()) return undefined;
return parseSidecar(decoded);
}

return undefined;
}

/**
 * Decode a base64 payload (from a `data:...;base64,` sidecar URI) into a
 * UTF-8 string, tolerating embedded whitespace/newlines in the payload.
 *
 * Prefers Node's Buffer, which decodes base64 → UTF-8 directly. The previous
 * order tried `atob` first, but `atob` returns a latin-1 "binary string"
 * (one char per byte), so multi-byte UTF-8 content in the sidecar JSON was
 * mangled on runtimes where `atob` exists. On the `atob` fallback we now
 * re-assemble the raw bytes and decode them with TextDecoder("utf-8").
 *
 * @throws when neither Buffer nor atob is available in the current runtime.
 */
function decodeBase64(input: string): string {
  const normalized = input.replace(/\s+/g, "");

  const maybeBufferCtor = (globalThis as { Buffer?: { from: (s: string, enc: string) => { toString: (enc: string) => string } } }).Buffer;
  if (maybeBufferCtor?.from) return maybeBufferCtor.from(normalized, "base64").toString("utf8");

  const atobFn = (globalThis as { atob?: (s: string) => string }).atob;
  if (typeof atobFn === "function") {
    // atob yields one character per byte; rebuild the byte array and decode
    // it as UTF-8 so non-ASCII content round-trips correctly.
    const binary = atobFn(normalized);
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    return new TextDecoder("utf-8").decode(bytes);
  }

  throw new Error("query(...): Unable to decode semantic.sidecarPath base64 payload in this runtime.");
}

function rerankLexicalHitsWithSemantic(
pack: Pack,
prelim: Array<{ blockId: number; score: number }>,
Expand Down
Loading