Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion packages/core/scripts/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ async function testSemanticSidecarRerankAndValidation() {
queryExpansion: { enabled: false },
semantic: {
enabled: true,
sidecar,
sidecarPath: serializeSidecar(sidecar),
provider: { type: 'ollama', modelId: 'qwen3-embedding:4b' },
queryEmbedding: new Float32Array([0, 1]),
force: true,
Expand All @@ -668,6 +668,75 @@ async function testSemanticSidecarRerankAndValidation() {
assert.deepEqual(loaded, sidecar, 'expected semantic sidecar round trip to remain stable');
}

async function testSemanticEvidenceScoresRemainCorrectAfterRerank() {
  // Two docs: heavy lexical overlap favors doc A, while the forced query
  // embedding [0, 1] points squarely at doc B's semantic vector.
  const documents = [
    { id: 'lex-a', text: 'alpha beta alpha beta alpha beta river stone' },
    { id: 'lex-b', text: 'alpha beta solar wind' },
  ];
  const built = await buildPack(documents, {
    semantic: {
      enabled: true,
      modelId: 'test-model',
      embeddings: [new Float32Array([1, 0]), new Float32Array([0, 1])],
      quantization: { type: 'int8_l2norm', perVectorScale: true },
    },
  });
  const pack = await mountPack({ src: built });

  // Baseline lexical-only run; capture each block's pre-rerank lexical score.
  const lexicalHits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
  });
  const baselineScores = new Map();
  for (const hit of lexicalHits) {
    baselineScores.set(hit.blockId, hit.evidence?.lexicalScore ?? hit.score);
  }

  // Forced semantic rerank with a 50/50 lexical/semantic blend.
  const rerankedHits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
    semantic: {
      enabled: true,
      queryEmbedding: new Float32Array([0, 1]),
      force: true,
      blend: { enabled: true, wLex: 0.5, wSem: 0.5 },
    },
  });

  // Ordering must flip, yet evidence must still report the original lexical
  // score plus numeric semantic/blended scores under hybrid retrieval.
  assert.notEqual(
    rerankedHits[0]?.source,
    lexicalHits[0]?.source,
    'expected semantic rerank to change ordering'
  );
  for (const hit of rerankedHits) {
    assert.equal(
      hit.evidence?.lexicalScore,
      baselineScores.get(hit.blockId),
      'expected evidence.lexicalScore to preserve pre-rerank lexical score'
    );
    assert.equal(hit.evidence?.retrieval, 'hybrid');
    assert.equal(typeof hit.evidence?.semanticScore, 'number');
    assert.equal(typeof hit.evidence?.blendedScore, 'number');
  }
}

async function testLexicalOnlyEvidenceRemainsUnchanged() {
  // Lexical-only pack: no semantic index is built, so evidence must stay
  // purely lexical with no semantic/blended fields populated.
  const pack = await mountPack({
    src: await buildPack([
      { id: 'a', text: 'alpha beta gamma' },
      { id: 'b', text: 'alpha beta delta' },
    ]),
  });

  const hits = query(pack, 'alpha beta', {
    topK: 2,
    queryExpansion: { enabled: false },
  });
  assert.ok(hits.length > 0, 'expected lexical query to return hits');

  for (const hit of hits) {
    assert.equal(hit.evidence?.retrieval, 'lexical');
    assert.equal(typeof hit.evidence?.lexicalScore, 'number');
    assert.equal(hit.evidence?.semanticScore, undefined);
    assert.equal(hit.evidence?.blendedScore, undefined);
  }
}

async function testCosineHelpers() {
const a = normalizeVector(new Float32Array([3, 4]));
const b = normalizeVector(new Float32Array([3, 4]));
Expand Down Expand Up @@ -1702,6 +1771,8 @@ await testSemanticRerankLowConfidence();
await testSemanticRerankRespectsConfidenceAndForce();
await testSemanticRerankErrorAndDefaults();
await testSemanticSidecarRerankAndValidation();
await testSemanticEvidenceScoresRemainCorrectAfterRerank();
await testLexicalOnlyEvidenceRemainsUnchanged();
await testCosineHelpers();
await testSmartQuotePhrase();
await testFirstBlockRetrieval();
Expand Down
45 changes: 43 additions & 2 deletions packages/core/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
import { expandQueryWithGraph } from "./graph/query_expand.js";
import type { RetrievalEvidence, SemanticSidecar } from "./semantic/types.js";
import { rerankCandidates } from "./semantic/rerank.js";
import { parseSidecar } from "./semantic/sidecar.js";

export type QueryOptions = {
topK?: number;
Expand Down Expand Up @@ -123,6 +124,9 @@ export function validateSemanticQueryOptions(options?: QueryOptions["semantic"])
if (options.queryEmbedding !== undefined && !(options.queryEmbedding instanceof Float32Array)) {
throw new Error("query(...): semantic.queryEmbedding must be a Float32Array.");
}
if (options.sidecarPath !== undefined && typeof options.sidecarPath !== "string") {
throw new Error("query(...): semantic.sidecarPath must be a string when provided.");
}
if (options.minSemanticScore !== undefined && (!Number.isFinite(options.minSemanticScore) || options.minSemanticScore < 0 || options.minSemanticScore > 1)) {
throw new Error("query(...): semantic.minSemanticScore must be a finite number between 0 and 1.");
}
Expand Down Expand Up @@ -179,7 +183,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
wSem: Math.max(0, opts.semantic?.blend?.wSem ?? 0.25),
},
queryEmbedding: opts.semantic?.queryEmbedding,
sidecar: opts.semantic?.sidecar,
sidecar: resolveSemanticSidecar(opts.semantic?.sidecar, opts.semantic?.sidecarPath),
provider: opts.semantic?.provider,
minSemanticScore: opts.semantic?.minSemanticScore,
force: opts.semantic?.force ?? false,
Expand Down Expand Up @@ -378,6 +382,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
const confidence = lexConfidence(prelim);
let semanticScores: Map<number, number> | undefined;
let blendedScores: Map<number, number> | undefined;
const originalLexicalScores = new Map(prelim.map((item) => [item.blockId, item.score]));
if (shouldRerankWithSemantic(pack, semanticOpts, confidence)) {
const semanticResult = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
prelim = semanticResult.hits;
Expand All @@ -400,7 +405,7 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
namespace: pack.namespaces?.[r.blockId] ?? undefined,
evidence: {
retrieval: retrievalMode,
lexicalScore: r.score,
lexicalScore: originalLexicalScores.get(r.blockId) ?? r.score,
semanticScore: semanticScores?.get(r.blockId),
blendedScore: blendedScores?.get(r.blockId),
modelId: semanticOpts.provider?.modelId ?? semanticOpts.sidecar?.modelId,
Expand Down Expand Up @@ -443,6 +448,42 @@ function shouldRerankWithSemantic(pack: Pack, opts: ResolvedSemanticOpts, confid
return opts.force || confidence < opts.minLexConfidence;
}

function resolveSemanticSidecar(sidecar?: SemanticSidecar, sidecarPath?: string): SemanticSidecar | undefined {
if (sidecar) return sidecar;
if (!sidecarPath) return undefined;
const raw = sidecarPath.trim();
if (!raw) return undefined;

if (raw.startsWith("{")) {
return parseSidecar(raw);
Comment on lines +457 to +458
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Validate parsed sidecar payload before reranking

If semantic.sidecarPath contains JSON that passes parseSidecar's minimal checks but is missing required fields (for example, no blocks array), this function still returns it and semantic reranking later crashes with an opaque Cannot read properties of undefined (reading 'find') from rerankCandidates. This is reproducible whenever semantic rerank is enabled/forced with such input, and it should fail fast here with a clear query(...) validation error instead of a downstream TypeError.

Useful? React with 👍 / 👎.

}

if (raw.startsWith("data:")) {
const comma = raw.indexOf(",");
if (comma <= 0) return undefined;
const meta = raw.slice(5, comma).toLowerCase();
const payload = raw.slice(comma + 1);
const decoded = meta.includes(";base64")
? decodeBase64(payload)
: decodeURIComponent(payload);
if (!decoded.trim()) return undefined;
return parseSidecar(decoded);
}

return undefined;
}

/**
 * Decode a base64 payload (from a `data:...;base64,` sidecar URI) into a
 * UTF-8 string, tolerating embedded whitespace/newlines in the payload.
 *
 * Prefers Node's Buffer, which decodes base64 → UTF-8 directly. The previous
 * order tried `atob` first, but `atob` returns a latin-1 "binary string"
 * (one char per byte), so multi-byte UTF-8 content in the sidecar JSON was
 * mangled on runtimes where `atob` exists. On the `atob` fallback we now
 * re-assemble the raw bytes and decode them with TextDecoder("utf-8").
 *
 * @throws when neither Buffer nor atob is available in the current runtime.
 */
function decodeBase64(input: string): string {
  const normalized = input.replace(/\s+/g, "");

  const maybeBufferCtor = (globalThis as { Buffer?: { from: (s: string, enc: string) => { toString: (enc: string) => string } } }).Buffer;
  if (maybeBufferCtor?.from) return maybeBufferCtor.from(normalized, "base64").toString("utf8");

  const atobFn = (globalThis as { atob?: (s: string) => string }).atob;
  if (typeof atobFn === "function") {
    // atob yields one character per byte; rebuild the byte array and decode
    // it as UTF-8 so non-ASCII content round-trips correctly.
    const binary = atobFn(normalized);
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    return new TextDecoder("utf-8").decode(bytes);
  }

  throw new Error("query(...): Unable to decode semantic.sidecarPath base64 payload in this runtime.");
}

function rerankLexicalHitsWithSemantic(
pack: Pack,
prelim: Array<{ blockId: number; score: number }>,
Expand Down
Loading