Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 140 additions & 14 deletions cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,71 @@ function formatJson(obj: any): string {
return JSON.stringify(obj, null, 2);
}

/**
 * Renders a retrieval diagnostics snapshot as printable text lines.
 *
 * The first line is a heading; every following line is a bullet describing
 * the queries, the per-source result counts, the per-stage counts and the
 * drop summary. Only the first three `dropSummary` entries are shown, in the
 * order they are given; an empty summary is rendered as "none". The failure
 * stage and error message bullets are appended only when those fields are
 * truthy.
 *
 * @param diagnostics - Diagnostics snapshot to render.
 * @returns The lines to print, one array element per output line.
 */
function formatRetrievalDiagnosticsLines(diagnostics: {
  originalQuery: string;
  bm25Query: string | null;
  queryExpanded: boolean;
  vectorResultCount: number;
  bm25ResultCount: number;
  fusedResultCount: number;
  finalResultCount: number;
  stageCounts: {
    afterMinScore: number;
    rerankInput: number;
    afterRerank: number;
    afterHardMinScore: number;
    afterNoiseFilter: number;
    afterDiversity: number;
  };
  dropSummary: Array<{ stage: string; dropped: number; before: number; after: number }>;
  failureStage?: string;
  errorMessage?: string;
}): string[] {
  const { stageCounts: stages, dropSummary } = diagnostics;

  // Summarise at most three drop entries; fall back to "none" when empty.
  let dropsText = "none";
  if (dropSummary.length > 0) {
    const shown: string[] = [];
    for (const { stage, dropped, before, after } of dropSummary.slice(0, 3)) {
      shown.push(`${stage} -${dropped} (${before}->${after})`);
    }
    dropsText = shown.join(", ");
  }

  // A null BM25 query is reported as "(disabled)".
  const bm25Text = diagnostics.bm25Query ?? "(disabled)";
  const expandedText = diagnostics.queryExpanded ? "Yes" : "No";

  const countsText = [
    `vector=${diagnostics.vectorResultCount}`,
    `bm25=${diagnostics.bm25ResultCount}`,
    `fused=${diagnostics.fusedResultCount}`,
    `final=${diagnostics.finalResultCount}`,
  ].join(", ");

  const stagesText = [
    `min=${stages.afterMinScore}`,
    `rerankIn=${stages.rerankInput}`,
    `rerank=${stages.afterRerank}`,
    `hard=${stages.afterHardMinScore}`,
    `noise=${stages.afterNoiseFilter}`,
    `diversity=${stages.afterDiversity}`,
  ].join(", ");

  const output: string[] = ["Retrieval diagnostics:"];
  output.push(` • Original query: ${diagnostics.originalQuery}`);
  output.push(` • BM25 query: ${bm25Text}`);
  output.push(` • Query expanded: ${expandedText}`);
  output.push(` • Counts: ${countsText}`);
  output.push(` • Stages: ${stagesText}`);
  output.push(` • Drops: ${dropsText}`);

  // Failure details are optional and only shown when present.
  if (diagnostics.failureStage) {
    output.push(` • Failure stage: ${diagnostics.failureStage}`);
  }
  if (diagnostics.errorMessage) {
    output.push(` • Error: ${diagnostics.errorMessage}`);
  }

  return output;
}

/**
 * Builds the JSON payload reported when a search fails.
 *
 * @param error - The thrown value; the message of an `Error` is used as-is,
 *   anything else is converted with `String()`.
 * @param diagnostics - Diagnostics to attach, if any.
 * @param includeDiagnostics - When true and `diagnostics` is truthy, the
 *   diagnostics are added under the `diagnostics` key.
 * @returns An object with an `error` entry (`code` fixed to "search_failed")
 *   and, optionally, a `diagnostics` entry.
 */
function buildSearchErrorPayload(
  error: unknown,
  diagnostics: unknown,
  includeDiagnostics: boolean,
): Record<string, unknown> {
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }

  const payload: Record<string, unknown> = {
    error: { code: "search_failed", message },
  };

  // Attach diagnostics only when requested and actually available.
  if (includeDiagnostics && diagnostics) {
    payload.diagnostics = diagnostics;
  }

  return payload;
}

/**
 * Pauses the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise((wake) => {
    setTimeout(wake, ms);
  });
  await timer;
}
Expand All @@ -418,6 +483,18 @@ async function sleep(ms: number): Promise<void> {
// ============================================================================

export function registerMemoryCLI(program: Command, context: CLIContext): void {
let lastSearchDiagnostics: ReturnType<MemoryRetriever["getLastDiagnostics"]> =
null;

const captureSearchDiagnostics = (
retriever: Pick<MemoryRetriever, "getLastDiagnostics">,
) => {
lastSearchDiagnostics =
typeof retriever.getLastDiagnostics === "function"
? retriever.getLastDiagnostics()
: null;
};

const getSearchRetriever = (): MemoryRetriever => {
if (!context.embedder) {
return context.retriever;
Expand All @@ -431,26 +508,49 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
scopeFilter?: string[],
category?: string,
) => {
let results = await getSearchRetriever().retrieve({
query,
limit,
scopeFilter,
category,
source: "cli",
});

if (results.length === 0 && context.embedder) {
await sleep(75);
results = await getSearchRetriever().retrieve({
lastSearchDiagnostics = null;
const retriever = getSearchRetriever();
let results;
try {
results = await retriever.retrieve({
query,
limit,
scopeFilter,
category,
source: "cli",
});
captureSearchDiagnostics(retriever);
} catch (error) {
captureSearchDiagnostics(retriever);
throw error;
}

return results;
if (results.length === 0 && context.embedder) {
await sleep(75);
const retryRetriever = getSearchRetriever();
try {
results = await retryRetriever.retrieve({
query,
limit,
scopeFilter,
category,
source: "cli",
});
captureSearchDiagnostics(retryRetriever);
} catch (error) {
captureSearchDiagnostics(retryRetriever);
throw error;
}
return {
results,
diagnostics: lastSearchDiagnostics,
};
}

return {
results,
diagnostics: lastSearchDiagnostics,
};
};

const memory = program
Expand Down Expand Up @@ -697,6 +797,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
.option("--scope <scope>", "Search within specific scope")
.option("--category <category>", "Filter by category")
.option("--limit <n>", "Maximum number of results", "10")
.option("--debug", "Show retrieval diagnostics")
.option("--json", "Output as JSON")
.action(async (query, options) => {
try {
Expand All @@ -707,11 +808,24 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
scopeFilter = [options.scope];
}

const results = await runSearch(query, limit, scopeFilter, options.category);
const { results, diagnostics } = await runSearch(
query,
limit,
scopeFilter,
options.category,
);

if (options.json) {
console.log(formatJson(results));
console.log(
formatJson(options.debug ? { diagnostics, results } : results),
);
} else {
if (options.debug && diagnostics) {
for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
console.log(line);
}
console.log();
}
if (results.length === 0) {
console.log("No relevant memories found.");
} else {
Expand All @@ -730,6 +844,18 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
}
}
} catch (error) {
const diagnostics = options.debug ? lastSearchDiagnostics : null;
if (options.json) {
console.log(
formatJson(buildSearchErrorPayload(error, diagnostics, options.debug)),
);
process.exit(1);
}
if (diagnostics) {
for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
console.error(line);
}
}
console.error("Search failed:", error);
process.exit(1);
}
Expand Down
116 changes: 116 additions & 0 deletions src/query-expander.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
 * Lightweight query expansion for BM25, triggered by Chinese colloquial terms
 * and English keywords.
 * Keeps the vector query untouched and only appends a few high-signal synonyms.
 */

// Upper bound on how many synonym terms expandQuery appends to a single query.
const MAX_EXPANSION_TERMS = 5;

/** One synonym group: trigger terms plus the terms appended when a trigger is found in the query. */
interface SynonymEntry {
// Chinese trigger terms; expandQuery matches them as case-insensitive substrings of the query.
cn: string[];
// English trigger terms; expandQuery matches them case-insensitively on word boundaries.
en: string[];
// Candidate terms to append; candidates already contained in the query are skipped.
expansions: string[];
}

/**
 * Synonym groups consulted by expandQuery, scanned in array order.
 * Expansion stops as soon as MAX_EXPANSION_TERMS terms have been collected, so
 * earlier groups (and earlier items inside each `expansions` list) take priority.
 * The short comment above each group is an English gloss of its Chinese triggers.
 */
const SYNONYM_MAP: SynonymEntry[] = [
// "went down" / "crashed" (service outage)
{
cn: ["挂了", "挂掉", "宕机"],
en: ["shutdown", "crashed"],
expansions: ["崩溃", "crash", "error", "报错", "宕机", "失败"],
},
// "stuck" / "frozen" / "not responding"
{
cn: ["卡住", "卡死", "没反应"],
en: ["hung", "frozen"],
expansions: ["hang", "timeout", "超时", "无响应", "stuck"],
},
// "blew up" (colloquial for a crash, e.g. out of memory)
{
cn: ["炸了", "爆了"],
en: ["oom"],
expansions: ["崩溃", "crash", "OOM", "内存溢出", "error"],
},
// "configuration" / "settings"
{
cn: ["配置", "设置"],
en: ["config", "configuration"],
expansions: ["配置", "config", "configuration", "settings", "设置"],
},
// "deploy" / "go live"
{
cn: ["部署", "上线"],
en: ["deploy", "deployment"],
expansions: ["deploy", "部署", "上线", "发布", "release"],
},
// "container"
{
cn: ["容器"],
en: ["docker", "container"],
expansions: ["Docker", "容器", "container", "docker-compose"],
},
// "reports an error" / "went wrong" / "error"
{
cn: ["报错", "出错", "错误"],
en: ["error", "exception"],
expansions: ["error", "报错", "exception", "错误", "失败", "bug"],
},
// "fix" / "fixed"
{
cn: ["修复", "修了", "修好"],
en: ["bugfix", "hotfix"],
expansions: ["fix", "修复", "patch", "解决"],
},
// "hit a pitfall" (lesson learned while debugging)
{
cn: ["踩坑"],
en: ["troubleshoot"],
expansions: ["踩坑", "bug", "问题", "教训", "排查", "troubleshoot"],
},
// "memory" / "memory system"
{
cn: ["记忆", "记忆系统"],
en: ["memory"],
expansions: ["记忆", "memory", "记忆系统", "LanceDB", "索引"],
},
// "search" / "look up" / "cannot find"
{
cn: ["搜索", "查找", "找不到"],
en: ["search", "retrieval"],
expansions: ["搜索", "search", "retrieval", "检索", "查找"],
},
// "push" (as in git push)
{
cn: ["推送"],
en: ["git push"],
expansions: ["push", "推送", "git push", "commit"],
},
// "log"
{
cn: ["日志"],
en: ["logfile", "logging"],
expansions: ["日志", "log", "logging", "输出", "打印"],
},
// "permission"
{
cn: ["权限"],
en: ["permission", "authorization"],
expansions: ["权限", "permission", "access", "授权", "认证"],
},
];

/**
 * Compiles a case-insensitive regular expression that matches `term` only
 * when it is delimited by word boundaries (`\b`) on both sides.
 *
 * @param term - Literal text to match; regex metacharacters are escaped.
 * @returns A `RegExp` with the `i` flag.
 */
function buildWordBoundaryRegex(term: string): RegExp {
  // Escape every regex metacharacter so the term is matched literally.
  const literal = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = "\\b" + literal + "\\b";
  return new RegExp(pattern, "i");
}

/**
 * Appends synonym terms from SYNONYM_MAP to a query.
 *
 * A synonym group fires when any of its `cn` terms occurs in the query as a
 * case-insensitive substring, or any of its `en` terms matches the query via
 * buildWordBoundaryRegex. For each firing group, expansions not already
 * contained in the lower-cased query are collected (deduplicated) until
 * MAX_EXPANSION_TERMS terms have been gathered.
 *
 * @param query - The original query text.
 * @returns The query unchanged when it is empty, shorter than two characters
 *   after trimming, or nothing was collected; otherwise the original query
 *   followed by a space and the collected terms joined by single spaces.
 */
export function expandQuery(query: string): string {
  const tooShort = !query || query.trim().length < 2;
  if (tooShort) {
    return query;
  }

  const haystack = query.toLowerCase();
  const extraTerms = new Set<string>();

  collect: for (const { cn, en, expansions } of SYNONYM_MAP) {
    const triggered =
      cn.some((word) => haystack.includes(word.toLowerCase())) ||
      en.some((word) => buildWordBoundaryRegex(word).test(query));
    if (!triggered) {
      continue;
    }

    for (const candidate of expansions) {
      // Skip terms the query already contains.
      if (!haystack.includes(candidate.toLowerCase())) {
        extraTerms.add(candidate);
      }
      // Stop scanning every group once the cap is reached.
      if (extraTerms.size >= MAX_EXPANSION_TERMS) {
        break collect;
      }
    }
  }

  if (extraTerms.size === 0) {
    return query;
  }
  return query + " " + Array.from(extraTerms).join(" ");
}
Loading
Loading