diff --git a/cli.ts b/cli.ts
index 99203916..878ee77d 100644
--- a/cli.ts
+++ b/cli.ts
@@ -409,6 +409,71 @@ function formatJson(obj: any): string {
   return JSON.stringify(obj, null, 2);
 }
 
+function formatRetrievalDiagnosticsLines(diagnostics: {
+  originalQuery: string;
+  bm25Query: string | null;
+  queryExpanded: boolean;
+  vectorResultCount: number;
+  bm25ResultCount: number;
+  fusedResultCount: number;
+  finalResultCount: number;
+  stageCounts: {
+    afterMinScore: number;
+    rerankInput: number;
+    afterRerank: number;
+    afterHardMinScore: number;
+    afterNoiseFilter: number;
+    afterDiversity: number;
+  };
+  dropSummary: Array<{ stage: string; dropped: number; before: number; after: number }>;
+  failureStage?: string;
+  errorMessage?: string;
+}): string[] {
+  const topDrops =
+    diagnostics.dropSummary.length > 0
+      ? diagnostics.dropSummary
+          .slice(0, 3)
+          .map(
+            (drop) => `${drop.stage} -${drop.dropped} (${drop.before}->${drop.after})`,
+          )
+          .join(", ")
+      : "none";
+
+  const lines = [
+    "Retrieval diagnostics:",
+    `  • Original query: ${diagnostics.originalQuery}`,
+    `  • BM25 query: ${diagnostics.bm25Query ?? "(disabled)"}`,
+    `  • Query expanded: ${diagnostics.queryExpanded ? "Yes" : "No"}`,
+    `  • Counts: vector=${diagnostics.vectorResultCount}, bm25=${diagnostics.bm25ResultCount}, fused=${diagnostics.fusedResultCount}, final=${diagnostics.finalResultCount}`,
+    `  • Stages: min=${diagnostics.stageCounts.afterMinScore}, rerankIn=${diagnostics.stageCounts.rerankInput}, rerank=${diagnostics.stageCounts.afterRerank}, hard=${diagnostics.stageCounts.afterHardMinScore}, noise=${diagnostics.stageCounts.afterNoiseFilter}, diversity=${diagnostics.stageCounts.afterDiversity}`,
+    `  • Drops: ${topDrops}`,
+  ];
+
+  if (diagnostics.failureStage) {
+    lines.push(`  • Failure stage: ${diagnostics.failureStage}`);
+  }
+  if (diagnostics.errorMessage) {
+    lines.push(`  • Error: ${diagnostics.errorMessage}`);
+  }
+
+  return lines;
+}
+
+function buildSearchErrorPayload(
+  error: unknown,
+  diagnostics: unknown,
+  includeDiagnostics: boolean,
+): Record<string, unknown> {
+  const message = error instanceof Error ? error.message : String(error);
+  return {
+    error: {
+      code: "search_failed",
+      message,
+    },
+    ...(includeDiagnostics && diagnostics ? { diagnostics } : {}),
+  };
+}
+
 async function sleep(ms: number): Promise<void> {
   await new Promise(resolve => setTimeout(resolve, ms));
 }
@@ -418,6 +483,18 @@ async function sleep(ms: number): Promise<void> {
 // ============================================================================
 
 export function registerMemoryCLI(program: Command, context: CLIContext): void {
+  let lastSearchDiagnostics: ReturnType<MemoryRetriever["getLastDiagnostics"]> =
+    null;
+
+  const captureSearchDiagnostics = (
+    retriever: Pick<MemoryRetriever, "getLastDiagnostics">,
+  ) => {
+    lastSearchDiagnostics =
+      typeof retriever.getLastDiagnostics === "function"
+        ? retriever.getLastDiagnostics()
+        : null;
+  };
+
   const getSearchRetriever = (): MemoryRetriever => {
     if (!context.embedder) {
       return context.retriever;
@@ -431,26 +508,49 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
     scopeFilter?: string[],
     category?: string,
   ) => {
-    let results = await getSearchRetriever().retrieve({
-      query,
-      limit,
-      scopeFilter,
-      category,
-      source: "cli",
-    });
-
-    if (results.length === 0 && context.embedder) {
-      await sleep(75);
-      results = await getSearchRetriever().retrieve({
+    lastSearchDiagnostics = null;
+    const retriever = getSearchRetriever();
+    let results;
+    try {
+      results = await retriever.retrieve({
         query,
         limit,
         scopeFilter,
         category,
         source: "cli",
       });
+      captureSearchDiagnostics(retriever);
+    } catch (error) {
+      captureSearchDiagnostics(retriever);
+      throw error;
     }
 
-    return results;
+    if (results.length === 0 && context.embedder) {
+      await sleep(75);
+      const retryRetriever = getSearchRetriever();
+      try {
+        results = await retryRetriever.retrieve({
+          query,
+          limit,
+          scopeFilter,
+          category,
+          source: "cli",
+        });
+        captureSearchDiagnostics(retryRetriever);
+      } catch (error) {
+        captureSearchDiagnostics(retryRetriever);
+        throw error;
+      }
+      return {
+        results,
+        diagnostics: lastSearchDiagnostics,
+      };
+    }
+
+    return {
+      results,
+      diagnostics: lastSearchDiagnostics,
+    };
   };
 
   const memory = program
@@ -697,6 +797,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
     .option("--scope <scope>", "Search within specific scope")
     .option("--category <category>", "Filter by category")
     .option("--limit <n>", "Maximum number of results", "10")
+    .option("--debug", "Show retrieval diagnostics")
     .option("--json", "Output as JSON")
     .action(async (query, options) => {
       try {
@@ -707,11 +808,24 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
           scopeFilter = [options.scope];
         }
 
-        const results = await runSearch(query, limit, scopeFilter, options.category);
+        const { results, diagnostics } = await runSearch(
+          query,
+          limit,
+          scopeFilter,
+          options.category,
+        );
 
         if (options.json) {
-          console.log(formatJson(results));
+          console.log(
+            formatJson(options.debug ? { diagnostics, results } : results),
+          );
         } else {
+          if (options.debug && diagnostics) {
+            for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
+              console.log(line);
+            }
+            console.log();
+          }
           if (results.length === 0) {
             console.log("No relevant memories found.");
           } else {
@@ -730,6 +844,18 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
           }
         }
       } catch (error) {
+        const diagnostics = options.debug ? lastSearchDiagnostics : null;
+        if (options.json) {
+          console.log(
+            formatJson(buildSearchErrorPayload(error, diagnostics, options.debug)),
+          );
+          process.exit(1);
+        }
+        if (diagnostics) {
+          for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
+            console.error(line);
+          }
+        }
         console.error("Search failed:", error);
         process.exit(1);
       }
diff --git a/src/query-expander.ts b/src/query-expander.ts
new file mode 100644
index 00000000..4f00c565
--- /dev/null
+++ b/src/query-expander.ts
@@ -0,0 +1,116 @@
+/**
+ * Lightweight Chinese query expansion for BM25.
+ * Keeps the vector query untouched and only appends a few high-signal synonyms.
+ */
+
+const MAX_EXPANSION_TERMS = 5;
+
+interface SynonymEntry {
+  cn: string[];
+  en: string[];
+  expansions: string[];
+}
+
+const SYNONYM_MAP: SynonymEntry[] = [
+  {
+    cn: ["挂了", "挂掉", "宕机"],
+    en: ["shutdown", "crashed"],
+    expansions: ["崩溃", "crash", "error", "报错", "宕机", "失败"],
+  },
+  {
+    cn: ["卡住", "卡死", "没反应"],
+    en: ["hung", "frozen"],
+    expansions: ["hang", "timeout", "超时", "无响应", "stuck"],
+  },
+  {
+    cn: ["炸了", "爆了"],
+    en: ["oom"],
+    expansions: ["崩溃", "crash", "OOM", "内存溢出", "error"],
+  },
+  {
+    cn: ["配置", "设置"],
+    en: ["config", "configuration"],
+    expansions: ["配置", "config", "configuration", "settings", "设置"],
+  },
+  {
+    cn: ["部署", "上线"],
+    en: ["deploy", "deployment"],
+    expansions: ["deploy", "部署", "上线", "发布", "release"],
+  },
+  {
+    cn: ["容器"],
+    en: ["docker", "container"],
+    expansions: ["Docker", "容器", "container", "docker-compose"],
+  },
+  {
+    cn: ["报错", "出错", "错误"],
+    en: ["error", "exception"],
+    expansions: ["error", "报错", "exception", "错误", "失败", "bug"],
+  },
+  {
+    cn: ["修复", "修了", "修好"],
+    en: ["bugfix", "hotfix"],
+    expansions: ["fix", "修复", "patch", "解决"],
+  },
+  {
+    cn: ["踩坑"],
+    en: ["troubleshoot"],
+    expansions: ["踩坑", "bug", "问题", "教训", "排查", "troubleshoot"],
+  },
+  {
+    cn: ["记忆", "记忆系统"],
+    en: ["memory"],
+    expansions: ["记忆", "memory", "记忆系统", "LanceDB", "索引"],
+  },
+  {
+    cn: ["搜索", "查找", "找不到"],
+    en: ["search", "retrieval"],
+    expansions: ["搜索", "search", "retrieval", "检索", "查找"],
+  },
+  {
+    cn: ["推送"],
+    en: ["git push"],
+    expansions: ["push", "推送", "git push", "commit"],
+  },
+  {
+    cn: ["日志"],
+    en: ["logfile", "logging"],
+    expansions: ["日志", "log", "logging", "输出", "打印"],
+  },
+  {
+    cn: ["权限"],
+    en: ["permission", "authorization"],
+    expansions: ["权限", "permission", "access", "授权", "认证"],
+  },
+];
+
+function buildWordBoundaryRegex(term: string): RegExp {
+  const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  return new RegExp(`\\b${escaped}\\b`, "i");
+}
+
+export function expandQuery(query: string): string {
+  if (!query || query.trim().length < 2) return query;
+
+  const lower = query.toLowerCase();
+  const additions = new Set<string>();
+
+  for (const entry of SYNONYM_MAP) {
+    const cnMatch = entry.cn.some((term) => lower.includes(term.toLowerCase()));
+    const enMatch = entry.en.some((term) => buildWordBoundaryRegex(term).test(query));
+
+    if (!cnMatch && !enMatch) continue;
+
+    for (const expansion of entry.expansions) {
+      if (!lower.includes(expansion.toLowerCase())) {
+        additions.add(expansion);
+      }
+      if (additions.size >= MAX_EXPANSION_TERMS) break;
+    }
+
+    if (additions.size >= MAX_EXPANSION_TERMS) break;
+  }
+
+  if (additions.size === 0) return query;
+  return `${query} ${[...additions].join(" ")}`;
+}
diff --git a/src/retriever.ts b/src/retriever.ts
index 900db753..170b33bf 100644
--- a/src/retriever.ts
+++ b/src/retriever.ts
@@ -11,6 +11,7 @@ import {
   parseAccessMetadata,
 } from "./access-tracker.js";
 import { filterNoise } from "./noise-filter.js";
+import { expandQuery } from "./query-expander.js";
 import type { DecayEngine, DecayableMemory } from "./decay-engine.js";
 import type { TierManager } from "./tier-manager.js";
 import {
@@ -30,6 +31,8 @@ export interface RetrievalConfig {
   mode: "hybrid" | "vector";
   vectorWeight: number;
   bm25Weight: number;
+  /** Expand BM25 queries with high-signal synonyms for manual / CLI retrieval. */
+  queryExpansion: boolean;
   minScore: number;
   rerank: "cross-encoder" | "lightweight" | "none";
   candidatePoolSize: number;
@@ -111,6 +114,62 @@ export interface RetrievalResult extends MemorySearchResult {
   };
 }
 
+export interface RetrievalDiagnostics {
+  source?: RetrievalContext["source"];
+  mode: RetrievalConfig["mode"];
+  originalQuery: string;
+  bm25Query: string | null;
+  queryExpanded: boolean;
+  limit: number;
+  scopeFilter?: string[];
+  category?: string;
+  vectorResultCount: number;
+  bm25ResultCount: number;
+  fusedResultCount: number;
+  finalResultCount: number;
+  stageCounts: {
+    afterMinScore: number;
+    rerankInput: number;
+    afterRerank: number;
+    afterRecency: number;
+    afterImportance: number;
+    afterLengthNorm: number;
+    afterTimeDecay: number;
+    afterHardMinScore: number;
+    afterNoiseFilter: number;
+    afterDiversity: number;
+  };
+  dropSummary: Array<{
+    stage:
+      | "minScore"
+      | "rerankWindow"
+      | "rerank"
+      | "recencyBoost"
+      | "importanceWeight"
+      | "lengthNorm"
+      | "timeDecay"
+      | "hardMinScore"
+      | "noiseFilter"
+      | "diversity"
+      | "limit";
+    before: number;
+    after: number;
+    dropped: number;
+  }>;
+  failureStage?:
+    | "vector.embedQuery"
+    | "vector.vectorSearch"
+    | "vector.postProcess"
+    | "hybrid.embedQuery"
+    | "hybrid.vectorSearch"
+    | "hybrid.bm25Search"
+    | "hybrid.parallelSearch"
+    | "hybrid.fuseResults"
+    | "hybrid.rerank"
+    | "hybrid.postProcess";
+  errorMessage?: string;
+}
+
 // ============================================================================
 // Default Configuration
 // ============================================================================
@@ -119,6 +178,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
   mode: "hybrid",
   vectorWeight: 0.7,
   bm25Weight: 0.3,
+  queryExpansion: true,
   minScore: 0.3,
   rerank: "cross-encoder",
   candidatePoolSize: 20,
@@ -154,6 +214,116 @@ function clamp01WithFloor(value: number, floor: number): number {
   return Math.max(safeFloor, clamp01(value, safeFloor));
 }
 
+type TaggedRetrievalError = Error & {
+  retrievalFailureStage?: NonNullable<RetrievalDiagnostics["failureStage"]>;
+};
+
+function attachFailureStage(
+  error: unknown,
+  stage: NonNullable<RetrievalDiagnostics["failureStage"]>,
+): TaggedRetrievalError {
+  const tagged =
+    error instanceof Error ? (error as TaggedRetrievalError) : new Error(String(error));
+  tagged.retrievalFailureStage = stage;
+  return tagged;
+}
+
+function extractFailureStage(
+  error: unknown,
+): RetrievalDiagnostics["failureStage"] | undefined {
+  return error instanceof Error
+    ? (error as TaggedRetrievalError).retrievalFailureStage
+    : undefined;
+}
+
+function buildDropSummary(
+  diagnostics: RetrievalDiagnostics,
+): RetrievalDiagnostics["dropSummary"] {
+  const stageDrops = [
+    {
+      order: 0,
+      stage: "minScore" as const,
+      before:
+        diagnostics.mode === "vector"
+          ? diagnostics.vectorResultCount
+          : diagnostics.fusedResultCount,
+      after: diagnostics.stageCounts.afterMinScore,
+    },
+    {
+      order: 1,
+      stage: "rerankWindow" as const,
+      before: diagnostics.stageCounts.afterMinScore,
+      after: diagnostics.stageCounts.rerankInput,
+    },
+    {
+      order: 2,
+      stage: "rerank" as const,
+      before: diagnostics.stageCounts.rerankInput,
+      after: diagnostics.stageCounts.afterRerank,
+    },
+    {
+      order: 3,
+      stage: "recencyBoost" as const,
+      before: diagnostics.stageCounts.afterRerank,
+      after: diagnostics.stageCounts.afterRecency,
+    },
+    {
+      order: 4,
+      stage: "importanceWeight" as const,
+      before: diagnostics.stageCounts.afterRecency,
+      after: diagnostics.stageCounts.afterImportance,
+    },
+    {
+      order: 5,
+      stage: "lengthNorm" as const,
+      before: diagnostics.stageCounts.afterImportance,
+      after: diagnostics.stageCounts.afterLengthNorm,
+    },
+    {
+      order: 6,
+      stage: "hardMinScore" as const,
+      before: diagnostics.stageCounts.afterLengthNorm,
+      after: diagnostics.stageCounts.afterHardMinScore,
+    },
+    {
+      order: 7,
+      stage: "timeDecay" as const,
+      before: diagnostics.stageCounts.afterHardMinScore,
+      after: diagnostics.stageCounts.afterTimeDecay,
+    },
+    {
+      order: 8,
+      stage: "noiseFilter" as const,
+      before: diagnostics.stageCounts.afterTimeDecay,
+      after: diagnostics.stageCounts.afterNoiseFilter,
+    },
+    {
+      order: 9,
+      stage: "diversity" as const,
+      before: diagnostics.stageCounts.afterNoiseFilter,
+      after: diagnostics.stageCounts.afterDiversity,
+    },
+    {
+      order: 10,
+      stage: "limit" as const,
+      before: diagnostics.stageCounts.afterDiversity,
+      after: diagnostics.finalResultCount,
+    },
+  ];
+
+  return stageDrops
+    .map(({ order, stage, before, after }) => ({
+      order,
+      stage,
+      before,
+      after,
+      dropped: Math.max(0, before - after),
+    }))
+    .filter((drop) => drop.dropped > 0)
+    .sort((a, b) => b.dropped - a.dropped || a.order - b.order)
+    .map(({ order: _order, ...drop }) => drop);
+}
+
 // ============================================================================
 // Rerank Provider Adapters
 // ============================================================================
@@ -360,6 +530,7 @@ function cosineSimilarity(a: number[], b: number[]): number {
 
 export class MemoryRetriever {
   private accessTracker: AccessTracker | null = null;
+  private lastDiagnostics: RetrievalDiagnostics | null = null;
   private tierManager: TierManager | null = null;
   private _statsCollector: RetrievalStatsCollector | null = null;
 
@@ -393,42 +564,101 @@ export class MemoryRetriever {
   async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
     const { query, limit, scopeFilter, category, source } = context;
     const safeLimit = clampInt(limit, 1, 20);
+    this.lastDiagnostics = null;
+    const diagnostics: RetrievalDiagnostics = {
+      source,
+      mode: this.config.mode,
+      originalQuery: query,
+      bm25Query: this.config.mode === "vector" ? null : query,
+      queryExpanded: false,
+      limit: safeLimit,
+      scopeFilter: scopeFilter ? [...scopeFilter] : undefined,
+      category,
+      vectorResultCount: 0,
+      bm25ResultCount: 0,
+      fusedResultCount: 0,
+      finalResultCount: 0,
+      stageCounts: {
+        afterMinScore: 0,
+        rerankInput: 0,
+        afterRerank: 0,
+        afterRecency: 0,
+        afterImportance: 0,
+        afterLengthNorm: 0,
+        afterTimeDecay: 0,
+        afterHardMinScore: 0,
+        afterNoiseFilter: 0,
+        afterDiversity: 0,
+      },
+      dropSummary: [],
+    };
 
-    // Create trace only when stats collector is active (zero overhead otherwise)
-    const trace = this._statsCollector ? new TraceCollector() : undefined;
-
-    // Check if query contains tag prefixes -> use BM25-only + mustContain
-    const tagTokens = this.extractTagTokens(query);
-    let results: RetrievalResult[];
+    try {
+      // Create trace only when stats collector is active (zero overhead otherwise)
+      const trace = this._statsCollector ? new TraceCollector() : undefined;
+
+      // Check if query contains tag prefixes -> use BM25-only + mustContain
+      const tagTokens = this.extractTagTokens(query);
+      let results: RetrievalResult[];
+      if (tagTokens.length > 0) {
+        results = await this.bm25OnlyRetrieval(
+          query,
+          tagTokens,
+          safeLimit,
+          scopeFilter,
+          category,
+          trace,
+          diagnostics,
+        );
+      } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
+        results = await this.vectorOnlyRetrieval(
+          query,
+          safeLimit,
+          scopeFilter,
+          category,
+          trace,
+          diagnostics,
+        );
+      } else {
+        results = await this.hybridRetrieval(
+          query,
+          safeLimit,
+          scopeFilter,
+          category,
+          trace,
+          source,
+          diagnostics,
+        );
+      }
 
-    if (tagTokens.length > 0) {
-      results = await this.bm25OnlyRetrieval(
-        query, tagTokens, safeLimit, scopeFilter, category, trace,
-      );
-    } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
-      results = await this.vectorOnlyRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
-      );
-    } else {
-      results = await this.hybridRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
-      );
-    }
+      diagnostics.finalResultCount = results.length;
+      diagnostics.dropSummary = buildDropSummary(diagnostics);
+      this.lastDiagnostics = diagnostics;
+
+      if (trace && this._statsCollector) {
+        const mode = tagTokens.length > 0
+          ? "bm25"
+          : (this.config.mode === "vector" || !this.store.hasFtsSupport)
+            ? "vector"
+            : "hybrid";
+        const finalTrace = trace.finalize(query, mode);
+        this._statsCollector.recordQuery(finalTrace, source || "unknown");
+      }
 
-    // Feed completed trace to stats collector
-    if (trace && this._statsCollector) {
-      const mode = tagTokens.length > 0 ? "bm25"
-        : (this.config.mode === "vector" || !this.store.hasFtsSupport) ? "vector" : "hybrid";
-      const finalTrace = trace.finalize(query, mode);
-      this._statsCollector.recordQuery(finalTrace, source || "unknown");
-    }
+      // Record access for reinforcement (manual recall only)
+      if (this.accessTracker && source === "manual" && results.length > 0) {
+        this.accessTracker.recordAccess(results.map((r) => r.entry.id));
+      }
 
-    // Record access for reinforcement (manual recall only)
-    if (this.accessTracker && source === "manual" && results.length > 0) {
-      this.accessTracker.recordAccess(results.map((r) => r.entry.id));
+      return results;
+    } catch (error) {
+      diagnostics.finalResultCount = 0;
+      diagnostics.dropSummary = buildDropSummary(diagnostics);
+      diagnostics.errorMessage =
+        error instanceof Error ? error.message : String(error);
+      this.lastDiagnostics = diagnostics;
+      throw error;
     }
-
-    return results;
   }
 
   /**
@@ -489,63 +719,72 @@ export class MemoryRetriever {
     scopeFilter?: string[],
     category?: string,
     trace?: TraceCollector,
+    diagnostics?: RetrievalDiagnostics,
   ): Promise<RetrievalResult[]> {
-    const queryVector = await this.embedder.embedQuery(query);
-
-    trace?.startStage("vector_search", []);
-    const results = await this.store.vectorSearch(
-      queryVector, limit, this.config.minScore, scopeFilter, { excludeInactive: true },
-    );
-    const filtered = category
-      ? results.filter((r) => r.entry.category === category) : results;
-    const mapped = filtered.map(
-      (result, index) =>
-        ({ ...result, sources: { vector: { score: result.score, rank: index + 1 } } }) as RetrievalResult,
-    );
-    if (trace) {
-      trace.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score));
-    }
-
-    let weighted: RetrievalResult[];
-    if (this.decayEngine) {
-      weighted = mapped;
-    } else {
-      trace?.startStage("recency_boost", mapped.map((r) => r.entry.id));
-      const boosted = this.applyRecencyBoost(mapped);
-      trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
-
-      trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
-      weighted = this.applyImportanceWeight(boosted);
-      trace?.endStage(weighted.map((r) => r.entry.id), weighted.map((r) => r.score));
-    }
-
-    trace?.startStage("length_normalization", weighted.map((r) => r.entry.id));
-    const lengthNormalized = this.applyLengthNormalization(weighted);
-    trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
-
-    trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
-    const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
-    trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
+    let failureStage: RetrievalDiagnostics["failureStage"] = "vector.embedQuery";
+    try {
+      const queryVector = await this.embedder.embedQuery(query);
+      failureStage = "vector.vectorSearch";
+      const results = await this.store.vectorSearch(
+        queryVector,
+        limit,
+        this.config.minScore,
+        scopeFilter,
+        { excludeInactive: true },
+      );
 
-    const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
-    trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
-    const lifecycleRanked = this.decayEngine
-      ? this.applyDecayBoost(hardFiltered)
-      : this.applyTimeDecay(hardFiltered);
-    trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
+      const filtered = category
+        ? results.filter((r) => r.entry.category === category)
+        : results;
+      if (diagnostics) {
+        diagnostics.vectorResultCount = filtered.length;
+        diagnostics.fusedResultCount = filtered.length;
+        diagnostics.stageCounts.afterMinScore = filtered.length;
+        diagnostics.stageCounts.rerankInput = filtered.length;
+      }
 
-    trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
-    const denoised = this.config.filterNoise
-      ? filterNoise(lifecycleRanked, r => r.entry.text)
-      : lifecycleRanked;
-    trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
+      const mapped = filtered.map(
+        (result, index) =>
+          ({
+            ...result,
+            sources: {
+              vector: { score: result.score, rank: index + 1 },
+            },
+          }) as RetrievalResult,
+      );
 
-    trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
-    const deduplicated = this.applyMMRDiversity(denoised);
-    const finalResults = deduplicated.slice(0, limit);
-    trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
+      failureStage = "vector.postProcess";
+      const recencyBoosted = this.applyRecencyBoost(mapped);
+      if (diagnostics) diagnostics.stageCounts.afterRecency = recencyBoosted.length;
+      const weighted = this.decayEngine
+        ? recencyBoosted
+        : this.applyImportanceWeight(recencyBoosted);
+      if (diagnostics) diagnostics.stageCounts.afterImportance = weighted.length;
+      const lengthNormalized = this.applyLengthNormalization(weighted);
+      if (diagnostics) diagnostics.stageCounts.afterLengthNorm = lengthNormalized.length;
+      const hardFiltered = lengthNormalized.filter((r) => r.score >= this.config.hardMinScore);
+      if (diagnostics) diagnostics.stageCounts.afterHardMinScore = hardFiltered.length;
+      const timeOrDecayRanked = this.decayEngine
+        ? this.applyDecayBoost(hardFiltered)
+        : this.applyTimeDecay(hardFiltered);
+      if (diagnostics) diagnostics.stageCounts.afterTimeDecay = timeOrDecayRanked.length;
+      const denoised = this.config.filterNoise
+        ? filterNoise(timeOrDecayRanked, (r) => r.entry.text)
+        : timeOrDecayRanked;
+      if (diagnostics) diagnostics.stageCounts.afterNoiseFilter = denoised.length;
+      const deduplicated = this.applyMMRDiversity(denoised);
+      if (diagnostics) {
+        diagnostics.stageCounts.afterRerank = mapped.length;
+        diagnostics.stageCounts.afterDiversity = deduplicated.length;
+      }
 
-    return finalResults;
+      return deduplicated.slice(0, limit);
+    } catch (error) {
+      if (diagnostics) {
+        diagnostics.failureStage = extractFailureStage(error) ?? failureStage;
+      }
+      throw error;
+    }
   }
 
   private async bm25OnlyRetrieval(
@@ -555,61 +794,93 @@ export class MemoryRetriever {
     scopeFilter?: string[],
     category?: string,
     trace?: TraceCollector,
+    diagnostics?: RetrievalDiagnostics,
   ): Promise<RetrievalResult[]> {
     const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
 
     trace?.startStage("bm25_search", []);
     const bm25Results = await this.store.bm25Search(
-      query, candidatePoolSize, scopeFilter, { excludeInactive: true },
+      query,
+      candidatePoolSize,
+      scopeFilter,
+      { excludeInactive: true },
     );
     const categoryFiltered = category
-      ? bm25Results.filter((r) => r.entry.category === category) : bm25Results;
+      ? bm25Results.filter((r) => r.entry.category === category)
+      : bm25Results;
     const mustContainFiltered = categoryFiltered.filter((r) => {
       const textLower = r.entry.text.toLowerCase();
       return tagTokens.every((t) => textLower.includes(t.toLowerCase()));
     });
     const mapped = mustContainFiltered.map(
       (result, index) =>
-        ({ ...result, sources: { bm25: { score: result.score, rank: index + 1 } } }) as RetrievalResult,
+        ({
+          ...result,
+          sources: { bm25: { score: result.score, rank: index + 1 } },
+        }) as RetrievalResult,
     );
     trace?.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score));
+    if (diagnostics) {
+      diagnostics.bm25Query = query;
+      diagnostics.bm25ResultCount = mapped.length;
+      diagnostics.fusedResultCount = mapped.length;
+      diagnostics.stageCounts.afterMinScore = mapped.length;
+      diagnostics.stageCounts.rerankInput = mapped.length;
+      diagnostics.stageCounts.afterRerank = mapped.length;
+    }
 
     let temporallyRanked: RetrievalResult[];
     if (this.decayEngine) {
       temporallyRanked = mapped;
+      if (diagnostics) {
+        diagnostics.stageCounts.afterRecency = mapped.length;
+        diagnostics.stageCounts.afterImportance = mapped.length;
+      }
     } else {
       trace?.startStage("recency_boost", mapped.map((r) => r.entry.id));
       const boosted = this.applyRecencyBoost(mapped);
       trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterRecency = boosted.length;
 
       trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
       temporallyRanked = this.applyImportanceWeight(boosted);
-      trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score));
+      trace?.endStage(
+        temporallyRanked.map((r) => r.entry.id),
+        temporallyRanked.map((r) => r.score),
+      );
+      if (diagnostics) diagnostics.stageCounts.afterImportance = temporallyRanked.length;
     }
 
     trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id));
     const lengthNormalized = this.applyLengthNormalization(temporallyRanked);
     trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
+    if (diagnostics) diagnostics.stageCounts.afterLengthNorm = lengthNormalized.length;
 
     trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
-    const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
+    const hardFiltered = lengthNormalized.filter((r) => r.score >= this.config.hardMinScore);
     trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
+    if (diagnostics) diagnostics.stageCounts.afterHardMinScore = hardFiltered.length;
 
     const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
     trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
     const lifecycleRanked = this.decayEngine
-      ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered);
+      ? this.applyDecayBoost(hardFiltered)
+      : this.applyTimeDecay(hardFiltered);
     trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
+    if (diagnostics) diagnostics.stageCounts.afterTimeDecay = lifecycleRanked.length;
 
     trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
     const denoised = this.config.filterNoise
-      ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked;
+      ? filterNoise(lifecycleRanked, (r) => r.entry.text)
+      : lifecycleRanked;
     trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
+    if (diagnostics) diagnostics.stageCounts.afterNoiseFilter = denoised.length;
 
     trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
     const deduplicated = this.applyMMRDiversity(denoised);
     const finalResults = deduplicated.slice(0, limit);
     trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
+    if (diagnostics) diagnostics.stageCounts.afterDiversity = deduplicated.length;
 
     return finalResults;
   }
@@ -620,87 +891,150 @@ export class MemoryRetriever {
     scopeFilter?: string[],
     category?: string,
     trace?: TraceCollector,
+    source?: RetrievalContext["source"],
+    diagnostics?: RetrievalDiagnostics,
   ): Promise<RetrievalResult[]> {
-    const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
-    const queryVector = await this.embedder.embedQuery(query);
-
-    // Run vector and BM25 searches in parallel.
-    // Trace as a single "parallel_search" stage since both run concurrently —
-    // splitting into separate sequential stages would misrepresent timing.
-    trace?.startStage("parallel_search", []);
-    const [vectorResults, bm25Results] = await Promise.all([
-      this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category),
-      this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
-    ]);
-    if (trace) {
-      const allSearchIds = [
-        ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]),
-      ];
-      const allScores = [...vectorResults.map((r) => r.score), ...bm25Results.map((r) => r.score)];
-      trace.endStage(allSearchIds, allScores);
-    }
+    let failureStage: RetrievalDiagnostics["failureStage"] = "hybrid.embedQuery";
+    try {
+      const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
+      const queryVector = await this.embedder.embedQuery(query);
+      const bm25Query = this.buildBM25Query(query, source);
+      if (diagnostics) {
+        diagnostics.bm25Query = bm25Query;
+        diagnostics.queryExpanded = bm25Query !== query;
+      }
 
-    // Fuse results using RRF
-    const allInputIds = [
-      ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]),
-    ];
-    trace?.startStage("rrf_fusion", allInputIds);
-    const fusedResults = await this.fuseResults(vectorResults, bm25Results);
-    trace?.endStage(fusedResults.map((r) => r.entry.id), fusedResults.map((r) => r.score));
-
-    // Apply minimum score threshold
-    trace?.startStage("min_score_filter", fusedResults.map((r) => r.entry.id));
-    const filtered = fusedResults.filter((r) => r.score >= this.config.minScore);
-    trace?.endStage(filtered.map((r) => r.entry.id), filtered.map((r) => r.score));
-
-    // Rerank if enabled — only emit trace stage when rerank actually runs
-    let reranked: RetrievalResult[];
-    if (this.config.rerank !== "none") {
-      trace?.startStage("rerank", filtered.map((r) => r.entry.id));
-      reranked = await this.rerankResults(query, queryVector, filtered.slice(0, limit * 2));
-      trace?.endStage(reranked.map((r) => r.entry.id), reranked.map((r) => r.score));
-    } else {
-      reranked = filtered;
-    }
+      trace?.startStage("parallel_search", []);
+      failureStage = "hybrid.parallelSearch";
+      const [vectorResults, bm25Results] = await Promise.all([
+        this.runVectorSearch(
+          queryVector,
+          candidatePoolSize,
+          scopeFilter,
+          category,
+        ).catch((error) => {
+          throw attachFailureStage(error, "hybrid.vectorSearch");
+        }),
+        this.runBM25Search(
+          bm25Query,
+          candidatePoolSize,
+          scopeFilter,
+          category,
+        ).catch((error) => {
+          throw attachFailureStage(error, "hybrid.bm25Search");
+        }),
+      ]);
+      if (diagnostics) {
+        diagnostics.vectorResultCount = vectorResults.length;
+        diagnostics.bm25ResultCount = bm25Results.length;
+      }
+      if (trace) {
+        const allSearchIds = [
+          ...new Set([
+            ...vectorResults.map((r) => r.entry.id),
+            ...bm25Results.map((r) => r.entry.id),
+          ]),
+        ];
+        const allScores = [
+          ...vectorResults.map((r) => r.score),
+          ...bm25Results.map((r) => r.score),
+        ];
+        trace.endStage(allSearchIds, allScores);
+      }
 
-    let temporallyRanked: RetrievalResult[];
-    if (this.decayEngine) {
-      temporallyRanked = reranked;
-    } else {
-      trace?.startStage("recency_boost", reranked.map((r) => r.entry.id));
-      const boosted = this.applyRecencyBoost(reranked);
-      trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
+      failureStage = "hybrid.fuseResults";
+      const allInputIds = [
+        ...new Set([
+          ...vectorResults.map((r) => r.entry.id),
+          ...bm25Results.map((r) => r.entry.id),
+        ]),
+      ];
+      trace?.startStage("rrf_fusion", allInputIds);
+      const fusedResults = await this.fuseResults(vectorResults, bm25Results);
+      trace?.endStage(fusedResults.map((r) => r.entry.id), fusedResults.map((r) => r.score));
+      if (diagnostics) diagnostics.fusedResultCount = fusedResults.length;
+
+      trace?.startStage("min_score_filter", fusedResults.map((r) => r.entry.id));
+      const filtered = fusedResults.filter((r) => r.score >= this.config.minScore);
+      trace?.endStage(filtered.map((r) => r.entry.id), filtered.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterMinScore = filtered.length;
+
+      const rerankInput =
+        this.config.rerank !== "none" ? filtered.slice(0, limit * 2) : filtered;
+      if (diagnostics) diagnostics.stageCounts.rerankInput = rerankInput.length;
+
+      let reranked: RetrievalResult[];
+      failureStage = "hybrid.rerank";
+      if (this.config.rerank !== "none") {
+        trace?.startStage("rerank", filtered.map((r) => r.entry.id));
+        reranked = await this.rerankResults(query, queryVector, rerankInput);
+        trace?.endStage(reranked.map((r) => r.entry.id), reranked.map((r) => r.score));
+      } else {
+        reranked = filtered;
+      }
+      if (diagnostics) diagnostics.stageCounts.afterRerank = reranked.length;
+
+      let temporallyRanked: RetrievalResult[];
+      failureStage = "hybrid.postProcess";
+      if (this.decayEngine) {
+        temporallyRanked = reranked;
+        if (diagnostics) {
+          diagnostics.stageCounts.afterRecency = reranked.length;
+          diagnostics.stageCounts.afterImportance = reranked.length;
+        }
+      } else {
+        trace?.startStage("recency_boost", reranked.map((r) => r.entry.id));
+        const boosted = this.applyRecencyBoost(reranked);
+        trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
+        if (diagnostics) diagnostics.stageCounts.afterRecency = boosted.length;
+
+        trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
+        temporallyRanked = this.applyImportanceWeight(boosted);
+        trace?.endStage(
+          temporallyRanked.map((r) => r.entry.id),
+          temporallyRanked.map((r) => r.score),
+        );
+        if (diagnostics) diagnostics.stageCounts.afterImportance = temporallyRanked.length;
+      }
 
-      trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
-      temporallyRanked = this.applyImportanceWeight(boosted);
-      trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score));
+      trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id));
+      const lengthNormalized = this.applyLengthNormalization(temporallyRanked);
+      trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterLengthNorm = lengthNormalized.length;
+
+      trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
+      const hardFiltered = lengthNormalized.filter((r) => r.score >= this.config.hardMinScore);
+      trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterHardMinScore = hardFiltered.length;
+
+      const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
+      trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
+      const lifecycleRanked = this.decayEngine
+        ? this.applyDecayBoost(hardFiltered)
+        : this.applyTimeDecay(hardFiltered);
+      trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterTimeDecay = lifecycleRanked.length;
+
+      trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
+      const denoised = this.config.filterNoise
+        ? filterNoise(lifecycleRanked, (r) => r.entry.text)
+        : lifecycleRanked;
+      trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterNoiseFilter = denoised.length;
+
+      trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
+      const deduplicated = this.applyMMRDiversity(denoised);
+      const finalResults = deduplicated.slice(0, limit);
+      trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
+      if (diagnostics) diagnostics.stageCounts.afterDiversity = deduplicated.length;
+
+      return finalResults;
+    } catch (error) {
+      if (diagnostics) {
+        diagnostics.failureStage = extractFailureStage(error) ?? failureStage;
+      }
+      throw error;
     }
-
-    trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id));
-    const lengthNormalized = this.applyLengthNormalization(temporallyRanked);
-    trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
-
-    trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
-    const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
-    trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
-
-    const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
-    trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
-    const lifecycleRanked = this.decayEngine
-      ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered);
-    trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
-
-    trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
-    const denoised = this.config.filterNoise
-      ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked;
-    trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
-
-    trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
-    const deduplicated = this.applyMMRDiversity(denoised);
-    const finalResults = deduplicated.slice(0, limit);
-    trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
-
-    return finalResults;
   }
 
   private async runVectorSearch(
@@ -747,6 +1081,15 @@ export class MemoryRetriever {
     }));
   }
 
+  private buildBM25Query(
+    query: string,
+    source?: RetrievalContext["source"],
+  ): string {
+    if (!this.config.queryExpansion) return query;
+    if (source !== "manual" && source !== "cli") return query;
+    return expandQuery(query);
+  }
+
   private async fuseResults(
     vectorResults: Array<MemorySearchResult & { rank: number }>,
     bm25Results: Array<MemorySearchResult & { rank: number }>,
@@ -840,9 +1183,11 @@ export class MemoryRetriever {
     }
 
     // Try cross-encoder rerank via configured provider API
-    if (this.config.rerank === "cross-encoder" && this.config.rerankApiKey) {
+    const provider = this.config.rerankProvider || "jina";
+    const hasApiKey = !!this.config.rerankApiKey;
+
+    if (this.config.rerank === "cross-encoder" && hasApiKey) {
       try {
-        const provider = this.config.rerankProvider || "jina";
         const model = this.config.rerankModel || "jina-reranker-v3";
         const endpoint =
           this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
@@ -851,7 +1196,7 @@ export class MemoryRetriever {
         // Build provider-specific request
         const { headers, body } = buildRerankRequest(
           provider,
-          this.config.rerankApiKey,
+          this.config.rerankApiKey || "",
           model,
           query,
           documents,
@@ -1245,6 +1590,20 @@ export class MemoryRetriever {
     return { ...this.config };
   }
 
+  getLastDiagnostics(): RetrievalDiagnostics | null {
+    if (!this.lastDiagnostics) return null;
+    return {
+      ...this.lastDiagnostics,
+      scopeFilter: this.lastDiagnostics.scopeFilter
+        ? [...this.lastDiagnostics.scopeFilter]
+        : undefined,
+      stageCounts: { ...this.lastDiagnostics.stageCounts },
+      dropSummary: this.lastDiagnostics.dropSummary.map((drop) => ({
+        ...drop,
+      })),
+    };
+  }
+
   // Test retrieval system
   async test(query = "test query"): Promise<{
     success: boolean;
diff --git a/test/query-expander.test.mjs b/test/query-expander.test.mjs
new file mode 100644
index 00000000..6035281d
--- /dev/null
+++ b/test/query-expander.test.mjs
@@ -0,0 +1,774 @@
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { Command } from "commander";
+import jitiFactory from "jiti";
+
+const testDir = path.dirname(fileURLToPath(import.meta.url));
+const pluginSdkStubPath = path.resolve(testDir, "helpers", "openclaw-plugin-sdk-stub.mjs");
+const jiti = jitiFactory(import.meta.url, {
+  interopDefault: true,
+  alias: {
+    "openclaw/plugin-sdk": pluginSdkStubPath,
+  },
+});
+
+const { expandQuery } = jiti("../src/query-expander.ts");
+const { createRetriever } = jiti("../src/retriever.ts");
+const { createMemoryCLI } = jiti("../cli.ts");
+
+function buildResult(id = "memory-1", text = "服务崩溃 error") {
+  return {
+    entry: {
+      id,
+      text,
+      vector: [0.1, 0.2, 0.3],
+      category: "other",
+      scope: "global",
+      importance: 0.7,
+      timestamp: 1700000000000,
+      metadata: "{}",
+    },
+    score: 0.9,
+  };
+}
+
+describe("query expander", () => {
+  it("expands colloquial Chinese crash queries with technical BM25 terms", () => {
+    const expanded = expandQuery("服务挂了");
+    assert.notEqual(expanded, "服务挂了");
+    assert.match(expanded, /崩溃/);
+    assert.match(expanded, /crash/);
+    assert.match(expanded, /报错|error/);
+  });
+
+  it("avoids english substring false positives", () => {
+    assert.equal(expandQuery("memorybank retention"), "memorybank retention");
+    assert.equal(expandQuery("configurable loader"), "configurable loader");
+  });
+});
+
+describe("retriever BM25 query expansion gating", () => {
+  function createRetrieverHarness(
+    config = {},
+    storeOverrides = {},
+    embedderOverrides = {},
+  ) {
+    const bm25Queries = [];
+    const embeddedQueries = [];
+
+    const retriever = createRetriever(
+      {
+        hasFtsSupport: true,
+        async vectorSearch() {
+          return [];
+        },
+        async bm25Search(query) {
+          bm25Queries.push(query);
+          return [buildResult()];
+        },
+        async hasId() {
+          return true;
+        },
+        ...storeOverrides,
+      },
+      {
+        async embedQuery(query) {
+          embeddedQueries.push(query);
+          return [0.1, 0.2, 0.3];
+        },
+        ...embedderOverrides,
+      },
+      {
+        rerank: "none",
+        filterNoise: false,
+        minScore: 0,
+        hardMinScore: 0,
+        candidatePoolSize: 5,
+        ...config,
+      },
+    );
+
+    return { retriever, bm25Queries, embeddedQueries };
+  }
+
+  it("expands only the BM25 leg for manual retrieval", async () => {
+    const { retriever, bm25Queries, embeddedQueries } = createRetrieverHarness();
+
+    const results = await retriever.retrieve({
+      query: "服务挂了",
+      limit: 1,
+      source: "manual",
+    });
+
+    assert.equal(results.length, 1);
+    assert.deepEqual(embeddedQueries, ["服务挂了"]);
+    assert.equal(bm25Queries.length, 1);
+    assert.notEqual(bm25Queries[0], "服务挂了");
+    assert.match(bm25Queries[0], /crash/);
+    assert.deepEqual(retriever.getLastDiagnostics(), {
+      source: "manual",
+      mode: "hybrid",
+      originalQuery: "服务挂了",
+      bm25Query: bm25Queries[0],
+      queryExpanded: true,
+      limit: 1,
+      scopeFilter: undefined,
+      category: undefined,
+      vectorResultCount: 0,
+      bm25ResultCount: 1,
+      fusedResultCount: 1,
+      finalResultCount: 1,
+      stageCounts: {
+        afterMinScore: 1,
+        rerankInput: 1,
+        afterRerank: 1,
+        afterRecency: 1,
+        afterImportance: 1,
+        afterLengthNorm: 1,
+        afterTimeDecay: 1,
+        afterHardMinScore: 1,
+        afterNoiseFilter: 1,
+        afterDiversity: 1,
+      },
+      dropSummary: [],
+    });
+  });
+
+  it("keeps auto-recall and unspecified retrieval on the original query", async () => {
+    const autoRecallHarness = createRetrieverHarness();
+    await autoRecallHarness.retriever.retrieve({
+      query: "服务挂了",
+      limit: 1,
+      source: "auto-recall",
+    });
+    assert.deepEqual(autoRecallHarness.bm25Queries, ["服务挂了"]);
+
+    const unspecifiedHarness = createRetrieverHarness();
+    await unspecifiedHarness.retriever.retrieve({
+      query: "服务挂了",
+      limit: 1,
+    });
+    assert.deepEqual(unspecifiedHarness.bm25Queries, ["服务挂了"]);
+  });
+
+  it("honors retrieval.queryExpansion = false", async () => {
+    const { retriever, bm25Queries } = createRetrieverHarness({
+      queryExpansion: false,
+    });
+
+    await retriever.retrieve({
+      query: "服务挂了",
+      limit: 1,
+      source: "manual",
+    });
+
+    assert.deepEqual(bm25Queries, ["服务挂了"]);
+  });
+
+  it("summarizes the biggest count drops without changing retrieval behavior", async () => {
+    const { retriever } = createRetrieverHarness(
+      {
+        rerank: "none",
+        filterNoise: false,
+        minScore: 0,
+        hardMinScore: 0,
+      },
+      {
+        async bm25Search() {
+          return [
+            buildResult("memory-1", "故障一"),
+            buildResult("memory-2", "故障二"),
+            buildResult("memory-3", "故障三"),
+          ];
+        },
+      },
+    );
+
+    const results = await retriever.retrieve({
+      query: "普通查询",
+      limit: 1,
+      source: "manual",
+    });
+
+    assert.equal(results.length, 1);
+    assert.deepEqual(retriever.getLastDiagnostics()?.dropSummary, [
+      {
+        stage: "limit",
+        before: 3,
+        after: 1,
+        dropped: 2,
+      },
+    ]);
+  });
+
+  it("captures partial diagnostics when retrieval fails before search completes", async () => {
+    const { retriever } = createRetrieverHarness(
+      {},
+      {},
+      {
+        async embedQuery() {
+          throw new Error("simulated embed failure");
+        },
+      },
+    );
+
+    await assert.rejects(
+      retriever.retrieve({
+        query: "服务挂了",
+        limit: 1,
+        source: "manual",
+      }),
+      /simulated embed failure/,
+    );
+
+    assert.deepEqual(retriever.getLastDiagnostics(), {
+      source: "manual",
+      mode: "hybrid",
+      originalQuery: "服务挂了",
+      bm25Query: "服务挂了",
+      queryExpanded: false,
+      limit: 1,
+      scopeFilter: undefined,
+      category: undefined,
+      vectorResultCount: 0,
+      bm25ResultCount: 0,
+      fusedResultCount: 0,
+      finalResultCount: 0,
+      stageCounts: {
+        afterMinScore: 0,
+        rerankInput: 0,
+        afterRerank: 0,
+        afterRecency: 0,
+        afterImportance: 0,
+        afterLengthNorm: 0,
+        afterTimeDecay: 0,
+        afterHardMinScore: 0,
+        afterNoiseFilter: 0,
+        afterDiversity: 0,
+      },
+      dropSummary: [],
+      failureStage: "hybrid.embedQuery",
+      errorMessage: "simulated embed failure",
+    });
+  });
+
+  it("distinguishes vector-search failures inside the hybrid parallel stage", async () => {
+    const { retriever } = createRetrieverHarness(
+      {},
+      {
+        async vectorSearch() {
+          throw new Error("simulated vector search failure");
+        },
+      },
+    );
+
+    await assert.rejects(
+      retriever.retrieve({
+        query: "普通查询",
+        limit: 1,
+        source: "manual",
+      }),
+      /simulated vector search failure/,
+    );
+
+    assert.equal(
+      retriever.getLastDiagnostics()?.failureStage,
+      "hybrid.vectorSearch",
+    );
+    assert.equal(
+      retriever.getLastDiagnostics()?.errorMessage,
+      "simulated vector search failure",
+    );
+  });
+
+  it("distinguishes bm25-search failures inside the hybrid parallel stage", async () => {
+    const { retriever } = createRetrieverHarness(
+      {},
+      {
+        async bm25Search() {
+          throw new Error("simulated bm25 search failure");
+        },
+      },
+    );
+
+    await assert.rejects(
+      retriever.retrieve({
+        query: "普通查询",
+        limit: 1,
+        source: "manual",
+      }),
+      /simulated bm25 search failure/,
+    );
+
+    assert.equal(
+      retriever.getLastDiagnostics()?.failureStage,
+      "hybrid.bm25Search",
+    );
+    assert.equal(
+      retriever.getLastDiagnostics()?.errorMessage,
+      "simulated bm25 search failure",
+    );
+  });
+});
+
+describe("cli search source tagging", () => {
+  it("marks search requests as cli so query expansion stays scoped to interactive CLI recall", async () => {
+    const searchCalls = [];
+    const logs = [];
+
+    const program = new Command();
+    program.exitOverride();
+    createMemoryCLI({
+      store: {
+        async list() {
+          return [];
+        },
+        async stats() {
+          return {
+            totalCount: 0,
+            scopeCounts: {},
+            categoryCounts: {},
+          };
+        },
+      },
+      retriever: {
+        async retrieve(params) {
+          searchCalls.push(params);
+          return [
+            {
+              ...buildResult("memory-cli", "CLI search hit"),
+              sources: {
+                vector: { score: 0.9, rank: 1 },
+              },
+            },
+          ];
+        },
+        getConfig() {
+          return { mode: "hybrid" };
+        },
+        getLastDiagnostics() {
+          return {
+            source: "cli",
+            mode: "hybrid",
+            originalQuery: "服务挂了",
+            bm25Query: "服务挂了 崩溃 crash error 报错 宕机",
+            queryExpanded: true,
+            limit: 10,
+            scopeFilter: undefined,
+            category: undefined,
+            vectorResultCount: 0,
+            bm25ResultCount: 3,
+            fusedResultCount: 3,
+            finalResultCount: 1,
+            stageCounts: {
+              afterMinScore: 3,
+              rerankInput: 3,
+              afterRerank: 3,
+              afterRecency: 3,
+              afterImportance: 3,
+              afterLengthNorm: 3,
+              afterTimeDecay: 3,
+              afterHardMinScore: 3,
+              afterNoiseFilter: 3,
+              afterDiversity: 3,
+            },
+            dropSummary: [
+              {
+                stage: "limit",
+                before: 3,
+                after: 1,
+                dropped: 2,
+              },
+            ],
+          };
+        },
+      },
+      scopeManager: {
+        getStats() {
+          return { totalScopes: 1 };
+        },
+      },
+      migrator: {},
+    })({ program });
+
+    const origLog = console.log;
+    console.log = (...args) => logs.push(args.join(" "));
+    try {
+      await program.parseAsync([
+        "node",
+        "openclaw",
+        "memory-pro",
+        "search",
+        "服务挂了",
+        "--debug",
+      ]);
+    } finally {
+      console.log = origLog;
+    }
+
+    assert.equal(searchCalls.length, 1);
+    assert.equal(searchCalls[0].source, "cli");
+    assert.match(logs.join("\n"), /Retrieval diagnostics:/);
+    assert.match(logs.join("\n"), /Original query: 服务挂了/);
+    assert.match(logs.join("\n"), /BM25 query: 服务挂了 崩溃 crash error 报错 宕机/);
+    assert.match(logs.join("\n"), /Stages: min=3, rerankIn=3, rerank=3, hard=3, noise=3, diversity=3/);
+    assert.match(logs.join("\n"), /Drops: limit -2 \(3->1\)/);
+    assert.match(logs.join("\n"), /CLI search hit/);
+  });
+
+  it("prints failure diagnostics on debug search errors", async () => {
+    const logs = [];
+    const errors = [];
+    const exitCalls = [];
+
+    const program = new Command();
+    program.exitOverride();
+    createMemoryCLI({
+      store: {
+        async list() {
+          return [];
+        },
+        async stats() {
+          return {
+            totalCount: 0,
+            scopeCounts: {},
+            categoryCounts: {},
+          };
+        },
+      },
+      retriever: {
+        async retrieve() {
+          throw new Error("simulated search failure");
+        },
+        getConfig() {
+          return { mode: "hybrid" };
+        },
+        getLastDiagnostics() {
+          return {
+            source: "cli",
+            mode: "hybrid",
+            originalQuery: "服务挂了",
+            bm25Query: "服务挂了 崩溃 crash error 报错 宕机",
+            queryExpanded: true,
+            limit: 10,
+            scopeFilter: undefined,
+            category: undefined,
+            vectorResultCount: 0,
+            bm25ResultCount: 0,
+            fusedResultCount: 0,
+            finalResultCount: 0,
+            stageCounts: {
+              afterMinScore: 0,
+              rerankInput: 0,
+              afterRerank: 0,
+              afterRecency: 0,
+              afterImportance: 0,
+              afterLengthNorm: 0,
+              afterTimeDecay: 0,
+              afterHardMinScore: 0,
+              afterNoiseFilter: 0,
+              afterDiversity: 0,
+            },
+            dropSummary: [],
+            failureStage: "hybrid.embedQuery",
+            errorMessage: "simulated search failure",
+          };
+        },
+      },
+      scopeManager: {
+        getStats() {
+          return { totalScopes: 1 };
+        },
+      },
+      migrator: {},
+    })({ program });
+
+    const origLog = console.log;
+    const origError = console.error;
+    const origExit = process.exit;
+    console.log = (...args) => logs.push(args.join(" "));
+    console.error = (...args) => errors.push(args.join(" "));
+    process.exit = ((code = 0) => {
+      exitCalls.push(Number(code));
+      throw new Error(`__TEST_EXIT__${code}`);
+    });
+    try {
+      await assert.rejects(
+        program.parseAsync([
+          "node",
+          "openclaw",
+          "memory-pro",
+          "search",
+          "服务挂了",
+          "--debug",
+        ]),
+        /__TEST_EXIT__1/,
+      );
+    } finally {
+      console.log = origLog;
+      console.error = origError;
+      process.exit = origExit;
+    }
+
+    assert.deepEqual(logs, []);
+    assert.deepEqual(exitCalls, [1]);
+    assert.match(errors.join("\n"), /Retrieval diagnostics:/);
+    assert.match(errors.join("\n"), /Failure stage: hybrid\.embedQuery/);
+    assert.match(errors.join("\n"), /Error: simulated search failure/);
+    assert.match(errors.join("\n"), /Search failed:/);
+  });
+
+  it("returns structured JSON failure output for --json --debug search errors", async () => {
+    const logs = [];
+    const errors = [];
+    const exitCalls = [];
+
+    const program = new Command();
+    program.exitOverride();
+    createMemoryCLI({
+      store: {
+        async list() {
+          return [];
+        },
+        async stats() {
+          return {
+            totalCount: 0,
+            scopeCounts: {},
+            categoryCounts: {},
+          };
+        },
+      },
+      retriever: {
+        async retrieve() {
+          throw new Error("simulated json search failure");
+        },
+        getConfig() {
+          return { mode: "hybrid" };
+        },
+        getLastDiagnostics() {
+          return {
+            source: "cli",
+            mode: "hybrid",
+            originalQuery: "服务挂了",
+            bm25Query: "服务挂了 崩溃 crash error 报错 宕机",
+            queryExpanded: true,
+            limit: 10,
+            scopeFilter: undefined,
+            category: undefined,
+            vectorResultCount: 0,
+            bm25ResultCount: 0,
+            fusedResultCount: 0,
+            finalResultCount: 0,
+            stageCounts: {
+              afterMinScore: 0,
+              rerankInput: 0,
+              afterRerank: 0,
+              afterRecency: 0,
+              afterImportance: 0,
+              afterLengthNorm: 0,
+              afterTimeDecay: 0,
+              afterHardMinScore: 0,
+              afterNoiseFilter: 0,
+              afterDiversity: 0,
+            },
+            dropSummary: [],
+            failureStage: "hybrid.embedQuery",
+            errorMessage: "simulated json search failure",
+          };
+        },
+      },
+      scopeManager: {
+        getStats() {
+          return { totalScopes: 1 };
+        },
+      },
+      migrator: {},
+    })({ program });
+
+    const origLog = console.log;
+    const origError = console.error;
+    const origExit = process.exit;
+    console.log = (...args) => logs.push(args.join(" "));
+    console.error = (...args) => errors.push(args.join(" "));
+    process.exit = ((code = 0) => {
+      exitCalls.push(Number(code));
+      throw new Error(`__TEST_EXIT__${code}`);
+    });
+    try {
+      await assert.rejects(
+        program.parseAsync([
+          "node",
+          "openclaw",
+          "memory-pro",
+          "search",
+          "服务挂了",
+          "--json",
+          "--debug",
+        ]),
+        /__TEST_EXIT__1/,
+      );
+    } finally {
+      console.log = origLog;
+      console.error = origError;
+      process.exit = origExit;
+    }
+
+    assert.deepEqual(exitCalls, [1]);
+    assert.deepEqual(errors, []);
+    assert.equal(logs.length, 1);
+    const payload = JSON.parse(logs[0]);
+    assert.deepEqual(payload, {
+      error: {
+        code: "search_failed",
+        message: "simulated json search failure",
+      },
+      diagnostics: {
+        source: "cli",
+        mode: "hybrid",
+        originalQuery: "服务挂了",
+        bm25Query: "服务挂了 崩溃 crash error 报错 宕机",
+        queryExpanded: true,
+        limit: 10,
+        vectorResultCount: 0,
+        bm25ResultCount: 0,
+        fusedResultCount: 0,
+        finalResultCount: 0,
+        stageCounts: {
+          afterMinScore: 0,
+          rerankInput: 0,
+          afterRerank: 0,
+          afterRecency: 0,
+          afterImportance: 0,
+          afterLengthNorm: 0,
+          afterTimeDecay: 0,
+          afterHardMinScore: 0,
+          afterNoiseFilter: 0,
+          afterDiversity: 0,
+        },
+        dropSummary: [],
+        failureStage: "hybrid.embedQuery",
+        errorMessage: "simulated json search failure",
+      },
+    });
+  });
+
+  it("uses diagnostics from the ephemeral search retriever on --json --debug failures", async () => {
+    const logs = [];
+    const errors = [];
+    const exitCalls = [];
+
+    const program = new Command();
+    program.exitOverride();
+    createMemoryCLI({
+      store: {
+        hasFtsSupport: true,
+        async vectorSearch() {
+          throw new Error("vector search should not run after embed failure");
+        },
+        async bm25Search() {
+          throw new Error("bm25 search should not run after embed failure");
+        },
+        async hasId() {
+          return true;
+        },
+      },
+      retriever: {
+        async retrieve() {
+          throw new Error("context retriever should not be used when embedder is present");
+        },
+        getConfig() {
+          return {
+            mode: "hybrid",
+            rerank: "none",
+            filterNoise: false,
+            minScore: 0,
+            hardMinScore: 0,
+            candidatePoolSize: 5,
+          };
+        },
+        getLastDiagnostics() {
+          return null;
+        },
+      },
+      embedder: {
+        async embedQuery() {
+          throw new Error("simulated embedded search failure");
+        },
+      },
+      scopeManager: {
+        getStats() {
+          return { totalScopes: 1 };
+        },
+      },
+      migrator: {},
+    })({ program });
+
+    const origLog = console.log;
+    const origError = console.error;
+    const origExit = process.exit;
+    console.log = (...args) => logs.push(args.join(" "));
+    console.error = (...args) => errors.push(args.join(" "));
+    process.exit = ((code = 0) => {
+      exitCalls.push(Number(code));
+      throw new Error(`__TEST_EXIT__${code}`);
+    });
+    try {
+      await assert.rejects(
+        program.parseAsync([
+          "node",
+          "openclaw",
+          "memory-pro",
+          "search",
+          "服务挂了",
+          "--json",
+          "--debug",
+        ]),
+        /__TEST_EXIT__1/,
+      );
+    } finally {
+      console.log = origLog;
+      console.error = origError;
+      process.exit = origExit;
+    }
+
+    assert.deepEqual(exitCalls, [1]);
+    assert.deepEqual(errors, []);
+    assert.equal(logs.length, 1);
+    const payload = JSON.parse(logs[0]);
+    assert.deepEqual(payload, {
+      error: {
+        code: "search_failed",
+        message: "simulated embedded search failure",
+      },
+      diagnostics: {
+        source: "cli",
+        mode: "hybrid",
+        originalQuery: "服务挂了",
+        bm25Query: "服务挂了",
+        queryExpanded: false,
+        limit: 10,
+        vectorResultCount: 0,
+        bm25ResultCount: 0,
+        fusedResultCount: 0,
+        finalResultCount: 0,
+        stageCounts: {
+          afterMinScore: 0,
+          rerankInput: 0,
+          afterRerank: 0,
+          afterRecency: 0,
+          afterImportance: 0,
+          afterLengthNorm: 0,
+          afterTimeDecay: 0,
+          afterHardMinScore: 0,
+          afterNoiseFilter: 0,
+          afterDiversity: 0,
+        },
+        dropSummary: [],
+        failureStage: "hybrid.embedQuery",
+        errorMessage: "simulated embedded search failure",
+      },
+    });
+  });
+});