From 91301e3534892a5436229e1d849c856fd076e863 Mon Sep 17 00:00:00 2001 From: Tirth Kanani Date: Sun, 14 Jun 2026 18:17:47 +0100 Subject: [PATCH 1/2] fix(language-lesson): drop over-broad "is" keyword that mislabels nodes with the type-guards concept The "type guards" concept pattern included the bare keyword "is", and detectLanguageConcepts matches via unbounded substring includes(). The 2-char substring "is" appears inside common English words (this, list, exists, analysis), so nearly every node was tagged with "type guards", polluting the concepts passed to the LLM lesson prompt. Remove the "is" token and rely on the more specific existing keywords ("type guard", "narrowing", "discriminated union"). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../core/src/__tests__/language-lesson.test.ts | 14 ++++++++++++++ .../packages/core/src/analyzer/language-lesson.ts | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts b/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts index 19ebb0bcf..5c1b5c464 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts @@ -140,6 +140,20 @@ describe("language-lesson", () => { expect(concepts).toContain("middleware pattern"); }); + it("does not flag type guards from the 'is' substring in common words", () => { + const plainNode: GraphNode = { + id: "function:math:add", + type: "function", + name: "add", + filePath: "src/math/add.ts", + summary: "This function adds two numbers", + tags: ["utility"], + complexity: "simple", + }; + const concepts = detectLanguageConcepts(plainNode, "typescript"); + expect(concepts).not.toContain("type guards"); + }); + it("returns empty for nodes with no detectable concepts", () => { const plainNode: GraphNode = { id: "file:src/config.ts", diff --git a/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts b/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts index 53fcc01a4..cab4d4dfb 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts @@ -24,7 +24,7 @@ const BASE_CONCEPT_PATTERNS: Record = { "listener", ], "singleton": ["singleton", "instance", "shared client"], - "type guards": ["type guard", "is", "narrowing", "discriminated union"], + "type guards": ["type guard", "narrowing", "discriminated union"], "higher-order functions": [ "callback", "factory", From c05a5ae0156bec7692aef92697b6da2d7c82ef61 Mon Sep 17 00:00:00 2001 From: Tirth Kanani Date: Tue, 16 Jun 2026 23:32:11 +0100 Subject: [PATCH 2/2] fix(core): drop over-broad '@' and 'di' concept keywords The same unbounded substring-match false-positive class that motivated removing 'is' also affects other short/symbol keywords. Drop '@' from the decorators pattern (matched any JSDoc @param/@returns or email in a summary) and 'di' from dependency injection (matched audio/edit/directory/modifies/ loading/reading). Both concepts still detect via their specific keywords (decorator/annotation; inject/provider/container). Document the substring over-match limitation above BASE_CONCEPT_PATTERNS for the remaining 'pipe' case and a future word-boundary matcher. Expand detectLanguageConcepts coverage: table-drive the 'is'-substring negatives ('This function...', 'Persists data...', 'Renders a list...'), add a positive type-guard case, and lock in the '@'/'di' removals with both a prose-substring negative and a specific-keyword positive each. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/__tests__/language-lesson.test.ts | 97 +++++++++++++++++-- .../core/src/analyzer/language-lesson.ts | 14 ++- 2 files changed, 101 insertions(+), 10 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts b/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts index 5c1b5c464..10801344f 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/language-lesson.test.ts @@ -140,18 +140,99 @@ describe("language-lesson", () => { expect(concepts).toContain("middleware pattern"); }); - it("does not flag type guards from the 'is' substring in common words", () => { - const plainNode: GraphNode = { - id: "function:math:add", + it.each([ + "This function adds two numbers", + "Persists data to disk", + "Renders a list of items", + ])( + "does not flag type guards from the 'is' substring in common prose: %s", + (summary) => { + const plainNode: GraphNode = { + id: "function:misc:fn", + type: "function", + name: "fn", + filePath: "src/misc/fn.ts", + summary, + tags: ["utility"], + complexity: "simple", + }; + const concepts = detectLanguageConcepts(plainNode, "typescript"); + expect(concepts).not.toContain("type guards"); + }, + ); + + it("still detects type guards when the summary genuinely describes one", () => { + const guardNode: GraphNode = { + id: "function:guards:isUser", + type: "function", + name: "isUser", + filePath: "src/guards/isUser.ts", + summary: "Type guard that narrows the value to a User", + tags: ["validation"], + complexity: "simple", + }; + const concepts = detectLanguageConcepts(guardNode, "typescript"); + expect(concepts).toContain("type guards"); + }); + + it("detects decorators from a specific keyword, not from '@' in prose", () => { + // '@' was removed from the decorators pattern because it matched any + // JSDoc `@param`/`@returns` fragment or email in a summary. + const jsdocNode: GraphNode = { + id: "function:util:format", type: "function", - name: "add", - filePath: "src/math/add.ts", - summary: "This function adds two numbers", + name: "format", + filePath: "src/util/format.ts", + summary: "Formats a value. @param input the raw value @returns text", tags: ["utility"], complexity: "simple", }; - const concepts = detectLanguageConcepts(plainNode, "typescript"); - expect(concepts).not.toContain("type guards"); + expect( + detectLanguageConcepts(jsdocNode, "typescript"), + ).not.toContain("decorators"); + + const decoratorNode: GraphNode = { + id: "class:http:controller", + type: "class", + name: "Controller", + filePath: "src/http/controller.ts", + summary: "Uses a decorator to register the route handler", + tags: ["http"], + complexity: "moderate", + }; + expect( + detectLanguageConcepts(decoratorNode, "typescript"), + ).toContain("decorators"); + }); + + it("detects dependency injection from a specific keyword, not from 'di' in prose", () => { + // 'di' was removed because it matched "audio", "edit", "directory", + // "modifies", "loading", "reading", etc. + const diSubstringNode: GraphNode = { + id: "function:fs:readDirectory", + type: "function", + name: "readDirectory", + filePath: "src/fs/readDirectory.ts", + summary: "Reads and modifies a directory while loading audio files", + tags: ["fs"], + complexity: "simple", + }; + expect( + detectLanguageConcepts(diSubstringNode, "typescript"), + ).not.toContain("dependency injection"); + + const diNode: GraphNode = { + id: "class:di:service", + type: "class", + name: "Service", + filePath: "src/di/service.ts", + summary: "Resolves dependencies from the injection container", + tags: ["inject"], + complexity: "moderate", + }; + expect( + detectLanguageConcepts(diNode, "typescript"), + ).toContain("dependency injection"); }); it("returns empty for nodes with no detectable concepts", () => { diff --git a/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts b/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts index cab4d4dfb..fd3197c6e 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/language-lesson.ts @@ -9,13 +9,23 @@ export interface LanguageLessonResult { /** * Base concept patterns that apply across all languages. * These are merged with language-specific concepts from LanguageConfig. + * + * NOTE: Detection uses unbounded substring matching (`text.includes(keyword)`), + * so very short or symbol-only keywords over-match prose. Examples that were + * removed for this reason: "@" (matched every JSDoc `@param`/`@returns` and any + * email in a summary) and "di" (matched "audio", "edit", "directory", + * "modifies", "loading", "reading"). Keep keywords specific enough that a plain + * substring hit is a strong signal. A word-boundary matcher would let us safely + * reintroduce short tokens — see follow-up issue. Until then, prefer adding a + * longer distinctive keyword over a 1-2 char fragment. (`"pipe"` still matches + * "pipeline" for "middleware pattern"/"streams" — noted for the same follow-up.) */ const BASE_CONCEPT_PATTERNS: Record = { "async/await": ["async", "await", "promise", "asynchronous"], "middleware pattern": ["middleware", "interceptor", "pipe"], "generics": ["generic", "type parameter", "template"], - "decorators": ["decorator", "@", "annotation"], - "dependency injection": ["inject", "provider", "container", "di"], + "decorators": ["decorator", "annotation"], + "dependency injection": ["inject", "provider", "container"], "observer pattern": [ "subscribe", "publish",