From 6c26fa709e2a2b486e8ff81f09fadf58f146b0dd Mon Sep 17 00:00:00 2001
From: perf3ct <jonfuller2012@gmail.com>
Date: Wed, 27 Aug 2025 21:11:44 +0000
Subject: [PATCH 1/3] feat(quick_search): just fuzzy match note titles for
 larger notes, while still matching on exact strings

---
 .../expressions/note_content_fulltext.ts      | 31 +++++++++++++++++++
 .../src/services/search/utils/text_utils.ts   |  2 ++
 2 files changed, 33 insertions(+)

diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts
index f1e1bf95ff..ac2c882027 100644
--- a/apps/server/src/services/search/expressions/note_content_fulltext.ts
+++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts
@@ -119,7 +119,38 @@ class NoteContentFulltextExp extends Expression {
             return; // Content too large or invalid
         }
         content = processedContent;
+        
+        // Check if this is a large note that needs optimized search strategy
+        const wordCount = content.split(/\s+/).length;
+        const isLargeNote = wordCount > FUZZY_SEARCH_CONFIG.LARGE_NOTE_THRESHOLD;
+        const isFuzzyOperator = this.operator === "~=" || this.operator === "~*";
+        
+        // For large notes with fuzzy operators, switch to optimized strategy
+        if (isLargeNote && isFuzzyOperator) {
+            const note = becca.notes[noteId];
+            const title = note.title || "";
+            
+            log.info(`Note ${noteId} has ${wordCount} words - using optimized search (fuzzy on title, exact on content)`);
+            
+            // Perform fuzzy search on title
+            const titleMatches = this.fuzzyMatchToken(normalizeSearchText(this.tokens[0]), normalizeSearchText(title));
+            
+            // Perform exact match on content for all tokens
+            const contentMatches = this.tokens.every(token => {
+                const normalizedToken = normalizeSearchText(token);
+                const normalizedContent = normalizeSearchText(content);
+                return normalizedContent.includes(normalizedToken);
+            });
+            
+            // Add to results if either title matches with fuzzy or content matches exactly
+            if (titleMatches || contentMatches) {
+                resultNoteSet.add(becca.notes[noteId]);
+            }
+            
+            return content;
+        }
 
+        // Standard search logic for non-large notes or non-fuzzy operators
         if (this.tokens.length === 1) {
             const [token] = this.tokens;
 
diff --git a/apps/server/src/services/search/utils/text_utils.ts b/apps/server/src/services/search/utils/text_utils.ts
index 9274241cbc..9bba242213 100644
--- a/apps/server/src/services/search/utils/text_utils.ts
+++ b/apps/server/src/services/search/utils/text_utils.ts
@@ -14,6 +14,8 @@ export const FUZZY_SEARCH_CONFIG = {
     MAX_EDIT_DISTANCE: 2,
     // Maximum proximity distance for phrase matching (in words)
     MAX_PHRASE_PROXIMITY: 10,
+    // Large note threshold - above this, use optimized search strategy
+    LARGE_NOTE_THRESHOLD: 50000, // 50K words - switch to title-only fuzzy for performance
     // Absolute hard limits for extreme cases - only to prevent system crashes
     ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
     ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing

From 93e8459d4bf2b0d982a05ca78bb8edeef6bbccb0 Mon Sep 17 00:00:00 2001
From: perf3ct <jonfuller2012@gmail.com>
Date: Wed, 27 Aug 2025 22:33:38 +0000
Subject: [PATCH 2/3] feat(quick_search): remove unused performance-warning
 and progressive-processing thresholds

---
 .../expressions/note_content_fulltext.ts      |  5 ----
 .../src/services/search/utils/text_utils.ts   | 29 ++++---------------
 2 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts
index ac2c882027..38206d1c8d 100644
--- a/apps/server/src/services/search/expressions/note_content_fulltext.ts
+++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts
@@ -281,11 +281,6 @@ class NoteContentFulltextExp extends Expression {
             return false;
         }
         
-        // Warn about large word counts but still attempt matching
-        if (words.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
-            console.info(`Large word count for phrase matching: ${words.length} words - may take longer but will attempt full matching`);
-        }
-        
         // Find positions of each token
         const tokenPositions: number[][] = this.tokens.map(token => {
             const normalizedToken = normalizeSearchText(token);
diff --git a/apps/server/src/services/search/utils/text_utils.ts b/apps/server/src/services/search/utils/text_utils.ts
index 9bba242213..026ad79243 100644
--- a/apps/server/src/services/search/utils/text_utils.ts
+++ b/apps/server/src/services/search/utils/text_utils.ts
@@ -14,17 +14,11 @@ export const FUZZY_SEARCH_CONFIG = {
     MAX_EDIT_DISTANCE: 2,
     // Maximum proximity distance for phrase matching (in words)
     MAX_PHRASE_PROXIMITY: 10,
-    // Large note threshold - above this, use optimized search strategy
+    // Large note threshold - above this, use optimized search strategy (fuzzy on title only)
     LARGE_NOTE_THRESHOLD: 50000, // 50K words - switch to title-only fuzzy for performance
     // Absolute hard limits for extreme cases - only to prevent system crashes
     ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
     ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
-    // Performance warning thresholds - inform user but still attempt search
-    PERFORMANCE_WARNING_SIZE: 5 * 1024 * 1024, // 5MB - warn about potential performance impact
-    PERFORMANCE_WARNING_WORDS: 100000, // 100K words - warn about word count impact
-    // Progressive processing thresholds for very large content
-    PROGRESSIVE_PROCESSING_SIZE: 10 * 1024 * 1024, // 10MB - use progressive processing
-    PROGRESSIVE_PROCESSING_WORDS: 500000, // 500K words - use progressive processing
     // Performance thresholds
     EARLY_TERMINATION_THRESHOLD: 3,
 } as const;
@@ -206,7 +200,8 @@ export function validateFuzzySearchTokens(tokens: string[], operator: string): {
 
 /**
  * Validates and preprocesses content for search operations.
- * Philosophy: Try to search everything! Only block truly extreme cases that could crash the system.
+ * Only blocks truly extreme cases that could crash the system.
+ * Large notes (above the large-note threshold in FUZZY_SEARCH_CONFIG) are handled with an optimized search strategy instead.
  * 
  * @param content The content to validate and preprocess
  * @param noteId The note ID (for logging purposes)
@@ -224,12 +219,7 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
         return content.substring(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_CONTENT_SIZE);
     }
 
-    // Warn about very large content but still process it
-    if (content.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_SIZE) {
-        console.info(`Large content for note ${noteId || 'unknown'}: ${content.length} bytes - processing may take time but will attempt full search`);
-    }
-
-    // For word count, be even more permissive - only block truly extreme cases
+    // For word count, only block truly extreme cases
     const wordCount = content.split(/\s+/).length;
     if (wordCount > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
         console.error(`Word count exceeds absolute system limit for note ${noteId || 'unknown'}: ${wordCount} words - this could cause system instability`);
@@ -237,15 +227,8 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
         return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT).join(' ');
     }
 
-    // Warn about high word counts but still process them
-    if (wordCount > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
-        console.info(`High word count for note ${noteId || 'unknown'}: ${wordCount} words - phrase matching may take time but will attempt full search`);
-    }
-
-    // Progressive processing warning for very large content
-    if (content.length > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_SIZE || wordCount > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_WORDS) {
-        console.info(`Very large content for note ${noteId || 'unknown'} - using progressive processing to maintain responsiveness`);
-    }
+    // Notes above LARGE_NOTE_THRESHOLD will use optimized search strategy
+    // (handled in note_content_fulltext.ts)
 
     return content;
 }

From 912bc61730c773b5ec62f760c3aa548ccccd31e9 Mon Sep 17 00:00:00 2001
From: perf3ct <jonfuller2012@gmail.com>
Date: Thu, 28 Aug 2025 18:56:06 +0000
Subject: [PATCH 3/3] feat(search): also limit note content that can be
 searched, but keep searchability of titles

---
 .../expressions/note_content_fulltext.ts      | 47 +++++++++++++++++--
 .../src/services/search/utils/text_utils.ts   |  6 ++-
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts
index 38206d1c8d..0f0bbac859 100644
--- a/apps/server/src/services/search/expressions/note_content_fulltext.ts
+++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts
@@ -120,17 +120,54 @@ class NoteContentFulltextExp extends Expression {
         }
         content = processedContent;
         
-        // Check if this is a large note that needs optimized search strategy
-        const wordCount = content.split(/\s+/).length;
-        const isLargeNote = wordCount > FUZZY_SEARCH_CONFIG.LARGE_NOTE_THRESHOLD;
+        // Check note size and determine search strategy
+        const contentSize = content.length;
+        const isExtremeNote = contentSize > FUZZY_SEARCH_CONFIG.EXTREME_NOTE_SIZE_THRESHOLD;
+        const isLargeNote = contentSize > FUZZY_SEARCH_CONFIG.LARGE_NOTE_SIZE_THRESHOLD;
         const isFuzzyOperator = this.operator === "~=" || this.operator === "~*";
         
-        // For large notes with fuzzy operators, switch to optimized strategy
+        // For extremely large notes (>5MB), only search title regardless of operator
+        if (isExtremeNote) {
+            const note = becca.notes[noteId];
+            const title = note.title || "";
+            
+            log.info(`Note ${noteId} is ${(contentSize / (1024 * 1024)).toFixed(1)}MB - searching title only due to extreme size`);
+            
+            // For fuzzy operators, use fuzzy matching on title
+            // For other operators, use exact/wildcard matching on title
+            const normalizedTitle = normalizeSearchText(title);
+            let titleMatches = false;
+            
+            if (isFuzzyOperator) {
+                titleMatches = this.tokens.some(token => 
+                    this.fuzzyMatchToken(normalizeSearchText(token), normalizedTitle)
+                );
+            } else {
+                // Apply the operator to title matching
+                titleMatches = this.tokens.every(token => {
+                    const normalizedToken = normalizeSearchText(token);
+                    if (this.operator === "*=*") return normalizedTitle.includes(normalizedToken);
+                    if (this.operator === "=") return normalizedTitle === normalizedToken;
+                    if (this.operator === "!=") return normalizedTitle !== normalizedToken;
+                    if (this.operator === "*=") return normalizedTitle.endsWith(normalizedToken);
+                    if (this.operator === "=*") return normalizedTitle.startsWith(normalizedToken);
+                    return false;
+                });
+            }
+            
+            if (titleMatches) {
+                resultNoteSet.add(becca.notes[noteId]);
+            }
+            
+            return content;
+        }
+        
+        // For large notes (250KB-5MB) with fuzzy operators, use optimized strategy
         if (isLargeNote && isFuzzyOperator) {
             const note = becca.notes[noteId];
             const title = note.title || "";
             
-            log.info(`Note ${noteId} has ${wordCount} words - using optimized search (fuzzy on title, exact on content)`);
+            log.info(`Note ${noteId} is ${(contentSize / 1024).toFixed(1)}KB - using optimized search (fuzzy on title, exact on content)`);
             
             // Perform fuzzy search on title
             const titleMatches = this.fuzzyMatchToken(normalizeSearchText(this.tokens[0]), normalizeSearchText(title));
diff --git a/apps/server/src/services/search/utils/text_utils.ts b/apps/server/src/services/search/utils/text_utils.ts
index 026ad79243..c828d39833 100644
--- a/apps/server/src/services/search/utils/text_utils.ts
+++ b/apps/server/src/services/search/utils/text_utils.ts
@@ -15,7 +15,9 @@ export const FUZZY_SEARCH_CONFIG = {
     // Maximum proximity distance for phrase matching (in words)
     MAX_PHRASE_PROXIMITY: 10,
     // Large note threshold - above this, use optimized search strategy (fuzzy on title only)
-    LARGE_NOTE_THRESHOLD: 50000, // 50K words - switch to title-only fuzzy for performance
+    LARGE_NOTE_SIZE_THRESHOLD: 250000, // 250KB - switch to title-only fuzzy for performance
+    // Extreme note threshold - above this, skip content search entirely
+    EXTREME_NOTE_SIZE_THRESHOLD: 5 * 1024 * 1024, // 5MB - title search only
     // Absolute hard limits for extreme cases - only to prevent system crashes
     ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
     ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
@@ -227,7 +229,7 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
         return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT).join(' ');
     }
 
-    // Notes above LARGE_NOTE_THRESHOLD will use optimized search strategy
+    // Notes above LARGE_NOTE_SIZE_THRESHOLD (250KB) use an optimized search strategy for fuzzy operators
     // (handled in note_content_fulltext.ts)
 
     return content;