From 95daad9f51dfd0caaaaf7300a6a4d4501df9c4b6 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:15:15 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20keyword=20densit?=
 =?UTF-8?q?y=20extraction=20and=20lookups?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: anchapin <6326294+anchapin@users.noreply.github.com>
---
 .jules/bolt.md               |   4 ++
 cli/utils/keyword_density.py | 113 ++++++++++++++++++-----------------
 2 files changed, 62 insertions(+), 55 deletions(-)

diff --git a/.jules/bolt.md b/.jules/bolt.md
index 254b8d5..029ed84 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -13,3 +13,7 @@
 ## 2025-02-18 - Regex Pre-compilation in Hot Paths
 **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
 **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.
+
+## 2025-02-18 - Set vs List for keyword lookups
+**Learning:** Building a list literal inside a frequently called function (like `_suggest_sections_for_keyword`) and then testing membership with `in` costs an O(n) scan per lookup plus a fresh list allocation on every call.
+**Action:** Move static collections that are only used for membership testing into module-level sets: lookups become O(1) on average and the collection is built once at import time instead of on every call.
diff --git a/cli/utils/keyword_density.py b/cli/utils/keyword_density.py
index 7a0df7b..26e87f4 100644
--- a/cli/utils/keyword_density.py
+++ b/cli/utils/keyword_density.py
@@ -37,6 +37,59 @@
 
 console = Console()
 
+# Compiled once at import: hot paths skip per-call list building and `re` pattern-cache lookups.
+_TITLE_PATTERNS = [
+    re.compile(r"(?:job title|position|title):\s*([^\n]+)", re.IGNORECASE | re.MULTILINE),
+    re.compile(r"^([^\n]+)\s*[-|]\s*[^|]+$", re.IGNORECASE | re.MULTILINE),
+    re.compile(r"#\s*([^\n]+)", re.IGNORECASE | re.MULTILINE),
+]
+
+_COMPANY_PATTERNS = [
+    re.compile(r"(?:company|organization):\s*([^\n]+)", re.IGNORECASE),
+    re.compile(r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", re.IGNORECASE),
+]
+
+# Using a set for O(1) lookup performance instead of O(N) list search,
+# and defining at module level to prevent reallocation on every function call.
+_TECH_KEYWORDS = {
+    "python",
+    "javascript",
+    "typescript",
+    "react",
+    "vue",
+    "angular",
+    "node.js",
+    "django",
+    "flask",
+    "fastapi",
+    "kubernetes",
+    "docker",
+    "aws",
+    "gcp",
+    "azure",
+    "sql",
+    "mongodb",
+    "postgresql",
+    "redis",
+    "ci/cd",
+    "devops",
+    "machine learning",
+    "ai",
+    "llm",
+    "pytorch",
+    "tensorflow",
+    "graphql",
+    "rest api",
+    "microservices",
+    "java",
+    "go",
+    "rust",
+    "c++",
+    "c#",
+    ".net",
+    "spring",
+}
+
 
 @dataclass
 class KeywordInfo:
@@ -208,26 +261,15 @@ def _extract_job_details(self, job_description: str) -> Tuple[str, str]:
         company = ""
 
         # Try to extract job title (common patterns)
-        title_patterns = [
-            r"(?:job title|position|title):\s*([^\n]+)",
-            r"^([^\n]+)\s*[-|]\s*[^|]+$",
-            r"#\s*([^\n]+)",  # Markdown headers often have job title
-        ]
-
-        for pattern in title_patterns:
-            match = re.search(pattern, job_description, re.IGNORECASE | re.MULTILINE)
+        for pattern in _TITLE_PATTERNS:
+            match = pattern.search(job_description)
             if match:
                 job_title = match.group(1).strip()
                 break
 
         # Try to extract company name
-        company_patterns = [
-            r"(?:company|organization):\s*([^\n]+)",
-            r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)",
-        ]
-
-        for pattern in company_patterns:
-            match = re.search(pattern, job_description, re.IGNORECASE)
+        for pattern in _COMPANY_PATTERNS:
+            match = pattern.search(job_description)
             if match:
                 company = match.group(1).strip()
                 break
@@ -398,46 +440,7 @@ def _suggest_sections_for_keyword(
         suggestions = []
 
         # Check if keyword is tech-related
-        tech_keywords = [
-            "python",
-            "javascript",
-            "typescript",
-            "react",
-            "vue",
-            "angular",
-            "node.js",
-            "django",
-            "flask",
-            "fastapi",
-            "kubernetes",
-            "docker",
-            "aws",
-            "gcp",
-            "azure",
-            "sql",
-            "mongodb",
-            "postgresql",
-            "redis",
-            "ci/cd",
-            "devops",
-            "machine learning",
-            "ai",
-            "llm",
-            "pytorch",
-            "tensorflow",
-            "graphql",
-            "rest api",
-            "microservices",
-            "java",
-            "go",
-            "rust",
-            "c++",
-            "c#",
-            ".net",
-            "spring",
-        ]
-
-        if keyword.lower() in tech_keywords:
+        if keyword.lower() in _TECH_KEYWORDS:
             suggestions.append("Skills section")
 
         # Check experience bullets