From 95daad9f51dfd0caaaaf7300a6a4d4501df9c4b6 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 26 Apr 2026 23:15:15 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20keyword=20densit?= =?UTF-8?q?y=20extraction=20and=20lookups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: anchapin <6326294+anchapin@users.noreply.github.com> --- .jules/bolt.md | 4 ++ cli/utils/keyword_density.py | 113 ++++++++++++++++++----------------- 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 254b8d5..029ed84 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,7 @@ ## 2025-02-18 - Regex Pre-compilation in Hot Paths **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup. **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants. + +## 2025-02-18 - Set vs List for keyword lookups +**Learning:** Using a list for an `in` membership check inside a frequently called function (like `_suggest_sections_for_keyword`) results in O(n) performance and repeated allocations. +**Action:** Always convert static lists used for membership testing to module-level sets for O(1) performance and to avoid reallocation overhead. diff --git a/cli/utils/keyword_density.py b/cli/utils/keyword_density.py index 7a0df7b..26e87f4 100644 --- a/cli/utils/keyword_density.py +++ b/cli/utils/keyword_density.py @@ -37,6 +37,59 @@ console = Console() +# Pre-compiled regex patterns to avoid redundant compilation inside loops/frequent function calls +_TITLE_PATTERNS = [ + re.compile(r"(?:job title|position|title):\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), + re.compile(r"^([^\n]+)\s*[-|]\s*[^|]+$", re.IGNORECASE | re.MULTILINE), + re.compile(r"#\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), +] + +_COMPANY_PATTERNS = [ + re.compile(r"(?:company|organization):\s*([^\n]+)", re.IGNORECASE), + re.compile(r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", re.IGNORECASE), +] + +# Using a set for O(1) lookup performance instead of O(N) list search, +# and defining at module level to prevent reallocation on every function call. +_TECH_KEYWORDS = { + "python", + "javascript", + "typescript", + "react", + "vue", + "angular", + "node.js", + "django", + "flask", + "fastapi", + "kubernetes", + "docker", + "aws", + "gcp", + "azure", + "sql", + "mongodb", + "postgresql", + "redis", + "ci/cd", + "devops", + "machine learning", + "ai", + "llm", + "pytorch", + "tensorflow", + "graphql", + "rest api", + "microservices", + "java", + "go", + "rust", + "c++", + "c#", + ".net", + "spring", +} + @dataclass class KeywordInfo: @@ -208,26 +261,15 @@ def _extract_job_details(self, job_description: str) -> Tuple[str, str]: company = "" # Try to extract job title (common patterns) - title_patterns = [ - r"(?:job title|position|title):\s*([^\n]+)", - r"^([^\n]+)\s*[-|]\s*[^|]+$", - r"#\s*([^\n]+)", # Markdown headers often have job title - ] - - for pattern in title_patterns: - match = re.search(pattern, job_description, re.IGNORECASE | re.MULTILINE) + for pattern in _TITLE_PATTERNS: + match = pattern.search(job_description) if match: job_title = match.group(1).strip() break # Try to extract company name - company_patterns = [ - r"(?:company|organization):\s*([^\n]+)", - r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", - ] - - for pattern in company_patterns: - match = re.search(pattern, job_description, re.IGNORECASE) + for pattern in _COMPANY_PATTERNS: + match = pattern.search(job_description) if match: company = match.group(1).strip() break @@ -398,46 +440,7 @@ def _suggest_sections_for_keyword( suggestions = [] # Check if keyword is tech-related - tech_keywords = [ - "python", - "javascript", - "typescript", - "react", - "vue", - "angular", - "node.js", - "django", - "flask", - "fastapi", - "kubernetes", - "docker", - "aws", - "gcp", - "azure", - "sql", - "mongodb", - "postgresql", - "redis", - "ci/cd", - "devops", - "machine learning", - "ai", - "llm", - "pytorch", - "tensorflow", - "graphql", - "rest api", - "microservices", - "java", - "go", - "rust", - "c++", - "c#", - ".net", - "spring", - ] - - if keyword.lower() in tech_keywords: + if keyword.lower() in _TECH_KEYWORDS: suggestions.append("Skills section") # Check experience bullets