diff --git a/.jules/bolt.md b/.jules/bolt.md index 254b8d5..029ed84 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,7 @@ ## 2025-02-18 - Regex Pre-compilation in Hot Paths **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup. **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants. + +## 2025-02-18 - Set vs List for keyword lookups +**Learning:** Using a list for an `in` membership check inside a frequently called function (like `_suggest_sections_for_keyword`) results in O(n) performance and repeated allocations. +**Action:** Always convert static lists used for membership testing to module-level sets for O(1) performance and to avoid reallocation overhead. diff --git a/cli/utils/keyword_density.py b/cli/utils/keyword_density.py index 7a0df7b..26e87f4 100644 --- a/cli/utils/keyword_density.py +++ b/cli/utils/keyword_density.py @@ -37,6 +37,59 @@ console = Console() +# Pre-compiled regex patterns to avoid redundant compilation inside loops/frequent function calls +_TITLE_PATTERNS = [ + re.compile(r"(?:job title|position|title):\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), + re.compile(r"^([^\n]+)\s*[-|]\s*[^|]+$", re.IGNORECASE | re.MULTILINE), + re.compile(r"#\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), +] + +_COMPANY_PATTERNS = [ + re.compile(r"(?:company|organization):\s*([^\n]+)", re.IGNORECASE), + re.compile(r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", re.IGNORECASE), +] + +# Using a set for O(1) lookup performance instead of O(N) list search, +# and defining at module level to prevent reallocation on every function call. +_TECH_KEYWORDS = { + "python", + "javascript", + "typescript", + "react", + "vue", + "angular", + "node.js", + "django", + "flask", + "fastapi", + "kubernetes", + "docker", + "aws", + "gcp", + "azure", + "sql", + "mongodb", + "postgresql", + "redis", + "ci/cd", + "devops", + "machine learning", + "ai", + "llm", + "pytorch", + "tensorflow", + "graphql", + "rest api", + "microservices", + "java", + "go", + "rust", + "c++", + "c#", + ".net", + "spring", +} + @dataclass class KeywordInfo: @@ -208,26 +261,15 @@ def _extract_job_details(self, job_description: str) -> Tuple[str, str]: company = "" # Try to extract job title (common patterns) - title_patterns = [ - r"(?:job title|position|title):\s*([^\n]+)", - r"^([^\n]+)\s*[-|]\s*[^|]+$", - r"#\s*([^\n]+)", # Markdown headers often have job title - ] - - for pattern in title_patterns: - match = re.search(pattern, job_description, re.IGNORECASE | re.MULTILINE) + for pattern in _TITLE_PATTERNS: + match = pattern.search(job_description) if match: job_title = match.group(1).strip() break # Try to extract company name - company_patterns = [ - r"(?:company|organization):\s*([^\n]+)", - r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", - ] - - for pattern in company_patterns: - match = re.search(pattern, job_description, re.IGNORECASE) + for pattern in _COMPANY_PATTERNS: + match = pattern.search(job_description) if match: company = match.group(1).strip() break @@ -398,46 +440,7 @@ def _suggest_sections_for_keyword( suggestions = [] # Check if keyword is tech-related - tech_keywords = [ - "python", - "javascript", - "typescript", - "react", - "vue", - "angular", - "node.js", - "django", - "flask", - "fastapi", - "kubernetes", - "docker", - "aws", - "gcp", - "azure", - "sql", - "mongodb", - "postgresql", - "redis", - "ci/cd", - "devops", - "machine learning", - "ai", - "llm", - "pytorch", - "tensorflow", - "graphql", - "rest api", - "microservices", - "java", - "go", - "rust", - "c++", - "c#", - ".net", - "spring", - ] - - if keyword.lower() in tech_keywords: + if keyword.lower() in _TECH_KEYWORDS: suggestions.append("Skills section") # Check experience bullets