diff --git a/.jules/bolt.md b/.jules/bolt.md index 254b8d5..4efe577 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,7 @@ ## 2025-02-18 - Regex Pre-compilation in Hot Paths **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup. **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants. + +## 2025-02-18 - Hoisting Keyword Density Constants +**Learning:** Re-compiling regexes for title/company patterns and re-allocating static lists/tuples for keywords inside frequently called functions (such as those run during keyword density checks) add unnecessary overhead and scale poorly with complexity. +**Action:** Move static extraction patterns (`_TITLE_PATTERNS`, `_COMPANY_PATTERNS`) and reference lists (`_TECH_KEYWORDS`, `_COMMON_KEYWORDS`) to module-level constants. Convert lookup collections to sets where possible to achieve O(1) membership checking. 
diff --git a/cli/utils/keyword_density.py b/cli/utils/keyword_density.py index 7a0df7b..0e96ed4 100644 --- a/cli/utils/keyword_density.py +++ b/cli/utils/keyword_density.py @@ -37,6 +37,105 @@ console = Console() +_TITLE_PATTERNS = [ + re.compile(r"(?:job title|position|title):\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), + re.compile(r"^([^\n]+)\s*[-|]\s*[^|]+$", re.IGNORECASE | re.MULTILINE), + re.compile( + r"#\s*([^\n]+)", re.IGNORECASE | re.MULTILINE + ), # Markdown headers often have job title +] + +_COMPANY_PATTERNS = [ + re.compile(r"(?:company|organization):\s*([^\n]+)", re.IGNORECASE), + re.compile(r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", re.IGNORECASE), +] + +_TECH_KEYWORDS = { + "python", + "javascript", + "typescript", + "react", + "vue", + "angular", + "node.js", + "django", + "flask", + "fastapi", + "kubernetes", + "docker", + "aws", + "gcp", + "azure", + "sql", + "mongodb", + "postgresql", + "redis", + "ci/cd", + "devops", + "machine learning", + "ai", + "llm", + "pytorch", + "tensorflow", + "graphql", + "rest api", + "microservices", + "java", + "go", + "rust", + "c++", + "c#", + ".net", + "spring", +} + +_COMMON_KEYWORDS = [ + ("python", "high"), + ("javascript", "high"), + ("typescript", "high"), + ("react", "high"), + ("vue", "medium"), + ("angular", "medium"), + ("node.js", "high"), + ("django", "medium"), + ("flask", "medium"), + ("fastapi", "medium"), + ("kubernetes", "high"), + ("docker", "high"), + ("aws", "high"), + ("gcp", "medium"), + ("azure", "medium"), + ("sql", "high"), + ("mongodb", "medium"), + ("postgresql", "medium"), + ("redis", "medium"), + ("ci/cd", "high"), + ("devops", "high"), + ("machine learning", "high"), + ("ai", "high"), + ("llm", "high"), + ("pytorch", "medium"), + ("tensorflow", "medium"), + ("react native", "medium"), + ("graphql", "medium"), + ("rest api", "high"), + ("microservices", "high"), + ("java", "high"), + ("go", "medium"), + ("rust", "medium"), + ("c++", "medium"), + ("c#", "medium"), + 
(".net", "medium"), + ("spring", "medium"), + ("hibernate", "medium"), + ("agile", "high"), + ("scrum", "medium"), + ("kanban", "medium"), + ("leadership", "high"), + ("communication", "high"), + ("teamwork", "medium"), +] + @dataclass class KeywordInfo: @@ -207,27 +306,14 @@ def _extract_job_details(self, job_description: str) -> Tuple[str, str]: job_title = "" company = "" - # Try to extract job title (common patterns) - title_patterns = [ - r"(?:job title|position|title):\s*([^\n]+)", - r"^([^\n]+)\s*[-|]\s*[^|]+$", - r"#\s*([^\n]+)", # Markdown headers often have job title - ] - - for pattern in title_patterns: - match = re.search(pattern, job_description, re.IGNORECASE | re.MULTILINE) + for pattern in _TITLE_PATTERNS: + match = pattern.search(job_description) if match: job_title = match.group(1).strip() break - # Try to extract company name - company_patterns = [ - r"(?:company|organization):\s*([^\n]+)", - r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", - ] - - for pattern in company_patterns: - match = re.search(pattern, job_description, re.IGNORECASE) + for pattern in _COMPANY_PATTERNS: + match = pattern.search(job_description) if match: company = match.group(1).strip() break @@ -289,57 +375,10 @@ def _extract_job_keywords(self, job_description: str) -> List[Tuple[str, str]]: def _simple_keyword_extraction(self, job_description: str) -> List[Tuple[str, str]]: """Simple fallback keyword extraction without AI.""" - common_keywords = [ - ("python", "high"), - ("javascript", "high"), - ("typescript", "high"), - ("react", "high"), - ("vue", "medium"), - ("angular", "medium"), - ("node.js", "high"), - ("django", "medium"), - ("flask", "medium"), - ("fastapi", "medium"), - ("kubernetes", "high"), - ("docker", "high"), - ("aws", "high"), - ("gcp", "medium"), - ("azure", "medium"), - ("sql", "high"), - ("mongodb", "medium"), - ("postgresql", "medium"), - ("redis", "medium"), - ("ci/cd", "high"), - ("devops", "high"), - ("machine learning", "high"), - ("ai", 
"high"), - ("llm", "high"), - ("pytorch", "medium"), - ("tensorflow", "medium"), - ("react native", "medium"), - ("graphql", "medium"), - ("rest api", "high"), - ("microservices", "high"), - ("java", "high"), - ("go", "medium"), - ("rust", "medium"), - ("c++", "medium"), - ("c#", "medium"), - (".net", "medium"), - ("spring", "medium"), - ("hibernate", "medium"), - ("agile", "high"), - ("scrum", "medium"), - ("kanban", "medium"), - ("leadership", "high"), - ("communication", "high"), - ("teamwork", "medium"), - ] - jd_lower = job_description.lower() found = [] - for kw, importance in common_keywords: + for kw, importance in _COMMON_KEYWORDS: if kw in jd_lower: found.append((kw, importance)) @@ -398,46 +437,7 @@ def _suggest_sections_for_keyword( suggestions = [] # Check if keyword is tech-related - tech_keywords = [ - "python", - "javascript", - "typescript", - "react", - "vue", - "angular", - "node.js", - "django", - "flask", - "fastapi", - "kubernetes", - "docker", - "aws", - "gcp", - "azure", - "sql", - "mongodb", - "postgresql", - "redis", - "ci/cd", - "devops", - "machine learning", - "ai", - "llm", - "pytorch", - "tensorflow", - "graphql", - "rest api", - "microservices", - "java", - "go", - "rust", - "c++", - "c#", - ".net", - "spring", - ] - - if keyword.lower() in tech_keywords: + if keyword.lower() in _TECH_KEYWORDS: suggestions.append("Skills section") # Check experience bullets