Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@
## 2025-02-18 - Regex Pre-compilation in Hot Paths
**Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
**Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.

## 2024-05-22 - Regex Grouping for Keyword Matching
**Learning:** Checking a string against a large list of keywords one at a time (e.g., `any(re.search(pattern, text) for pattern in keywords)`) is inefficient: every keyword costs a Python-level loop iteration plus a separate `re.search` call, including its pattern-cache lookup. Pre-compiling the entire list of keywords into a single regex using alternation (e.g., `re.compile(r"\b(?:kw1|kw2|...)\b")`) replaces all of that with one C-level search per string and reduces overhead significantly.
**Action:** Always combine static lists of keywords into single pre-compiled regex objects when performing membership or presence checks inside large loops.
185 changes: 90 additions & 95 deletions cli/integrations/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,93 @@
from pathlib import Path
from typing import Any, Dict, List, Optional

# Pre-compiled skill categorization patterns (built once at import time so the
# per-skill loop does a single regex search per category).
#
# Every keyword below is a regular-expression FRAGMENT, not a plain string:
# metacharacters must already be escaped (see "c\\+\\+" and "next\\.js").
_LANGUAGE_KEYWORDS = [
    "python",
    "javascript",
    "java",
    "go",
    "rust",
    "c\\+\\+",
    "c#",
    "ruby",
    "php",
    "swift",
    "kotlin",
    "scala",
    "haskell",
    "typescript",
    "sql",
]
_FRAMEWORK_KEYWORDS = [
    "django",
    "flask",
    "fastapi",
    "spring",
    "react",
    "angular",
    "vue",
    "express",
    "rails",
    "laravel",
    "next\\.js",
    "nuxt",
    "tensorflow",
    "pytorch",
    "keras",
    "pandas",
    "numpy",
    "scikit",
    "langchain",
]
_CLOUD_KEYWORDS = [
    "aws",
    "azure",
    "gcp",
    "google cloud",
    "amazon web services",
    "heroku",
    "vercel",
    "netlify",
    "digitalocean",
    "linode",
]
_DATABASE_KEYWORDS = [
    "postgres",
    "postgresql",
    "mysql",
    "mongodb",
    "redis",
    "sqlite",
    "oracle",
    "sql server",
    "cassandra",
    "elasticsearch",
    "dynamodb",
]
_TOOL_KEYWORDS = [
    "docker",
    "kubernetes",
    "git",
    "github",
    "gitlab",
    "jenkins",
    "circleci",
    "terraform",
    "ansible",
    "nagios",
    "grafana",
    "prometheus",
]


def _compile_keyword_pattern(keywords: List[str]) -> "re.Pattern[str]":
    """Combine regex keyword fragments into one case-insensitive pattern.

    A keyword only matches as a whole token: it must not be preceded by a
    word character, and a keyword that ends in a word character must not be
    followed by one.

    Lookarounds are used instead of ``\\b`` on purpose. ``\\b`` placed after a
    keyword that ends in a non-word character (``c\\+\\+``, ``c#``) succeeds
    only when a word character follows, so a bare "C++" or "C#" would never
    match. Such keywords therefore get no trailing boundary check at all,
    which keeps inputs like "C++11" matching as they did with ``\\b``.

    Args:
        keywords: Non-empty regex fragments, already escaped where needed.

    Returns:
        A compiled pattern matching any one of the keywords.
    """
    alternatives = []
    for keyword in keywords:
        last_char = keyword[-1]
        if last_char.isalnum() or last_char == "_":
            # Same effect as a trailing \b after a word character.
            alternatives.append(keyword + r"(?!\w)")
        else:
            alternatives.append(keyword)
    # The leading lookbehind equals a leading \b for keywords that start with
    # a word character, which all current keywords do.
    return re.compile(r"(?<!\w)(?:" + "|".join(alternatives) + ")", re.IGNORECASE)


# Order matters: callers take the first category whose pattern matches.
_SKILL_PATTERNS = [
    (_compile_keyword_pattern(_LANGUAGE_KEYWORDS), "languages"),
    (_compile_keyword_pattern(_FRAMEWORK_KEYWORDS), "frameworks"),
    (_compile_keyword_pattern(_CLOUD_KEYWORDS), "cloud_platforms"),
    (_compile_keyword_pattern(_DATABASE_KEYWORDS), "databases"),
    (_compile_keyword_pattern(_TOOL_KEYWORDS), "tools"),
]


class LinkedInSync:
"""Sync LinkedIn profile data to/from resume.yaml."""
Expand Down Expand Up @@ -442,103 +529,11 @@ def _categorize_skills(self, skills: List[str]) -> Dict[str, List[str]]:
"other": [],
}

language_keywords = [
"python",
"javascript",
"java",
"go",
"rust",
"c\\+\\+",
"c#",
"ruby",
"php",
"swift",
"kotlin",
"scala",
"haskell",
"typescript",
"sql",
]

framework_keywords = [
"django",
"flask",
"fastapi",
"spring",
"react",
"angular",
"vue",
"express",
"rails",
"laravel",
"next\\.js",
"nuxt",
"tensorflow",
"pytorch",
"keras",
"pandas",
"numpy",
"scikit",
"langchain",
]

cloud_keywords = [
"aws",
"azure",
"gcp",
"google cloud",
"amazon web services",
"heroku",
"vercel",
"netlify",
"digitalocean",
"linode",
]

database_keywords = [
"postgres",
"postgresql",
"mysql",
"mongodb",
"redis",
"sqlite",
"oracle",
"sql server",
"cassandra",
"elasticsearch",
"dynamodb",
]

tool_keywords = [
"docker",
"kubernetes",
"git",
"github",
"gitlab",
"jenkins",
"circleci",
"terraform",
"ansible",
"nagios",
"grafana",
"prometheus",
]

for skill in skills:
skill_lower = skill.lower()

# Check each category (use first match)
matched = False
patterns = [
(language_keywords, "languages"),
(framework_keywords, "frameworks"),
(cloud_keywords, "cloud_platforms"),
(database_keywords, "databases"),
(tool_keywords, "tools"),
]

for keywords, category in patterns:
if any(re.search(rf"\b{kw}\b", skill_lower) for kw in keywords):

for pattern, category in _SKILL_PATTERNS:
if pattern.search(skill):
categories[category].append(skill)
matched = True
break
Expand Down
Loading