anchapin · anchapin · May 21, 2026 · May 21, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -13,3 +13,7 @@
 ## 2025-02-18 - Regex Pre-compilation in Hot Paths
 **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
 **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.
+
+## 2024-05-22 - Regex Grouping for Keyword Matching
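+**Learning:** Checking a string against a list of keywords one pattern at a time (e.g., `any(re.search(pattern, text) for pattern in keywords)`) is inefficient: every keyword costs a separate pattern lookup and a separate scan of the text, all driven from a Python-level loop. Pre-compiling the entire list of keywords into a single regex using alternation (e.g., `re.compile(r"\b(?:kw1|kw2|...)\b")`) replaces this with one search per string. Caveat: `\b` only matches between a word character and a non-word character, so a keyword that ends in punctuation (e.g., `c\+\+` or `c#`) will not match at the end of the text with a trailing `\b`.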
+**Action:** Always combine static lists of keywords into single pre-compiled regex objects when performing membership or presence checks inside large loops.
diff --git a/cli/integrations/linkedin.py b/cli/integrations/linkedin.py
@@ -7,6 +7,93 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
+# Skill-categorization keywords, compiled once at import time into _SKILL_PATTERNS.
+_LANGUAGE_KEYWORDS = [  # entries are regex fragments: metacharacters are pre-escaped
+    "python",
+    "javascript",
+    "java",
+    "go",
+    "rust",
+    "c\\+\\+",
+    "c#",
+    "ruby",
+    "php",
+    "swift",
+    "kotlin",
+    "scala",
+    "haskell",
+    "typescript",
+    "sql",
+]
+_FRAMEWORK_KEYWORDS = [  # regex fragments; the dot in "next\\.js" is escaped to match literally
+    "django",
+    "flask",
+    "fastapi",
+    "spring",
+    "react",
+    "angular",
+    "vue",
+    "express",
+    "rails",
+    "laravel",
+    "next\\.js",
+    "nuxt",
+    "tensorflow",
+    "pytorch",
+    "keras",
+    "pandas",
+    "numpy",
+    "scikit",
+    "langchain",
+]
+_CLOUD_KEYWORDS = [  # regex fragments; multi-word entries contain a single literal space
+    "aws",
+    "azure",
+    "gcp",
+    "google cloud",
+    "amazon web services",
+    "heroku",
+    "vercel",
+    "netlify",
+    "digitalocean",
+    "linode",
+]
+_DATABASE_KEYWORDS = [  # NOTE(review): "sql server" contains the language keyword "sql", which is tried first
+    "postgres",
+    "postgresql",
+    "mysql",
+    "mongodb",
+    "redis",
+    "sqlite",
+    "oracle",
+    "sql server",
+    "cassandra",
+    "elasticsearch",
+    "dynamodb",
+]
+_TOOL_KEYWORDS = [  # "git", "github" and "gitlab" are listed separately because matching is whole-word
+    "docker",
+    "kubernetes",
+    "git",
+    "github",
+    "gitlab",
+    "jenkins",
+    "circleci",
+    "terraform",
+    "ansible",
+    "nagios",
+    "grafana",
+    "prometheus",
+]
+
+_SKILL_PATTERNS = [  # ordered: first match wins; trailing (?:\b|(?<!\w)) lets "c++" / "c#" match
+    (re.compile(r"\b(?:%s)(?:\b|(?<!\w))" % "|".join(_LANGUAGE_KEYWORDS), re.IGNORECASE), "languages"),
+    (re.compile(r"\b(?:%s)(?:\b|(?<!\w))" % "|".join(_FRAMEWORK_KEYWORDS), re.IGNORECASE), "frameworks"),
+    (re.compile(r"\b(?:%s)(?:\b|(?<!\w))" % "|".join(_CLOUD_KEYWORDS), re.IGNORECASE), "cloud_platforms"),
+    (re.compile(r"\b(?:%s)(?:\b|(?<!\w))" % "|".join(_DATABASE_KEYWORDS), re.IGNORECASE), "databases"),
+    (re.compile(r"\b(?:%s)(?:\b|(?<!\w))" % "|".join(_TOOL_KEYWORDS), re.IGNORECASE), "tools"),
+]
+
 
 class LinkedInSync:
     """Sync LinkedIn profile data to/from resume.yaml."""
@@ -442,103 +529,11 @@ def _categorize_skills(self, skills: List[str]) -> Dict[str, List[str]]:
             "other": [],
         }
 
-        language_keywords = [
-            "python",
-            "javascript",
-            "java",
-            "go",
-            "rust",
-            "c\\+\\+",
-            "c#",
-            "ruby",
-            "php",
-            "swift",
-            "kotlin",
-            "scala",
-            "haskell",
-            "typescript",
-            "sql",
-        ]
-
-        framework_keywords = [
-            "django",
-            "flask",
-            "fastapi",
-            "spring",
-            "react",
-            "angular",
-            "vue",
-            "express",
-            "rails",
-            "laravel",
-            "next\\.js",
-            "nuxt",
-            "tensorflow",
-            "pytorch",
-            "keras",
-            "pandas",
-            "numpy",
-            "scikit",
-            "langchain",
-        ]
-
-        cloud_keywords = [
-            "aws",
-            "azure",
-            "gcp",
-            "google cloud",
-            "amazon web services",
-            "heroku",
-            "vercel",
-            "netlify",
-            "digitalocean",
-            "linode",
-        ]
-
-        database_keywords = [
-            "postgres",
-            "postgresql",
-            "mysql",
-            "mongodb",
-            "redis",
-            "sqlite",
-            "oracle",
-            "sql server",
-            "cassandra",
-            "elasticsearch",
-            "dynamodb",
-        ]
-
-        tool_keywords = [
-            "docker",
-            "kubernetes",
-            "git",
-            "github",
-            "gitlab",
-            "jenkins",
-            "circleci",
-            "terraform",
-            "ansible",
-            "nagios",
-            "grafana",
-            "prometheus",
-        ]
-
         for skill in skills:
-            skill_lower = skill.lower()
-
-            # Check each category (use first match)
             matched = False
-            patterns = [
-                (language_keywords, "languages"),
-                (framework_keywords, "frameworks"),
-                (cloud_keywords, "cloud_platforms"),
-                (database_keywords, "databases"),
-                (tool_keywords, "tools"),
-            ]
-
-            for keywords, category in patterns:
-                if any(re.search(rf"\b{kw}\b", skill_lower) for kw in keywords):
+
+            for pattern, category in _SKILL_PATTERNS:
+                if pattern.search(skill):
                     categories[category].append(skill)
                     matched = True
                     break