anchapin · anchapin · May 5, 2026 · sourcery-ai · May 5, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -13,3 +13,7 @@
 ## 2025-02-18 - Regex Pre-compilation in Hot Paths
 **Learning:** Re-compiling regexes inside a frequently called function (like `latex_escape` which runs for every string) creates significant overhead. Pre-compiling them at module level yielded a ~3.2x speedup.
 **Action:** Always look for regex compilations inside loops or frequently called functions and move them to module level constants.
+
+## 2024-05-22 - Python In-Membership Checks vs Sets
+**Learning:** Do not attempt to optimize string membership checks like `ext in [".yaml", ".yml"]` by converting them to module-level sets. Modern CPython already optimizes these into constant tuple lookups at compile-time (`LOAD_CONST`), making such changes useless micro-optimizations.
+**Action:** Focus on algorithmic complexity, DB queries, or actual runtime bottlenecks (like re-compiling regexes inside loops) instead of simple constant membership checks.
diff --git a/cli/commands/convert.py b/cli/commands/convert.py
@@ -13,6 +13,9 @@
 
 from ..utils.json_resume_converter import JSONResumeConverter, convert_yaml_to_json_resume
 
+# Optimize file extension checks with O(1) set lookup to prevent repeated list allocations
+_YAML_EXTENSIONS = {".yaml", ".yml"}
-# Optimize file extension checks with O(1) set lookup to prevent repeated list allocations
-_YAML_EXTENSIONS = {".yaml", ".yml"}
+# Supported YAML file extensions
+_YAML_EXTENSIONS = {".yaml", ".yml"}
-# Optimize file extension checks with O(1) set lookup to prevent repeated list allocations
-_YAML_EXTENSIONS = {".yaml", ".yml"}
+# Supported YAML file extensions
+_YAML_EXTENSIONS = {".yaml", ".yml"}
+
 
 @click.command()
 @click.argument("input_file", type=click.Path(exists=True, path_type=Path))
@@ -67,9 +70,9 @@ def convert(input_file: Path, output_file: Path, direction: str, format: str, no
         input_ext = input_file.suffix.lower()
         output_ext = output_file.suffix.lower()
 
-        if input_ext in [".yaml", ".yml"] and output_ext == ".json":
+        if input_ext in _YAML_EXTENSIONS and output_ext == ".json":
             direction = "to_json"
-        elif input_ext == ".json" and output_ext in [".yaml", ".yml"]:
+        elif input_ext == ".json" and output_ext in _YAML_EXTENSIONS:
             direction = "to_yaml"
         else:
             click.echo(
@@ -269,7 +272,7 @@ def import_resume(input_file: Path, fmt: Optional[str], output: Optional[Path],
         ext = input_file.suffix.lower()
         if ext == ".json":
             fmt = "json"
-        elif ext in [".yaml", ".yml"]:
+        elif ext in _YAML_EXTENSIONS:
             fmt = "yaml"
         else:
             click.echo(
@@ -375,7 +378,7 @@ def export_resume(input_file: Path, fmt: Optional[str], output: Optional[Path]):
         ext = input_file.suffix.lower()
         if ext == ".json":
             fmt = "json"
-        elif ext in [".yaml", ".yml"]:
+        elif ext in _YAML_EXTENSIONS:
             fmt = "yaml"
         else:
             click.echo(

diff --git a/cli/integrations/job_parser.py b/cli/integrations/job_parser.py
@@ -19,10 +19,15 @@
 import re
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from bs4 import BeautifulSoup, Tag
 
+# Pre-compile regex patterns for performance in parsing
+_INDEED_HEADER_PATTERN = re.compile(r"jobsearch-JobInfoHeader")
+_REQUIREMENTS_HEADING_PATTERN = re.compile(r"requirements|qualifications|skills", re.IGNORECASE)
+_RESPONSIBILITIES_HEADING_PATTERN = re.compile(r"responsibilities|duties|what you", re.IGNORECASE)
+
 # Optional import for URL fetching
 try:
     import requests
@@ -366,7 +371,7 @@ def _parse_indeed(self, html: str) -> JobDetails:
         # Extract position
         position = self._extract_by_selectors(soup, self.INDEED_SELECTORS["position"])
         if not position:
-            h1 = soup.find("h1", class_=re.compile(r"jobsearch-JobInfoHeader"))
+            h1 = soup.find("h1", class_=_INDEED_HEADER_PATTERN)  # type: ignore[call-overload]
             position = h1.get_text(strip=True) if h1 else ""
 
         # Extract location
@@ -447,10 +452,10 @@ def _parse_generic(self, html: str) -> JobDetails:
         salary = self._extract_salary_from_text(html)
 
         # Extract requirements section - look for heading tags first
-        requirements = []
+        requirements: List[str] = []
         req_heading = soup.find(
             ["h1", "h2", "h3", "h4", "h5", "h6"],
-            string=re.compile(r"requirements|qualifications|skills", re.IGNORECASE),
+            string=_REQUIREMENTS_HEADING_PATTERN,  # type: ignore[call-overload]
         )
         if req_heading:
             # Get the next sibling element(s) containing the list
@@ -462,10 +467,10 @@ def _parse_generic(self, html: str) -> JobDetails:
             requirements = self._extract_list_by_keyword(html, "requirements")
 
         # Extract responsibilities section
-        responsibilities = []
+        responsibilities: List[str] = []
         resp_heading = soup.find(
             ["h1", "h2", "h3", "h4", "h5", "h6"],
-            string=re.compile(r"responsibilities|duties|what you", re.IGNORECASE),
+            string=_RESPONSIBILITIES_HEADING_PATTERN,  # type: ignore[call-overload]
         )
         if resp_heading:
             next_elem = resp_heading.find_next_sibling(["ul", "ol", "div", "p"])
@@ -734,21 +739,23 @@ def _extract_list_items(self, element: Tag) -> List[str]:
 
         return [item for item in items if len(item) > 3][:15]
 
-    def _extract_list_by_keyword(self, html: str, keyword: str) -> List[str]:
+    def _extract_list_by_keyword(self, html: str, keyword: Union[str, re.Pattern]) -> List[str]:
         """
         Extract list items near a keyword.
 
         Args:
             html: HTML content
-            keyword: Keyword to search for
+            keyword: Keyword to search for (string or compiled regex)
 
         Returns:
             List of extracted items
         """
         soup = BeautifulSoup(html, "lxml")
 
+        pattern = keyword if isinstance(keyword, re.Pattern) else re.compile(keyword, re.IGNORECASE)
+
         # Find element containing the keyword
-        for elem in soup.find_all(string=re.compile(keyword, re.IGNORECASE)):
+        for elem in soup.find_all(string=pattern):  # type: ignore[call-overload]
             parent = elem.find_parent(["div", "section", "ul", "li"])
             if parent:
                 # Look for list items in parent or siblings