4 changes: 4 additions & 0 deletions .jules/bolt.md
@@ -1,3 +1,7 @@
## 2026-02-20 - [Optimized Telemetry Redaction and Sanitization]
**Learning:** Sequential `re.sub` calls are faster than combined regex callbacks for small pattern sets, but the biggest performance win comes from early-exit fast-paths (e.g., checking for `\x1b` or secret keywords) and proper ordering of truncation vs. redaction for large strings.
**Action:** Always implement fast-path guards for expensive string processing and ensure that heavy operations (like regex) are performed on the smallest possible data subset (e.g., after truncation).
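A minimal sketch of that ordering (all names, patterns, and the length limit are hypothetical, not the project's actual redaction code): truncate first so the regexes scan the smallest string, then take an early exit when neither an escape sequence nor a secret keyword can possibly match.

```python
import re

# Hypothetical patterns standing in for the real redaction rules.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")
_SECRET_RE = re.compile(r"(?:api[_-]?key|token|password)\s*[:=]\s*\S+", re.IGNORECASE)
_MAX_LEN = 4096  # assumed truncation limit

def sanitize(value: str) -> str:
    # Heavy work on the smallest possible subset: truncate before redacting.
    if len(value) > _MAX_LEN:
        value = value[:_MAX_LEN]
    # Fast path: most telemetry strings contain neither marker.
    lowered = value.lower()
    if "\x1b" not in value and not any(
        kw in lowered for kw in ("key", "token", "password")
    ):
        return value
    value = _ANSI_RE.sub("", value)
    return _SECRET_RE.sub("[REDACTED]", value)
```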

## 2026-05-12 - [Restored Telemetry Cache and Optimized Validation]
**Learning:** Removing a buggy cache implementation is a performance regression; it should be fixed instead. Also, regex-based fast-paths for secret detection are safer than string-based keyword lists to maintain "fail-closed" security while improving speed.
**Action:** Fix broken performance optimizations instead of removing them, and use robust fast-path patterns for security-sensitive string processing.
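One way to read the second entry (a sketch with made-up patterns, not the repository's `SECRET_PATTERNS`): a cheap prefilter regex that deliberately over-matches keeps the check fail-closed, because any string the prefilter rejects cannot match a full pattern either.

```python
import re

# Illustrative patterns only.
_FULL_PATTERNS = [
    (re.compile(r"AKIA[0-9A-Z]{16}"), "aws_access_key"),
    (re.compile(r"ghp_[A-Za-z0-9]{36}"), "github_token"),
]

# Over-matching prefilter: every full pattern starts with one of these
# prefixes, so a miss here proves no full pattern can match.
_PREFILTER = re.compile(r"AKIA|ghp_")

def find_secrets(text: str) -> list[str]:
    if not _PREFILTER.search(text):
        return []  # fast path, still fail-closed
    return [name for pat, name in _FULL_PATTERNS if pat.search(text)]
```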
2 changes: 1 addition & 1 deletion heidi_engine/telemetry.py
@@ -733,7 +733,7 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]:
}

# BOLT OPTIMIZATION: Check thread-safe state cache
-    cached = _state_cache.get(target_run_id, state_file)
+    cached = _state_cache.get(resolved_run_id)
if cached:
return cached
Comment on lines 735 to 738
medium

This cache check is redundant. The state cache is already checked at line 721 using the same resolved_run_id. Since no cache update or state modification occurs between these two points (only a file existence check), this second check will always yield the same result as the first one. Removing it simplifies the code and avoids an unnecessary lock acquisition in the cache.


16 changes: 12 additions & 4 deletions scripts/02_validate_clean.py
@@ -89,6 +89,9 @@
# TUNABLE: Add/remove fields based on your data structure
SECRET_CHECK_FIELDS = ["instruction", "input", "output", "response", "completion"]

# BOLT OPTIMIZATION: Pre-compiled regex patterns for secret detection
_COMPILED_SECRET_PATTERNS = [(re.compile(p), t) for p, t in SECRET_PATTERNS]


def parse_args() -> argparse.Namespace:
"""
@@ -192,6 +195,10 @@ def detect_secrets(sample: Dict[str, Any]) -> Tuple[bool, List[str]]:
- Checks all specified fields against secret patterns
- FAIL CLOSED: Returns True (has secrets) if ANY pattern matches

BOLT OPTIMIZATION:
Uses pre-compiled regex patterns to improve performance
during dataset processing.

TUNABLE:
- Add more SECRET_PATTERNS for your use case
- Adjust SECRET_CHECK_FIELDS to check more or fewer fields
@@ -208,8 +215,8 @@ def detect_secrets(sample: Dict[str, Any]) -> Tuple[bool, List[str]]:

text = str(sample[field])

-    for pattern, secret_type in SECRET_PATTERNS:
-        if re.search(pattern, text):
+    for pattern, secret_type in _COMPILED_SECRET_PATTERNS:
+        if pattern.search(text):
found_secrets.append(f"{field}:{secret_type}")

return len(found_secrets) > 0, found_secrets
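For context on the pre-compilation win: `re.search(pattern, text)` hashes the pattern string and consults `re`'s internal pattern cache on every call, while a compiled object skips that lookup. A rough, self-contained comparison (illustrative patterns and text, numbers will vary by machine):

```python
import re
import timeit

patterns = [r"AKIA[0-9A-Z]{16}", r"ghp_[A-Za-z0-9]{36}"]
compiled = [re.compile(p) for p in patterns]
text = "no secrets in this sample " * 40

t_uncompiled = timeit.timeit(
    lambda: [re.search(p, text) for p in patterns], number=5_000
)
t_compiled = timeit.timeit(
    lambda: [c.search(text) for c in compiled], number=5_000
)
# The compiled variant avoids a cache lookup per call; the gap grows
# with the number of patterns and the number of samples scanned.
```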
@@ -275,8 +282,9 @@ def fuzzy_hash(sample: Dict[str, Any], n: int = 5) -> str:
- n=5 is a good balance for code data
"""
text = (sample.get("instruction", "") + sample.get("output", "")).lower()
-    # Remove whitespace for more robust matching
-    text = re.sub(r"\s+", "", text)
+    # BOLT OPTIMIZATION: "".join(text.split()) is faster than re.sub(r"\s+", "", text)
+    # for whitespace removal.
+    text = "".join(text.split())

if len(text) < n:
return text
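A quick check of the equivalence this change relies on: both forms strip all whitespace, `str.split()` just stays out of the regex engine.

```python
import re

samples = ["def f(x):\n    return x", "a b\tc\r\n d ", ""]
for s in samples:
    # str.split() with no argument splits on any whitespace run and
    # drops empty strings, so joining the pieces removes all whitespace.
    assert "".join(s.split()) == re.sub(r"\s+", "", s)
```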