Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 29 additions & 18 deletions heidi_engine/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,27 +473,39 @@ def load_pricing_config() -> Dict[str, Dict[str, float]]:
- Falls back to DEFAULT_PRICING
- Allows user to customize pricing per model

BOLT OPTIMIZATION:
Uses a thread-safe module-level cache (5.0s TTL) to avoid
redundant disk I/O and JSON parsing on high-frequency calls.

TUNABLE:
- Create pricing.json to override default prices
- Format: {"model_name": {"input": 0.5, "output": 1.5}}
- Prices are per 1M tokens
"""
pricing = DEFAULT_PRICING.copy()
global _pricing_cache, _pricing_check_ts
with _pricing_lock:
now = time.monotonic()
if _pricing_cache and (now - _pricing_check_ts) < 5.0:
return _pricing_cache.copy()

# Check for pricing config file
pricing_file = (
Path(PRICING_CONFIG_PATH) if PRICING_CONFIG_PATH else get_run_dir() / "pricing.json"
)
pricing = DEFAULT_PRICING.copy()

if pricing_file.exists():
try:
with open(pricing_file) as f:
custom = json.load(f)
pricing.update(custom)
except Exception as e:
print(f"[WARN] Failed to load pricing config: {e}", file=sys.stderr)
# Check for pricing config file
pricing_file = (
Path(PRICING_CONFIG_PATH) if PRICING_CONFIG_PATH else get_run_dir() / "pricing.json"
)

if pricing_file.exists():
try:
with open(pricing_file) as f:
custom = json.load(f)
pricing.update(custom)
except Exception as e:
print(f"[WARN] Failed to load pricing config: {e}", file=sys.stderr)

return pricing
_pricing_cache = pricing
_pricing_check_ts = now
return pricing.copy()
Comment on lines +485 to +508
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation of load_pricing_config uses shallow copies (.copy()) for a nested dictionary structure (Dict[str, Dict[str, float]]). This is problematic because the inner dictionaries (e.g., {"input": 0.15, "output": 0.60}) remain shared between the cache, the DEFAULT_PRICING constant, and any caller that receives the returned value. If a caller or a subsequent update modifies an inner dictionary, it will inadvertently corrupt the global state and the cache.

Additionally, the cache is module-level while the pricing file path can be run-specific (via get_run_dir()), so a result cached for one run's path may briefly be served for another; the 5.0s TTL bounds that staleness but does not eliminate it — deepcopy does not address this. What copy.deepcopy does fix is the aliasing problem: it fully isolates the cached structure from DEFAULT_PRICING and from every caller's returned value.

    global _pricing_cache, _pricing_check_ts
    with _pricing_lock:
        now = time.monotonic()
        if _pricing_cache and (now - _pricing_check_ts) < 5.0:
            return copy.deepcopy(_pricing_cache)

        # Use deepcopy to ensure isolation from the module-level constant
        pricing = copy.deepcopy(DEFAULT_PRICING)

        # Check for pricing config file
        pricing_file = (
            Path(PRICING_CONFIG_PATH) if PRICING_CONFIG_PATH else get_run_dir() / "pricing.json"
        )

        if pricing_file.exists():
            try:
                with open(pricing_file) as f:
                    custom = json.load(f)
                    pricing.update(custom)
            except Exception as e:
                print(f"[WARN] Failed to load pricing config: {e}", file=sys.stderr)

        _pricing_cache = pricing
        _pricing_check_ts = now
        return copy.deepcopy(pricing)



def estimate_cost(input_tokens: int, output_tokens: int, model: str) -> float:
Expand Down Expand Up @@ -732,11 +744,6 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]:
"usage": get_default_usage(),
}

# BOLT OPTIMIZATION: Check thread-safe state cache
cached = _state_cache.get(target_run_id, state_file)
if cached:
return cached

try:
with open(state_file) as f:
state = json.load(f)
Expand Down Expand Up @@ -1372,6 +1379,10 @@ def stage_context(stage: str, round_num: int, message: str, **kwargs):
_gpu_check_ts = 0.0
_gpu_lock = threading.Lock()

_pricing_cache: Dict[str, Dict[str, float]] = {}
_pricing_check_ts = 0.0
_pricing_lock = threading.Lock()

_event_ts_cache: Dict[str, str] = {}
_event_ts_check_ts: Dict[str, float] = {} # run_id -> ts
_event_lock = threading.Lock()
Expand Down