Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions heidi_engine/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@
"""

import atexit
import copy
import base64
import copy
import json
import os
import re
Expand All @@ -70,7 +70,7 @@
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from typing import Any, Dict, List, Optional, Set

# =============================================================================
# CONFIGURATION - Adjust these for your needs
Expand Down Expand Up @@ -733,7 +733,7 @@ def get_state(run_id: Optional[str] = None) -> Dict[str, Any]:
}

# BOLT OPTIMIZATION: Check thread-safe state cache
cached = _state_cache.get(target_run_id, state_file)
cached = _state_cache.get(resolved_run_id)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While this line correctly fixes the original bug, the entire block from line 735 to 738 is redundant. A cache check is already performed at the beginning of this function (lines 721-723). Since the cache is not modified between the two checks, this second check is unnecessary. Please consider removing the entire block (lines 735-738) to simplify the code.

if cached:
return cached

Expand Down
3 changes: 2 additions & 1 deletion scripts/02_validate_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,9 @@ def fuzzy_hash(sample: Dict[str, Any], n: int = 5) -> str:
- n=5 is a good balance for code data
"""
text = (sample.get("instruction", "") + sample.get("output", "")).lower()
# BOLT OPTIMIZATION: "".join(text.split()) is ~5x faster than re.sub for whitespace removal.
# Remove whitespace for more robust matching
text = re.sub(r"\s+", "", text)
text = "".join(text.split())

if len(text) < n:
return text
Expand Down