Skip to content

Commit 695b0d4

Browse files
committed
Mask in sorted order
1 parent 2b0d899 commit 695b0d4

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

src/guardrails/_base_client.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -309,19 +309,28 @@ def _mask_text(text: str) -> str:
309309
decoded_results = analyzer.analyze(decoded_text_for_masking, entities=entity_types, language="en")
310310

311311
if decoded_results:
312-
# Map detections back to mask encoded chunks
312+
# Build list of (candidate, entity_type) pairs to mask
313+
candidates_to_mask = []
314+
313315
for result in decoded_results:
314316
detected_value = decoded_text_for_masking[result.start : result.end]
315317
entity_type = result.entity_type
316318

317319
# Find candidate that contains this PII
318320
for candidate in candidates_for_masking:
319-
if detected_value in candidate.decoded_text:
320-
# Mask the encoded version
321-
entity_marker = f"<{entity_type}_ENCODED>"
322-
masked = masked[: candidate.start] + entity_marker + masked[candidate.end :]
321+
if candidate.decoded_text and detected_value.lower() in candidate.decoded_text.lower():
322+
candidates_to_mask.append((candidate, entity_type))
323323
break
324324

325+
# Sort by position (reverse) to mask from end to start
326+
# This preserves position validity for subsequent replacements
327+
candidates_to_mask.sort(key=lambda x: x[0].start, reverse=True)
328+
329+
# Mask from end to start
330+
for candidate, entity_type in candidates_to_mask:
331+
entity_marker = f"<{entity_type}_ENCODED>"
332+
masked = masked[: candidate.start] + entity_marker + masked[candidate.end :]
333+
325334
return masked
326335

327336
# Mask each text part

0 commit comments

Comments
 (0)