@@ -309,19 +309,28 @@ def _mask_text(text: str) -> str:
309309 decoded_results = analyzer .analyze (decoded_text_for_masking , entities = entity_types , language = "en" )
310310
311311 if decoded_results :
312- # Map detections back to mask encoded chunks
312+ # Build list of (candidate, entity_type) pairs to mask
313+ candidates_to_mask = []
314+
313315 for result in decoded_results :
314316 detected_value = decoded_text_for_masking [result .start : result .end ]
315317 entity_type = result .entity_type
316318
317319 # Find candidate that contains this PII
318320 for candidate in candidates_for_masking :
319- if detected_value in candidate .decoded_text :
320- # Mask the encoded version
321- entity_marker = f"<{ entity_type } _ENCODED>"
322- masked = masked [: candidate .start ] + entity_marker + masked [candidate .end :]
321+ if candidate .decoded_text and detected_value .lower () in candidate .decoded_text .lower ():
322+ candidates_to_mask .append ((candidate , entity_type ))
323323 break
324324
325+ # Sort by position (reverse) to mask from end to start
326+ # This preserves position validity for subsequent replacements
327+ candidates_to_mask .sort (key = lambda x : x [0 ].start , reverse = True )
328+
329+ # Mask from end to start
330+ for candidate , entity_type in candidates_to_mask :
331+ entity_marker = f"<{ entity_type } _ENCODED>"
332+ masked = masked [: candidate .start ] + entity_marker + masked [candidate .end :]
333+
325334 return masked
326335
327336 # Mask each text part
0 commit comments