Skip to content

Commit 26b6e78

Browse files
committed
fix(deletions): Update GroupHashMetadata in batches
This is a follow-up to #102612. Fixes [SENTRY-5C13](https://sentry.sentry.io/issues/7001709353/).
1 parent dedff5b commit 26b6e78

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

src/sentry/deletions/defaults/group.py

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,51 @@ def delete_project_group_hashes(project_id: int) -> None:
240240
delete_group_hashes(project_id, issue_platform_group_ids)
241241

242242

243+
def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> int:
    """
    Update seer_matched_grouphash to None for GroupHashMetadata rows
    that reference the given hash_ids, in batches to avoid timeouts.

    This function performs the update in smaller batches to reduce lock
    contention and prevent statement timeouts when many rows need updating.

    Returns the total number of rows updated.
    """
    # Collect the primary keys up front so each batch UPDATE filters on
    # plain ids instead of re-running the seer_matched_grouphash_id scan.
    metadata_ids = list(
        GroupHashMetadata.objects.filter(seer_matched_grouphash_id__in=hash_ids).values_list(
            "id", flat=True
        )
    )

    if not metadata_ids:
        return 0

    # Clamp to at least 1: a misconfigured option value of 0 would make
    # range() raise ValueError (zero step), and a negative value would
    # silently skip the loop without updating anything.
    batch_size = max(
        1,
        options.get(
            "deletions.group-hashes-metadata.update-seer-matched-grouphash-ids-batch-size", 1000
        ),
    )
    total_updated = 0
    for start in range(0, len(metadata_ids), batch_size):
        batch = metadata_ids[start : start + batch_size]
        total_updated += GroupHashMetadata.objects.filter(id__in=batch).update(
            seer_matched_grouphash=None
        )

    metrics.incr(
        "deletions.group_hash_metadata.rows_updated",
        amount=total_updated,
        sample_rate=1.0,
    )
    logger.info(
        "update_group_hash_metadata_in_batches.complete",
        extra={
            "hash_ids_count": len(hash_ids),
            "total_updated": total_updated,
        },
    )

    return total_updated
286+
287+
243288
def delete_group_hashes(
244289
project_id: int,
245290
group_ids: Sequence[int],
@@ -276,9 +321,7 @@ def delete_group_hashes(
276321
# If we update the columns first, the deletion of the grouphash metadata rows will have less work to do,
277322
# thus, improving the performance of the deletion.
278323
if options.get("deletions.group-hashes-metadata.update-seer-matched-grouphash-ids"):
279-
GroupHashMetadata.objects.filter(seer_matched_grouphash_id__in=hash_ids).update(
280-
seer_matched_grouphash=None
281-
)
324+
update_group_hash_metadata_in_batches(hash_ids)
282325
GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
283326
GroupHash.objects.filter(id__in=hash_ids).delete()
284327

src/sentry/options/defaults.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,12 @@
342342
type=Int,
343343
flags=FLAG_AUTOMATOR_MODIFIABLE,
344344
)
345+
# Batch size used when clearing seer_matched_grouphash references on
# GroupHashMetadata rows during group hash deletion (see
# update_group_hash_metadata_in_batches in deletions/defaults/group.py).
register(
    "deletions.group-hashes-metadata.update-seer-matched-grouphash-ids-batch-size",
    default=1000,
    type=Int,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
345351
register(
346352
"deletions.group-hashes-metadata.update-seer-matched-grouphash-ids",
347353
default=False,

0 commit comments

Comments
 (0)