Skip to content

Commit fc5e7ef

Browse files
armenzg authored and Ahmed-Labs committed
fix(deletions): Update GroupHashMetadata in batches (#102864)
This is a follow-up to #102612. Fixes [SENTRY-5C13](https://sentry.sentry.io/issues/7001709353/). For posterity ``` OperationalError canceling statement due to user request SQL: UPDATE "sentry_grouphashmetadata" SET "seer_matched_grouphash_id" = NULL WHERE "sentry_grouphashmetadata"."seer_matched_grouphash_id" IN (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ``` [Link](https://github.com/getsentry/sentry/blob/cdfbaa2cebe0f119104586cbe11da667072d5637/src/sentry/deletions/defaults/group.py#L267-L272) to code: ```python if options.get("deletions.group-hashes-metadata.update-seer-matched-grouphash-ids"): # This is the line where the error comes from GroupHashMetadata.objects.filter( seer_matched_grouphash_id__in=hash_ids ).update(seer_matched_grouphash=None) GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete() GroupHash.objects.filter(id__in=hash_ids).delete() ```
1 parent 9b45ee3 commit fc5e7ef

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

src/sentry/deletions/defaults/group.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,50 @@ def delete_project_group_hashes(project_id: int) -> None:
240240
delete_group_hashes(project_id, issue_platform_group_ids)
241241

242242

243+
def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> int:
    """
    Set seer_matched_grouphash to None on GroupHashMetadata rows that
    reference any of the given hash_ids, issuing the UPDATEs in batches.

    A single UPDATE over every matching row can be cancelled by a statement
    timeout, so the matching primary keys are collected first and then
    updated in slices of at most ``deletions.group-hash-metadata.batch-size``
    rows each.

    Args:
        hash_ids: GroupHash ids that are about to be deleted.

    Returns:
        The total number of rows updated. When no row references hash_ids
        the function returns 0 right away, without emitting the metric or
        the log line.
    """
    # Resolve the primary keys up front so every UPDATE below filters on
    # ``id`` only and touches a bounded number of rows.
    metadata_ids = list(
        GroupHashMetadata.objects.filter(seer_matched_grouphash_id__in=hash_ids).values_list(
            "id", flat=True
        )
    )

    if not metadata_ids:
        return 0

    # The fallback value is defined where the option is registered, so only
    # the key is passed here (same as the other options.get call in this module).
    option_batch_size = options.get("deletions.group-hash-metadata.batch-size")
    # Guard against a zero or negative setting, which would break range() below.
    batch_size = max(1, option_batch_size)

    total_updated = 0
    for start in range(0, len(metadata_ids), batch_size):
        batch = metadata_ids[start : start + batch_size]
        total_updated += GroupHashMetadata.objects.filter(id__in=batch).update(
            seer_matched_grouphash=None
        )

    metrics.incr(
        "deletions.group_hash_metadata.rows_updated",
        amount=total_updated,
        sample_rate=1.0,
    )
    logger.info(
        "update_group_hash_metadata_in_batches.complete",
        extra={
            "hash_ids_count": len(hash_ids),
            "total_updated": total_updated,
        },
    )

    return total_updated
285+
286+
243287
def delete_group_hashes(
244288
project_id: int,
245289
group_ids: Sequence[int],
@@ -276,9 +320,7 @@ def delete_group_hashes(
276320
# If we update the columns first, the deletion of the grouphash metadata rows will have less work to do,
277321
# thus, improving the performance of the deletion.
278322
if options.get("deletions.group-hashes-metadata.update-seer-matched-grouphash-ids"):
279-
GroupHashMetadata.objects.filter(seer_matched_grouphash_id__in=hash_ids).update(
280-
seer_matched_grouphash=None
281-
)
323+
update_group_hash_metadata_in_batches(hash_ids)
282324
GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
283325
GroupHash.objects.filter(id__in=hash_ids).delete()
284326

src/sentry/options/defaults.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,12 @@
342342
type=Int,
343343
flags=FLAG_AUTOMATOR_MODIFIABLE,
344344
)
345+
# Number of GroupHashMetadata rows updated per statement when
# seer_matched_grouphash is cleared in batches ahead of group hash deletion.
register(
346+
"deletions.group-hash-metadata.batch-size",
347+
default=1000,
348+
type=Int,
349+
flags=FLAG_AUTOMATOR_MODIFIABLE,
350+
)
345351
register(
346352
"deletions.group-hashes-metadata.update-seer-matched-grouphash-ids",
347353
default=False,

0 commit comments

Comments
 (0)