Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions src/sentry/deletions/defaults/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,20 +258,16 @@ def delete_group_hashes(
logger.warning("Error scheduling task to delete hashes from seer")
finally:
hash_ids = [gh[0] for gh in hashes_chunk]
# If we delete the grouphash metadata rows first we will not need to update the references to the other grouphashes.
# If we try to delete the group hashes first, then it will require the updating of the columns first.
#
# To understand this, let's say we have the following relationships:
# gh A -> ghm A -> no reference to another grouphash
# gh B -> ghm B -> gh C
# gh C -> ghm C -> gh A
#
# Deleting group hashes A, B & C (since they all point to the same group) will require:
# * Updating columns ghmB & ghmC to point to None
# * Deleting the group hash metadata rows
# * Deleting the group hashes
#
# If we delete the metadata first, we will not need to update the columns before deleting them.
# GroupHashMetadata rows can reference GroupHash rows via seer_matched_grouphash_id.
# Before deleting these GroupHash rows, we need to either:
# 1. Update seer_matched_grouphash to None first (to avoid foreign key constraint errors), OR
# 2. Delete the GroupHashMetadata rows entirely (they'll be deleted anyway)
# If we null out those seer_matched_grouphash references first, the deletion of the GroupHashMetadata
# rows has less work to do, which makes the overall deletion faster.
if options.get("deletions.group-hashes-metadata.update-seer-matched-grouphash-ids"):
GroupHashMetadata.objects.filter(seer_matched_grouphash_id__in=hash_ids).update(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what we should have done from the beginning (#101720).

seer_matched_grouphash=None
)
GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
GroupHash.objects.filter(id__in=hash_ids).delete()

Expand Down
6 changes: 6 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,12 @@
type=Int,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)
# When enabled, delete_group_hashes first sets seer_matched_grouphash to None on any
# GroupHashMetadata rows that reference the hashes being deleted, before it deletes the
# GroupHashMetadata and GroupHash rows. Disabled by default.
register(
"deletions.group-hashes-metadata.update-seer-matched-grouphash-ids",
default=False,
type=Bool,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"deletions.group-history.use-bulk-deletion",
Expand Down
Loading