Skip to content

Commit 21ae236

Browse files
committed
fix(deletions): Prevent timeouts when deleting GroupHash records
The issue comes from this block: https://github.com/getsentry/sentry/blob/a3a771719d4777bd747d98fb05eb77c20425e3d6/src/sentry/deletions/defaults/group.py#L248-L259 The update is triggered because of this `on_delete`: https://github.com/getsentry/sentry/blob/b1f684a335128dbc74ad3a7fac1d7052df9e8f01/src/sentry/models/grouphashmetadata.py#L116-L118 Currently, when we try to delete all the group hashes, we update the related group hash metadata first. This query ends up failing for taking longer than 30 seconds: > SQL: UPDATE "sentry_grouphashmetadata" SET "seer_matched_grouphash_id" = NULL WHERE "sentry_grouphashmetadata"."seer_matched_grouphash_id" IN (%s, ..., %s) This can be resolved by deleting the group hash _metadata_ rows before trying to delete the group hash rows. This will avoid the update statement altogether. This fix was initially started in #101545; however, the solution has completely changed, so this is a new PR. Fixes [SENTRY-5ABJ](https://sentry.io/organizations/sentry/issues/6930113529/).
1 parent b1f684a commit 21ae236

File tree

5 files changed

+108
-4
lines changed

5 files changed

+108
-4
lines changed

src/sentry/deletions/defaults/group.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
from sentry.issues.grouptype import GroupCategory, InvalidGroupTypeError
1212
from sentry.models.group import Group, GroupStatus
1313
from sentry.models.grouphash import GroupHash
14+
from sentry.models.grouphashmetadata import GroupHashMetadata
1415
from sentry.models.rulefirehistory import RuleFireHistory
1516
from sentry.notifications.models.notificationmessage import NotificationMessage
1617
from sentry.services.eventstore.models import Event
1718
from sentry.snuba.dataset import Dataset
1819
from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
20+
from sentry.utils import metrics
1921
from sentry.utils.query import RangeQuerySetWrapper
2022

2123
from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
@@ -27,6 +29,7 @@
2729
EVENT_CHUNK_SIZE = 10000
2830
GROUP_HASH_ITERATIONS = 10000
2931

32+
3033
# Group models that relate only to groups and not to events. We assume those to
3134
# be safe to delete/mutate within a single transaction for user-triggered
3235
# actions (delete/reprocess/merge/unmerge)
@@ -256,10 +259,38 @@ def delete_group_hashes(
256259
logger.warning("Error scheduling task to delete hashes from seer")
257260
finally:
258261
hash_ids = [gh[0] for gh in hashes_chunk]
262+
if options.get("deletions.group.delete_group_hashes_metadata_first"):
263+
# If we delete the grouphash metadata rows first, we will not need to update the references to the other grouphashes.
264+
# If we try to delete the group hashes first, the columns referencing them will need to be updated first.
265+
#
266+
# To understand this, let's say we have the following relationships:
267+
# gh A -> ghm A -> no reference to another grouphash
268+
# gh B -> ghm B -> gh C
269+
# gh C -> ghm C -> gh A
270+
#
271+
# Deleting group hashes A, B & C (since they all point to the same group) will require:
272+
# * Updating columns ghmB & ghmC to point to None
273+
# * Deleting the group hash metadata rows
274+
# * Deleting the group hashes
275+
#
276+
# If we delete the metadata first, we will not need to update the columns before deleting them.
277+
try:
278+
GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
279+
except Exception:
280+
# XXX: Let's make sure that no issues are caused by this and then remove it
281+
logger.exception("Error deleting group hash metadata")
282+
259283
GroupHash.objects.filter(id__in=hash_ids).delete()
260284

261285
iterations += 1
262286

287+
if iterations == GROUP_HASH_ITERATIONS:
288+
metrics.incr("deletions.group_hashes.max_iterations_reached", sample_rate=1.0)
289+
logger.warning(
290+
"Group hashes batch deletion reached the maximum number of iterations. "
291+
"Investigate if we need to change the GROUP_HASH_ITERATIONS value."
292+
)
293+
263294

264295
def separate_by_group_category(instance_list: Sequence[Group]) -> tuple[list[Group], list[Group]]:
265296
error_groups = []

src/sentry/models/grouphash.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def metadata(self) -> GroupHashMetadata | None:
5353
except AttributeError:
5454
return None
5555

56-
__repr__ = sane_repr("group_id", "hash")
56+
__repr__ = sane_repr("group_id", "hash", "metadata")
57+
__str__ = __repr__
5758

5859
def get_associated_fingerprint(self) -> list[str] | None:
5960
"""

src/sentry/models/grouphashmetadata.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ def group_id(self) -> int | None:
131131
def hash(self) -> str:
132132
return self.grouphash.hash
133133

134-
__repr__ = sane_repr("grouphash_id", "group_id", "hash")
134+
__repr__ = sane_repr("grouphash_id", "group_id", "hash", "seer_matched_grouphash")
135+
__str__ = __repr__
135136

136137
def get_best_guess_schema_version(self) -> str:
137138
"""

src/sentry/options/defaults.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,14 @@
338338
# Deletions
339339
register(
340340
"deletions.group-hashes-batch-size",
341-
default=10000,
341+
default=100,
342+
type=Int,
343+
flags=FLAG_AUTOMATOR_MODIFIABLE,
344+
)
345+
register(
346+
"deletions.group.delete_group_hashes_metadata_first",
347+
default=False,
348+
type=Bool,
342349
flags=FLAG_AUTOMATOR_MODIFIABLE,
343350
)
344351

tests/sentry/deletions/test_group.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from snuba_sdk import Column, Condition, Entity, Function, Op, Query, Request
99

10-
from sentry import nodestore
10+
from sentry import deletions, nodestore
1111
from sentry.deletions.defaults.group import ErrorEventsDeletionTask
1212
from sentry.deletions.tasks.groups import delete_groups_for_project
1313
from sentry.issues.grouptype import FeedbackGroup, GroupCategory
@@ -16,6 +16,7 @@
1616
from sentry.models.group import Group
1717
from sentry.models.groupassignee import GroupAssignee
1818
from sentry.models.grouphash import GroupHash
19+
from sentry.models.grouphashmetadata import GroupHashMetadata
1920
from sentry.models.grouphistory import GroupHistory, GroupHistoryStatus
2021
from sentry.models.groupmeta import GroupMeta
2122
from sentry.models.groupredirect import GroupRedirect
@@ -254,6 +255,69 @@ def test_invalid_group_type_handling(
254255
"args": [self.project.id, error_group_hashes, 0]
255256
}
256257

258+
def test_delete_grouphash_metadata_first(self) -> None:
259+
"""
260+
Test that the group hash metadata rows are deleted before the group hash rows when the option is enabled.
261+
"""
262+
# This enables checking Seer for similarity and to mock the call to return a specific grouphash
263+
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
264+
265+
# Create two events/grouphashes; one of them will be deleted.
266+
# Event A will be deleted
267+
event_a = self.store_event(
268+
data={
269+
"platform": "python",
270+
"stacktrace": {"frames": [{"filename": "error_a.py"}]},
271+
},
272+
project_id=self.project.id,
273+
)
274+
grouphash_a = GroupHash.objects.get(group_id=event_a.group_id)
275+
# assert grouphash_a.metadata is not None
276+
assert grouphash_a.metadata.seer_matched_grouphash is None
277+
metadata_a_id = grouphash_a.metadata.id
278+
279+
with mock.patch(
280+
"sentry.grouping.ingest.seer.get_seer_similar_issues"
281+
) as mock_get_seer_similar_issues:
282+
# This will allow grouphash_b to be matched to grouphash_a by Seer
283+
mock_get_seer_similar_issues.return_value = (0.01, grouphash_a)
284+
285+
# Event B will be kept - different exception to ensure different group hash to grouphash_a
286+
event_b = self.store_event(
287+
data={
288+
"platform": "python",
289+
"stacktrace": {"frames": [{"filename": "error_b.py"}]},
290+
},
291+
project_id=self.project.id,
292+
)
293+
grouphash_b = GroupHash.objects.get(hash=event_b.get_primary_hash())
294+
# assert grouphash_b.metadata is not None
295+
assert grouphash_b.metadata is not None
296+
metadata_b_id = grouphash_b.metadata.id
297+
298+
# Verify that seer matched event_b to event_a's hash
299+
assert event_a.group_id == event_b.group_id
300+
# Make sure it has not changed
301+
assert grouphash_a.metadata is not None
302+
assert grouphash_a.metadata.seer_matched_grouphash is None
303+
assert grouphash_b.metadata is not None
304+
assert grouphash_b.metadata.seer_matched_grouphash == grouphash_a
305+
306+
with (
307+
self.tasks(),
308+
self.options({"deletions.group.delete_group_hashes_metadata_first": True}),
309+
):
310+
# It will delete all groups, group hashes and group hash metadata
311+
task = deletions.get(model=Group, query={"id__in": [event_a.group_id]})
312+
more = task.chunk()
313+
assert not more
314+
315+
assert not Group.objects.filter(id=event_a.group_id).exists()
316+
assert not GroupHash.objects.filter(id=grouphash_a.id).exists()
317+
assert not GroupHashMetadata.objects.filter(id=metadata_a_id).exists()
318+
assert not GroupHash.objects.filter(id=grouphash_b.id).exists()
319+
assert not GroupHashMetadata.objects.filter(id=metadata_b_id).exists()
320+
257321

258322
class DeleteIssuePlatformTest(TestCase, SnubaTestCase, OccurrenceTestMixin):
259323
referrer = Referrer.TESTING_TEST.value

0 commit comments

Comments
 (0)