diff --git a/src/sentry/db/deletion.py b/src/sentry/db/deletion.py
index 1543be9ea9b7f2..11624b18fd0262 100644
--- a/src/sentry/db/deletion.py
+++ b/src/sentry/db/deletion.py
@@ -79,7 +79,7 @@ def _continuous_query(self, query):
             cursor.execute(query)
             results = cursor.rowcount > 0
 
-    def iterator(self, chunk_size=100, batch_size=10000) -> Generator[tuple[int, ...]]:
+    def iterator(self, chunk_size=100, batch_size=1000) -> Generator[tuple[int, ...]]:
         assert self.days is not None
         assert self.dtfield is not None
 
diff --git a/src/sentry/runner/commands/cleanup.py b/src/sentry/runner/commands/cleanup.py
index 4d71680566340e..a6a055a5f47276 100644
--- a/src/sentry/runner/commands/cleanup.py
+++ b/src/sentry/runner/commands/cleanup.py
@@ -584,6 +584,7 @@ def run_bulk_deletes_in_deletes(
     models_attempted: set[str],
 ) -> None:
     from sentry.db.deletion import BulkDeleteQuery
+    from sentry.models.eventattachment import EventAttachment
 
     debug_output("Running bulk deletes in DELETES")
     for model_tp, dtfield, order_by in deletes:
@@ -603,7 +604,9 @@ def run_bulk_deletes_in_deletes(
             order_by=order_by,
         )
 
-        for chunk in q.iterator(chunk_size=100):
+        # Use smaller batch size for EventAttachment to avoid query timeouts on massive tables
+        batch_size = 100 if model_tp == EventAttachment else 1000
+        for chunk in q.iterator(chunk_size=100, batch_size=batch_size):
             task_queue.put((imp, chunk))
 
     # Ensure all tasks are completed before exiting
diff --git a/tests/sentry/db/test_deletion.py b/tests/sentry/db/test_deletion.py
index f9d5b01e772f41..c43f4be2ca4ebd 100644
--- a/tests/sentry/db/test_deletion.py
+++ b/tests/sentry/db/test_deletion.py
@@ -86,3 +86,26 @@ def test_iteration_descending(self) -> None:
             results.update(chunk)
 
         assert results == expected_group_ids
+
+    def test_iteration_with_custom_batch_size(self) -> None:
+        target_project = self.project
+        expected_group_ids = {self.create_group().id for i in range(2)}
+
+        other_project = self.create_project()
+        self.create_group(other_project)
+        self.create_group(other_project)
+
+        # Test with custom batch_size parameter
+        iterator = BulkDeleteQuery(
+            model=Group,
+            project_id=target_project.id,
+            dtfield="last_seen",
+            order_by="last_seen",
+            days=0,
+        ).iterator(chunk_size=1, batch_size=100)
+
+        results: set[int] = set()
+        for chunk in iterator:
+            results.update(chunk)
+
+        assert results == expected_group_ids
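
Note on the two tuning knobs in this patch: chunk_size controls how many ids are handed to each worker task, while batch_size is intended to keep each individual database query small enough to avoid timeouts on very large tables such as EventAttachment (hence the default dropping from 10000 to 1000, and 100 for EventAttachment). Below is a minimal, hypothetical sketch of that pattern. It is not Sentry's actual BulkDeleteQuery.iterator; iterate_ids and fetch_batch are invented names used only to illustrate how the two parameters interact.

# A minimal sketch (assumption: NOT Sentry's actual BulkDeleteQuery.iterator) of a
# chunk_size/batch_size split in a batched deletion iterator. Each call to the
# hypothetical fetch_batch callback stands in for one bounded query returning at most
# batch_size ids; those ids are then yielded to workers in tuples of chunk_size.
from collections.abc import Callable, Generator


def iterate_ids(
    fetch_batch: Callable[[int, int], list[int]],
    chunk_size: int = 100,
    batch_size: int = 1000,
) -> Generator[tuple[int, ...], None, None]:
    # fetch_batch(limit, after_id) is assumed to return up to `limit` ids greater than
    # `after_id`, in ascending order (one cheap, bounded query per call).
    last_id = 0
    while True:
        batch = fetch_batch(batch_size, last_id)
        if not batch:
            return
        for start in range(0, len(batch), chunk_size):
            yield tuple(batch[start : start + chunk_size])
        last_id = batch[-1]


# Usage against an in-memory stand-in for a table of 2,500 rows: a smaller batch_size means
# more (but cheaper) fetches; chunk_size only changes how the ids are grouped for workers.
rows = list(range(1, 2501))


def fetch(limit: int, after: int) -> list[int]:
    return [r for r in rows if r > after][:limit]


chunks = list(iterate_ids(fetch, chunk_size=100, batch_size=1000))
assert sum(len(c) for c in chunks) == len(rows)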