Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sentry/db/deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _continuous_query(self, query):
cursor.execute(query)
results = cursor.rowcount > 0

def iterator(self, chunk_size=100, batch_size=10000) -> Generator[tuple[int, ...]]:
def iterator(self, chunk_size=100, batch_size=1000) -> Generator[tuple[int, ...]]:
assert self.days is not None
assert self.dtfield is not None

Expand Down
5 changes: 4 additions & 1 deletion src/sentry/runner/commands/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ def run_bulk_deletes_in_deletes(
models_attempted: set[str],
) -> None:
from sentry.db.deletion import BulkDeleteQuery
from sentry.models.eventattachment import EventAttachment

debug_output("Running bulk deletes in DELETES")
for model_tp, dtfield, order_by in deletes:
Expand All @@ -603,7 +604,9 @@ def run_bulk_deletes_in_deletes(
order_by=order_by,
)

for chunk in q.iterator(chunk_size=100):
# Use smaller batch size for EventAttachment to avoid query timeouts on massive tables
batch_size = 100 if model_tp == EventAttachment else 1000
for chunk in q.iterator(chunk_size=100, batch_size=batch_size):
task_queue.put((imp, chunk))

# Ensure all tasks are completed before exiting
Expand Down
23 changes: 23 additions & 0 deletions tests/sentry/db/test_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,26 @@ def test_iteration_descending(self) -> None:
results.update(chunk)

assert results == expected_group_ids

def test_iteration_with_custom_batch_size(self) -> None:
    """Verify iterator() honors an explicit batch_size and still scopes
    results to the target project only."""
    target_project = self.project
    # Groups in the target project: the only ids the iterator should yield.
    # `_` replaces the unused loop variable.
    expected_group_ids = {self.create_group().id for _ in range(2)}

    # Groups in an unrelated project must be excluded by the project_id filter.
    other_project = self.create_project()
    self.create_group(other_project)
    self.create_group(other_project)

    # chunk_size=1 forces multiple chunks; batch_size=100 exercises the
    # non-default batch sizing path (the default is 1000).
    iterator = BulkDeleteQuery(
        model=Group,
        project_id=target_project.id,
        dtfield="last_seen",
        order_by="last_seen",
        days=0,
    ).iterator(chunk_size=1, batch_size=100)

    results: set[int] = set()
    for chunk in iterator:
        results.update(chunk)

    assert results == expected_group_ids
Loading