Skip to content

Commit 2742aa0

Browse files
authored
Builds: save task_executed_at to help debugging (#12500)
Save the datetime when the build was executed by a worker and log a warning if it's bigger than 10 minutes (we can adjust this number as we want). Note that 5 minutes is the default retry time. This is a POC to help debugging #12472 This data together with other logs like * "Delaying tasks due to concurrency limit." * "Retrying this task." could help us to understand what's going on here.
1 parent e2dd71d commit 2742aa0

File tree

5 files changed

+61
-3
lines changed

5 files changed

+61
-3
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.2.6 on 2025-09-30 12:39
2+
3+
from django.db import migrations
4+
from django.db import models
5+
from django_safemigrate import Safe
6+
7+
8+
class Migration(migrations.Migration):
9+
safe = Safe.before_deploy()
10+
11+
dependencies = [
12+
("builds", "0064_healthcheck"),
13+
]
14+
15+
operations = [
16+
migrations.AddField(
17+
model_name="build",
18+
name="task_executed_at",
19+
field=models.DateTimeField(
20+
blank=True, null=True, verbose_name="Task executed at datetime"
21+
),
22+
),
23+
]

readthedocs/builds/models.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,11 @@ class Build(models.Model):
714714
null=True,
715715
blank=True,
716716
)
717+
task_executed_at = models.DateTimeField(
718+
_("Task executed at datetime"),
719+
null=True,
720+
blank=True,
721+
)
717722

718723
notifications = GenericRelation(
719724
Notification,
@@ -779,7 +784,7 @@ def config(self):
779784
"""
780785
# TODO: now that we are using a proper JSONField here, we could
781786
# probably change this field to be a ForeignKey to avoid repeating the
782-
# config file over and over again and re-use them to save db data as
787+
# config file over and over again and reuse them to save db data as
783788
# well
784789
if self._config and self.CONFIG_KEY in self._config:
785790
return Build.objects.only("_config").get(pk=self._config[self.CONFIG_KEY])._config
@@ -977,7 +982,7 @@ class BuildCommandResultMixin:
977982
Mixin for common command result methods/properties.
978983
979984
Shared methods between the database model :py:class:`BuildCommandResult` and
980-
non-model respresentations of build command results from the API
985+
non-model representations of build command results from the API
981986
"""
982987

983988
@property

readthedocs/projects/tasks/builds.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
rebuilding documentation.
66
"""
77

8+
import datetime
89
import os
910
import shutil
1011
import signal
@@ -410,6 +411,19 @@ def before_start(self, task_id, args, kwargs):
410411
version_slug=self.data.version.slug,
411412
)
412413

414+
# Log a warning if the task took more than 10 minutes to be retried
415+
if self.data.build["task_executed_at"]:
416+
task_executed_at = datetime.datetime.fromisoformat(self.data.build["task_executed_at"])
417+
delta = timezone.now() - task_executed_at
418+
if delta > timezone.timedelta(minutes=10):
419+
log.warning(
420+
"This task waited more than 10 minutes to be retried.",
421+
delta_minutes=round(delta.seconds / 60, 1),
422+
)
423+
424+
# Save when the task was executed by a builder
425+
self.data.build["task_executed_at"] = timezone.now()
426+
413427
# Enable scale-in protection on this instance
414428
#
415429
# TODO: move this to the beginning of this method
@@ -727,7 +741,9 @@ def on_retry(self, exc, task_id, args, kwargs, einfo):
727741
message_id=BuildMaxConcurrencyError.LIMIT_REACHED,
728742
format_values=format_values,
729743
)
730-
self.update_build(state=BUILD_STATE_TRIGGERED)
744+
745+
# Always update the build on retry
746+
self.update_build(state=BUILD_STATE_TRIGGERED)
731747

732748
def after_return(self, status, retval, task_id, args, kwargs, einfo):
733749
"""

readthedocs/projects/tests/mockers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def _mock_api(self):
186186
"id": self.build.pk,
187187
"state": BUILD_STATE_TRIGGERED,
188188
"commit": self.build.commit,
189+
"task_executed_at": self.build.task_executed_at,
189190
},
190191
headers=headers,
191192
)

readthedocs/projects/tests/test_build_tasks.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ def test_successful_build(
537537
"commit": "a1b2c3",
538538
"error": "",
539539
"builder": mock.ANY,
540+
"task_executed_at": mock.ANY,
540541
}
541542

542543
# Update build state: installing
@@ -545,6 +546,7 @@ def test_successful_build(
545546
"state": "installing",
546547
"commit": "a1b2c3",
547548
"builder": mock.ANY,
549+
"task_executed_at": mock.ANY,
548550
"readthedocs_yaml_path": None,
549551
"error": "",
550552
# We update the `config` field at the same time we send the
@@ -618,6 +620,7 @@ def test_successful_build(
618620
"readthedocs_yaml_path": None,
619621
"config": mock.ANY,
620622
"builder": mock.ANY,
623+
"task_executed_at": mock.ANY,
621624
"error": "",
622625
}
623626
# Update build state: uploading
@@ -628,6 +631,7 @@ def test_successful_build(
628631
"readthedocs_yaml_path": None,
629632
"config": mock.ANY,
630633
"builder": mock.ANY,
634+
"task_executed_at": mock.ANY,
631635
"error": "",
632636
}
633637
# Update version state
@@ -654,6 +658,7 @@ def test_successful_build(
654658
"readthedocs_yaml_path": None,
655659
"config": mock.ANY,
656660
"builder": mock.ANY,
661+
"task_executed_at": mock.ANY,
657662
"length": mock.ANY,
658663
"success": True,
659664
"error": "",
@@ -811,6 +816,7 @@ def test_successful_build_with_temporary_s3_credentials(
811816
"commit": "a1b2c3",
812817
"error": "",
813818
"builder": mock.ANY,
819+
"task_executed_at": mock.ANY,
814820
}
815821

816822
# Update build state: installing
@@ -819,6 +825,7 @@ def test_successful_build_with_temporary_s3_credentials(
819825
"state": "installing",
820826
"commit": "a1b2c3",
821827
"builder": mock.ANY,
828+
"task_executed_at": mock.ANY,
822829
"readthedocs_yaml_path": None,
823830
"error": "",
824831
# We update the `config` field at the same time we send the
@@ -900,6 +907,7 @@ def test_successful_build_with_temporary_s3_credentials(
900907
"readthedocs_yaml_path": None,
901908
"config": mock.ANY,
902909
"builder": mock.ANY,
910+
"task_executed_at": mock.ANY,
903911
"error": "",
904912
}
905913
# Update build state: uploading
@@ -910,6 +918,7 @@ def test_successful_build_with_temporary_s3_credentials(
910918
"readthedocs_yaml_path": None,
911919
"config": mock.ANY,
912920
"builder": mock.ANY,
921+
"task_executed_at": mock.ANY,
913922
"error": "",
914923
}
915924

@@ -944,6 +953,7 @@ def test_successful_build_with_temporary_s3_credentials(
944953
"readthedocs_yaml_path": None,
945954
"config": mock.ANY,
946955
"builder": mock.ANY,
956+
"task_executed_at": mock.ANY,
947957
"length": mock.ANY,
948958
"success": True,
949959
"error": "",
@@ -1031,6 +1041,7 @@ def test_failed_build(
10311041
assert build_status_request.path == "/api/v2/build/1/"
10321042
assert build_status_request.json() == {
10331043
"builder": mock.ANY,
1044+
"task_executed_at": mock.ANY,
10341045
"commit": self.build.commit,
10351046
"error": "", # We are not sending ``error`` anymore
10361047
"id": self.build.pk,
@@ -1085,6 +1096,7 @@ def test_cancelled_build(
10851096
assert build_status_request.path == "/api/v2/build/1/"
10861097
assert build_status_request.json() == {
10871098
"builder": mock.ANY,
1099+
"task_executed_at": mock.ANY,
10881100
"commit": self.build.commit,
10891101
"error": "", # We are not sending ``error`` anymore
10901102
"id": self.build.pk,
@@ -2968,6 +2980,7 @@ def test_config_file_exception(self, load_yaml_config):
29682980
"error": "", # We not sending "error" anymore
29692981
"success": False,
29702982
"builder": mock.ANY,
2983+
"task_executed_at": mock.ANY,
29712984
"length": 0,
29722985
}
29732986

0 commit comments

Comments
 (0)