From c49001a9a0c9af34e95dc16821197e6e741685c3 Mon Sep 17 00:00:00 2001 From: ShaochenYu-YW <72957335+ShaochenYu-YW@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:26:10 -0800 Subject: [PATCH] [Ray Data] Increase task size warning threshold This is to get rid of the flood of warning messages. Test run: https://tcp.pinadmin.com/project/homefeed/ray/ekgv9nck Signed-off-by: ShaochenYu-YW <72957335+ShaochenYu-YW@users.noreply.github.com> --- python/ray/data/_internal/planner/plan_read_op.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/ray/data/_internal/planner/plan_read_op.py b/python/ray/data/_internal/planner/plan_read_op.py index e9d800045139..962035b18f1f 100644 --- a/python/ray/data/_internal/planner/plan_read_op.py +++ b/python/ray/data/_internal/planner/plan_read_op.py @@ -20,7 +20,8 @@ from ray.data.context import DataContext from ray.data.datasource.datasource import ReadTask -TASK_SIZE_WARN_THRESHOLD_BYTES = 100000 +# As one readTask can contain multiple paths, we increase to 1MB to prevent excessive log messages. +TASK_SIZE_WARN_THRESHOLD_BYTES = 1000000 # Transient errors that can occur during longer reads. Trigger retry when these occur. READ_FILE_RETRY_ON_ERRORS = ["AWS Error NETWORK_CONNECTION", "AWS Error ACCESS_DENIED"]