diff --git a/python/ray/data/_internal/planner/plan_read_op.py b/python/ray/data/_internal/planner/plan_read_op.py index e9d800045139..962035b18f1f 100644 --- a/python/ray/data/_internal/planner/plan_read_op.py +++ b/python/ray/data/_internal/planner/plan_read_op.py @@ -20,7 +20,8 @@ from ray.data.context import DataContext from ray.data.datasource.datasource import ReadTask -TASK_SIZE_WARN_THRESHOLD_BYTES = 100000 +# Because a single ReadTask can contain multiple paths, the threshold is raised to 1 MB to prevent excessive log messages. +TASK_SIZE_WARN_THRESHOLD_BYTES = 1000000 # Transient errors that can occur during longer reads. Trigger retry when these occur. READ_FILE_RETRY_ON_ERRORS = ["AWS Error NETWORK_CONNECTION", "AWS Error ACCESS_DENIED"]