11from __future__ import annotations
22
3+ import contextlib
34import json
45import logging
56from concurrent .futures import ThreadPoolExecutor
@@ -250,9 +251,12 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
250251 )
251252
252253 # Use ThreadPoolExecutor for concurrent execution of user code and background checkpoint processing
253- with ThreadPoolExecutor (
254- max_workers = 2 , thread_name_prefix = "dex-handler"
255- ) as executor :
254+ with (
255+ ThreadPoolExecutor (
256+ max_workers = 2 , thread_name_prefix = "dex-handler"
257+ ) as executor ,
258+ contextlib .closing (execution_state ) as execution_state ,
259+ ):
256260 # Thread 1: Run background checkpoint processing
257261 executor .submit (execution_state .checkpoint_batches_forever )
258262
@@ -296,18 +300,12 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
296300 # Must ensure the result is persisted before returning to Lambda.
297301 # Large results exceed Lambda response limits and must be stored durably
298302 # before the execution completes.
299- execution_state .create_checkpoint_sync (success_operation )
300-
301- # Stop background checkpointing thread
302- execution_state .stop_checkpointing ()
303+ execution_state .create_checkpoint (success_operation , is_sync = True )
303304
304305 return DurableExecutionInvocationOutput .create_succeeded (
305306 result = ""
306307 ).to_dict ()
307308
308- # Stop background checkpointing thread
309- execution_state .stop_checkpointing ()
310-
311309 return DurableExecutionInvocationOutput .create_succeeded (
312310 result = serialized_result
313311 ).to_dict ()
@@ -322,33 +320,28 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
322320 )
323321 else :
324322 logger .exception ("Checkpoint processing failed" )
325- execution_state .stop_checkpointing ()
326323 # Raise the original exception
327324 raise bg_error .source_exception from bg_error
328325
329326 except SuspendExecution :
330327 # User code suspended - no explicit stop call here; execution_state is closed via contextlib.closing in the `with` statement
331328 logger .debug ("Suspending execution..." )
332- execution_state .stop_checkpointing ()
333329 return DurableExecutionInvocationOutput (
334330 status = InvocationStatus .PENDING
335331 ).to_dict ()
336332
337333 except CheckpointError as e :
338334 # Checkpoint system is broken - log and re-raise immediately; execution_state is closed via contextlib.closing in the `with` statement
339- execution_state .stop_checkpointing ()
340335 logger .exception (
341336 "Checkpoint system failed" ,
342337 extra = e .build_logger_extras (),
343338 )
344339 raise # Terminate Lambda immediately
345340 except InvocationError :
346- execution_state .stop_checkpointing ()
347341 logger .exception ("Invocation error. Must terminate." )
348342 # Throw the error to trigger Lambda retry
349343 raise
350344 except ExecutionError as e :
351- execution_state .stop_checkpointing ()
352345 logger .exception ("Execution error. Must terminate without retry." )
353346 return DurableExecutionInvocationOutput (
354347 status = InvocationStatus .FAILED ,
@@ -357,15 +350,36 @@ def wrapper(event: Any, context: LambdaContext) -> MutableMapping[str, Any]:
357350 except Exception as e :
358351 # all user-space errors go here
359352 logger .exception ("Execution failed" )
360- failed_operation = OperationUpdate .create_execution_fail (
361- error = ErrorObject .from_exception (e )
362- )
363- # TODO: can optimize, if not too large can just return response rather than checkpoint
364- execution_state .create_checkpoint_sync (failed_operation )
365353
366- execution_state .stop_checkpointing ()
367- return DurableExecutionInvocationOutput (
368- status = InvocationStatus .FAILED
354+ result = DurableExecutionInvocationOutput (
355+ status = InvocationStatus .FAILED , error = ErrorObject .from_exception (e )
369356 ).to_dict ()
370357
358+ serialized_result = json .dumps (result )
359+
360+ if (
361+ serialized_result
362+ and len (serialized_result ) > LAMBDA_RESPONSE_SIZE_LIMIT
363+ ):
364+ logger .debug (
365+ "Response size (%s bytes) exceeds Lambda limit (%s) bytes). Checkpointing result." ,
366+ len (serialized_result ),
367+ LAMBDA_RESPONSE_SIZE_LIMIT ,
368+ )
369+ failed_operation = OperationUpdate .create_execution_fail (
370+ error = ErrorObject .from_exception (e )
371+ )
372+
373+ # Checkpoint the large failure result synchronously (blocking) via create_checkpoint_sync.
374+ # Must ensure the result is persisted before returning to Lambda.
375+ # Large results exceed Lambda response limits and must be stored durably
376+ # before the execution completes.
377+ execution_state .create_checkpoint_sync (failed_operation )
378+
379+ return DurableExecutionInvocationOutput (
380+ status = InvocationStatus .FAILED
381+ ).to_dict ()
382+
383+ return result
384+
371385 return wrapper
0 commit comments