@@ -92,6 +92,7 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None:
92
92
self ._exit_codes = self ._manager .dict ()
93
93
self ._std_err = self ._manager .dict ()
94
94
self ._std_out = self ._manager .dict ()
95
+ self ._alert_ids : dict [str , str ] = {}
95
96
self ._command_str : typing .Dict [str , str ] = {}
96
97
self ._processes : typing .Dict [str , multiprocessing .Process ] = {}
97
98
@@ -221,6 +222,9 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None:
221
222
env ,
222
223
),
223
224
)
225
+ self ._alert_ids [identifier ] = self ._runner .create_alert (
226
+ name = f"{ identifier } _exit_status" , source = "user"
227
+ )
224
228
logger .debug (f"Executing process: { ' ' .join (_command )} " )
225
229
self ._processes [identifier ].start ()
226
230
@@ -239,6 +243,14 @@ def exit_status(self) -> int:
239
243
240
244
return 0
241
245
246
def get_error_summary(self) -> dict[str, typing.Optional[str]]:
    """Collect the error message of every process that reported a failure.

    A process is included when its recorded exit code is truthy
    (i.e. neither ``0`` nor ``None``).

    Returns
    -------
    dict[str, typing.Optional[str]]
        mapping of process identifier to the message produced by
        ``_get_error_status`` for that process.
    """
    summary: dict[str, typing.Optional[str]] = {}

    for proc_id, exit_code in self._exit_codes.items():
        # Skip processes that succeeded or have no exit code recorded
        if not exit_code:
            continue
        summary[proc_id] = self._get_error_status(proc_id)

    return summary
253
+
242
254
def get_command (self , process_id : str ) -> str :
243
255
"""Returns the command executed within the given process.
244
256
@@ -256,7 +268,19 @@ def get_command(self, process_id: str) -> str:
256
268
raise KeyError (f"Failed to retrieve '{ process_id } ', no such process" )
257
269
return self ._command_str [process_id ]
258
270
259
- def _log_events (self ) -> None :
271
def _get_error_status(self, process_id: str) -> typing.Optional[str]:
    """Return a short error message for the given process.

    The captured stderr is returned when it is non-empty. Otherwise the
    last 10 lines of the captured stdout are returned, prefixed with a
    ``Tail STDOUT:`` header.

    Parameters
    ----------
    process_id : str
        identifier of the process to summarise

    Returns
    -------
    typing.Optional[str]
        the error message, or None if neither stream holds any output
        for this process
    """
    # .get() rather than indexing: a process with no recorded output
    # should produce "no message", not a KeyError
    std_err = self._std_err.get(process_id)
    if std_err:
        return std_err

    std_out = self._std_out.get(process_id)
    if not std_out:
        return None

    # splitlines() does not yield a trailing empty entry for output that
    # ends in a newline, so the tail really holds up to 10 lines of text;
    # a negative slice already copes with fewer than 10 lines
    tail = std_out.splitlines()[-10:]
    return " Tail STDOUT:\n\n" + "\n".join(tail)
282
+
283
+ def _update_alerts (self ) -> None :
260
284
"""Send log events for the result of each process"""
261
285
for proc_id , code in self ._exit_codes .items ():
262
286
if code != 0 :
@@ -265,11 +289,9 @@ def _log_events(self) -> None:
265
289
if self ._runner ._dispatcher :
266
290
self ._runner ._dispatcher .purge ()
267
291
268
- _err = self ._std_err [proc_id ]
269
- _msg = f"Process { proc_id } returned non-zero exit status { code } with:\n { _err } "
292
+ self ._runner .log_alert (self ._alert_ids [proc_id ], "critical" )
270
293
else :
271
- _msg = f"Process { proc_id } completed successfully."
272
- self ._runner .log_event (_msg )
294
+ self ._runner .log_alert (self ._alert_ids [proc_id ], "ok" )
273
295
274
296
# Wait for the dispatcher to send the latest information before
275
297
# allowing the executor to finish (and as such the run instance to exit)
@@ -321,7 +343,7 @@ def wait_for_completion(self) -> None:
321
343
for process in self ._processes .values ():
322
344
if process .is_alive ():
323
345
process .join ()
324
- self ._log_events ()
346
+ self ._update_alerts ()
325
347
self ._save_output ()
326
348
327
349
if not self .success :
0 commit comments