15
15
import sys
16
16
import multiprocessing
17
17
import os
18
+ import psutil
18
19
import subprocess
19
20
import pathlib
20
21
import time
26
27
logger = logging .getLogger (__name__ )
27
28
28
29
30
class CompletionCallback(typing.Protocol):
    """Structural type for callables run when a process has completed.

    A conforming callable accepts exactly three keyword-only arguments and
    its return value is ignored.
    """

    def __call__(self, *, status_code: int, std_out: str, std_err: str) -> None:
        """Handle a completed process.

        Parameters
        ----------
        status_code : int
            exit code of the process
        std_out : str
            text captured from the process's standard output
        std_err : str
            text captured from the process's standard error
        """
32
+
33
+
29
34
def _execute_process (
30
35
proc_id : str ,
31
36
command : typing .List [str ],
32
37
runner_name : str ,
33
- exit_status_dict : typing .Dict [str , int ],
34
- std_err : typing .Dict [str , str ],
35
- std_out : typing .Dict [str , str ],
36
- run_on_exit : typing .Optional [typing .Callable [[int , int , str ], None ]],
37
- trigger : typing .Optional [multiprocessing .synchronize .Event ],
38
38
environment : typing .Optional [typing .Dict [str , str ]],
39
- ) -> None :
39
+ ) -> subprocess . Popen :
40
40
with open (f"{ runner_name } _{ proc_id } .err" , "w" ) as err :
41
41
with open (f"{ runner_name } _{ proc_id } .out" , "w" ) as out :
42
42
_result = subprocess .Popen (
@@ -47,24 +47,7 @@ def _execute_process(
47
47
env = environment ,
48
48
)
49
49
50
- _status_code = _result .wait ()
51
- with open (f"{ runner_name } _{ proc_id } .err" ) as err :
52
- std_err [proc_id ] = err .read ()
53
-
54
- with open (f"{ runner_name } _{ proc_id } .out" ) as out :
55
- std_out [proc_id ] = out .read ()
56
-
57
- exit_status_dict [proc_id ] = _status_code
58
-
59
- if run_on_exit :
60
- run_on_exit (
61
- status_code = exit_status_dict [proc_id ],
62
- std_out = std_out [proc_id ],
63
- std_err = std_err [proc_id ],
64
- )
65
-
66
- if trigger :
67
- trigger .set ()
50
+ return _result
68
51
69
52
70
53
class Executor :
@@ -88,13 +71,16 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None:
88
71
"""
89
72
self ._runner = simvue_runner
90
73
self ._keep_logs = keep_logs
91
- self ._manager = multiprocessing .Manager ()
92
- self ._exit_codes = self ._manager .dict ()
93
- self ._std_err = self ._manager .dict ()
94
- self ._std_out = self ._manager .dict ()
74
+ self ._completion_callbacks : dict [str , typing .Optional [CompletionCallback ]] = {}
75
+ self ._completion_triggers : dict [
76
+ str , typing .Optional [multiprocessing .synchronize .Event ]
77
+ ] = {}
78
+ self ._exit_codes : dict [str , int ] = {}
79
+ self ._std_err : dict [str , str ] = {}
80
+ self ._std_out : dict [str , str ] = {}
95
81
self ._alert_ids : dict [str , str ] = {}
96
- self ._command_str : typing . Dict [str , str ] = {}
97
- self ._processes : typing . Dict [str , multiprocessing . Process ] = {}
82
+ self ._command_str : dict [str , str ] = {}
83
+ self ._processes : dict [str , subprocess . Popen ] = {}
98
84
99
85
def add_process (
100
86
self ,
@@ -104,9 +90,7 @@ def add_process(
104
90
script : typing .Optional [pathlib .Path ] = None ,
105
91
input_file : typing .Optional [pathlib .Path ] = None ,
106
92
env : typing .Optional [typing .Dict [str , str ]] = None ,
107
- completion_callback : typing .Optional [
108
- typing .Callable [[int , str , str ], None ]
109
- ] = None ,
93
+ completion_callback : typing .Optional [CompletionCallback ] = None ,
110
94
completion_trigger : typing .Optional [multiprocessing .synchronize .Event ] = None ,
111
95
** kwargs ,
112
96
) -> None :
@@ -161,6 +145,9 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None:
161
145
"""
162
146
_pos_args = list (args )
163
147
148
+ if not self ._runner .name :
149
+ raise RuntimeError ("Cannot add process, expected Run instance to have name" )
150
+
164
151
if sys .platform == "win32" and completion_callback :
165
152
logger .warning (
166
153
"Completion callback for 'add_process' may fail on Windows due to "
@@ -207,26 +194,16 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None:
207
194
_command += _pos_args
208
195
209
196
self ._command_str [identifier ] = " " .join (_command )
197
+ self ._completion_callbacks [identifier ] = completion_callback
198
+ self ._completion_triggers [identifier ] = completion_trigger
210
199
211
- self ._processes [identifier ] = multiprocessing .Process (
212
- target = _execute_process ,
213
- args = (
214
- identifier ,
215
- _command ,
216
- self ._runner .name ,
217
- self ._exit_codes ,
218
- self ._std_err ,
219
- self ._std_out ,
220
- completion_callback ,
221
- completion_trigger ,
222
- env ,
223
- ),
200
+ self ._processes [identifier ] = _execute_process (
201
+ identifier , _command , self ._runner .name , env
224
202
)
203
+
225
204
self ._alert_ids [identifier ] = self ._runner .create_alert (
226
205
name = f"{ identifier } _exit_status" , source = "user"
227
206
)
228
- logger .debug (f"Executing process: { ' ' .join (_command )} " )
229
- self ._processes [identifier ].start ()
230
207
231
208
@property
232
209
def success (self ) -> int :
@@ -272,8 +249,8 @@ def _get_error_status(self, process_id: str) -> typing.Optional[str]:
272
249
err_msg : typing .Optional [str ] = None
273
250
274
251
# Return last 10 lines of stdout if stderr empty
275
- if not (err_msg := self ._std_err [ process_id ] ) and (
276
- std_out := self ._std_out [ process_id ]
252
+ if not (err_msg := self ._std_err . get ( process_id ) ) and (
253
+ std_out := self ._std_out . get ( process_id )
277
254
):
278
255
err_msg = " Tail STDOUT:\n \n "
279
256
start_index = - 10 if len (lines := std_out .split ("\n " )) > 10 else 0
@@ -308,28 +285,42 @@ def _save_output(self) -> None:
308
285
"""Save the output to Simvue"""
309
286
for proc_id in self ._exit_codes .keys ():
310
287
# Only save the file if the contents are not empty
311
- if self ._std_err [ proc_id ] :
288
+ if self ._std_err . get ( proc_id ) :
312
289
self ._runner .save_file (
313
290
f"{ self ._runner .name } _{ proc_id } .err" , category = "output"
314
291
)
315
- if self ._std_out [ proc_id ] :
292
+ if self ._std_out . get ( proc_id ) :
316
293
self ._runner .save_file (
317
294
f"{ self ._runner .name } _{ proc_id } .out" , category = "output"
318
295
)
319
296
320
297
def kill_process(self, process_id: str) -> None:
    """Kill a running process, and every process it spawned, by ID.

    Descendants are killed and waited on first, then the process itself.
    Once the process has been reaped its completion callback/trigger is
    executed via ``_execute_callback``.

    Processes that have already exited are tolerated: the descendant sweep
    is skipped and the remaining steps become no-ops, so it is safe to call
    this more than once for the same identifier.

    Parameters
    ----------
    process_id : str
        identifier the process was registered under; an unknown identifier
        is logged as an error and otherwise ignored
    """
    if not (process := self._processes.get(process_id)):
        logger.error(
            f"Failed to terminate process '{ process_id } ', no such identifier."
        )
        return

    # Snapshot the descendants once, so the processes we wait on below are
    # exactly the ones we signalled. The lookup raises NoSuchProcess when the
    # PID is already gone (process finished and was reaped).
    try:
        descendants = psutil.Process(process.pid).children(recursive=True)
    except psutil.NoSuchProcess:
        descendants = []

    for child in descendants:
        try:
            logger.debug(f"Terminating child process { child .pid } : { child .name ()} ")
            child.kill()
        except psutil.NoSuchProcess:
            # The child exited between the snapshot and the kill.
            continue

    for child in descendants:
        child.wait()

    logger.debug(f"Terminating child process { process .pid } : { process .args } ")
    process.kill()
    process.wait()

    self._execute_callback(process_id)
328
319
329
320
def kill_all(self) -> None:
    """Kill every registered process, one identifier at a time."""
    # Iterating the mapping yields the process identifiers.
    for identifier in self._processes:
        self.kill_process(identifier)
333
324
334
325
def _clear_cache_files (self ) -> None :
335
326
"""Clear local log files if required"""
@@ -338,11 +329,28 @@ def _clear_cache_files(self) -> None:
338
329
os .remove (f"{ self ._runner .name } _{ proc_id } .err" )
339
330
os .remove (f"{ self ._runner .name } _{ proc_id } .out" )
340
331
332
def _execute_callback(self, identifier: str) -> None:
    """Collect a finished process's output and notify its completion hooks.

    Reads the ``.err`` and ``.out`` log files for ``identifier``, caches
    their contents (and the exit code, when known) on the executor, then
    invokes the registered completion callback and sets the registered
    completion trigger, if any.

    The callback is removed from the registry before it is invoked, so it
    runs at most once per process even when this method is called again for
    the same identifier (e.g. a kill followed by a wait).

    Parameters
    ----------
    identifier : str
        identifier the process was registered under

    Raises
    ------
    OSError
        if either log file cannot be opened
    KeyError
        if a callback is registered but no process exists for ``identifier``
    """
    with open(f"{ self ._runner .name } _{ identifier } .err") as err:
        std_err = err.read()

    with open(f"{ self ._runner .name } _{ identifier } .out") as out:
        std_out = out.read()

    # Cache the results for the methods that read these dictionaries.
    # NOTE(review): no other writer for these caches is visible in this part
    # of the module - confirm none exists elsewhere.
    self._std_err[identifier] = std_err
    self._std_out[identifier] = std_out

    process = self._processes.get(identifier)
    if process is not None and process.returncode is not None:
        self._exit_codes[identifier] = process.returncode

    # pop() rather than get(): the callback must not fire a second time.
    if callback := self._completion_callbacks.pop(identifier, None):
        callback(
            status_code=self._processes[identifier].returncode,
            std_out=std_out,
            std_err=std_err,
        )
    if completion_trigger := self._completion_triggers.get(identifier):
        completion_trigger.set()
347
+
341
348
def wait_for_completion(self) -> None:
    """Block until every process has exited, then run the follow-up steps.

    Each process is waited on in registration order and its completion
    callback/trigger is executed straight after it exits. Once all have
    finished, alerts are updated and the captured output is saved.
    """
    for proc_id in self._processes:
        self._processes[proc_id].wait()
        self._execute_callback(proc_id)

    self._update_alerts()
    self._save_output()
348
356
0 commit comments