15
15
import sys
16
16
import multiprocessing
17
17
import os
18
+ import psutil
18
19
import subprocess
19
20
import pathlib
20
21
import time
@@ -30,13 +31,8 @@ def _execute_process(
30
31
proc_id : str ,
31
32
command : typing .List [str ],
32
33
runner_name : str ,
33
- exit_status_dict : typing .Dict [str , int ],
34
- std_err : typing .Dict [str , str ],
35
- std_out : typing .Dict [str , str ],
36
- run_on_exit : typing .Optional [typing .Callable [[int , int , str ], None ]],
37
- trigger : typing .Optional [multiprocessing .synchronize .Event ],
38
34
environment : typing .Optional [typing .Dict [str , str ]],
39
- ) -> None :
35
+ ) -> subprocess . Popen :
40
36
with open (f"{ runner_name } _{ proc_id } .err" , "w" ) as err :
41
37
with open (f"{ runner_name } _{ proc_id } .out" , "w" ) as out :
42
38
_result = subprocess .Popen (
@@ -47,24 +43,7 @@ def _execute_process(
47
43
env = environment ,
48
44
)
49
45
50
- _status_code = _result .wait ()
51
- with open (f"{ runner_name } _{ proc_id } .err" ) as err :
52
- std_err [proc_id ] = err .read ()
53
-
54
- with open (f"{ runner_name } _{ proc_id } .out" ) as out :
55
- std_out [proc_id ] = out .read ()
56
-
57
- exit_status_dict [proc_id ] = _status_code
58
-
59
- if run_on_exit :
60
- run_on_exit (
61
- status_code = exit_status_dict [proc_id ],
62
- std_out = std_out [proc_id ],
63
- std_err = std_err [proc_id ],
64
- )
65
-
66
- if trigger :
67
- trigger .set ()
46
+ return _result
68
47
69
48
70
49
class Executor :
@@ -88,13 +67,14 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None:
88
67
"""
89
68
self ._runner = simvue_runner
90
69
self ._keep_logs = keep_logs
91
- self ._manager = multiprocessing .Manager ()
92
- self ._exit_codes = self ._manager .dict ()
93
- self ._std_err = self ._manager .dict ()
94
- self ._std_out = self ._manager .dict ()
70
+ self ._completion_callbacks = {}
71
+ self ._completion_triggers = {}
72
+ self ._exit_codes = {}
73
+ self ._std_err = {}
74
+ self ._std_out = {}
95
75
self ._alert_ids : dict [str , str ] = {}
96
- self ._command_str : typing . Dict [str , str ] = {}
97
- self ._processes : typing . Dict [str , multiprocessing . Process ] = {}
76
+ self ._command_str : dict [str , str ] = {}
77
+ self ._processes : dict [str , subprocess . Popen ] = {}
98
78
99
79
def add_process (
100
80
self ,
@@ -207,26 +187,16 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None:
207
187
_command += _pos_args
208
188
209
189
self ._command_str [identifier ] = " " .join (_command )
190
+ self ._completion_callbacks [identifier ] = completion_callback
191
+ self ._completion_triggers [identifier ] = completion_trigger
210
192
211
- self ._processes [identifier ] = multiprocessing .Process (
212
- target = _execute_process ,
213
- args = (
214
- identifier ,
215
- _command ,
216
- self ._runner .name ,
217
- self ._exit_codes ,
218
- self ._std_err ,
219
- self ._std_out ,
220
- completion_callback ,
221
- completion_trigger ,
222
- env ,
223
- ),
193
+ self ._processes [identifier ] = _execute_process (
194
+ identifier , _command , self ._runner .name , env
224
195
)
196
+
225
197
self ._alert_ids [identifier ] = self ._runner .create_alert (
226
198
name = f"{ identifier } _exit_status" , source = "user"
227
199
)
228
- logger .debug (f"Executing process: { ' ' .join (_command )} " )
229
- self ._processes [identifier ].start ()
230
200
231
201
@property
232
202
def success (self ) -> int :
@@ -324,12 +294,22 @@ def kill_process(self, process_id: str) -> None:
324
294
f"Failed to terminate process '{ process_id } ', no such identifier."
325
295
)
326
296
return
327
- _process .kill ()
297
+
298
+ _parent = psutil .Process (_process .pid )
299
+
300
+ for child in _parent .children (recursive = True ):
301
+ logger .debug (f"Terminating child process { child .pid } : { child .name ()} " )
302
+ child .kill ()
303
+
304
+ logger .debug (f"Terminating child process { _process .pid } : { _process .args } " )
305
+ _process .terminate ()
306
+
307
+ self ._execute_callback (process_id )
328
308
329
309
def kill_all (self ) -> None :
330
310
"""Kill all running processes"""
331
- for process in self ._processes .values ():
332
- process . kill ( )
311
+ for process in self ._processes .keys ():
312
+ self . kill_process ( process )
333
313
334
314
def _clear_cache_files (self ) -> None :
335
315
"""Clear local log files if required"""
@@ -338,11 +318,28 @@ def _clear_cache_files(self) -> None:
338
318
os .remove (f"{ self ._runner .name } _{ proc_id } .err" )
339
319
os .remove (f"{ self ._runner .name } _{ proc_id } .out" )
340
320
321
+ def _execute_callback (self , identifier : str ) -> None :
322
+ with open (f"{ self ._runner .name } _{ identifier } .err" ) as err :
323
+ std_err = err .read ()
324
+
325
+ with open (f"{ self ._runner .name } _{ identifier } .out" ) as out :
326
+ std_out = out .read ()
327
+
328
+ if self ._completion_callbacks [identifier ]:
329
+ self ._completion_callbacks [identifier ](
330
+ status_code = self ._processes [identifier ].returncode ,
331
+ std_out = std_out ,
332
+ std_err = std_err ,
333
+ )
334
+ if self ._completion_triggers [identifier ]:
335
+ self ._completion_triggers [identifier ].set ()
336
+
341
337
def wait_for_completion (self ) -> None :
342
338
"""Wait for all processes to finish then perform tidy up and upload"""
343
- for process in self ._processes .values ():
344
- if process .is_alive ():
345
- process .join ()
339
+ for identifier , process in self ._processes .items ():
340
+ process .wait ()
341
+ self ._execute_callback (identifier )
342
+
346
343
self ._update_alerts ()
347
344
self ._save_output ()
348
345
0 commit comments