@@ -308,14 +308,20 @@ def processes(self) -> list[psutil.Process]:
308
308
def _get_sysinfo (self ) -> dict [str , typing .Any ]:
309
309
"""Retrieve system administration
310
310
311
+ Parameters
312
+ ----------
313
+ interval : float | None
314
+ The interval to use for collection of CPU metrics, by default None (non blocking)
315
+
311
316
Returns
312
317
-------
313
318
dict[str, typing.Any]
314
319
retrieved system specifications
315
320
"""
316
- cpu = get_process_cpu (self .processes )
317
- memory = get_process_memory (self .processes )
318
- gpu = get_gpu_metrics (self .processes )
321
+ processes = self .processes
322
+ cpu = get_process_cpu (processes , interval = 0.1 )
323
+ memory = get_process_memory (processes )
324
+ gpu = get_gpu_metrics (processes )
319
325
data : dict [str , typing .Any ] = {}
320
326
321
327
if memory is not None and cpu is not None :
@@ -351,6 +357,9 @@ def _heartbeat(
351
357
last_heartbeat = time .time ()
352
358
last_res_metric_call = time .time ()
353
359
360
+ if self ._resources_metrics_interval :
361
+ self ._add_metrics_to_dispatch (self ._get_sysinfo (), join_on_fail = False )
362
+
354
363
while not heartbeat_trigger .is_set ():
355
364
time .sleep (0.1 )
356
365
@@ -699,6 +708,7 @@ def init(
699
708
self ._sv_obj .alerts = []
700
709
self ._sv_obj .created = time .time ()
701
710
self ._sv_obj .notifications = notification
711
+ self ._sv_obj ._staging ["folder_id" ] = self ._folder .id
702
712
703
713
if self ._status == "running" :
704
714
self ._sv_obj .system = get_system ()
@@ -931,7 +941,7 @@ def reconnect(self, run_id: str) -> bool:
931
941
self ._status = "running"
932
942
933
943
self ._id = run_id
934
- self ._sv_obj = RunObject (identifier = self ._id )
944
+ self ._sv_obj = RunObject (identifier = self ._id , _read_only = False )
935
945
self ._start (reconnect = True )
936
946
937
947
return True
@@ -947,6 +957,7 @@ def set_pid(self, pid: int) -> None:
947
957
PID of the process to be monitored
948
958
"""
949
959
self ._pid = pid
960
+ self ._parent_process = psutil .Process (self ._pid )
950
961
951
962
@skip_if_failed ("_aborted" , "_suppress_errors" , False )
952
963
@pydantic .validate_call
@@ -1602,15 +1613,13 @@ def set_folder_details(
1602
1613
return False
1603
1614
1604
1615
try :
1605
- self ._folder .read_only (False )
1606
1616
if metadata :
1607
1617
self ._folder .metadata = metadata
1608
1618
if tags :
1609
1619
self ._folder .tags = tags
1610
1620
if description :
1611
1621
self ._folder .description = description
1612
1622
self ._folder .commit ()
1613
- self ._folder .read_only (True )
1614
1623
except (RuntimeError , ValueError , pydantic .ValidationError ) as e :
1615
1624
self ._error (f"Failed to update folder '{ self ._folder .name } ' details: { e } " )
1616
1625
return False
@@ -1918,16 +1927,21 @@ def create_user_alert(
1918
1927
@check_run_initialised
1919
1928
@pydantic .validate_call
1920
1929
def log_alert (
1921
- self , identifier : str , state : typing .Literal ["ok" , "critical" ]
1930
+ self ,
1931
+ identifier : str | None = None ,
1932
+ name : str | None = None ,
1933
+ state : typing .Literal ["ok" , "critical" ] = "critical" ,
1922
1934
) -> bool :
1923
- """Set the state of an alert
1935
+ """Set the state of an alert - either specify the alert by ID or name.
1924
1936
1925
1937
Parameters
1926
1938
----------
1927
- identifier : str
1928
- identifier of alert to update
1939
+ identifier : str | None
1940
+ ID of alert to update, by default None
1941
+ name : str | None
1942
+ Name of the alert to update, by default None
1929
1943
state : Literal['ok', 'critical']
1930
- state to set alert to
1944
+ state to set alert to, by default 'critical'
1931
1945
1932
1946
Returns
1933
1947
-------
@@ -1938,13 +1952,33 @@ def log_alert(
1938
1952
self ._error ('state must be either "ok" or "critical"' )
1939
1953
return False
1940
1954
1955
+ if (identifier and name ) or (not identifier and not name ):
1956
+ self ._error ("Please specify alert to update either by ID or by name." )
1957
+ return False
1958
+
1959
+ if name :
1960
+ try :
1961
+ if alerts := Alert .get (offline = self ._user_config .run .mode == "offline" ):
1962
+ identifier = next (
1963
+ (id for id , alert in alerts if alert .name == name ), None
1964
+ )
1965
+ else :
1966
+ self ._error ("No existing alerts" )
1967
+ return False
1968
+ except RuntimeError as e :
1969
+ self ._error (f"{ e .args [0 ]} " )
1970
+ return False
1971
+
1972
+ if not identifier :
1973
+ self ._error (f"Alert with name '{ name } ' could not be found." )
1974
+
1941
1975
_alert = UserAlert (identifier = identifier )
1942
- # if not isinstance(_alert, UserAlert):
1943
- # self._error(
1944
- # f"Cannot update state for alert '{identifier}' "
1945
- # f"of type '{_alert.__class__.__name__.lower()}'"
1946
- # )
1947
- # return False
1976
+ if not isinstance (_alert , UserAlert ):
1977
+ self ._error (
1978
+ f"Cannot update state for alert '{ identifier } ' "
1979
+ f"of type '{ _alert .__class__ .__name__ .lower ()} '"
1980
+ )
1981
+ return False
1948
1982
_alert .read_only (False )
1949
1983
_alert .set_status (run_id = self ._id , status = state )
1950
1984
_alert .commit ()
0 commit comments