
Commit 83b9144

Merge pull request #725 from simvue-io/wk9874/2.0.0_a2
Bug fixes for 2.0.0a2
2 parents bd90993 + 08a086d commit 83b9144

8 files changed: 303 additions & 95 deletions

poetry.lock

Lines changed: 25 additions & 25 deletions
Some generated files are not rendered by default.

simvue/client.py

Lines changed: 13 additions & 7 deletions
@@ -990,17 +990,23 @@ def get_alerts(
         RuntimeError
             if there was a failure retrieving data from the server
         """
-
         if not run_id:
+            if critical_only:
+                raise RuntimeError(
+                    "critical_only is ambiguous when returning alerts with no run ID specified."
+                )
             return [alert.name if names_only else alert for _, alert in Alert.get()]  # type: ignore

-        return [
-            alert.get("name")
-            if names_only
-            else Alert(identifier=alert.get("id"), **alert)
+        _alerts = [
+            Alert(identifier=alert.get("id"), **alert)
             for alert in Run(identifier=run_id).get_alert_details()
-            if not critical_only or alert["status"].get("current") == "critical"
-        ]  # type: ignore
+        ]
+
+        return [
+            alert.name if names_only else alert
+            for alert in _alerts
+            if not critical_only or alert.get_status(run_id) == "critical"
+        ]

     @prettify_pydantic
     @pydantic.validate_call
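
A minimal usage sketch of the changed get_alerts behaviour above; it assumes Client is importable from the simvue package and that run_id, critical_only and names_only are accepted as keywords, and the run identifier is a placeholder.

from simvue import Client

client = Client()

# Without a run ID, critical_only is now rejected as ambiguous,
# since "critical" is a per-run alert status.
all_names = client.get_alerts(names_only=True)

# With a run ID, alert objects are built first and then filtered
# on their status for that run.
critical_names = client.get_alerts(
    run_id="<run-id>",  # placeholder identifier
    critical_only=True,
    names_only=True,
)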

simvue/executor.py

Lines changed: 4 additions & 2 deletions
@@ -348,9 +348,11 @@ def _update_alerts(self) -> None:
             if self._runner._dispatcher:
                 self._runner._dispatcher.purge()

-            self._runner.log_alert(self._alert_ids[proc_id], "critical")
+            self._runner.log_alert(
+                identifier=self._alert_ids[proc_id], state="critical"
+            )
         else:
-            self._runner.log_alert(self._alert_ids[proc_id], "ok")
+            self._runner.log_alert(identifier=self._alert_ids[proc_id], state="ok")

         _current_time: float = 0
         while (
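
Why the call above now uses keyword arguments: with the reworked log_alert signature in simvue/run.py further down, the second positional slot is name rather than state. A standalone stub (not simvue code) illustrating the difference:

import typing

def log_alert(
    identifier: str | None = None,
    name: str | None = None,
    state: typing.Literal["ok", "critical"] = "critical",
) -> None:
    # Stub mirroring the new parameter order, for demonstration only.
    print(f"identifier={identifier!r} name={name!r} state={state!r}")

log_alert("alert-123", "critical")                   # "critical" binds to name, not state
log_alert(identifier="alert-123", state="critical")  # unambiguous keyword form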

simvue/metrics.py

Lines changed: 6 additions & 2 deletions
@@ -36,14 +36,18 @@ def get_process_memory(processes: list[psutil.Process]) -> int:
     return rss


-def get_process_cpu(processes: list[psutil.Process]) -> int:
+def get_process_cpu(
+    processes: list[psutil.Process], interval: float | None = None
+) -> int:
     """
     Get the CPU usage
+
+    If first time being called, use a small interval to collect initial CPU metrics.
     """
     cpu_percent: int = 0
     for process in processes:
         with contextlib.suppress(Exception):
-            cpu_percent += process.cpu_percent()
+            cpu_percent += process.cpu_percent(interval=interval)

     return cpu_percent
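
Background on the interval argument, as a standalone psutil sketch (not simvue code): Process.cpu_percent(interval=None) compares against the previous call, so the very first non-blocking call reports 0.0; passing a small interval blocks briefly and returns a usable first reading.

import os

import psutil

proc = psutil.Process(os.getpid())

first = proc.cpu_percent(interval=None)   # 0.0, no earlier sample to compare against
primed = proc.cpu_percent(interval=0.1)   # blocks ~0.1 s and returns a real reading
later = proc.cpu_percent(interval=None)   # subsequent non-blocking calls are meaningful

print(first, primed, later)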

simvue/run.py

Lines changed: 51 additions & 17 deletions
@@ -308,14 +308,20 @@ def processes(self) -> list[psutil.Process]:
     def _get_sysinfo(self) -> dict[str, typing.Any]:
         """Retrieve system administration

+        Parameters
+        ----------
+        interval : float | None
+            The interval to use for collection of CPU metrics, by default None (non blocking)
+
         Returns
         -------
         dict[str, typing.Any]
             retrieved system specifications
         """
-        cpu = get_process_cpu(self.processes)
-        memory = get_process_memory(self.processes)
-        gpu = get_gpu_metrics(self.processes)
+        processes = self.processes
+        cpu = get_process_cpu(processes, interval=0.1)
+        memory = get_process_memory(processes)
+        gpu = get_gpu_metrics(processes)
         data: dict[str, typing.Any] = {}

         if memory is not None and cpu is not None:
@@ -351,6 +357,9 @@ def _heartbeat(
         last_heartbeat = time.time()
         last_res_metric_call = time.time()

+        if self._resources_metrics_interval:
+            self._add_metrics_to_dispatch(self._get_sysinfo(), join_on_fail=False)
+
         while not heartbeat_trigger.is_set():
             time.sleep(0.1)

@@ -699,6 +708,7 @@ def init(
         self._sv_obj.alerts = []
         self._sv_obj.created = time.time()
         self._sv_obj.notifications = notification
+        self._sv_obj._staging["folder_id"] = self._folder.id

         if self._status == "running":
             self._sv_obj.system = get_system()
@@ -931,7 +941,7 @@ def reconnect(self, run_id: str) -> bool:
         self._status = "running"

         self._id = run_id
-        self._sv_obj = RunObject(identifier=self._id)
+        self._sv_obj = RunObject(identifier=self._id, _read_only=False)
         self._start(reconnect=True)

         return True
@@ -947,6 +957,7 @@ def set_pid(self, pid: int) -> None:
             PID of the process to be monitored
         """
         self._pid = pid
+        self._parent_process = psutil.Process(self._pid)

     @skip_if_failed("_aborted", "_suppress_errors", False)
     @pydantic.validate_call
@@ -1602,15 +1613,13 @@ def set_folder_details(
             return False

         try:
-            self._folder.read_only(False)
             if metadata:
                 self._folder.metadata = metadata
             if tags:
                 self._folder.tags = tags
             if description:
                 self._folder.description = description
             self._folder.commit()
-            self._folder.read_only(True)
         except (RuntimeError, ValueError, pydantic.ValidationError) as e:
             self._error(f"Failed to update folder '{self._folder.name}' details: {e}")
             return False
@@ -1918,16 +1927,21 @@ def create_user_alert(
     @check_run_initialised
     @pydantic.validate_call
     def log_alert(
-        self, identifier: str, state: typing.Literal["ok", "critical"]
+        self,
+        identifier: str | None = None,
+        name: str | None = None,
+        state: typing.Literal["ok", "critical"] = "critical",
     ) -> bool:
-        """Set the state of an alert
+        """Set the state of an alert - either specify the alert by ID or name.

         Parameters
         ----------
-        identifier : str
-            identifier of alert to update
+        identifier : str | None
+            ID of alert to update, by default None
+        name : str | None
+            Name of the alert to update, by default None
         state : Literal['ok', 'critical']
-            state to set alert to
+            state to set alert to, by default 'critical'

         Returns
         -------
@@ -1938,13 +1952,33 @@ def log_alert(
             self._error('state must be either "ok" or "critical"')
             return False

+        if (identifier and name) or (not identifier and not name):
+            self._error("Please specify alert to update either by ID or by name.")
+            return False
+
+        if name:
+            try:
+                if alerts := Alert.get(offline=self._user_config.run.mode == "offline"):
+                    identifier = next(
+                        (id for id, alert in alerts if alert.name == name), None
+                    )
+                else:
+                    self._error("No existing alerts")
+                    return False
+            except RuntimeError as e:
+                self._error(f"{e.args[0]}")
+                return False
+
+        if not identifier:
+            self._error(f"Alert with name '{name}' could not be found.")
+
         _alert = UserAlert(identifier=identifier)
-        # if not isinstance(_alert, UserAlert):
-        #     self._error(
-        #         f"Cannot update state for alert '{identifier}' "
-        #         f"of type '{_alert.__class__.__name__.lower()}'"
-        #     )
-        #     return False
+        if not isinstance(_alert, UserAlert):
+            self._error(
+                f"Cannot update state for alert '{identifier}' "
+                f"of type '{_alert.__class__.__name__.lower()}'"
+            )
+            return False
         _alert.read_only(False)
         _alert.set_status(run_id=self._id, status=state)
         _alert.commit()
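
A usage sketch of the reworked log_alert from the user side; it assumes the simvue Run context manager and the create_user_alert method referenced in the hunk above, and the run and alert names are placeholders.

from simvue import Run

with Run() as run:
    run.init(name="example-run")              # placeholder run name
    run.create_user_alert(name="job-failed")  # placeholder alert name

    # New in this change: the alert can be addressed by name instead of ID,
    # and the state defaults to "critical".
    run.log_alert(name="job-failed")
    run.log_alert(name="job-failed", state="ok")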
