Skip to content

Commit 85e6f9f

Browse files
committed
Fix suppress errors test
1 parent 976b8f2 commit 85e6f9f

File tree

3 files changed

+55
-90
lines changed

3 files changed

+55
-90
lines changed

simvue/metrics.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ def __init__(
139139
self,
140140
processes: list[psutil.Process],
141141
interval: float | None,
142-
cpu_only: bool = False,
143142
) -> None:
144143
"""Perform a measurement of system resource consumption.
145144
@@ -149,14 +148,10 @@ def __init__(
149148
processes to measure across.
150149
interval: float | None
151150
interval to measure, if None previous measure time used for interval.
152-
cpu_only: bool, optional
153-
only record CPU information, default False
154151
"""
155152
self.cpu_percent: float | None = get_process_cpu(processes, interval=interval)
156153
self.cpu_memory: float | None = get_process_memory(processes)
157-
self.gpus: list[dict[str, float]] = (
158-
None if cpu_only else get_gpu_metrics(processes)
159-
)
154+
self.gpus: list[dict[str, float]] = get_gpu_metrics(processes)
160155

161156
def to_dict(self) -> dict[str, float]:
162157
"""Create metrics dictionary for sending to a Simvue server."""

simvue/run.py

Lines changed: 51 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,7 @@ def _terminate_run(
334334

335335
def _get_internal_metrics(
336336
self,
337-
system_metrics_step: int | None,
338-
emission_metrics_step: int | None,
339-
res_measure_interval: int | None = None,
340-
ems_measure_interval: int | None = None,
337+
system_metrics_step: int,
341338
) -> None:
342339
"""Refresh resource and emissions metrics.
343340
@@ -346,55 +343,49 @@ def _get_internal_metrics(
346343
347344
Parameters
348345
----------
349-
system_metrics_step: int | None
350-
the current step for this resource metric record,
351-
None if skipping resource metrics.
352-
emission_metrics_step: int | None
353-
the current step for this emission metrics record,
354-
None if skipping emission metrics.
355-
res_measure_interval: int | None, optional
356-
the interval for resource metric gathering, default is None
357-
ems_measure_interval: int | None, optional
358-
the interval for emission metric gathering, default is None
346+
system_metrics_step: int
347+
The current step for this system metric record
359348
360349
Return
361350
------
362351
tuple[float, float]
363352
new resource metric measure time
364353
new emissions metric measure time
365354
"""
355+
356+
# In order to get a resource metric reading at t=0
357+
# because there is no previous CPU reading yet we cannot
358+
# use the default of None for the interval here, so we measure
359+
# at an interval of 1s.
366360
_current_system_measure = SystemResourceMeasurement(
367361
self.processes,
368-
interval=res_measure_interval,
369-
cpu_only=not system_metrics_step,
362+
interval=1 if system_metrics_step == 0 else None,
370363
)
371364

372-
if system_metrics_step is not None:
373-
# Set join on fail to false as if an error is thrown
374-
# join would be called on this thread and a thread cannot
375-
# join itself!
376-
self._add_metrics_to_dispatch(
377-
_current_system_measure.to_dict(),
378-
join_on_fail=False,
379-
step=system_metrics_step,
380-
)
365+
# Set join on fail to false as if an error is thrown
366+
# join would be called on this thread and a thread cannot
367+
# join itself!
368+
self._add_metrics_to_dispatch(
369+
_current_system_measure.to_dict(),
370+
join_on_fail=False,
371+
step=system_metrics_step,
372+
)
381373

382-
if (
383-
self._emissions_monitor
384-
and emission_metrics_step is not None
385-
and ems_measure_interval is not None
386-
and _current_system_measure.cpu_percent is not None
387-
):
374+
# For the first emissions metrics reading, the time interval to use
375+
# Is the time since the run started, otherwise just use the time between readings
376+
if self._emissions_monitor:
388377
self._emissions_monitor.estimate_co2_emissions(
389378
process_id=f"{self._name}",
390379
cpu_percent=_current_system_measure.cpu_percent,
391-
measure_interval=ems_measure_interval,
380+
measure_interval=(time.time() - self._start_time)
381+
if system_metrics_step == 0
382+
else self._system_metrics_interval,
392383
gpu_percent=_current_system_measure.gpu_percent,
393384
)
394385
self._add_metrics_to_dispatch(
395386
self._emissions_monitor.simvue_metrics(),
396387
join_on_fail=False,
397-
step=emission_metrics_step,
388+
step=system_metrics_step,
398389
)
399390

400391
def _create_heartbeat_callback(
@@ -416,61 +407,30 @@ def _heartbeat(
416407
raise RuntimeError("Expected initialisation of heartbeat")
417408

418409
last_heartbeat: float = 0
419-
last_res_metric_call: float = 0
420-
last_co2_metric_call: float = 0
421-
422-
co2_step: int = 0
423-
res_step: int = 0
410+
last_sys_metric_call: float = 0
424411

425-
initial_ems_metrics_interval: float = time.time() - self._start_time
412+
sys_step: int = 0
426413

427414
while not heartbeat_trigger.is_set():
428415
with self._configuration_lock:
429416
_current_time: float = time.time()
417+
430418
_update_system_metrics: bool = (
431419
self._system_metrics_interval is not None
432-
and _current_time - last_res_metric_call
433-
> self._system_metrics_interval
434-
and self._status == "running"
435-
)
436-
_update_emissions_metrics: bool = (
437-
self._system_metrics_interval is not None
438-
and self._emissions_monitor
439-
and _current_time - last_co2_metric_call
420+
and _current_time - last_sys_metric_call
440421
> self._system_metrics_interval
441422
and self._status == "running"
442423
)
443424

444-
# In order to get a resource metric reading at t=0
445-
# because there is no previous CPU reading yet we cannot
446-
# use the default of None for the interval here, so we measure
447-
# at an interval of 1s. For emissions metrics the first step
448-
# is time since run start
449-
self._get_internal_metrics(
450-
emission_metrics_step=co2_step
451-
if _update_emissions_metrics
452-
else None,
453-
system_metrics_step=res_step
454-
if _update_system_metrics
455-
else None,
456-
res_measure_interval=1 if res_step == 0 else None,
457-
ems_measure_interval=initial_ems_metrics_interval
458-
if co2_step == 0
459-
else self._system_metrics_interval,
460-
)
425+
if _update_system_metrics:
426+
self._get_internal_metrics(system_metrics_step=sys_step)
461427

462-
res_step += 1
463-
co2_step += 1
428+
sys_step += 1
464429

465-
last_res_metric_call = (
430+
last_sys_metric_call = (
466431
_current_time
467432
if _update_system_metrics
468-
else last_res_metric_call
469-
)
470-
last_co2_metric_call = (
471-
_current_time
472-
if _update_emissions_metrics
473-
else last_co2_metric_call
433+
else last_sys_metric_call
474434
)
475435

476436
if time.time() - last_heartbeat < self._heartbeat_interval:
@@ -1055,7 +1015,7 @@ def config(
10551015
queue_blocking: bool | None = None,
10561016
system_metrics_interval: pydantic.PositiveInt | None = None,
10571017
enable_emission_metrics: bool | None = None,
1058-
disable_system_metrics: bool | None = None,
1018+
disable_resources_metrics: bool | None = None,
10591019
storage_id: str | None = None,
10601020
abort_on_alert: typing.Literal["run", "all", "ignore"] | bool | None = None,
10611021
) -> bool:
@@ -1069,10 +1029,10 @@ def config(
10691029
queue_blocking : bool, optional
10701030
block thread queues during metric/event recording
10711031
system_metrics_interval : int, optional
1072-
frequency at which to collect resource metrics
1032+
frequency at which to collect resource and emissions metrics, if enabled
10731033
enable_emission_metrics : bool, optional
10741034
enable monitoring of emission metrics
1075-
disable_system_metrics : bool, optional
1035+
disable_resources_metrics : bool, optional
10761036
disable monitoring of resource metrics
10771037
storage_id : str, optional
10781038
identifier of storage to use, by default None
@@ -1095,17 +1055,30 @@ def config(
10951055
if queue_blocking is not None:
10961056
self._queue_blocking = queue_blocking
10971057

1098-
if system_metrics_interval and disable_system_metrics:
1058+
if system_metrics_interval and disable_resources_metrics:
10991059
self._error(
11001060
"Setting of resource metric interval and disabling resource metrics is ambiguous"
11011061
)
11021062
return False
11031063

1104-
if disable_system_metrics:
1064+
if system_metrics_interval:
1065+
self._system_metrics_interval = system_metrics_interval
1066+
1067+
if disable_resources_metrics:
1068+
if self._emissions_monitor:
1069+
self._error(
1070+
"Emissions metrics require resource metrics collection."
1071+
)
1072+
return False
11051073
self._pid = None
11061074
self._system_metrics_interval = None
11071075

11081076
if enable_emission_metrics:
1077+
if not self._system_metrics_interval:
1078+
self._error(
1079+
"Emissions metrics require resource metrics collection - make sure resource metrics are enabled!"
1080+
)
1081+
return False
11091082
if self._user_config.run.mode == "offline":
11101083
# Create an emissions monitor with no API calls
11111084
self._emissions_monitor = CO2Monitor(
@@ -1130,9 +1103,6 @@ def config(
11301103
elif enable_emission_metrics is False and self._emissions_monitor:
11311104
self._error("Cannot disable emissions monitor once it has been started")
11321105

1133-
if system_metrics_interval:
1134-
self._system_metrics_interval = system_metrics_interval
1135-
11361106
if abort_on_alert is not None:
11371107
if isinstance(abort_on_alert, bool):
11381108
warnings.warn(

tests/unit/test_suppress_errors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_suppress_errors_false() -> None:
1212
with pytest.raises(RuntimeError) as e:
1313
run.config(
1414
suppress_errors=False,
15-
disable_system_metrics=123,
15+
disable_resources_metrics=123,
1616
)
1717
assert "Input should be a valid boolean, unable to interpret input" in f"{e.value}"
1818

@@ -25,7 +25,7 @@ def test_suppress_errors_true(caplog) -> None:
2525

2626
run.config(suppress_errors=True)
2727
run.config(
28-
disable_system_metrics=123,
28+
disable_resources_metrics=123,
2929
)
3030

3131
caplog.set_level(logging.ERROR)
@@ -41,7 +41,7 @@ def test_suppress_errors_default(caplog) -> None:
4141

4242
run.config(suppress_errors=True)
4343
run.config(
44-
disable_system_metrics=123,
44+
disable_resources_metrics=123,
4545
)
4646

4747
caplog.set_level(logging.ERROR)

0 commit comments

Comments
 (0)