From 7274e149767ba324349c0ba319a3e5ac01ecb7d4 Mon Sep 17 00:00:00 2001 From: AbyAbraham21 <95077500+AbyAbraham21@users.noreply.github.com> Date: Wed, 22 Jan 2025 19:59:04 +0000 Subject: [PATCH 01/16] Update eco.py Changes to the CodeCarbon API means that the output values will need to be computed in a different manner. Delta emissions will need to be derived if needed. --- simvue/eco.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/simvue/eco.py b/simvue/eco.py index 6ff7023b..4dd7f9ba 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -34,10 +34,10 @@ def out( logger.debug("Logging CodeCarbon metadata") self._simvue_run.update_metadata( { - "codecarbon.country": total.country_name, - "codecarbon.country_iso_code": total.country_iso_code, - "codecarbon.region": total.region, - "codecarbon.version": total.codecarbon_version, + "codecarbon.country": total.final_emissions_data.country_name, + "codecarbon.country_iso_code": total.final_emissions_data.country_iso_code, + "codecarbon.region": total.final_emissions_data.region, + "codecarbon.version": total.final_emissions_data.codecarbon_version, } ) @@ -48,10 +48,8 @@ def out( logger.debug("Logging CodeCarbon metrics") self._simvue_run.log_metrics( metrics={ - "codecarbon.total.emissions": total.emissions, - "codecarbon.total.energy_consumed": total.energy_consumed, - "codecarbon.delta.emissions": delta.emissions, - "codecarbon.delta.energy_consumed": delta.energy_consumed, + "codecarbon.total.emissions": total.final_emissions_data.emissions, + "codecarbon.total.energy_consumed": total.final_emissions_data.energy_consumed, }, step=self._metrics_step, timestamp=simvue_timestamp(_cc_timestamp), From 1827fedf3721f8c64aa0ce4061067dd40e224a44 Mon Sep 17 00:00:00 2001 From: AbyAbraham21 Date: Thu, 27 Feb 2025 10:22:36 +0000 Subject: [PATCH 02/16] Adding Code carbon changes --- pyproject.toml | 2 +- simvue/eco.py | 68 +++++++++++++++++++++--------- tests/functional/test_run_class.py | 18 ++++---- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6837df44..785daf76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ gitpython = "^3.1.43" humanfriendly = "^10.0" tabulate = "^0.9.0" randomname = "^0.2.1" -codecarbon = "^2.7.1" +codecarbon = "^2.8.1" numpy = "^2.1.2" flatdict = "^4.0.1" semver = "^3.0.2" diff --git a/simvue/eco.py b/simvue/eco.py index 4dd7f9ba..a9050848 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -1,9 +1,9 @@ import typing import logging import datetime - + from codecarbon import EmissionsTracker -from codecarbon.output_methods.base_output import BaseOutput as cc_BaseOutput +from codecarbon.output import BaseOutput as cc_BaseOutput from simvue.utilities import simvue_timestamp if typing.TYPE_CHECKING: @@ -18,6 +18,8 @@ class CodeCarbonOutput(cc_BaseOutput): def __init__(self, run: "Run") -> None: self._simvue_run = run self._metrics_step: int = 0 + self.emissions = 0.0 # To store the CO2 emissions data + self.energy_consumed = 0.0 # To store the energy consumed data def out( self, total: "EmissionsData", delta: "EmissionsData", meta_update: bool = True @@ -32,33 +34,59 @@ def out( if meta_update: logger.debug("Logging CodeCarbon metadata") - self._simvue_run.update_metadata( - { - "codecarbon.country": total.final_emissions_data.country_name, - "codecarbon.country_iso_code": total.final_emissions_data.country_iso_code, - "codecarbon.region": total.final_emissions_data.region, - "codecarbon.version": total.final_emissions_data.codecarbon_version, - } + try: + self._simvue_run.update_metadata( + { + "codecarbon.country": total.country_name, + "codecarbon.country_iso_code": total.country_iso_code, + "codecarbon.region": total.region, + "codecarbon.version": total.codecarbon_version, + } + ) + except AttributeError as e: + logger.error(f"Failed to update metadata: {e}") + try: + _cc_timestamp = datetime.datetime.strptime( + total.timestamp, "%Y-%m-%dT%H:%M:%S" ) + except ValueError as e: + logger.error(f"Error parsing timestamp: {e}") + return - _cc_timestamp: datetime.datetime = datetime.datetime.strptime( - total.timestamp, "%Y-%m-%dT%H:%M:%S" - ) + # Accumulate the emissions and energy consumed + self.emissions += total.emissions # Add new emissions to the total + self.energy_consumed += total.energy_consumed # Add new energy consumed to the total logger.debug("Logging CodeCarbon metrics") - self._simvue_run.log_metrics( - metrics={ - "codecarbon.total.emissions": total.final_emissions_data.emissions, - "codecarbon.total.energy_consumed": total.final_emissions_data.energy_consumed, - }, - step=self._metrics_step, - timestamp=simvue_timestamp(_cc_timestamp), - ) + print("total.emissions=", self.emissions) + print("total.energy_consumed=", self.energy_consumed) + print("total.timestamp=",total.timestamp) + print("_cc_timestamp=",_cc_timestamp) + try: + self._simvue_run.log_metrics( + metrics={ + "codecarbon.emissions": total.emissions, + "codecarbon.energy_consumed": total.energy_consumed, + }, + step=self._metrics_step, + timestamp=simvue_timestamp(_cc_timestamp), + ) + except ArithmeticError as e: + logger.error(f"Failed to log metrics: {e}") + return + self._metrics_step += 1 def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None: self.out(total, delta, meta_update=False) + def get_total_emissions(self) -> float: + """Getter for the total accumulated emissions""" + return self.emissions + + def get_total_energy_consumed(self) -> float: + """Getter for the total accumulated energy consumed""" + return self.energy_consumed class SimvueEmissionsTracker(EmissionsTracker): def __init__( diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index 304fbad9..8ef02102 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -47,15 +47,15 @@ def test_check_run_initialised_decorator() -> None: assert "Simvue Run must be initialised" in str(e.value) -# @pytest.mark.run -# def test_run_with_emissions() -> None: -# with sv_run.Run() as run_created: -# run_created.init(retention_period="1 min") -# run_created.config(enable_emission_metrics=True, emission_metrics_interval=1) -# time.sleep(5) -# _run = RunObject(identifier=run_created.id) -# import pdb; pdb.set_trace() -# assert list(_run.metrics) +@pytest.mark.run +def test_run_with_emissions() -> None: + with sv_run.Run() as run_created: + run_created.init(retention_period="1 min") + run_created.config(enable_emission_metrics=True, emission_metrics_interval=1) + time.sleep(5) + _run = RunObject(identifier=run_created.id) + import pdb; pdb.set_trace() + assert list(_run.metrics) @pytest.mark.run From ff603c4d11551af9a5666d3ae44210f95a627777 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Thu, 27 Feb 2025 10:48:02 +0000 Subject: [PATCH 03/16] Add nested metadata dict --- simvue/eco.py | 23 ++++++++++++----------- tests/functional/test_run_class.py | 6 ++++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/simvue/eco.py b/simvue/eco.py index a9050848..20ea124e 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -1,7 +1,7 @@ import typing import logging import datetime - + from codecarbon import EmissionsTracker from codecarbon.output import BaseOutput as cc_BaseOutput from simvue.utilities import simvue_timestamp @@ -37,10 +37,12 @@ def out( try: self._simvue_run.update_metadata( { - "codecarbon.country": total.country_name, - "codecarbon.country_iso_code": total.country_iso_code, - "codecarbon.region": total.region, - "codecarbon.version": total.codecarbon_version, + "codecarbon": { + "country": total.country_name, + "country_iso_code": total.country_iso_code, + "region": total.region, + "version": total.codecarbon_version, + } } ) except AttributeError as e: @@ -55,13 +57,11 @@ def out( # Accumulate the emissions and energy consumed self.emissions += total.emissions # Add new emissions to the total - self.energy_consumed += total.energy_consumed # Add new energy consumed to the total + self.energy_consumed += ( + total.energy_consumed + ) # Add new energy consumed to the total logger.debug("Logging CodeCarbon metrics") - print("total.emissions=", self.emissions) - print("total.energy_consumed=", self.energy_consumed) - print("total.timestamp=",total.timestamp) - print("_cc_timestamp=",_cc_timestamp) try: self._simvue_run.log_metrics( metrics={ @@ -74,7 +74,7 @@ def out( except ArithmeticError as e: logger.error(f"Failed to log metrics: {e}") return - + self._metrics_step += 1 def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None: @@ -88,6 +88,7 @@ def get_total_energy_consumed(self) -> float: """Getter for the total accumulated energy consumed""" return self.energy_consumed + class SimvueEmissionsTracker(EmissionsTracker): def __init__( self, project_name: str, simvue_run: "Run", metrics_interval: int diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index e3654e06..bb615de9 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -53,9 +53,11 @@ def test_run_with_emissions() -> None: with sv_run.Run() as run_created: run_created.init(retention_period="1 min") run_created.config(enable_emission_metrics=True, emission_metrics_interval=1) - time.sleep(5) + time.sleep(60) _run = RunObject(identifier=run_created.id) - assert list(_run.metrics) + _metric_names = [item[0] for item in _run.metrics] + assert 'codecarbon.energy_consumed' in _metric_names + assert 'codecarbon.emissions' in _metric_names @pytest.mark.run From fcaf66108aa139d29cdf83acec847ab6a312f0c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= <64790965+kzscisoft@users.noreply.github.com> Date: Fri, 28 Feb 2025 08:08:35 +0000 Subject: [PATCH 04/16] Update cover image towards PyPi published README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 974a361e..6c27cc20 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ - Simvue + Simvue

From 4c50a630134a600c6fe914d4e795eebd6b1c442b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Fri, 28 Feb 2025 08:13:13 +0000 Subject: [PATCH 05/16] Update PyPi metadata in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c49ce77c..73105436 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,10 +16,10 @@ classifiers = [ "Operating System :: Unix", "Operating System :: Microsoft :: Windows", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Topic :: System :: Monitoring", "Topic :: Utilities", From ec5052559668cf0fca124bf380537a50ae6d6a34 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 09:33:07 +0000 Subject: [PATCH 06/16] Improved test, fixed api call time --- simvue/eco.py | 13 +++++-------- tests/functional/test_run_class.py | 21 ++++++++++++++++----- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/simvue/eco.py b/simvue/eco.py index 20ea124e..b874923a 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -55,18 +55,14 @@ def out( logger.error(f"Error parsing timestamp: {e}") return - # Accumulate the emissions and energy consumed - self.emissions += total.emissions # Add new emissions to the total - self.energy_consumed += ( - total.energy_consumed - ) # Add new energy consumed to the total - logger.debug("Logging CodeCarbon metrics") try: self._simvue_run.log_metrics( metrics={ - "codecarbon.emissions": total.emissions, - "codecarbon.energy_consumed": total.energy_consumed, + "codecarbon.total.emissions": total.emissions, + "codecarbon.total.energy_consumed": total.energy_consumed, + "codecarbon.delta.emissions": delta.emissions, + "codecarbon.delta.energy_consumed": delta.energy_consumed, }, step=self._metrics_step, timestamp=simvue_timestamp(_cc_timestamp), @@ -98,6 +94,7 @@ def __init__( super().__init__( project_name=project_name, measure_power_secs=metrics_interval, + api_call_interval=1, experiment_id=None, experiment_name=None, logging_logger=CodeCarbonOutput(simvue_run), diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index bb615de9..e4f6f55a 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -53,12 +53,23 @@ def test_run_with_emissions() -> None: with sv_run.Run() as run_created: run_created.init(retention_period="1 min") run_created.config(enable_emission_metrics=True, emission_metrics_interval=1) - time.sleep(60) + time.sleep(5) _run = RunObject(identifier=run_created.id) - _metric_names = [item[0] for item in _run.metrics] - assert 'codecarbon.energy_consumed' in _metric_names - assert 'codecarbon.emissions' in _metric_names - + _metric_names = [item[0] for item in _run.metrics] + client = sv_cl.Client() + for _metric in ["emissions", "energy_consumed"]: + _total_metric_name = f'codecarbon.total.{_metric}' + _delta_metric_name = f'codecarbon.delta.{_metric}' + assert _total_metric_name in _metric_names + assert _delta_metric_name in _metric_names + _metric_values = client.get_metric_values(metric_names=[_total_metric_name, _delta_metric_name], xaxis="time", output_format="dataframe", run_ids=[run_created.id]) + + # Check that total = previous total + latest delta + _total_values = _metric_values[_total_metric_name].tolist() + _delta_values = _metric_values[_delta_metric_name].tolist() + assert len(_total_values) > 1 + for i in range(1, len(_total_values)): + assert _total_values[i] == _total_values[i-1] + _delta_values[i] @pytest.mark.run @pytest.mark.parametrize("timestamp", (datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f"), None), ids=("timestamp", "no_timestamp")) From a560a8241817b8f8e8c1881818cae0c034b9f4e4 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 09:39:15 +0000 Subject: [PATCH 07/16] Remove unused attributes --- simvue/eco.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/simvue/eco.py b/simvue/eco.py index b874923a..42caa528 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -18,8 +18,6 @@ class CodeCarbonOutput(cc_BaseOutput): def __init__(self, run: "Run") -> None: self._simvue_run = run self._metrics_step: int = 0 - self.emissions = 0.0 # To store the CO2 emissions data - self.energy_consumed = 0.0 # To store the energy consumed data def out( self, total: "EmissionsData", delta: "EmissionsData", meta_update: bool = True @@ -76,14 +74,6 @@ def out( def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None: self.out(total, delta, meta_update=False) - def get_total_emissions(self) -> float: - """Getter for the total accumulated emissions""" - return self.emissions - - def get_total_energy_consumed(self) -> float: - """Getter for the total accumulated energy consumed""" - return self.energy_consumed - class SimvueEmissionsTracker(EmissionsTracker): def __init__( From bae99c44f9d3adcce63fa426bfaee4958a5a2793 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 11:38:06 +0000 Subject: [PATCH 08/16] Added support for codecarbon in offline mode --- simvue/config/parameters.py | 1 + simvue/eco.py | 31 +++++++++++++++++++++++- simvue/run.py | 48 ++++++++++++++++++++++++++++++------- 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/simvue/config/parameters.py b/simvue/config/parameters.py index c6d65c93..9e0b38bc 100644 --- a/simvue/config/parameters.py +++ b/simvue/config/parameters.py @@ -48,6 +48,7 @@ def check_token(cls, v: typing.Any) -> str | None: class OfflineSpecifications(pydantic.BaseModel): cache: pathlib.Path | None = None + country_iso_code: str | None = None class MetricsSpecifications(pydantic.BaseModel): diff --git a/simvue/eco.py b/simvue/eco.py index 42caa528..50db508f 100644 --- a/simvue/eco.py +++ b/simvue/eco.py @@ -2,7 +2,7 @@ import logging import datetime -from codecarbon import EmissionsTracker +from codecarbon import EmissionsTracker, OfflineEmissionsTracker from codecarbon.output import BaseOutput as cc_BaseOutput from simvue.utilities import simvue_timestamp @@ -101,3 +101,32 @@ def post_init(self) -> None: self._set_from_conf(self._simvue_run._id, "experiment_id") self._set_from_conf(self._simvue_run._name, "experiment_name") self.start() + + +class OfflineSimvueEmissionsTracker(OfflineEmissionsTracker): + def __init__( + self, project_name: str, simvue_run: "Run", metrics_interval: int + ) -> None: + self._simvue_run = simvue_run + logger.setLevel(logging.ERROR) + super().__init__( + country_iso_code=simvue_run._user_config.offline.country_iso_code, + project_name=project_name, + measure_power_secs=metrics_interval, + api_call_interval=1, + experiment_id=None, + experiment_name=None, + logging_logger=CodeCarbonOutput(simvue_run), + save_to_logger=True, + allow_multiple_runs=True, + log_level="error", + ) + + def set_measure_interval(self, interval: int) -> None: + """Set the measure interval""" + self._set_from_conf(interval, "measure_power_secs") + + def post_init(self) -> None: + self._set_from_conf(self._simvue_run._id, "experiment_id") + self._set_from_conf(self._simvue_run._name, "experiment_name") + self.start() diff --git a/simvue/run.py b/simvue/run.py index ec574013..c2448026 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -43,7 +43,7 @@ from .models import FOLDER_REGEX, NAME_REGEX, MetricKeyString from .system import get_system from .metadata import git_info, environment -from .eco import SimvueEmissionsTracker +from .eco import SimvueEmissionsTracker, OfflineSimvueEmissionsTracker from .utilities import ( skip_if_failed, validate_timestamp, @@ -208,11 +208,28 @@ def __init__( ) else self._user_config.metrics.emission_metrics_interval ) - self._emissions_tracker: SimvueEmissionsTracker | None = ( - SimvueEmissionsTracker("simvue", self, self._emission_metrics_interval) - if self._user_config.metrics.enable_emission_metrics - else None - ) + if mode == "offline": + if ( + self._user_config.metrics.enable_emission_metrics + and not self._user_config.offline.country_iso_code + ): + raise ValueError( + "Country ISO code must be provided if tracking emissions metrics in offline mode." + ) + + self._emissions_tracker: OfflineSimvueEmissionsTracker | None = ( + OfflineSimvueEmissionsTracker( + "simvue", self, self._emission_metrics_interval + ) + if self._user_config.metrics.enable_emission_metrics + else None + ) + else: + self._emissions_tracker: SimvueEmissionsTracker | None = ( + SimvueEmissionsTracker("simvue", self, self._emission_metrics_interval) + if self._user_config.metrics.enable_emission_metrics + else None + ) def __enter__(self) -> Self: return self @@ -1028,9 +1045,22 @@ def config( self._emission_metrics_interval = emission_metrics_interval if enable_emission_metrics: - self._emissions_tracker = SimvueEmissionsTracker( - "simvue", self, self._emission_metrics_interval - ) + if self._user_config.run.mode == "offline": + if not self._user_config.offline.country_iso_code: + self._error( + "Country ISO code must be provided if tracking emissions metrics in offline mode." + ) + self._emissions_tracker: OfflineSimvueEmissionsTracker = ( + OfflineSimvueEmissionsTracker( + "simvue", self, self._emission_metrics_interval + ) + ) + else: + self._emissions_tracker: SimvueEmissionsTracker = ( + SimvueEmissionsTracker( + "simvue", self, self._emission_metrics_interval + ) + ) # If the main Run API object is initialised the run is active # hence the tracker should start too From 0a6fd3807fec52d33378d6c4d82b71085302d97c Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 12:03:48 +0000 Subject: [PATCH 09/16] Added PID to sender lock file so that it can recover if previous sender crashed --- simvue/sender.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/simvue/sender.py b/simvue/sender.py index 8a552f6e..d747dc9b 100644 --- a/simvue/sender.py +++ b/simvue/sender.py @@ -11,6 +11,7 @@ from concurrent.futures import ThreadPoolExecutor import threading import requests +import psutil from simvue.config.user import SimvueConfiguration import simvue.api.objects @@ -167,13 +168,14 @@ def sender( cache_dir = cache_dir or _user_config.offline.cache cache_dir.joinpath("server_ids").mkdir(parents=True, exist_ok=True) + _lock_path = cache_dir.joinpath("sender.lock") # Check that no other sender is already currently running... - if cache_dir.joinpath("sender.lock").exists(): + if _lock_path.exists() and psutil.pid_exists(int(_lock_path.read_text())): raise RuntimeError("A sender is already running for this cache!") # Create lock file to prevent other senders running while this one isn't finished - cache_dir.joinpath("sender.lock").touch() + _lock_path.write_text(str(psutil.Process().pid)) _id_mapping: dict[str, str] = { file_path.name.split(".")[0]: file_path.read_text() @@ -233,5 +235,5 @@ def sender( _heartbeat_files, ) # Remove lock file to allow another sender to start in the future - cache_dir.joinpath("sender.lock").unlink() + _lock_path.unlink() return _id_mapping From ddea4e41f6352febe3867d6d3dad413b2dbc2c29 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 13:49:39 +0000 Subject: [PATCH 10/16] Add accept gxip encoding --- simvue/api/objects/base.py | 1 + simvue/client.py | 3 ++- simvue/run.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/simvue/api/objects/base.py b/simvue/api/objects/base.py index c5930995..84de6e1e 100644 --- a/simvue/api/objects/base.py +++ b/simvue/api/objects/base.py @@ -171,6 +171,7 @@ def __init__( { "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}", "User-Agent": _user_agent or f"Simvue Python client {__version__}", + "Accept-Encoding": "gzip", } if not self._offline else {} diff --git a/simvue/client.py b/simvue/client.py index 4bc7888d..453c1f75 100644 --- a/simvue/client.py +++ b/simvue/client.py @@ -84,7 +84,8 @@ def __init__( logger.warning(f"No {label} specified") self._headers: dict[str, str] = { - "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}" + "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}", + "Accept-Encoding": "gzip", } @prettify_pydantic diff --git a/simvue/run.py b/simvue/run.py index ec574013..823ed88d 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -186,7 +186,8 @@ def __init__( ) self._headers: dict[str, str] = ( { - "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}" + "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}", + "Accept-Encoding": "gzip", } if mode != "offline" else {} From 285a813d8f5d9e09313956c7e32f2dae2f6e86e8 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 16:44:47 +0000 Subject: [PATCH 11/16] Dont log process alerts if already set by user --- simvue/executor.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/simvue/executor.py b/simvue/executor.py index df1dee37..ac1b949a 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -22,6 +22,7 @@ import pathlib import time import typing +from simvue.api.objects.alert.fetch import Alert if typing.TYPE_CHECKING: import simvue @@ -342,17 +343,26 @@ def _update_alerts(self) -> None: # allowing the executor to finish (and as such the run instance to exit) _wait_limit: float = 1 for proc_id, process in self._processes.items(): + # We don't want to override the user's setting for the alert status + # This is so that if a process incorrectly reports its return code, + # the user can manually set the correct status depending on logs etc. + _alert = Alert(identifier=self._alert_ids[proc_id]) + _is_set = _alert.get_status(run_id=self._id) + if process.returncode != 0: # If the process fails then purge the dispatcher event queue # and ensure that the stderr event is sent before the run closes if self._runner._dispatcher: self._runner._dispatcher.purge() - - self._runner.log_alert( - identifier=self._alert_ids[proc_id], state="critical" - ) + if not _is_set: + self._runner.log_alert( + identifier=self._alert_ids[proc_id], state="critical" + ) else: - self._runner.log_alert(identifier=self._alert_ids[proc_id], state="ok") + if not _is_set: + self._runner.log_alert( + identifier=self._alert_ids[proc_id], state="ok" + ) _current_time: float = 0 while ( From ce19ee4701e279ef14b8de9a8b6ed5d8726954f1 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Fri, 28 Feb 2025 16:47:32 +0000 Subject: [PATCH 12/16] Fix id in executor --- simvue/executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simvue/executor.py b/simvue/executor.py index ac1b949a..e7dfcf10 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -347,7 +347,7 @@ def _update_alerts(self) -> None: # This is so that if a process incorrectly reports its return code, # the user can manually set the correct status depending on logs etc. _alert = Alert(identifier=self._alert_ids[proc_id]) - _is_set = _alert.get_status(run_id=self._id) + _is_set = _alert.get_status(run_id=self._runner._id) if process.returncode != 0: # If the process fails then purge the dispatcher event queue From 1ddfb21f2ab7e1a22f8afcad329a8a5652c49c36 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Mon, 3 Mar 2025 09:41:00 +0000 Subject: [PATCH 13/16] =?UTF-8?q?Throw=20error=20if=20trying=20to=20add=20?= =?UTF-8?q?/=20log=20alerts=20by=20name=20in=20offline=20mode=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- simvue/run.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/simvue/run.py b/simvue/run.py index 823ed88d..445d8c5c 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -1657,6 +1657,11 @@ def add_alerts( names = names or [] if names and not ids: + if self._user_config.run.mode == "offline": + self._error( + "Cannot retrieve alerts based on names in offline mode - please use IDs instead." + ) + return False try: if alerts := Alert.get(offline=self._user_config.run.mode == "offline"): ids += [id for id, alert in alerts if alert.name in names] @@ -1957,6 +1962,12 @@ def log_alert( self._error("Please specify alert to update either by ID or by name.") return False + if self._user_config.run.mode == "offline": + self._error( + "Cannot retrieve alerts based on names in offline mode - please use IDs instead." + ) + return False + if name: try: if alerts := Alert.get(offline=self._user_config.run.mode == "offline"): From 8f797c1002f1e70614a43e916959f374a9d7d57b Mon Sep 17 00:00:00 2001 From: Matt Field Date: Mon, 3 Mar 2025 14:13:37 +0000 Subject: [PATCH 14/16] Fix .processes so that it reuses the same objects --- simvue/executor.py | 46 ++++++++++++++++++++++++++++++++++++++-------- simvue/run.py | 37 +++++++++++++++++++++++++++---------- 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/simvue/executor.py b/simvue/executor.py index e7dfcf10..951e3c16 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -22,7 +22,7 @@ import pathlib import time import typing -from simvue.api.objects.alert.fetch import Alert +from simvue.api.objects.alert.user import UserAlert if typing.TYPE_CHECKING: import simvue @@ -114,6 +114,7 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None: self._alert_ids: dict[str, str] = {} self.command_str: dict[str, str] = {} self._processes: dict[str, subprocess.Popen] = {} + self._all_processes: list[psutil.Process] = [] def std_out(self, process_id: str) -> str | None: if not os.path.exists(out_file := f"{self._runner.name}_{process_id}.out"): @@ -271,19 +272,48 @@ def processes(self) -> list[psutil.Process]: if not self._processes: return [] - _all_processes: list[psutil.Process] = [] + _current_processes: list[psutil.Process] = [] for process in self._processes.values(): with contextlib.suppress(psutil.NoSuchProcess): - _all_processes.append(psutil.Process(process.pid)) + _current_processes.append(psutil.Process(process.pid)) with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess): - for process in _all_processes: + for process in _current_processes: for child in process.children(recursive=True): - if child not in _all_processes: - _all_processes.append(child) + if child not in _current_processes: + _current_processes.append(child) - return list(set(_all_processes)) + _current_pids = set([_process.pid for _process in _current_processes]) + _previous_pids = set([_process.pid for _process in self._all_processes]) + + # Find processes which used to exist, which are no longer running + _expired_process_pids = _previous_pids - _current_pids + + # Remove these processes from list of all processes + self._all_processes = [ + _process + for _process in self._all_processes + if _process.pid not in _expired_process_pids + ] + + # Find new processes + _new_process_pids = _current_pids - _previous_pids + _new_processes = [ + _process + for _process in _current_processes + if _process.pid in _new_process_pids + ] + + # Get CPU usage stats for each of those new processes, so that next time it's measured by the heartbeat the value is accurate + if _new_processes: + [_process.cpu_percent() for _process in _new_processes] + time.sleep(0.1) + + # Add these to the list of all processes + self._all_processes += _new_processes + + return self._all_processes @property def success(self) -> int: @@ -346,7 +376,7 @@ def _update_alerts(self) -> None: # We don't want to override the user's setting for the alert status # This is so that if a process incorrectly reports its return code, # the user can manually set the correct status depending on logs etc. - _alert = Alert(identifier=self._alert_ids[proc_id]) + _alert = UserAlert(identifier=self._alert_ids[proc_id]) _is_set = _alert.get_status(run_id=self._runner._id) if process.returncode != 0: diff --git a/simvue/run.py b/simvue/run.py index 445d8c5c..645d3c3e 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -297,16 +297,11 @@ def processes(self) -> list[psutil.Process]: return process_list process_list += [self._parent_process] - - # Attach child processes relating to the process set by set_pid - with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess): - for child in self._parent_process.children(recursive=True): - if child not in process_list: - process_list.append(child) + process_list += self._child_processes return list(set(process_list)) - def _get_sysinfo(self) -> dict[str, typing.Any]: + def _get_sysinfo(self, interval: float | None = None) -> dict[str, typing.Any]: """Retrieve system administration Parameters @@ -320,7 +315,7 @@ def _get_sysinfo(self) -> dict[str, typing.Any]: retrieved system specifications """ processes = self.processes - cpu = get_process_cpu(processes, interval=0.1) + cpu = get_process_cpu(processes, interval=interval) memory = get_process_memory(processes) gpu = get_gpu_metrics(processes) data: dict[str, typing.Any] = {} @@ -359,7 +354,9 @@ def _heartbeat( last_res_metric_call = time.time() if self._resources_metrics_interval: - self._add_metrics_to_dispatch(self._get_sysinfo(), join_on_fail=False) + self._add_metrics_to_dispatch( + self._get_sysinfo(interval=1), join_on_fail=False + ) while not heartbeat_trigger.is_set(): time.sleep(0.1) @@ -490,6 +487,9 @@ def _start(self, reconnect: bool = False) -> bool: self._pid = os.getpid() self._parent_process = psutil.Process(self._pid) if self._pid else None + self._child_processes = ( + self._get_child_processes() if self._parent_process else None + ) self._shutdown_event = threading.Event() self._heartbeat_termination_trigger = threading.Event() @@ -904,6 +904,16 @@ def kill_all_processes(self) -> None: ) self._executor.kill_all() + def _get_child_processes(self) -> list[psutil.Process]: + _process_list = [] + # Attach child processes relating to the process set by set_pid + with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess): + for child in self._parent_process.children(recursive=True): + if child not in _process_list: + _process_list.append(child) + + return list(set(_process_list)) + @property def executor(self) -> Executor: """Return the executor for this run""" @@ -959,6 +969,13 @@ def set_pid(self, pid: int) -> None: """ self._pid = pid self._parent_process = psutil.Process(self._pid) + self._child_processes = self._get_child_processes() + # Get CPU usage stats for each of those new processes, so that next time it's measured by the heartbeat the value is accurate + [ + _process.cpu_percent() + for _process in self._child_processes + [self._parent_process] + ] + time.sleep(0.1) @skip_if_failed("_aborted", "_suppress_errors", False) @pydantic.validate_call @@ -1962,7 +1979,7 @@ def log_alert( self._error("Please specify alert to update either by ID or by name.") return False - if self._user_config.run.mode == "offline": + if name and self._user_config.run.mode == "offline": self._error( "Cannot retrieve alerts based on names in offline mode - please use IDs instead." ) From 009333e1b5a697ab8bbc22366df36c5e63e1d193 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Mon, 3 Mar 2025 14:49:42 +0000 Subject: [PATCH 15/16] Add step to resource metrics --- simvue/run.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/simvue/run.py b/simvue/run.py index 645d3c3e..74dea3ab 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -355,8 +355,9 @@ def _heartbeat( if self._resources_metrics_interval: self._add_metrics_to_dispatch( - self._get_sysinfo(interval=1), join_on_fail=False + self._get_sysinfo(interval=1), join_on_fail=False, step=0 ) + res_step = 1 while not heartbeat_trigger.is_set(): time.sleep(0.1) @@ -371,9 +372,10 @@ def _heartbeat( # join would be called on this thread and a thread cannot # join itself! self._add_metrics_to_dispatch( - self._get_sysinfo(), join_on_fail=False + self._get_sysinfo(), join_on_fail=False, step=res_step ) last_res_metric_call = res_time + res_step += 1 if time.time() - last_heartbeat < self._heartbeat_interval: continue From 4c32dd8fb6dd9287df3d3ffd168f6de6b9c8c700 Mon Sep 17 00:00:00 2001 From: Matt Field Date: Tue, 4 Mar 2025 09:01:07 +0000 Subject: [PATCH 16/16] Update changelog and citation --- CHANGELOG.md | 10 ++++++++++ CITATION.cff | 6 +++--- pyproject.toml | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d96c2000..6cd8fbb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Change log +## [v2.0.0-alpha3](https://github.com/simvue-io/client/releases/tag/v2.0.0a3) - 2025-03-04 +* Updated codecarbon to work with new API +* Codecarbon now works with offline mode +* Codecarbon metadata dict is now nested +* Add PID to sender lock file so it can recover from crashes +* Add accept Gzip encoding +* Fixed list of processes to add / remove from existing list of objects +* Add step to resource metrics +* Fix bug where process user alerts should not be overridden if manually set by the user + ## [v2.0.0-alpha2](https://github.com/simvue-io/client/releases/tag/v2.0.0a2) - 2025-02-27 * Removed 'no config file' and 'unstaged changes' warnings from Offline mode as they do not apply * Made `staging_check` not apply in Offline mode diff --git a/CITATION.cff b/CITATION.cff index d2090269..3fc71db0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -42,9 +42,9 @@ keywords: - alerting - simulation license: Apache-2.0 -commit: 83b9144abd2092d4be304bf742d72a249ad1d8ff -version: 2.0.0a2 -date-released: '2025-02-27' +commit: 64ff8a5344232d44fc7da5b6ff601d3023497977 +version: 2.0.0a3 +date-released: '2025-03-04' references: - title: mlco2/codecarbon version: v2.8.2 diff --git a/pyproject.toml b/pyproject.toml index 73105436..7b03d9e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "simvue" -version = "2.0.0a2" +version = "2.0.0a3" description = "Simulation tracking and monitoring" authors = [ {name = "Simvue Development Team", email = "info@simvue.io"}