From 7274e149767ba324349c0ba319a3e5ac01ecb7d4 Mon Sep 17 00:00:00 2001
From: AbyAbraham21 <95077500+AbyAbraham21@users.noreply.github.com>
Date: Wed, 22 Jan 2025 19:59:04 +0000
Subject: [PATCH 01/16] Update eco.py
Changes to the CodeCarbon API means that the output values will need to be computed in a different manner. Delta emissions will need to be derived if needed.
---
simvue/eco.py | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/simvue/eco.py b/simvue/eco.py
index 6ff7023b..4dd7f9ba 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -34,10 +34,10 @@ def out(
logger.debug("Logging CodeCarbon metadata")
self._simvue_run.update_metadata(
{
- "codecarbon.country": total.country_name,
- "codecarbon.country_iso_code": total.country_iso_code,
- "codecarbon.region": total.region,
- "codecarbon.version": total.codecarbon_version,
+ "codecarbon.country": total.final_emissions_data.country_name,
+ "codecarbon.country_iso_code": total.final_emissions_data.country_iso_code,
+ "codecarbon.region": total.final_emissions_data.region,
+ "codecarbon.version": total.final_emissions_data.codecarbon_version,
}
)
@@ -48,10 +48,8 @@ def out(
logger.debug("Logging CodeCarbon metrics")
self._simvue_run.log_metrics(
metrics={
- "codecarbon.total.emissions": total.emissions,
- "codecarbon.total.energy_consumed": total.energy_consumed,
- "codecarbon.delta.emissions": delta.emissions,
- "codecarbon.delta.energy_consumed": delta.energy_consumed,
+ "codecarbon.total.emissions": total.final_emissions_data.emissions,
+ "codecarbon.total.energy_consumed": total.final_emissions_data.energy_consumed,
},
step=self._metrics_step,
timestamp=simvue_timestamp(_cc_timestamp),
From 1827fedf3721f8c64aa0ce4061067dd40e224a44 Mon Sep 17 00:00:00 2001
From: AbyAbraham21
Date: Thu, 27 Feb 2025 10:22:36 +0000
Subject: [PATCH 02/16] Adding Code carbon changes
---
pyproject.toml | 2 +-
simvue/eco.py | 68 +++++++++++++++++++++---------
tests/functional/test_run_class.py | 18 ++++----
3 files changed, 58 insertions(+), 30 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 6837df44..785daf76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ gitpython = "^3.1.43"
humanfriendly = "^10.0"
tabulate = "^0.9.0"
randomname = "^0.2.1"
-codecarbon = "^2.7.1"
+codecarbon = "^2.8.1"
numpy = "^2.1.2"
flatdict = "^4.0.1"
semver = "^3.0.2"
diff --git a/simvue/eco.py b/simvue/eco.py
index 4dd7f9ba..a9050848 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -1,9 +1,9 @@
import typing
import logging
import datetime
-
+
from codecarbon import EmissionsTracker
-from codecarbon.output_methods.base_output import BaseOutput as cc_BaseOutput
+from codecarbon.output import BaseOutput as cc_BaseOutput
from simvue.utilities import simvue_timestamp
if typing.TYPE_CHECKING:
@@ -18,6 +18,8 @@ class CodeCarbonOutput(cc_BaseOutput):
def __init__(self, run: "Run") -> None:
self._simvue_run = run
self._metrics_step: int = 0
+ self.emissions = 0.0 # To store the CO2 emissions data
+ self.energy_consumed = 0.0 # To store the energy consumed data
def out(
self, total: "EmissionsData", delta: "EmissionsData", meta_update: bool = True
@@ -32,33 +34,59 @@ def out(
if meta_update:
logger.debug("Logging CodeCarbon metadata")
- self._simvue_run.update_metadata(
- {
- "codecarbon.country": total.final_emissions_data.country_name,
- "codecarbon.country_iso_code": total.final_emissions_data.country_iso_code,
- "codecarbon.region": total.final_emissions_data.region,
- "codecarbon.version": total.final_emissions_data.codecarbon_version,
- }
+ try:
+ self._simvue_run.update_metadata(
+ {
+ "codecarbon.country": total.country_name,
+ "codecarbon.country_iso_code": total.country_iso_code,
+ "codecarbon.region": total.region,
+ "codecarbon.version": total.codecarbon_version,
+ }
+ )
+ except AttributeError as e:
+ logger.error(f"Failed to update metadata: {e}")
+ try:
+ _cc_timestamp = datetime.datetime.strptime(
+ total.timestamp, "%Y-%m-%dT%H:%M:%S"
)
+ except ValueError as e:
+ logger.error(f"Error parsing timestamp: {e}")
+ return
- _cc_timestamp: datetime.datetime = datetime.datetime.strptime(
- total.timestamp, "%Y-%m-%dT%H:%M:%S"
- )
+ # Accumulate the emissions and energy consumed
+ self.emissions += total.emissions # Add new emissions to the total
+ self.energy_consumed += total.energy_consumed # Add new energy consumed to the total
logger.debug("Logging CodeCarbon metrics")
- self._simvue_run.log_metrics(
- metrics={
- "codecarbon.total.emissions": total.final_emissions_data.emissions,
- "codecarbon.total.energy_consumed": total.final_emissions_data.energy_consumed,
- },
- step=self._metrics_step,
- timestamp=simvue_timestamp(_cc_timestamp),
- )
+ print("total.emissions=", self.emissions)
+ print("total.energy_consumed=", self.energy_consumed)
+ print("total.timestamp=",total.timestamp)
+ print("_cc_timestamp=",_cc_timestamp)
+ try:
+ self._simvue_run.log_metrics(
+ metrics={
+ "codecarbon.emissions": total.emissions,
+ "codecarbon.energy_consumed": total.energy_consumed,
+ },
+ step=self._metrics_step,
+ timestamp=simvue_timestamp(_cc_timestamp),
+ )
+ except ArithmeticError as e:
+ logger.error(f"Failed to log metrics: {e}")
+ return
+
self._metrics_step += 1
def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None:
self.out(total, delta, meta_update=False)
+ def get_total_emissions(self) -> float:
+ """Getter for the total accumulated emissions"""
+ return self.emissions
+
+ def get_total_energy_consumed(self) -> float:
+ """Getter for the total accumulated energy consumed"""
+ return self.energy_consumed
class SimvueEmissionsTracker(EmissionsTracker):
def __init__(
diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py
index 304fbad9..8ef02102 100644
--- a/tests/functional/test_run_class.py
+++ b/tests/functional/test_run_class.py
@@ -47,15 +47,15 @@ def test_check_run_initialised_decorator() -> None:
assert "Simvue Run must be initialised" in str(e.value)
-# @pytest.mark.run
-# def test_run_with_emissions() -> None:
-# with sv_run.Run() as run_created:
-# run_created.init(retention_period="1 min")
-# run_created.config(enable_emission_metrics=True, emission_metrics_interval=1)
-# time.sleep(5)
-# _run = RunObject(identifier=run_created.id)
-# import pdb; pdb.set_trace()
-# assert list(_run.metrics)
+@pytest.mark.run
+def test_run_with_emissions() -> None:
+ with sv_run.Run() as run_created:
+ run_created.init(retention_period="1 min")
+ run_created.config(enable_emission_metrics=True, emission_metrics_interval=1)
+ time.sleep(5)
+ _run = RunObject(identifier=run_created.id)
+ import pdb; pdb.set_trace()
+ assert list(_run.metrics)
@pytest.mark.run
From ff603c4d11551af9a5666d3ae44210f95a627777 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Thu, 27 Feb 2025 10:48:02 +0000
Subject: [PATCH 03/16] Add nested metadata dict
---
simvue/eco.py | 23 ++++++++++++-----------
tests/functional/test_run_class.py | 6 ++++--
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/simvue/eco.py b/simvue/eco.py
index a9050848..20ea124e 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -1,7 +1,7 @@
import typing
import logging
import datetime
-
+
from codecarbon import EmissionsTracker
from codecarbon.output import BaseOutput as cc_BaseOutput
from simvue.utilities import simvue_timestamp
@@ -37,10 +37,12 @@ def out(
try:
self._simvue_run.update_metadata(
{
- "codecarbon.country": total.country_name,
- "codecarbon.country_iso_code": total.country_iso_code,
- "codecarbon.region": total.region,
- "codecarbon.version": total.codecarbon_version,
+ "codecarbon": {
+ "country": total.country_name,
+ "country_iso_code": total.country_iso_code,
+ "region": total.region,
+ "version": total.codecarbon_version,
+ }
}
)
except AttributeError as e:
@@ -55,13 +57,11 @@ def out(
# Accumulate the emissions and energy consumed
self.emissions += total.emissions # Add new emissions to the total
- self.energy_consumed += total.energy_consumed # Add new energy consumed to the total
+ self.energy_consumed += (
+ total.energy_consumed
+ ) # Add new energy consumed to the total
logger.debug("Logging CodeCarbon metrics")
- print("total.emissions=", self.emissions)
- print("total.energy_consumed=", self.energy_consumed)
- print("total.timestamp=",total.timestamp)
- print("_cc_timestamp=",_cc_timestamp)
try:
self._simvue_run.log_metrics(
metrics={
@@ -74,7 +74,7 @@ def out(
except ArithmeticError as e:
logger.error(f"Failed to log metrics: {e}")
return
-
+
self._metrics_step += 1
def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None:
@@ -88,6 +88,7 @@ def get_total_energy_consumed(self) -> float:
"""Getter for the total accumulated energy consumed"""
return self.energy_consumed
+
class SimvueEmissionsTracker(EmissionsTracker):
def __init__(
self, project_name: str, simvue_run: "Run", metrics_interval: int
diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py
index e3654e06..bb615de9 100644
--- a/tests/functional/test_run_class.py
+++ b/tests/functional/test_run_class.py
@@ -53,9 +53,11 @@ def test_run_with_emissions() -> None:
with sv_run.Run() as run_created:
run_created.init(retention_period="1 min")
run_created.config(enable_emission_metrics=True, emission_metrics_interval=1)
- time.sleep(5)
+ time.sleep(60)
_run = RunObject(identifier=run_created.id)
- assert list(_run.metrics)
+ _metric_names = [item[0] for item in _run.metrics]
+ assert 'codecarbon.energy_consumed' in _metric_names
+ assert 'codecarbon.emissions' in _metric_names
@pytest.mark.run
From fcaf66108aa139d29cdf83acec847ab6a312f0c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20Zar=C4=99bski?=
<64790965+kzscisoft@users.noreply.github.com>
Date: Fri, 28 Feb 2025 08:08:35 +0000
Subject: [PATCH 04/16] Update cover image towards PyPi published README
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 974a361e..6c27cc20 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
-
+
From 4c50a630134a600c6fe914d4e795eebd6b1c442b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20Zar=C4=99bski?=
Date: Fri, 28 Feb 2025 08:13:13 +0000
Subject: [PATCH 05/16] Update PyPi metadata in pyproject.toml
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index c49ce77c..73105436 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,10 +16,10 @@ classifiers = [
"Operating System :: Unix",
"Operating System :: Microsoft :: Windows",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
"Topic :: System :: Monitoring",
"Topic :: Utilities",
From ec5052559668cf0fca124bf380537a50ae6d6a34 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 09:33:07 +0000
Subject: [PATCH 06/16] Improved test, fixed api call time
---
simvue/eco.py | 13 +++++--------
tests/functional/test_run_class.py | 21 ++++++++++++++++-----
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/simvue/eco.py b/simvue/eco.py
index 20ea124e..b874923a 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -55,18 +55,14 @@ def out(
logger.error(f"Error parsing timestamp: {e}")
return
- # Accumulate the emissions and energy consumed
- self.emissions += total.emissions # Add new emissions to the total
- self.energy_consumed += (
- total.energy_consumed
- ) # Add new energy consumed to the total
-
logger.debug("Logging CodeCarbon metrics")
try:
self._simvue_run.log_metrics(
metrics={
- "codecarbon.emissions": total.emissions,
- "codecarbon.energy_consumed": total.energy_consumed,
+ "codecarbon.total.emissions": total.emissions,
+ "codecarbon.total.energy_consumed": total.energy_consumed,
+ "codecarbon.delta.emissions": delta.emissions,
+ "codecarbon.delta.energy_consumed": delta.energy_consumed,
},
step=self._metrics_step,
timestamp=simvue_timestamp(_cc_timestamp),
@@ -98,6 +94,7 @@ def __init__(
super().__init__(
project_name=project_name,
measure_power_secs=metrics_interval,
+ api_call_interval=1,
experiment_id=None,
experiment_name=None,
logging_logger=CodeCarbonOutput(simvue_run),
diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py
index bb615de9..e4f6f55a 100644
--- a/tests/functional/test_run_class.py
+++ b/tests/functional/test_run_class.py
@@ -53,12 +53,23 @@ def test_run_with_emissions() -> None:
with sv_run.Run() as run_created:
run_created.init(retention_period="1 min")
run_created.config(enable_emission_metrics=True, emission_metrics_interval=1)
- time.sleep(60)
+ time.sleep(5)
_run = RunObject(identifier=run_created.id)
- _metric_names = [item[0] for item in _run.metrics]
- assert 'codecarbon.energy_consumed' in _metric_names
- assert 'codecarbon.emissions' in _metric_names
-
+ _metric_names = [item[0] for item in _run.metrics]
+ client = sv_cl.Client()
+ for _metric in ["emissions", "energy_consumed"]:
+ _total_metric_name = f'codecarbon.total.{_metric}'
+ _delta_metric_name = f'codecarbon.delta.{_metric}'
+ assert _total_metric_name in _metric_names
+ assert _delta_metric_name in _metric_names
+ _metric_values = client.get_metric_values(metric_names=[_total_metric_name, _delta_metric_name], xaxis="time", output_format="dataframe", run_ids=[run_created.id])
+
+ # Check that total = previous total + latest delta
+ _total_values = _metric_values[_total_metric_name].tolist()
+ _delta_values = _metric_values[_delta_metric_name].tolist()
+ assert len(_total_values) > 1
+ for i in range(1, len(_total_values)):
+ assert _total_values[i] == _total_values[i-1] + _delta_values[i]
@pytest.mark.run
@pytest.mark.parametrize("timestamp", (datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f"), None), ids=("timestamp", "no_timestamp"))
From a560a8241817b8f8e8c1881818cae0c034b9f4e4 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 09:39:15 +0000
Subject: [PATCH 07/16] Remove unused attributes
---
simvue/eco.py | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/simvue/eco.py b/simvue/eco.py
index b874923a..42caa528 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -18,8 +18,6 @@ class CodeCarbonOutput(cc_BaseOutput):
def __init__(self, run: "Run") -> None:
self._simvue_run = run
self._metrics_step: int = 0
- self.emissions = 0.0 # To store the CO2 emissions data
- self.energy_consumed = 0.0 # To store the energy consumed data
def out(
self, total: "EmissionsData", delta: "EmissionsData", meta_update: bool = True
@@ -76,14 +74,6 @@ def out(
def live_out(self, total: "EmissionsData", delta: "EmissionsData") -> None:
self.out(total, delta, meta_update=False)
- def get_total_emissions(self) -> float:
- """Getter for the total accumulated emissions"""
- return self.emissions
-
- def get_total_energy_consumed(self) -> float:
- """Getter for the total accumulated energy consumed"""
- return self.energy_consumed
-
class SimvueEmissionsTracker(EmissionsTracker):
def __init__(
From bae99c44f9d3adcce63fa426bfaee4958a5a2793 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 11:38:06 +0000
Subject: [PATCH 08/16] Added support for codecarbon in offline mode
---
simvue/config/parameters.py | 1 +
simvue/eco.py | 31 +++++++++++++++++++++++-
simvue/run.py | 48 ++++++++++++++++++++++++++++++-------
3 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/simvue/config/parameters.py b/simvue/config/parameters.py
index c6d65c93..9e0b38bc 100644
--- a/simvue/config/parameters.py
+++ b/simvue/config/parameters.py
@@ -48,6 +48,7 @@ def check_token(cls, v: typing.Any) -> str | None:
class OfflineSpecifications(pydantic.BaseModel):
cache: pathlib.Path | None = None
+ country_iso_code: str | None = None
class MetricsSpecifications(pydantic.BaseModel):
diff --git a/simvue/eco.py b/simvue/eco.py
index 42caa528..50db508f 100644
--- a/simvue/eco.py
+++ b/simvue/eco.py
@@ -2,7 +2,7 @@
import logging
import datetime
-from codecarbon import EmissionsTracker
+from codecarbon import EmissionsTracker, OfflineEmissionsTracker
from codecarbon.output import BaseOutput as cc_BaseOutput
from simvue.utilities import simvue_timestamp
@@ -101,3 +101,32 @@ def post_init(self) -> None:
self._set_from_conf(self._simvue_run._id, "experiment_id")
self._set_from_conf(self._simvue_run._name, "experiment_name")
self.start()
+
+
+class OfflineSimvueEmissionsTracker(OfflineEmissionsTracker):
+ def __init__(
+ self, project_name: str, simvue_run: "Run", metrics_interval: int
+ ) -> None:
+ self._simvue_run = simvue_run
+ logger.setLevel(logging.ERROR)
+ super().__init__(
+ country_iso_code=simvue_run._user_config.offline.country_iso_code,
+ project_name=project_name,
+ measure_power_secs=metrics_interval,
+ api_call_interval=1,
+ experiment_id=None,
+ experiment_name=None,
+ logging_logger=CodeCarbonOutput(simvue_run),
+ save_to_logger=True,
+ allow_multiple_runs=True,
+ log_level="error",
+ )
+
+ def set_measure_interval(self, interval: int) -> None:
+ """Set the measure interval"""
+ self._set_from_conf(interval, "measure_power_secs")
+
+ def post_init(self) -> None:
+ self._set_from_conf(self._simvue_run._id, "experiment_id")
+ self._set_from_conf(self._simvue_run._name, "experiment_name")
+ self.start()
diff --git a/simvue/run.py b/simvue/run.py
index ec574013..c2448026 100644
--- a/simvue/run.py
+++ b/simvue/run.py
@@ -43,7 +43,7 @@
from .models import FOLDER_REGEX, NAME_REGEX, MetricKeyString
from .system import get_system
from .metadata import git_info, environment
-from .eco import SimvueEmissionsTracker
+from .eco import SimvueEmissionsTracker, OfflineSimvueEmissionsTracker
from .utilities import (
skip_if_failed,
validate_timestamp,
@@ -208,11 +208,28 @@ def __init__(
)
else self._user_config.metrics.emission_metrics_interval
)
- self._emissions_tracker: SimvueEmissionsTracker | None = (
- SimvueEmissionsTracker("simvue", self, self._emission_metrics_interval)
- if self._user_config.metrics.enable_emission_metrics
- else None
- )
+ if mode == "offline":
+ if (
+ self._user_config.metrics.enable_emission_metrics
+ and not self._user_config.offline.country_iso_code
+ ):
+ raise ValueError(
+ "Country ISO code must be provided if tracking emissions metrics in offline mode."
+ )
+
+ self._emissions_tracker: OfflineSimvueEmissionsTracker | None = (
+ OfflineSimvueEmissionsTracker(
+ "simvue", self, self._emission_metrics_interval
+ )
+ if self._user_config.metrics.enable_emission_metrics
+ else None
+ )
+ else:
+ self._emissions_tracker: SimvueEmissionsTracker | None = (
+ SimvueEmissionsTracker("simvue", self, self._emission_metrics_interval)
+ if self._user_config.metrics.enable_emission_metrics
+ else None
+ )
def __enter__(self) -> Self:
return self
@@ -1028,9 +1045,22 @@ def config(
self._emission_metrics_interval = emission_metrics_interval
if enable_emission_metrics:
- self._emissions_tracker = SimvueEmissionsTracker(
- "simvue", self, self._emission_metrics_interval
- )
+ if self._user_config.run.mode == "offline":
+ if not self._user_config.offline.country_iso_code:
+ self._error(
+ "Country ISO code must be provided if tracking emissions metrics in offline mode."
+ )
+ self._emissions_tracker: OfflineSimvueEmissionsTracker = (
+ OfflineSimvueEmissionsTracker(
+ "simvue", self, self._emission_metrics_interval
+ )
+ )
+ else:
+ self._emissions_tracker: SimvueEmissionsTracker = (
+ SimvueEmissionsTracker(
+ "simvue", self, self._emission_metrics_interval
+ )
+ )
# If the main Run API object is initialised the run is active
# hence the tracker should start too
From 0a6fd3807fec52d33378d6c4d82b71085302d97c Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 12:03:48 +0000
Subject: [PATCH 09/16] Added PID to sender lock file so that it can recover if
previous sender crashed
---
simvue/sender.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/simvue/sender.py b/simvue/sender.py
index 8a552f6e..d747dc9b 100644
--- a/simvue/sender.py
+++ b/simvue/sender.py
@@ -11,6 +11,7 @@
from concurrent.futures import ThreadPoolExecutor
import threading
import requests
+import psutil
from simvue.config.user import SimvueConfiguration
import simvue.api.objects
@@ -167,13 +168,14 @@ def sender(
cache_dir = cache_dir or _user_config.offline.cache
cache_dir.joinpath("server_ids").mkdir(parents=True, exist_ok=True)
+ _lock_path = cache_dir.joinpath("sender.lock")
# Check that no other sender is already currently running...
- if cache_dir.joinpath("sender.lock").exists():
+ if _lock_path.exists() and psutil.pid_exists(int(_lock_path.read_text())):
raise RuntimeError("A sender is already running for this cache!")
# Create lock file to prevent other senders running while this one isn't finished
- cache_dir.joinpath("sender.lock").touch()
+ _lock_path.write_text(str(psutil.Process().pid))
_id_mapping: dict[str, str] = {
file_path.name.split(".")[0]: file_path.read_text()
@@ -233,5 +235,5 @@ def sender(
_heartbeat_files,
)
# Remove lock file to allow another sender to start in the future
- cache_dir.joinpath("sender.lock").unlink()
+ _lock_path.unlink()
return _id_mapping
From ddea4e41f6352febe3867d6d3dad413b2dbc2c29 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 13:49:39 +0000
Subject: [PATCH 10/16] Add accept gxip encoding
---
simvue/api/objects/base.py | 1 +
simvue/client.py | 3 ++-
simvue/run.py | 3 ++-
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/simvue/api/objects/base.py b/simvue/api/objects/base.py
index c5930995..84de6e1e 100644
--- a/simvue/api/objects/base.py
+++ b/simvue/api/objects/base.py
@@ -171,6 +171,7 @@ def __init__(
{
"Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}",
"User-Agent": _user_agent or f"Simvue Python client {__version__}",
+ "Accept-Encoding": "gzip",
}
if not self._offline
else {}
diff --git a/simvue/client.py b/simvue/client.py
index 4bc7888d..453c1f75 100644
--- a/simvue/client.py
+++ b/simvue/client.py
@@ -84,7 +84,8 @@ def __init__(
logger.warning(f"No {label} specified")
self._headers: dict[str, str] = {
- "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}"
+ "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}",
+ "Accept-Encoding": "gzip",
}
@prettify_pydantic
diff --git a/simvue/run.py b/simvue/run.py
index ec574013..823ed88d 100644
--- a/simvue/run.py
+++ b/simvue/run.py
@@ -186,7 +186,8 @@ def __init__(
)
self._headers: dict[str, str] = (
{
- "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}"
+ "Authorization": f"Bearer {self._user_config.server.token.get_secret_value()}",
+ "Accept-Encoding": "gzip",
}
if mode != "offline"
else {}
From 285a813d8f5d9e09313956c7e32f2dae2f6e86e8 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 16:44:47 +0000
Subject: [PATCH 11/16] Dont log process alerts if already set by user
---
simvue/executor.py | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/simvue/executor.py b/simvue/executor.py
index df1dee37..ac1b949a 100644
--- a/simvue/executor.py
+++ b/simvue/executor.py
@@ -22,6 +22,7 @@
import pathlib
import time
import typing
+from simvue.api.objects.alert.fetch import Alert
if typing.TYPE_CHECKING:
import simvue
@@ -342,17 +343,26 @@ def _update_alerts(self) -> None:
# allowing the executor to finish (and as such the run instance to exit)
_wait_limit: float = 1
for proc_id, process in self._processes.items():
+ # We don't want to override the user's setting for the alert status
+ # This is so that if a process incorrectly reports its return code,
+ # the user can manually set the correct status depending on logs etc.
+ _alert = Alert(identifier=self._alert_ids[proc_id])
+ _is_set = _alert.get_status(run_id=self._id)
+
if process.returncode != 0:
# If the process fails then purge the dispatcher event queue
# and ensure that the stderr event is sent before the run closes
if self._runner._dispatcher:
self._runner._dispatcher.purge()
-
- self._runner.log_alert(
- identifier=self._alert_ids[proc_id], state="critical"
- )
+ if not _is_set:
+ self._runner.log_alert(
+ identifier=self._alert_ids[proc_id], state="critical"
+ )
else:
- self._runner.log_alert(identifier=self._alert_ids[proc_id], state="ok")
+ if not _is_set:
+ self._runner.log_alert(
+ identifier=self._alert_ids[proc_id], state="ok"
+ )
_current_time: float = 0
while (
From ce19ee4701e279ef14b8de9a8b6ed5d8726954f1 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Fri, 28 Feb 2025 16:47:32 +0000
Subject: [PATCH 12/16] Fix id in executor
---
simvue/executor.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/simvue/executor.py b/simvue/executor.py
index ac1b949a..e7dfcf10 100644
--- a/simvue/executor.py
+++ b/simvue/executor.py
@@ -347,7 +347,7 @@ def _update_alerts(self) -> None:
# This is so that if a process incorrectly reports its return code,
# the user can manually set the correct status depending on logs etc.
_alert = Alert(identifier=self._alert_ids[proc_id])
- _is_set = _alert.get_status(run_id=self._id)
+ _is_set = _alert.get_status(run_id=self._runner._id)
if process.returncode != 0:
# If the process fails then purge the dispatcher event queue
From 1ddfb21f2ab7e1a22f8afcad329a8a5652c49c36 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Mon, 3 Mar 2025 09:41:00 +0000
Subject: [PATCH 13/16] =?UTF-8?q?Throw=20error=20if=20trying=20to=20add=20?=
=?UTF-8?q?/=20log=20alerts=20by=20name=20in=20offline=20mode=C2=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
simvue/run.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/simvue/run.py b/simvue/run.py
index 823ed88d..445d8c5c 100644
--- a/simvue/run.py
+++ b/simvue/run.py
@@ -1657,6 +1657,11 @@ def add_alerts(
names = names or []
if names and not ids:
+ if self._user_config.run.mode == "offline":
+ self._error(
+ "Cannot retrieve alerts based on names in offline mode - please use IDs instead."
+ )
+ return False
try:
if alerts := Alert.get(offline=self._user_config.run.mode == "offline"):
ids += [id for id, alert in alerts if alert.name in names]
@@ -1957,6 +1962,12 @@ def log_alert(
self._error("Please specify alert to update either by ID or by name.")
return False
+ if self._user_config.run.mode == "offline":
+ self._error(
+ "Cannot retrieve alerts based on names in offline mode - please use IDs instead."
+ )
+ return False
+
if name:
try:
if alerts := Alert.get(offline=self._user_config.run.mode == "offline"):
From 8f797c1002f1e70614a43e916959f374a9d7d57b Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Mon, 3 Mar 2025 14:13:37 +0000
Subject: [PATCH 14/16] Fix .processes so that it reuses the same objects
---
simvue/executor.py | 46 ++++++++++++++++++++++++++++++++++++++--------
simvue/run.py | 37 +++++++++++++++++++++++++++----------
2 files changed, 65 insertions(+), 18 deletions(-)
diff --git a/simvue/executor.py b/simvue/executor.py
index e7dfcf10..951e3c16 100644
--- a/simvue/executor.py
+++ b/simvue/executor.py
@@ -22,7 +22,7 @@
import pathlib
import time
import typing
-from simvue.api.objects.alert.fetch import Alert
+from simvue.api.objects.alert.user import UserAlert
if typing.TYPE_CHECKING:
import simvue
@@ -114,6 +114,7 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None:
self._alert_ids: dict[str, str] = {}
self.command_str: dict[str, str] = {}
self._processes: dict[str, subprocess.Popen] = {}
+ self._all_processes: list[psutil.Process] = []
def std_out(self, process_id: str) -> str | None:
if not os.path.exists(out_file := f"{self._runner.name}_{process_id}.out"):
@@ -271,19 +272,48 @@ def processes(self) -> list[psutil.Process]:
if not self._processes:
return []
- _all_processes: list[psutil.Process] = []
+ _current_processes: list[psutil.Process] = []
for process in self._processes.values():
with contextlib.suppress(psutil.NoSuchProcess):
- _all_processes.append(psutil.Process(process.pid))
+ _current_processes.append(psutil.Process(process.pid))
with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess):
- for process in _all_processes:
+ for process in _current_processes:
for child in process.children(recursive=True):
- if child not in _all_processes:
- _all_processes.append(child)
+ if child not in _current_processes:
+ _current_processes.append(child)
- return list(set(_all_processes))
+ _current_pids = set([_process.pid for _process in _current_processes])
+ _previous_pids = set([_process.pid for _process in self._all_processes])
+
+ # Find processes which used to exist, which are no longer running
+ _expired_process_pids = _previous_pids - _current_pids
+
+ # Remove these processes from list of all processes
+ self._all_processes = [
+ _process
+ for _process in self._all_processes
+ if _process.pid not in _expired_process_pids
+ ]
+
+ # Find new processes
+ _new_process_pids = _current_pids - _previous_pids
+ _new_processes = [
+ _process
+ for _process in _current_processes
+ if _process.pid in _new_process_pids
+ ]
+
+ # Get CPU usage stats for each of those new processes, so that next time it's measured by the heartbeat the value is accurate
+ if _new_processes:
+ [_process.cpu_percent() for _process in _new_processes]
+ time.sleep(0.1)
+
+ # Add these to the list of all processes
+ self._all_processes += _new_processes
+
+ return self._all_processes
@property
def success(self) -> int:
@@ -346,7 +376,7 @@ def _update_alerts(self) -> None:
# We don't want to override the user's setting for the alert status
# This is so that if a process incorrectly reports its return code,
# the user can manually set the correct status depending on logs etc.
- _alert = Alert(identifier=self._alert_ids[proc_id])
+ _alert = UserAlert(identifier=self._alert_ids[proc_id])
_is_set = _alert.get_status(run_id=self._runner._id)
if process.returncode != 0:
diff --git a/simvue/run.py b/simvue/run.py
index 445d8c5c..645d3c3e 100644
--- a/simvue/run.py
+++ b/simvue/run.py
@@ -297,16 +297,11 @@ def processes(self) -> list[psutil.Process]:
return process_list
process_list += [self._parent_process]
-
- # Attach child processes relating to the process set by set_pid
- with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess):
- for child in self._parent_process.children(recursive=True):
- if child not in process_list:
- process_list.append(child)
+ process_list += self._child_processes
return list(set(process_list))
- def _get_sysinfo(self) -> dict[str, typing.Any]:
+ def _get_sysinfo(self, interval: float | None = None) -> dict[str, typing.Any]:
"""Retrieve system administration
Parameters
@@ -320,7 +315,7 @@ def _get_sysinfo(self) -> dict[str, typing.Any]:
retrieved system specifications
"""
processes = self.processes
- cpu = get_process_cpu(processes, interval=0.1)
+ cpu = get_process_cpu(processes, interval=interval)
memory = get_process_memory(processes)
gpu = get_gpu_metrics(processes)
data: dict[str, typing.Any] = {}
@@ -359,7 +354,9 @@ def _heartbeat(
last_res_metric_call = time.time()
if self._resources_metrics_interval:
- self._add_metrics_to_dispatch(self._get_sysinfo(), join_on_fail=False)
+ self._add_metrics_to_dispatch(
+ self._get_sysinfo(interval=1), join_on_fail=False
+ )
while not heartbeat_trigger.is_set():
time.sleep(0.1)
@@ -490,6 +487,9 @@ def _start(self, reconnect: bool = False) -> bool:
self._pid = os.getpid()
self._parent_process = psutil.Process(self._pid) if self._pid else None
+ self._child_processes = (
+ self._get_child_processes() if self._parent_process else None
+ )
self._shutdown_event = threading.Event()
self._heartbeat_termination_trigger = threading.Event()
@@ -904,6 +904,16 @@ def kill_all_processes(self) -> None:
)
self._executor.kill_all()
+ def _get_child_processes(self) -> list[psutil.Process]:
+ _process_list = []
+ # Attach child processes relating to the process set by set_pid
+ with contextlib.suppress(psutil.NoSuchProcess, psutil.ZombieProcess):
+ for child in self._parent_process.children(recursive=True):
+ if child not in _process_list:
+ _process_list.append(child)
+
+ return list(set(_process_list))
+
@property
def executor(self) -> Executor:
"""Return the executor for this run"""
@@ -959,6 +969,13 @@ def set_pid(self, pid: int) -> None:
"""
self._pid = pid
self._parent_process = psutil.Process(self._pid)
+ self._child_processes = self._get_child_processes()
+ # Get CPU usage stats for each of those new processes, so that next time it's measured by the heartbeat the value is accurate
+ [
+ _process.cpu_percent()
+ for _process in self._child_processes + [self._parent_process]
+ ]
+ time.sleep(0.1)
@skip_if_failed("_aborted", "_suppress_errors", False)
@pydantic.validate_call
@@ -1962,7 +1979,7 @@ def log_alert(
self._error("Please specify alert to update either by ID or by name.")
return False
- if self._user_config.run.mode == "offline":
+ if name and self._user_config.run.mode == "offline":
self._error(
"Cannot retrieve alerts based on names in offline mode - please use IDs instead."
)
From 009333e1b5a697ab8bbc22366df36c5e63e1d193 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Mon, 3 Mar 2025 14:49:42 +0000
Subject: [PATCH 15/16] Add step to resource metrics
---
simvue/run.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/simvue/run.py b/simvue/run.py
index 645d3c3e..74dea3ab 100644
--- a/simvue/run.py
+++ b/simvue/run.py
@@ -355,8 +355,9 @@ def _heartbeat(
if self._resources_metrics_interval:
self._add_metrics_to_dispatch(
- self._get_sysinfo(interval=1), join_on_fail=False
+ self._get_sysinfo(interval=1), join_on_fail=False, step=0
)
+ res_step = 1
while not heartbeat_trigger.is_set():
time.sleep(0.1)
@@ -371,9 +372,10 @@ def _heartbeat(
# join would be called on this thread and a thread cannot
# join itself!
self._add_metrics_to_dispatch(
- self._get_sysinfo(), join_on_fail=False
+ self._get_sysinfo(), join_on_fail=False, step=res_step
)
last_res_metric_call = res_time
+ res_step += 1
if time.time() - last_heartbeat < self._heartbeat_interval:
continue
From 4c32dd8fb6dd9287df3d3ffd168f6de6b9c8c700 Mon Sep 17 00:00:00 2001
From: Matt Field
Date: Tue, 4 Mar 2025 09:01:07 +0000
Subject: [PATCH 16/16] Update changelog and citation
---
CHANGELOG.md | 10 ++++++++++
CITATION.cff | 6 +++---
pyproject.toml | 2 +-
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d96c2000..6cd8fbb7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
# Change log
+## [v2.0.0-alpha3](https://github.com/simvue-io/client/releases/tag/v2.0.0a3) - 2025-03-04
+* Updated codecarbon to work with new API
+* Codecarbon now works with offline mode
+* Codecarbon metadata dict is now nested
+* Add PID to sender lock file so it can recover from crashes
+* Add accept Gzip encoding
+* Fixed list of processes to add / remove from existing list of objects
+* Add step to resource metrics
+* Fix bug where process user alerts should not be overridden if manually set by the user
+
## [v2.0.0-alpha2](https://github.com/simvue-io/client/releases/tag/v2.0.0a2) - 2025-02-27
* Removed 'no config file' and 'unstaged changes' warnings from Offline mode as they do not apply
* Made `staging_check` not apply in Offline mode
diff --git a/CITATION.cff b/CITATION.cff
index d2090269..3fc71db0 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -42,9 +42,9 @@ keywords:
- alerting
- simulation
license: Apache-2.0
-commit: 83b9144abd2092d4be304bf742d72a249ad1d8ff
-version: 2.0.0a2
-date-released: '2025-02-27'
+commit: 64ff8a5344232d44fc7da5b6ff601d3023497977
+version: 2.0.0a3
+date-released: '2025-03-04'
references:
- title: mlco2/codecarbon
version: v2.8.2
diff --git a/pyproject.toml b/pyproject.toml
index 73105436..7b03d9e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "simvue"
-version = "2.0.0a2"
+version = "2.0.0a3"
description = "Simulation tracking and monitoring"
authors = [
{name = "Simvue Development Team", email = "info@simvue.io"}