Skip to content

Commit 329fe46

Browse files
committed
feat(WIP): add support for custom metrics
1 parent 18b57b9 commit 329fe46

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed

src/openlayer/lib/core/metrics.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
"""Module containing the BaseMetric definition for Openlayer."""
2+
3+
from __future__ import annotations
4+
5+
import abc
6+
import argparse
7+
import json
8+
import os
9+
from dataclasses import asdict, dataclass, field
10+
from typing import Any, Dict, List, Optional, Union
11+
12+
import pandas as pd
13+
14+
15+
@dataclass
class MetricReturn:
    """Result produced by the `compute_on_dataset` method of a metric.

    Only `value` is required; `unit` and `meta` are optional extras that are
    serialized alongside it.
    """

    value: Union[float, int, bool]
    """The computed metric value."""

    unit: Optional[str] = None
    """Optional unit the value is expressed in."""

    meta: Dict[str, Any] = field(default_factory=dict)
    """Optional extra metadata; must be a JSON serializable dict."""
27+
28+
29+
@dataclass
class Dataset:
    """Bundle of everything known about one dataset.

    Groups the dataset's name, its configuration, its rows as a DataFrame and
    the directory its outputs live in.
    """

    name: str
    """Name identifying the dataset."""

    config: dict
    """Configuration dict of the dataset."""

    df: pd.DataFrame
    """Rows of the dataset, as a pandas DataFrame."""

    output_path: str
    """Path to the directory holding the dataset outputs."""
44+
45+
46+
class MetricRunner:
    """Run a list of custom metrics against the datasets of a project.

    The runner reads the optional ``--config-path`` command-line argument,
    loads the ``openlayer.json`` file, loads every configured dataset found in
    the model's output directory, and then computes each selected metric.
    """

    def __init__(self):
        self.config_path: str = ""
        self.config: Dict[str, Any] = {}
        self.datasets: List[Dataset] = []
        # ``None`` means "no selection configured"; see `_compute_metrics`.
        self.selected_metrics: Optional[List[str]] = None

    def run_metrics(self, metrics: List[BaseMetric]) -> None:
        """Run a list of metrics.

        Args:
            metrics: The metric instances to compute.
        """
        # Parse arguments from the command line
        self._parse_args()

        # Load the openlayer.json file
        self._load_openlayer_json()

        # Load the datasets from the openlayer.json file
        self._load_datasets()

        # TODO: Auto-load all the metrics in the current directory

        self._compute_metrics(metrics)

    def _parse_args(self) -> None:
        """Read ``--config-path`` from the command line into `config_path`."""
        parser = argparse.ArgumentParser(description="Compute custom metrics.")
        parser.add_argument(
            "--config-path",
            type=str,
            required=False,
            default="",
            help="The path to your openlayer.json. Uses working dir if not provided.",
        )

        args = parser.parse_args()
        self.config_path = args.config_path

    def _load_openlayer_json(self) -> None:
        """Load the openlayer.json file and extract the selected metric keys.

        When `config_path` is empty, ``openlayer.json`` is looked up in the
        current working directory.
        """
        openlayer_json_path = self.config_path or os.path.join(os.getcwd(), "openlayer.json")

        with open(openlayer_json_path, "r", encoding="utf-8") as f:
            self.config = json.load(f)

        # Extract selected metrics. An entry without a "selected" flag is
        # treated as not selected instead of aborting with a KeyError.
        if "metrics" in self.config and "settings" in self.config["metrics"]:
            self.selected_metrics = [
                metric["key"] for metric in self.config["metrics"]["settings"] if metric.get("selected")
            ]

    def _load_datasets(self) -> None:
        """Load the model outputs for every dataset listed in the config.

        Each sub-folder of the model's output directory whose name matches a
        configured dataset is loaded: its ``config.json`` into a dict and its
        ``dataset.csv`` (preferred) or ``dataset.json`` into a DataFrame.

        Raises:
            ValueError: If the config has no model, if a matching folder has
                neither ``dataset.csv`` nor ``dataset.json``, or if no dataset
                could be loaded at all.
        """
        if "model" not in self.config:
            raise ValueError("No model found in the openlayer.json file. Cannot compute metric.")

        output_directory = self.config["model"]["outputDirectory"]
        # A missing or empty "datasets" entry falls through to the
        # "No datasets found" error below rather than raising a bare KeyError.
        dataset_names = {dataset["name"] for dataset in self.config.get("datasets") or []}

        datasets: List[Dataset] = []
        # os.listdir returns entries in arbitrary order; sort so that datasets
        # are always processed in the same order.
        for dataset_folder in sorted(os.listdir(output_directory)):
            dataset_path = os.path.join(output_directory, dataset_folder)
            # Only directories can hold dataset outputs; skip stray files.
            if dataset_folder not in dataset_names or not os.path.isdir(dataset_path):
                continue

            with open(os.path.join(dataset_path, "config.json"), "r", encoding="utf-8") as f:
                dataset_config = json.load(f)

            # Load the dataset into a pandas DataFrame
            csv_path = os.path.join(dataset_path, "dataset.csv")
            json_path = os.path.join(dataset_path, "dataset.json")
            if os.path.exists(csv_path):
                dataset_df = pd.read_csv(csv_path)
            elif os.path.exists(json_path):
                dataset_df = pd.read_json(json_path, orient="records")
            else:
                raise ValueError(f"No dataset found in {dataset_folder}.")

            datasets.append(
                Dataset(name=dataset_folder, config=dataset_config, df=dataset_df, output_path=dataset_path)
            )

        if not datasets:
            raise ValueError("No datasets found in the openlayer.json file. Cannot compute metric.")

        self.datasets = datasets

    def _compute_metrics(self, metrics: List[BaseMetric]) -> None:
        """Compute every metric that is selected in the config.

        When `selected_metrics` is ``None`` or empty, no filtering is applied
        and all metrics are computed.
        """
        for metric in metrics:
            if self.selected_metrics and metric.key not in self.selected_metrics:
                print(f"Skipping metric {metric.key} as it is not a selected metric.")
                continue
            metric.compute(self.datasets)
150+
151+
152+
class BaseMetric(abc.ABC):
    """Interface for the Base metric.

    Your metric's class should inherit from this class and implement the
    `compute_on_dataset` method.
    """

    @property
    def key(self) -> str:
        """Return the key of the metric, which is the concrete class name."""
        return self.__class__.__name__

    def compute(self, datasets: List[Dataset]) -> None:
        """Compute the metric on the model outputs.

        For each dataset, the metric is computed, printed, and written to
        ``<dataset.output_path>/metrics/<key>.json``.

        Args:
            datasets: The datasets to compute the metric on.
        """
        for dataset in datasets:
            metric_return = self.compute_on_dataset(dataset.config, dataset.df)
            metric_value = metric_return.value
            if metric_return.unit:
                metric_value = f"{metric_value} {metric_return.unit}"
            print(f"Metric ({self.key}) value for {dataset.name}: {metric_value}")

            output_dir = os.path.join(dataset.output_path, "metrics")
            self._write_metric_return_to_file(metric_return, output_dir)

    @abc.abstractmethod
    def compute_on_dataset(self, config: dict, df: pd.DataFrame) -> MetricReturn:
        """Compute the metric on a specific dataset.

        Args:
            config: The configuration of the dataset.
            df: The dataset as a pandas DataFrame.

        Returns:
            The metric value with its optional unit and metadata.
        """

    @staticmethod
    def _json_default(obj: Any) -> Any:
        """Convert NumPy/pandas values to built-in types for JSON encoding.

        Raises:
            TypeError: If the object offers no ``tolist`` conversion.
        """
        to_list = getattr(obj, "tolist", None)
        if callable(to_list):
            return to_list()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    def _write_metric_return_to_file(self, metric_return: MetricReturn, output_dir: str) -> None:
        """Write the metric return to ``<output_dir>/<key>.json``.

        Args:
            metric_return: The metric result to serialize.
            output_dir: The directory to write to; created if missing.
        """
        # Create the directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, f"{self.key}.json")
        # Serialize before opening the file so that a serialization error does
        # not leave a truncated metric file behind. Values computed from a
        # DataFrame are often NumPy scalars, which the json module rejects
        # unless they are converted first.
        payload = json.dumps(asdict(metric_return), indent=4, default=self._json_default)
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(payload)
        print(f"Metric ({self.key}) value written to {output_file}")

0 commit comments

Comments
 (0)