Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/getml_io/getml/columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pydantic import BaseModel


class Column(BaseModel, frozen=True):
    """Immutable record of one pipeline input column and its importance.

    Instances are produced by ``serialize_columns`` from getML pipeline
    column data and rendered into the exported pipeline JSON under the
    ``"columns"`` key. ``frozen=True`` makes instances immutable and
    hashable.
    """

    # Position of the column in the pipeline's column listing (0-based
    # in the exported fixtures).
    index: int
    # Column name as it appears in its source table.
    name: str
    # Role marker string, e.g. "[PERIPHERAL]" or "[POPULATION]".
    marker: str
    # Name of the table this column belongs to.
    table: str
    # Target variable the importance score is measured against.
    target: str
    # Relative feature-importance score for this column.
    importance: float
3 changes: 2 additions & 1 deletion src/getml_io/metadata/pipeline_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
)
from pydantic import BaseModel

from getml_io.getml.columns import Column
from getml_io.getml.feature_learning import FeatureLearner
from getml_io.getml.features import Features
from getml_io.getml.predictors import FeatureSelector, Predictor
Expand Down Expand Up @@ -43,6 +44,6 @@ class PipelineInformation(BaseModel, frozen=True):
data_model: DataModelInformation
features: Features
scores: Scores
# columns # TODO @urfoex: #50
columns: Sequence[Column]
# metadata # TODO @urfoex: #51
# tables # TODO @urfoex: #52
32 changes: 31 additions & 1 deletion src/getml_io/serialize/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import dataclasses
import functools
from collections.abc import Sequence
from pathlib import Path
from typing import cast

Expand All @@ -13,14 +14,17 @@
Container,
DataFrame,
)
from getml.pipeline import Columns as GetMLColumns
from getml.pipeline import Features as GetMLFeatures
from getml.pipeline import Pipeline
from getml.pipeline import Scores as GetMLScores
from getml.pipeline.column import Column as GetMLColumn
from getml.pipeline.score import ClassificationScore as GetMLClassificationScore
from getml.pipeline.score import RegressionScore as GetMLRegressionScore
from getml.pipeline.score import Score as GetMLScore
from numpy.typing import NDArray

from getml_io.getml.columns import Column
from getml_io.getml.feature_learning import (
Fastboost,
FastProp,
Expand Down Expand Up @@ -136,7 +140,7 @@ def serialize_pipeline(
data_model=serialize_data_model(pipeline.data_model),
features=serialize_features(pipeline.features),
scores=serialize_scores(pipeline.scores),
# columns # TODO @urfoex: #50
columns=serialize_columns(pipeline.columns),
# metadata # TODO @urfoex: #51
# tables # TODO @urfoex: #52
)
Expand Down Expand Up @@ -407,3 +411,29 @@ def _serialize_regression_scores(scores: list[GetMLScore]) -> list[RegressionSco
),
)
return regression_scores


def serialize_columns(getml_columns: GetMLColumns | None) -> list[Column]:
    """Serialize getML Columns into a list of Column objects.

    Args:
        getml_columns: The getML Columns to serialize, or ``None`` when
            the pipeline exposes no column information.

    Returns:
        list[Column]: The serialized Columns information; empty when
        ``getml_columns`` is ``None``.

    """
    if getml_columns is None:
        return []
    # The ``data`` attribute is untyped upstream; narrow it for the checker.
    raw_columns = cast("Sequence[GetMLColumn]", getml_columns.data)
    serialized: list[Column] = []
    for raw in raw_columns:
        serialized.append(
            Column(
                index=raw.index,
                name=raw.name,
                marker=raw.marker,
                table=raw.table,
                target=raw.target,
                importance=raw.importance,
            )
        )
    return serialized
174 changes: 164 additions & 10 deletions tests/integration/data/loans/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,10 @@
{
"aggregation": [
"AVG",
"MAX",
"COUNT",
"SUM",
"MIN",
"MAX"
"MIN"
],
"allow_sets": true,
"delta_t": 0.0,
Expand All @@ -649,19 +649,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"COUNT DISTINCT",
"MODE",
"COUNT MINUS COUNT DISTINCT",
"STDDEV",
"AVG",
"FIRST",
"MEDIAN",
"COUNT",
"COUNT MINUS COUNT DISTINCT",
"AVG",
"MEDIAN",
"SUM",
"MAX",
"LAST",
"MIN",
"MODE",
"COUNT DISTINCT",
"TREND",
"SUM",
"MAX"
"MIN"
],
"delta_t": 0.0,
"loss_function": "CrossEntropyLoss",
Expand Down Expand Up @@ -1082,5 +1082,159 @@
"cross_entropy": 0.15581770550714213,
"type": "classification"
}
],
"columns": [
{
"index": 0,
"name": "A10",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.01552507205908419
},
{
"index": 1,
"name": "A11",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.0006977925939383302
},
{
"index": 2,
"name": "A12",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.014233860057894013
},
{
"index": 3,
"name": "A13",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.009653255757338346
},
{
"index": 4,
"name": "A14",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.050245939036435795
},
{
"index": 5,
"name": "A3",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03182633574184285
},
{
"index": 6,
"name": "A5",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.011420646872707484
},
{
"index": 7,
"name": "A8",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.002742939298483507
},
{
"index": 8,
"name": "gender",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03563197038951749
},
{
"index": 9,
"name": "type_card",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.06889382953361178
},
{
"index": 10,
"name": "type_disp",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03223566356375098
},
{
"index": 11,
"name": "balance",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.3285333661842472
},
{
"index": 12,
"name": "date",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.02192624082166905
},
{
"index": 13,
"name": "operation",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.04596167781626196
},
{
"index": 14,
"name": "amount",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.09630288841455763
},
{
"index": 15,
"name": "date_loan",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.02192624082166905
},
{
"index": 16,
"name": "duration",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.02868542296307802
},
{
"index": 17,
"name": "frequency",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.015434286910860072
},
{
"index": 18,
"name": "payments",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.16812257116305224
}
]
}
50 changes: 42 additions & 8 deletions tests/integration/data/numerical/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -570,19 +570,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"COUNT DISTINCT",
"MODE",
"COUNT MINUS COUNT DISTINCT",
"STDDEV",
"AVG",
"FIRST",
"MEDIAN",
"COUNT",
"COUNT MINUS COUNT DISTINCT",
"AVG",
"MEDIAN",
"SUM",
"MAX",
"LAST",
"MIN",
"MODE",
"COUNT DISTINCT",
"TREND",
"SUM",
"MAX"
"MIN"
],
"delta_t": 0.0,
"loss_function": "SquareLoss",
Expand Down Expand Up @@ -837,5 +837,39 @@
"rsquared": 0.9996960826789243,
"type": "regression"
}
],
"columns": [
{
"index": 0,
"name": "column_01",
"marker": "[PERIPHERAL]",
"table": "perph",
"target": "targets",
"importance": 0.07663134043051129
},
{
"index": 1,
"name": "time_stamp",
"marker": "[PERIPHERAL]",
"table": "perph",
"target": "targets",
"importance": 0.1598504889015953
},
{
"index": 2,
"name": "column_01",
"marker": "[POPULATION]",
"table": "population",
"target": "targets",
"importance": 0.002464713567455494
},
{
"index": 3,
"name": "time_stamp",
"marker": "[POPULATION]",
"table": "population",
"target": "targets",
"importance": 0.7610534571004377
}
]
}
Loading