Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/getml_io/getml/tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pydantic import BaseModel


class Table(BaseModel, frozen=True):
    """Immutable record describing one table used by a getML pipeline.

    Mirrors ``getml.pipeline.table.Table`` for serialization purposes
    (see ``serialize_tables`` in ``getml_io.serialize.pipeline``).
    """

    # Name of the table (e.g. a population or peripheral table).
    name: str
    # Role marker, e.g. "[POPULATION]" or "[PERIPHERAL]".
    marker: str
    # Name of the target column this importance refers to.
    target: str
    # Relative importance of the table for the given target (0.0-1.0).
    importance: float
3 changes: 2 additions & 1 deletion src/getml_io/metadata/pipeline_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from getml_io.getml.predictors import FeatureSelector, Predictor
from getml_io.getml.preprocessors import Preprocessor
from getml_io.getml.scores import Scores
from getml_io.getml.tables import Table
from getml_io.metadata.data_model_information import DataModelInformation
from getml_io.metadata.dataframe_information import DataFrameInformationByName
from getml_io.metadata.placeholder_information import PlaceholderInformation
Expand Down Expand Up @@ -47,4 +48,4 @@ class PipelineInformation(BaseModel, frozen=True):
scores: Scores
columns: Sequence[Column]
metadata: PipelineMetaData
# tables # TODO @urfoex: #52
tables: Sequence[Table]
26 changes: 25 additions & 1 deletion src/getml_io/serialize/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
from getml.pipeline import Features as GetMLFeatures
from getml.pipeline import Pipeline
from getml.pipeline import Scores as GetMLScores
from getml.pipeline import Tables as GetMLTables
from getml.pipeline.column import Column as GetMLColumn
from getml.pipeline.metadata import AllMetadata
from getml.pipeline.metadata import Metadata as GetMLMetadata
from getml.pipeline.score import ClassificationScore as GetMLClassificationScore
from getml.pipeline.score import RegressionScore as GetMLRegressionScore
from getml.pipeline.score import Score as GetMLScore
from getml.pipeline.table import Table as GetMLTable
from numpy.typing import NDArray

from getml_io.getml.columns import Column
Expand Down Expand Up @@ -60,6 +62,7 @@
TextFieldSplitter,
)
from getml_io.getml.scores import ClassificationScore, RegressionScore, Scores
from getml_io.getml.tables import Table
from getml_io.metadata.dataframe_information import DataFrameInformationByName
from getml_io.metadata.pipeline_information import (
LossFunction,
Expand Down Expand Up @@ -149,7 +152,7 @@ def serialize_pipeline(
scores=serialize_scores(pipeline.scores),
columns=serialize_columns(pipeline.columns),
metadata=serialize_all_metadata(pipeline.metadata),
# tables # TODO @urfoex: #52
tables=serialize_tables(pipeline.tables),
)
pipeline_information_json_path = serialize_pipeline_information(
pipeline_information=pipeline_information,
Expand Down Expand Up @@ -472,3 +475,24 @@ def _serialize_metadata(metadata: GetMLMetadata) -> DataFrameMetaData:
name=metadata.name,
roles=serialize_roles(metadata.roles),
)


def serialize_tables(tables: GetMLTables) -> list[Table]:
    """Serialize getML Tables into a list of Table objects.

    Args:
        tables: The getML Tables to serialize.

    Returns:
        list[Table]: The serialized Tables information.

    """
    # ``.data`` holds the raw sequence of getML Table entries; the cast
    # only narrows the static type and is a no-op at runtime.
    getml_tables = cast("Sequence[GetMLTable]", tables.data)
    serialized: list[Table] = []
    for getml_table in getml_tables:
        serialized.append(
            Table(
                name=getml_table.name,
                marker=getml_table.marker,
                target=getml_table.target,
                importance=getml_table.importance,
            ),
        )
    return serialized
Comment thread
Urfoex marked this conversation as resolved.
40 changes: 30 additions & 10 deletions tests/integration/data/loans/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -631,9 +631,9 @@
"feature_learners": [
{
"aggregation": [
"SUM",
"MIN",
"COUNT",
"SUM",
"MAX",
"AVG"
],
Expand All @@ -649,19 +649,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"STDDEV",
"MIN",
"COUNT DISTINCT",
"COUNT",
"MEDIAN",
"MAX",
"SUM",
"FIRST",
"MODE",
"LAST",
"TREND",
"AVG",
"MIN",
"COUNT MINUS COUNT DISTINCT",
"MODE"
"COUNT",
"FIRST",
"MEDIAN",
"AVG",
"COUNT DISTINCT",
"STDDEV"
],
"delta_t": 0.0,
"loss_function": "CrossEntropyLoss",
Expand Down Expand Up @@ -1365,5 +1365,25 @@
}
}
]
}
},
"tables": [
{
"name": "meta",
"marker": "[PERIPHERAL]",
"target": "default",
"importance": 0.2731073049046048
},
{
"name": "trans",
"marker": "[PERIPHERAL]",
"target": "default",
"importance": 0.39642128482217825
},
{
"name": "population",
"marker": "[POPULATION]",
"target": "default",
"importance": 0.330471410273217
}
]
}
32 changes: 23 additions & 9 deletions tests/integration/data/numerical/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -570,19 +570,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"STDDEV",
"MIN",
"COUNT DISTINCT",
"COUNT",
"MEDIAN",
"MAX",
"SUM",
"FIRST",
"MODE",
"LAST",
"TREND",
"AVG",
"MIN",
"COUNT MINUS COUNT DISTINCT",
"MODE"
"COUNT",
"FIRST",
"MEDIAN",
"AVG",
"COUNT DISTINCT",
"STDDEV"
],
"delta_t": 0.0,
"loss_function": "SquareLoss",
Expand Down Expand Up @@ -915,5 +915,19 @@
}
}
]
}
},
"tables": [
{
"name": "perph",
"marker": "[PERIPHERAL]",
"target": "targets",
"importance": 0.2364818293321066
},
{
"name": "population",
"marker": "[POPULATION]",
"target": "targets",
"importance": 0.7635181706678932
}
]
}
56 changes: 47 additions & 9 deletions tests/integration/data/robot/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -7020,19 +7020,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"STDDEV",
"MIN",
"COUNT DISTINCT",
"COUNT",
"MEDIAN",
"MAX",
"SUM",
"FIRST",
"MODE",
"LAST",
"TREND",
"AVG",
"MIN",
"COUNT MINUS COUNT DISTINCT",
"MODE"
"COUNT",
"FIRST",
"MEDIAN",
"AVG",
"COUNT DISTINCT",
"STDDEV"
],
"delta_t": 0.0,
"loss_function": "SquareLoss",
Expand Down Expand Up @@ -11546,5 +11546,43 @@
}
}
]
}
},
"tables": [
{
"name": "full",
"marker": "[PERIPHERAL]",
"target": "f_x",
"importance": 0.6157352560942454
},
{
"name": "population",
"marker": "[POPULATION]",
"target": "f_x",
"importance": 0.3842647439057541
},
{
"name": "full",
"marker": "[PERIPHERAL]",
"target": "f_y",
"importance": 0.6632312212854787
},
{
"name": "population",
"marker": "[POPULATION]",
"target": "f_y",
"importance": 0.33676877871452054
},
{
"name": "full",
"marker": "[PERIPHERAL]",
"target": "f_z",
"importance": 0.5290353310682591
},
{
"name": "population",
"marker": "[POPULATION]",
"target": "f_z",
"importance": 0.470964668931741
}
]
}
33 changes: 33 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
from getml.pipeline import Features as GetMLFeatures
from getml.pipeline import Pipeline
from getml.pipeline import Scores as GetMLScores
from getml.pipeline import Tables as GetMLTables
from getml.pipeline.columns import Column as GetMLColumn
from getml.pipeline.feature import Feature as GetMLFeature
from getml.pipeline.metadata import AllMetadata as GetMLAllMetadata
from getml.pipeline.metadata import Metadata as GetMLMetadata
from getml.pipeline.score import ClassificationScore as GetMLClassificationScore
from getml.pipeline.score import RegressionScore as GetMLRegressionScore
from getml.pipeline.table import Table as GetMLTable
from numpy.typing import NDArray

from getml_io.getml.columns import Column
Expand All @@ -48,6 +50,7 @@
from getml_io.getml.relationships import Relationship
from getml_io.getml.roles import Role, Roles
from getml_io.getml.scores import ClassificationScore, Scores
from getml_io.getml.tables import Table
from getml_io.metadata.container_information import ContainerInformation
from getml_io.metadata.data_model_information import DataModelInformation
from getml_io.metadata.dataframe_information import (
Expand Down Expand Up @@ -445,6 +448,7 @@ def pipeline_information_empty(
scores=[],
columns=[],
metadata=PipelineMetaData(population=None, peripheral=[]),
tables=[],
)


Expand Down Expand Up @@ -541,6 +545,19 @@ def getml_all_metadata() -> GetMLAllMetadata:
)


@pytest.fixture
def mock_tables(mocker: pytest_mock.MockerFixture) -> GetMLTables:
    # Spec'd MagicMock so attribute access matches the real Tables API;
    # only ``.data`` is consumed by serialize_tables.
    mocked_tables = mocker.MagicMock(spec=GetMLTables)
    mocked_tables.data = [
        GetMLTable(
            name="test_table",
            marker="test_marker",
            target="target0",
            importance=0.0,
        ),
    ]
    return mocked_tables


@pytest.fixture
def mock_pipeline( # noqa: PLR0913
mocker: pytest_mock.MockerFixture,
Expand All @@ -550,6 +567,7 @@ def mock_pipeline( # noqa: PLR0913
mock_scores_regression: GetMLScores,
mock_columns: GetMLColumns,
getml_all_metadata: GetMLAllMetadata,
mock_tables: GetMLTables,
) -> Pipeline:
pipeline = mocker.Mock()
pipeline.id = "mock_pipeline_id"
Expand Down Expand Up @@ -588,6 +606,7 @@ def pipeline_transform(_: DataFrame | View | Subset, *, df_name: str) -> DataFra
pipeline.scores = mock_scores_regression
pipeline.columns = mock_columns
pipeline.metadata = getml_all_metadata
pipeline.tables = mock_tables
return pipeline


Expand Down Expand Up @@ -830,6 +849,18 @@ def meta_data(
)


@pytest.fixture
def tables() -> list[Table]:
    # Expected counterpart of the entry produced by the mock_tables fixture.
    expected_table = Table(
        name="test_table",
        marker="test_marker",
        target="target0",
        importance=0.0,
    )
    return [expected_table]


@pytest.fixture
def pipeline_information( # noqa: PLR0913
data_model_information: DataModelInformation,
Expand All @@ -842,6 +873,7 @@ def pipeline_information( # noqa: PLR0913
scores: Scores,
columns: Sequence[Column],
meta_data: PipelineMetaData,
tables: Sequence[Table],
) -> PipelineInformation:
return PipelineInformation(
id="pipeline_id",
Expand All @@ -864,6 +896,7 @@ def pipeline_information( # noqa: PLR0913
scores=scores,
columns=columns,
metadata=meta_data,
tables=tables,
)


Expand Down
9 changes: 9 additions & 0 deletions tests/unit/metadata/test_pipeline_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def _get_expected_serialized_empty_pipeline_information() -> PipelineInformation
"population": None,
"peripheral": [],
},
"tables": [],
}


Expand Down Expand Up @@ -402,4 +403,12 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType:
},
],
},
"tables": [
{
"importance": 0.0,
"marker": "test_marker",
"name": "test_table",
"target": "target0",
},
],
}
Loading