From 2cef32ebed397c8b1ee12394591f7b2f001bcbfa Mon Sep 17 00:00:00 2001 From: Manuel Bellersen Date: Mon, 1 Sep 2025 22:12:24 +0200 Subject: [PATCH] GH-50: feat(pipeline): Add serialization for columns Defines a pydantic model for a pipeline's column information and integrates it into the `PipelineInformation` structure, replacing a TODO. Implements the `serialize_columns` function to convert from the getML library's `Columns` object to the new pydantic model. Updates unit and integration tests to include the new 'columns' field, ensuring correctness and updating test fixtures. Resolves: #50 --- src/getml_io/getml/columns.py | 10 + src/getml_io/metadata/pipeline_information.py | 3 +- src/getml_io/serialize/pipeline.py | 32 +- .../data/loans/expected.pipeline.json | 174 +- .../data/numerical/expected.pipeline.json | 50 +- .../data/robot/expected.pipeline.json | 2898 ++++++++++++++++- tests/unit/conftest.py | 39 +- .../metadata/test_pipeline_information.py | 11 + tests/unit/serialize/test_pipeline.py | 33 + .../serialize/test_pipeline_information.py | 1 + 10 files changed, 3222 insertions(+), 29 deletions(-) create mode 100644 src/getml_io/getml/columns.py diff --git a/src/getml_io/getml/columns.py b/src/getml_io/getml/columns.py new file mode 100644 index 0000000..f41ea16 --- /dev/null +++ b/src/getml_io/getml/columns.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class Column(BaseModel, frozen=True): + index: int + name: str + marker: str + table: str + target: str + importance: float diff --git a/src/getml_io/metadata/pipeline_information.py b/src/getml_io/metadata/pipeline_information.py index 3030cc2..97535bb 100644 --- a/src/getml_io/metadata/pipeline_information.py +++ b/src/getml_io/metadata/pipeline_information.py @@ -9,6 +9,7 @@ ) from pydantic import BaseModel +from getml_io.getml.columns import Column from getml_io.getml.feature_learning import FeatureLearner from getml_io.getml.features import Features from getml_io.getml.predictors import FeatureSelector, Predictor @@ -43,6 +44,6 @@ class PipelineInformation(BaseModel, frozen=True): data_model: DataModelInformation features: Features scores: Scores - # columns # TODO @urfoex: #50 + columns: Sequence[Column] # metadata # TODO @urfoex: #51 # tables # TODO @urfoex: #52 diff --git a/src/getml_io/serialize/pipeline.py b/src/getml_io/serialize/pipeline.py index 294b308..8f5ca9a 100644 --- a/src/getml_io/serialize/pipeline.py +++ b/src/getml_io/serialize/pipeline.py @@ -1,5 +1,6 @@ import dataclasses import functools +from collections.abc import Sequence from pathlib import Path from typing import cast @@ -13,14 +14,17 @@ Container, DataFrame, ) +from getml.pipeline import Columns as GetMLColumns from getml.pipeline import Features as GetMLFeatures from getml.pipeline import Pipeline from getml.pipeline import Scores as GetMLScores +from getml.pipeline.column import Column as GetMLColumn from getml.pipeline.score import ClassificationScore as GetMLClassificationScore from getml.pipeline.score import RegressionScore as GetMLRegressionScore from getml.pipeline.score import Score as GetMLScore from numpy.typing import NDArray +from getml_io.getml.columns import Column from getml_io.getml.feature_learning import ( Fastboost, FastProp, @@ -136,7 +140,7 @@ def serialize_pipeline( data_model=serialize_data_model(pipeline.data_model), features=serialize_features(pipeline.features), scores=serialize_scores(pipeline.scores), - # columns # TODO @urfoex: #50 + columns=serialize_columns(pipeline.columns), # metadata # TODO @urfoex: #51 # tables # TODO @urfoex: #52 ) @@ -407,3 +411,29 @@ def _serialize_regression_scores(scores: list[GetMLScore]) -> list[RegressionSco ), ) return regression_scores + + +def serialize_columns(getml_columns: GetMLColumns | None) -> list[Column]: + """Serialize getML Columns into a list of Column objects. + + Args: + getml_columns: The getML Columns to serialize. + + Returns: + list[Column]: The serialized Columns information. + + """ + if getml_columns is None: + return [] + columns = cast("Sequence[GetMLColumn]", getml_columns.data) + return [ + Column( + index=column.index, + name=column.name, + marker=column.marker, + table=column.table, + target=column.target, + importance=column.importance, + ) + for column in columns + ] diff --git a/tests/integration/data/loans/expected.pipeline.json b/tests/integration/data/loans/expected.pipeline.json index 2b61ba8..f5761b3 100644 --- a/tests/integration/data/loans/expected.pipeline.json +++ b/tests/integration/data/loans/expected.pipeline.json @@ -632,10 +632,10 @@ { "aggregation": [ "AVG", + "MAX", "COUNT", "SUM", - "MIN", - "MAX" + "MIN" ], "allow_sets": true, "delta_t": 0.0, @@ -649,19 +649,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "COUNT DISTINCT", + "MODE", + "COUNT MINUS COUNT DISTINCT", "STDDEV", - "AVG", "FIRST", - "MEDIAN", "COUNT", - "COUNT MINUS COUNT DISTINCT", + "AVG", + "MEDIAN", + "SUM", + "MAX", "LAST", - "MIN", - "MODE", + "COUNT DISTINCT", "TREND", - "SUM", - "MAX" + "MIN" ], "delta_t": 0.0, "loss_function": "CrossEntropyLoss", @@ -1082,5 +1082,159 @@ "cross_entropy": 0.15581770550714213, "type": "classification" } + ], + "columns": [ + { + "index": 0, + "name": "A10", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.01552507205908419 + }, + { + "index": 1, + "name": "A11", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.0006977925939383302 + }, + { + "index": 2, + "name": "A12", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.014233860057894013 + }, + { + "index": 3, + "name": "A13", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.009653255757338346 + }, + { + "index": 4, + "name": "A14", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.050245939036435795 + }, + { + "index": 5, + "name": "A3", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.03182633574184285 + }, + { + "index": 6, + "name": "A5", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.011420646872707484 + }, + { + "index": 7, + "name": "A8", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.002742939298483507 + }, + { + "index": 8, + "name": "gender", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.03563197038951749 + }, + { + "index": 9, + "name": "type_card", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.06889382953361178 + }, + { + "index": 10, + "name": "type_disp", + "marker": "[PERIPHERAL]", + "table": "meta", + "target": "default", + "importance": 0.03223566356375098 + }, + { + "index": 11, + "name": "balance", + "marker": "[PERIPHERAL]", + "table": "trans", + "target": "default", + "importance": 0.3285333661842472 + }, + { + "index": 12, + "name": "date", + "marker": "[PERIPHERAL]", + "table": "trans", + "target": "default", + "importance": 0.02192624082166905 + }, + { + "index": 13, + "name": "operation", + "marker": "[PERIPHERAL]", + "table": "trans", + "target": "default", + "importance": 0.04596167781626196 + }, + { + "index": 14, + "name": "amount", + "marker": "[POPULATION]", + "table": "population", + "target": "default", + "importance": 0.09630288841455763 + }, + { + "index": 15, + "name": "date_loan", + "marker": "[POPULATION]", + "table": "population", + "target": "default", + "importance": 0.02192624082166905 + }, + { + "index": 16, + "name": "duration", + "marker": "[POPULATION]", + "table": "population", + "target": "default", + "importance": 0.02868542296307802 + }, + { + "index": 17, + "name": "frequency", + "marker": "[POPULATION]", + "table": "population", + "target": "default", + "importance": 0.015434286910860072 + }, + { + "index": 18, + "name": "payments", + "marker": "[POPULATION]", + "table": "population", + "target": "default", + "importance": 0.16812257116305224 + } ] } \ No newline at end of file diff --git a/tests/integration/data/numerical/expected.pipeline.json b/tests/integration/data/numerical/expected.pipeline.json index 1c35c59..2dfc12b 100644 --- a/tests/integration/data/numerical/expected.pipeline.json +++ b/tests/integration/data/numerical/expected.pipeline.json @@ -570,19 +570,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "COUNT DISTINCT", + "MODE", + "COUNT MINUS COUNT DISTINCT", "STDDEV", - "AVG", "FIRST", - "MEDIAN", "COUNT", - "COUNT MINUS COUNT DISTINCT", + "AVG", + "MEDIAN", + "SUM", + "MAX", "LAST", - "MIN", - "MODE", + "COUNT DISTINCT", "TREND", - "SUM", - "MAX" + "MIN" ], "delta_t": 0.0, "loss_function": "SquareLoss", @@ -837,5 +837,39 @@ "rsquared": 0.9996960826789243, "type": "regression" } + ], + "columns": [ + { + "index": 0, + "name": "column_01", + "marker": "[PERIPHERAL]", + "table": "perph", + "target": "targets", + "importance": 0.07663134043051129 + }, + { + "index": 1, + "name": "time_stamp", + "marker": "[PERIPHERAL]", + "table": "perph", + "target": "targets", + "importance": 0.1598504889015953 + }, + { + "index": 2, + "name": "column_01", + "marker": "[POPULATION]", + "table": "population", + "target": "targets", + "importance": 0.002464713567455494 + }, + { + "index": 3, + "name": "time_stamp", + "marker": "[POPULATION]", + "table": "population", + "target": "targets", + "importance": 0.7610534571004377 + } ] } \ No newline at end of file diff --git a/tests/integration/data/robot/expected.pipeline.json b/tests/integration/data/robot/expected.pipeline.json index cce0b02..26cb892 100644 --- a/tests/integration/data/robot/expected.pipeline.json +++ b/tests/integration/data/robot/expected.pipeline.json @@ -7020,19 +7020,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "COUNT DISTINCT", + "MODE", + "COUNT MINUS COUNT DISTINCT", "STDDEV", - "AVG", "FIRST", - "MEDIAN", "COUNT", - "COUNT MINUS COUNT DISTINCT", + "AVG", + "MEDIAN", + "SUM", + "MAX", "LAST", - "MIN", - "MODE", + "COUNT DISTINCT", "TREND", - "SUM", - "MAX" + "MIN" ], "delta_t": 0.0, "loss_function": "SquareLoss", @@ -8434,5 +8434,2887 @@ "rsquared": 0.022980212283867575, "type": "regression" } + ], + "columns": [ + { + "index": 0, + "name": "100", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.05788147224133086 + }, + { + "index": 1, + "name": "101", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 2, + "name": "102", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.00046300718471272676 + }, + { + "index": 3, + "name": "104", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.00032144503305671265 + }, + { + "index": 4, + "name": "106", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.00043752596051533206 + }, + { + "index": 5, + "name": "33", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 6, + "name": "34", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0005573419421028088 + }, + { + "index": 7, + "name": "35", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0014433699932294161 + }, + { + "index": 8, + "name": "45", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.01137167073898965 + }, + { + "index": 9, + "name": "46", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.23297319613469603 + }, + { + "index": 10, + "name": "47", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.017942643044449917 + }, + { + "index": 11, + "name": "48", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0002662561018245653 + }, + { + "index": 12, + "name": "49", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0013343612716719172 + }, + { + "index": 13, + "name": "50", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.06270026387649483 + }, + { + "index": 14, + "name": "56", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.002341220642450228 + }, + { + "index": 15, + "name": "58", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.09903733759252636 + }, + { + "index": 16, + "name": "59", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.034751290854789625 + }, + { + "index": 17, + "name": "61", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.007027617468670004 + }, + { + "index": 18, + "name": "62", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.015470623677272264 + }, + { + "index": 19, + "name": "69", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.022561362620172075 + }, + { + "index": 20, + "name": "70", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.021629445813743722 + }, + { + "index": 21, + "name": "71", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.006112615558537666 + }, + { + "index": 22, + "name": "73", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.010413669165594663 + }, + { + "index": 23, + "name": "74", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.000040495846502665426 + }, + { + "index": 24, + "name": "99", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.0003246731110247062 + }, + { + "index": 25, + "name": "rowid", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_x", + "importance": 0.008332350219886668 + }, + { + "index": 26, + "name": "10", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 27, + "name": "100", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.03780339255632804 + }, + { + "index": 28, + "name": "101", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.05711377671754354 + }, + { + "index": 29, + "name": "102", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.02427233545982349 + }, + { + "index": 30, + "name": "103", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.005110170044768906 + }, + { + "index": 31, + "name": "104", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.027614720952668696 + }, + { + "index": 32, + "name": "105", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.022771291591973837 + }, + { + "index": 33, + "name": "106", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 34, + "name": "11", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 35, + "name": "12", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 36, + "name": "13", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 37, + "name": "14", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 38, + "name": "15", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 39, + "name": "16", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 40, + "name": "17", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 41, + "name": "18", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 42, + "name": "19", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 43, + "name": "20", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 44, + "name": "21", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 45, + "name": "22", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 46, + "name": "23", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 47, + "name": "24", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 48, + "name": "25", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 49, + "name": "26", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 50, + "name": "27", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 51, + "name": "28", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 52, + "name": "29", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 53, + "name": "3", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 54, + "name": "30", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 55, + "name": "31", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 56, + "name": "32", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 57, + "name": "33", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 58, + "name": "34", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.005567789389341516 + }, + { + "index": 59, + "name": "35", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.01570419500911792 + }, + { + "index": 60, + "name": "36", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.00014629126879031666 + }, + { + "index": 61, + "name": "37", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 62, + "name": "38", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 63, + "name": "39", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 64, + "name": "4", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 65, + "name": "40", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 66, + "name": "41", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 67, + "name": "42", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 68, + "name": "43", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 69, + "name": "44", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 70, + "name": "45", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0034434486660082026 + }, + { + "index": 71, + "name": "46", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0016000896836187214 + }, + { + "index": 72, + "name": "47", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.004314209865403712 + }, + { + "index": 73, + "name": "48", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.01081946313729998 + }, + { + "index": 74, + "name": "49", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.001867084152873861 + }, + { + "index": 75, + "name": "5", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 76, + "name": "50", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0060478665845882865 + }, + { + "index": 77, + "name": "51", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 78, + "name": "52", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 79, + "name": "53", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 80, + "name": "54", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.00001687348989524414 + }, + { + "index": 81, + "name": "55", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.00007037866576845154 + }, + { + "index": 82, + "name": "56", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.028614496644248772 + }, + { + "index": 83, + "name": "57", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0013381612530546831 + }, + { + "index": 84, + "name": "58", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0009630786295232397 + }, + { + "index": 85, + "name": "59", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.004362008000152149 + }, + { + "index": 86, + "name": "6", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 87, + "name": "60", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0013137568119275707 + }, + { + "index": 88, + "name": "61", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.007610095341739914 + }, + { + "index": 89, + "name": "62", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.009538987665949359 + }, + { + "index": 90, + "name": "63", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 91, + "name": "64", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 92, + "name": "65", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 93, + "name": "66", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 94, + "name": "67", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 95, + "name": "68", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 96, + "name": "69", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.002333389981528578 + }, + { + "index": 97, + "name": "7", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 98, + "name": "70", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.030253621025965065 + }, + { + "index": 99, + "name": "71", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.009748443753852906 + }, + { + "index": 100, + "name": "72", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.02991565988296233 + }, + { + "index": 101, + "name": "73", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0201999218597105 + }, + { + "index": 102, + "name": "74", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.005453646661521814 + }, + { + "index": 103, + "name": "75", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 104, + "name": "76", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 105, + "name": "77", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 106, + "name": "78", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 107, + "name": "79", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 108, + "name": "8", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 109, + "name": "80", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 110, + "name": "81", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 111, + "name": "82", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 112, + "name": "83", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 113, + "name": "84", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 114, + "name": "85", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 115, + "name": "86", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 116, + "name": "9", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 117, + "name": "98", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.0 + }, + { + "index": 118, + "name": "99", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 3.7489379178472854e-6 + }, + { + "index": 119, + "name": "rowid", + "marker": "[POPULATION]", + "table": "population", + "target": "f_x", + "importance": 0.008332350219886668 + }, + { + "index": 0, + "name": "100", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00038486065031901424 + }, + { + "index": 1, + "name": "101", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00019508848813718578 + }, + { + "index": 2, + "name": "102", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.000095842132021727 + }, + { + "index": 3, + "name": "104", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.021938776091981967 + }, + { + "index": 4, + "name": "106", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.005845028960031051 + }, + { + "index": 5, + "name": "33", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 1.3311219262664427e-19 + }, + { + "index": 6, + "name": "34", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.015403590433465782 + }, + { + "index": 7, + "name": "35", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.0024897951046593055 + }, + { + "index": 8, + "name": "45", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.008721006089999131 + }, + { + "index": 9, + "name": "46", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.012615353654890445 + }, + { + "index": 10, + "name": "47", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.0012366947806900397 + }, + { + "index": 11, + "name": "48", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.04773577476552667 + }, + { + "index": 12, + "name": "49", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.0018793335789175963 + }, + { + "index": 13, + "name": "50", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.013175581703108107 + }, + { + "index": 14, + "name": "56", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00015539525304535373 + }, + { + "index": 15, + "name": "58", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.0027459406999180464 + }, + { + "index": 16, + "name": "59", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.1556818610219092 + }, + { + "index": 17, + "name": "61", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00605185865753604 + }, + { + "index": 18, + "name": "62", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.06734523698590579 + }, + { + "index": 19, + "name": "69", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.009206492719240818 + }, + { + "index": 20, + "name": "70", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.01389207168471436 + }, + { + "index": 21, + "name": "71", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.10441462413685006 + }, + { + "index": 22, + "name": "73", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.16496301495248847 + }, + { + "index": 23, + "name": "74", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00033927668534458215 + }, + { + "index": 24, + "name": "99", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.00008790427700553538 + }, + { + "index": 25, + "name": "rowid", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_y", + "importance": 0.006630817777772444 + }, + { + "index": 26, + "name": "10", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 27, + "name": "100", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.004618683034674527 + }, + { + "index": 28, + "name": "101", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0010844581995878302 + }, + { + "index": 29, + "name": "102", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.08954473978388584 + }, + { + "index": 30, + "name": "103", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.003417250846799207 + }, + { + "index": 31, + "name": "104", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0010127412557236744 + }, + { + "index": 32, + "name": "105", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.00683616914058371 + }, + { + "index": 33, + "name": "106", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.00006042724782552663 + }, + { + "index": 34, + "name": "11", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 35, + "name": "12", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 36, + "name": "13", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 37, + "name": "14", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 38, + "name": "15", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 39, + "name": "16", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 40, + "name": "17", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 41, + "name": "18", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 42, + "name": "19", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 43, + "name": "20", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 44, + "name": "21", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 45, + "name": "22", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 46, + "name": "23", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 47, + "name": "24", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 48, + "name": "25", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 49, + "name": "26", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 50, + "name": "27", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 51, + "name": "28", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 52, + "name": "29", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 53, + "name": "3", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 54, + "name": "30", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 55, + "name": "31", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 56, + "name": "32", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 57, + "name": "33", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 58, + "name": "34", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.01161076685019501 + }, + { + "index": 59, + "name": "35", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.008263552373409769 + }, + { + "index": 60, + "name": "36", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0006951808443116486 + }, + { + "index": 61, + "name": "37", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 62, + "name": "38", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 63, + "name": "39", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 64, + "name": "4", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 65, + "name": "40", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 66, + "name": "41", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 67, + "name": "42", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 68, + "name": "43", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 69, + "name": "44", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 70, + "name": "45", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.021655500014439825 + }, + { + "index": 71, + "name": "46", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.003832010988327632 + }, + { + "index": 72, + "name": "47", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.002560242450186804 + }, + { + "index": 73, + "name": "48", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.04558819885476921 + }, + { + "index": 74, + "name": "49", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.005018015103557598 + }, + { + "index": 75, + "name": "5", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 76, + "name": "50", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.011682981560148319 + }, + { + "index": 77, + "name": "51", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 78, + "name": "52", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 79, + "name": "53", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 80, + "name": "54", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0030216429917877116 + }, + { + "index": 81, + "name": "55", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0000637529487149554 + }, + { + "index": 82, + "name": "56", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.009611352816678978 + }, + { + "index": 83, + "name": "57", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0000931097591097042 + }, + { + "index": 84, + "name": "58", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.007902438550415405 + }, + { + "index": 85, + "name": "59", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0012748021769235895 + }, + { + "index": 86, + "name": "6", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 87, + "name": "60", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 88, + "name": "61", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.004280046462329568 + }, + { + "index": 89, + "name": "62", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.005933624010875754 + }, + { + "index": 90, + "name": "63", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 91, + "name": "64", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 92, + "name": "65", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 93, + "name": "66", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 94, + "name": "67", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 95, + "name": "68", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 96, + "name": "69", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.01020775469651025 + }, + { + "index": 97, + "name": "7", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 98, + "name": "70", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.004040708360244298 + }, + { + "index": 99, + "name": "71", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0051348829115010035 + }, + { + "index": 100, + "name": "72", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.022971709184169877 + }, + { + "index": 101, + "name": "73", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.004420601843470497 + }, + { + "index": 102, + "name": "74", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.03366920684381693 + }, + { + "index": 103, + "name": "75", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 104, + "name": "76", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 105, + "name": "77", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 106, + "name": "78", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 107, + "name": "79", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 108, + "name": "8", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 109, + "name": "80", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 110, + "name": "81", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 111, + "name": "82", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 112, + "name": "83", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 113, + "name": "84", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 114, + "name": "85", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 115, + "name": "86", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 116, + "name": "9", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 117, + "name": "98", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.0 + }, + { + "index": 118, + "name": "99", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.00003140883177355309 + }, + { + "index": 119, + "name": "rowid", + "marker": "[POPULATION]", + "table": "population", + "target": "f_y", + "importance": 0.006630817777772444 + }, + { + "index": 0, + "name": "100", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.004612525361059204 + }, + { + "index": 1, + "name": "101", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.004998008763088159 + }, + { + "index": 2, + "name": "102", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.06302923831919 + }, + { + "index": 3, + "name": "104", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.00003280820693443541 + }, + { + "index": 4, + "name": "106", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.00002828898881866848 + }, + { + "index": 5, + "name": "33", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 2.0076622383180836e-18 + }, + { + "index": 6, + "name": "34", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.025752511916461905 + }, + { + "index": 7, + "name": "35", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.04673251727481971 + }, + { + "index": 8, + "name": "45", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.0010793957219251997 + }, + { + "index": 9, + "name": "46", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.035304058051901205 + }, + { + "index": 10, + "name": "47", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.0051763076505768595 + }, + { + "index": 11, + "name": "48", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.003351256219643151 + }, + { + "index": 12, + "name": "49", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.005076236277987188 + }, + { + "index": 13, + "name": "50", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.018346841564965313 + }, + { + "index": 14, + "name": "56", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.0005541424684867028 + }, + { + "index": 15, + "name": "58", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.005615577878228329 + }, + { + "index": 16, + "name": "59", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.004520119614373425 + }, + { + "index": 17, + "name": "61", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.10093404330748995 + }, + { + "index": 18, + "name": "62", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.05283333861464529 + }, + { + "index": 19, + "name": "69", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.0038337167952758877 + }, + { + "index": 20, + "name": "70", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.08292492952094901 + }, + { + "index": 21, + "name": "71", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.00236101321156589 + }, + { + "index": 22, + "name": "73", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.011383900960309 + }, + { + "index": 23, + "name": "74", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 24, + "name": "99", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.000036340772796744404 + }, + { + "index": 25, + "name": "rowid", + "marker": "[PERIPHERAL]", + "table": "full", + "target": "f_z", + "importance": 0.050518213606767795 + }, + { + "index": 26, + "name": "10", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 27, + "name": "100", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0020326760269546687 + }, + { + "index": 28, + "name": "101", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.02193976285582127 + }, + { + "index": 29, + "name": "102", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.00782316052213357 + }, + { + "index": 30, + "name": "103", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0018409273298231141 + }, + { + "index": 31, + "name": "104", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.00017263971894891425 + }, + { + "index": 32, + "name": "105", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.07742145397582277 + }, + { + "index": 33, + "name": "106", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 34, + "name": "11", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 35, + "name": "12", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 36, + "name": "13", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 37, + "name": "14", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 38, + "name": "15", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 39, + "name": "16", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 40, + "name": "17", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 41, + "name": "18", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 42, + "name": "19", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 43, + "name": "20", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 44, + "name": "21", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 45, + "name": "22", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 46, + "name": "23", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 47, + "name": "24", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 48, + "name": "25", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 49, + "name": "26", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 50, + "name": "27", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 51, + "name": "28", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 52, + "name": "29", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 53, + "name": "3", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 54, + "name": "30", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 55, + "name": "31", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 56, + "name": "32", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 57, + "name": "33", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 58, + "name": "34", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0034477446206326113 + }, + { + "index": 59, + "name": "35", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.004610081355117007 + }, + { + "index": 60, + "name": "36", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.011957470011312157 + }, + { + "index": 61, + "name": "37", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0019652122880601457 + }, + { + "index": 62, + "name": "38", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 63, + "name": "39", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 64, + "name": "4", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 65, + "name": "40", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 66, + "name": "41", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 67, + "name": "42", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 68, + "name": "43", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 69, + "name": "44", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 70, + "name": "45", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.029535199491831504 + }, + { + "index": 71, + "name": "46", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.01760735872010546 + }, + { + "index": 72, + "name": "47", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0027577189908895776 + }, + { + "index": 73, + "name": "48", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.002232631798275155 + }, + { + "index": 74, + "name": "49", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.011831980834986862 + }, + { + "index": 75, + "name": "5", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 76, + "name": "50", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.05128815882990684 + }, + { + "index": 77, + "name": "51", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 78, + "name": "52", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 79, + "name": "53", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 80, + "name": "54", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0002461164233818155 + }, + { + "index": 81, + "name": "55", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0012078296371794246 + }, + { + "index": 82, + "name": "56", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.006702880620858448 + }, + { + "index": 83, + "name": "57", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0054258093183374234 + }, + { + "index": 84, + "name": "58", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.022789751238411142 + }, + { + "index": 85, + "name": "59", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.003165947096846562 + }, + { + "index": 86, + "name": "6", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 87, + "name": "60", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.005426599876850108 + }, + { + "index": 88, + "name": "61", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0019466180744260077 + }, + { + "index": 89, + "name": "62", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.005057875662036349 + }, + { + "index": 90, + "name": "63", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 91, + "name": "64", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 92, + "name": "65", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 93, + "name": "66", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 94, + "name": "67", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 95, + "name": "68", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 96, + "name": "69", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0250746910341985 + }, + { + "index": 97, + "name": "7", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 98, + "name": "70", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.005784747236428202 + }, + { + "index": 99, + "name": "71", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.002427480288384795 + }, + { + "index": 100, + "name": "72", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.03816211674396784 + }, + { + "index": 101, + "name": "73", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.008972886587351347 + }, + { + "index": 102, + "name": "74", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.03959092811569354 + }, + { + "index": 103, + "name": "75", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 104, + "name": "76", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 105, + "name": "77", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 106, + "name": "78", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 107, + "name": "79", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 108, + "name": "8", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 109, + "name": "80", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 110, + "name": "81", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 111, + "name": "82", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 112, + "name": "83", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 113, + "name": "84", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 114, + "name": "85", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 115, + "name": "86", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 116, + "name": "9", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 117, + "name": "98", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 118, + "name": "99", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.0 + }, + { + "index": 119, + "name": "rowid", + "marker": "[POPULATION]", + "table": "population", + "target": "f_z", + "importance": 0.050518213606767795 + } ] } \ No newline at end of file diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 4cfe85a..53951fc 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -21,14 +21,17 @@ from getml.data import Container, DataFrame, Placeholder, Subset, View from getml.data.roles.types import Role as GetMLRole from getml.feature_learning.loss_functions import CROSSENTROPYLOSS +from getml.pipeline import Columns as GetMLColumns from getml.pipeline import Features as GetMLFeatures from getml.pipeline import Pipeline from getml.pipeline import Scores as GetMLScores +from getml.pipeline.columns import Column as GetMLColumn from getml.pipeline.feature import Feature as GetMLFeature from getml.pipeline.score import ClassificationScore as GetMLClassificationScore from getml.pipeline.score import RegressionScore as GetMLRegressionScore from numpy.typing import NDArray +from getml_io.getml.columns import Column from getml_io.getml.feature_learning import FastProp from getml_io.getml.features import Feature, Features from getml_io.getml.predictors import LinearRegression @@ -433,6 +436,7 @@ def pipeline_information_empty( data_model=data_model_information_empty, features={}, scores=[], + columns=[], ) @@ -499,12 +503,28 @@ def mock_scores_classification(mocker: pytest_mock.MockerFixture) -> GetMLScores @pytest.fixture -def mock_pipeline( +def mock_columns(mocker: pytest_mock.MockerFixture) -> GetMLColumns: + column = GetMLColumn( + index=0, + name="target0", + marker="test_marker_0", + table="test_table_0", + target="target0", + importance=0.0, + ) + columns = mocker.MagicMock(spec=GetMLColumns) + columns.data = [column] + return columns + + +@pytest.fixture +def mock_pipeline( # noqa: PLR0913 mocker: pytest_mock.MockerFixture, ndarray: NDArray[np.float64], mock_dataframe: DataFrame, mock_features: GetMLFeatures, mock_scores_regression: GetMLScores, + mock_columns: GetMLColumns, ) -> Pipeline: pipeline = mocker.Mock() pipeline.id = "mock_pipeline_id" @@ -541,6 +561,7 @@ def pipeline_transform(_: DataFrame | View | Subset, *, df_name: str) -> DataFra pipeline.targets = ["target0"] pipeline.features = mock_features pipeline.scores = mock_scores_regression + pipeline.columns = mock_columns return pipeline @@ -707,6 +728,20 @@ def scores() -> Scores: ] +@pytest.fixture +def columns() -> list[Column]: + return [ + Column( + index=0, + name="target0", + marker="test_marker_0", + table="test_table_0", + target="target0", + importance=0.0, + ), + ] + + @pytest.fixture def pipeline_information( # noqa: PLR0913 dataframe_information_test: DataFrameInformation, @@ -721,6 +756,7 @@ def pipeline_information( # noqa: PLR0913 feature_sets_path: Path, features: Features, scores: Scores, + columns: Sequence[Column], ) -> PipelineInformation: return PipelineInformation( id="pipeline_id", @@ -766,6 +802,7 @@ def pipeline_information( # noqa: PLR0913 data_model=data_model_information, features=features, scores=scores, + columns=columns, ) diff --git a/tests/unit/metadata/test_pipeline_information.py b/tests/unit/metadata/test_pipeline_information.py index e555d10..dfa279e 100644 --- a/tests/unit/metadata/test_pipeline_information.py +++ b/tests/unit/metadata/test_pipeline_information.py @@ -60,6 +60,7 @@ def _get_expected_serialized_empty_pipeline_information() -> PipelineInformation "targets": [], "features": {}, "scores": [], + "columns": [], } @@ -357,4 +358,14 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "type": "classification", }, ], + "columns": [ + { + "importance": 0.0, + "index": 0, + "marker": "test_marker_0", + "name": "target0", + "table": "test_table_0", + "target": "target0", + }, + ], } diff --git a/tests/unit/serialize/test_pipeline.py b/tests/unit/serialize/test_pipeline.py index 3403aa8..37be437 100644 --- a/tests/unit/serialize/test_pipeline.py +++ b/tests/unit/serialize/test_pipeline.py @@ -6,10 +6,12 @@ from getml import predictors as getml_predictor from getml import preprocessors as getml_preprocessor from getml.data import Container, DataFrame +from getml.pipeline import Columns as GetMLColumns from getml.pipeline import Features as GetMLFeatures from getml.pipeline import Pipeline from getml.pipeline import Scores as GetMLScores +from getml_io.getml.columns import Column from getml_io.getml.feature_learning import ( Fastboost, FastProp, @@ -42,6 +44,7 @@ from getml_io.metadata.pipeline_information import LossFunction from getml_io.serialize.exception import WrongPipelineScoreTypeError from getml_io.serialize.pipeline import ( + serialize_columns, serialize_feature_learner, serialize_feature_sets, serialize_features, @@ -437,3 +440,33 @@ def test_serialize_scores_classification_wrong_type( # When / Then with pytest.raises(WrongPipelineScoreTypeError): _ = serialize_scores(mock_scores_classification) + + +@pytest.mark.unit +def test_serialize_columns(mock_columns: GetMLColumns) -> None: + # Given + + # When + columns = serialize_columns(mock_columns) + + # Then + assert len(columns) == 1 + assert columns[0] == Column( + index=0, + name="target0", + marker="test_marker_0", + table="test_table_0", + target="target0", + importance=0.0, + ) + + +@pytest.mark.unit +def test_serialize_columns_empty() -> None: + # Given + + # When + columns = serialize_columns(None) + + # Then + assert len(columns) == 0 diff --git a/tests/unit/serialize/test_pipeline_information.py b/tests/unit/serialize/test_pipeline_information.py index 685cfce..848d81f 100644 --- a/tests/unit/serialize/test_pipeline_information.py +++ b/tests/unit/serialize/test_pipeline_information.py @@ -77,6 +77,7 @@ def _get_expected_pipeline_information() -> PipelineInformationType: "targets": [], "features": {}, "scores": [], + "columns": [], }