Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/getml_io/getml/columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pydantic import BaseModel


class Column(BaseModel, frozen=True):
    """Immutable record of one pipeline input column and its importance.

    Instances are produced by ``serialize_columns`` from getML pipeline
    column data and rendered into the exported pipeline JSON under the
    ``"columns"`` key. ``frozen=True`` makes instances immutable and
    hashable.
    """

    # Position of the column in the pipeline's column listing (0-based
    # in the exported fixtures).
    index: int
    # Column name as it appears in its source table.
    name: str
    # Role marker string, e.g. "[PERIPHERAL]" or "[POPULATION]".
    marker: str
    # Name of the table this column belongs to.
    table: str
    # Target variable the importance score is measured against.
    target: str
    # Relative feature-importance score for this column.
    importance: float
3 changes: 2 additions & 1 deletion src/getml_io/metadata/pipeline_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
)
from pydantic import BaseModel

from getml_io.getml.columns import Column
from getml_io.getml.feature_learning import FeatureLearner
from getml_io.getml.features import Features
from getml_io.getml.predictors import FeatureSelector, Predictor
Expand Down Expand Up @@ -43,6 +44,6 @@ class PipelineInformation(BaseModel, frozen=True):
data_model: DataModelInformation
features: Features
scores: Scores
# columns # TODO @urfoex: #50
columns: Sequence[Column]
# metadata # TODO @urfoex: #51
# tables # TODO @urfoex: #52
32 changes: 31 additions & 1 deletion src/getml_io/serialize/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import dataclasses
import functools
from collections.abc import Sequence
from pathlib import Path
from typing import cast

Expand All @@ -13,14 +14,17 @@
Container,
DataFrame,
)
from getml.pipeline import Columns as GetMLColumns
from getml.pipeline import Features as GetMLFeatures
from getml.pipeline import Pipeline
from getml.pipeline import Scores as GetMLScores
from getml.pipeline.column import Column as GetMLColumn
from getml.pipeline.score import ClassificationScore as GetMLClassificationScore
from getml.pipeline.score import RegressionScore as GetMLRegressionScore
from getml.pipeline.score import Score as GetMLScore
from numpy.typing import NDArray

from getml_io.getml.columns import Column
from getml_io.getml.feature_learning import (
Fastboost,
FastProp,
Expand Down Expand Up @@ -136,7 +140,7 @@ def serialize_pipeline(
data_model=serialize_data_model(pipeline.data_model),
features=serialize_features(pipeline.features),
scores=serialize_scores(pipeline.scores),
# columns # TODO @urfoex: #50
columns=serialize_columns(pipeline.columns),
# metadata # TODO @urfoex: #51
# tables # TODO @urfoex: #52
)
Expand Down Expand Up @@ -407,3 +411,29 @@ def _serialize_regression_scores(scores: list[GetMLScore]) -> list[RegressionSco
),
)
return regression_scores


def serialize_columns(getml_columns: GetMLColumns | None) -> list[Column]:
    """Serialize getML Columns into a list of Column objects.

    Args:
        getml_columns: The getML Columns to serialize, or ``None`` when
            the pipeline exposes no column information.

    Returns:
        list[Column]: The serialized Columns information; empty when
        ``getml_columns`` is ``None``.

    """
    if getml_columns is None:
        return []
    # The ``data`` attribute is untyped upstream; narrow it for the checker.
    raw_columns = cast("Sequence[GetMLColumn]", getml_columns.data)
    serialized: list[Column] = []
    for raw in raw_columns:
        serialized.append(
            Column(
                index=raw.index,
                name=raw.name,
                marker=raw.marker,
                table=raw.table,
                target=raw.target,
                importance=raw.importance,
            )
        )
    return serialized
174 changes: 164 additions & 10 deletions tests/integration/data/loans/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,10 @@
{
"aggregation": [
"AVG",
"MAX",
"COUNT",
"SUM",
"MIN",
"MAX"
"MIN"
],
"allow_sets": true,
"delta_t": 0.0,
Expand All @@ -649,19 +649,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"COUNT DISTINCT",
"MODE",
"COUNT MINUS COUNT DISTINCT",
"STDDEV",
"AVG",
"FIRST",
"MEDIAN",
"COUNT",
"COUNT MINUS COUNT DISTINCT",
"AVG",
"MEDIAN",
"SUM",
"MAX",
"LAST",
"MIN",
"MODE",
"COUNT DISTINCT",
"TREND",
"SUM",
"MAX"
"MIN"
],
"delta_t": 0.0,
"loss_function": "CrossEntropyLoss",
Expand Down Expand Up @@ -1082,5 +1082,159 @@
"cross_entropy": 0.15581770550714213,
"type": "classification"
}
],
"columns": [
{
"index": 0,
"name": "A10",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.01552507205908419
},
{
"index": 1,
"name": "A11",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.0006977925939383302
},
{
"index": 2,
"name": "A12",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.014233860057894013
},
{
"index": 3,
"name": "A13",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.009653255757338346
},
{
"index": 4,
"name": "A14",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.050245939036435795
},
{
"index": 5,
"name": "A3",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03182633574184285
},
{
"index": 6,
"name": "A5",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.011420646872707484
},
{
"index": 7,
"name": "A8",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.002742939298483507
},
{
"index": 8,
"name": "gender",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03563197038951749
},
{
"index": 9,
"name": "type_card",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.06889382953361178
},
{
"index": 10,
"name": "type_disp",
"marker": "[PERIPHERAL]",
"table": "meta",
"target": "default",
"importance": 0.03223566356375098
},
{
"index": 11,
"name": "balance",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.3285333661842472
},
{
"index": 12,
"name": "date",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.02192624082166905
},
{
"index": 13,
"name": "operation",
"marker": "[PERIPHERAL]",
"table": "trans",
"target": "default",
"importance": 0.04596167781626196
},
{
"index": 14,
"name": "amount",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.09630288841455763
},
{
"index": 15,
"name": "date_loan",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.02192624082166905
},
{
"index": 16,
"name": "duration",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.02868542296307802
},
{
"index": 17,
"name": "frequency",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.015434286910860072
},
{
"index": 18,
"name": "payments",
"marker": "[POPULATION]",
"table": "population",
"target": "default",
"importance": 0.16812257116305224
}
]
}
50 changes: 42 additions & 8 deletions tests/integration/data/numerical/expected.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -570,19 +570,19 @@
"num_threads": 0,
"propositionalization": {
"aggregation": [
"COUNT DISTINCT",
"MODE",
"COUNT MINUS COUNT DISTINCT",
"STDDEV",
"AVG",
"FIRST",
"MEDIAN",
"COUNT",
"COUNT MINUS COUNT DISTINCT",
"AVG",
"MEDIAN",
"SUM",
"MAX",
"LAST",
"MIN",
"MODE",
"COUNT DISTINCT",
"TREND",
"SUM",
"MAX"
"MIN"
],
"delta_t": 0.0,
"loss_function": "SquareLoss",
Expand Down Expand Up @@ -837,5 +837,39 @@
"rsquared": 0.9996960826789243,
"type": "regression"
}
],
"columns": [
{
"index": 0,
"name": "column_01",
"marker": "[PERIPHERAL]",
"table": "perph",
"target": "targets",
"importance": 0.07663134043051129
},
{
"index": 1,
"name": "time_stamp",
"marker": "[PERIPHERAL]",
"table": "perph",
"target": "targets",
"importance": 0.1598504889015953
},
{
"index": 2,
"name": "column_01",
"marker": "[POPULATION]",
"table": "population",
"target": "targets",
"importance": 0.002464713567455494
},
{
"index": 3,
"name": "time_stamp",
"marker": "[POPULATION]",
"table": "population",
"target": "targets",
"importance": 0.7610534571004377
}
]
}
Loading