From 323c06391123e1f3a7c2d1725653ade57f3723ec Mon Sep 17 00:00:00 2001
From: Lin Yuan <apeforest@gmail.com>
Date: Fri, 25 Apr 2025 14:06:07 -0700
Subject: [PATCH 1/4] Use original data type in identifier column

---
 runtime/databricks/automl_runtime/forecast/deepar/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/runtime/databricks/automl_runtime/forecast/deepar/utils.py b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
index 016de93..62afc18 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/utils.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
@@ -98,9 +98,11 @@ def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
         df_dict = {}
         for grouped_id, grouped_df in df.groupby(id_cols):
             if isinstance(grouped_id, tuple):
+                # TODO (ML-52171): Fix the DeepAR library to support multi-time series id columns
+                # For now, we convert and concatenate the id_cols to a string
                 ts_id = "-".join([str(x) for x in grouped_id])
             else:
-                ts_id = str(grouped_id)
+                ts_id = grouped_id
             df_dict[ts_id] = (grouped_df.set_index(time_col).sort_index()
                               .reindex(valid_index).drop(id_cols, axis=1))
 

From 5b2279612d61c0c81607c8b70733d5c749294e2f Mon Sep 17 00:00:00 2001
From: Lin Yuan <apeforest@gmail.com>
Date: Sun, 27 Apr 2025 14:25:17 -0700
Subject: [PATCH 2/4] Fix DeepAR multi time series id column issue

---
 .../automl_runtime/forecast/deepar/model.py   |  7 +++-
 .../automl_runtime/forecast/deepar/utils.py   | 40 +++++++++----------
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/runtime/databricks/automl_runtime/forecast/deepar/model.py b/runtime/databricks/automl_runtime/forecast/deepar/model.py
index 137c37a..c2454fc 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/model.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/model.py
@@ -100,7 +100,7 @@ def predict(self,
 
         pred_df = pred_df.rename(columns={'index': self._time_col})
         if self._id_cols:
-            id_col_name = '-'.join(self._id_cols)
+            id_col_name = self._id_cols[0]
             pred_df = pred_df.rename(columns={'item_id': id_col_name})
         else:
             pred_df = pred_df.drop(columns='item_id')
@@ -121,6 +121,7 @@ def predict_samples(self,
         if num_samples is None:
             num_samples = self._num_samples
 
+        print("Debug:enter predict_samples")
         # Group by the time column in case there are multiple rows for each time column,
         # for example, the user didn't provide all the identity columns for a multi-series dataset
         group_cols = [self._time_col]
@@ -134,11 +135,15 @@ def predict_samples(self,
                                                                         self._frequency_quantity,
                                                                         self._id_cols)
 
+        print(f"Debug model_input_transformed keys type: {type(model_input_transformed)}")
         test_ds = PandasDataset(model_input_transformed, target=self._target_col)
 
         forecast_iter = self._model.predict(test_ds, num_samples=num_samples)
         forecast_sample_list = list(forecast_iter)
 
+        for forecast in forecast_sample_list:
+            print(f"Debug forecast.item_id type: {type(forecast.item_id)}")
+
         return forecast_sample_list
 
 
diff --git a/runtime/databricks/automl_runtime/forecast/deepar/utils.py b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
index 62afc18..9eeda46 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/utils.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
@@ -13,11 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import List, Optional
+from typing import List, Optional, Union, Dict
 
 import pandas as pd
 
-
 def validate_and_generate_index(df: pd.DataFrame, 
                                 time_col: str, 
                                 frequency_unit: str, 
@@ -66,10 +65,12 @@ def validate_and_generate_index(df: pd.DataFrame,
 
     return new_index_full
 
-def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
-                                          frequency_unit: str,
-                                          frequency_quantity: int,
-                                          id_cols: Optional[List[str]] = None):
+def set_index_and_fill_missing_time_steps(
+        df: pd.DataFrame, time_col: str,
+        frequency_unit: str,
+        frequency_quantity: int,
+        id_cols: Optional[List[str]] = None
+) -> Union[pd.DataFrame, Dict[any, pd.DataFrame]]:
     """
     Transform the input dataframe to an acceptable format for the GluonTS library.
 
@@ -85,7 +86,7 @@ def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
              multi-series - dictionary of transformed dataframes, each key is the (concatenated) id of the time series
     """
     total_min, total_max = df[time_col].min(), df[time_col].max()
-
+    print("Debug:linyuan")
     # We need to adjust the frequency_unit for pd.date_range if it is weekly,
     # otherwise it would always be "W-SUN"
     if frequency_unit.upper() == "W":
@@ -95,26 +96,25 @@ def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
     valid_index = validate_and_generate_index(df=df, time_col=time_col, frequency_unit=frequency_unit, frequency_quantity=frequency_quantity)
 
     if id_cols is not None:
+        if len(id_cols) > 1:
+            raise ValueError("DeepAR does not support multiple time series id columns")
         df_dict = {}
         for grouped_id, grouped_df in df.groupby(id_cols):
             if isinstance(grouped_id, tuple):
                 # TODO (ML-52171): Fix the DeepAR library to support multi-time series id columns
-                # For now, we convert and concatenate the id_cols to a string
-                ts_id = "-".join([str(x) for x in grouped_id])
-            else:
-                ts_id = grouped_id
-            df_dict[ts_id] = (grouped_df.set_index(time_col).sort_index()
-                              .reindex(valid_index).drop(id_cols, axis=1))
+                # For now, DeepAR is dropped for multiple id_cols
+                raise ValueError("DeepAR does not support multiple time series id columns")
+            print(f"Debug groupe_id type: {type(grouped_id)}")
+            df_dict[grouped_id] = (grouped_df.set_index(time_col).sort_index()
+                                   .reindex(valid_index).drop(id_cols, axis=1))
 
         return df_dict
-
-    df = df.set_index(time_col).sort_index()
-
-    # Fill in missing time steps between the min and max time steps
-    df = df.reindex(valid_index)
+    else:
+        df = df.set_index(time_col).sort_index()
+        # Fill in missing time steps between the min and max time steps
+        df = df.reindex(valid_index)
+        return df
 
     if frequency_unit.upper() == "MS":
         # Truncate the day of month to avoid issues with pandas frequency check
         df = df.to_period("M")
-
-    return df

From 7731f8d8a35dabfd0f497912f44ee3d412b72008 Mon Sep 17 00:00:00 2001
From: Lin Yuan <apeforest@gmail.com>
Date: Sun, 27 Apr 2025 20:41:19 -0700
Subject: [PATCH 3/4] Do not concatenate multiple ids for DeepAR

---
 .../automl_runtime/forecast/deepar/utils.py       | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/runtime/databricks/automl_runtime/forecast/deepar/utils.py b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
index 9eeda46..fbf6475 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/utils.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/utils.py
@@ -86,7 +86,7 @@ def set_index_and_fill_missing_time_steps(
              multi-series - dictionary of transformed dataframes, each key is the (concatenated) id of the time series
     """
     total_min, total_max = df[time_col].min(), df[time_col].max()
-    print("Debug:linyuan")
+
     # We need to adjust the frequency_unit for pd.date_range if it is weekly,
     # otherwise it would always be "W-SUN"
     if frequency_unit.upper() == "W":
@@ -104,17 +104,18 @@ def set_index_and_fill_missing_time_steps(
                 # TODO (ML-52171): Fix the DeepAR library to support multi-time series id columns
                 # For now, DeepAR is dropped for multiple id_cols
                 raise ValueError("DeepAR does not support multiple time series id columns")
-            print(f"Debug groupe_id type: {type(grouped_id)}")
             df_dict[grouped_id] = (grouped_df.set_index(time_col).sort_index()
                                    .reindex(valid_index).drop(id_cols, axis=1))
 
         return df_dict
-    else:
-        df = df.set_index(time_col).sort_index()
-        # Fill in missing time steps between the min and max time steps
-        df = df.reindex(valid_index)
-        return df
+
+    df = df.set_index(time_col).sort_index()
+
+    # Fill in missing time steps between the min and max time steps
+    df = df.reindex(valid_index)
 
     if frequency_unit.upper() == "MS":
         # Truncate the day of month to avoid issues with pandas frequency check
         df = df.to_period("M")
+
+    return df

From 5ef4e7ecc3973e2238c3349e2aee286b4d524dd4 Mon Sep 17 00:00:00 2001
From: Lin Yuan <apeforest@gmail.com>
Date: Mon, 28 Apr 2025 00:21:06 -0700
Subject: [PATCH 4/4] Remove debug print statements

---
 runtime/databricks/automl_runtime/forecast/deepar/model.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/runtime/databricks/automl_runtime/forecast/deepar/model.py b/runtime/databricks/automl_runtime/forecast/deepar/model.py
index c2454fc..5c13e4a 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/model.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/model.py
@@ -121,7 +121,6 @@ def predict_samples(self,
         if num_samples is None:
             num_samples = self._num_samples
 
-        print("Debug:enter predict_samples")
         # Group by the time column in case there are multiple rows for each time column,
         # for example, the user didn't provide all the identity columns for a multi-series dataset
         group_cols = [self._time_col]
@@ -135,15 +134,11 @@ def predict_samples(self,
                                                                         self._frequency_quantity,
                                                                         self._id_cols)
 
-        print(f"Debug model_input_transformed keys type: {type(model_input_transformed)}")
         test_ds = PandasDataset(model_input_transformed, target=self._target_col)
 
         forecast_iter = self._model.predict(test_ds, num_samples=num_samples)
         forecast_sample_list = list(forecast_iter)
 
-        for forecast in forecast_sample_list:
-            print(f"Debug forecast.item_id type: {type(forecast.item_id)}")
-
         return forecast_sample_list