-
Notifications
You must be signed in to change notification settings - Fork 9
Description
It looks like the target column must be named `label`; otherwise, `stratify_data` raises an AnalysisException because it accesses `dataset['label']` directly.
AnalysisException Traceback (most recent call last)
<ipython-input> in <module>
5
6 with mlflow.start_run():
----> 7 model = scv.fit(train_sdf_prepared)
8 best_params = dict(
9 eta_best=model.bestModel.getEta(),
/databricks/python_shell/dbruntime/MLWorkloadsInstrumentation/_pyspark.py in patched_method(self, *args, **kwargs)
28 call_succeeded = False
29 try:
---> 30 result = original_method(self, *args, **kwargs)
31 call_succeeded = True
32 return result
/databricks/python/lib/python3.8/site-packages/mlflow/utils/autologging_utils/safety.py in safe_patch_function(*args, **kwargs)
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
436
437 # Whether or not the original / underlying function has been called during the
/databricks/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
159 return self.copy(params)._fit(dataset)
160 else:
--> 161 return self._fit(dataset)
162 else:
163 raise TypeError("Params must be either a param map or a list/tuple of param maps, "
/databricks/python/lib/python3.8/site-packages/spark_stratifier/stratifier.py in _fit(self, dataset)
45 metrics = [0.0] * numModels
46
---> 47 stratified_data = self.stratify_data(dataset)
48
49 for i in range(nFolds):
/databricks/python/lib/python3.8/site-packages/spark_stratifier/stratifier.py in stratify_data(self, dataset)
26 split_ratio = 1.0 / nFolds
27
---> 28 passes = dataset[dataset['label'] == 1]
29 fails = dataset[dataset['label'] == 0]
30
/databricks/spark/python/pyspark/sql/dataframe.py in __getitem__(self, item)
1775 """
1776 if isinstance(item, str):
-> 1777 jc = self._jdf.apply(item)
1778 return Column(jc)
1779 elif isinstance(item, Column):
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
121 # Hide where the exception came from that shows a non-Pythonic
122 # JVM exception message.
--> 123 raise converted from None
124 else:
125 raise
AnalysisException: Cannot resolve column name "label" among (sales_qty, sales_qty_avg_l14, sales_qty_stddev_l14, sales_num_zero_gross_day_l14, sales_num_zero_gross_day_pct_l14, sales_qty_lag1, sales_vs_avg_pct_l14, days_since_last_sale, inventory_on_hand, inventory_on_hand_lag1, inv_avg_l14, inv_stddev_l14, inv_avg_nonzero_l14, inv_stddev_nonzero_l14, package_volume_qty, package_weight_qty, store_size_cd, rz_flag, features)