Skip to content

Analysis Exception: Cannot resolve column name "label" among () #5

@vbabashov

Description

@vbabashov

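It looks like the label column must be named exactly "label"; otherwise, fitting raises an AnalysisException, because stratify_data in spark_stratifier hard-codes dataset['label'] (see the traceback below).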


AnalysisException Traceback (most recent call last)
in
5
6 with mlflow.start_run():
----> 7 model = scv.fit(train_sdf_prepared)
8 best_params = dict(
9 eta_best=model.bestModel.getEta(),

/databricks/python_shell/dbruntime/MLWorkloadsInstrumentation/_pyspark.py in patched_method(self, *args, **kwargs)
28 call_succeeded = False
29 try:
---> 30 result = original_method(self, *args, **kwargs)
31 call_succeeded = True
32 return result

/databricks/python/lib/python3.8/site-packages/mlflow/utils/autologging_utils/safety.py in safe_patch_function(*args, **kwargs)
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
436
437 # Whether or not the original / underlying function has been called during the

/databricks/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
159 return self.copy(params)._fit(dataset)
160 else:
--> 161 return self._fit(dataset)
162 else:
163 raise TypeError("Params must be either a param map or a list/tuple of param maps, "

/databricks/python/lib/python3.8/site-packages/spark_stratifier/stratifier.py in _fit(self, dataset)
45 metrics = [0.0] * numModels
46
---> 47 stratified_data = self.stratify_data(dataset)
48
49 for i in range(nFolds):

/databricks/python/lib/python3.8/site-packages/spark_stratifier/stratifier.py in stratify_data(self, dataset)
26 split_ratio = 1.0 / nFolds
27
---> 28 passes = dataset[dataset['label'] == 1]
29 fails = dataset[dataset['label'] == 0]
30

/databricks/spark/python/pyspark/sql/dataframe.py in __getitem__(self, item)
1775 """
1776 if isinstance(item, str):
-> 1777 jc = self._jdf.apply(item)
1778 return Column(jc)
1779 elif isinstance(item, Column):

/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306

/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
121 # Hide where the exception came from that shows a non-Pythonic
122 # JVM exception message.
--> 123 raise converted from None
124 else:
125 raise

AnalysisException: Cannot resolve column name "label" among (sales_qty, sales_qty_avg_l14, sales_qty_stddev_l14, sales_num_zero_gross_day_l14, sales_num_zero_gross_day_pct_l14, sales_qty_lag1, sales_vs_avg_pct_l14, days_since_last_sale, inventory_on_hand, inventory_on_hand_lag1, inv_avg_l14, inv_stddev_l14, inv_avg_nonzero_l14, inv_stddev_nonzero_l14, package_volume_qty, package_weight_qty, store_size_cd, rz_flag, features)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions