Skip to content

Commit 0df4c01

Browse files
committed
[SPARK-43502][PYTHON][CONNECT] `DataFrame.drop` should accept empty column
### What changes were proposed in this pull request? Make `DataFrame.drop` accept empty column ### Why are the changes needed? to be consistent with vanilla PySpark ### Does this PR introduce _any_ user-facing change? yes ``` In [1]: df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) In [2]: df.drop() ``` before: ``` In [2]: df.drop() --------------------------------------------------------------------------- PySparkValueError Traceback (most recent call last) Cell In[2], line 1 ----> 1 df.drop() File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:449, in DataFrame.drop(self, *cols) 444 raise PySparkTypeError( 445 error_class="NOT_COLUMN_OR_STR", 446 message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__}, 447 ) 448 if len(_cols) == 0: --> 449 raise PySparkValueError( 450 error_class="CANNOT_BE_EMPTY", 451 message_parameters={"item": "cols"}, 452 ) 454 return DataFrame.withPlan( 455 plan.Drop( 456 child=self._plan, (...) 459 session=self._session, 460 ) PySparkValueError: [CANNOT_BE_EMPTY] At least one cols must be specified. ``` after ``` In [2]: df.drop() Out[2]: DataFrame[id: bigint, age: bigint] ``` ### How was this patch tested? enabled UT Closes #41180 from zhengruifeng/connect_drop_empty_col. Authored-by: Ruifeng Zheng <[email protected]> Signed-off-by: Ruifeng Zheng <[email protected]>
1 parent 4bf979c commit 0df4c01

File tree

3 files changed

+2
-11
lines changed

3 files changed

+2
-11
lines changed

python/pyspark/sql/connect/dataframe.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -445,11 +445,6 @@ def drop(self, *cols: "ColumnOrName") -> "DataFrame":
445445
error_class="NOT_COLUMN_OR_STR",
446446
message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__},
447447
)
448-
if len(_cols) == 0:
449-
raise PySparkValueError(
450-
error_class="CANNOT_BE_EMPTY",
451-
message_parameters={"item": "cols"},
452-
)
453448

454449
return DataFrame.withPlan(
455450
plan.Drop(

python/pyspark/sql/connect/plan.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,8 @@ def __init__(
664664
columns: List[Union[Column, str]],
665665
) -> None:
666666
super().__init__(child)
667-
assert len(columns) > 0 and all(isinstance(c, (Column, str)) for c in columns)
667+
if len(columns) > 0:
668+
assert all(isinstance(c, (Column, str)) for c in columns)
668669
self._columns = columns
669670

670671
def plan(self, session: "SparkConnectClient") -> proto.Relation:

python/pyspark/sql/tests/connect/test_parity_dataframe.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,6 @@ def test_to_pandas_with_duplicated_column_names(self):
8484
def test_to_pandas_from_mixed_dataframe(self):
8585
self.check_to_pandas_from_mixed_dataframe()
8686

87-
# TODO(SPARK-43502): DataFrame.drop should support empty column
88-
@unittest.skip("Fails in Spark Connect, should enable.")
89-
def test_drop_empty_column(self):
90-
super().test_drop_empty_column()
91-
9287

9388
if __name__ == "__main__":
9489
import unittest

0 commit comments

Comments
 (0)