3 changes: 0 additions & 3 deletions dev/tox.ini
@@ -18,9 +18,6 @@
ignore =
E203, # Skip as black formatter adds a whitespace around ':'.
E402, # Module top level import is disabled for optional import check, etc.
# 1. Type hints with def are treated as redefinition (e.g., functions.log).
# 2. Some are used for testing.
F811,
# There are too many instances to fix. Ignored for now.
W503,
W504,
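
For context, F811 is pyflakes' "redefinition of unused name" check, which this change re-enables by dropping it from the ignore list. A minimal illustrative sketch of the two patterns it reports (the names below are made up for illustration, not taken from this PR):

    import unittest
    import unittest  # F811: redefinition of unused 'unittest' (duplicate import)


    def log(col):
        # Silently shadowed by the definition below; this version can never be called.
        return col


    def log(arg1, arg2=None):  # F811: redefinition of unused 'log'
        return arg1 if arg2 is None else arg2

Most of the changes in this PR remove exactly these kinds of duplicate imports and duplicate definitions so the suppression is no longer needed.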
1 change: 0 additions & 1 deletion python/pyspark/logger/tests/test_logger.py
@@ -215,7 +215,6 @@ class LoggerTests(LoggerTestsMixin, ReusedSQLTestCase):


if __name__ == "__main__":
import unittest
from pyspark.logger.tests.test_logger import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/ml/util.py
@@ -50,7 +50,6 @@
from py4j.java_gateway import JavaGateway, JavaObject
from pyspark.ml._typing import PipelineStage
from pyspark.ml.base import Params
from pyspark.ml.wrapper import JavaWrapper
from pyspark.core.context import SparkContext
from pyspark.sql import DataFrame
from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
1 change: 0 additions & 1 deletion python/pyspark/mllib/classification.py
@@ -34,7 +34,6 @@
)
from pyspark.mllib.util import Saveable, Loader, inherit_doc
from pyspark.mllib.linalg import Vector
from pyspark.mllib.regression import LabeledPoint

if TYPE_CHECKING:
from pyspark.mllib._typing import VectorLike
1 change: 0 additions & 1 deletion python/pyspark/mllib/feature.py
@@ -36,7 +36,6 @@

if TYPE_CHECKING:
from pyspark.mllib._typing import VectorLike
from py4j.java_collections import JavaMap

__all__ = [
"Normalizer",
2 changes: 1 addition & 1 deletion python/pyspark/mllib/regression.py
@@ -32,7 +32,7 @@

import numpy as np

from pyspark import RDD, since
from pyspark import since
from pyspark.streaming.dstream import DStream
from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc
from pyspark.mllib.linalg import _convert_to_vector
2 changes: 1 addition & 1 deletion python/pyspark/mllib/tree.py
@@ -18,7 +18,7 @@
import sys
import random

from pyspark import RDD, since
from pyspark import since
from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper
from pyspark.mllib.linalg import _convert_to_vector
from pyspark.mllib.regression import LabeledPoint
3 changes: 1 addition & 2 deletions python/pyspark/mllib/util.py
@@ -20,15 +20,14 @@

import numpy as np

from pyspark import SparkContext, since
from pyspark import since
from pyspark.mllib.common import callMLlibFunc, inherit_doc
from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector
from pyspark.sql import DataFrame
from typing import Generic, Iterable, List, Optional, Tuple, Type, TypeVar, cast, TYPE_CHECKING
from pyspark.core.context import SparkContext
from pyspark.mllib.linalg import Vector
from pyspark.core.rdd import RDD
from pyspark.sql.dataframe import DataFrame

T = TypeVar("T")
L = TypeVar("L", bound="Loader")
@@ -124,7 +124,6 @@ class DiffFramesCorrWithTests(


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.diff_frames_ops.test_corrwith import * # noqa: F401

try:
@@ -91,7 +91,6 @@ class DiffFramesDotFrameTests(DiffFramesDotFrameMixin, PandasOnSparkTestCase, SQ


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.diff_frames_ops.test_dot_frame import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_category.py
@@ -402,7 +402,6 @@ class CategoricalIndexTests(CategoricalIndexTestsMixin, PandasOnSparkTestCase, T


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.indexes.test_category import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_timedelta.py
@@ -115,7 +115,6 @@ class TimedeltaIndexTests(


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.indexes.test_timedelta import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/series/test_string_ops_adv.py
@@ -226,7 +226,6 @@ class SeriesStringOpsAdvTests(


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.series.test_string_ops_adv import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_namespace.py
@@ -683,7 +683,6 @@ class NamespaceTests(NamespaceTestsMixin, PandasOnSparkTestCase, SQLTestUtils):


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.test_namespace import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_numpy_compat.py
@@ -198,7 +198,6 @@ class NumPyCompatTests(


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.test_numpy_compat import * # noqa: F401

try:
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/test_utils.py
@@ -181,7 +181,7 @@ def test_dataframe_error_assert_pandas_almost_equal(self):
},
)

def test_series_error_assert_pandas_equal(self):
def test_series_error_assert_pandas_almost_equal_2(self):
series1 = pd.Series([1, 2, 3])
series2 = pd.Series([4, 5, 6])
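
The rename above is not cosmetic: when a test class defines two methods with the same name, the second definition silently replaces the first in the class body, so one of the tests never runs; F811 is the check that surfaces this. A small sketch with hypothetical test names (not the actual pyspark tests):

    import unittest


    class ExampleTests(unittest.TestCase):
        def test_values_equal(self):
            self.assertEqual(1 + 1, 2)  # shadowed by the method below; never collected

        def test_values_equal(self):  # F811: redefinition of unused 'test_values_equal'
            self.assertEqual(2 + 2, 4)


    if __name__ == "__main__":
        unittest.main()  # only the second test_values_equal is discovered and run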

@@ -17,7 +17,6 @@
import unittest

from pyspark.errors import PySparkException
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.connectutils import (
ReusedConnectTestCase,
should_test_connect,
1 change: 0 additions & 1 deletion python/pyspark/sql/connect/dataframe.py
@@ -19,7 +19,6 @@
from pyspark.errors.exceptions.base import (
SessionNotSameException,
PySparkIndexError,
PySparkAttributeError,
)
from pyspark.resource import ResourceProfile
from pyspark.sql.connect.logging import logger
1 change: 0 additions & 1 deletion python/pyspark/sql/connect/group.py
@@ -55,7 +55,6 @@
PandasGroupedMapFunctionWithState,
)
from pyspark.sql.connect.dataframe import DataFrame
from pyspark.sql.types import StructType


class GroupedData:
1 change: 0 additions & 1 deletion python/pyspark/sql/connect/udf.py
@@ -50,7 +50,6 @@
UserDefinedFunctionLike,
)
from pyspark.sql.connect.session import SparkSession
from pyspark.sql.types import StringType


def _create_py_udf(
52 changes: 1 addition & 51 deletions python/pyspark/sql/functions/builtin.py
@@ -3061,56 +3061,6 @@ def floor(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Co
return _invoke_function_over_columns("floor", col, scale) # type: ignore[arg-type]


@_try_remote_functions
def log(col: "ColumnOrName") -> Column:
"""
Computes the natural logarithm of the given value.

.. versionadded:: 1.4.0

.. versionchanged:: 3.4.0
Supports Spark Connect.

Parameters
----------
col : :class:`~pyspark.sql.Column` or column name
column to calculate natural logarithm for.

Returns
-------
:class:`~pyspark.sql.Column`
natural logarithm of the given value.

Examples
zhengruifeng (Contributor) commented on Nov 28, 2025:

let's move the examples to the remaining log, to keep the doctest coverage

The author (Contributor) replied:

The remaining log has its examples and I think the coverage is similar (if not more):

    Examples
    --------
    Example 1: Specify both base number and the input value

    >>> from pyspark.sql import functions as sf
    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
    >>> df.select("*", sf.log(2.0, df.value)).show()
    +-----+---------------+
    |value|LOG(2.0, value)|
    +-----+---------------+
    |    1|            0.0|
    |    2|            1.0|
    |    4|            2.0|
    +-----+---------------+

    Example 2: Return NULL for invalid input values

    >>> from pyspark.sql import functions as sf
    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (0), (-1), (NULL) AS t(value)")
    >>> df.select("*", sf.log(3.0, df.value)).show()
    +-----+------------------+
    |value|   LOG(3.0, value)|
    +-----+------------------+
    |    1|               0.0|
    |    2|0.6309297535714...|
    |    0|              NULL|
    |   -1|              NULL|
    | NULL|              NULL|
    +-----+------------------+

    Example 3: Specify only the input value (Natural logarithm)

    >>> from pyspark.sql import functions as sf
    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
    >>> df.select("*", sf.log(df.value)).show()
    +-----+------------------+
    |value|         ln(value)|
    +-----+------------------+
    |    1|               0.0|
    |    2|0.6931471805599...|
    |    4|1.3862943611198...|
    +-----+------------------+

--------
Example 1: Compute the natural logarithm of E

>>> from pyspark.sql import functions as sf
>>> spark.range(1).select(sf.log(sf.e())).show()
+-------+
|ln(E())|
+-------+
|    1.0|
+-------+

Example 2: Compute the natural logarithm of invalid values

>>> from pyspark.sql import functions as sf
>>> spark.sql(
... "SELECT * FROM VALUES (-1), (0), (FLOAT('NAN')), (NULL) AS TAB(value)"
... ).select("*", sf.log("value")).show()
+-----+---------+
|value|ln(value)|
+-----+---------+
| -1.0|     NULL|
|  0.0|     NULL|
|  NaN|      NaN|
| NULL|     NULL|
+-----+---------+
"""
return _invoke_function_over_columns("log", col)


@_try_remote_functions
def log10(col: "ColumnOrName") -> Column:
"""
@@ -8342,7 +8292,7 @@ def when(condition: Column, value: Any) -> Column:
return _invoke_function("when", condition._jc, v)


@overload # type: ignore[no-redef]
@overload
def log(arg1: "ColumnOrName") -> Column:
...
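
With the duplicate single-argument `log` removed above, these overload stubs no longer redefine an earlier function, so the `# type: ignore[no-redef]` suppression can go. For readers unfamiliar with the pattern, a small generic sketch of `typing.overload` stubs backed by a single implementation (plain Python for illustration, not the actual pyspark signatures):

    import math
    from typing import Optional, overload


    @overload
    def log(value: float) -> float: ...
    @overload
    def log(base: float, value: float) -> float: ...


    def log(arg1: float, arg2: Optional[float] = None) -> float:
        # One runtime implementation; the stubs above exist only for type checkers.
        if arg2 is None:
            return math.log(arg1)        # one argument: natural logarithm
        return math.log(arg2, arg1)      # two arguments: the first is the base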

6 changes: 0 additions & 6 deletions python/pyspark/sql/tests/arrow/test_arrow.py
@@ -61,7 +61,6 @@
from pyspark.errors import ArithmeticException, PySparkTypeError, UnsupportedOperationException
from pyspark.loose_version import LooseVersion
from pyspark.util import is_remote_only
from pyspark.loose_version import LooseVersion

if have_pandas:
import pandas as pd
@@ -1009,11 +1008,6 @@ def check_createDataFrame_pandas_with_struct_type(self, arrow_enabled):
expected[r][e] == result[r][e], f"{expected[r][e]} == {result[r][e]}"
)

def test_createDataFrame_pandas_with_struct_type(self):
A reviewer (Contributor) commented:

why remove this one?

The author (Contributor) replied:

There is another one at line 986 that is exactly the same.

for arrow_enabled in [True, False]:
with self.subTest(arrow_enabled=arrow_enabled):
self.check_createDataFrame_pandas_with_struct_type(arrow_enabled)

def test_createDataFrame_arrow_with_struct_type_nulls(self):
t = pa.table(
{
14 changes: 7 additions & 7 deletions python/pyspark/sql/tests/arrow/test_arrow_udf.py
@@ -164,13 +164,13 @@ def test_arrow_udf_wrong_arg(self):
with self.assertRaises(ParseException):

@arrow_udf("blah")
def foo(x):
def _(x):
return x

with self.assertRaises(PySparkTypeError) as pe:

@arrow_udf(returnType="double", functionType=PandasUDFType.SCALAR)
def foo(df):
def _(df):
return df

self.check_error(
@@ -185,7 +185,7 @@ def foo(df):
with self.assertRaises(PySparkTypeError) as pe:

@arrow_udf(functionType=ArrowUDFType.SCALAR)
def foo(x):
def _(x):
return x

self.check_error(
@@ -197,7 +197,7 @@ def foo(x):
with self.assertRaises(PySparkTypeError) as pe:

@arrow_udf("double", 100)
def foo(x):
def _(x):
return x

self.check_error(
@@ -209,7 +209,7 @@ def foo(x):
with self.assertRaises(PySparkTypeError) as pe:

@arrow_udf(returnType=PandasUDFType.GROUPED_MAP)
def foo(df):
def _(df):
return df

self.check_error(
@@ -224,13 +224,13 @@ def foo(df):
with self.assertRaisesRegex(ValueError, "0-arg arrow_udfs.*not.*supported"):

@arrow_udf(LongType(), ArrowUDFType.SCALAR)
def zero_with_type():
def _():
return 1

with self.assertRaisesRegex(ValueError, "0-arg arrow_udfs.*not.*supported"):

@arrow_udf(LongType(), ArrowUDFType.SCALAR_ITER)
def zero_with_type():
def _():
yield 1
yield 2

2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/arrow/test_arrow_udf_scalar.py
@@ -27,7 +27,7 @@
from pyspark.util import PythonEvalType

from pyspark.sql.functions import arrow_udf, ArrowUDFType
from pyspark.sql import Row, functions as F
from pyspark.sql import functions as F
from pyspark.sql.types import (
IntegerType,
ByteType,
@@ -313,7 +313,6 @@ def test_server_listener_uninterruptible(self):


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.streaming.test_parity_listener import * # noqa: F401

try:
@@ -26,7 +26,6 @@ class DataFrameQueryContextParityTests(DataFrameQueryContextTestsMixin, ReusedCo


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.test_parity_dataframe_query_context import * # noqa: F401

try:
@@ -26,7 +26,6 @@ class GeographyTypeParityTest(GeographyTypeTestMixin, ReusedConnectTestCase):


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.test_parity_geographytype import * # noqa: F401

try:
@@ -26,7 +26,6 @@ class GeometryTypeParityTest(GeometryTypeTestMixin, ReusedConnectTestCase):


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.test_parity_geometrytype import * # noqa: F401

try:
@@ -29,7 +29,6 @@ class DataFrameObservationParityTests(


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.test_parity_observation import * # noqa: F401

try:
1 change: 0 additions & 1 deletion python/pyspark/sql/tests/connect/test_parity_readwriter.py
@@ -37,7 +37,6 @@ def test_partitioning_functions(self):


if __name__ == "__main__":
import unittest
from pyspark.sql.tests.connect.test_parity_readwriter import * # noqa: F401

try:
4 changes: 0 additions & 4 deletions python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -56,10 +56,6 @@ def test_udf_defers_judf_initialization(self):
def test_nondeterministic_udf3(self):
super().test_nondeterministic_udf3()

@unittest.skip("Spark Connect doesn't support RDD but the test depends on it.")
def test_worker_original_stdin_closed(self):
super().test_worker_original_stdin_closed()

@unittest.skip("Spark Connect does not support SQLContext but the test depends on it.")
def test_udf_on_sql_context(self):
super().test_udf_on_sql_context()
@@ -26,7 +26,6 @@
ReusedSQLTestCase,
)

from pyspark.testing.sqlutils import ReusedSQLTestCase
from pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_state_variable import (
TransformWithStateStateVariableTestsMixin,
)