5 changes: 2 additions & 3 deletions dataframely/collection.py
@@ -736,9 +736,8 @@ def write_parquet(self, directory: str | Path, **kwargs: Any) -> None:
         members which are not provided in the current collection.
 
         Args:
-            directory: The directory where the Parquet files should be written to. If
-                the directory does not exist, it is created automatically, including all
-                of its parents.
+            directory: The directory where the Parquet files should be written to.
+                The `mkdir` kwarg controls whether the directory is created if needed.
             kwargs: Additional keyword arguments passed directly to
                 :meth:`polars.write_parquet` of all members. ``metadata`` may only be
                 provided if it is a dictionary.
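For context, a minimal usage sketch of the behaviour the updated docstring describes, modelled on the new collection test further down in this diff; `MyCollection` stands in for any user-defined collection with members "first" and "second", and the target directory is assumed not to exist yet:

from pathlib import Path

import polars as pl

# `MyCollection` is assumed to be a dataframely collection, as in the test suite below.
collection = MyCollection.validate(
    {
        "first": pl.LazyFrame({"a": [1, 2, 3]}),
        "second": pl.LazyFrame({"a": [1, 2], "b": [10, 15]}),
    },
    cast=True,
)

# mkdir=True is forwarded (like all other kwargs) to polars.write_parquet,
# so the missing directory is created before the member files are written.
collection.write_parquet(Path("output") / "new_dir", mkdir=True)

Without `mkdir=True`, the directory is no longer created implicitly; the caller opts in via the forwarded kwarg.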
3 changes: 2 additions & 1 deletion dataframely/failure.py
@@ -91,7 +91,8 @@ def write_parquet(self, file: str | Path | IO[bytes], **kwargs: Any) -> None:
         Args:
             file: The file path or writable file-like object to which to write the
                 parquet file. This should be a path to a directory if writing a
-                partitioned dataset.
+                partitioned dataset. The `mkdir` kwarg controls whether the directory
+                is created if needed.
             kwargs: Additional keyword arguments passed directly to
                 :meth:`polars.write_parquet`. ``metadata`` may only be provided if it
                 is a dictionary.
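The failure-info path works the same way; a sketch under the assumption that `failure` is an existing FailureInfo instance (for example the one built in the parametrized test further down):

from pathlib import Path

# `failure` is assumed to be a dataframely FailureInfo obtained elsewhere.
target = Path("reports") / "new_dir" / "failure.parquet"

# mkdir=True is forwarded to polars.write_parquet so the parent directory
# is created if it does not exist yet; metadata must be a dictionary.
failure.write_parquet(target, metadata={"custom": "test"}, mkdir=True)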
3 changes: 2 additions & 1 deletion dataframely/schema.py
@@ -695,7 +695,8 @@ def write_parquet(
             df: The data frame to write to the parquet file.
             file: The file path or writable file-like object to which to write the
                 parquet file. This should be a path to a directory if writing a
-                partitioned dataset.
+                partitioned dataset. The `mkdir` kwarg controls whether the directory
+                is created if needed.
             kwargs: Additional keyword arguments passed directly to
                 :meth:`polars.write_parquet`. ``metadata`` may only be provided if it
                 is a dictionary.
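Likewise for schemas; a minimal sketch mirroring the new schema test at the end of this diff, where `MySchema` is the single-column example schema defined there:

from pathlib import Path

import dataframely as dy


class MySchema(dy.Schema):
    a = dy.Int64()


df = MySchema.create_empty()
# mkdir=True creates "non_existing_dir" before the parquet file is written.
MySchema.write_parquet(df, file=Path("non_existing_dir") / "df.parquet", mkdir=True)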
339 changes: 169 additions & 170 deletions pixi.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pixi.toml
@@ -12,7 +12,7 @@ python = ">=3.10"
 rust = "=1.85"
 
 numpy = "*"
-polars = ">=1.32"
+polars = ">=1.33"
 pytest-mock = ">=3.14.1,<4"
 
 [host-dependencies]
22 changes: 22 additions & 0 deletions tests/collection/test_storage.py
@@ -397,6 +397,28 @@ def test_read_invalid_parquet_metadata_collection(
     assert collection is None
 
 
+def test_write_nonexistent_directory(tmp_path: Path) -> None:
+    # Arrange
+    collection = MyCollection.validate(
+        {
+            "first": pl.LazyFrame({"a": [1, 2, 3]}),
+            "second": pl.LazyFrame({"a": [1, 2], "b": [10, 15]}),
+        },
+        cast=True,
+    )
+
+    # Act
+    path = tmp_path / "non_existent_dir"
+    collection.write_parquet(path, mkdir=True)
+
+    # Assert
+    out = MyCollection.read_parquet(path)
+    assert_frame_equal(collection.first, out.first)
+    assert collection.second is not None
+    assert out.second is not None
+    assert_frame_equal(collection.second, out.second)
+
+
 # ---------------------------- DELTA LAKE SPECIFICS ---------------------------------- #
 
 
15 changes: 12 additions & 3 deletions tests/failure_info/test_storage.py
@@ -112,7 +112,12 @@ def test_invalid_schema_deserialization(


 # ------------------------------------ Parquet -----------------------------------------
-def test_write_parquet_custom_metadata(tmp_path: Path) -> None:
+
+
+@pytest.mark.parametrize("check_non_existent_directory", [True, False])
+def test_write_parquet_custom_metadata(
+    tmp_path: Path, check_non_existent_directory: bool
+) -> None:
     # Arrange
     df = pl.DataFrame(
         {
@@ -124,8 +129,12 @@ def test_write_parquet_custom_metadata(tmp_path: Path) -> None:
     assert failure._df.height == 4
 
     # Act
-    p = tmp_path / "failure.parquet"
-    failure.write_parquet(p, metadata={"custom": "test"})
+    if check_non_existent_directory:
+        p = tmp_path / "non_existent" / "failure.parquet"
+        failure.write_parquet(p, metadata={"custom": "test"}, mkdir=True)
+    else:
+        p = tmp_path / "failure.parquet"
+        failure.write_parquet(p, metadata={"custom": "test"})
 
     # Assert
     assert pl.read_parquet_metadata(p)["custom"] == "test"
16 changes: 16 additions & 0 deletions tests/schema/test_read_write_parquet.py
@@ -25,3 +25,19 @@ def test_read_invalid_parquet_metadata_schema(

     # Assert
     assert schema is None
+
+
+class MySchema(dy.Schema):
+    a = dy.Int64()
+
+
+def test_write_parquet_non_existing_directory(tmp_path: Path) -> None:
+    # Arrange
+    df = MySchema.create_empty()
+    file = tmp_path / "non_existing_dir" / "df.parquet"
+
+    # Act
+    MySchema.write_parquet(df, file=file, mkdir=True)
+
+    # Assert
+    assert file.exists()