diff --git a/changes/3049.misc.rst b/changes/3049.misc.rst new file mode 100644 index 0000000000..79ecd6ed95 --- /dev/null +++ b/changes/3049.misc.rst @@ -0,0 +1 @@ +Added tests for ``AsyncArray``, ``Array`` and removed duplicate argument parsing. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 59261cca8a..cdedd5b033 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -1019,11 +1019,6 @@ async def create( warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) if read_only is not None: warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2) - if dimension_separator is not None and zarr_format == 3: - raise ValueError( - "dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead" - ) - if order is not None: _warn_order_kwarg() if write_empty_chunks is not None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index cf4c36cc22..78b5e92ed6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -140,7 +140,8 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): - if data["zarr_format"] == 3: + zarr_format = data.get("zarr_format") + if zarr_format == 3: meta_out = ArrayV3Metadata.from_dict(data) if len(meta_out.storage_transformers) > 0: msg = ( @@ -149,9 +150,11 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: ) raise ValueError(msg) return meta_out - elif data["zarr_format"] == 2: + elif zarr_format == 2: return ArrayV2Metadata.from_dict(data) - raise TypeError + else: + raise ValueError(f"Invalid zarr_format: {zarr_format}. 
Expected 2 or 3") + raise TypeError # pragma: no cover def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: @@ -160,8 +163,7 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: elif isinstance(metadata, ArrayV2Metadata): v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor) return get_pipeline_class().from_codecs([v2_codec]) - else: - raise TypeError + raise TypeError # pragma: no cover async def get_array_metadata( @@ -268,17 +270,6 @@ def __init__( store_path: StorePath, config: ArrayConfigLike | None = None, ) -> None: - if isinstance(metadata, dict): - zarr_format = metadata["zarr_format"] - # TODO: remove this when we extensively type the dict representation of metadata - _metadata = cast(dict[str, JSON], metadata) - if zarr_format == 2: - metadata = ArrayV2Metadata.from_dict(_metadata) - elif zarr_format == 3: - metadata = ArrayV3Metadata.from_dict(_metadata) - else: - raise ValueError(f"Invalid zarr_format: {zarr_format}. 
Expected 2 or 3") - metadata_parsed = parse_array_metadata(metadata) config_parsed = parse_array_config(config) diff --git a/tests/test_api.py b/tests/test_api.py index d1912f7238..6904f91fe7 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from typing import TYPE_CHECKING import zarr.codecs @@ -72,13 +73,19 @@ def test_create(memory_store: Store) -> None: # TODO: parametrize over everything this function takes @pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_create_array(store: Store) -> None: +def test_create_array(store: Store, zarr_format: ZarrFormat) -> None: attrs: dict[str, JSON] = {"foo": 100} # explicit type annotation to avoid mypy error shape = (10, 10) path = "foo" data_val = 1 array_w = create_array( - store, name=path, shape=shape, attributes=attrs, chunks=shape, dtype="uint8" + store, + name=path, + shape=shape, + attributes=attrs, + chunks=shape, + dtype="uint8", + zarr_format=zarr_format, ) array_w[:] = data_val assert array_w.shape == shape @@ -87,18 +94,27 @@ def test_create_array(store: Store) -> None: @pytest.mark.parametrize("write_empty_chunks", [True, False]) -def test_write_empty_chunks_warns(write_empty_chunks: bool) -> None: +def test_write_empty_chunks_warns(write_empty_chunks: bool, zarr_format: ZarrFormat) -> None: """ Test that using the `write_empty_chunks` kwarg on array access will raise a warning. 
""" match = "The `write_empty_chunks` keyword argument .*" with pytest.warns(RuntimeWarning, match=match): _ = zarr.array( - data=np.arange(10), shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks + data=np.arange(10), + shape=(10,), + dtype="uint8", + write_empty_chunks=write_empty_chunks, + zarr_format=zarr_format, ) with pytest.warns(RuntimeWarning, match=match): - _ = zarr.create(shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks) + _ = zarr.create( + shape=(10,), + dtype="uint8", + write_empty_chunks=write_empty_chunks, + zarr_format=zarr_format, + ) @pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) @@ -115,18 +131,18 @@ def test_open_normalized_path( assert node.path == normalize_path(path) -async def test_open_array(memory_store: MemoryStore) -> None: +async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> None: store = memory_store # open array, create if doesn't exist - z = open(store=store, shape=100) + z = open(store=store, shape=100, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite # store._store_dict = {} store = MemoryStore() - z = open(store=store, shape=200) + z = open(store=store, shape=200, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (200,) @@ -140,7 +156,16 @@ async def test_open_array(memory_store: MemoryStore) -> None: # path not found with pytest.raises(FileNotFoundError): - open(store="doesnotexist", mode="r") + open(store="doesnotexist", mode="r", zarr_format=zarr_format) + + +@pytest.mark.parametrize("store", ["memory", "local", "zip"], indirect=True) +def test_v2_and_v3_exist_at_same_path(store: Store) -> None: + zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=3) + zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=2) + msg = f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store}. Zarr v3 will be used." 
+ with pytest.warns(UserWarning, match=re.escape(msg)): + zarr.open(store=store, mode="r") @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -163,9 +188,9 @@ async def test_open_group(memory_store: MemoryStore) -> None: assert "foo" in g # open group, overwrite - # g = open_group(store=store) - # assert isinstance(g, Group) - # assert "foo" not in g + g = open_group(store=store, mode="w") + assert isinstance(g, Group) + assert "foo" not in g # open group, read-only store_cls = type(store) @@ -308,7 +333,6 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: zarr.open(store=tmp_path, mode="w-") -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_order(zarr_format: ZarrFormat) -> None: arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format) expected = zarr.config.get("array.order") @@ -324,7 +348,6 @@ def test_array_order(zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize("order", ["C", "F"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"): arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) @@ -1095,13 +1118,16 @@ def test_open_falls_back_to_open_group() -> None: assert group.attrs == {"key": "value"} -async def test_open_falls_back_to_open_group_async() -> None: +async def test_open_falls_back_to_open_group_async(zarr_format: ZarrFormat) -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 store = MemoryStore() - await zarr.api.asynchronous.open_group(store, attributes={"key": "value"}) + await zarr.api.asynchronous.open_group( + store, attributes={"key": "value"}, zarr_format=zarr_format + ) group = await zarr.api.asynchronous.open(store=store) assert isinstance(group, zarr.core.group.AsyncGroup) + assert group.metadata.zarr_format == zarr_format assert group.attrs == {"key": "value"} @@ -1137,13 +1163,14 @@ 
async def test_metadata_validation_error() -> None: ["local", "memory", "zip"], indirect=True, ) -def test_open_array_with_mode_r_plus(store: Store) -> None: +def test_open_array_with_mode_r_plus(store: Store, zarr_format: ZarrFormat) -> None: # 'r+' means read/write (must exist) with pytest.raises(FileNotFoundError): - zarr.open_array(store=store, mode="r+") - zarr.ones(store=store, shape=(3, 3)) + zarr.open_array(store=store, mode="r+", zarr_format=zarr_format) + zarr.ones(store=store, shape=(3, 3), zarr_format=zarr_format) z2 = zarr.open_array(store=store, mode="r+") assert isinstance(z2, Array) + assert z2.metadata.zarr_format == zarr_format result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() diff --git a/tests/test_array.py b/tests/test_array.py index 989fe30592..eb19f0e7f3 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -41,6 +41,7 @@ from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition +from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv @@ -51,7 +52,7 @@ if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike - from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v2 import ArrayV2Metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -227,10 +228,13 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str assert arr.fill_value.dtype == arr.dtype -def test_create_positional_args_deprecated() -> None: - store = MemoryStore() - with pytest.warns(FutureWarning, match="Pass"): - zarr.Array.create(store, (2, 2), dtype="f8") +async def test_create_deprecated() -> None: + with pytest.warns(DeprecationWarning): + with 
pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): + await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") # type: ignore[call-overload] + with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): + zarr.Array.create(MemoryStore(), (2, 2), dtype="f8") def test_selection_positional_args_deprecated() -> None: @@ -321,24 +325,47 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> @pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_storage_transformers(store: MemoryStore) -> None: +@pytest.mark.parametrize("zarr_format", [2, 3, "invalid"]) +def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None: """ Test that providing an actual storage transformer produces a warning and otherwise passes through """ - metadata_dict: dict[str, JSON] = { - "zarr_format": 3, - "node_type": "array", - "shape": (10,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": "uint8", - "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, - "codecs": (BytesCodec().to_dict(),), - "fill_value": 0, - "storage_transformers": ({"test": "should_raise"}), - } - match = "Arrays with storage transformers are not supported in zarr-python at this time." 
- with pytest.raises(ValueError, match=match): + metadata_dict: dict[str, JSON] + if zarr_format == 3: + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": (10,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "uint8", + "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "storage_transformers": ({"test": "should_raise"}), + } + else: + metadata_dict = { + "zarr_format": zarr_format, + "shape": (10,), + "chunks": (1,), + "dtype": "uint8", + "dimension_separator": ".", + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "order": "C", + "storage_transformers": ({"test": "should_raise"}), + } + if zarr_format == 3: + match = "Arrays with storage transformers are not supported in zarr-python at this time." + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) + elif zarr_format == 2: + # no warning Array.from_dict(StorePath(store), data=metadata_dict) + else: + match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3" + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) @@ -1106,6 +1133,111 @@ async def test_v3_chunk_encoding( assert arr.filters == filters_expected assert arr.compressors == compressors_expected + @staticmethod + @pytest.mark.parametrize("name", ["v2", "default", "invalid"]) + @pytest.mark.parametrize("separator", [".", "/"]) + async def test_chunk_key_encoding( + name: str, separator: Literal[".", "/"], zarr_format: ZarrFormat, store: MemoryStore + ) -> None: + chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[typeddict-item] + error_msg = "" + if name == "invalid": + error_msg = "Unknown chunk key encoding." 
+ if zarr_format == 2 and name == "default": + error_msg = "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the chunk key encoding must be 'v2'." + if error_msg: + with pytest.raises(ValueError, match=re.escape(error_msg)): + arr = await create_array( + store=store, + dtype="uint8", + shape=(10,), + chunks=(1,), + zarr_format=zarr_format, + chunk_key_encoding=chunk_key_encoding, + ) + else: + arr = await create_array( + store=store, + dtype="uint8", + shape=(10,), + chunks=(1,), + zarr_format=zarr_format, + chunk_key_encoding=chunk_key_encoding, + ) + if isinstance(arr.metadata, ArrayV2Metadata): + assert arr.metadata.dimension_separator == separator + + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ({"serializer": "bytes"}, "Zarr format 2 arrays do not support `serializer`."), + ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."), + ], + ) + async def test_create_array_invalid_v2_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + with pytest.raises(ValueError, match=re.escape(error_msg)): + await zarr.api.asynchronous.create_array( + store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs + ) + + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ( + {"dimension_names": ["test"]}, + "dimension_names cannot be used for arrays with zarr_format 2.", + ), + ( + {"chunk_key_encoding": {"name": "default", "separator": "/"}}, + "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead.", + ), + ( + {"codecs": "bytes"}, + "codecs cannot be used for arrays with zarr_format 2. 
Use filters and compressor instead.", + ), + ], + ) + async def test_create_invalid_v2_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + with pytest.raises(ValueError, match=re.escape(error_msg)): + await zarr.api.asynchronous.create( + store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs + ) + + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ( + {"chunk_shape": (1,), "chunks": (2,)}, + "Only one of chunk_shape or chunks can be provided.", + ), + ( + {"dimension_separator": "/"}, + "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead.", + ), + ( + {"filters": []}, + "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead", + ), + ( + {"compressor": "blosc"}, + "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead", + ), + ], + ) + async def test_invalid_v3_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + kwargs.setdefault("chunks", (1,)) + with pytest.raises(ValueError, match=re.escape(error_msg)): + zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs) + @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) @pytest.mark.parametrize( @@ -1585,3 +1717,11 @@ async def test_sharding_coordinate_selection() -> None: result = arr[1, [0, 1]] # type: ignore[index] assert isinstance(result, NDArrayLike) assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_array_repr(store: Store) -> None: + shape = (2, 3, 4) + dtype = "uint8" + arr = zarr.create_array(store, shape=shape, dtype=dtype) + assert str(arr) == f"<Array {store} shape={shape} dtype={dtype}>"