diff --git a/changes/3227.feature.rst b/changes/3227.feature.rst
new file mode 100644
index 0000000000..ddbedd0a30
--- /dev/null
+++ b/changes/3227.feature.rst
@@ -0,0 +1 @@
+Add lightweight implementations of .getsize() and .getsize_prefix() for ObjectStore.
diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py
index cbe037d86b..e1469a991e 100644
--- a/src/zarr/storage/_obstore.py
+++ b/src/zarr/storage/_obstore.py
@@ -212,19 +212,21 @@ def supports_listing(self) -> bool:
         # docstring inherited
         return True
 
-    def list(self) -> AsyncGenerator[str, None]:
-        # docstring inherited
+    async def _list(self, prefix: str | None = None) -> AsyncGenerator[ObjectMeta, None]:
         import obstore as obs
 
-        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store)
-        return _transform_list(objects)
+        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store, prefix=prefix)
+        async for batch in objects:
+            for item in batch:
+                yield item
 
-    def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
+    def list(self) -> AsyncGenerator[str, None]:
         # docstring inherited
-        import obstore as obs
+        return (obj["path"] async for obj in self._list())
 
-        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store, prefix=prefix)
-        return _transform_list(objects)
+    def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
+        # docstring inherited
+        return (obj["path"] async for obj in self._list(prefix))
 
     def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
         # docstring inherited
@@ -233,21 +235,21 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
         coroutine = obs.list_with_delimiter_async(self.store, prefix=prefix)
         return _transform_list_dir(coroutine, prefix)
 
+    async def getsize(self, key: str) -> int:
+        # docstring inherited
+        import obstore as obs
 
-async def _transform_list(
-    list_stream: ListStream[Sequence[ObjectMeta]],
-) -> AsyncGenerator[str, None]:
-    """
-    Transform the result of list into an async generator of paths.
-    """
-    async for batch in list_stream:
-        for item in batch:
-            yield item["path"]
+        resp = await obs.head_async(self.store, key)
+        return resp["size"]
+
+    async def getsize_prefix(self, prefix: str) -> int:
+        # docstring inherited
+        sizes = [obj["size"] async for obj in self._list(prefix=prefix)]
+        return sum(sizes)
 
 
 async def _transform_list_dir(
-    list_result_coroutine: Coroutine[Any, Any, ListResult[Sequence[ObjectMeta]]],
-    prefix: str,
+    list_result_coroutine: Coroutine[Any, Any, ListResult[Sequence[ObjectMeta]]], prefix: str
 ) -> AsyncGenerator[str, None]:
     """
     Transform the result of list_with_delimiter into an async generator of paths.
diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py
index 4d9e8fcc1f..d8b89e56b7 100644
--- a/tests/test_store/test_object.py
+++ b/tests/test_store/test_object.py
@@ -75,6 +75,21 @@ def test_store_init_raises(self) -> None:
         with pytest.raises(TypeError):
             ObjectStore("path/to/store")
 
+    async def test_store_getsize(self, store: ObjectStore) -> None:
+        buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04")
+        await self.set(store, "key", buf)
+        size = await store.getsize("key")
+        assert size == len(buf)
+
+    async def test_store_getsize_prefix(self, store: ObjectStore) -> None:
+        buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04")
+        await self.set(store, "c/key1/0", buf)
+        await self.set(store, "c/key2/0", buf)
+        size = await store.getsize_prefix("c/key1")
+        assert size == len(buf)
+        total_size = await store.getsize_prefix("c")
+        assert total_size == len(buf) * 2
+
 
 @pytest.mark.slow_hypothesis
 def test_zarr_hierarchy():