Commit 39d35b5

Move zarr-specific tests to test_zarr.py (#715)
1 parent 7611fe1 commit 39d35b5

File tree

3 files changed: +74 −38
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+"""zarr
+
+Revision ID: 2381a77e8487
+Revises: d58983739401
+Create Date: 2023-03-13 14:57:02.474967
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '2381a77e8487'
+down_revision = 'd58983739401'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.execute(
+        """
+        update test_run
+        set path = 'benchmarks/test_zarr.py'
+        where path = 'benchmarks/test_array.py'
+        and originalname in (
+            'test_filter_then_average',
+            'test_access_slices',
+            'test_sum_residuals'
+        )
+        """
+    )
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
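
This migration rewrites historical rows in the test_run results table so stored paths follow the three tests to their new file; it applies with the usual alembic upgrade head. The commit leaves downgrade() as an auto-generated no-op. If a reversible migration were wanted, a minimal sketch (not part of this commit) would simply invert the UPDATE:

def downgrade() -> None:
    # Hypothetical inverse of upgrade(), not part of this commit:
    # move the three renamed test paths back to benchmarks/test_array.py.
    op.execute(
        """
        update test_run
        set path = 'benchmarks/test_array.py'
        where path = 'benchmarks/test_zarr.py'
        and originalname in (
            'test_filter_then_average',
            'test_access_slices',
            'test_sum_residuals'
        )
        """
    )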

tests/benchmarks/test_array.py

Lines changed: 0 additions & 35 deletions
@@ -21,15 +21,6 @@
 )


-@pytest.fixture(scope="module")
-def zarr_dataset():
-    s3_uri = (
-        "s3://coiled-runtime-ci/synthetic-zarr/"
-        "synth_random_int_array_2000_cubed.zarr"
-    )
-    return da.from_zarr(s3_uri)
-
-
 def test_anom_mean(small_client):
     # From https://github.com/dask/distributed/issues/2602#issuecomment-498718651

@@ -244,32 +235,6 @@ def test_map_overlap_sample(small_client):
244235
y[5000:5010, 5000:5010].compute()
245236

246237

247-
@run_up_to_nthreads("small_cluster", 100, reason="fixed dataset")
248-
@pytest.mark.parametrize("threshold", [50, 100, 200, 255])
249-
def test_filter_then_average(threshold, zarr_dataset, small_client):
250-
"""
251-
Compute the mean for increasingly sparse boolean filters of an array
252-
"""
253-
zarr_dataset[zarr_dataset > threshold].mean().compute()
254-
255-
256-
@run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
257-
@pytest.mark.parametrize("N", [700, 75, 1])
258-
def test_access_slices(N, zarr_dataset, small_client):
259-
"""
260-
Accessing just a few chunks of a zarr array should be quick
261-
"""
262-
distributed.wait(zarr_dataset[:N, :N, :N].persist())
263-
264-
265-
@run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
266-
def test_sum_residuals(zarr_dataset, small_client):
267-
"""
268-
Simnple test to that computes as reduction, the array op, the reduction again
269-
"""
270-
(zarr_dataset - zarr_dataset.mean(axis=0)).sum().compute()
271-
272-
273238
@run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
274239
def test_rechunk_in_memory(small_client, configure_rechunking):
275240
x = da.random.random((50000, 50000))
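
For context, the removed tests drove work with bare compute() and persist() calls; their replacements in test_zarr.py below go through a wait helper with a timeout instead. A minimal standalone sketch of the old distributed.wait-on-persist pattern, using a small local cluster and a random array as stand-ins for the 2000-cubed S3 zarr store:

import dask.array as da
import distributed

# Stand-ins for illustration; the real test sliced a zarr-backed array
# of shape (2000, 2000, 2000) stored in chunks of (200, 200, 200).
client = distributed.Client(processes=False)
x = da.random.randint(0, 255, size=(400, 400, 400), chunks=(100, 100, 100))
subset = x[:100, :100, :100].persist()  # submit the slice to the cluster
distributed.wait(subset)  # block until the persisted chunks are in memory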

tests/benchmarks/test_zarr.py

Lines changed: 37 additions & 3 deletions
@@ -1,16 +1,50 @@
 from __future__ import annotations

+import dask.array as da
 import pytest
 import xarray

-from ..utils_test import run_up_to_nthreads
+from ..utils_test import run_up_to_nthreads, wait
+
+
+@pytest.fixture(scope="module")
+def zarr_dataset():
+    # shape = (2000, 2000, 2000)
+    # chunks = (200, 200, 200)
+    # Compresses to ~42% of its original size (tested on lz4 4.0)
+    store = (
+        "s3://coiled-runtime-ci/synthetic-zarr/synth_random_int_array_2000_cubed.zarr"
+    )
+    return da.from_zarr(store)


 @pytest.fixture(scope="module")
 def cmip6():
     store = "s3://coiled-runtime-ci/CMIP6/CMIP/AS-RCEC/TaiESM1/1pctCO2/r1i1p1f1/Amon/zg/gn/v20200225/"
-    ds = xarray.open_dataset(store, engine="zarr", chunks={})
-    yield ds
+    return xarray.open_dataset(store, engine="zarr", chunks={})
+
+
+@run_up_to_nthreads("small_cluster", 100, reason="fixed dataset")
+@pytest.mark.parametrize("threshold", [50, 100, 200, 255])
+def test_filter_then_average(small_client, zarr_dataset, threshold):
+    """Compute the mean for increasingly sparse boolean filters of an array"""
+    a = zarr_dataset[zarr_dataset > threshold].mean()
+    wait(a, small_client, 300)
+
+
+@run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
+@pytest.mark.parametrize("N", [700, 75, 1])
+def test_access_slices(small_client, zarr_dataset, N):
+    """Accessing just a few chunks of a zarr array should be quick"""
+    a = zarr_dataset[:N, :N, :N]
+    wait(a, small_client, 300)
+
+
+@run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
+def test_sum_residuals(small_client, zarr_dataset):
+    """Compute reduce, then map, then reduce again"""
+    a = (zarr_dataset - zarr_dataset.mean(axis=0)).sum()
+    wait(a, small_client, 300)


 @run_up_to_nthreads("small_cluster", 50, reason="fixed dataset")
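
Each new test builds a lazy result and hands it to wait(a, small_client, 300), imported from ..utils_test. That helper's implementation is not part of this diff; a plausible minimal sketch, assuming it persists the collection on the given client and blocks with a timeout in seconds:

import distributed

def wait(collection, client, timeout):
    # Hypothetical sketch of the utils_test helper (not shown in this
    # commit): persist the lazy collection on the cluster and block
    # until it completes, raising if `timeout` seconds elapse first.
    p = client.persist(collection)
    distributed.wait(p, timeout=timeout)

With the tests moved and the migration above applied, historical results stay attached to the same benchmarks, and the zarr suite can be run on its own with pytest tests/benchmarks/test_zarr.py.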
