diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml new file mode 100644 index 0000000000..ddf22ed250 --- /dev/null +++ b/.github/workflows/codspeed.yml @@ -0,0 +1,33 @@ +name: CodSpeed Benchmarks + +on: + push: + branches: + - "main" + pull_request: + # `workflow_dispatch` allows CodSpeed to trigger backtest + # performance analysis in order to generate initial data. + workflow_dispatch: + +permissions: + contents: read + +jobs: + benchmarks: + name: Run benchmarks + runs-on: codspeed-macro + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 # grab all branches and tags + - name: Set up Python + uses: actions/setup-python@v6 + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Run the benchmarks + uses: CodSpeedHQ/action@v4 + with: + mode: walltime + run: hatch run test.py3.11-1.26-minimal:pytest tests/benchmarks --codspeed diff --git a/changes/3562.misc.md b/changes/3562.misc.md new file mode 100644 index 0000000000..e164ab39f8 --- /dev/null +++ b/changes/3562.misc.md @@ -0,0 +1 @@ +Add continuous performance benchmarking infrastructure. \ No newline at end of file diff --git a/docs/contributing.md b/docs/contributing.md index 7bfa6f6a18..f073b3ea38 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -230,4 +230,11 @@ If an existing Zarr format version changes, or a new version of the Zarr format ## Release procedure Open an issue on GitHub announcing the release using the release checklist template: -[https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md](https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md>). The release checklist includes all steps necessary for the release. \ No newline at end of file +[https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md](https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md). 
The release checklist includes all steps necessary for the release. + +## Benchmarks + +Zarr uses [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/) for running +performance benchmarks as part of our test suite. The benchmarks are found in `tests/benchmarks`. +By default pytest is configured to run these benchmarks as plain tests (i.e., no benchmarking). To run +a benchmark with timing measurements, use the `--benchmark-enable` flag when invoking `pytest`. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7f14971396..5145147823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,8 @@ test = [ 'numpydoc', "hypothesis", "pytest-xdist", + "pytest-benchmark", + "pytest-codspeed", "packaging", "tomlkit", "uv", @@ -181,6 +183,7 @@ run-pytest = "run" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" run-hypothesis = "run-coverage -nauto --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" +run-benchmark = "pytest --benchmark-enable tests/benchmarks" list-env = "pip list" [tool.hatch.envs.gputest] @@ -405,7 +408,10 @@ doctest_optionflags = [ "IGNORE_EXCEPTION_DETAIL", ] addopts = [ - "--durations=10", "-ra", "--strict-config", "--strict-markers", + "--benchmark-columns", "min,mean,stddev,outliers,rounds,iterations", + "--benchmark-disable", # run benchmark routines but don't do benchmarking + "--durations", "10", + "-ra", "--strict-config", "--strict-markers", ] filterwarnings = [ "error", diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/benchmarks/common.py b/tests/benchmarks/common.py new file mode 100644 index 0000000000..e8809156a6 --- /dev/null +++ b/tests/benchmarks/common.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass + + +@dataclass(kw_only=True, frozen=True) +class Layout: + shape: tuple[int, ...] + chunks: tuple[int, ...] + shards: tuple[int, ...] 
| None diff --git a/tests/benchmarks/test_e2e.py b/tests/benchmarks/test_e2e.py new file mode 100644 index 0000000000..65d0e65ac9 --- /dev/null +++ b/tests/benchmarks/test_e2e.py @@ -0,0 +1,82 @@ +""" +Benchmarks for end-to-end read/write performance of Zarr +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from tests.benchmarks.common import Layout + +if TYPE_CHECKING: + from pytest_benchmark.fixture import BenchmarkFixture + + from zarr.abc.store import Store + from zarr.core.common import NamedConfig +from operator import getitem, setitem +from typing import Any, Literal + +import pytest + +from zarr import create_array + +CompressorName = Literal["gzip"] | None + +compressors: dict[CompressorName, NamedConfig[Any, Any] | None] = { + None: None, + "gzip": {"name": "gzip", "configuration": {"level": 1}}, +} + + +layouts: tuple[Layout, ...] = ( + # No shards, just 1000 chunks + Layout(shape=(1_000_000,), chunks=(1000,), shards=None), + # 1:1 chunk:shard shape, should measure overhead of sharding + Layout(shape=(1_000_000,), chunks=(1000,), shards=(1000,)), + # One shard with all the chunks, should measure overhead of handling inner shard chunks + Layout(shape=(1_000_000,), chunks=(100,), shards=(10000 * 100,)), +) + + +@pytest.mark.parametrize("compression_name", [None, "gzip"]) +@pytest.mark.parametrize("layout", layouts, ids=str) +@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) +def test_write_array( + store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture +) -> None: + """ + Test the time required to fill an array with a single value + """ + arr = create_array( + store, + dtype="uint8", + shape=layout.shape, + chunks=layout.chunks, + shards=layout.shards, + compressors=compressors[compression_name], # type: ignore[arg-type] + fill_value=0, + ) + + benchmark(setitem, arr, Ellipsis, 1) + + +@pytest.mark.parametrize("compression_name", [None, "gzip"]) 
+@pytest.mark.parametrize("layout", layouts, ids=str) +@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) +def test_read_array( + store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture +) -> None: + """ + Test the time required to read an entire array + """ + arr = create_array( + store, + dtype="uint8", + shape=layout.shape, + chunks=layout.chunks, + shards=layout.shards, + compressors=compressors[compression_name], # type: ignore[arg-type] + fill_value=0, + ) + arr[:] = 1 + benchmark(getitem, arr, Ellipsis) diff --git a/tests/benchmarks/test_indexing.py b/tests/benchmarks/test_indexing.py new file mode 100644 index 0000000000..1ad4f4b575 --- /dev/null +++ b/tests/benchmarks/test_indexing.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pytest_benchmark.fixture import BenchmarkFixture + + from zarr.abc.store import Store + +from operator import getitem + +import pytest + +from zarr import create_array + +indexers = ( + (0,) * 3, + (slice(None),) * 3, + (slice(0, None, 4),) * 3, + (slice(10),) * 3, + (slice(10, -10, 4),) * 3, + (slice(None), slice(0, 3, 2), slice(0, 10)), +) + + +@pytest.mark.parametrize("store", ["memory"], indirect=["store"]) +@pytest.mark.parametrize("indexer", indexers, ids=str) +def test_slice_indexing( + store: Store, indexer: tuple[int | slice, ...], benchmark: BenchmarkFixture +) -> None: + data = create_array( + store=store, + shape=(105,) * 3, + dtype="uint8", + chunks=(10,) * 3, + shards=None, + compressors=None, + filters=None, + fill_value=0, + ) + + data[:] = 1 + benchmark(getitem, data, indexer)