Skip to content

Commit

Permalink
Add FlagGrouper
Browse files Browse the repository at this point in the history
Closes #472
  • Loading branch information
dcherian committed Feb 27, 2025
1 parent 6d81913 commit 8fc6a23
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cf_xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
from .options import set_options # noqa
from .utils import _get_version

from . import geometry # noqa
from . import geometry, groupers # noqa

__version__ = _get_version()
27 changes: 27 additions & 0 deletions cf_xarray/groupers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
from xarray.groupers import EncodedGroups, Grouper


class FlagGrouper(Grouper):
    """
    Grouper that groups by a CF flag variable.

    The variable being grouped must carry both the ``flag_values`` and
    ``flag_meanings`` attributes. Group labels in the result are taken from
    ``flag_meanings`` (in the order given there) instead of the raw values.
    """

    def factorize(self, group) -> EncodedGroups:
        """
        Encode ``group`` as integer codes indexing into ``flag_meanings``.

        Parameters
        ----------
        group
            Array-like object exposing ``attrs``, ``dtype``, ``data``,
            ``shape`` and ``copy(data=...)`` (an xarray DataArray in practice).

        Returns
        -------
        EncodedGroups
            ``codes`` has the same shape as ``group``; each code is the
            position of the element's value within ``flag_values``.
            ``full_index`` holds the whitespace-separated ``flag_meanings``.

        Raises
        ------
        ValueError
            If ``flag_values`` or ``flag_meanings`` is missing from
            ``group.attrs``, or if they do not have the same number of entries.
        """
        # Explicit raise rather than ``assert``: assertions are stripped
        # when Python runs with ``-O``.
        missing = [
            name for name in ("flag_values", "flag_meanings") if name not in group.attrs
        ]
        if missing:
            raise ValueError(
                "FlagGrouper requires `flag_values` and `flag_meanings` in attrs; "
                f"missing: {missing}"
            )

        values = np.atleast_1d(np.asarray(group.attrs["flag_values"]))
        # ``split()`` (no argument) tolerates repeated or trailing whitespace,
        # which ``split(" ")`` would turn into empty labels.
        full_index = pd.Index(group.attrs["flag_meanings"].split())
        if len(values) != len(full_index):
            raise ValueError(
                f"`flag_values` has {len(values)} entries but `flag_meanings` "
                f"has {len(full_index)}."
            )

        if (
            group.dtype.kind in "iu"
            and values.dtype.kind in "iu"
            and (np.diff(values) == 1).all()
        ):
            # Fast path: flag values are consecutive integers, so the code is
            # simply the offset from the *first flag value* (not from the first
            # data element, which need not be the smallest flag).
            # Cast first so unsigned data cannot wrap around on subtraction.
            codes = group.data.astype(np.intp) - int(values[0])
            # Anything outside the declared flag range belongs to no group;
            # -1 is the usual "no group" sentinel for factorized codes.
            # NOTE(review): confirm xarray drops -1 codes for custom groupers.
            in_range = (codes >= 0) & (codes < len(values))
            codes = np.where(in_range, codes, -1)
        else:
            # General path: look up each element's position in ``flag_values``
            # so codes always line up with ``flag_meanings``. ``pd.factorize``
            # would instead number groups by order of first appearance.
            # ``get_indexer`` returns -1 for values not present in the index.
            flat = np.asarray(group.data).ravel()
            codes = pd.Index(values).get_indexer(flat).reshape(group.shape)

        codes_da = group.copy(data=codes)
        # The codes are no longer flag values, so the flag attrs do not apply.
        codes_da.attrs.pop("flag_values")
        codes_da.attrs.pop("flag_meanings")

        return EncodedGroups(codes=codes_da, full_index=full_index)

    def reset(self):
        """Return a fresh grouper; this class holds no state to clear."""
        return type(self)()
15 changes: 15 additions & 0 deletions cf_xarray/tests/test_groupers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np
from xarray.testing import assert_identical

from cf_xarray.datasets import flag_excl
from cf_xarray.groupers import FlagGrouper


def test_flag_grouper():
    """FlagGrouper output matches a plain groupby relabelled with flag meanings."""
    dataset = flag_excl.to_dataset().set_coords("flag_var")
    dataset["foo"] = ("time", np.arange(8))

    # Reference result: group on the raw flag values, then swap in the
    # meaning labels and the one attribute kept on the coordinate.
    expected = dataset.groupby("flag_var").mean()
    expected["flag_var"] = ["flag_1", "flag_2", "flag_3"]
    expected["flag_var"].attrs["standard_name"] = "flag_mutual_exclusive"

    actual = dataset.groupby(flag_var=FlagGrouper()).mean()

    assert_identical(actual, expected)
9 changes: 8 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,20 @@ Geometries
----------
.. autosummary::
:toctree: generated/

geometry.decode_geometries

geometry.encode_geometries
geometry.shapely_to_cf
geometry.cf_to_shapely
geometry.GeometryNames


Groupers
--------
.. autosummary::
:toctree: generated/
groupers.FlagGrouper

.. currentmodule:: xarray

DataArray
Expand Down
25 changes: 25 additions & 0 deletions doc/flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,31 @@ You can also check whether a DataArray has the appropriate attributes to be reco
da.cf.is_flag_variable
```

## GroupBy

Flag variables, such as that above, are naturally used for GroupBy operations.
cf-xarray provides a `FlagGrouper` that understands the `flag_meanings` and `flag_values` attributes.

```{code-cell}
import cf_xarray as cfxr
import numpy as np
from cf_xarray.datasets import flag_excl
from cf_xarray.groupers import FlagGrouper
ds = flag_excl.to_dataset().set_coords('flag_var')
ds["foo"] = ("time", np.arange(8))
ds.flag_var
```

The `flag_var` array has the needed attributes.

```{code-cell}
ds.groupby(flag_var=FlagGrouper()).mean()
```

Note how the output coordinate has the values from `flag_meanings`.

## Flag Masks

```{warning}
Expand Down

0 comments on commit 8fc6a23

Please sign in to comment.