Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic time selection #2055

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@
"name": "Lehner, Sebastian",
"affiliation": "GeoSphere Austria, Vienna, Austria",
"orcid": "0000-0002-7562-8172"
},
{
"name": "Hamon, Baptiste",
"affiliation": "University of Canterbury, Christchurch, New Zealand",
"orcid": "0009-0007-4530-9772"
}
],
"keywords": [
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ Contributors
* Adrien Lamarche `@LamAdr <https://github.com/LamAdr>`_
* Faisal Mahmood <[email protected]> <[email protected]> `@faimahsho <https://github.com/faimahsho>`_
* Sebastian Lehner <[email protected]> `@seblehner <https://github.com/seblehner>`_
* Baptiste Hamon <[email protected]> `@baptistehamon <https://github.com/baptistehamon>`_
147 changes: 132 additions & 15 deletions src/xclim/core/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,12 +1117,139 @@ def days_since_to_doy(
return out.convert_calendar(base_calendar).rename(da.name)


def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]) -> np.ndarray:
    """
    Get the day of year list from start to end.

    Parameters
    ----------
    start : int
        Start day of year.
    end : int
        End day of year.
    inclusive : 2-tuple of booleans
        Whether the (start, end) bounds should be inclusive or not.

    Returns
    -------
    np.ndarray
        Array of day of year between the start and end. When ``start > end`` the period
        is taken to wrap around the end of the year (``start..366`` followed by ``1..end``).
    """
    if start <= end:
        doys = np.arange(start, end + 1)
    else:
        # Wrap around the year boundary: run up to 366 so that leap years are covered,
        # then restart at 1 (there is no day-of-year 0).
        doys = np.concatenate((np.arange(start, 367), np.arange(1, end + 1)))
    # Exclusive bounds simply drop the first and/or last element of the sequence.
    if not inclusive[0]:
        doys = doys[1:]
    if not inclusive[1]:
        doys = doys[:-1]
    return doys


def mask_between_doys(
    da: xr.DataArray | xr.Dataset,
    doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray],
    include_bounds: tuple[bool, bool] = (True, True),
) -> xr.DataArray | xr.Dataset:
    """
    Mask the data outside the day of year bounds.

    Parameters
    ----------
    da : xr.DataArray or xr.Dataset
        Input data.
    doy_bounds : 2-tuple of integers or xr.DataArray
        The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from
        1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given,
        the int day-of-year corresponds to the year of the xr.DataArray.
    include_bounds : 2-tuple of booleans
        Whether the bounds of `doy_bounds` should be inclusive or not.

    Returns
    -------
    xr.DataArray or xr.Dataset
        Boolean mask array with the same shape as `da` with True value inside the period of
        interest and False outside.

    Raises
    ------
    ValueError
        If no resampling frequency can be inferred from the time axis of the bounds, or if
        the two bounds have different frequencies.
    """
    # Simple case: two fixed days of year, a membership test on the day-of-year is enough.
    if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int):
        return da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds))

    cal = get_calendar(da, dim="time")

    start, end = doy_bounds

    # An int bound is expanded onto the other bound's grid (NaN where the other bound is
    # missing). Both kinds of bound are then put on the calendar of `da` and converted from
    # day-of-year to an offset in days, so they can be compared with offsets computed below.
    if isinstance(start, int):
        start = xr.where(end.isnull(), np.nan, start)
    start = start.convert_calendar(cal)
    start.attrs["calendar"] = cal
    start = doy_to_days_since(start)

    if isinstance(end, int):
        end = xr.where(start.isnull(), np.nan, end)
    end = end.convert_calendar(cal)
    end.attrs["calendar"] = cal
    end = doy_to_days_since(end)

    # Both bounds must share one inferrable frequency: it defines the resampling groups.
    freqs = []
    for bound in (start, end):
        try:
            freqs.append(xr.infer_freq(bound.time))
        except ValueError:
            freqs.append(None)
    freq = set(freqs) - {None}
    if len(freq) != 1:
        raise ValueError(
            f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}."
        )
    freq = freq.pop()

    out = []
    for base_time, indexes in da.resample(time=freq).groups.items():
        # get group slice
        group = da.isel(time=indexes)

        start_d = start.sel(time=base_time) if base_time in start.time else None
        end_d = end.sel(time=base_time) if base_time in end.time else None

        if start_d is not None and end_d is not None:
            # Exclusive bounds shrink the window by one day on the relevant side.
            # Plain arithmetic (not ``+=`` / ``-=``) so the selections taken from
            # `start` / `end` are never modified in place.
            if not include_bounds[0]:
                start_d = start_d + 1
            if not include_bounds[1]:
                end_d = end_d - 1

            # select days between start and end for group
            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            mask = (days >= start_d) & (days <= end_d)
        else:
            # No bounds for this period: get an array with the good shape and put False
            mask = start.isel(time=0).drop_vars("time").expand_dims(time=group.time)
            mask = xr.full_like(mask, False)

        out.append(mask)
    return xr.concat(out, dim="time")


def select_time(
da: xr.DataArray | xr.Dataset,
drop: bool = False,
season: str | Sequence[str] | None = None,
month: int | Sequence[int] | None = None,
doy_bounds: tuple[int, int] | None = None,
doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray] | None = None,
date_bounds: tuple[str, str] | None = None,
include_bounds: bool | tuple[bool, bool] = True,
) -> DataType:
Expand All @@ -1143,9 +1270,10 @@ def select_time(
One or more of 'DJF', 'MAM', 'JJA' and 'SON'.
month : int or sequence of int, optional
Sequence of month numbers (January = 1 ... December = 12).
doy_bounds : 2-tuple of int, optional
doy_bounds : 2-tuple of int or xr.DataArray, optional
The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from
1 (January 1st) to 365 or 366 (December 31st).
1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given,
the int day-of-year corresponds to the year of the xr.DataArray.
If calendar awareness is needed, consider using ``date_bounds`` instead.
date_bounds : 2-tuple of str, optional
The bounds as (start, end) of the period of interest expressed as dates in the month-day (%m-%d) format.
Expand Down Expand Up @@ -1187,17 +1315,6 @@ def select_time(
if N == 0:
return da

def _get_doys(_start, _end, _inclusive):
if _start <= _end:
_doys = np.arange(_start, _end + 1)
else:
_doys = np.concatenate((np.arange(_start, 367), np.arange(0, _end + 1)))
if not _inclusive[0]:
_doys = _doys[1:]
if not _inclusive[1]:
_doys = _doys[:-1]
return _doys

if isinstance(include_bounds, bool):
include_bounds = (include_bounds, include_bounds)

Expand All @@ -1212,7 +1329,7 @@ def _get_doys(_start, _end, _inclusive):
mask = da.time.dt.month.isin(month)

elif doy_bounds is not None:
mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds))
mask = mask_between_doys(da, doy_bounds, include_bounds)

elif date_bounds is not None:
# This one is a bit trickier.
Expand Down
Loading