|
21 | 21 |
|
22 | 22 | from meteora import settings
|
23 | 23 |
|
| 24 | +try: |
| 25 | + import xarray as xr |
| 26 | + import xvec # noqa: F401 |
| 27 | +except ImportError: |
| 28 | + xr = None |
| 29 | + |
24 | 30 | RegionType = str | Sequence | gpd.GeoSeries | gpd.GeoDataFrame | os.PathLike | IO
|
25 | 31 | VariablesType = str | int | list[str] | list[int]
|
26 | 32 | DateTimeType = (
|
@@ -85,6 +91,58 @@ def long_to_wide(
|
85 | 91 | )
|
86 | 92 |
|
87 | 93 |
|
def long_to_cube(
    ts_df: pd.DataFrame,
    stations_gdf: gpd.GeoDataFrame,
    *,
    stations_gdf_id_col: str | None = None,
) -> "xr.Dataset | None":
    """Convert a time series data frame and station locations to a vector data cube.

    A vector data cube is an n-D array with at least one dimension indexed by vector
    geometries. In Python, this is represented as an xarray Dataset (or DataArray)
    object with an indexed dimension with vector geometries set using xvec.

    Parameters
    ----------
    ts_df : pd.DataFrame
        Long form data frame with a time series of measurements (second-level index) at
        each station (first-level index) for each variable (column).
    stations_gdf : gpd.GeoDataFrame
        The stations data as a GeoDataFrame. Its active geometry column provides the
        station geometries and its CRS is attached to the geometry index.
    stations_gdf_id_col : str, optional
        The column in `stations_gdf` that matches the first-level index of `ts_df`. If
        None, the first-level index name of `ts_df` is used (however, it may not be
        an actual column in `stations_gdf`, in which case a KeyError is raised).

    Returns
    -------
    ts_cube : xr.Dataset
        The vector data cube with the time series of measurements for each station. The
        stations are indexed by their geometry. The `| None` part of the annotation is
        kept for backward compatibility only; this function does not return None.

    Raises
    ------
    ImportError
        If the optional dependencies (xarray and xvec) could not be imported.
    KeyError
        If `stations_gdf_id_col` is not a column of `stations_gdf`, or if a station of
        `ts_df` has no matching row in `stations_gdf`.
    """
    # NOTE: the return annotation is quoted on purpose. `xr` is set to None when the
    # optional imports fail, and an unquoted `xr.Dataset` would be evaluated when the
    # function is defined, breaking the import of the whole module.
    if xr is None:
        raise ImportError(
            "`long_to_cube` requires the optional dependencies xarray and xvec, "
            "which could not be imported. Install them to use vector data cubes."
        )

    # get the stations id column in the time series data frame
    stations_ts_df_id_col = ts_df.index.names[0]
    # get the stations id column in the GeoDataFrame
    if stations_gdf_id_col is None:
        stations_gdf_id_col = stations_ts_df_id_col

    # convert data frame to xarray (one dimension per index level)
    ts_ds = ts_df.to_xarray()

    # look up the geometry of each station in the order of the stations dimension; use
    # the active geometry column rather than assuming that it is named "geometry"
    station_geoms = stations_gdf.set_index(stations_gdf_id_col).geometry.loc[
        ts_ds[stations_ts_df_id_col].values
    ]

    # replace the station ids with the geometries and index the dimension by geometry;
    # the coordinates are passed as a dict so that any dimension name is accepted
    return ts_ds.assign_coords(
        {stations_ts_df_id_col: station_geoms}
    ).xvec.set_geom_indexes(stations_ts_df_id_col, crs=stations_gdf.crs)
| 144 | + |
| 145 | + |
88 | 146 | ########################################################################################
|
89 | 147 | # abstract attribute
|
90 | 148 | # `DummyAttribute` and `abstract_attribute` below are hardcoded from
|
|
0 commit comments