diff --git a/docs/examples/07_cf_coordinate_subsampling.ipynb b/docs/examples/07_cf_coordinate_subsampling.ipynb new file mode 100644 index 0000000..93724ee --- /dev/null +++ b/docs/examples/07_cf_coordinate_subsampling.ipynb @@ -0,0 +1,888 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "30dfc508-8ec2-4b6a-a7c1-886c5c77a1f2", + "metadata": {}, + "source": [ + "# Converting affine transformed stored in GeoTransform to CF subsampled coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7b45e4bb-9483-4419-811e-c8ba234486d1", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "46f9f02c-9ad5-4218-b784-3221b650c6fa", + "metadata": {}, + "source": [ + "## Read data" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "4893c7bd-7464-4ee0-85b1-89c9118fdfae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'-3950000.0 25000.0 0.0 4350000.0 0.0 -25000.0'" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "source = \"https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v4.0.tif\"\n", + "\n", + "da = xr.open_dataarray(\n", + " source, engine=\"rasterio\", backend_kwargs={\"parse_coordinates\": True}\n", + ")\n", + "\n", + "\n", + "geotransform = da.spatial_ref.attrs.pop(\"GeoTransform\")\n", + "geotransform" + ] + }, + { + "cell_type": "markdown", + "id": "90311e88-c596-4826-9564-2417e2c44a7d", + "metadata": {}, + "source": [ + "## Encode to CF-style subsampled coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "78a9d489-eb3b-4c53-8cd0-e672d687f471", + "metadata": {}, + "outputs": [], + "source": [ + "x_origin, pixel_width, x_rotation, y_origin, y_rotation, pixel_height = (\n", + " float(x) for x in geotransform.split()\n", + ")\n", + "\n", + "bands, ysize, xsize = da.shape\n", + "\n", + "x_coords = x_origin + (np.arange(xsize) + 0.5) * pixel_width\n", + "y_coords = y_origin + (np.arange(ysize) + 0.5) * pixel_height\n", + "\n", + "xstart_center = x_origin + pixel_width / 2\n", + "xstop_center = xstart_center + (pixel_width * (xsize - 1))\n", + "ystart_center = y_origin + pixel_height / 2\n", + "ystop_center = ystart_center + (pixel_height * (ysize - 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "cd10409a-b14a-4ed4-bf5e-abb04c8c57a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 425kB\n",
+       "Dimensions:      (band: 1, x: 316, y: 332, tp: 2)\n",
+       "Coordinates:\n",
+       "  * band         (band) int64 8B 1\n",
+       "  * x            (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n",
+       "  * y            (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n",
+       "    spatial_ref  int64 8B ...\n",
+       "    interp       int64 8B 0\n",
+       "    x_indices    (tp) int64 16B 0 316\n",
+       "    y_indices    (tp) int64 16B 0 332\n",
+       "    xtp          (tp) float64 16B -3.938e+06 3.938e+06\n",
+       "    ytp          (tp) float64 16B 4.338e+06 -3.938e+06\n",
+       "Dimensions without coordinates: tp\n",
+       "Data variables:\n",
+       "    band_data    (band, y, x) float32 420kB ...
" + ], + "text/plain": [ + " Size: 425kB\n", + "Dimensions: (band: 1, x: 316, y: 332, tp: 2)\n", + "Coordinates:\n", + " * band (band) int64 8B 1\n", + " * x (x) float64 3kB -3.938e+06 -3.912e+06 ... 3.912e+06 3.938e+06\n", + " * y (y) float64 3kB 4.338e+06 4.312e+06 ... -3.912e+06 -3.938e+06\n", + " spatial_ref int64 8B ...\n", + " interp int64 8B 0\n", + " x_indices (tp) int64 16B 0 316\n", + " y_indices (tp) int64 16B 0 332\n", + " xtp (tp) float64 16B -3.938e+06 3.938e+06\n", + " ytp (tp) float64 16B 4.338e+06 -3.938e+06\n", + "Dimensions without coordinates: tp\n", + "Data variables:\n", + " band_data (band, y, x) float32 420kB ..." + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "varname = \"band_data\"\n", + "newds = da.to_dataset().copy(deep=True)\n", + "newds[varname].attrs[\"coordinate_interpolation\"] = \"x: y: interp\"\n", + "newds.coords[\"interp\"] = (\n", + " (),\n", + " 0,\n", + " {\n", + " \"interpolation_name\": \"linear\",\n", + " \"tie_point_mapping\": \"x: x_indices xtp y: y_indices ytp\",\n", + " \"computational_precision\": 64,\n", + " },\n", + ")\n", + "newds.coords[\"x_indices\"] = (\"tp\", [0, xsize])\n", + "newds.coords[\"y_indices\"] = (\"tp\", [0, ysize])\n", + "\n", + "newds.coords[\"xtp\"] = (\n", + " \"tp\",\n", + " [xstart_center, xstop_center],\n", + " {\"standard_name\": \"projection_x_coordinate\", \"units\": \"m\"},\n", + ")\n", + "newds.coords[\"ytp\"] = (\n", + " \"tp\",\n", + " [ystart_center, ystop_center],\n", + " {\"standard_name\": \"projection_y_coordinate\", \"units\": \"m\"},\n", + ")\n", + "\n", + "newds" + ] + }, + { + "cell_type": "markdown", + "id": "47f75416-ae4e-46ab-b2e5-4bfbfc3441c3", + "metadata": {}, + "source": [ + "### Decode from CF to Affine" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "74cb7a87-ce7c-4177-97b3-58b19b8e4066", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "def parse_mapping_string(input_str):\n", + " \"\"\"\n", + " Converts mapping strings to dictionaries. Supports mixed formats:\n", + " 1. \"x: y: linear\" -> shared value for multiple keys\n", + " 2. \"z: bilinear\" -> individual key-value pair\n", + " 3. \"x: y: linear z: bilinear\" -> combination of both\n", + " 4. \"x: x_indices xtp y: y_indices ytp\" -> multi-word values as tuples\n", + "\n", + " Args:\n", + " input_str (str): String with space-separated mapping expressions\n", + "\n", + " Returns:\n", + " dict: Dictionary mapping keys to values (strings or tuples)\n", + "\n", + " Examples:\n", + " >>> parse_mapping_string(\"x: y: linear z: bilinear\")\n", + " {'x': 'linear', 'y': 'linear', 'z': 'bilinear'}\n", + "\n", + " >>> parse_mapping_string(\"x: y: z: shared w: individual\")\n", + " {'x': 'shared', 'y': 'shared', 'z': 'shared', 'w': 'individual'}\n", + "\n", + " >>> parse_mapping_string(\"a: b: first c: d: e: second f: third\")\n", + " {'a': 'first', 'b': 'first', 'c': 'second', 'd': 'second', 'e': 'second', 'f': 'third'}\n", + "\n", + " >>> parse_mapping_string(\"mode: fast quality: high\")\n", + " {'mode': 'fast', 'quality': 'high'}\n", + "\n", + " >>> parse_mapping_string(\"x: x_indices xtp y: y_indices ytp\")\n", + " {'x': ('x_indices', 'xtp'), 'y': ('y_indices', 'ytp')}\n", + "\n", + " >>> parse_mapping_string(\"a: single b: multi word value\")\n", + " {'a': 'single', 'b': ('multi', 'word', 'value')}\n", + " \"\"\"\n", + " expressions = input_str.split()\n", + " result = {}\n", + " i = 0\n", + "\n", + " while i < len(expressions):\n", + " # Find the start of a key (word ending with ':')\n", + " if not expressions[i].endswith(\":\"):\n", + " i += 1\n", + " continue\n", + "\n", + " # Collect all consecutive keys (words ending with ':')\n", + " keys = []\n", + " while i < len(expressions) and expressions[i].endswith(\":\"):\n", + " keys.append(expressions[i].rstrip(\":\"))\n", + " i += 1\n", + "\n", + " # Collect value words until we hit the next key or end\n", + " value_parts = []\n", + " while i < len(expressions) and not expressions[i].endswith(\":\"):\n", + " value_parts.append(expressions[i])\n", + " i += 1\n", + "\n", + " # Create the mapping\n", + " if keys and value_parts:\n", + " # Convert to tuple if multiple words, otherwise keep as string\n", + " if len(value_parts) == 1:\n", + " value = value_parts[0]\n", + " else:\n", + " value = tuple(value_parts)\n", + "\n", + " for key in keys:\n", + " result[key] = value\n", + "\n", + " return result\n", + "\n", + "\n", + "def compute_explicit_coords(ds: xr.Dataset, variable: str):\n", + " coords = {}\n", + " for coord, methodvarname in parse_mapping_string(\n", + " ds[variable].attrs[\"coordinate_interpolation\"]\n", + " ).items():\n", + " methodvar = ds[methodvarname]\n", + " method = methodvar.attrs[\"interpolation_name\"]\n", + " indices, tiepoints = parse_mapping_string(methodvar.attrs[\"tie_point_mapping\"])[\n", + " coord\n", + " ]\n", + " precision = methodvar.attrs[\"computational_precision\"]\n", + "\n", + " if method == \"linear\":\n", + " coords[coord] = np.interp(\n", + " np.arange(ds.sizes[coord]), ds[indices].data, ds[tiepoints].data\n", + " )\n", + " else:\n", + " raise NotImplementedError\n", + " return coords" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "cb3bd008-ed66-4896-bcd3-498dee91c902", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from typing import Hashable\n", + "\n", + "from affine import Affine\n", + "\n", + "\n", + "@dataclass\n", + "class AffineAxis:\n", + " offset: float\n", + " shear: float\n", + " spacing: float\n", + " size: int\n", + " dimname: Hashable\n", + "\n", + "\n", + "def compute_affine(ds: xr.Dataset, variable: str) -> tuple[Affine, int, int]:\n", + " # TODO: get rid of varname\n", + " coords = {}\n", + " current_dim = None\n", + " axes = {\"X\": None, \"Y\": None}\n", + "\n", + " for coord, methodvarname in parse_mapping_string(\n", + " ds[variable].attrs[\"coordinate_interpolation\"]\n", + " ).items():\n", + " methodvar = ds[methodvarname]\n", + " method = methodvar.attrs[\"interpolation_name\"]\n", + " indices, tiepoints = parse_mapping_string(methodvar.attrs[\"tie_point_mapping\"])[\n", + " coord\n", + " ]\n", + " precision = methodvar.attrs[\"computational_precision\"]\n", + "\n", + " # FIXME: support latitude, longitude also for geographic CRS\n", + " if (stdname := ds[tiepoints].attrs.get(\"standard_name\")) in [\n", + " \"projection_x_coordinate\",\n", + " \"projection_y_coordinate\",\n", + " ]:\n", + " if stdname == \"projection_x_coordinate\":\n", + " currentdim = \"X\"\n", + " else:\n", + " currentdim = \"Y\"\n", + " (dim,) = ds[tiepoints].dims\n", + " else:\n", + " raise ValueError(\n", + " \"Expected a standard_name attribute with either projection_x_coordinate or projection_y_coordinate\"\n", + " )\n", + "\n", + " not_possible = method != \"linear\" or (ds[tiepoints].data.size != 2)\n", + " if not_possible:\n", + " raise ValueError(\"Cannot be represented by an affine\")\n", + "\n", + " if currentdim is None:\n", + " raise ValueError(\"could not detect whether axis is 'X' or 'Y'\")\n", + "\n", + " tp = ds[tiepoints].data\n", + " axes[currentdim] = AffineAxis(\n", + " offset=tp[0].item(),\n", + " shear=0.0,\n", + " spacing=((tp[1] - tp[0]) / (ds.sizes[coord] - 1)).item(),\n", + " size=ds.sizes[dim],\n", + " dimname=dim,\n", + " )\n", + "\n", + " current_dim = None\n", + "\n", + " X, Y = axes[\"X\"], axes[\"Y\"]\n", + " return (\n", + " Affine(\n", + " X.spacing,\n", + " X.shear,\n", + " X.offset - X.spacing / 2,\n", + " Y.shear,\n", + " Y.spacing,\n", + " Y.offset - Y.spacing / 2,\n", + " ),\n", + " X.size,\n", + " Y.size,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "e7b70729-b4f5-4fd7-bf7c-32d35f8c40b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Affine(25000.0, 0.0, -3950000.0,\n", + " 0.0, -25000.0, 4350000.0)" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aff, xsize, ysize = compute_affine(newds, \"band_data\")\n", + "aff" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "d230dc0b-5e6d-45d6-ac7e-ae5ac110ce5f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Affine(25000.0, 0.0, -3950000.0,\n", + " 0.0, -25000.0, 4350000.0)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Affine.from_gdal(*map(float, geotransform.split(\" \")))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:xarray-release]", + "language": "python", + "name": "conda-env-xarray-release-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}