diff --git a/pixi.lock b/pixi.lock index a8e11bff..043f503f 100644 --- a/pixi.lock +++ b/pixi.lock @@ -18923,8 +18923,8 @@ packages: requires_python: '>=3.9' - pypi: ./ name: nlr-revrt - version: 0.7.0 - sha256: b47a59913cf2bb64f2bb96fecaa44a84be4eaf1e24708678a64086419f9e1def + version: 0.7.1 + sha256: 7669c1bc9a7802c7a31dda362899342b3d684b2364d420bb1c4e91c1272609ce requires_dist: - affine>=2.4.0,<3 - bokeh>=3.9.0,<4 diff --git a/pyproject.toml b/pyproject.toml index cf44bb3e..b30c1728 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "NLR-reVRt" -version = "0.7.0" +version = "0.7.1" description = "National Laboratory of the Rockies' (NLR's) Transmission for reV tool" readme = "README.rst" authors = [ diff --git a/revrt/costs/cli.py b/revrt/costs/cli.py index ae999705..0157d3cd 100644 --- a/revrt/costs/cli.py +++ b/revrt/costs/cli.py @@ -226,7 +226,7 @@ def build_routing_layer_file( # noqa: PLR0917, PLR0913 ), log_runtime("Building routing layers"), ): - _build_routing_layer_file( + out_layer_fp = _build_routing_layer_file( config, lock, validate_masks=validate_masks, @@ -237,6 +237,8 @@ def build_routing_layer_file( # noqa: PLR0917, PLR0913 if client is not None: close_dask_client(client) + return out_layer_fp + def _build_routing_layer_file( config, lock, validate_masks=False, create_kwargs=None @@ -262,6 +264,8 @@ def _build_routing_layer_file( if config.merge_friction_and_barriers is not None: _combine_friction_and_barriers(config, lf_handler, lock=lock) + return str(lf_handler.fp) + def _validated_config(**config_dict): """Validate use config inputs""" diff --git a/revrt/costs/layer_creator.py b/revrt/costs/layer_creator.py index 9bc4afe5..e8fa4d05 100644 --- a/revrt/costs/layer_creator.py +++ b/revrt/costs/layer_creator.py @@ -302,8 +302,12 @@ def _process_raster_layer(self, fname, config, tiff_chunks="file"): tiff_chunks=tiff_chunks, layer_dirs=[self.input_layer_dir, self.output_tiff_dir], band_index=0, + fillna=None, # filled below _after_ processing ) - return self._process_raster_data(data, config) + data = self._process_raster_data(data, config) + if config.na_fill is not None: + data = da.where(da.isnan(data), config.na_fill, data) + return data def _process_raster_data(self, data, config): """Create the desired layer from the data array""" @@ -389,6 +393,7 @@ def _process_vector_layer(self, fname, config, tiff_chunks="file"): tiff_chunks=tiff_chunks, layer_dirs=[self.output_tiff_dir], band_index=0, + fillna=config.na_fill, ) def _rasterize_vector_layer(self, fname, config, out_tiff): @@ -415,7 +420,7 @@ def _rasterize_vector_layer(self, fname, config, out_tiff): all_touched=config.rasterize.all_touched, tile_size=config.rasterize.tile_size, simply_before_rasterize=config.rasterize.simply_before_rasterize, - fill=config.rasterize.fill, + fill=config.na_fill, dtype=vector_dtype, **kwargs, ) @@ -427,9 +432,7 @@ def _rasterize_vector_layer(self, fname, config, out_tiff): logger.debug("Writing rasterized vector '%s' to '%s'", fname, out_tiff) save_data_using_layer_file_profile( - layer_fp=self._io_handler.fp, - data=temp, - geotiff=out_tiff, + layer_fp=self._io_handler.fp, data=temp, geotiff=out_tiff ) def _apply_mask(self, config, data): diff --git a/revrt/models/cost_layers.py b/revrt/models/cost_layers.py index e2e2f919..c50a3219 100644 --- a/revrt/models/cost_layers.py +++ b/revrt/models/cost_layers.py @@ -140,12 +140,6 @@ class Rasterize(BaseModel, extra="forbid"): rasterization. By default, ``False``. """ - fill: float | int | None = 0 - """Value used to fill raster cells not burned by vector - - If None, uses np.nan. By default, ``0``. - """ - class LayerBuildConfig(BaseModel, extra="forbid"): """Friction and barrier layers config model @@ -197,6 +191,9 @@ class LayerBuildConfig(BaseModel, extra="forbid"): inclusions are allowed. """ + na_fill: float | int | None = 0 + """Value to fill NA cells with after processing""" + def parse_barrier_values(barrier_values): """Parse barrier comparison text into an operator and threshold""" diff --git a/revrt/utilities/base.py b/revrt/utilities/base.py index ba0f08c3..58018d3a 100644 --- a/revrt/utilities/base.py +++ b/revrt/utilities/base.py @@ -25,8 +25,7 @@ logger = logging.getLogger(__name__) _NUM_GEOTIFF_DIMS = 3 # (band, y, x) -TRANSFORM_ATOL = 0.01 -"""Tolerance in transform comparison when checking GeoTIFFs""" +_TRANSFORM_ATOL = 0.01 def buffer_routes( @@ -247,7 +246,13 @@ def file_full_path(file_name, *layer_dirs): def load_data_using_layer_file_profile( - layer_fp, geotiff, tiff_chunks="file", layer_dirs=None, band_index=None + layer_fp, + geotiff, + tiff_chunks="file", + layer_dirs=None, + band_index=None, + check_tiff=True, + fillna=0, ): """Load GeoTIFF data, reprojecting to LayeredFile CRS if needed @@ -265,10 +270,22 @@ def load_data_using_layer_file_profile( Directories to search for `geotiff` in, if not found in current directory. By default, ``None``, which means only the current directory is searched. + check_tiff : bool, default=True + Whether to check that the GeoTIFF profile matches the template + defined by the layered file. If ``True``, the GeoTIFF profile is + compared to the template, and if they don't match, the GeoTIFF + is re-projected to match the template. If ``False``, no checks + are performed and the GeoTIFF is loaded as-is. + By default, ``True``. band_index : int, optional Optional index of band to load from the GeoTIFF. If provided, only that band will be returned. By default, ``None``, which means all bands will be returned. + fillna : int or float, optional + Value to fill NA cells with after loading the GeoTIFF. If + ``None``, NA cells will not be filled (and will typically be + represented as ``np.nan`` in the output array). + By default, ``0``. Returns ------- @@ -295,20 +312,24 @@ def load_data_using_layer_file_profile( "Using the following chunks to open '%s': %r", geotiff, tiff_chunks ) - tif = rioxarray.open_rasterio(geotiff, chunks=tiff_chunks) - try: - check_geotiff(layer_fp, geotiff, transform_atol=TRANSFORM_ATOL) - except revrtProfileCheckError: - logger.info( - "Profile of '%s' does not match template, reprojecting...", - geotiff, - ) - geo_box = odc.geo.geobox.GeoBox( - shape=(height, width), affine=transform, crs=crs - ) - tif = tif.odc.reproject( - how=geo_box, resampling=Resampling.nearest, INIT_DEST=0 - ) + tif = rioxarray.open_rasterio(geotiff, chunks=tiff_chunks, masked=True) + if check_tiff: + try: + check_geotiff(layer_fp, geotiff, transform_atol=_TRANSFORM_ATOL) + except revrtProfileCheckError: + logger.info( + "Profile of '%s' does not match template, reprojecting...", + geotiff, + ) + geo_box = odc.geo.geobox.GeoBox( + shape=(height, width), affine=transform, crs=crs + ) + tif = tif.astype("float32").odc.reproject( + how=geo_box, resampling=Resampling.nearest, dst_nodata=np.nan + ) + + if fillna is not None: + tif = tif.fillna(fillna) if band_index is not None: tif = tif.isel(band=band_index) diff --git a/revrt/utilities/handlers.py b/revrt/utilities/handlers.py index b9ea97c4..e4a3b9e0 100644 --- a/revrt/utilities/handlers.py +++ b/revrt/utilities/handlers.py @@ -24,12 +24,11 @@ revrtValueError, ) from revrt.utilities.base import ( - check_geotiff, delete_data_file, expand_dim_if_needed, transform_xy, save_data_array_to_geotiff, - TRANSFORM_ATOL, + load_data_using_layer_file_profile, ) from revrt.utilities.monitoring import log_mem, log_runtime @@ -529,14 +528,15 @@ def write_geotiff_to_file( geotiff, ) with log_runtime(f"Writing GeoTIFF to {geotiff}"): - if check_tiff: - logger.debug("\t- Checking %s against %s", geotiff, self.fp) - check_geotiff(self.fp, geotiff, transform_atol=TRANSFORM_ATOL) - - with rioxarray.open_rasterio(geotiff, chunks=tiff_chunks) as tif: - logger.debug( - "\t- Writing data from %s to %s", geotiff, self.fp - ) + tif = load_data_using_layer_file_profile( + self.fp, + geotiff, + tiff_chunks=tiff_chunks, + check_tiff=check_tiff, + fillna=None, + ) + logger.debug("\t- Writing data from %s to %s", geotiff, self.fp) + try: self.write_layer( tif, layer_name, @@ -544,6 +544,8 @@ def write_geotiff_to_file( overwrite=overwrite, nodata=nodata, ) + finally: + tif.close() def layer_to_geotiff( self, layer, geotiff, ds_chunks="auto", lock=None, **profile_kwargs diff --git a/tests/python/integration/test_utilties_handlers.py b/tests/python/integration/test_utilties_handlers.py index bc61c969..12827ffb 100644 --- a/tests/python/integration/test_utilties_handlers.py +++ b/tests/python/integration/test_utilties_handlers.py @@ -260,10 +260,10 @@ def test_geotiff_to_layer_file(tif, tmp_path, test_utility_data_dir): lf.write_geotiff_to_file(in_tiff_fp, layer) with ( - rioxarray.open_rasterio(in_tiff_fp) as truth_tif, + rioxarray.open_rasterio(in_tiff_fp, masked=True) as truth_tif, xr.open_dataset(test_fp, consolidated=False, engine="zarr") as ds, ): - assert np.allclose(ds[layer], truth_tif) + assert np.allclose(ds[layer], truth_tif, equal_nan=True) assert np.allclose( ds[layer].rio.transform(), truth_tif.rio.transform() ) @@ -293,12 +293,12 @@ def test_roundtrip(as_list, tmp_path, test_utility_data_dir): for layer, truth_fp in layer_names.items(): test_tiff_fp = tmp_path / f"{layer}.tif" with ( - rioxarray.open_rasterio(truth_fp) as truth_tif, - rioxarray.open_rasterio(test_tiff_fp) as test_tif, + rioxarray.open_rasterio(truth_fp, masked=True) as truth_tif, + rioxarray.open_rasterio(test_tiff_fp, masked=True) as test_tif, xr.open_dataset(test_fp, consolidated=False, engine="zarr") as ds, ): - assert np.allclose(ds[layer], truth_tif) - assert np.allclose(test_tif, truth_tif) + assert np.allclose(ds[layer], truth_tif, equal_nan=True) + assert np.allclose(test_tif, truth_tif, equal_nan=True) assert np.allclose( ds[layer].rio.transform(), truth_tif.rio.transform() ) @@ -358,14 +358,14 @@ def test_roundtrip_cli(cli_runner, tmp_path, test_utility_data_dir, as_list): for layer, truth_fp in layer_names.items(): test_tiff_fp = tmp_path / f"{layer}.tif" with ( - rioxarray.open_rasterio(truth_fp) as truth_tif, - rioxarray.open_rasterio(test_tiff_fp) as test_tif, + rioxarray.open_rasterio(truth_fp, masked=True) as truth_tif, + rioxarray.open_rasterio(test_tiff_fp, masked=True) as test_tif, xr.open_dataset( out_file_fp, consolidated=False, engine="zarr" ) as ds, ): - assert np.allclose(ds[layer], truth_tif) - assert np.allclose(test_tif, truth_tif) + assert np.allclose(ds[layer], truth_tif, equal_nan=True) + assert np.allclose(test_tif, truth_tif, equal_nan=True) assert np.allclose( ds[layer].rio.transform(), truth_tif.rio.transform() ) diff --git a/tests/python/unit/costs/test_costs_layer_creator.py b/tests/python/unit/costs/test_costs_layer_creator.py index 683687f3..b6d68e19 100644 --- a/tests/python/unit/costs/test_costs_layer_creator.py +++ b/tests/python/unit/costs/test_costs_layer_creator.py @@ -10,6 +10,7 @@ import rasterio import numpy as np import xarray as xr +import dask.array as da import geopandas as gpd from shapely.geometry import box from rasterio.transform import Affine, from_origin @@ -428,6 +429,108 @@ def test_pass_through(builder_instance): assert (result == data).all() +def test_process_raster_layer_fills_na_for_lazy_dask_output( + tmp_path, mask_instance +): + """Test NA fill for pass-through rasters that become Dask arrays""" + layer_dir = tmp_path / "layers" + layer_dir.mkdir(parents=True, exist_ok=True) + + data = xr.DataArray( + np.array( + [[np.nan, 1, 2], [np.nan, 3, 4], [np.nan, 5, 6]], + dtype=np.float32, + ), + dims=("y", "x"), + coords={ + "x": 10 + np.arange(3) + 0.5, + "y": 10 - np.arange(3) - 0.5, + }, + name="test_band", + ) + data = data.rio.write_crs("EPSG:4326") + data.rio.write_transform(from_origin(10, 10, 1, 1)) + + layer_fp = layer_dir / "nan_pass_through.tif" + data.rio.to_raster(layer_fp, driver="GTiff") + + lf = LayeredFile(tmp_path / "test.zarr") + lf.create_new(layer_fp) + + builder = LayerCreator( + lf, + mask_instance, + input_layer_dir=layer_dir, + output_tiff_dir=tmp_path / "out", + ) + config = LayerBuildConfig(extent="wet", pass_through=True, na_fill=7) + + result = builder._process_raster_layer(layer_fp.name, config) + + assert isinstance(result, da.Array) + assert np.array_equal( + result.compute(), + np.array([[7, 0, 0], [7, 0, 0], [7, 0, 0]], dtype=np.float32), + ) + + +def test_process_raster_layer_fills_large_negative_nodata( + tmp_path, mask_instance +): + """Test NA fill when GeoTIFF nodata is a large negative sentinel""" + layer_dir = tmp_path / "layers" + layer_dir.mkdir(parents=True, exist_ok=True) + + nodata_value = np.float32(-3.4028235e38) + raster_data = np.array( + [ + [nodata_value, 1, 2], + [nodata_value, 3, 4], + [nodata_value, 5, 6], + ], + dtype=np.float32, + ) + layer_fp = layer_dir / "large_negative_nodata.tif" + + with rasterio.open( + layer_fp, + "w", + driver="GTiff", + height=raster_data.shape[0], + width=raster_data.shape[1], + count=1, + dtype=raster_data.dtype, + crs="EPSG:4326", + transform=from_origin(10, 10, 1, 1), + nodata=float(nodata_value), + ) as src: + src.write(raster_data, 1) + + with rasterio.open(layer_fp) as src: + loaded = src.read(1) + + assert loaded[0, 0] == nodata_value + + lf = LayeredFile(tmp_path / "test.zarr") + lf.create_new(layer_fp) + + builder = LayerCreator( + lf, + mask_instance, + input_layer_dir=layer_dir, + output_tiff_dir=tmp_path / "out", + ) + config = LayerBuildConfig(extent="wet", pass_through=True, na_fill=7) + + result = builder._process_raster_layer(layer_fp.name, config) + + assert isinstance(result, da.Array) + assert np.array_equal( + result.compute(), + np.array([[7, 0, 0], [7, 0, 0], [7, 0, 0]], dtype=np.float32), + ) + + def test_bin_config_sanity_checking(builder_instance, tiff_layers_for_testing): """Test cost binning config sanity checking""" __, layers = tiff_layers_for_testing @@ -768,6 +871,59 @@ def test_rasterizing_shape_file_uses_column_values(tmp_path, mask_instance): ), f"{np.array(ds.values)}" +def test_rasterizing_shape_file_uses_na_fill_for_background( + tmp_path, mask_instance +): + """Rasterize vector features using na_fill for untouched cells""" + fn = "test_background_fill.gpkg" + vector_file = tmp_path / fn + tiff_fp = tmp_path / "friction.tif" + + template_tiff = tmp_path / "template.tif" + crs = "ESRI:102008" + transform = Affine(5.0, 0.0, -12.5, 0.0, -5.0, 12.5) + width = height = 3 + x0, y0 = -12.5, 12.5 + + data = xr.DataArray( + np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]), + dims=("y", "x"), + coords={ + "x": x0 + np.arange(width) * 5 + 2.5, + "y": y0 - np.arange(height) * 5 - 2.5, + }, + name="test_band", + ) + data = data.rio.write_crs(crs) + data.rio.write_transform(transform) + data.rio.to_raster(template_tiff, driver="GTiff") + + gpd.GeoDataFrame( + geometry=[box(-2.4, -2.4, 2.4, 12.4)], + crs=crs, + ).to_file(vector_file, driver="GPKG") + + lf = LayeredFile(tmp_path / "test.zarr").create_new(template_tiff) + builder = LayerCreator( + lf, mask_instance, input_layer_dir=tmp_path, output_tiff_dir=tmp_path + ) + config = { + fn: LayerBuildConfig( + extent="all", + rasterize=Rasterize(value=1000, reproject=False), + na_fill=7, + ) + } + + builder.build("friction", config, write_to_file=False) + + with rioxarray.open_rasterio(tiff_fp, chunks="auto") as ds: + assert np.allclose( + np.array([[[7, 7, 1000], [7, 7, 1000], [7, 7, 1000]]]), + ds, + ), f"{np.array(ds.values)}" + + def test_rasterizing_shape_file_rejects_missing_value_column( tmp_path, mask_instance ): diff --git a/tests/python/unit/utilities/test_utilities_handlers.py b/tests/python/unit/utilities/test_utilities_handlers.py index f225a132..a85297b5 100644 --- a/tests/python/unit/utilities/test_utilities_handlers.py +++ b/tests/python/unit/utilities/test_utilities_handlers.py @@ -757,6 +757,51 @@ def test_load_data_using_layer_file_profile( test_tif_2.close() +def test_load_data_using_layer_file_profile_fillna( + sample_tiff_fp, sample_tiff_props, tmp_path +): + """Test loading GeoTIFF data with masked nodata cells filled""" + x0, y0, width, height, cell_size, transform = sample_tiff_props + nodata_value = np.float32(-3.4028235e38) + raster_data = np.arange(width * height, dtype=np.float32).reshape( + (height, width) + ) + raster_data[0, 0] = nodata_value + raster_data[1, 1] = nodata_value + + geotiff = tmp_path / "nodata_fill.tif" + data = xr.DataArray( + raster_data, + dims=("y", "x"), + coords={ + "x": x0 + np.arange(width) * cell_size + cell_size / 2, + "y": y0 - np.arange(height) * cell_size - cell_size / 2, + }, + name="test_band", + ) + data = data.rio.write_crs("EPSG:4326") + data = data.rio.write_transform(transform) + data = data.rio.write_nodata(nodata_value) + data.rio.to_raster(geotiff, driver="GTiff") + + test_fp = tmp_path / "test.zarr" + LayeredFile(test_fp).write_geotiff_to_file(sample_tiff_fp, "test_layer") + + unfilled = load_data_using_layer_file_profile( + test_fp, geotiff, fillna=None + ) + filled = load_data_using_layer_file_profile(test_fp, geotiff, fillna=7) + + assert np.isnan(unfilled.to_numpy()[0, 0, 0]) + assert np.isnan(unfilled.to_numpy()[0, 1, 1]) + assert filled.to_numpy()[0, 0, 0] == 7 + assert filled.to_numpy()[0, 1, 1] == 7 + assert filled.to_numpy()[0, 0, 1] == raster_data[0, 1] + + unfilled.close() + filled.close() + + @pytest.mark.parametrize("in_layer_dir", [True, False]) @pytest.mark.parametrize("band", [None, 0]) def test_load_data_using_file_full_path(