diff --git a/.gitignore b/.gitignore index 2e6a4aa..a111799 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ dist conda* .conda* .spyproject/ +.vscode/ \ No newline at end of file diff --git a/README.md b/README.md index c7fdd3c..8bb3f86 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Currently this tool supports the following DGGSs: - [H3](https://h3geo.org/) - [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system) - [S2](https://s2geometry.io/) +- [A5](https://a5geo.org/) ... and the following geocode systems: @@ -164,6 +165,12 @@ Or without activating the shell: poetry run python tests/test_vector2dggs.py ``` +To test a specific DGGS: + +```bash +python -m pytest tests/test_runthrough.py -k "a5" -v +``` + Test data are included at `tests/data/`. ## Example commands diff --git a/poetry.lock b/poetry.lock index 93e1540..ca68623 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "backports-tarfile" @@ -1787,6 +1787,22 @@ files = [ {file = "psycopg2-2.9.12.tar.gz", hash = "sha256:1dedb1c7a1d8552c4a6044c6b1c41a52e6a8e2d144af83eccac758076b1b7c15"}, ] +[[package]] +name = "pya5" +version = "0.8.0" +description = "A5 - Global Pentagonal Geospatial Index" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"a5\" or extra == \"all\"" +files = [ + {file = "pya5-0.8.0-py3-none-any.whl", hash = "sha256:a3c5df400f861ccfb4d69086246f54c95d34dc8a0500b59923666aca155d15c4"}, + {file = "pya5-0.8.0.tar.gz", hash = "sha256:2dbdf7efe300f151d8795d0a95bee5cc6d6b32d9fd46c4284815d316e4233b8f"}, +] + +[package.extras] +test = ["pytest (>=7.0.0)"] + [[package]] name = "pyarrow" version = "20.0.0" @@ -2816,7 +2832,8 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -all = ["h3", "python-geohash", "rhealpixdggs", "rhppandas", "rusty-polygon-geohasher", "s2geometry"] +a5 = ["pya5"] +all = ["h3", "pya5", "python-geohash", "rhealpixdggs", "rhppandas", "rusty-polygon-geohasher", "s2geometry"] geohash = ["python-geohash", "rusty-polygon-geohasher"] h3 = ["h3"] rhp = ["rhealpixdggs", "rhppandas"] @@ -2825,4 +2842,4 @@ s2 = ["s2geometry"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "f05a43de1b7e396b32e55ae07ef59414a002b48e94ee96f34635f3286b402b52" +content-hash = "169154507cb2e135ad7b6ec485adc54c7b1ac832f710f49648810637fbc66d71" diff --git a/pyproject.toml b/pyproject.toml index 951bcab..22adbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ rhealpixdggs = { version = "^0.5.12", optional = true} s2geometry = { version = "^0.9.0", optional = true} rusty-polygon-geohasher = { version = "^0.2.3", optional = true} python-geohash = { version = "^0.8.5", optional = true} +pya5 = { version = "^0.8.0", optional = true } urllib3 = "^2.6.3" cryptography = "^46.0.5" @@ -58,6 +59,7 @@ line-length = 88 h3 = ["h3"] rhp = ["rhppandas", "rhealpixdggs"] s2 = ["s2geometry"] +a5 = ["pya5"] geohash = ["rusty-polygon-geohasher", "python-geohash"] # Convenience extras -all = ["h3", "rhppandas", "rhealpixdggs", "s2geometry", "rusty-polygon-geohasher", "python-geohash"] \ No newline at end of file +all = ["h3", "rhppandas", "rhealpixdggs", "s2geometry", "rusty-polygon-geohasher", "python-geohash", "pya5"] \ No newline at end of file diff --git a/tests/classes/a5.py b/tests/classes/a5.py new file mode 100644 index 0000000..f2b89eb --- /dev/null +++ b/tests/classes/a5.py @@ -0,0 +1,215 @@ +from .base import TestRunthrough +from ..data.datapaths import * + +from vector2dggs.a5 import a5 + + +class TestA5(TestRunthrough): + """ + Sends the test data file through A5 indexing using default parameters. + """ + + def test_a5_run(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + ], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough failed.") + + def test_a5_run_overwrite(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + ], + standalone_mode=False, + ) + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-o", + ], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough with overwrite failed.") + + def test_a5_cut_crs(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-crs", + "3793", + "-c", + "4000", + ], + standalone_mode=False, + ) + + except Exception: + self.fail("A5 run through using actual CRS failed") + + def test_a5_cut_crs_reproject(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-crs", + "4326", + "-c", + "0.005", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with reprojected CRS failed") + + def test_a5_no_bisection(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-c", + "0", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through without bisection failed") + + def test_a5_compaction(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-co", + "-id", + "LCDB_UID", + ], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough with compaction failed.") + + def test_a5_geo_point(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "--geo", + "point", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo point failed") + + def test_a5_geo_point_compact(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "--geo", + "point", + "-co", + "-id", + "LCDB_UID", + "-o", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo point -co failed") + + def test_a5_geo_polygon(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "--geo", + "polygon", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo polygon failed") + + def test_a5_geo_polygon_compact(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "--geo", + "polygon", + "-co", + "-id", + "LCDB_UID", + "-o", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo polygon -co failed") diff --git a/tests/classes/geohash.py b/tests/classes/geohash.py index ea8f02a..4d374ea 100644 --- a/tests/classes/geohash.py +++ b/tests/classes/geohash.py @@ -12,7 +12,14 @@ class TestGeohash(TestRunthrough): def test_geohash_run(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_geohash_run(self): def test_geohash_run_overwrite(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + ], standalone_mode=False, ) geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + "-o", + ], standalone_mode=False, ) @@ -36,7 +58,16 @@ def test_geohash_run_overwrite(self): def test_geohash_cut_crs(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6", "-crs", "3793"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + "-crs", + "3793", + ], standalone_mode=False, ) @@ -49,6 +80,8 @@ def test_geohash_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "-crs", @@ -67,6 +100,8 @@ def test_geohash_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "-co", @@ -85,6 +120,8 @@ def test_geohash_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -101,6 +138,8 @@ def test_geohash_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -121,6 +160,8 @@ def test_geohash_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -137,6 +178,8 @@ def test_geohash_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", diff --git a/tests/classes/h3.py b/tests/classes/h3.py index be12b70..36311bb 100644 --- a/tests/classes/h3.py +++ b/tests/classes/h3.py @@ -12,7 +12,14 @@ class TestH3(TestRunthrough): def test_h3_run(self): try: h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_h3_run(self): def test_h3_run_overwrite(self): try: h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_h3_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -58,6 +82,8 @@ def test_h3_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -76,6 +102,8 @@ def test_h3_no_bisection(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-c", @@ -92,6 +120,8 @@ def test_h3_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-co", @@ -110,6 +140,8 @@ def test_h3_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -127,6 +159,8 @@ def test_h3_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -147,6 +181,8 @@ def test_h3_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -164,6 +200,8 @@ def test_h3_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", diff --git a/tests/classes/rHP.py b/tests/classes/rHP.py index 348fb11..2187d9d 100644 --- a/tests/classes/rHP.py +++ b/tests/classes/rHP.py @@ -12,7 +12,14 @@ class TestRHP(TestRunthrough): def test_rhp_run(self): try: rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_rhp_run(self): def test_rhp_run_overwrite(self): try: rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_rhp_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -56,6 +80,8 @@ def test_rhp_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -74,6 +100,8 @@ def test_rhp_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-co", @@ -92,6 +120,8 @@ def test_rhp_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -108,6 +138,8 @@ def test_rhp_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -128,6 +160,8 @@ def test_rhp_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -144,6 +178,8 @@ def test_rhp_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", diff --git a/tests/classes/s2.py b/tests/classes/s2.py index 06fd59c..59ae587 100644 --- a/tests/classes/s2.py +++ b/tests/classes/s2.py @@ -12,7 +12,14 @@ class TestS2(TestRunthrough): def test_s2_run(self): try: s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_s2_run(self): def test_s2_run_overwrite(self): try: s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + ], standalone_mode=False, ) s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_s2_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-crs", @@ -58,6 +82,8 @@ def test_s2_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-crs", @@ -76,6 +102,8 @@ def test_s2_no_bisection(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-c", @@ -92,6 +120,8 @@ def test_s2_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-co", @@ -110,6 +140,8 @@ def test_s2_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -126,6 +158,8 @@ def test_s2_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -146,6 +180,8 @@ def test_s2_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -162,6 +198,8 @@ def test_s2_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", diff --git a/tests/data/datapaths.py b/tests/data/datapaths.py index 0152124..34b04af 100644 --- a/tests/data/datapaths.py +++ b/tests/data/datapaths.py @@ -7,4 +7,5 @@ DATA_DIR = Path(__file__).resolve().parent TEST_FILE_PATH = str(DATA_DIR / "se-island.gpkg") +TEST_LAYER_NAME = "se_island" TEST_OUTPUT_PATH = DATA_DIR / "output" diff --git a/tests/data/se-island.gpkg b/tests/data/se-island.gpkg index 74b24c1..a158313 100644 Binary files a/tests/data/se-island.gpkg and b/tests/data/se-island.gpkg differ diff --git a/tests/test_runthrough.py b/tests/test_runthrough.py index 33ac6af..a87c0f4 100644 --- a/tests/test_runthrough.py +++ b/tests/test_runthrough.py @@ -2,6 +2,7 @@ @author: ndemaio """ +from .classes.a5 import TestA5 from .classes.h3 import TestH3 from .classes.rHP import TestRHP from .classes.s2 import TestS2 diff --git a/tests/test_vector2dggs.py b/tests/test_vector2dggs.py index aa17d3f..b8d83aa 100644 --- a/tests/test_vector2dggs.py +++ b/tests/test_vector2dggs.py @@ -2,12 +2,14 @@ @author: ndemaio """ +import sys import unittest from pathlib import Path if __name__ == "__main__": tests_dir = Path(__file__).resolve().parent top_level_dir = tests_dir.parent + sys.path.insert(0, str(top_level_dir)) testSuite = unittest.defaultTestLoader.discover( start_dir=str(tests_dir), top_level_dir=str(top_level_dir), diff --git a/vector2dggs/a5.py b/vector2dggs/a5.py new file mode 100644 index 0000000..82a884d --- /dev/null +++ b/vector2dggs/a5.py @@ -0,0 +1,206 @@ +import click +import click_log +import tempfile +import pyproj + +from typing import Union +from pathlib import Path + +import vector2dggs.constants as const +import vector2dggs.common as common + +from vector2dggs import __version__ + + +@click.command(context_settings={"show_default": True}) +@click_log.simple_verbosity_option(common.LOGGER) +@click.argument("vector_input", required=True, type=click.Path(), nargs=1) +@click.argument("output_directory", required=True, type=click.Path(), nargs=1) +@click.option( + "-r", + "--resolution", + required=True, + type=click.Choice(list(map(str, range(const.MIN_A5, const.MAX_A5 + 1)))), + help="A5 resolution to index", + nargs=1, +) +@click.option( + "-pr", + "--parent_res", + required=False, + type=click.Choice(list(map(str, range(const.MIN_A5, const.MAX_A5 + 1)))), + help="A5 parent resolution for the output partition. Defaults to resolution - 6", +) +@click.option( + "-id", + "--id_field", + required=False, + default=const.DEFAULTS["id"], + type=str, + help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.", + nargs=1, +) +@click.option( + "-k", + "--keep_attributes", + is_flag=True, + show_default=True, + default=const.DEFAULTS["k"], + help="Retain attributes in output. The default is to create an output that only includes A5 cell ID and the ID given by the -id field (or the default index ID).", +) +@click.option( + "-ch", + "--chunksize", + required=True, + type=int, + default=const.DEFAULTS["ch"], + help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.", + nargs=1, +) +@click.option( + "-s", + "--spatial_sorting", + type=click.Choice(const.SPATIAL_SORTING_METHODS), + default=const.DEFAULTS["s"], + help="Spatial sorting method when perfoming spatial partitioning.", +) +@click.option( + "-crs", + "--cut_crs", + required=False, + default=const.DEFAULTS["crs"], + type=int, + help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cut_threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.", + nargs=1, +) +@click.option( + "-c", + "--cut_threshold", + required=False, + default=const.DEFAULTS["c"], + type=float, + help="Cutting up large geometries into smaller geometries based on a target area. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS. If left unspecified, the threshold will be the maximum area of a cell at the parent resolution, in square metres or feet according to the CRS. A threshold of 0 will skip bissection entirely (effectively ignoring --cut_crs).", + nargs=1, +) +@click.option( + "-t", + "--threads", + required=False, + default=const.DEFAULTS["t"], + type=int, + help="Amount of threads used for operation", + nargs=1, +) +@click.option( + "-cp", + "--compression", + required=False, + default=const.DEFAULTS["cp"], + type=str, + help="Compression method to use for the output Parquet files. Options include 'snappy', 'gzip', 'brotli', 'lz4', 'zstd', etc. Use 'none' for no compression.", + nargs=1, +) +@click.option( + "-lyr", + "--layer", + required=False, + default=const.DEFAULTS["lyr"], + type=str, + help="Name of the layer or table to read when using an input that supports layers or tables", + nargs=1, +) +@click.option( + "-g", + "--geom_col", + required=False, + default=const.DEFAULTS["g"], + type=str, + help="Column name to use when using a spatial database connection as input", + nargs=1, +) +@click.option( + "--geo", + required=False, + default=const.DEFAULTS["geo"], + type=click.Choice(const.GEOM_TYPES), + help="Select geometry encoding for the output: 'none' for regular Parquet (no GeoParquet metadata), or 'point'/'polygon' to write GeoParquet (v1.1.0) with the corresponding geometry type.", + nargs=1, +) +@click.option( + "--tempdir", + default=const.DEFAULTS["tempdir"], + type=click.Path(), + help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.", +) +@click.option( + "-co", + "--compact", + is_flag=True, + help="Compact the A5 cells up to the parent resolution. Compaction requires an id_field.", +) +@click.option("-o", "--overwrite", is_flag=True) +@click.version_option(version=__version__) +def a5( + vector_input: Union[str, Path], + output_directory: Union[str, Path], + resolution: str, + parent_res: str, + id_field: str, + keep_attributes: bool, + chunksize: int, + spatial_sorting: str, + cut_crs: int, + cut_threshold: int, + threads: int, + compression: str, + layer: str, + geom_col: str, + geo: str, + tempdir: Union[str, Path], + compact: bool, + overwrite: bool, +): + """ + Ingest a vector dataset and index it to the A5 DGGS. + + VECTOR_INPUT is the path to input vector geospatial data. + OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store. + """ + tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir + + common.check_resolutions(resolution, parent_res) + common.check_compaction_requirements(compact, id_field) + + spatial_sorting = const.SpatialSortingMethod(spatial_sorting).value + geo = const.GeoOutputMode(geo).value + + con, vector_input = common.db_conn_and_input_path(vector_input) + output_directory = common.resolve_output_path(output_directory, overwrite) + + if cut_crs is not None: + cut_crs = pyproj.CRS.from_user_input(cut_crs) + + try: + common.index( + "a5", + vector_input, + output_directory, + int(resolution), + parent_res, + keep_attributes, + chunksize, + spatial_sorting, + cut_threshold, + threads, + compression=compression, + cut_crs=cut_crs, + id_field=id_field, + con=con, + layer=layer, + geom_col=geom_col, + geo=geo, + overwrite=overwrite, + compact=compact, + ) + except: + raise diff --git a/vector2dggs/cli.py b/vector2dggs/cli.py index de2ea1a..07ccf7b 100644 --- a/vector2dggs/cli.py +++ b/vector2dggs/cli.py @@ -1,6 +1,7 @@ import click from vector2dggs import __version__ +from vector2dggs.a5 import a5 from vector2dggs.h3 import h3 from vector2dggs.rHP import rhp from vector2dggs.s2 import s2 @@ -21,6 +22,7 @@ def cli(): pass +cli.add_command(a5) cli.add_command(h3) cli.add_command(rhp) cli.add_command(s2) diff --git a/vector2dggs/common.py b/vector2dggs/common.py index 1e6705f..b7f82e4 100644 --- a/vector2dggs/common.py +++ b/vector2dggs/common.py @@ -562,7 +562,9 @@ def index( # Database connection with con.connect() as connection: parts = layer.rsplit(".", 1) - schema, tbl_name = (parts[0], parts[1]) if len(parts) == 2 else (None, parts[0]) + schema, tbl_name = ( + (parts[0], parts[1]) if len(parts) == 2 else (None, parts[0]) + ) tbl = sqlalchemy.table(tbl_name, schema=schema) if keep_attributes: @@ -574,9 +576,9 @@ def index( else: stmt = sqlalchemy.select(sqlalchemy.column(geom_col)).select_from(tbl) - df = gpd.read_postgis( - stmt, connection, geom_col=geom_col - ).rename_geometry("geometry") + df = gpd.read_postgis(stmt, connection, geom_col=geom_col).rename_geometry( + "geometry" + ) else: # Read file df = gpd.read_file(input_file, layer=layer) @@ -693,6 +695,13 @@ def index( LOGGER.error(f"Task failed with {e}") raise (e) + if not any(Path(tmpdir2).glob("*.parquet")): + LOGGER.warning( + "No features were indexed (resolution %s may be too coarse for the input). Nothing to write; exiting.", + resolution, + ) + return output_directory + parent_partitioning( indexer, Path(tmpdir2), diff --git a/vector2dggs/constants.py b/vector2dggs/constants.py index 3f7c243..b52b0cc 100644 --- a/vector2dggs/constants.py +++ b/vector2dggs/constants.py @@ -7,6 +7,7 @@ MIN_RHP, MAX_RHP = 0, 15 MIN_S2, MAX_S2 = 0, 30 MIN_GEOHASH, MAX_GEOHASH = 1, 12 +MIN_A5, MAX_A5 = 0, 30 @unique @@ -36,6 +37,7 @@ class GeoOutputMode(StrEnum): MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET) ), "s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)), + "a5": lambda resolution: max(MIN_A5, (resolution - DEFAULT_PARENT_OFFSET)), } DEFAULT_PARENT_OFFSET = 6 @@ -114,6 +116,41 @@ class GeoOutputMode(StrEnum): 15: 4.864277038533613 * 1e-7, } +# https://github.com/felixpalmer/a5 +A5_CELL_AREA_M2_BY_LEVEL = { + 0: 42505468731619.93, + 1: 8501093746323.985, + 2: 2125273436580.9963, + 3: 531318359145.2491, + 4: 132829589786.31227, + 5: 33207397446.578068, + 6: 8301849361.644517, + 7: 2075462340.4111292, + 8: 518865585.1027823, + 9: 129716396.27569558, + 10: 32429099.068923894, + 11: 8107274.767230974, + 12: 2026818.6918077434, + 13: 506704.67295193585, + 14: 126676.16823798396, + 15: 31669.04205949599, + 16: 7917.260514873998, + 17: 1979.3151287184994, + 18: 494.82878217962485, + 19: 123.70719554490621, + 20: 30.926798886226553, + 21: 7.731699721556638, + 22: 1.9329249303891596, + 23: 0.4832312325972899, + 24: 0.12080780814932247, + 25: 0.03020195203733062, + 26: 0.007550488009332655, + 27: 0.0018876220023331637, + 28: 0.0004719055005832909, + 29: 0.00011797637514582273, + 30: 2.9494093786455682e-05, +} + # https://www.movable-type.co.uk/scripts/geohash.html GEOHASH_MAX_CELL_AREA_KM2_BY_LEVEL = { 1: 5000 * 5000, @@ -135,6 +172,7 @@ class GeoOutputMode(StrEnum): "h3": lambda res: H3_CELLS_MAX_AREA_KM2_BY_LEVEL[res] * 1e6, "rhp": lambda res: RHP_CELLS_AREA_KM2_BY_LEVEL[res] * 1e6, "geohash": lambda res: GEOHASH_MAX_CELL_AREA_KM2_BY_LEVEL[res] * 1e6, + "a5": lambda res: A5_CELL_AREA_M2_BY_LEVEL[res], } DEFAULT_AREA_THRESHOLD_M2 = lambda dggs, parent_res: DGGS_CELL_AREA_M2_BY_RES[dggs]( diff --git a/vector2dggs/indexerfactory.py b/vector2dggs/indexerfactory.py index b124f97..fa51f45 100644 --- a/vector2dggs/indexerfactory.py +++ b/vector2dggs/indexerfactory.py @@ -16,6 +16,7 @@ "geohash", ), "s2": ("vector2dggs.indexers.s2vectorindexer", "S2VectorIndexer", "s2"), + "a5": ("vector2dggs.indexers.a5vectorindexer", "A5VectorIndexer", "a5"), } diff --git a/vector2dggs/indexers/a5vectorindexer.py b/vector2dggs/indexers/a5vectorindexer.py new file mode 100644 index 0000000..8d4ff9d --- /dev/null +++ b/vector2dggs/indexers/a5vectorindexer.py @@ -0,0 +1,113 @@ +import a5 +import pandas as pd +import geopandas as gpd +from shapely.geometry import Point, Polygon + +from vector2dggs.indexers.vectorindexer import VectorIndexer + + +class A5VectorIndexer(VectorIndexer): + """ + Provides integration for the A5 pentagonal DGGS. + """ + + @staticmethod + def _geo_to_cells( + df: gpd.GeoDataFrame, resolution: int, cell_fn, geom_col: str + ) -> pd.DataFrame: + return ( + df.assign( + __cells__=df[geom_col].apply(lambda geom: cell_fn(geom, resolution)) + ) + .drop(columns=[geom_col]) + .explode("__cells__") + .dropna(subset=["__cells__"]) + .set_index("__cells__") + .rename_axis(None) + ) + + @staticmethod + def _polyfill_polygon(geom, resolution: int) -> list: + cells = set( + a5.uncompact( + a5.polygon_to_cells(list(geom.exterior.coords), resolution), resolution + ) + ) + for interior in geom.interiors: + cells -= set( + a5.uncompact( + a5.polygon_to_cells(list(interior.coords), resolution), resolution + ) + ) + return [a5.u64_to_hex(c) for c in cells] + + @staticmethod + def _linetrace(geom, resolution: int) -> list: + return [ + a5.u64_to_hex(c) + for c in a5.line_string_to_cells(list(geom.coords), resolution) + ] + + def polyfill(self, df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame: + geom_col = df.geometry.name + parts = [] + + df_polygon = df[df.geom_type == "Polygon"] + if not df_polygon.empty: + parts.append( + self._geo_to_cells( + df_polygon, resolution, self._polyfill_polygon, geom_col + ) + ) + + df_linestring = df[df.geom_type == "LineString"] + if not df_linestring.empty: + ls = self._geo_to_cells( + df_linestring, resolution, self._linetrace, geom_col + ) + parts.append(ls[~ls.index.duplicated(keep="first")]) + + df_point = df[df.geom_type == "Point"] + if not df_point.empty: + parts.append( + self._geo_to_cells( + df_point, + resolution, + lambda geom, res: [ + a5.u64_to_hex(a5.lonlat_to_cell((geom.x, geom.y), res)) + ], + geom_col, + ) + ) + + return pd.concat(parts) if parts else pd.DataFrame() + + def secondary_index(self, df: pd.DataFrame, parent_res: int) -> pd.DataFrame: + df[f"a5_{parent_res:02}"] = df.index.map( + lambda cell: a5.u64_to_hex( + a5.cell_to_parent(a5.hex_to_u64(cell), parent_res) + ) + ) + return df + + def compaction(self, df, res, col_order, dggs_col, id_field): + def _compact_hex(cells): + return [ + a5.u64_to_hex(c) for c in a5.compact([a5.hex_to_u64(c) for c in cells]) + ] + + def _child_hex(cell, res): + return a5.u64_to_hex(a5.cell_to_children(a5.hex_to_u64(cell), res)[0]) + + return self.compaction_common( + df, res, id_field, col_order, dggs_col, _compact_hex, _child_hex + ) + + @staticmethod + def cell_to_point(cell: str) -> Point: + lon, lat = a5.cell_to_lonlat(a5.hex_to_u64(cell)) + return Point(lon, lat) + + @staticmethod + def cell_to_polygon(cell: str) -> Polygon: + return Polygon(a5.cell_to_boundary(a5.hex_to_u64(cell)))