From 9e0f1782b38d7080b85e68cb6a70620070675aaf Mon Sep 17 00:00:00 2001 From: alpha-beta-soup Date: Thu, 14 May 2026 12:01:14 +1200 Subject: [PATCH 1/3] adds the a5 dggs --- .gitignore | 1 + README.md | 7 + poetry.lock | 23 ++- pyproject.toml | 4 +- tests/classes/a5.py | 177 ++++++++++++++++++++ tests/data/se-island.gpkg | Bin 163840 -> 212992 bytes tests/test_runthrough.py | 1 + tests/test_vector2dggs.py | 2 + vector2dggs/a5.py | 206 ++++++++++++++++++++++++ vector2dggs/cli.py | 2 + vector2dggs/common.py | 17 +- vector2dggs/constants.py | 38 +++++ vector2dggs/indexerfactory.py | 1 + vector2dggs/indexers/a5vectorindexer.py | 96 +++++++++++ 14 files changed, 567 insertions(+), 8 deletions(-) create mode 100644 tests/classes/a5.py create mode 100644 vector2dggs/a5.py create mode 100644 vector2dggs/indexers/a5vectorindexer.py diff --git a/.gitignore b/.gitignore index 2e6a4aa..a111799 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ dist conda* .conda* .spyproject/ +.vscode/ \ No newline at end of file diff --git a/README.md b/README.md index c7fdd3c..8bb3f86 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Currently this tool supports the following DGGSs: - [H3](https://h3geo.org/) - [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system) - [S2](https://s2geometry.io/) +- [A5](https://a5geo.org/) ... and the following geocode systems: @@ -164,6 +165,12 @@ Or without activating the shell: poetry run python tests/test_vector2dggs.py ``` +To test a specific DGGS: + +```bash +python -m pytest tests/test_runthrough.py -k "a5" -v +``` + Test data are included at `tests/data/`. ## Example commands diff --git a/poetry.lock b/poetry.lock index 93e1540..ca68623 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "backports-tarfile" @@ -1787,6 +1787,22 @@ files = [ {file = "psycopg2-2.9.12.tar.gz", hash = "sha256:1dedb1c7a1d8552c4a6044c6b1c41a52e6a8e2d144af83eccac758076b1b7c15"}, ] +[[package]] +name = "pya5" +version = "0.8.0" +description = "A5 - Global Pentagonal Geospatial Index" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"a5\" or extra == \"all\"" +files = [ + {file = "pya5-0.8.0-py3-none-any.whl", hash = "sha256:a3c5df400f861ccfb4d69086246f54c95d34dc8a0500b59923666aca155d15c4"}, + {file = "pya5-0.8.0.tar.gz", hash = "sha256:2dbdf7efe300f151d8795d0a95bee5cc6d6b32d9fd46c4284815d316e4233b8f"}, +] + +[package.extras] +test = ["pytest (>=7.0.0)"] + [[package]] name = "pyarrow" version = "20.0.0" @@ -2816,7 +2832,8 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -all = ["h3", "python-geohash", "rhealpixdggs", "rhppandas", "rusty-polygon-geohasher", "s2geometry"] +a5 = ["pya5"] +all = ["h3", "pya5", "python-geohash", "rhealpixdggs", "rhppandas", "rusty-polygon-geohasher", "s2geometry"] geohash = ["python-geohash", "rusty-polygon-geohasher"] h3 = ["h3"] rhp = ["rhealpixdggs", "rhppandas"] @@ -2825,4 +2842,4 @@ s2 = ["s2geometry"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "f05a43de1b7e396b32e55ae07ef59414a002b48e94ee96f34635f3286b402b52" +content-hash = "169154507cb2e135ad7b6ec485adc54c7b1ac832f710f49648810637fbc66d71" diff --git a/pyproject.toml b/pyproject.toml index 951bcab..22adbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ rhealpixdggs = { version = "^0.5.12", optional = true} s2geometry = { version = "^0.9.0", optional = true} rusty-polygon-geohasher = { version = "^0.2.3", optional = true} python-geohash = { version = "^0.8.5", optional = true} +pya5 = { version = "^0.8.0", optional = true } urllib3 = "^2.6.3" cryptography = "^46.0.5" @@ -58,6 +59,7 @@ line-length = 88 h3 = ["h3"] rhp = ["rhppandas", "rhealpixdggs"] s2 = ["s2geometry"] +a5 = ["pya5"] geohash = ["rusty-polygon-geohasher", "python-geohash"] # Convenience extras -all = ["h3", "rhppandas", "rhealpixdggs", "s2geometry", "rusty-polygon-geohasher", "python-geohash"] \ No newline at end of file +all = ["h3", "rhppandas", "rhealpixdggs", "s2geometry", "rusty-polygon-geohasher", "python-geohash", "pya5"] \ No newline at end of file diff --git a/tests/classes/a5.py b/tests/classes/a5.py new file mode 100644 index 0000000..41324e8 --- /dev/null +++ b/tests/classes/a5.py @@ -0,0 +1,177 @@ +from .base import TestRunthrough +from ..data.datapaths import * + +from vector2dggs.a5 import a5 + + +class TestA5(TestRunthrough): + """ + Sends the test data file through A5 indexing using default parameters. + """ + + def test_a5_run(self): + try: + a5( + [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17"], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough failed.") + + def test_a5_run_overwrite(self): + try: + a5( + [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17"], + standalone_mode=False, + ) + a5( + [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17", "-o"], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough with overwrite failed.") + + def test_a5_cut_crs(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "-crs", + "3793", + "-c", + "4000", + ], + standalone_mode=False, + ) + + except Exception: + self.fail("A5 run through using actual CRS failed") + + def test_a5_cut_crs_reproject(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "-crs", + "4326", + "-c", + "0.005", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with reprojected CRS failed") + + def test_a5_no_bisection(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "-c", + "0", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through without bisection failed") + + def test_a5_compaction(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "-co", + "-id", + "LCDB_UID", + ], + standalone_mode=False, + ) + + except Exception: + self.fail(f"A5 runthrough with compaction failed.") + + def test_a5_geo_point(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "--geo", + "point", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo point failed") + + def test_a5_geo_point_compact(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "--geo", + "point", + "-co", + "-id", + "LCDB_UID", + "-o", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo point -co failed") + + def test_a5_geo_polygon(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "--geo", + "polygon", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo polygon failed") + + def test_a5_geo_polygon_compact(self): + try: + a5( + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "-r", + "17", + "--geo", + "polygon", + "-co", + "-id", + "LCDB_UID", + "-o", + ], + standalone_mode=False, + ) + except Exception: + self.fail("A5 run through with --geo polygon -co failed") diff --git a/tests/data/se-island.gpkg b/tests/data/se-island.gpkg index 74b24c1cf9fb327c8c9fe6377c0e98dec8bcb137..a158313dc0d72f4aba852beeb7cd1c39f276b95e 100644 GIT binary patch delta 2837 zcma)8drVVT7(eIqmcH+S@tSL(1*eEoxVJ3~tPhZO+KiW29zvuRxdS3EOA(jF$yGo_ z7pOJdzZMt&*k4PQ%3PIf@v+2YnaQRkON?wTCi<5#X`J|4%+76TX~8Sw;j6a)$1~9#>z-r<}_(rhP$Y>bv_6(_I319$zAK-{KE*kxZAq2Fc~oXgj(7-s*-bhrO=0 zYWMcq8v2%R{>VDw<1WB=@nd548%}+X(xopYD4HPOo(xn>?xIC`*9xhs>=uLiQy{*Dw(>p4*RoV2pMDLq@ z>e^AD&#iIwa!w<|8gumyTeD+H!<4Sju<;tUtgIxg@eMDWT1!luIFHzt&KS0cDnjRD zpQRp&fkr5Zs9Nm+lp4~HmOw!byaCm*P$5_g$vCT}K=V{B_46-Ol56H9 zp<*h8FKmOiAXP9bmkR8eP->r?fpbt7%Mh2E$wW$H2vBcnG(_Q(ZtSsA=18{9p z;;NUG9kVF4V02vq^W;Nt940n5vd8x;NY&*D%&&b0e~=_LXN$+AaxEB_SBtVT`16Nh z9a>g_doW!-3DhT))zV2n&r7pTT4*hpN|eCd7#yb(M_@LOmTI(MY)68w8=Zg>l(_3m z?D&vc3r z)G5A3oE=;b=MfKDku^g|b|5*S%c)0sWU<$yO=OvMpw&r)@u=1$d{>V$G8Y|%8HNTr z$iZV?PVuT9ts}O$31teFU8sfJPwzq{g+`gyrjw5iy)CFPrD-|kDn}e;mT=I4D#P)K zi?<047iF*^{$soTpWnEg|K|o|42NtI@*C0SFm*WfVMds0d~@pTa-R9d2oE0)>(Ki} zp+_-Vy*w%*rx~RYP;BuVnnT4u+>E-y<_GuiX@#=P%1X!=h2D#Ae3An$H`nKR%a$qr z-5Es+mv^I0O$J%!)~%4&C3eu)-@!$V!XH=R4iGntiO8#3kQo*Td<#mm#`TT6f4IBj zjeMaUHWA^S7E}T^2+6G|Ej^YnN+p!FqI^=heXZyt*y!i$C1i?9gcofqrl>e;ksw!J i@v*~JueaK&Y~<^rwkGsxAx@QD_|=K>=tbP(X#EG=u^Aiy delta 399 zcmZo@;B9E&njkI6#lXOz0mLw%Fj2=?l#4;Hs*n{Z#Kiv%B%#LtoB!KpL4#KQ%|H3M z1=tu_e=xBA*x2}vb@LBZ9xgUU{?82jpEnCGc*!r$!z|BPoEo25oRgTB0x}(^BrjO< zBLn|Opw8#~lRw4_<2DJR_znaA9iTfd@lU=}uK`s2j)DIjQ1cW1$?y8bkTq}qJYP3} zg>xq7#EFf+Hp_i{CI^&-Tebb8JfrXhE@r;V4E*!?P53TvY&^=h{qjFXLq;xU{)r6y z5BRt8PXt=j%s)MspNV7pOn#;e2YK%43_J`c8Mt}5>Nr1h#&EE+^|F>QpJH0Wc$wiO z-?xd4E4S|nWpd$~o)f~(S(d=0004I%eJTI| diff --git a/tests/test_runthrough.py b/tests/test_runthrough.py index 33ac6af..a87c0f4 100644 --- a/tests/test_runthrough.py +++ b/tests/test_runthrough.py @@ -2,6 +2,7 @@ @author: ndemaio """ +from .classes.a5 import TestA5 from .classes.h3 import TestH3 from .classes.rHP import TestRHP from .classes.s2 import TestS2 diff --git a/tests/test_vector2dggs.py b/tests/test_vector2dggs.py index aa17d3f..b8d83aa 100644 --- a/tests/test_vector2dggs.py +++ b/tests/test_vector2dggs.py @@ -2,12 +2,14 @@ @author: ndemaio """ +import sys import unittest from pathlib import Path if __name__ == "__main__": tests_dir = Path(__file__).resolve().parent top_level_dir = tests_dir.parent + sys.path.insert(0, str(top_level_dir)) testSuite = unittest.defaultTestLoader.discover( start_dir=str(tests_dir), top_level_dir=str(top_level_dir), diff --git a/vector2dggs/a5.py b/vector2dggs/a5.py new file mode 100644 index 0000000..82a884d --- /dev/null +++ b/vector2dggs/a5.py @@ -0,0 +1,206 @@ +import click +import click_log +import tempfile +import pyproj + +from typing import Union +from pathlib import Path + +import vector2dggs.constants as const +import vector2dggs.common as common + +from vector2dggs import __version__ + + +@click.command(context_settings={"show_default": True}) +@click_log.simple_verbosity_option(common.LOGGER) +@click.argument("vector_input", required=True, type=click.Path(), nargs=1) +@click.argument("output_directory", required=True, type=click.Path(), nargs=1) +@click.option( + "-r", + "--resolution", + required=True, + type=click.Choice(list(map(str, range(const.MIN_A5, const.MAX_A5 + 1)))), + help="A5 resolution to index", + nargs=1, +) +@click.option( + "-pr", + "--parent_res", + required=False, + type=click.Choice(list(map(str, range(const.MIN_A5, const.MAX_A5 + 1)))), + help="A5 parent resolution for the output partition. Defaults to resolution - 6", +) +@click.option( + "-id", + "--id_field", + required=False, + default=const.DEFAULTS["id"], + type=str, + help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.", + nargs=1, +) +@click.option( + "-k", + "--keep_attributes", + is_flag=True, + show_default=True, + default=const.DEFAULTS["k"], + help="Retain attributes in output. The default is to create an output that only includes A5 cell ID and the ID given by the -id field (or the default index ID).", +) +@click.option( + "-ch", + "--chunksize", + required=True, + type=int, + default=const.DEFAULTS["ch"], + help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.", + nargs=1, +) +@click.option( + "-s", + "--spatial_sorting", + type=click.Choice(const.SPATIAL_SORTING_METHODS), + default=const.DEFAULTS["s"], + help="Spatial sorting method when perfoming spatial partitioning.", +) +@click.option( + "-crs", + "--cut_crs", + required=False, + default=const.DEFAULTS["crs"], + type=int, + help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cut_threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.", + nargs=1, +) +@click.option( + "-c", + "--cut_threshold", + required=False, + default=const.DEFAULTS["c"], + type=float, + help="Cutting up large geometries into smaller geometries based on a target area. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS. If left unspecified, the threshold will be the maximum area of a cell at the parent resolution, in square metres or feet according to the CRS. A threshold of 0 will skip bissection entirely (effectively ignoring --cut_crs).", + nargs=1, +) +@click.option( + "-t", + "--threads", + required=False, + default=const.DEFAULTS["t"], + type=int, + help="Amount of threads used for operation", + nargs=1, +) +@click.option( + "-cp", + "--compression", + required=False, + default=const.DEFAULTS["cp"], + type=str, + help="Compression method to use for the output Parquet files. Options include 'snappy', 'gzip', 'brotli', 'lz4', 'zstd', etc. Use 'none' for no compression.", + nargs=1, +) +@click.option( + "-lyr", + "--layer", + required=False, + default=const.DEFAULTS["lyr"], + type=str, + help="Name of the layer or table to read when using an input that supports layers or tables", + nargs=1, +) +@click.option( + "-g", + "--geom_col", + required=False, + default=const.DEFAULTS["g"], + type=str, + help="Column name to use when using a spatial database connection as input", + nargs=1, +) +@click.option( + "--geo", + required=False, + default=const.DEFAULTS["geo"], + type=click.Choice(const.GEOM_TYPES), + help="Select geometry encoding for the output: 'none' for regular Parquet (no GeoParquet metadata), or 'point'/'polygon' to write GeoParquet (v1.1.0) with the corresponding geometry type.", + nargs=1, +) +@click.option( + "--tempdir", + default=const.DEFAULTS["tempdir"], + type=click.Path(), + help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.", +) +@click.option( + "-co", + "--compact", + is_flag=True, + help="Compact the A5 cells up to the parent resolution. Compaction requires an id_field.", +) +@click.option("-o", "--overwrite", is_flag=True) +@click.version_option(version=__version__) +def a5( + vector_input: Union[str, Path], + output_directory: Union[str, Path], + resolution: str, + parent_res: str, + id_field: str, + keep_attributes: bool, + chunksize: int, + spatial_sorting: str, + cut_crs: int, + cut_threshold: int, + threads: int, + compression: str, + layer: str, + geom_col: str, + geo: str, + tempdir: Union[str, Path], + compact: bool, + overwrite: bool, +): + """ + Ingest a vector dataset and index it to the A5 DGGS. + + VECTOR_INPUT is the path to input vector geospatial data. + OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store. + """ + tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir + + common.check_resolutions(resolution, parent_res) + common.check_compaction_requirements(compact, id_field) + + spatial_sorting = const.SpatialSortingMethod(spatial_sorting).value + geo = const.GeoOutputMode(geo).value + + con, vector_input = common.db_conn_and_input_path(vector_input) + output_directory = common.resolve_output_path(output_directory, overwrite) + + if cut_crs is not None: + cut_crs = pyproj.CRS.from_user_input(cut_crs) + + try: + common.index( + "a5", + vector_input, + output_directory, + int(resolution), + parent_res, + keep_attributes, + chunksize, + spatial_sorting, + cut_threshold, + threads, + compression=compression, + cut_crs=cut_crs, + id_field=id_field, + con=con, + layer=layer, + geom_col=geom_col, + geo=geo, + overwrite=overwrite, + compact=compact, + ) + except: + raise diff --git a/vector2dggs/cli.py b/vector2dggs/cli.py index de2ea1a..07ccf7b 100644 --- a/vector2dggs/cli.py +++ b/vector2dggs/cli.py @@ -1,6 +1,7 @@ import click from vector2dggs import __version__ +from vector2dggs.a5 import a5 from vector2dggs.h3 import h3 from vector2dggs.rHP import rhp from vector2dggs.s2 import s2 @@ -21,6 +22,7 @@ def cli(): pass +cli.add_command(a5) cli.add_command(h3) cli.add_command(rhp) cli.add_command(s2) diff --git a/vector2dggs/common.py b/vector2dggs/common.py index 1e6705f..b7f82e4 100644 --- a/vector2dggs/common.py +++ b/vector2dggs/common.py @@ -562,7 +562,9 @@ def index( # Database connection with con.connect() as connection: parts = layer.rsplit(".", 1) - schema, tbl_name = (parts[0], parts[1]) if len(parts) == 2 else (None, parts[0]) + schema, tbl_name = ( + (parts[0], parts[1]) if len(parts) == 2 else (None, parts[0]) + ) tbl = sqlalchemy.table(tbl_name, schema=schema) if keep_attributes: @@ -574,9 +576,9 @@ def index( else: stmt = sqlalchemy.select(sqlalchemy.column(geom_col)).select_from(tbl) - df = gpd.read_postgis( - stmt, connection, geom_col=geom_col - ).rename_geometry("geometry") + df = gpd.read_postgis(stmt, connection, geom_col=geom_col).rename_geometry( + "geometry" + ) else: # Read file df = gpd.read_file(input_file, layer=layer) @@ -693,6 +695,13 @@ def index( LOGGER.error(f"Task failed with {e}") raise (e) + if not any(Path(tmpdir2).glob("*.parquet")): + LOGGER.warning( + "No features were indexed (resolution %s may be too coarse for the input). Nothing to write; exiting.", + resolution, + ) + return output_directory + parent_partitioning( indexer, Path(tmpdir2), diff --git a/vector2dggs/constants.py b/vector2dggs/constants.py index 3f7c243..b52b0cc 100644 --- a/vector2dggs/constants.py +++ b/vector2dggs/constants.py @@ -7,6 +7,7 @@ MIN_RHP, MAX_RHP = 0, 15 MIN_S2, MAX_S2 = 0, 30 MIN_GEOHASH, MAX_GEOHASH = 1, 12 +MIN_A5, MAX_A5 = 0, 30 @unique @@ -36,6 +37,7 @@ class GeoOutputMode(StrEnum): MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET) ), "s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)), + "a5": lambda resolution: max(MIN_A5, (resolution - DEFAULT_PARENT_OFFSET)), } DEFAULT_PARENT_OFFSET = 6 @@ -114,6 +116,41 @@ class GeoOutputMode(StrEnum): 15: 4.864277038533613 * 1e-7, } +# https://github.com/felixpalmer/a5 +A5_CELL_AREA_M2_BY_LEVEL = { + 0: 42505468731619.93, + 1: 8501093746323.985, + 2: 2125273436580.9963, + 3: 531318359145.2491, + 4: 132829589786.31227, + 5: 33207397446.578068, + 6: 8301849361.644517, + 7: 2075462340.4111292, + 8: 518865585.1027823, + 9: 129716396.27569558, + 10: 32429099.068923894, + 11: 8107274.767230974, + 12: 2026818.6918077434, + 13: 506704.67295193585, + 14: 126676.16823798396, + 15: 31669.04205949599, + 16: 7917.260514873998, + 17: 1979.3151287184994, + 18: 494.82878217962485, + 19: 123.70719554490621, + 20: 30.926798886226553, + 21: 7.731699721556638, + 22: 1.9329249303891596, + 23: 0.4832312325972899, + 24: 0.12080780814932247, + 25: 0.03020195203733062, + 26: 0.007550488009332655, + 27: 0.0018876220023331637, + 28: 0.0004719055005832909, + 29: 0.00011797637514582273, + 30: 2.9494093786455682e-05, +} + # https://www.movable-type.co.uk/scripts/geohash.html GEOHASH_MAX_CELL_AREA_KM2_BY_LEVEL = { 1: 5000 * 5000, @@ -135,6 +172,7 @@ class GeoOutputMode(StrEnum): "h3": lambda res: H3_CELLS_MAX_AREA_KM2_BY_LEVEL[res] * 1e6, "rhp": lambda res: RHP_CELLS_AREA_KM2_BY_LEVEL[res] * 1e6, "geohash": lambda res: GEOHASH_MAX_CELL_AREA_KM2_BY_LEVEL[res] * 1e6, + "a5": lambda res: A5_CELL_AREA_M2_BY_LEVEL[res], } DEFAULT_AREA_THRESHOLD_M2 = lambda dggs, parent_res: DGGS_CELL_AREA_M2_BY_RES[dggs]( diff --git a/vector2dggs/indexerfactory.py b/vector2dggs/indexerfactory.py index b124f97..fa51f45 100644 --- a/vector2dggs/indexerfactory.py +++ b/vector2dggs/indexerfactory.py @@ -16,6 +16,7 @@ "geohash", ), "s2": ("vector2dggs.indexers.s2vectorindexer", "S2VectorIndexer", "s2"), + "a5": ("vector2dggs.indexers.a5vectorindexer", "A5VectorIndexer", "a5"), } diff --git a/vector2dggs/indexers/a5vectorindexer.py b/vector2dggs/indexers/a5vectorindexer.py new file mode 100644 index 0000000..3f78b7f --- /dev/null +++ b/vector2dggs/indexers/a5vectorindexer.py @@ -0,0 +1,96 @@ +import a5 +import pandas as pd +import geopandas as gpd +from shapely.geometry import Point, Polygon + +from vector2dggs.indexers.vectorindexer import VectorIndexer + + +class A5VectorIndexer(VectorIndexer): + """ + Provides integration for the A5 pentagonal DGGS. + """ + + @staticmethod + def _geo_to_cells( + df: gpd.GeoDataFrame, resolution: int, cell_fn, geom_col: str + ) -> pd.DataFrame: + return ( + df.assign( + __cells__=df[geom_col].apply(lambda geom: cell_fn(geom, resolution)) + ) + .drop(columns=[geom_col]) + .explode("__cells__") + .dropna(subset=["__cells__"]) + .set_index("__cells__") + .rename_axis(None) + ) + + @staticmethod + def _polyfill_polygon(geom, resolution: int) -> list: + cells = set(a5.uncompact(a5.polygon_to_cells(list(geom.exterior.coords), resolution), resolution)) + for interior in geom.interiors: + cells -= set(a5.uncompact(a5.polygon_to_cells(list(interior.coords), resolution), resolution)) + return [a5.u64_to_hex(c) for c in cells] + + @staticmethod + def _linetrace(geom, resolution: int) -> list: + return [a5.u64_to_hex(c) for c in a5.line_string_to_cells(list(geom.coords), resolution)] + + def polyfill(self, df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame: + geom_col = df.geometry.name + parts = [] + + df_polygon = df[df.geom_type == "Polygon"] + if not df_polygon.empty: + parts.append( + self._geo_to_cells( + df_polygon, resolution, self._polyfill_polygon, geom_col + ) + ) + + df_linestring = df[df.geom_type == "LineString"] + if not df_linestring.empty: + ls = self._geo_to_cells( + df_linestring, resolution, self._linetrace, geom_col + ) + parts.append(ls[~ls.index.duplicated(keep="first")]) + + df_point = df[df.geom_type == "Point"] + if not df_point.empty: + parts.append( + self._geo_to_cells( + df_point, + resolution, + lambda geom, res: [a5.u64_to_hex(a5.lonlat_to_cell((geom.x, geom.y), res))], + geom_col, + ) + ) + + return pd.concat(parts) if parts else pd.DataFrame() + + def secondary_index(self, df: pd.DataFrame, parent_res: int) -> pd.DataFrame: + df[f"a5_{parent_res:02}"] = df.index.map( + lambda cell: a5.u64_to_hex(a5.cell_to_parent(a5.hex_to_u64(cell), parent_res)) + ) + return df + + def compaction(self, df, res, col_order, dggs_col, id_field): + def _compact_hex(cells): + return [a5.u64_to_hex(c) for c in a5.compact([a5.hex_to_u64(c) for c in cells])] + + def _child_hex(cell, res): + return a5.u64_to_hex(a5.cell_to_children(a5.hex_to_u64(cell), res)[0]) + + return self.compaction_common( + df, res, id_field, col_order, dggs_col, _compact_hex, _child_hex + ) + + @staticmethod + def cell_to_point(cell: str) -> Point: + lon, lat = a5.cell_to_lonlat(a5.hex_to_u64(cell)) + return Point(lon, lat) + + @staticmethod + def cell_to_polygon(cell: str) -> Polygon: + return Polygon(a5.cell_to_boundary(a5.hex_to_u64(cell))) From 423de2d82808e6985b4dc334b0a68fab9cfb4a11 Mon Sep 17 00:00:00 2001 From: alpha-beta-soup Date: Thu, 14 May 2026 12:44:04 +1200 Subject: [PATCH 2/3] explicit layer in tests --- tests/classes/a5.py | 44 +++++++++++++++++++++++++++++++--- tests/classes/geohash.py | 51 ++++++++++++++++++++++++++++++++++++---- tests/classes/h3.py | 44 +++++++++++++++++++++++++++++++--- tests/classes/rHP.py | 42 ++++++++++++++++++++++++++++++--- tests/classes/s2.py | 44 +++++++++++++++++++++++++++++++--- tests/data/datapaths.py | 1 + 6 files changed, 210 insertions(+), 16 deletions(-) diff --git a/tests/classes/a5.py b/tests/classes/a5.py index 41324e8..f2b89eb 100644 --- a/tests/classes/a5.py +++ b/tests/classes/a5.py @@ -12,7 +12,14 @@ class TestA5(TestRunthrough): def test_a5_run(self): try: a5( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_a5_run(self): def test_a5_run_overwrite(self): try: a5( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + ], standalone_mode=False, ) a5( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "17", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "17", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_a5_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "-crs", @@ -58,6 +82,8 @@ def test_a5_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "-crs", @@ -76,6 +102,8 @@ def test_a5_no_bisection(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "-c", @@ -92,6 +120,8 @@ def test_a5_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "-co", @@ -110,6 +140,8 @@ def test_a5_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "--geo", @@ -126,6 +158,8 @@ def test_a5_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "--geo", @@ -146,6 +180,8 @@ def test_a5_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "--geo", @@ -162,6 +198,8 @@ def test_a5_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "17", "--geo", diff --git a/tests/classes/geohash.py b/tests/classes/geohash.py index ea8f02a..4d374ea 100644 --- a/tests/classes/geohash.py +++ b/tests/classes/geohash.py @@ -12,7 +12,14 @@ class TestGeohash(TestRunthrough): def test_geohash_run(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_geohash_run(self): def test_geohash_run_overwrite(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + ], standalone_mode=False, ) geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + "-o", + ], standalone_mode=False, ) @@ -36,7 +58,16 @@ def test_geohash_run_overwrite(self): def test_geohash_cut_crs(self): try: geohash( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "6", "-crs", "3793"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "6", + "-crs", + "3793", + ], standalone_mode=False, ) @@ -49,6 +80,8 @@ def test_geohash_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "-crs", @@ -67,6 +100,8 @@ def test_geohash_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "-co", @@ -85,6 +120,8 @@ def test_geohash_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -101,6 +138,8 @@ def test_geohash_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -121,6 +160,8 @@ def test_geohash_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", @@ -137,6 +178,8 @@ def test_geohash_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "6", "--geo", diff --git a/tests/classes/h3.py b/tests/classes/h3.py index be12b70..36311bb 100644 --- a/tests/classes/h3.py +++ b/tests/classes/h3.py @@ -12,7 +12,14 @@ class TestH3(TestRunthrough): def test_h3_run(self): try: h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_h3_run(self): def test_h3_run_overwrite(self): try: h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) h3( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_h3_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -58,6 +82,8 @@ def test_h3_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -76,6 +102,8 @@ def test_h3_no_bisection(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-c", @@ -92,6 +120,8 @@ def test_h3_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-co", @@ -110,6 +140,8 @@ def test_h3_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -127,6 +159,8 @@ def test_h3_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -147,6 +181,8 @@ def test_h3_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -164,6 +200,8 @@ def test_h3_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", diff --git a/tests/classes/rHP.py b/tests/classes/rHP.py index 348fb11..2187d9d 100644 --- a/tests/classes/rHP.py +++ b/tests/classes/rHP.py @@ -12,7 +12,14 @@ class TestRHP(TestRunthrough): def test_rhp_run(self): try: rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_rhp_run(self): def test_rhp_run_overwrite(self): try: rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + ], standalone_mode=False, ) rhp( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "8", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "8", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_rhp_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -56,6 +80,8 @@ def test_rhp_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-crs", @@ -74,6 +100,8 @@ def test_rhp_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "-co", @@ -92,6 +120,8 @@ def test_rhp_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -108,6 +138,8 @@ def test_rhp_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -128,6 +160,8 @@ def test_rhp_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", @@ -144,6 +178,8 @@ def test_rhp_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "8", "--geo", diff --git a/tests/classes/s2.py b/tests/classes/s2.py index 06fd59c..59ae587 100644 --- a/tests/classes/s2.py +++ b/tests/classes/s2.py @@ -12,7 +12,14 @@ class TestS2(TestRunthrough): def test_s2_run(self): try: s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + ], standalone_mode=False, ) @@ -22,11 +29,26 @@ def test_s2_run(self): def test_s2_run_overwrite(self): try: s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + ], standalone_mode=False, ) s2( - [TEST_FILE_PATH, str(TEST_OUTPUT_PATH), "-r", "13", "-o"], + [ + TEST_FILE_PATH, + str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, + "-r", + "13", + "-o", + ], standalone_mode=False, ) @@ -39,6 +61,8 @@ def test_s2_cut_crs(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-crs", @@ -58,6 +82,8 @@ def test_s2_cut_crs_reproject(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-crs", @@ -76,6 +102,8 @@ def test_s2_no_bisection(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-c", @@ -92,6 +120,8 @@ def test_s2_compaction(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "-co", @@ -110,6 +140,8 @@ def test_s2_geo_point(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -126,6 +158,8 @@ def test_s2_geo_point_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -146,6 +180,8 @@ def test_s2_geo_polygon(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", @@ -162,6 +198,8 @@ def test_s2_geo_polygon_compact(self): [ TEST_FILE_PATH, str(TEST_OUTPUT_PATH), + "--layer", + TEST_LAYER_NAME, "-r", "13", "--geo", diff --git a/tests/data/datapaths.py b/tests/data/datapaths.py index 0152124..34b04af 100644 --- a/tests/data/datapaths.py +++ b/tests/data/datapaths.py @@ -7,4 +7,5 @@ DATA_DIR = Path(__file__).resolve().parent TEST_FILE_PATH = str(DATA_DIR / "se-island.gpkg") +TEST_LAYER_NAME = "se_island" TEST_OUTPUT_PATH = DATA_DIR / "output" From ec3112267dc237915b217f6172e82a97bb294b0d Mon Sep 17 00:00:00 2001 From: alpha-beta-soup Date: Thu, 14 May 2026 12:44:13 +1200 Subject: [PATCH 3/3] black --- vector2dggs/indexers/a5vectorindexer.py | 29 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vector2dggs/indexers/a5vectorindexer.py b/vector2dggs/indexers/a5vectorindexer.py index 3f78b7f..8d4ff9d 100644 --- a/vector2dggs/indexers/a5vectorindexer.py +++ b/vector2dggs/indexers/a5vectorindexer.py @@ -28,14 +28,25 @@ def _geo_to_cells( @staticmethod def _polyfill_polygon(geom, resolution: int) -> list: - cells = set(a5.uncompact(a5.polygon_to_cells(list(geom.exterior.coords), resolution), resolution)) + cells = set( + a5.uncompact( + a5.polygon_to_cells(list(geom.exterior.coords), resolution), resolution + ) + ) for interior in geom.interiors: - cells -= set(a5.uncompact(a5.polygon_to_cells(list(interior.coords), resolution), resolution)) + cells -= set( + a5.uncompact( + a5.polygon_to_cells(list(interior.coords), resolution), resolution + ) + ) return [a5.u64_to_hex(c) for c in cells] @staticmethod def _linetrace(geom, resolution: int) -> list: - return [a5.u64_to_hex(c) for c in a5.line_string_to_cells(list(geom.coords), resolution)] + return [ + a5.u64_to_hex(c) + for c in a5.line_string_to_cells(list(geom.coords), resolution) + ] def polyfill(self, df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame: geom_col = df.geometry.name @@ -62,7 +73,9 @@ def polyfill(self, df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame: self._geo_to_cells( df_point, resolution, - lambda geom, res: [a5.u64_to_hex(a5.lonlat_to_cell((geom.x, geom.y), res))], + lambda geom, res: [ + a5.u64_to_hex(a5.lonlat_to_cell((geom.x, geom.y), res)) + ], geom_col, ) ) @@ -71,13 +84,17 @@ def polyfill(self, df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame: def secondary_index(self, df: pd.DataFrame, parent_res: int) -> pd.DataFrame: df[f"a5_{parent_res:02}"] = df.index.map( - lambda cell: a5.u64_to_hex(a5.cell_to_parent(a5.hex_to_u64(cell), parent_res)) + lambda cell: a5.u64_to_hex( + a5.cell_to_parent(a5.hex_to_u64(cell), parent_res) + ) ) return df def compaction(self, df, res, col_order, dggs_col, id_field): def _compact_hex(cells): - return [a5.u64_to_hex(c) for c in a5.compact([a5.hex_to_u64(c) for c in cells])] + return [ + a5.u64_to_hex(c) for c in a5.compact([a5.hex_to_u64(c) for c in cells]) + ] def _child_hex(cell, res): return a5.u64_to_hex(a5.cell_to_children(a5.hex_to_u64(cell), res)[0])