-
Notifications
You must be signed in to change notification settings - Fork 22
Geopackage reader #174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Geopackage reader #174
Changes from all commits
6aae933
53a4b88
43a87af
91ed1ef
3166a1c
0255232
5e60488
39730ce
26b7602
0077fef
de56781
6c3c505
5dc18fb
46b20b9
6538130
e93f91c
d73a169
1cf927e
4201eb2
f401635
82d635a
acc948c
dc722cc
f7e4e85
b2451b1
31d9cba
a884e88
e9cd713
6d2244c
7cd4851
f98d6b6
ddd08d5
b058783
347fe4b
4de2764
97a03e2
f98563c
9db3590
9d6574f
0a12360
41e73d2
3e4b02e
0ee3192
8ad4c2a
c1ec60d
c0c06a4
ff406c0
ec9de5a
c23fdb2
59428a4
62c9393
e4a40f0
a83e48b
7e43151
ed4bb0c
57f3a66
72335cd
97ca89f
694853a
b99a299
c47aeeb
1ad329c
db61a1c
00e988d
00a5290
fb6ef04
79560a3
f3dce85
12a2597
76193b6
6aba388
3dd8bda
d604a94
c563881
0b52233
ed7ac0c
99a37da
435f6bb
7751cd1
c1a2ea7
7399fa2
ef477bb
b912fa8
2aa6ce0
5efb822
f0ff9b2
2a012bc
bd2a0ed
b154726
f2fcb78
0ead8f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,10 @@ import PlyIO | |
| # CSV format | ||
| import CSV | ||
|
|
||
| # Database interfaces | ||
| import DBInterface | ||
| import SQLite | ||
|
Comment on lines
+42
to
+43
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Asked a question that was not answered about the need of importing these two.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left this as my answer to the previous comment This is taken from the docstring for SQLite.execute
...
|
||
|
|
||
| # geostats formats | ||
| import GslibIO | ||
|
|
||
|
|
@@ -72,7 +76,7 @@ const CDMEXTS = [".grib", ".nc"] | |
| const FORMATS = [ | ||
| (extension=".csv", load="CSV.jl", save="CSV.jl"), | ||
| (extension=".geojson", load="GeoJSON.jl", save="GeoJSON.jl"), | ||
| (extension=".gpkg", load="ArchGDAL.jl", save="ArchGDAL.jl"), | ||
| (extension=".gpkg", load="GeoIO.jl", save="ArchGDAL.jl"), | ||
| (extension=".grib", load="GRIBDatasets.jl", save=""), | ||
| (extension=".gslib", load="GslibIO.jl", save="GslibIO.jl"), | ||
| (extension=".jpeg", load="ImageIO.jl", save="ImageIO.jl"), | ||
|
|
@@ -127,6 +131,7 @@ include("extra/csv.jl") | |
| include("extra/gdal.jl") | ||
| include("extra/geotiff.jl") | ||
| include("extra/gis.jl") | ||
| include("extra/gpkg.jl") | ||
| include("extra/img.jl") | ||
| include("extra/msh.jl") | ||
| include("extra/obj.jl") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| # ------------------------------------------------------------------ | ||
| # Licensed under the MIT License. See LICENSE in the project root. | ||
| # ------------------------------------------------------------------ | ||
|
|
||
| include("gpkg/wkb.jl") | ||
| include("gpkg/read.jl") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,172 @@ | ||
| # ------------------------------------------------------------------ | ||
| # Licensed under the MIT License. See LICENSE in the project root. | ||
| # ------------------------------------------------------------------ | ||
|
|
||
| function gpkgread(fname; layer=1) | ||
| db = gpkgdatabase(fname) | ||
| table, geoms = gpkgextract(db; layer) | ||
| DBInterface.close!(db) | ||
| georef(table, geoms) | ||
| end | ||
|
|
||
| function gpkgdatabase(fname) | ||
| # connect to SQLite database on disk | ||
| db = SQLite.DB(fname) | ||
|
|
||
| # According to https://www.geopackage.org/spec/#r6 and https://www.geopackage.org/spec/#r7 | ||
| # PRAGMA integrity_check returns a single row with the value 'ok' | ||
| # PRAGMA foreign_key_check (w/ no parameter value) returns an empty result set | ||
| if first(DBInterface.execute(db, "PRAGMA integrity_check;")).integrity_check != "ok" || | ||
| !(isempty(DBInterface.execute(db, "PRAGMA foreign_key_check;"))) | ||
| throw(ErrorException("database integrity at risk or foreign key violation(s)")) | ||
| end | ||
|
|
||
| # According to https://www.geopackage.org/spec/#r10 and https://www.geopackage.org/spec/#r13 | ||
| # A GeoPackage SHALL include a 'gpkg_spatial_ref_sys' table and a 'gpkg_contents table' | ||
| if first(DBInterface.execute( | ||
| db, | ||
| """ | ||
| SELECT COUNT(*) AS n FROM sqlite_master WHERE | ||
| name IN ('gpkg_spatial_ref_sys', 'gpkg_contents') AND | ||
| type IN ('table', 'view'); | ||
| """ | ||
| )).n != 2 | ||
| throw(ErrorException("missing required metadata tables in the GeoPackage SQL database")) | ||
| end | ||
|
|
||
| db | ||
| end | ||
|
|
||
| # According to Geometry Columns Table Requirements | ||
| # https://www.geopackage.org/spec/#:~:text=2.1.5.1.2.%20Table%20Data%20Values | ||
| #------------------------------------------------------------------------------ | ||
| # Requirement 16: https://www.geopackage.org/spec/#r16 | ||
| # Values of the gpkg_contents table srs_id column | ||
| # SHALL reference values in the gpkg_spatial_ref_sys table srs_id column | ||
| # | ||
| # Requirement 18: https://www.geopackage.org/spec/#r18 | ||
| # The gpkg_contents table SHALL contain a row | ||
| # with a lowercase data_type column value of "features" | ||
| # for each vector features user data table or view. | ||
| # | ||
| # Requirement 22: gpkg_geometry_columns table | ||
| # SHALL contain one row record for the geometry column | ||
| # in each vector feature data table | ||
| # | ||
| # Requirement 23: gpkg_geometry_columns table_name column | ||
| # SHALL reference values in the gpkg_contents table_name column | ||
| # for rows with a data_type of 'features' | ||
| # | ||
| # Requirement 24: The column_name column value in a gpkg_geometry_columns row | ||
| # SHALL be the name of a column in the table or view specified by the table_name | ||
| # column value for that row. | ||
| # | ||
| # Requirement 25: The geometry_type_name value in a gpkg_geometry_columns row | ||
| # SHALL be one of the uppercase geometry type names specified | ||
| # | ||
| # Requirement 146: The srs_id value in a gpkg_geometry_columns table row | ||
| # SHALL match the srs_id column value from the corresponding row in the | ||
| # gpkg_contents table. | ||
| function gpkgextract(db; layer=1) | ||
| # get the first (and only) feature table returned in sqlite query results | ||
| metadata = first(DBInterface.execute( | ||
| db, | ||
| """ | ||
| SELECT g.table_name AS tablename, g.column_name AS geomcolumn, g.z as zextent, | ||
| c.srs_id AS srsid, srs.organization AS org, srs.organization_coordsys_id AS code | ||
| FROM gpkg_geometry_columns g, gpkg_spatial_ref_sys srs | ||
| JOIN gpkg_contents c ON ( g.table_name = c.table_name ) | ||
| WHERE c.data_type = 'features' | ||
| AND g.srs_id = c.srs_id | ||
| LIMIT 1 OFFSET ($layer-1); | ||
| """ | ||
| )) | ||
|
|
||
| # According to https://www.geopackage.org/spec/#r33, feature table geometry columns | ||
| # SHALL contain geometries with the srs_id specified for the column by the gpkg_geometry_columns table srs_id column value. | ||
| org = metadata.org | ||
| code = metadata.code | ||
| srsid = metadata.srsid | ||
| if srsid == 0 || srsid == 4326 | ||
| crs = isone(metadata.zextent) ? LatLonAlt{WGS84Latest} : LatLon{WGS84Latest} | ||
|
Comment on lines
+90
to
+91
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is this in the spec? I couldn't find from the link in the comment. |
||
| elseif srsid == -1 | ||
| crs = Cartesian{NoDatum} | ||
| else | ||
| if org == "EPSG" | ||
| crs = CoordRefSystems.get(EPSG{code}) | ||
| elseif org == "ESRI" | ||
| crs = CoordRefSystems.get(ESRI{code}) | ||
| end | ||
| end | ||
|
|
||
| # According to https://www.geopackage.org/spec/#r14 | ||
| # The table_name column value in a gpkg_contents table row | ||
| # SHALL contain the name of a SQLite table or view. | ||
| tablename = metadata.tablename | ||
| geomcolumn = metadata.geomcolumn | ||
| tableinfo = SQLite.tableinfo(db, tablename) | ||
|
|
||
| # "pk" (either zero for columns that are not part of the primary key, or the 1-based index of the column within the primary key) | ||
| columns = [name for (name, pk) in zip(tableinfo.name, tableinfo.pk) if pk == 0] | ||
| gpkgbinary = DBInterface.execute(db, "SELECT $(join(columns, ',')) FROM $tablename;") | ||
| table = map(gpkgbinary) do row | ||
| # According to https://www.geopackage.org/spec/#r30 | ||
| # A feature table or view SHALL have only one geometry column. | ||
| geomindex = findfirst(==(Symbol(geomcolumn)), keys(row)) | ||
| values = map(keys(row)[[begin:(geomindex - 1); (geomindex + 1):end]]) do key | ||
| key, getproperty(row, key) | ||
| end | ||
|
Comment on lines
+115
to
+118
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this code be simplified in terms of the named tuple snippet I shared?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as long we test against duplicate fieldnames then I don't think we can simplfy this to check for the Symbol(geomcolumn)
Comment on lines
+115
to
+118
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I asked a few times without response: can we use a simpler namedtuple syntax here? Shared an example in a previous comment... |
||
|
|
||
| # create IOBuffer and seek geometry binary data | ||
| buff = wkbgeombuffer(row, geomcolumn) | ||
|
|
||
| geom = wkb2geom(buff, crs) | ||
|
|
||
| # returns a tuple of the corresponding aspatial attributes and the geometries for each row in the feature table | ||
| return (NamedTuple(values), geom) | ||
| end | ||
|
|
||
| # aspatial attributes and geometries | ||
| getindex.(table, 1), getindex.(table, 2) | ||
| end | ||
|
|
||
| function wkbgeombuffer(row, geomcolumn) | ||
| # get the column of SQL Geometry Binary specified by gpkg_geometry_columns table in column_name field | ||
| buff = IOBuffer(getproperty(row, Symbol(geomcolumn))) | ||
|
|
||
| # According to https://www.geopackage.org/spec/#r19 | ||
| # A GeoPackage SHALL store feature table geometries in SQL BLOBs using the Standard GeoPackageBinary format | ||
| # check the GeoPackageBinaryHeader for the first byte[2] to be 'GP' in ASCII | ||
| read(buff, UInt16) == 0x5047 || @warn "Missing magic 'GP' string in GPkgBinaryGeometry" | ||
|
|
||
| # byte[1] version: 8-bit unsigned integer, 0 = version 1 | ||
| read(buff, UInt8) | ||
|
|
||
| # bit layout of GeoPackageBinary flags byte | ||
| # https://www.geopackage.org/spec/#flags_layout | ||
| # --------------------------------------- | ||
| # bit # 7 # 6 # 5 # 4 # 3 # 2 # 1 # 0 # | ||
| # use # R # R # X # Y # E # E # E # B # | ||
| # --------------------------------------- | ||
| # R: reserved for future use; set to 0 | ||
| # X: GeoPackageBinary type | ||
| # Y: empty geometry flag | ||
| # E: envelope contents indicator code (3-bit unsigned integer) | ||
| # B: byte order for SRS_ID and envelope values in header | ||
| flag = read(buff, UInt8) | ||
|
|
||
| # 0x07 is a 3-bit mask 0x00001110 | ||
| # left-shift moves the 3-bit mask by one to align with E bits in flag layout | ||
| # bitwise AND operation isolates the E bits | ||
| # right-shift moves the E bits by one to align with the least significant bits | ||
| # results in a 3-bit unsigned integer | ||
| envelope = (flag & (0x07 << 1)) >> 1 | ||
|
|
||
| # calculate GeoPackageBinaryHeader size in byte stream given extent of envelope: | ||
| # envelope is [minx, maxx, miny, maxy, minz, maxz], 48 bytes or envelope is [minx, maxx, miny, maxy], 32 bytes or no envelope, 0 bytes | ||
| # byte[2] magic + byte[1] version + byte[1] flag + byte[4] srs_id + byte[(8*2)×(x,y{,z})] envelope | ||
| skiplen = iszero(envelope) ? 4 : 4 + 8 * 2 * (envelope + 1) | ||
|
|
||
| # Skip reading the double[] envelope and start reading Well-Known Binary geometry | ||
| skip(buff, skiplen) | ||
|
Comment on lines
+168
to
+171
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Asked a question here that was not addressed again... |
||
| end | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| # ------------------------------------------------------------------ | ||
| # Licensed under the MIT License. See LICENSE in the project root. | ||
| # ------------------------------------------------------------------ | ||
|
|
||
| function wkb2geom(buff, crs) | ||
| byteswap = isone(read(buff, UInt8)) ? ltoh : ntoh | ||
| wkbtype = read(buff, UInt32) | ||
| # Input variants of WKB supported are standard, extended, and ISO WKB geometry with Z dimensions (M/ZM not supported) | ||
| # SQL/MM Part 3 and SFSQL 1.2 use offsets of 1000 (Z), 2000 (M) and 3000 (ZM) | ||
| # to indicate the present of higher dimensional coordinates in a WKB geometry | ||
| if wkbtype >= 1001 && wkbtype <= 1007 | ||
| # the SFSQL 1.2 offset of 1000 (Z) is present and subtracting a round number of 1000 gives the standard WKB type | ||
| wkbtype -= UInt32(1000) | ||
| # 99-402 was a short-lived extension to SFSQL 1.1 that used a high-bit flag to indicate the presence of Z coordinates in a WKB geometry | ||
| # the high-bit flag 0x80000000 for Z (or 0x40000000 for M) is set and masking it off gives the standard WKB type | ||
| elseif wkbtype > 0x80000000 | ||
| # the SFSQL 1.1 high-bit flag 0x80000000 (Z) is present and removing the flag reveals the standard WKB type | ||
| wkbtype -= 0x80000000 | ||
| end | ||
| if wkbtype <= 3 | ||
| # 0 - 3 [Geometry, Point, Linestring, Polygon] | ||
| wkb2single(buff, crs, wkbtype, byteswap) | ||
| else | ||
| # 4 - 7 [MultiPoint, MultiLinestring, MultiPolygon, GeometryCollection] | ||
| wkb2multi(buff, crs, byteswap) | ||
| end | ||
| end | ||
|
|
||
| # read single features from Well-Known Binary IO Buffer and return Concrete Geometry | ||
| function wkb2single(buff, crs, wkbtype, byteswap) | ||
| if wkbtype == 1 | ||
| wkb2point(buff, crs, byteswap) | ||
| elseif wkbtype == 2 | ||
| wkb2chain(buff, crs, byteswap) | ||
| elseif wkbtype == 3 | ||
| wkb2poly(buff, crs, byteswap) | ||
| else | ||
| error("Unsupported WKB Geometry Type: $wkbtype") | ||
| end | ||
| end | ||
|
|
||
| function wkb2point(buff, crs, byteswap) | ||
| coordinates = wkb2coords(buff, crs, byteswap) | ||
| Point(referencecoords(coordinates, crs)) | ||
| end | ||
|
|
||
| function wkb2coords(buff, crs, byteswap) | ||
| if CoordRefSystems.ncoords(crs) == 2 | ||
| x = byteswap(read(buff, Float64)) | ||
| y = byteswap(read(buff, Float64)) | ||
| return (x, y) | ||
| elseif CoordRefSystems.ncoords(crs) == 3 | ||
| x = byteswap(read(buff, Float64)) | ||
| y = byteswap(read(buff, Float64)) | ||
| z = byteswap(read(buff, Float64)) | ||
| return (x, y, z) | ||
| end | ||
| end | ||
|
|
||
| function referencecoords(coordinates, crs) | ||
| if crs <: LatLon | ||
| crs(coordinates[2], coordinates[1]) | ||
| elseif crs <: LatLonAlt | ||
| crs(coordinates[2], coordinates[1], coordinates[3]) | ||
| else | ||
| crs(coordinates...) | ||
| end | ||
| end | ||
|
|
||
| function wkb2points(buff, npoints, crs, byteswap) | ||
| map(1:npoints) do _ | ||
| coordinates = wkb2coords(buff, crs, byteswap) | ||
| Point(referencecoords(coordinates, crs)) | ||
| end | ||
| end | ||
|
|
||
| function wkb2chain(buff, crs, byteswap) | ||
| npoints = byteswap(read(buff, UInt32)) | ||
| chain = wkb2points(buff, npoints, crs, byteswap) | ||
| if length(chain) >= 2 && first(chain) == last(chain) | ||
| Ring(chain[1:(end - 1)]) | ||
| elseif length(chain) >= 2 | ||
| Rope(chain) | ||
| else | ||
| # single point or closed single point | ||
| Ring(chain) | ||
| end | ||
| end | ||
|
|
||
| function wkb2poly(buff, crs, byteswap) | ||
| nrings = byteswap(read(buff, UInt32)) | ||
| rings = map(1:nrings) do _ | ||
| wkb2chain(buff, crs, byteswap) | ||
| end | ||
| PolyArea(rings) | ||
| end | ||
|
|
||
| function wkb2multi(buff, crs, byteswap) | ||
| ngeoms = byteswap(read(buff, UInt32)) | ||
| geoms = map(1:ngeoms) do _ | ||
| wkbbswap = isone(read(buff, UInt8)) ? ltoh : ntoh | ||
| wkbtype = read(buff, UInt32) | ||
| wkb2single(buff, crs, wkbtype, wkbbswap) | ||
| end | ||
| Multi(geoms) | ||
| end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need both dependencies? Could you please elaborate on the rationale for using SQLite.jl and DBInterface.jl in different places of the code? Convenience? DBInterface.jl provides higher-level constructs that justify its use?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SQLite.execute(db, stmt)executes but returns nothing