vector2dggs 0.6.3__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/PKG-INFO +54 -15
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/README.md +49 -12
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/pyproject.toml +5 -3
- vector2dggs-0.9.0/vector2dggs/__init__.py +1 -0
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/vector2dggs/cli.py +4 -0
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/vector2dggs/common.py +39 -41
- vector2dggs-0.9.0/vector2dggs/constants.py +75 -0
- vector2dggs-0.9.0/vector2dggs/geohash.py +240 -0
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/vector2dggs/h3.py +15 -11
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/vector2dggs/katana.py +26 -20
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/vector2dggs/rHP.py +24 -25
- vector2dggs-0.9.0/vector2dggs/s2.py +349 -0
- vector2dggs-0.6.3/vector2dggs/__init__.py +0 -1
- vector2dggs-0.6.3/vector2dggs/constants.py +0 -26
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/COPYING +0 -0
- {vector2dggs-0.6.3 → vector2dggs-0.9.0}/COPYING.LESSER +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vector2dggs
|
3
|
-
Version: 0.6.3
|
3
|
+
Version: 0.9.0
|
4
4
|
Summary: CLI DGGS indexer for vector geospatial data
|
5
5
|
Home-page: https://github.com/manaakiwhenua/vector2dggs
|
6
6
|
License: LGPL-3.0-or-later
|
@@ -29,8 +29,10 @@ Requires-Dist: pillow (>=11.2.1,<12.0.0)
|
|
29
29
|
Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
|
30
30
|
Requires-Dist: pyarrow (>=20.0,<21.0)
|
31
31
|
Requires-Dist: pyproj (>=3.7,<4.0)
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist: rhppandas (>=0.
|
32
|
+
Requires-Dist: python-geohash (>=0.8.5,<0.9.0)
|
33
|
+
Requires-Dist: rhppandas (>=0.2.0,<0.3.0)
|
34
|
+
Requires-Dist: rusty-polygon-geohasher (>=0.2.3,<0.3.0)
|
35
|
+
Requires-Dist: s2geometry (>=0.9.0,<0.10.0)
|
34
36
|
Requires-Dist: shapely (>=2.1,<3.0)
|
35
37
|
Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
|
36
38
|
Requires-Dist: tqdm (>=4.67,<5.0)
|
@@ -47,8 +49,13 @@ This is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/
|
|
47
49
|
|
48
50
|
Currently this tool supports the following DGGSs:
|
49
51
|
|
50
|
-
- H3
|
51
|
-
- rHEALPix
|
52
|
+
- [H3](https://h3geo.org/)
|
53
|
+
- [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system)
|
54
|
+
- [S2](https://s2geometry.io/)
|
55
|
+
|
56
|
+
... and the following geocode systems:
|
57
|
+
|
58
|
+
- [Geohash](https://en.wikipedia.org/wiki/Geohash) (points, polygons)
|
52
59
|
|
53
60
|
Contributions (especially for other DGGSs), suggestions, bug reports and strongly worded letters are all welcome.
|
54
61
|
|
@@ -63,7 +70,8 @@ pip install vector2dggs
|
|
63
70
|
## Usage
|
64
71
|
|
65
72
|
```bash
|
66
|
-
vector2dggs --help
|
73
|
+
vector2dggs --help
|
74
|
+
|
67
75
|
Usage: vector2dggs [OPTIONS] COMMAND [ARGS]...
|
68
76
|
|
69
77
|
Options:
|
@@ -71,8 +79,10 @@ Options:
|
|
71
79
|
--help Show this message and exit.
|
72
80
|
|
73
81
|
Commands:
|
74
|
-
|
75
|
-
|
82
|
+
geohash Ingest a vector dataset and index it using the Geohash geocode...
|
83
|
+
h3 Ingest a vector dataset and index it to the H3 DGGS.
|
84
|
+
rhp Ingest a vector dataset and index it to the rHEALPix DGGS.
|
85
|
+
s2 Ingest a vector dataset and index it to the S2 DGGS.
|
76
86
|
```
|
77
87
|
|
78
88
|
```bash
|
@@ -119,8 +129,8 @@ Options:
|
|
119
129
|
[default: 5000; required]
|
120
130
|
-t, --threads INTEGER Amount of threads used for operation
|
121
131
|
[default: 7]
|
122
|
-
-
|
123
|
-
|
132
|
+
-lyr, --layer TEXT Name of the layer or table to read when using a
|
133
|
+
an input that supports layers or tables
|
124
134
|
-g, --geom_col TEXT Column name to use when using a spatial
|
125
135
|
database connection as input [default:
|
126
136
|
geom]
|
@@ -137,9 +147,9 @@ Options:
|
|
137
147
|
|
138
148
|
Output is in the Apache Parquet format, a directory with one file per partition.
|
139
149
|
|
140
|
-
For a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
150
|
+
For a quick view of your output, you can read Apache Parquet with pandas, and then use tools like h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
141
151
|
- Joining attribute data from the original feature-level data onto computed DGGS cells.
|
142
|
-
- Joining other data to this output on the
|
152
|
+
- Joining other data to this output on the DGGS cell ID. (The output has a column like `{dggs}_\d`, e.g. `h3_09` or `h3_12` according to the target resolution, zero-padded to account for the maximum resolution of the DGGS)
|
143
153
|
|
144
154
|
Geoparquet output (hexagon boundaries):
|
145
155
|
|
@@ -166,6 +176,34 @@ h3_12
|
|
166
176
|
>>> g.to_parquet('./output-data/parcels.12.geo.parquet')
|
167
177
|
```
|
168
178
|
|
179
|
+
An example for S2 output (using `s2sphere`):
|
180
|
+
|
181
|
+
|
182
|
+
```python
|
183
|
+
import pandas as pd
|
184
|
+
import geopandas as gpd
|
185
|
+
import s2sphere
|
186
|
+
from shapely.geometry import Polygon
|
187
|
+
|
188
|
+
RES = 18
|
189
|
+
df = pd.read_parquet(f'~/output-data/ponds-with-holes.s2.{RES}.pq')
|
190
|
+
df = df.reset_index()
|
191
|
+
|
192
|
+
def s2id_to_polygon(s2_id_hex):
|
193
|
+
cell_id = s2sphere.CellId.from_token(s2_id_hex)
|
194
|
+
cell = s2sphere.Cell(cell_id)
|
195
|
+
vertices = []
|
196
|
+
for i in range(4):
|
197
|
+
vertex = cell.get_vertex(i)
|
198
|
+
lat_lng = s2sphere.LatLng.from_point(vertex)
|
199
|
+
vertices.append((lat_lng.lng().degrees, lat_lng.lat().degrees)) # (lon, lat)
|
200
|
+
return Polygon(vertices)
|
201
|
+
|
202
|
+
df['geometry'] = df[f's2_{RES}'].apply(s2id_to_polygon)
|
203
|
+
df = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326') # WGS84
|
204
|
+
df.to_parquet(f'sample-{RES}.parquet')
|
205
|
+
```
|
206
|
+
|
169
207
|
### For development
|
170
208
|
|
171
209
|
In brief, to get started:
|
@@ -175,6 +213,7 @@ In brief, to get started:
|
|
175
213
|
- If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
|
176
214
|
- On Linux, install GDAL 3.8+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
|
177
215
|
- Create the virtual environment with `poetry init`. This will install necessary dependencies.
|
216
|
+
- If the installation of `s2geometry` fails, you may require SWIG to build it. (A command like `conda install swig` or `sudo dnf install swig` depending on your platform).
|
178
217
|
- Subsequently, the virtual environment can be re-activated with `poetry shell`.
|
179
218
|
|
180
219
|
If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
|
@@ -199,7 +238,7 @@ vector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpk
|
|
199
238
|
With a PostgreSQL/PostGIS connection:
|
200
239
|
|
201
240
|
```bash
|
202
|
-
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -
|
241
|
+
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -lyr topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet
|
203
242
|
```
|
204
243
|
|
205
244
|
## Citation
|
@@ -209,14 +248,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
|
|
209
248
|
title={{vector2dggs}},
|
210
249
|
author={Ardo, James and Law, Richard},
|
211
250
|
url={https://github.com/manaakiwhenua/vector2dggs},
|
212
|
-
version={0.
|
251
|
+
version={0.9.0},
|
213
252
|
date={2023-04-20}
|
214
253
|
}
|
215
254
|
```
|
216
255
|
|
217
256
|
APA/Harvard
|
218
257
|
|
219
|
-
> Ardo, J., & Law, R. (2023). vector2dggs (0.
|
258
|
+
> Ardo, J., & Law, R. (2023). vector2dggs (0.9.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
220
259
|
|
221
260
|
[](https://github.com/manaakiwhenua/manaakiwhenua-standards)
|
222
261
|
|
@@ -8,8 +8,13 @@ This is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/
|
|
8
8
|
|
9
9
|
Currently this tool supports the following DGGSs:
|
10
10
|
|
11
|
-
- H3
|
12
|
-
- rHEALPix
|
11
|
+
- [H3](https://h3geo.org/)
|
12
|
+
- [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system)
|
13
|
+
- [S2](https://s2geometry.io/)
|
14
|
+
|
15
|
+
... and the following geocode systems:
|
16
|
+
|
17
|
+
- [Geohash](https://en.wikipedia.org/wiki/Geohash) (points, polygons)
|
13
18
|
|
14
19
|
Contributions (especially for other DGGSs), suggestions, bug reports and strongly worded letters are all welcome.
|
15
20
|
|
@@ -24,7 +29,8 @@ pip install vector2dggs
|
|
24
29
|
## Usage
|
25
30
|
|
26
31
|
```bash
|
27
|
-
vector2dggs --help
|
32
|
+
vector2dggs --help
|
33
|
+
|
28
34
|
Usage: vector2dggs [OPTIONS] COMMAND [ARGS]...
|
29
35
|
|
30
36
|
Options:
|
@@ -32,8 +38,10 @@ Options:
|
|
32
38
|
--help Show this message and exit.
|
33
39
|
|
34
40
|
Commands:
|
35
|
-
|
36
|
-
|
41
|
+
geohash Ingest a vector dataset and index it using the Geohash geocode...
|
42
|
+
h3 Ingest a vector dataset and index it to the H3 DGGS.
|
43
|
+
rhp Ingest a vector dataset and index it to the rHEALPix DGGS.
|
44
|
+
s2 Ingest a vector dataset and index it to the S2 DGGS.
|
37
45
|
```
|
38
46
|
|
39
47
|
```bash
|
@@ -80,8 +88,8 @@ Options:
|
|
80
88
|
[default: 5000; required]
|
81
89
|
-t, --threads INTEGER Amount of threads used for operation
|
82
90
|
[default: 7]
|
83
|
-
-
|
84
|
-
|
91
|
+
-lyr, --layer TEXT Name of the layer or table to read when using a
|
92
|
+
an input that supports layers or tables
|
85
93
|
-g, --geom_col TEXT Column name to use when using a spatial
|
86
94
|
database connection as input [default:
|
87
95
|
geom]
|
@@ -98,9 +106,9 @@ Options:
|
|
98
106
|
|
99
107
|
Output is in the Apache Parquet format, a directory with one file per partition.
|
100
108
|
|
101
|
-
For a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
109
|
+
For a quick view of your output, you can read Apache Parquet with pandas, and then use tools like h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
102
110
|
- Joining attribute data from the original feature-level data onto computed DGGS cells.
|
103
|
-
- Joining other data to this output on the
|
111
|
+
- Joining other data to this output on the DGGS cell ID. (The output has a column like `{dggs}_\d`, e.g. `h3_09` or `h3_12` according to the target resolution, zero-padded to account for the maximum resolution of the DGGS)
|
104
112
|
|
105
113
|
Geoparquet output (hexagon boundaries):
|
106
114
|
|
@@ -127,6 +135,34 @@ h3_12
|
|
127
135
|
>>> g.to_parquet('./output-data/parcels.12.geo.parquet')
|
128
136
|
```
|
129
137
|
|
138
|
+
An example for S2 output (using `s2sphere`):
|
139
|
+
|
140
|
+
|
141
|
+
```python
|
142
|
+
import pandas as pd
|
143
|
+
import geopandas as gpd
|
144
|
+
import s2sphere
|
145
|
+
from shapely.geometry import Polygon
|
146
|
+
|
147
|
+
RES = 18
|
148
|
+
df = pd.read_parquet(f'~/output-data/ponds-with-holes.s2.{RES}.pq')
|
149
|
+
df = df.reset_index()
|
150
|
+
|
151
|
+
def s2id_to_polygon(s2_id_hex):
|
152
|
+
cell_id = s2sphere.CellId.from_token(s2_id_hex)
|
153
|
+
cell = s2sphere.Cell(cell_id)
|
154
|
+
vertices = []
|
155
|
+
for i in range(4):
|
156
|
+
vertex = cell.get_vertex(i)
|
157
|
+
lat_lng = s2sphere.LatLng.from_point(vertex)
|
158
|
+
vertices.append((lat_lng.lng().degrees, lat_lng.lat().degrees)) # (lon, lat)
|
159
|
+
return Polygon(vertices)
|
160
|
+
|
161
|
+
df['geometry'] = df[f's2_{RES}'].apply(s2id_to_polygon)
|
162
|
+
df = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326') # WGS84
|
163
|
+
df.to_parquet(f'sample-{RES}.parquet')
|
164
|
+
```
|
165
|
+
|
130
166
|
### For development
|
131
167
|
|
132
168
|
In brief, to get started:
|
@@ -136,6 +172,7 @@ In brief, to get started:
|
|
136
172
|
- If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
|
137
173
|
- On Linux, install GDAL 3.8+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
|
138
174
|
- Create the virtual environment with `poetry init`. This will install necessary dependencies.
|
175
|
+
- If the installation of `s2geometry` fails, you may require SWIG to build it. (A command like `conda install swig` or `sudo dnf install swig` depending on your platform).
|
139
176
|
- Subsequently, the virtual environment can be re-activated with `poetry shell`.
|
140
177
|
|
141
178
|
If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
|
@@ -160,7 +197,7 @@ vector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpk
|
|
160
197
|
With a PostgreSQL/PostGIS connection:
|
161
198
|
|
162
199
|
```bash
|
163
|
-
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -
|
200
|
+
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -lyr topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet
|
164
201
|
```
|
165
202
|
|
166
203
|
## Citation
|
@@ -170,13 +207,13 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
|
|
170
207
|
title={{vector2dggs}},
|
171
208
|
author={Ardo, James and Law, Richard},
|
172
209
|
url={https://github.com/manaakiwhenua/vector2dggs},
|
173
|
-
version={0.
|
210
|
+
version={0.9.0},
|
174
211
|
date={2023-04-20}
|
175
212
|
}
|
176
213
|
```
|
177
214
|
|
178
215
|
APA/Harvard
|
179
216
|
|
180
|
-
> Ardo, J., & Law, R. (2023). vector2dggs (0.
|
217
|
+
> Ardo, J., & Law, R. (2023). vector2dggs (0.9.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
181
218
|
|
182
219
|
[](https://github.com/manaakiwhenua/manaakiwhenua-standards)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "vector2dggs"
|
3
|
-
version = "0.6.3"
|
3
|
+
version = "0.9.0"
|
4
4
|
description = "CLI DGGS indexer for vector geospatial data"
|
5
5
|
authors = ["James Ardo <ardoj@landcareresearch.co.nz>"]
|
6
6
|
maintainers = ["Richard Law <lawr@landcareresearch.co.nz>"]
|
@@ -30,9 +30,11 @@ sqlalchemy = "^2.0.32"
|
|
30
30
|
psycopg2 = "^2.9.9"
|
31
31
|
shapely = "^2.1"
|
32
32
|
numpy = "^2"
|
33
|
-
rhppandas = "^0.
|
34
|
-
rhealpixdggs = "^0.5.5"
|
33
|
+
rhppandas = "^0.2.0"
|
35
34
|
pillow = "^11.2.1"
|
35
|
+
s2geometry = "^0.9.0"
|
36
|
+
rusty-polygon-geohasher = "^0.2.3"
|
37
|
+
python-geohash = "^0.8.5"
|
36
38
|
|
37
39
|
[tool.poetry.group.dev.dependencies]
|
38
40
|
pytest = "^7.2.2"
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__: str = "0.9.0"
|
@@ -3,6 +3,8 @@ import click
|
|
3
3
|
from vector2dggs import __version__
|
4
4
|
from vector2dggs.h3 import h3
|
5
5
|
from vector2dggs.rHP import rhp
|
6
|
+
from vector2dggs.s2 import s2
|
7
|
+
from vector2dggs.geohash import geohash
|
6
8
|
|
7
9
|
# If the program does terminal interaction, make it output a short
|
8
10
|
# notice like this when it starts in an interactive mode:
|
@@ -21,6 +23,8 @@ def cli():
|
|
21
23
|
|
22
24
|
cli.add_command(h3)
|
23
25
|
cli.add_command(rhp)
|
26
|
+
cli.add_command(s2)
|
27
|
+
cli.add_command(geohash)
|
24
28
|
|
25
29
|
|
26
30
|
def main():
|
@@ -104,13 +104,15 @@ def drop_condition(
|
|
104
104
|
_diff = _before - _after
|
105
105
|
if _diff:
|
106
106
|
log_method = (
|
107
|
-
LOGGER.info
|
107
|
+
LOGGER.info
|
108
|
+
if (_diff / float(_before)) < warning_threshold
|
109
|
+
else LOGGER.warning
|
108
110
|
)
|
109
111
|
log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
|
110
112
|
return df
|
111
113
|
|
112
114
|
|
113
|
-
def get_parent_res(dggs: str, parent_res: Union[None,
|
115
|
+
def get_parent_res(dggs: str, parent_res: Union[None, str], resolution: int) -> int:
|
114
116
|
"""
|
115
117
|
Uses a parent resolution,
|
116
118
|
OR,
|
@@ -118,22 +120,17 @@ def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):
|
|
118
120
|
|
119
121
|
Used for intermediate re-partitioning.
|
120
122
|
"""
|
121
|
-
if dggs
|
122
|
-
return (
|
123
|
-
parent_res
|
124
|
-
if parent_res is not None
|
125
|
-
else max(const.MIN_H3, (resolution - const.DEFAULT_PARENT_OFFSET))
|
126
|
-
)
|
127
|
-
elif dggs == "rhp":
|
128
|
-
return (
|
129
|
-
parent_res
|
130
|
-
if parent_res is not None
|
131
|
-
else max(const.MIN_RHP, (resolution - const.DEFAULT_PARENT_OFFSET))
|
132
|
-
)
|
133
|
-
else:
|
123
|
+
if not dggs in const.DEFAULT_DGGS_PARENT_RES.keys():
|
134
124
|
raise RuntimeError(
|
135
|
-
"Unknown dggs {dggs}) - must be one of [
|
125
|
+
"Unknown dggs {dggs}) - must be one of [ {options} ]".format(
|
126
|
+
dggs=dggs, options=", ".join(const.DEFAULT_DGGS_PARENT_RES.keys())
|
127
|
+
)
|
136
128
|
)
|
129
|
+
return (
|
130
|
+
int(parent_res)
|
131
|
+
if parent_res is not None
|
132
|
+
else const.DEFAULT_DGGS_PARENT_RES[dggs](resolution)
|
133
|
+
)
|
137
134
|
|
138
135
|
|
139
136
|
def parent_partitioning(
|
@@ -141,10 +138,9 @@ def parent_partitioning(
|
|
141
138
|
input_dir: Path,
|
142
139
|
output_dir: Path,
|
143
140
|
resolution: int,
|
144
|
-
parent_res:
|
141
|
+
parent_res: int,
|
145
142
|
**kwargs,
|
146
143
|
) -> None:
|
147
|
-
parent_res: int = get_parent_res(dggs, parent_res, resolution)
|
148
144
|
partition_col = f"{dggs}_{parent_res:02}"
|
149
145
|
|
150
146
|
with TqdmCallback(desc="Repartitioning"):
|
@@ -174,30 +170,29 @@ def polyfill(
|
|
174
170
|
pq_in: Path,
|
175
171
|
spatial_sort_col: str,
|
176
172
|
resolution: int,
|
177
|
-
parent_res:
|
173
|
+
parent_res: int,
|
178
174
|
output_directory: str,
|
179
175
|
) -> None:
|
180
176
|
"""
|
181
177
|
Reads a geoparquet, performs polyfilling (for Polygon),
|
182
|
-
linetracing (for LineString),
|
178
|
+
linetracing (for LineString), or indexing (for Point),
|
179
|
+
and writes out to parquet.
|
183
180
|
"""
|
184
|
-
df = gpd.read_parquet(pq_in).reset_index()
|
181
|
+
df = gpd.read_parquet(pq_in).reset_index()
|
182
|
+
if spatial_sort_col != "none":
|
183
|
+
df = df.drop(columns=[spatial_sort_col])
|
185
184
|
if len(df.index) == 0:
|
186
|
-
# Input is empty, nothing to
|
185
|
+
# Input is empty, nothing to convert
|
187
186
|
return None
|
188
187
|
|
189
|
-
# DGGS specific
|
188
|
+
# DGGS specific conversion
|
190
189
|
df = dggsfunc(df, resolution)
|
191
190
|
|
192
191
|
if len(df.index) == 0:
|
193
|
-
#
|
192
|
+
# Conversion resulted in empty output (e.g. large cell, small feature)
|
194
193
|
return None
|
195
194
|
|
196
195
|
df.index.rename(f"{dggs}_{resolution:02}", inplace=True)
|
197
|
-
parent_res: int = get_parent_res(dggs, parent_res, resolution)
|
198
|
-
# print(parent_res)
|
199
|
-
# print(df.index)
|
200
|
-
# print(df.columns)
|
201
196
|
|
202
197
|
# Secondary (parent) index, used later for partitioning
|
203
198
|
df = secondary_index_func(df, parent_res)
|
@@ -228,22 +223,23 @@ def index(
|
|
228
223
|
id_field: str = None,
|
229
224
|
cut_crs: pyproj.CRS = None,
|
230
225
|
con: SQLConnectionType = None,
|
231
|
-
|
226
|
+
layer: str = None,
|
232
227
|
geom_col: str = "geom",
|
233
228
|
overwrite: bool = False,
|
234
229
|
) -> Path:
|
235
230
|
"""
|
236
|
-
Performs multi-threaded
|
231
|
+
Performs multi-threaded DGGS indexing on geometries (including multipart and collections).
|
237
232
|
"""
|
233
|
+
parent_res = get_parent_res(dggs, parent_res, resolution)
|
238
234
|
|
239
|
-
if
|
235
|
+
if layer and con:
|
240
236
|
# Database connection
|
241
237
|
if keep_attributes:
|
242
|
-
q = sqlalchemy.text(f"SELECT * FROM {
|
238
|
+
q = sqlalchemy.text(f"SELECT * FROM {layer}")
|
243
239
|
elif id_field and not keep_attributes:
|
244
|
-
q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {
|
240
|
+
q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {layer}")
|
245
241
|
else:
|
246
|
-
q = sqlalchemy.text(f"SELECT {geom_col} FROM {
|
242
|
+
q = sqlalchemy.text(f"SELECT {geom_col} FROM {layer}")
|
247
243
|
df = gpd.read_postgis(q, con.connect(), geom_col=geom_col).rename_geometry(
|
248
244
|
"geometry"
|
249
245
|
)
|
@@ -291,7 +287,8 @@ def index(
|
|
291
287
|
"index": lambda frame: frame[
|
292
288
|
(frame.geometry.geom_type != "Polygon")
|
293
289
|
& (frame.geometry.geom_type != "LineString")
|
294
|
-
|
290
|
+
& (frame.geometry.geom_type != "Point")
|
291
|
+
],
|
295
292
|
"message": "Considering unsupported geometries",
|
296
293
|
},
|
297
294
|
]
|
@@ -300,11 +297,12 @@ def index(
|
|
300
297
|
|
301
298
|
ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
|
302
299
|
|
303
|
-
|
304
|
-
|
300
|
+
if spatial_sorting != "none":
|
301
|
+
LOGGER.debug("Spatially sorting and partitioning (%s)", spatial_sorting)
|
302
|
+
ddf = ddf.spatial_shuffle(by=spatial_sorting)
|
305
303
|
spatial_sort_col = (
|
306
304
|
spatial_sorting
|
307
|
-
if spatial_sorting == "geohash"
|
305
|
+
if (spatial_sorting == "geohash" or spatial_sorting == "none")
|
308
306
|
else f"{spatial_sorting}_distance"
|
309
307
|
)
|
310
308
|
|
@@ -314,9 +312,9 @@ def index(
|
|
314
312
|
|
315
313
|
filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))
|
316
314
|
|
317
|
-
# Multithreaded
|
315
|
+
# Multithreaded DGGS indexing
|
318
316
|
LOGGER.debug(
|
319
|
-
"
|
317
|
+
"DGGS indexing by spatial partitions with resolution: %d",
|
320
318
|
resolution,
|
321
319
|
)
|
322
320
|
with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
|
@@ -344,7 +342,7 @@ def index(
|
|
344
342
|
|
345
343
|
parent_partitioning(
|
346
344
|
dggs,
|
347
|
-
tmpdir2,
|
345
|
+
Path(tmpdir2),
|
348
346
|
output_directory,
|
349
347
|
resolution,
|
350
348
|
parent_res,
|
@@ -0,0 +1,75 @@
|
|
1
|
+
import multiprocessing
|
2
|
+
import warnings
|
3
|
+
import tempfile
|
4
|
+
|
5
|
+
|
6
|
+
MIN_H3, MAX_H3 = 0, 15
|
7
|
+
MIN_RHP, MAX_RHP = 0, 15
|
8
|
+
MIN_S2, MAX_S2 = 0, 30
|
9
|
+
MIN_GEOHASH, MAX_GEOHASH = 1, 12
|
10
|
+
|
11
|
+
DEFAULTS = {
|
12
|
+
"id": None,
|
13
|
+
"k": False,
|
14
|
+
"ch": 50,
|
15
|
+
"s": "none",
|
16
|
+
"crs": None,
|
17
|
+
"c": 5000,
|
18
|
+
"t": (multiprocessing.cpu_count() - 1),
|
19
|
+
"lyr": None,
|
20
|
+
"g": "geom",
|
21
|
+
"tempdir": tempfile.tempdir,
|
22
|
+
}
|
23
|
+
|
24
|
+
SPATIAL_SORTING_METHODS = ["hilbert", "morton", "geohash", "none"]
|
25
|
+
|
26
|
+
DEFAULT_DGGS_PARENT_RES = {
|
27
|
+
"h3": lambda resolution: max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET)),
|
28
|
+
"rhp": lambda resolution: max(MIN_RHP, (resolution - DEFAULT_PARENT_OFFSET)),
|
29
|
+
"geohash": lambda resolution: max(
|
30
|
+
MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET)
|
31
|
+
),
|
32
|
+
"s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)),
|
33
|
+
}
|
34
|
+
|
35
|
+
DEFAULT_PARENT_OFFSET = 6
|
36
|
+
|
37
|
+
# http://s2geometry.io/resources/s2cell_statistics.html
|
38
|
+
S2_CELLS_MAX_AREA_M2_BY_LEVEL = {
|
39
|
+
0: 85011012.19 * 1e6,
|
40
|
+
1: 21252753.05 * 1e6,
|
41
|
+
2: 6026521.16 * 1e6,
|
42
|
+
3: 1646455.50 * 1e6,
|
43
|
+
4: 413918.15 * 1e6,
|
44
|
+
5: 104297.91 * 1e6,
|
45
|
+
6: 26113.30 * 1e6,
|
46
|
+
7: 6529.09 * 1e6,
|
47
|
+
8: 1632.45 * 1e6,
|
48
|
+
9: 408.12 * 1e6,
|
49
|
+
10: 102.03 * 1e6,
|
50
|
+
11: 25.51 * 1e6,
|
51
|
+
12: 6.38 * 1e6,
|
52
|
+
13: 1.59 * 1e6,
|
53
|
+
14: 0.40 * 1e6,
|
54
|
+
15: 99638.93,
|
55
|
+
16: 24909.73,
|
56
|
+
17: 6227.43,
|
57
|
+
18: 1556.86,
|
58
|
+
19: 389.22,
|
59
|
+
20: 97.30,
|
60
|
+
21: 24.33,
|
61
|
+
22: 6.08,
|
62
|
+
23: 1.52,
|
63
|
+
24: 0.38,
|
64
|
+
25: 950.23 * 1e-4,
|
65
|
+
26: 237.56 * 1e-4,
|
66
|
+
27: 59.39 * 1e-4,
|
67
|
+
28: 14.85 * 1e-4,
|
68
|
+
29: 3.71 * 1e-4,
|
69
|
+
30: 0.93 * 1e-4,
|
70
|
+
}
|
71
|
+
|
72
|
+
|
73
|
+
warnings.filterwarnings(
|
74
|
+
"ignore"
|
75
|
+
) # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
|