vector2dggs 0.6.3__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector2dggs/__init__.py +1 -1
- vector2dggs/cli.py +4 -0
- vector2dggs/common.py +39 -41
- vector2dggs/constants.py +51 -2
- vector2dggs/geohash.py +240 -0
- vector2dggs/h3.py +15 -11
- vector2dggs/katana.py +26 -20
- vector2dggs/rHP.py +24 -25
- vector2dggs/s2.py +349 -0
- {vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/METADATA +54 -15
- vector2dggs-0.9.0.dist-info/RECORD +15 -0
- vector2dggs-0.6.3.dist-info/RECORD +0 -13
- {vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/COPYING +0 -0
- {vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/COPYING.LESSER +0 -0
- {vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/WHEEL +0 -0
- {vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/entry_points.txt +0 -0
vector2dggs/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__: str = "0.
|
1
|
+
__version__: str = "0.9.0"
|
vector2dggs/cli.py
CHANGED
@@ -3,6 +3,8 @@ import click
|
|
3
3
|
from vector2dggs import __version__
|
4
4
|
from vector2dggs.h3 import h3
|
5
5
|
from vector2dggs.rHP import rhp
|
6
|
+
from vector2dggs.s2 import s2
|
7
|
+
from vector2dggs.geohash import geohash
|
6
8
|
|
7
9
|
# If the program does terminal interaction, make it output a short
|
8
10
|
# notice like this when it starts in an interactive mode:
|
@@ -21,6 +23,8 @@ def cli():
|
|
21
23
|
|
22
24
|
cli.add_command(h3)
|
23
25
|
cli.add_command(rhp)
|
26
|
+
cli.add_command(s2)
|
27
|
+
cli.add_command(geohash)
|
24
28
|
|
25
29
|
|
26
30
|
def main():
|
vector2dggs/common.py
CHANGED
@@ -104,13 +104,15 @@ def drop_condition(
|
|
104
104
|
_diff = _before - _after
|
105
105
|
if _diff:
|
106
106
|
log_method = (
|
107
|
-
LOGGER.info
|
107
|
+
LOGGER.info
|
108
|
+
if (_diff / float(_before)) < warning_threshold
|
109
|
+
else LOGGER.warning
|
108
110
|
)
|
109
111
|
log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
|
110
112
|
return df
|
111
113
|
|
112
114
|
|
113
|
-
def get_parent_res(dggs: str, parent_res: Union[None,
|
115
|
+
def get_parent_res(dggs: str, parent_res: Union[None, str], resolution: int) -> int:
|
114
116
|
"""
|
115
117
|
Uses a parent resolution,
|
116
118
|
OR,
|
@@ -118,22 +120,17 @@ def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):
|
|
118
120
|
|
119
121
|
Used for intermediate re-partioning.
|
120
122
|
"""
|
121
|
-
if dggs
|
122
|
-
return (
|
123
|
-
parent_res
|
124
|
-
if parent_res is not None
|
125
|
-
else max(const.MIN_H3, (resolution - const.DEFAULT_PARENT_OFFSET))
|
126
|
-
)
|
127
|
-
elif dggs == "rhp":
|
128
|
-
return (
|
129
|
-
parent_res
|
130
|
-
if parent_res is not None
|
131
|
-
else max(const.MIN_RHP, (resolution - const.DEFAULT_PARENT_OFFSET))
|
132
|
-
)
|
133
|
-
else:
|
123
|
+
if not dggs in const.DEFAULT_DGGS_PARENT_RES.keys():
|
134
124
|
raise RuntimeError(
|
135
|
-
"Unknown dggs {dggs}) - must be one of [
|
125
|
+
"Unknown dggs {dggs}) - must be one of [ {options} ]".format(
|
126
|
+
dggs=dggs, options=", ".join(const.DEFAULT_DGGS_PARENT_RES.keys())
|
127
|
+
)
|
136
128
|
)
|
129
|
+
return (
|
130
|
+
int(parent_res)
|
131
|
+
if parent_res is not None
|
132
|
+
else const.DEFAULT_DGGS_PARENT_RES[dggs](resolution)
|
133
|
+
)
|
137
134
|
|
138
135
|
|
139
136
|
def parent_partitioning(
|
@@ -141,10 +138,9 @@ def parent_partitioning(
|
|
141
138
|
input_dir: Path,
|
142
139
|
output_dir: Path,
|
143
140
|
resolution: int,
|
144
|
-
parent_res:
|
141
|
+
parent_res: int,
|
145
142
|
**kwargs,
|
146
143
|
) -> None:
|
147
|
-
parent_res: int = get_parent_res(dggs, parent_res, resolution)
|
148
144
|
partition_col = f"{dggs}_{parent_res:02}"
|
149
145
|
|
150
146
|
with TqdmCallback(desc="Repartitioning"):
|
@@ -174,30 +170,29 @@ def polyfill(
|
|
174
170
|
pq_in: Path,
|
175
171
|
spatial_sort_col: str,
|
176
172
|
resolution: int,
|
177
|
-
parent_res:
|
173
|
+
parent_res: int,
|
178
174
|
output_directory: str,
|
179
175
|
) -> None:
|
180
176
|
"""
|
181
177
|
Reads a geoparquet, performs polyfilling (for Polygon),
|
182
|
-
linetracing (for LineString),
|
178
|
+
linetracing (for LineString), or indexing (for Point),
|
179
|
+
and writes out to parquet.
|
183
180
|
"""
|
184
|
-
df = gpd.read_parquet(pq_in).reset_index()
|
181
|
+
df = gpd.read_parquet(pq_in).reset_index()
|
182
|
+
if spatial_sort_col != "none":
|
183
|
+
df = df.drop(columns=[spatial_sort_col])
|
185
184
|
if len(df.index) == 0:
|
186
|
-
# Input is empty, nothing to
|
185
|
+
# Input is empty, nothing to convert
|
187
186
|
return None
|
188
187
|
|
189
|
-
# DGGS specific
|
188
|
+
# DGGS specific conversion
|
190
189
|
df = dggsfunc(df, resolution)
|
191
190
|
|
192
191
|
if len(df.index) == 0:
|
193
|
-
#
|
192
|
+
# Conversion resulted in empty output (e.g. large cell, small feature)
|
194
193
|
return None
|
195
194
|
|
196
195
|
df.index.rename(f"{dggs}_{resolution:02}", inplace=True)
|
197
|
-
parent_res: int = get_parent_res(dggs, parent_res, resolution)
|
198
|
-
# print(parent_res)
|
199
|
-
# print(df.index)
|
200
|
-
# print(df.columns)
|
201
196
|
|
202
197
|
# Secondary (parent) index, used later for partitioning
|
203
198
|
df = secondary_index_func(df, parent_res)
|
@@ -228,22 +223,23 @@ def index(
|
|
228
223
|
id_field: str = None,
|
229
224
|
cut_crs: pyproj.CRS = None,
|
230
225
|
con: SQLConnectionType = None,
|
231
|
-
|
226
|
+
layer: str = None,
|
232
227
|
geom_col: str = "geom",
|
233
228
|
overwrite: bool = False,
|
234
229
|
) -> Path:
|
235
230
|
"""
|
236
|
-
Performs multi-threaded
|
231
|
+
Performs multi-threaded DGGS indexing on geometries (including multipart and collections).
|
237
232
|
"""
|
233
|
+
parent_res = get_parent_res(dggs, parent_res, resolution)
|
238
234
|
|
239
|
-
if
|
235
|
+
if layer and con:
|
240
236
|
# Database connection
|
241
237
|
if keep_attributes:
|
242
|
-
q = sqlalchemy.text(f"SELECT * FROM {
|
238
|
+
q = sqlalchemy.text(f"SELECT * FROM {layer}")
|
243
239
|
elif id_field and not keep_attributes:
|
244
|
-
q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {
|
240
|
+
q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {layer}")
|
245
241
|
else:
|
246
|
-
q = sqlalchemy.text(f"SELECT {geom_col} FROM {
|
242
|
+
q = sqlalchemy.text(f"SELECT {geom_col} FROM {layer}")
|
247
243
|
df = gpd.read_postgis(q, con.connect(), geom_col=geom_col).rename_geometry(
|
248
244
|
"geometry"
|
249
245
|
)
|
@@ -291,7 +287,8 @@ def index(
|
|
291
287
|
"index": lambda frame: frame[
|
292
288
|
(frame.geometry.geom_type != "Polygon")
|
293
289
|
& (frame.geometry.geom_type != "LineString")
|
294
|
-
|
290
|
+
& (frame.geometry.geom_type != "Point")
|
291
|
+
],
|
295
292
|
"message": "Considering unsupported geometries",
|
296
293
|
},
|
297
294
|
]
|
@@ -300,11 +297,12 @@ def index(
|
|
300
297
|
|
301
298
|
ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
|
302
299
|
|
303
|
-
|
304
|
-
|
300
|
+
if spatial_sorting != "none":
|
301
|
+
LOGGER.debug("Spatially sorting and partitioning (%s)", spatial_sorting)
|
302
|
+
ddf = ddf.spatial_shuffle(by=spatial_sorting)
|
305
303
|
spatial_sort_col = (
|
306
304
|
spatial_sorting
|
307
|
-
if spatial_sorting == "geohash"
|
305
|
+
if (spatial_sorting == "geohash" or spatial_sorting == "none")
|
308
306
|
else f"{spatial_sorting}_distance"
|
309
307
|
)
|
310
308
|
|
@@ -314,9 +312,9 @@ def index(
|
|
314
312
|
|
315
313
|
filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))
|
316
314
|
|
317
|
-
# Multithreaded
|
315
|
+
# Multithreaded DGGS indexing
|
318
316
|
LOGGER.debug(
|
319
|
-
"
|
317
|
+
"DGGS indexing by spatial partitions with resolution: %d",
|
320
318
|
resolution,
|
321
319
|
)
|
322
320
|
with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
|
@@ -344,7 +342,7 @@ def index(
|
|
344
342
|
|
345
343
|
parent_partitioning(
|
346
344
|
dggs,
|
347
|
-
tmpdir2,
|
345
|
+
Path(tmpdir2),
|
348
346
|
output_directory,
|
349
347
|
resolution,
|
350
348
|
parent_res,
|
vector2dggs/constants.py
CHANGED
@@ -5,22 +5,71 @@ import tempfile
|
|
5
5
|
|
6
6
|
MIN_H3, MAX_H3 = 0, 15
|
7
7
|
MIN_RHP, MAX_RHP = 0, 15
|
8
|
+
MIN_S2, MAX_S2 = 0, 30
|
9
|
+
MIN_GEOHASH, MAX_GEOHASH = 1, 12
|
8
10
|
|
9
11
|
DEFAULTS = {
|
10
12
|
"id": None,
|
11
13
|
"k": False,
|
12
14
|
"ch": 50,
|
13
|
-
"s": "
|
15
|
+
"s": "none",
|
14
16
|
"crs": None,
|
15
17
|
"c": 5000,
|
16
18
|
"t": (multiprocessing.cpu_count() - 1),
|
17
|
-
"
|
19
|
+
"lyr": None,
|
18
20
|
"g": "geom",
|
19
21
|
"tempdir": tempfile.tempdir,
|
20
22
|
}
|
21
23
|
|
24
|
+
SPATIAL_SORTING_METHODS = ["hilbert", "morton", "geohash", "none"]
|
25
|
+
|
26
|
+
DEFAULT_DGGS_PARENT_RES = {
|
27
|
+
"h3": lambda resolution: max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET)),
|
28
|
+
"rhp": lambda resolution: max(MIN_RHP, (resolution - DEFAULT_PARENT_OFFSET)),
|
29
|
+
"geohash": lambda resolution: max(
|
30
|
+
MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET)
|
31
|
+
),
|
32
|
+
"s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)),
|
33
|
+
}
|
34
|
+
|
22
35
|
DEFAULT_PARENT_OFFSET = 6
|
23
36
|
|
37
|
+
# http://s2geometry.io/resources/s2cell_statistics.html
|
38
|
+
S2_CELLS_MAX_AREA_M2_BY_LEVEL = {
|
39
|
+
0: 85011012.19 * 1e6,
|
40
|
+
1: 21252753.05 * 1e6,
|
41
|
+
2: 6026521.16 * 1e6,
|
42
|
+
3: 1646455.50 * 1e6,
|
43
|
+
4: 413918.15 * 1e6,
|
44
|
+
5: 104297.91 * 1e6,
|
45
|
+
6: 26113.30 * 1e6,
|
46
|
+
7: 6529.09 * 1e6,
|
47
|
+
8: 1632.45 * 1e6,
|
48
|
+
9: 408.12 * 1e6,
|
49
|
+
10: 102.03 * 1e6,
|
50
|
+
11: 25.51 * 1e6,
|
51
|
+
12: 6.38 * 1e6,
|
52
|
+
13: 1.59 * 1e6,
|
53
|
+
14: 0.40 * 1e6,
|
54
|
+
15: 99638.93,
|
55
|
+
16: 24909.73,
|
56
|
+
17: 6227.43,
|
57
|
+
18: 1556.86,
|
58
|
+
19: 389.22,
|
59
|
+
20: 97.30,
|
60
|
+
21: 24.33,
|
61
|
+
22: 6.08,
|
62
|
+
23: 1.52,
|
63
|
+
24: 0.38,
|
64
|
+
25: 950.23 * 1e-4,
|
65
|
+
26: 237.56 * 1e-4,
|
66
|
+
27: 59.39 * 1e-4,
|
67
|
+
28: 14.85 * 1e-4,
|
68
|
+
29: 3.71 * 1e-4,
|
69
|
+
30: 0.93 * 1e-4,
|
70
|
+
}
|
71
|
+
|
72
|
+
|
24
73
|
warnings.filterwarnings(
|
25
74
|
"ignore"
|
26
75
|
) # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
|
vector2dggs/geohash.py
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
import sys
|
2
|
+
import click
|
3
|
+
import click_log
|
4
|
+
import tempfile
|
5
|
+
import pyproj
|
6
|
+
|
7
|
+
from geohash_polygon import polygon_to_geohashes # rusty-polygon-geohasher
|
8
|
+
from geohash import encode, decode # python-geohash
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
import geopandas as gpd
|
12
|
+
from shapely.geometry import Point, Polygon
|
13
|
+
|
14
|
+
from typing import Union
|
15
|
+
from pathlib import Path
|
16
|
+
|
17
|
+
import vector2dggs.constants as const
|
18
|
+
import vector2dggs.common as common
|
19
|
+
|
20
|
+
from vector2dggs import __version__
|
21
|
+
|
22
|
+
|
23
|
+
def gh_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
|
24
|
+
df[f"geohash_{parent_level:02}"] = df.index.to_series().str[:parent_level]
|
25
|
+
return df
|
26
|
+
|
27
|
+
|
28
|
+
# NB this implements a point-inside hash, but geohash_polygon only supports "within" or "intersects" (on the basis of geohashes as _polygon_ geometries) which means we have to perform additional computation to support "polyfill" as defined by H3
|
29
|
+
# A future version of vector2dggs may support within/intersects modality, at which point that would just be outer/inner with no further computation
|
30
|
+
def _polygon_to_geohashes(polygon: Polygon, level: int) -> set[str]:
|
31
|
+
# Function to compute geohash set for one polygon geometry
|
32
|
+
outer: set[str] = polygon_to_geohashes(polygon, level, inner=False)
|
33
|
+
inner: set[str] = polygon_to_geohashes(polygon, level, inner=True)
|
34
|
+
edge: set[str] = {
|
35
|
+
h
|
36
|
+
for h in (outer - inner) # All edge cells
|
37
|
+
if Point(*reversed(decode(h))).within(polygon)
|
38
|
+
} # Edge cells with a center within the polygon
|
39
|
+
return edge | inner
|
40
|
+
|
41
|
+
|
42
|
+
def gh_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
|
43
|
+
gh_col = f"geohash"
|
44
|
+
df_polygon = df[df.geom_type == "Polygon"].copy()
|
45
|
+
if not df_polygon.empty:
|
46
|
+
df_polygon = (
|
47
|
+
df_polygon.assign(
|
48
|
+
**{
|
49
|
+
gh_col: df_polygon.geometry.apply(
|
50
|
+
lambda geom: _polygon_to_geohashes(geom, level)
|
51
|
+
)
|
52
|
+
}
|
53
|
+
)
|
54
|
+
.explode(gh_col, ignore_index=True)
|
55
|
+
.set_index(gh_col)
|
56
|
+
)
|
57
|
+
|
58
|
+
# TODO linestring support
|
59
|
+
# e.g. JS implementation https://github.com/alrico88/geohashes-along
|
60
|
+
|
61
|
+
df_point = df[df.geom_type == "Point"].copy()
|
62
|
+
if len(df_point.index) > 0:
|
63
|
+
df_point[gh_col] = df_point.geometry.apply(
|
64
|
+
lambda geom: encode(geom.y, geom.x, precision=level)
|
65
|
+
)
|
66
|
+
df_point = df_point.set_index(gh_col)
|
67
|
+
|
68
|
+
return pd.concat(
|
69
|
+
map(
|
70
|
+
lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
|
71
|
+
[df_polygon, df_point],
|
72
|
+
)
|
73
|
+
)
|
74
|
+
|
75
|
+
|
76
|
+
@click.command(context_settings={"show_default": True})
|
77
|
+
@click_log.simple_verbosity_option(common.LOGGER)
|
78
|
+
@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
|
79
|
+
@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
|
80
|
+
@click.option(
|
81
|
+
"-r",
|
82
|
+
"--resolution",
|
83
|
+
"level",
|
84
|
+
required=True,
|
85
|
+
type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
|
86
|
+
help="Geohash level to index",
|
87
|
+
nargs=1,
|
88
|
+
)
|
89
|
+
@click.option(
|
90
|
+
"-pr",
|
91
|
+
"--parent_res",
|
92
|
+
"parent_level",
|
93
|
+
required=False,
|
94
|
+
type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
|
95
|
+
help="Geohash parent level for the output partition. Defaults to resolution - 6",
|
96
|
+
)
|
97
|
+
@click.option(
|
98
|
+
"-id",
|
99
|
+
"--id_field",
|
100
|
+
required=False,
|
101
|
+
default=const.DEFAULTS["id"],
|
102
|
+
type=str,
|
103
|
+
help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
|
104
|
+
nargs=1,
|
105
|
+
)
|
106
|
+
@click.option(
|
107
|
+
"-k",
|
108
|
+
"--keep_attributes",
|
109
|
+
is_flag=True,
|
110
|
+
show_default=True,
|
111
|
+
default=const.DEFAULTS["k"],
|
112
|
+
help="Retain attributes in output. The default is to create an output that only includes Geohash cell ID and the ID given by the -id field (or the default index ID).",
|
113
|
+
)
|
114
|
+
@click.option(
|
115
|
+
"-ch",
|
116
|
+
"--chunksize",
|
117
|
+
required=True,
|
118
|
+
type=int,
|
119
|
+
default=const.DEFAULTS["ch"],
|
120
|
+
help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
|
121
|
+
nargs=1,
|
122
|
+
)
|
123
|
+
@click.option(
|
124
|
+
"-s",
|
125
|
+
"--spatial_sorting",
|
126
|
+
type=click.Choice(const.SPATIAL_SORTING_METHODS),
|
127
|
+
default=const.DEFAULTS["s"],
|
128
|
+
help="Spatial sorting method when perfoming spatial partitioning.",
|
129
|
+
)
|
130
|
+
@click.option(
|
131
|
+
"-crs",
|
132
|
+
"--cut_crs",
|
133
|
+
required=False,
|
134
|
+
default=const.DEFAULTS["crs"],
|
135
|
+
type=int,
|
136
|
+
help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
|
137
|
+
nargs=1,
|
138
|
+
)
|
139
|
+
@click.option(
|
140
|
+
"-c",
|
141
|
+
"--cut_threshold",
|
142
|
+
required=True,
|
143
|
+
default=const.DEFAULTS["c"],
|
144
|
+
type=int,
|
145
|
+
help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
|
146
|
+
nargs=1,
|
147
|
+
)
|
148
|
+
@click.option(
|
149
|
+
"-t",
|
150
|
+
"--threads",
|
151
|
+
required=False,
|
152
|
+
default=const.DEFAULTS["t"],
|
153
|
+
type=int,
|
154
|
+
help="Amount of threads used for operation",
|
155
|
+
nargs=1,
|
156
|
+
)
|
157
|
+
@click.option(
|
158
|
+
"-lyr",
|
159
|
+
"--layer",
|
160
|
+
required=False,
|
161
|
+
default=const.DEFAULTS["lyr"],
|
162
|
+
type=str,
|
163
|
+
help="Name of the layer or table to read when using an input that supports layers or tables",
|
164
|
+
nargs=1,
|
165
|
+
)
|
166
|
+
@click.option(
|
167
|
+
"-g",
|
168
|
+
"--geom_col",
|
169
|
+
required=False,
|
170
|
+
default=const.DEFAULTS["g"],
|
171
|
+
type=str,
|
172
|
+
help="Column name to use when using a spatial database connection as input",
|
173
|
+
nargs=1,
|
174
|
+
)
|
175
|
+
@click.option(
|
176
|
+
"--tempdir",
|
177
|
+
default=const.DEFAULTS["tempdir"],
|
178
|
+
type=click.Path(),
|
179
|
+
help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
|
180
|
+
)
|
181
|
+
@click.option("-o", "--overwrite", is_flag=True)
|
182
|
+
@click.version_option(version=__version__)
|
183
|
+
def geohash(
|
184
|
+
vector_input: Union[str, Path],
|
185
|
+
output_directory: Union[str, Path],
|
186
|
+
level: str,
|
187
|
+
parent_level: str,
|
188
|
+
id_field: str,
|
189
|
+
keep_attributes: bool,
|
190
|
+
chunksize: int,
|
191
|
+
spatial_sorting: str,
|
192
|
+
cut_crs: int,
|
193
|
+
cut_threshold: int,
|
194
|
+
threads: int,
|
195
|
+
layer: str,
|
196
|
+
geom_col: str,
|
197
|
+
tempdir: Union[str, Path],
|
198
|
+
overwrite: bool,
|
199
|
+
):
|
200
|
+
"""
|
201
|
+
Ingest a vector dataset and index it using the Geohash geocode system.
|
202
|
+
|
203
|
+
VECTOR_INPUT is the path to input vector geospatial data.
|
204
|
+
OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
|
205
|
+
"""
|
206
|
+
tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
|
207
|
+
|
208
|
+
common.check_resolutions(level, parent_level)
|
209
|
+
|
210
|
+
con, vector_input = common.db_conn_and_input_path(vector_input)
|
211
|
+
output_directory = common.resolve_output_path(output_directory, overwrite)
|
212
|
+
|
213
|
+
if cut_crs is not None:
|
214
|
+
cut_crs = pyproj.CRS.from_user_input(cut_crs)
|
215
|
+
|
216
|
+
try:
|
217
|
+
common.index(
|
218
|
+
"geohash",
|
219
|
+
gh_polyfill,
|
220
|
+
gh_secondary_index,
|
221
|
+
vector_input,
|
222
|
+
output_directory,
|
223
|
+
int(level),
|
224
|
+
parent_level,
|
225
|
+
keep_attributes,
|
226
|
+
chunksize,
|
227
|
+
spatial_sorting,
|
228
|
+
cut_threshold,
|
229
|
+
threads,
|
230
|
+
cut_crs=cut_crs,
|
231
|
+
id_field=id_field,
|
232
|
+
con=con,
|
233
|
+
layer=layer,
|
234
|
+
geom_col=geom_col,
|
235
|
+
overwrite=overwrite,
|
236
|
+
)
|
237
|
+
except:
|
238
|
+
raise
|
239
|
+
else:
|
240
|
+
sys.exit(0)
|
vector2dggs/h3.py
CHANGED
@@ -18,13 +18,13 @@ import vector2dggs.common as common
|
|
18
18
|
from vector2dggs import __version__
|
19
19
|
|
20
20
|
|
21
|
-
def h3_secondary_index(df:
|
21
|
+
def h3_secondary_index(df: pd.DataFrame, parent_res: int) -> pd.DataFrame:
|
22
22
|
return df.h3.h3_to_parent(parent_res)
|
23
23
|
|
24
24
|
|
25
|
-
def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
|
25
|
+
def h3polyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
|
26
26
|
df_polygon = df[df.geom_type == "Polygon"]
|
27
|
-
if
|
27
|
+
if not df_polygon.empty:
|
28
28
|
df_polygon = df_polygon.h3.polyfill_resample(
|
29
29
|
resolution, return_geometry=False
|
30
30
|
).drop(columns=["index"])
|
@@ -38,10 +38,14 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
38
38
|
)
|
39
39
|
df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
|
40
40
|
|
41
|
+
df_point = df[df.geom_type == "Point"]
|
42
|
+
if len(df_point.index) > 0:
|
43
|
+
df_point = df_point.h3.geo_to_h3(resolution, set_index=True)
|
44
|
+
|
41
45
|
return pd.concat(
|
42
46
|
map(
|
43
47
|
lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
|
44
|
-
[df_polygon, df_linestring],
|
48
|
+
[df_polygon, df_linestring, df_point],
|
45
49
|
)
|
46
50
|
)
|
47
51
|
|
@@ -94,7 +98,7 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
94
98
|
@click.option(
|
95
99
|
"-s",
|
96
100
|
"--spatial_sorting",
|
97
|
-
type=click.Choice(
|
101
|
+
type=click.Choice(const.SPATIAL_SORTING_METHODS),
|
98
102
|
default=const.DEFAULTS["s"],
|
99
103
|
help="Spatial sorting method when perfoming spatial partitioning.",
|
100
104
|
)
|
@@ -126,12 +130,12 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
126
130
|
nargs=1,
|
127
131
|
)
|
128
132
|
@click.option(
|
129
|
-
"-
|
130
|
-
"--
|
133
|
+
"-lyr",
|
134
|
+
"--layer",
|
131
135
|
required=False,
|
132
|
-
default=const.DEFAULTS["
|
136
|
+
default=const.DEFAULTS["lyr"],
|
133
137
|
type=str,
|
134
|
-
help="Name of the table to read when using
|
138
|
+
help="Name of the layer or table to read when using an input that supports layers or tables",
|
135
139
|
nargs=1,
|
136
140
|
)
|
137
141
|
@click.option(
|
@@ -163,7 +167,7 @@ def h3(
|
|
163
167
|
cut_crs: int,
|
164
168
|
cut_threshold: int,
|
165
169
|
threads: int,
|
166
|
-
|
170
|
+
layer: str,
|
167
171
|
geom_col: str,
|
168
172
|
tempdir: Union[str, Path],
|
169
173
|
overwrite: bool,
|
@@ -201,7 +205,7 @@ def h3(
|
|
201
205
|
cut_crs=cut_crs,
|
202
206
|
id_field=id_field,
|
203
207
|
con=con,
|
204
|
-
|
208
|
+
layer=layer,
|
205
209
|
geom_col=geom_col,
|
206
210
|
overwrite=overwrite,
|
207
211
|
)
|
vector2dggs/katana.py
CHANGED
@@ -11,6 +11,7 @@ Redistribution and use in source and binary forms, with or without modification,
|
|
11
11
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
12
12
|
"""
|
13
13
|
|
14
|
+
from shapely import force_2d, has_z, has_m
|
14
15
|
from shapely.geometry import (
|
15
16
|
box,
|
16
17
|
Polygon,
|
@@ -18,22 +19,34 @@ from shapely.geometry import (
|
|
18
19
|
LineString,
|
19
20
|
MultiLineString,
|
20
21
|
GeometryCollection,
|
22
|
+
LinearRing,
|
21
23
|
)
|
22
|
-
from shapely.
|
24
|
+
from shapely.geometry.base import BaseGeometry
|
25
|
+
from shapely.validation import make_valid
|
26
|
+
from typing import Union, List
|
23
27
|
|
24
28
|
|
25
|
-
def katana(
|
29
|
+
def katana(
|
30
|
+
geometry: Union[BaseGeometry, None],
|
31
|
+
threshold: float,
|
32
|
+
count: int = 0,
|
33
|
+
check_2D: bool = True,
|
34
|
+
) -> List[BaseGeometry]:
|
26
35
|
"""
|
27
|
-
|
36
|
+
Recursively split a geometry into two parts across its shortest dimension.
|
28
37
|
Invalid input `geometry` will silently be made valid (if possible).
|
38
|
+
Any LinearRings will be converted to Polygons.
|
29
39
|
"""
|
30
40
|
if geometry is None:
|
31
|
-
|
32
|
-
|
41
|
+
return []
|
42
|
+
if isinstance(geometry, LinearRing):
|
43
|
+
geometry = Polygon(geometry)
|
44
|
+
if check_2D and (has_z(geometry) or has_m(geometry)):
|
45
|
+
geometry = force_2d(geometry)
|
46
|
+
check_2D = False # No further 2D check needed
|
33
47
|
if not geometry.is_valid:
|
34
|
-
# print(explain_validity(geometry))
|
35
48
|
geometry = make_valid(geometry)
|
36
|
-
if geometry.
|
49
|
+
if geometry.geom_type == "GeometryCollection":
|
37
50
|
geometry.normalize()
|
38
51
|
geometry = geometry.buffer(0)
|
39
52
|
bounds = geometry.bounds
|
@@ -60,16 +73,9 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
|
|
60
73
|
if not isinstance(c, GeometryCollection):
|
61
74
|
c = GeometryCollection([c])
|
62
75
|
for e in c.geoms:
|
63
|
-
if isinstance(
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
for g in result:
|
70
|
-
# if isinstance(g, MultiPolygon):
|
71
|
-
# final_result.extend(g)
|
72
|
-
# else:
|
73
|
-
# final_result.append(g)
|
74
|
-
final_result.append(g)
|
75
|
-
return final_result
|
76
|
+
if isinstance(
|
77
|
+
e, (Polygon, MultiPolygon, LineString, MultiLineString, LinearRing)
|
78
|
+
):
|
79
|
+
result.extend(katana(e, threshold, count + 1, check_2D))
|
80
|
+
|
81
|
+
return result
|
vector2dggs/rHP.py
CHANGED
@@ -11,6 +11,7 @@ import geopandas as gpd
|
|
11
11
|
|
12
12
|
from typing import Union
|
13
13
|
from pathlib import Path
|
14
|
+
from rhppandas.util.const import COLUMNS
|
14
15
|
|
15
16
|
import vector2dggs.constants as const
|
16
17
|
import vector2dggs.common as common
|
@@ -18,36 +19,34 @@ import vector2dggs.common as common
|
|
18
19
|
from vector2dggs import __version__
|
19
20
|
|
20
21
|
|
21
|
-
def rhp_secondary_index(df:
|
22
|
+
def rhp_secondary_index(df: pd.date_range, parent_res: int) -> pd.DataFrame:
|
22
23
|
return df.rhp.rhp_to_parent(parent_res)
|
23
24
|
|
24
25
|
|
25
|
-
def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
|
26
|
+
def rhppolyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
|
26
27
|
df_polygon = df[df.geom_type == "Polygon"]
|
27
28
|
if len(df_polygon.index) > 0:
|
28
29
|
df_polygon = df_polygon.rhp.polyfill_resample(
|
29
30
|
resolution, return_geometry=False
|
30
31
|
).drop(columns=["index"])
|
31
32
|
|
32
|
-
|
33
|
-
if len(
|
34
|
-
|
35
|
-
resolution
|
36
|
-
|
33
|
+
df_linestring = df[df.geom_type == "LineString"]
|
34
|
+
if len(df_linestring.index) > 0:
|
35
|
+
df_linestring = (
|
36
|
+
df_linestring.rhp.linetrace(resolution)
|
37
|
+
.explode(COLUMNS["linetrace"])
|
38
|
+
.set_index(COLUMNS["linetrace"])
|
39
|
+
)
|
40
|
+
df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
|
37
41
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
# df_linestring.h3.linetrace(resolution)
|
42
|
-
# .explode("h3_linetrace")
|
43
|
-
# .set_index("h3_linetrace")
|
44
|
-
# )
|
45
|
-
# df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
|
42
|
+
df_point = df[df.geom_type == "Point"]
|
43
|
+
if len(df_point.index) > 0:
|
44
|
+
df_point = df_point.rhp.geo_to_rhp(resolution, set_index=True)
|
46
45
|
|
47
46
|
return pd.concat(
|
48
47
|
map(
|
49
48
|
lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
|
50
|
-
[df_polygon,
|
49
|
+
[df_polygon, df_linestring, df_point],
|
51
50
|
)
|
52
51
|
)
|
53
52
|
|
@@ -61,7 +60,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
61
60
|
"--resolution",
|
62
61
|
required=True,
|
63
62
|
type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
|
64
|
-
help="
|
63
|
+
help="rHEALPix resolution to index",
|
65
64
|
nargs=1,
|
66
65
|
)
|
67
66
|
@click.option(
|
@@ -69,7 +68,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
69
68
|
"--parent_res",
|
70
69
|
required=False,
|
71
70
|
type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
|
72
|
-
help="
|
71
|
+
help="rHEALPix Parent resolution for the output partition. Defaults to resolution - 6",
|
73
72
|
)
|
74
73
|
@click.option(
|
75
74
|
"-id",
|
@@ -100,7 +99,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
100
99
|
@click.option(
|
101
100
|
"-s",
|
102
101
|
"--spatial_sorting",
|
103
|
-
type=click.Choice(
|
102
|
+
type=click.Choice(const.SPATIAL_SORTING_METHODS),
|
104
103
|
default=const.DEFAULTS["s"],
|
105
104
|
help="Spatial sorting method when perfoming spatial partitioning.",
|
106
105
|
)
|
@@ -132,12 +131,12 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
|
|
132
131
|
nargs=1,
|
133
132
|
)
|
134
133
|
@click.option(
|
135
|
-
"-
|
136
|
-
"--
|
134
|
+
"-lyr",
|
135
|
+
"--layer",
|
137
136
|
required=False,
|
138
|
-
default=const.DEFAULTS["
|
137
|
+
default=const.DEFAULTS["lyr"],
|
139
138
|
type=str,
|
140
|
-
help="Name of the table to read when using
|
139
|
+
help="Name of the layer or table to read when using an input that supports layers or tables",
|
141
140
|
nargs=1,
|
142
141
|
)
|
143
142
|
@click.option(
|
@@ -169,7 +168,7 @@ def rhp(
|
|
169
168
|
cut_crs: int,
|
170
169
|
cut_threshold: int,
|
171
170
|
threads: int,
|
172
|
-
|
171
|
+
layer: str,
|
173
172
|
geom_col: str,
|
174
173
|
tempdir: Union[str, Path],
|
175
174
|
overwrite: bool,
|
@@ -207,7 +206,7 @@ def rhp(
|
|
207
206
|
cut_crs=cut_crs,
|
208
207
|
id_field=id_field,
|
209
208
|
con=con,
|
210
|
-
|
209
|
+
layer=layer,
|
211
210
|
geom_col=geom_col,
|
212
211
|
overwrite=overwrite,
|
213
212
|
)
|
vector2dggs/s2.py
ADDED
@@ -0,0 +1,349 @@
|
|
1
|
+
import sys
|
2
|
+
import click
|
3
|
+
import click_log
|
4
|
+
import tempfile
|
5
|
+
import pyproj
|
6
|
+
from math import ceil
|
7
|
+
|
8
|
+
from s2geometry import pywraps2 as S2
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
import geopandas as gpd
|
12
|
+
from shapely.geometry import box, Polygon, LineString, Point
|
13
|
+
from shapely.ops import transform
|
14
|
+
from pyproj import CRS, Transformer
|
15
|
+
|
16
|
+
from typing import Union
|
17
|
+
from pathlib import Path
|
18
|
+
|
19
|
+
import vector2dggs.constants as const
|
20
|
+
import vector2dggs.common as common
|
21
|
+
|
22
|
+
from vector2dggs import __version__
|
23
|
+
|
24
|
+
|
25
|
+
def s2_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
|
26
|
+
# NB also converts the index to S2 cell tokens
|
27
|
+
index_series = df.index.to_series().astype(object)
|
28
|
+
df[f"s2_{parent_level:02}"] = index_series.map(
|
29
|
+
lambda cell_id: cell_id.parent(parent_level).ToToken()
|
30
|
+
)
|
31
|
+
df.index = index_series.map(lambda cell_id: cell_id.ToToken())
|
32
|
+
return df
|
33
|
+
|
34
|
+
|
35
|
+
def bbox_area_in_m2(
|
36
|
+
geom: Polygon,
|
37
|
+
src_crs: Union[str, CRS] = "EPSG:4326",
|
38
|
+
dst_crs: Union[str, CRS] = "EPSG:6933",
|
39
|
+
) -> float:
|
40
|
+
"""
|
41
|
+
Calculate the area of the bounding box of a geometry in square meters.
|
42
|
+
"""
|
43
|
+
minx, miny, maxx, maxy = geom.bounds
|
44
|
+
bbox = box(minx, miny, maxx, maxy)
|
45
|
+
transformer = Transformer.from_crs(src_crs, dst_crs, always_xy=True)
|
46
|
+
projected_bbox = transform(transformer.transform, bbox)
|
47
|
+
return projected_bbox.area
|
48
|
+
|
49
|
+
|
50
|
+
def max_cells_for_geom(
|
51
|
+
geom: Union[Polygon, LineString], level: int, margin: float = 1.02
|
52
|
+
) -> int:
|
53
|
+
"""
|
54
|
+
Calculate the maximum number of S2 cells that are appropriate for the given geometry and level.
|
55
|
+
This is based on the area of the geometry's bounding box,
|
56
|
+
and the maximum area of S2 cells at the given level.
|
57
|
+
"""
|
58
|
+
area = bbox_area_in_m2(geom)
|
59
|
+
max_cells = ceil(max(1, area / const.S2_CELLS_MAX_AREA_M2_BY_LEVEL[level]))
|
60
|
+
return ceil(max_cells * margin)
|
61
|
+
|
62
|
+
|
63
|
+
def cell_center_is_inside_polygon(cell: S2.S2CellId, polygon: S2.S2Polygon) -> bool:
    """True iff the centre point of ``cell`` lies inside ``polygon``."""
    return polygon.Contains(S2.S2Cell(cell).GetCenter())
|
67
|
+
|
68
|
+
|
69
|
+
def s2_polyfill_polygons(df: gpd.GeoDataFrame, level: int) -> gpd.GeoDataFrame:
    """
    Polyfill each Polygon geometry in ``df`` with S2 cells at ``level``.

    Adds an ``s2index`` column holding the set of covering S2CellIds for
    each polygon, and drops rows whose polygon yields no cells at this
    level.
    """

    def generate_s2_covering(
        geom: Polygon, level: int, centroid_inside: bool = True
    ) -> set[S2.S2CellId]:
        # Prepare loops: first the exterior loop, then the interior loops.
        loops = []

        # Exterior ring. NB S2 takes (lat, lng); shapely coords are (lon, lat).
        latlngs = [
            S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in geom.exterior.coords
        ]
        s2loop = S2.S2Loop([latlng.ToPoint() for latlng in latlngs])
        s2loop.Normalize()
        loops.append(s2loop)

        # Interior rings (polygon holes)
        for interior in geom.interiors:
            interior_latlngs = [
                S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in interior.coords
            ]
            s2interior_loop = S2.S2Loop(
                [latlng.ToPoint() for latlng in interior_latlngs]
            )
            s2interior_loop.Normalize()
            loops.append(s2interior_loop)

        # Build an S2Polygon from the loops
        s2polygon = S2.S2Polygon()
        s2polygon.InitNested(loops)

        # Use S2RegionCoverer to get the cell IDs at the specified level;
        # min == max level pins the covering to exactly `level`.
        coverer = S2.S2RegionCoverer()
        coverer.set_max_cells(max_cells_for_geom(geom, level))
        coverer.set_min_level(level)
        coverer.set_max_level(level)

        covering = coverer.GetCovering(s2polygon)

        if centroid_inside:
            # Coverings are "intersects" modality, polyfill is "centre inside"
            # modality; ergo, filter out covering cells whose centre is not
            # inside the polygon.
            covering = {
                cell
                for cell in covering
                if cell_center_is_inside_polygon(cell, s2polygon)
            }
        else:
            # BUGFIX: previously this computed `set(covering)` and discarded
            # the result, so the branch returned the coverer's list rather
            # than the documented set.
            covering = set(covering)

        return covering

    df["s2index"] = df["geometry"].apply(lambda geom: generate_s2_covering(geom, level))
    # Remove rows with no covering at this level
    df = df[df["s2index"].map(lambda cells: len(cells) > 0)]

    return df
|
128
|
+
|
129
|
+
|
130
|
+
def s2_cell_ids_from_linestring(
    linestring: LineString, level: int
) -> list[S2.S2CellId]:
    """
    Cover a LineString with S2 cells at exactly ``level``.

    Returns the coverer's list of S2CellIds intersecting the polyline.
    """
    # NB S2 takes (lat, lng); shapely coords are (lon, lat).
    vertices = [S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in linestring.coords]
    polyline = S2.S2Polyline(vertices)

    # min == max level pins the covering to exactly `level`.
    coverer = S2.S2RegionCoverer()
    coverer.set_max_cells(max_cells_for_geom(linestring, level))
    coverer.set_min_level(level)
    coverer.set_max_level(level)

    return coverer.GetCovering(polyline)
|
143
|
+
|
144
|
+
|
145
|
+
def s2_cell_id_from_point(geom: Point, level: int) -> S2.S2CellId:
    """
    Convert a point geometry to an S2 cell at the specified level.
    """
    # NB S2 takes (lat, lng); shapely points are (x=lon, y=lat).
    return S2.S2CellId(S2.S2LatLng.FromDegrees(geom.y, geom.x)).parent(level)
|
151
|
+
|
152
|
+
|
153
|
+
def s2_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
    """
    Index each supported geometry type (Polygon, LineString, Point) in
    ``df`` to S2 cells at ``level``.

    Returns a plain DataFrame indexed by S2 cell (one row per
    cell/feature pair) with the geometry column dropped.
    """
    polygons = df[df.geom_type == "Polygon"].copy()
    if len(polygons) > 0:
        polygons = (
            s2_polyfill_polygons(polygons, level)
            .explode("s2index")
            .set_index("s2index")
        )

    lines = df[df.geom_type == "LineString"].copy()
    if len(lines) > 0:
        lines["s2index"] = lines.geometry.apply(
            lambda geom: s2_cell_ids_from_linestring(geom, level)
        )
        lines = lines.explode("s2index").set_index("s2index")

    points = df[df.geom_type == "Point"].copy()
    if len(points) > 0:
        # A point maps to exactly one cell, so no explode is needed.
        points["s2index"] = points.geometry.apply(
            lambda geom: s2_cell_id_from_point(geom, level)
        )
        points = points.set_index("s2index")

    frames = [
        pd.DataFrame(part.drop(columns=[part.geometry.name]))
        for part in (polygons, lines, points)
    ]
    return pd.concat(frames)
|
183
|
+
|
184
|
+
|
185
|
+
# CLI entry point: `vector2dggs s2`. Options mirror the sibling `h3` and
# `rhp` commands so the subcommands stay interchangeable.
@click.command(context_settings={"show_default": True})
@click_log.simple_verbosity_option(common.LOGGER)
@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
@click.option(
    "-r",
    "--resolution",
    "level",
    required=True,
    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
    help="S2 level to index",
    nargs=1,
)
@click.option(
    "-pr",
    "--parent_res",
    "parent_level",
    required=False,
    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
    help="S2 parent level for the output partition. Defaults to resolution - 6",
)
@click.option(
    "-id",
    "--id_field",
    required=False,
    default=const.DEFAULTS["id"],
    type=str,
    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
    nargs=1,
)
@click.option(
    "-k",
    "--keep_attributes",
    is_flag=True,
    show_default=True,
    default=const.DEFAULTS["k"],
    help="Retain attributes in output. The default is to create an output that only includes S2 cell ID and the ID given by the -id field (or the default index ID).",
)
@click.option(
    "-ch",
    "--chunksize",
    required=True,
    type=int,
    default=const.DEFAULTS["ch"],
    help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
    nargs=1,
)
@click.option(
    "-s",
    "--spatial_sorting",
    type=click.Choice(const.SPATIAL_SORTING_METHODS),
    default=const.DEFAULTS["s"],
    help="Spatial sorting method when perfoming spatial partitioning.",
)
# NOTE(review): the help text below says `--cur-threshold`; it appears to
# mean `--cut_threshold` — confirm and fix the wording upstream.
@click.option(
    "-crs",
    "--cut_crs",
    required=False,
    default=const.DEFAULTS["crs"],
    type=int,
    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
    nargs=1,
)
@click.option(
    "-c",
    "--cut_threshold",
    required=True,
    default=const.DEFAULTS["c"],
    type=int,
    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
    nargs=1,
)
@click.option(
    "-t",
    "--threads",
    required=False,
    default=const.DEFAULTS["t"],
    type=int,
    help="Amount of threads used for operation",
    nargs=1,
)
@click.option(
    "-lyr",
    "--layer",
    required=False,
    default=const.DEFAULTS["lyr"],
    type=str,
    help="Name of the layer or table to read when using an input that supports layers or tables",
    nargs=1,
)
@click.option(
    "-g",
    "--geom_col",
    required=False,
    default=const.DEFAULTS["g"],
    type=str,
    help="Column name to use when using a spatial database connection as input",
    nargs=1,
)
@click.option(
    "--tempdir",
    default=const.DEFAULTS["tempdir"],
    type=click.Path(),
    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
)
@click.option("-o", "--overwrite", is_flag=True)
@click.version_option(version=__version__)
def s2(
    vector_input: Union[str, Path],
    output_directory: Union[str, Path],
    level: str,
    parent_level: str,
    id_field: str,
    keep_attributes: bool,
    chunksize: int,
    spatial_sorting: str,
    cut_crs: int,
    cut_threshold: int,
    threads: int,
    layer: str,
    geom_col: str,
    tempdir: Union[str, Path],
    overwrite: bool,
):
    """
    Ingest a vector dataset and index it to the S2 DGGS.

    VECTOR_INPUT is the path to input vector geospatial data.
    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
    """
    # Redirect the module-wide temp location if the user supplied one.
    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir

    # Validates level/parent_level compatibility (raises on bad combinations).
    common.check_resolutions(level, parent_level)

    # vector_input may be a file path or a DB connection string; `con` is
    # None for plain files.
    con, vector_input = common.db_conn_and_input_path(vector_input)
    output_directory = common.resolve_output_path(output_directory, overwrite)

    if cut_crs is not None:
        # Normalise the EPSG integer into a pyproj CRS object.
        cut_crs = pyproj.CRS.from_user_input(cut_crs)

    # Delegate the heavy lifting to the shared indexing pipeline, wiring in
    # the S2-specific polyfill and secondary-index functions.
    try:
        common.index(
            "s2",
            s2_polyfill,
            s2_secondary_index,
            vector_input,
            output_directory,
            int(level),
            parent_level,
            keep_attributes,
            chunksize,
            spatial_sorting,
            cut_threshold,
            threads,
            cut_crs=cut_crs,
            id_field=id_field,
            con=con,
            layer=layer,
            geom_col=geom_col,
            overwrite=overwrite,
        )
    # Re-raise any failure unchanged (non-zero exit via the exception);
    # exit 0 explicitly only on success.
    except:
        raise
    else:
        sys.exit(0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vector2dggs
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.9.0
|
4
4
|
Summary: CLI DGGS indexer for vector geospatial data
|
5
5
|
Home-page: https://github.com/manaakiwhenua/vector2dggs
|
6
6
|
License: LGPL-3.0-or-later
|
@@ -29,8 +29,10 @@ Requires-Dist: pillow (>=11.2.1,<12.0.0)
|
|
29
29
|
Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
|
30
30
|
Requires-Dist: pyarrow (>=20.0,<21.0)
|
31
31
|
Requires-Dist: pyproj (>=3.7,<4.0)
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist: rhppandas (>=0.
|
32
|
+
Requires-Dist: python-geohash (>=0.8.5,<0.9.0)
|
33
|
+
Requires-Dist: rhppandas (>=0.2.0,<0.3.0)
|
34
|
+
Requires-Dist: rusty-polygon-geohasher (>=0.2.3,<0.3.0)
|
35
|
+
Requires-Dist: s2geometry (>=0.9.0,<0.10.0)
|
34
36
|
Requires-Dist: shapely (>=2.1,<3.0)
|
35
37
|
Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
|
36
38
|
Requires-Dist: tqdm (>=4.67,<5.0)
|
@@ -47,8 +49,13 @@ This is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/
|
|
47
49
|
|
48
50
|
Currently this tool supports the following DGGSs:
|
49
51
|
|
50
|
-
- H3
|
51
|
-
- rHEALPix
|
52
|
+
- [H3](https://h3geo.org/)
|
53
|
+
- [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system)
|
54
|
+
- [S2](https://s2geometry.io/)
|
55
|
+
|
56
|
+
... and the following geocode systems:
|
57
|
+
|
58
|
+
- [Geohash](https://en.wikipedia.org/wiki/Geohash) (points, polygons)
|
52
59
|
|
53
60
|
Contributions (especially for other DGGSs), suggestions, bug reports and strongly worded letters are all welcome.
|
54
61
|
|
@@ -63,7 +70,8 @@ pip install vector2dggs
|
|
63
70
|
## Usage
|
64
71
|
|
65
72
|
```bash
|
66
|
-
vector2dggs --help
|
73
|
+
vector2dggs --help
|
74
|
+
|
67
75
|
Usage: vector2dggs [OPTIONS] COMMAND [ARGS]...
|
68
76
|
|
69
77
|
Options:
|
@@ -71,8 +79,10 @@ Options:
|
|
71
79
|
--help Show this message and exit.
|
72
80
|
|
73
81
|
Commands:
|
74
|
-
|
75
|
-
|
82
|
+
geohash Ingest a vector dataset and index it using the Geohash geocode...
|
83
|
+
h3 Ingest a vector dataset and index it to the H3 DGGS.
|
84
|
+
rhp Ingest a vector dataset and index it to the rHEALPix DGGS.
|
85
|
+
s2 Ingest a vector dataset and index it to the S2 DGGS.
|
76
86
|
```
|
77
87
|
|
78
88
|
```bash
|
@@ -119,8 +129,8 @@ Options:
|
|
119
129
|
[default: 5000; required]
|
120
130
|
-t, --threads INTEGER Amount of threads used for operation
|
121
131
|
[default: 7]
|
122
|
-
-
|
123
|
-
|
132
|
+
-lyr, --layer TEXT Name of the layer or table to read when using
|
133
|
+
an input that supports layers or tables
|
124
134
|
-g, --geom_col TEXT Column name to use when using a spatial
|
125
135
|
database connection as input [default:
|
126
136
|
geom]
|
@@ -137,9 +147,9 @@ Options:
|
|
137
147
|
|
138
148
|
Output is in the Apache Parquet format, a directory with one file per partition.
|
139
149
|
|
140
|
-
For a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
150
|
+
For a quick view of your output, you can read Apache Parquet with pandas, and then use tools like h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
|
141
151
|
- Joining attribute data from the original feature-level data onto computed DGGS cells.
|
142
|
-
- Joining other data to this output on the
|
152
|
+
- Joining other data to this output on the DGGS cell ID. (The output has a column like `{dggs}_\d`, e.g. `h3_09` or `h3_12` according to the target resolution, zero-padded to account for the maximum resolution of the DGGS)
|
143
153
|
|
144
154
|
Geoparquet output (hexagon boundaries):
|
145
155
|
|
@@ -166,6 +176,34 @@ h3_12
|
|
166
176
|
>>> g.to_parquet('./output-data/parcels.12.geo.parquet')
|
167
177
|
```
|
168
178
|
|
179
|
+
An example for S2 output (using `s2sphere`):
|
180
|
+
|
181
|
+
|
182
|
+
```python
|
183
|
+
import pandas as pd
|
184
|
+
import geopandas as gpd
|
185
|
+
import s2sphere
|
186
|
+
from shapely.geometry import Polygon
|
187
|
+
|
188
|
+
RES = 18
|
189
|
+
df = pd.read_parquet(f'~/output-data/ponds-with-holes.s2.{RES}.pq')
|
190
|
+
df = df.reset_index()
|
191
|
+
|
192
|
+
def s2id_to_polygon(s2_id_hex):
|
193
|
+
cell_id = s2sphere.CellId.from_token(s2_id_hex)
|
194
|
+
cell = s2sphere.Cell(cell_id)
|
195
|
+
vertices = []
|
196
|
+
for i in range(4):
|
197
|
+
vertex = cell.get_vertex(i)
|
198
|
+
lat_lng = s2sphere.LatLng.from_point(vertex)
|
199
|
+
vertices.append((lat_lng.lng().degrees, lat_lng.lat().degrees)) # (lon, lat)
|
200
|
+
return Polygon(vertices)
|
201
|
+
|
202
|
+
df['geometry'] = df[f's2_{RES}'].apply(s2id_to_polygon)
|
203
|
+
df = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326') # WGS84
|
204
|
+
df.to_parquet(f'sample-{RES}.parquet')
|
205
|
+
```
|
206
|
+
|
169
207
|
### For development
|
170
208
|
|
171
209
|
In brief, to get started:
|
@@ -175,6 +213,7 @@ In brief, to get started:
|
|
175
213
|
- If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
|
176
214
|
- On Linux, install GDAL 3.8+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
|
177
215
|
- Create the virtual environment with `poetry init`. This will install necessary dependencies.
|
216
|
+
- If the installation of `s2geometry` fails, you may require SWIG to build it. (A command like `conda install swig` or `sudo dnf install swig` depending on your platform).
|
178
217
|
- Subsequently, the virtual environment can be re-activated with `poetry shell`.
|
179
218
|
|
180
219
|
If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
|
@@ -199,7 +238,7 @@ vector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpk
|
|
199
238
|
With a PostgreSQL/PostGIS connection:
|
200
239
|
|
201
240
|
```bash
|
202
|
-
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -
|
241
|
+
vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -lyr topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet
|
203
242
|
```
|
204
243
|
|
205
244
|
## Citation
|
@@ -209,14 +248,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
|
|
209
248
|
title={{vector2dggs}},
|
210
249
|
author={Ardo, James and Law, Richard},
|
211
250
|
url={https://github.com/manaakiwhenua/vector2dggs},
|
212
|
-
version={0.
|
251
|
+
version={0.9.0},
|
213
252
|
date={2023-04-20}
|
214
253
|
}
|
215
254
|
```
|
216
255
|
|
217
256
|
APA/Harvard
|
218
257
|
|
219
|
-
> Ardo, J., & Law, R. (2023). vector2dggs (0.
|
258
|
+
> Ardo, J., & Law, R. (2023). vector2dggs (0.9.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
220
259
|
|
221
260
|
[](https://github.com/manaakiwhenua/manaakiwhenua-standards)
|
222
261
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
vector2dggs/__init__.py,sha256=L8qKCe-XFylNfRXefZ1yGESlLF24qwQQ87szPZJO6Zg,27
|
2
|
+
vector2dggs/cli.py,sha256=d_4skD62k6pXUWgDdVHbDwpe4A4yo62ZFx8Cp_6GpBA,767
|
3
|
+
vector2dggs/common.py,sha256=l5koOX1Ps0v5D7MgzHtK1t99hXnGA7b6I82n2rBOldE,10496
|
4
|
+
vector2dggs/constants.py,sha256=_cj3Pf52gsXfWwvpsbekE8h1yD_1jS9xqzRg2mRCq3w,1759
|
5
|
+
vector2dggs/geohash.py,sha256=t90FlZRQCH8lmtTHe2kPMcLTIf1nrrf2j-m95xk4xPc,7534
|
6
|
+
vector2dggs/h3.py,sha256=Bu_4T1WIDuTv_tJWTS8BgPmHRiCozfUUh2CxBwk98Gw,6310
|
7
|
+
vector2dggs/katana.py,sha256=v4BRzVCsroC6RzIYdxLfrr9eFOdmXb5S9jXBMs5tgSo,3571
|
8
|
+
vector2dggs/rHP.py,sha256=tC4LvqRPMmgUd36BppkvYeq94pPBhO1vBDQ-aaiHUg4,6410
|
9
|
+
vector2dggs/s2.py,sha256=HEpFTEL4UaZLjybKZ_q06QFjPuQ48MDLeg_qGc0NMEw,10835
|
10
|
+
vector2dggs-0.9.0.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
11
|
+
vector2dggs-0.9.0.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
|
12
|
+
vector2dggs-0.9.0.dist-info/METADATA,sha256=7y97ZXmDNqUQ-n8M-BgOE2XLG-pJ6f_aNGjzVlCUFzc,11534
|
13
|
+
vector2dggs-0.9.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
14
|
+
vector2dggs-0.9.0.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
|
15
|
+
vector2dggs-0.9.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
vector2dggs/__init__.py,sha256=75x2g9bnxuIHCJ-8fmoB5K2n_V2ayIxdRNvYsNUaMhs,27
|
2
|
-
vector2dggs/cli.py,sha256=HoPp7Bwk2kZghAms6wNepx-bFhoAuHH7WXACMIy3MuM,652
|
3
|
-
vector2dggs/common.py,sha256=DL3ohG-QQyI-phyxeO6Fi2BOwWnFct-I_Y87_XC2SRQ,10578
|
4
|
-
vector2dggs/constants.py,sha256=u6n6XNvEVLUexn9Sb2rc22s2B4Rrg_VXFJaM7uEy-9Q,536
|
5
|
-
vector2dggs/h3.py,sha256=GgiGOVbsXXNp95KWKKmJZvDxGFj91TTWl575OaPZ6yk,6145
|
6
|
-
vector2dggs/katana.py,sha256=pgVWy032NkT5yilUO0d0IKH4NUvY7DJLjmfsxhBiF08,3407
|
7
|
-
vector2dggs/rHP.py,sha256=Y36tPbtY-tYBUFILHD-xnUxa2yKlYotGP6043Bg5nZc,6450
|
8
|
-
vector2dggs-0.6.3.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
-
vector2dggs-0.6.3.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
|
10
|
-
vector2dggs-0.6.3.dist-info/METADATA,sha256=sFXXSXOutJzbTLRviHXXn7RkN51SCfbD6ZRBeledcmY,10223
|
11
|
-
vector2dggs-0.6.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
12
|
-
vector2dggs-0.6.3.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
|
13
|
-
vector2dggs-0.6.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|