vector2dggs 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector2dggs/__init__.py +1 -1
- vector2dggs/cli.py +2 -0
- vector2dggs/common.py +354 -0
- vector2dggs/constants.py +26 -0
- vector2dggs/h3.py +37 -300
- vector2dggs/katana.py +10 -3
- vector2dggs/rHP.py +217 -0
- {vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/METADATA +24 -18
- vector2dggs-0.6.2.dist-info/RECORD +13 -0
- {vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/WHEEL +1 -1
- vector2dggs-0.6.0.dist-info/RECORD +0 -10
- {vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/COPYING +0 -0
- {vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/COPYING.LESSER +0 -0
- {vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/entry_points.txt +0 -0
vector2dggs/__init__.py
CHANGED
@@ -1 +1 @@
-__version__: str = "0.6.0"
+__version__: str = "0.6.2"
vector2dggs/cli.py
CHANGED
@@ -2,6 +2,7 @@ import click
 
 from vector2dggs import __version__
 from vector2dggs.h3 import h3
+from vector2dggs.rHP import rhp
 
 # If the program does terminal interaction, make it output a short
 # notice like this when it starts in an interactive mode:
@@ -19,6 +20,7 @@ def cli():
 
 
 cli.add_command(h3)
+cli.add_command(rhp)
 
 
 def main():
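
The only substantive change above is that the click group gains the new `rhp` subcommand. A minimal sketch (assuming vector2dggs 0.6.2 is installed in the current environment) that checks both subcommands are registered, using click's own test runner:

```python
# Sketch: confirm the CLI group exposes both `h3` and the new `rhp` command.
from click.testing import CliRunner

from vector2dggs.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["--help"])
assert "h3" in result.output and "rhp" in result.output
print(result.output)
```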
vector2dggs/common.py
ADDED
@@ -0,0 +1,354 @@
+import os
+import errno
+import logging
+import tempfile
+import click_log
+import sqlalchemy
+import shutil
+import pyproj
+
+import pandas as pd
+import geopandas as gpd
+import dask.dataframe as dd
+import dask_geopandas as dgpd
+
+from typing import Union, Callable
+from pathlib import Path, PurePath
+from urllib.parse import urlparse
+from tqdm import tqdm
+from tqdm.dask import TqdmCallback
+from multiprocessing.dummy import Pool
+from shapely.geometry import GeometryCollection
+
+import vector2dggs.constants as const
+
+from . import katana
+
+SQLConnectionType = Union[sqlalchemy.engine.Connection, sqlalchemy.engine.Engine]
+
+
+LOGGER = logging.getLogger(__name__)
+click_log.basic_config(LOGGER)
+click_log.ColorFormatter.colors["info"] = dict(fg="green")
+
+
+class ParentResolutionException(Exception):
+    pass
+
+
+def check_resolutions(resolution: int, parent_res: int) -> None:
+    if parent_res is not None and not int(parent_res) < int(resolution):
+        raise ParentResolutionException(
+            "Parent resolution ({pr}) must be less than target resolution ({r})".format(
+                pr=parent_res, r=resolution
+            )
+        )
+
+
+def db_conn_and_input_path(
+    vector_input: Union[str, Path],
+) -> tuple[SQLConnectionType, Union[str, Path]]:
+    con: sqlalchemy.engine.Connection = None
+    scheme: str = urlparse(vector_input).scheme
+
+    if bool(scheme) and scheme != "file":
+        # Assume database connection
+        con = sqlalchemy.create_engine(vector_input)
+
+    elif not Path(vector_input).exists():
+        if not scheme:
+            LOGGER.error(
+                f"Input vector {vector_input} does not exist, and is not recognised as a remote URI"
+            )
+            raise FileNotFoundError(
+                errno.ENOENT, os.strerror(errno.ENOENT), vector_input
+            )
+        vector_input = str(vector_input)
+
+    else:
+        vector_input = Path(vector_input)
+
+    return (con, vector_input)
+
+
+def resolve_output_path(
+    output_directory: Union[str, Path], overwrite: bool
+) -> Union[str, Path]:
+    output_directory = Path(output_directory)
+    outputexists = os.path.exists(output_directory)
+
+    if outputexists and not overwrite:
+        raise FileExistsError(
+            f"{output_directory} already exists; if you want to overwrite this, use the -o/--overwrite flag"
+        )
+
+    elif outputexists and overwrite:
+        LOGGER.warning(f"Overwriting the contents of {output_directory}")
+        shutil.rmtree(output_directory)
+
+    output_directory.mkdir(parents=True, exist_ok=True)
+
+    return output_directory
+
+
+def drop_condition(
+    df: pd.DataFrame,
+    drop_index: pd.Index,
+    log_statement: str,
+    warning_threshold: float = 0.01,
+):
+    LOGGER.debug(log_statement)
+    _before = len(df)
+    df = df.drop(drop_index)
+    _after = len(df)
+    _diff = _before - _after
+    if _diff:
+        log_method = (
+            LOGGER.info if (_diff / float(_before)) < warning_threshold else LOGGER.warn
+        )
+        log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
+    return df
+
+
+def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):
+    """
+    Uses a parent resolution,
+    OR,
+    Given a target resolution, returns our recommended parent resolution.
+
+    Used for intermediate re-partioning.
+    """
+    if dggs == "h3":
+        return (
+            parent_res
+            if parent_res is not None
+            else max(const.MIN_H3, (resolution - const.DEFAULT_PARENT_OFFSET))
+        )
+    elif dggs == "rhp":
+        return (
+            parent_res
+            if parent_res is not None
+            else max(const.MIN_RHP, (resolution - const.DEFAULT_PARENT_OFFSET))
+        )
+    else:
+        raise RuntimeError(
+            "Unknown dggs {dggs}) - must be one of [ 'h3', 'rhp' ]".format(dggs=dggs)
+        )
+
+
+def parent_partitioning(
+    dggs: str,
+    input_dir: Path,
+    output_dir: Path,
+    resolution: int,
+    parent_res: Union[None, int],
+    **kwargs,
+) -> None:
+    parent_res: int = get_parent_res(dggs, parent_res, resolution)
+    partition_col = f"{dggs}_{parent_res:02}"
+
+    with TqdmCallback(desc="Repartitioning"):
+        dd.read_parquet(input_dir, engine="pyarrow").to_parquet(
+            output_dir,
+            overwrite=kwargs.get("overwrite", False),
+            engine=kwargs.get("engine", "pyarrow"),
+            partition_on=partition_col,
+            compression=kwargs.get("compression", "ZSTD"),
+        )
+    LOGGER.debug("Parent cell repartitioning complete")
+
+    # Rename output to just be the partition key, suffix .parquet
+    for f in os.listdir(output_dir):
+        os.rename(
+            os.path.join(output_dir, f),
+            os.path.join(output_dir, f.replace(f"{partition_col}=", "") + ".parquet"),
+        )
+
+    return
+
+
+def polyfill(
+    dggs: str,
+    dggsfunc: Callable,
+    secondary_index_func: Callable,
+    pq_in: Path,
+    spatial_sort_col: str,
+    resolution: int,
+    parent_res: Union[None, int],
+    output_directory: str,
+) -> None:
+    """
+    Reads a geoparquet, performs polyfilling (for Polygon),
+    linetracing (for LineString), and writes out to parquet.
+    """
+    df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
+    if len(df.index) == 0:
+        # Input is empty, nothing to polyfill
+        return None
+
+    # DGGS specific polyfill
+    df = dggsfunc(df, resolution)
+
+    if len(df.index) == 0:
+        # Polyfill resulted in empty output (e.g. large cell, small feature)
+        return None
+
+    df.index.rename(f"{dggs}_{resolution:02}", inplace=True)
+    parent_res: int = get_parent_res(dggs, parent_res, resolution)
+    # print(parent_res)
+    # print(df.index)
+    # print(df.columns)
+
+    # Secondary (parent) index, used later for partitioning
+    df = secondary_index_func(df, parent_res)
+
+    df.to_parquet(
+        PurePath(output_directory, pq_in.name), engine="auto", compression="ZSTD"
+    )
+    return None
+
+
+def polyfill_star(args) -> None:
+    return polyfill(*args)
+
+
+def index(
+    dggs: str,
+    dggsfunc: Callable,
+    secondary_index_func: Callable,
+    input_file: Union[Path, str],
+    output_directory: Union[Path, str],
+    resolution: int,
+    parent_res: Union[None, int],
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_threshold: int,
+    processes: int,
+    id_field: str = None,
+    cut_crs: pyproj.CRS = None,
+    con: SQLConnectionType = None,
+    table: str = None,
+    geom_col: str = "geom",
+    overwrite: bool = False,
+) -> Path:
+    """
+    Performs multi-threaded polyfilling on (multi)polygons.
+    """
+
+    if table and con:
+        # Database connection
+        if keep_attributes:
+            q = sqlalchemy.text(f"SELECT * FROM {table}")
+        elif id_field and not keep_attributes:
+            q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {table}")
+        else:
+            q = sqlalchemy.text(f"SELECT {geom_col} FROM {table}")
+        df = gpd.read_postgis(q, con.connect(), geom_col=geom_col).rename_geometry(
+            "geometry"
+        )
+    else:
+        # Read file
+        df = gpd.read_file(input_file)
+
+    if cut_crs:
+        df = df.to_crs(cut_crs)
+    LOGGER.debug("Cutting with CRS: %s", df.crs)
+
+    if id_field:
+        df = df.set_index(id_field)
+    else:
+        df = df.reset_index()
+        df = df.rename(columns={"index": "fid"}).set_index("fid")
+
+    if not keep_attributes:
+        # Remove all attributes except the geometry
+        df = df.loc[:, ["geometry"]]
+
+    LOGGER.debug("Cutting large geometries")
+    with tqdm(total=df.shape[0], desc="Splitting") as pbar:
+        for index, row in df.iterrows():
+            df.loc[index, "geometry"] = GeometryCollection(
+                katana.katana(row.geometry, cut_threshold)
+            )
+            pbar.update(1)
+
+    LOGGER.debug("Exploding geometry collections and multipolygons")
+    df = (
+        df.to_crs(4326)
+        .explode(index_parts=False)  # Explode from GeometryCollection
+        .explode(index_parts=False)  # Explode multipolygons to polygons
+    ).reset_index()
+
+    drop_conditions = [
+        {
+            "index": lambda frame: frame[
+                (frame.geometry.is_empty | frame.geometry.isna())
+            ],
+            "message": "Considering empty or null geometries",
+        },
+        {
+            "index": lambda frame: frame[
+                (frame.geometry.geom_type != "Polygon")
+                & (frame.geometry.geom_type != "LineString")
+            ],  # NB currently points and other types are lost; in principle, these could be indexed
+            "message": "Considering unsupported geometries",
+        },
+    ]
+    for condition in drop_conditions:
+        df = drop_condition(df, condition["index"](df).index, condition["message"])
+
+    ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
+
+    LOGGER.debug("Spatially sorting and partitioning (%s)", spatial_sorting)
+    ddf = ddf.spatial_shuffle(by=spatial_sorting)
+    spatial_sort_col = (
+        spatial_sorting
+        if spatial_sorting == "geohash"
+        else f"{spatial_sorting}_distance"
+    )
+
+    with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir:
+        with TqdmCallback(desc=f"Spatially partitioning"):
+            ddf.to_parquet(tmpdir, overwrite=True)
+
+        filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))
+
+        # Multithreaded polyfilling
+        LOGGER.debug(
+            "Indexing on spatial partitions by polyfill with resolution: %d",
+            resolution,
+        )
+        with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
+            with Pool(processes=processes) as pool:
+                args = [
+                    (
+                        dggs,
+                        dggsfunc,
+                        secondary_index_func,
+                        filepath,
+                        spatial_sort_col,
+                        resolution,
+                        parent_res,
+                        tmpdir2,
+                    )
+                    for filepath in filepaths
+                ]
+                list(
+                    tqdm(
+                        pool.imap(polyfill_star, args),
+                        total=len(args),
+                        desc="DGGS indexing",
+                    )
+                )
+
+            parent_partitioning(
+                dggs,
+                tmpdir2,
+                output_directory,
+                resolution,
+                parent_res,
+                overwrite=overwrite,
+            )
+
+    return output_directory
vector2dggs/constants.py
ADDED
@@ -0,0 +1,26 @@
+import multiprocessing
+import warnings
+import tempfile
+
+
+MIN_H3, MAX_H3 = 0, 15
+MIN_RHP, MAX_RHP = 0, 15
+
+DEFAULTS = {
+    "id": None,
+    "k": False,
+    "ch": 50,
+    "s": "hilbert",
+    "crs": None,
+    "c": 5000,
+    "t": (multiprocessing.cpu_count() - 1),
+    "tbl": None,
+    "g": "geom",
+    "tempdir": tempfile.tempdir,
+}
+
+DEFAULT_PARENT_OFFSET = 6
+
+warnings.filterwarnings(
+    "ignore"
+)  # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
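
These module-level constants centralise what were previously literals in `h3.py`, so both commands share one set of CLI defaults. A minimal sketch (assuming vector2dggs 0.6.2) of reading them:

```python
# Sketch: the shared CLI defaults used by both the h3 and rhp commands.
import multiprocessing

import vector2dggs.constants as const

assert const.DEFAULTS["ch"] == 50                              # --chunksize
assert const.DEFAULTS["t"] == multiprocessing.cpu_count() - 1  # --threads
assert const.DEFAULT_PARENT_OFFSET == 6
```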
vector2dggs/h3.py
CHANGED
@@ -1,82 +1,28 @@
-import errno
-import logging
-import os
-import multiprocessing
-from multiprocessing.dummy import Pool
-from pathlib import Path, PurePath
-import shutil
 import sys
-import tempfile
-from typing import Union
-from urllib.parse import urlparse
-import warnings
-
-os.environ["USE_PYGEOS"] = "0"
-
 import click
 import click_log
-import dask.dataframe as dd
-import dask_geopandas as dgpd
-import geopandas as gpd
-import h3pandas
-import pandas as pd
+import tempfile
 import pyproj
-from shapely.geometry import GeometryCollection
-import sqlalchemy
-from tqdm import tqdm
-from tqdm.dask import TqdmCallback
-
-from . import katana
-from vector2dggs import __version__
 
-LOGGER = logging.getLogger(__name__)
-click_log.basic_config(LOGGER)
-MIN_H3, MAX_H3 = 0, 15
-
-warnings.filterwarnings(
-    "ignore"
-)  # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
-
-
-DEFAULT_PARENT_OFFSET = 6
-DEFAULT_CHUNK_SIZE = 50
+import h3pandas  # Necessary import despite lack of explicit use
 
+import pandas as pd
+import geopandas as gpd
 
-
-
+from typing import Union
+from pathlib import Path
 
+import vector2dggs.constants as const
+import vector2dggs.common as common
 
-def _get_parent_res(parent_res: Union[None, int], resolution: int):
-    """
-    Uses a parent resolution,
-    OR,
-    Given a target resolution, returns our recommended parent resolution.
+from vector2dggs import __version__
 
-    Used for intermediate re-partioning.
-    """
-    return (
-        int(parent_res)
-        if parent_res is not None
-        else max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET))
-    )
 
+def h3_secondary_index(df: gpd.GeoDataFrame, parent_res: int) -> gpd.GeoDataFrame:
+    return df.h3.h3_to_parent(parent_res)
 
-def polyfill(
-    pq_in: Path,
-    spatial_sort_col: str,
-    resolution: int,
-    parent_res: Union[None, int],
-    output_directory: str,
-) -> None:
-    """
-    Reads a geoparquet, performs H3 polyfilling (for polygons),
-    linetracing (for linestrings), and writes out to parquet.
-    """
-    df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
-    if len(df.index) == 0:
-        # Input is empty, nothing to polyfill
-        return None
 
+def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
     df_polygon = df[df.geom_type == "Polygon"]
     if len(df_polygon.index) > 0:
         df_polygon = df_polygon.h3.polyfill_resample(
@@ -92,207 +38,23 @@ def polyfill(
         )
     df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
 
-    df = pd.concat(
+    return pd.concat(
         map(
             lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
             [df_polygon, df_linestring],
         )
     )
 
-    if len(df.index) == 0:
-        # Polyfill resulted in empty output (e.g. large cell, small feature)
-        return None
-
-    df.index.rename(f"h3_{resolution:02}", inplace=True)
-    parent_res: int = _get_parent_res(parent_res, resolution)
-    # Secondary (parent) H3 index, used later for partitioning
-    df.h3.h3_to_parent(parent_res).to_parquet(
-        PurePath(output_directory, pq_in.name), engine="auto", compression="ZSTD"
-    )
-    return None
-
-
-def polyfill_star(args) -> None:
-    return polyfill(*args)
-
-
-def _parent_partitioning(
-    input_dir: Path,
-    output_dir: Path,
-    resolution: int,
-    parent_res: Union[None, int],
-    **kwargs,
-) -> None:
-    parent_res: int = _get_parent_res(parent_res, resolution)
-    partition_col = f"h3_{parent_res:02}"
-
-    with TqdmCallback(desc="Repartitioning"):
-        dd.read_parquet(input_dir, engine="pyarrow").to_parquet(
-            output_dir,
-            overwrite=kwargs.get("overwrite", False),
-            engine=kwargs.get("engine", "pyarrow"),
-            partition_on=partition_col,
-            compression=kwargs.get("compression", "ZSTD"),
-        )
-    LOGGER.debug("Parent cell repartitioning complete")
-
-    # Rename output to just be the partition key, suffix .parquet
-    for f in os.listdir(output_dir):
-        os.rename(
-            os.path.join(output_dir, f),
-            os.path.join(output_dir, f.replace(f"{partition_col}=", "") + ".parquet"),
-        )
-
-    return
-
-
-def drop_condition(
-    df: pd.DataFrame,
-    drop_index: pd.Index,
-    log_statement: str,
-    warning_threshold: float = 0.01,
-):
-    LOGGER.info(log_statement)
-    _before = len(df)
-    df = df.drop(drop_index)
-    _after = len(df)
-    _diff = _before - _after
-    if _diff:
-        log_method = (
-            LOGGER.info if (_diff / float(_before)) < warning_threshold else LOGGER.warn
-        )
-        log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
-    return df
-
-
-def _index(
-    input_file: Union[Path, str],
-    output_directory: Union[Path, str],
-    resolution: int,
-    parent_res: Union[None, int],
-    keep_attributes: bool,
-    chunksize: int,
-    spatial_sorting: str,
-    cut_threshold: int,
-    processes: int,
-    id_field: str = None,
-    cut_crs: pyproj.CRS = None,
-    con: Union[sqlalchemy.engine.Connection, sqlalchemy.engine.Engine] = None,
-    table: str = None,
-    geom_col: str = "geom",
-    overwrite: bool = False,
-) -> Path:
-    """
-    Performs multi-threaded H3 polyfilling on (multi)polygons.
-    """
-
-    if table and con:
-        # Database connection
-        if keep_attributes:
-            q = sqlalchemy.text(f"SELECT * FROM {table}")
-        elif id_field and not keep_attributes:
-            q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {table}")
-        else:
-            q = sqlalchemy.text(f"SELECT {geom_col} FROM {table}")
-        df = gpd.read_postgis(q, con.connect(), geom_col=geom_col).rename_geometry(
-            "geometry"
-        )
-    else:
-        # Read file
-        df = gpd.read_file(input_file)
-
-    if cut_crs:
-        df = df.to_crs(cut_crs)
-    LOGGER.info("Cutting with CRS: %s", df.crs)
-
-    if id_field:
-        df = df.set_index(id_field)
-    else:
-        df = df.reset_index()
-        df = df.rename(columns={"index": "fid"}).set_index("fid")
-
-    if not keep_attributes:
-        # Remove all attributes except the geometry
-        df = df.loc[:, ["geometry"]]
-
-    LOGGER.info("Watch out for ninjas! (Cutting polygons)")
-    with tqdm(total=df.shape[0]) as pbar:
-        for index, row in df.iterrows():
-            df.loc[index, "geometry"] = GeometryCollection(
-                katana.katana(row.geometry, cut_threshold)
-            )
-            pbar.update(1)
-
-    LOGGER.info("Exploding geometry collections and multipolygons")
-    df = (
-        df.to_crs(4326)
-        .explode(index_parts=False)  # Explode from GeometryCollection
-        .explode(index_parts=False)  # Explode multipolygons to polygons
-    ).reset_index()
-
-    drop_conditions = [
-        {
-            "index": lambda frame: frame[
-                (frame.geometry.is_empty | frame.geometry.isna())
-            ],
-            "message": "Dropping empty or null geometries",
-        },
-        {
-            "index": lambda frame: frame[
-                (frame.geometry.geom_type != "Polygon")
-                & (frame.geometry.geom_type != "LineString")
-            ],  # NB currently points and other types are lost; in principle, these could be indexed
-            "message": "Dropping non-polygonal geometries",
-        },
-    ]
-    for condition in drop_conditions:
-        df = drop_condition(df, condition["index"](df).index, condition["message"])
-
-    ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
-
-    LOGGER.info("Spatially sorting and partitioning (%s)", spatial_sorting)
-    ddf = ddf.spatial_shuffle(by=spatial_sorting)
-    spatial_sort_col = (
-        spatial_sorting
-        if spatial_sorting == "geohash"
-        else f"{spatial_sorting}_distance"
-    )
-
-    with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir:
-        with TqdmCallback():
-            ddf.to_parquet(tmpdir, overwrite=True)
-
-        filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))
-
-        # Multithreaded polyfilling
-        LOGGER.info(
-            "H3 Indexing on spatial partitions by polyfill with H3 resolution: %d",
-            resolution,
-        )
-        with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
-            with Pool(processes=processes) as pool:
-                args = [
-                    (filepath, spatial_sort_col, resolution, parent_res, tmpdir2)
-                    for filepath in filepaths
-                ]
-                list(tqdm(pool.imap(polyfill_star, args), total=len(args)))
-
-            _parent_partitioning(
-                tmpdir2, output_directory, resolution, parent_res, overwrite=overwrite
-            )
-
-    return output_directory
-
 
 @click.command(context_settings={"show_default": True})
-@click_log.simple_verbosity_option(LOGGER)
+@click_log.simple_verbosity_option(common.LOGGER)
 @click.argument("vector_input", required=True, type=click.Path(), nargs=1)
 @click.argument("output_directory", required=True, type=click.Path(), nargs=1)
 @click.option(
     "-r",
     "--resolution",
     required=True,
-    type=click.Choice(list(map(str, range(MIN_H3, MAX_H3 + 1)))),
+    type=click.Choice(list(map(str, range(const.MIN_H3, const.MAX_H3 + 1)))),
     help="H3 resolution to index",
     nargs=1,
 )
@@ -300,14 +62,14 @@ def _index(
     "-pr",
     "--parent_res",
     required=False,
-    type=click.Choice(list(map(str, range(MIN_H3, MAX_H3 + 1)))),
+    type=click.Choice(list(map(str, range(const.MIN_H3, const.MAX_H3 + 1)))),
     help="H3 Parent resolution for the output partition. Defaults to resolution - 6",
 )
 @click.option(
     "-id",
     "--id_field",
     required=False,
-    default=None,
+    default=const.DEFAULTS["id"],
     type=str,
     help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
     nargs=1,
@@ -317,7 +79,7 @@ def _index(
     "--keep_attributes",
     is_flag=True,
     show_default=True,
-    default=False,
+    default=const.DEFAULTS["k"],
     help="Retain attributes in output. The default is to create an output that only includes H3 cell ID and the ID given by the -id field (or the default index ID).",
 )
 @click.option(
@@ -325,7 +87,7 @@ def _index(
     "--chunksize",
     required=True,
     type=int,
-    default=DEFAULT_CHUNK_SIZE,
+    default=const.DEFAULTS["ch"],
     help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
     nargs=1,
 )
@@ -333,32 +95,32 @@ def _index(
     "-s",
     "--spatial_sorting",
     type=click.Choice(["hilbert", "morton", "geohash"]),
-    default="hilbert",
+    default=const.DEFAULTS["s"],
    help="Spatial sorting method when perfoming spatial partitioning.",
 )
 @click.option(
     "-crs",
     "--cut_crs",
     required=False,
-    default=None,
+    default=const.DEFAULTS["crs"],
     type=int,
-    help="Set the coordinate reference system (CRS) used for cutting large polygons (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
     nargs=1,
 )
 @click.option(
     "-c",
     "--cut_threshold",
     required=True,
-    default=5000,
+    default=const.DEFAULTS["c"],
     type=int,
-    help="Cutting up large polygons into smaller polygons based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
     nargs=1,
 )
 @click.option(
     "-t",
     "--threads",
     required=False,
-    default=(multiprocessing.cpu_count() - 1),
+    default=const.DEFAULTS["t"],
     type=int,
     help="Amount of threads used for operation",
     nargs=1,
@@ -367,7 +129,7 @@ def _index(
     "-tbl",
     "--table",
     required=False,
-    default=None,
+    default=const.DEFAULTS["tbl"],
     type=str,
     help="Name of the table to read when using a spatial database connection as input",
     nargs=1,
@@ -376,14 +138,14 @@ def _index(
     "-g",
     "--geom_col",
     required=False,
-    default="geom",
+    default=const.DEFAULTS["g"],
     type=str,
     help="Column name to use when using a spatial database connection as input",
     nargs=1,
 )
 @click.option(
     "--tempdir",
-    default=tempfile.tempdir,
+    default=const.DEFAULTS["tempdir"],
     type=click.Path(),
     help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
 )
@@ -412,46 +174,21 @@ def h3(
     VECTOR_INPUT is the path to input vector geospatial data.
     OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
     """
-    tempfile.tempdir = tempdir
-    if parent_res is not None and not int(parent_res) < int(resolution):
-        raise ParentResolutionException(
-            "Parent resolution ({pr}) must be less than target resolution ({r})".format(
-                pr=parent_res, r=resolution
-            )
-        )
-    con: sqlalchemy.engine.Connection = None
-    scheme: str = urlparse(vector_input).scheme
-    if bool(scheme) and scheme != "file":
-        # Assume database connection
-        con = sqlalchemy.create_engine(vector_input)
-    elif not Path(vector_input).exists():
-        if not scheme:
-            LOGGER.warning(
-                f"Input vector {vector_input} does not exist, and is not recognised as a remote URI"
-            )
-            raise FileNotFoundError(
-                errno.ENOENT, os.strerror(errno.ENOENT), vector_input
-            )
-        vector_input = str(vector_input)
-    else:
-        vector_input = Path(vector_input)
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
 
-    output_directory = Path(output_directory)
-    outputexists = os.path.exists(output_directory)
-    if outputexists and not overwrite:
-        raise FileExistsError(
-            f"{output_directory} already exists; if you want to overwrite this, use the -o/--overwrite flag"
-        )
-    elif outputexists and overwrite:
-        LOGGER.info(f"Overwriting the contents of {output_directory}")
-        shutil.rmtree(output_directory)
-    output_directory.mkdir(parents=True, exist_ok=True)
+    common.check_resolutions(resolution, parent_res)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
 
     if cut_crs is not None:
         cut_crs = pyproj.CRS.from_user_input(cut_crs)
 
     try:
-        _index(
+        common.index(
+            "h3",
+            h3polyfill,
+            h3_secondary_index,
             vector_input,
             output_directory,
             int(resolution),
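
After this refactor, `h3.py` reduces to the two H3-specific callables (`h3polyfill`, `h3_secondary_index`) plus the click command, with everything else delegated to `common.index`. A rough sketch (assuming vector2dggs 0.6.2 with h3pandas installed; the sample square is illustrative) of the per-partition polyfill in isolation:

```python
# Sketch: fill a 1-degree square with resolution-3 H3 cells; the result is a
# plain DataFrame indexed by H3 cell ID, with the geometry column dropped.
import geopandas as gpd
from shapely.geometry import box

from vector2dggs.h3 import h3polyfill

gdf = gpd.GeoDataFrame(
    {"fid": [1]}, geometry=[box(174.0, -41.0, 175.0, -40.0)], crs=4326
)
cells = h3polyfill(gdf, 3)
print(len(cells), cells.head())
```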
vector2dggs/katana.py
CHANGED
@@ -11,13 +11,20 @@ Redistribution and use in source and binary forms, with or without modification,
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
-from shapely.geometry import box, Polygon, MultiPolygon, GeometryCollection
+from shapely.geometry import (
+    box,
+    Polygon,
+    MultiPolygon,
+    LineString,
+    MultiLineString,
+    GeometryCollection,
+)
 from shapely.validation import explain_validity, make_valid
 
 
 def katana(geometry, threshold, count=0) -> GeometryCollection:
     """
-    Split a Polygon into two parts across its shortest dimension.
+    Split a geometry into two parts across its shortest dimension.
     Invalid input `geometry` will silently be made valid (if possible).
     """
     if geometry is None:
@@ -53,7 +60,7 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
         if not isinstance(c, GeometryCollection):
             c = GeometryCollection([c])
         for e in c.geoms:
-            if isinstance(e, (Polygon, MultiPolygon)):
+            if isinstance(e, (Polygon, MultiPolygon, LineString, MultiLineString)):
                 result.extend(katana(e, threshold, count + 1))
     if count > 0:
         return result
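
The katana change extends the recursive splitter to LineString/MultiLineString parts, matching the pipeline's new support for cutting line features. A short sketch (assuming vector2dggs 0.6.2) of the splitter on a polygon:

```python
# Sketch: a 10 x 10 square with threshold 5 is recursively split in two until
# each piece fits; the top-level call returns a GeometryCollection of pieces.
from shapely.geometry import box

from vector2dggs.katana import katana

pieces = katana(box(0.0, 0.0, 10.0, 10.0), 5.0)
print(len(pieces.geoms))                          # e.g. 4 quadrant-sized pieces
print(all(p.area <= 25.0 + 1e-9 for p in pieces.geoms))
```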
vector2dggs/rHP.py
ADDED
@@ -0,0 +1,217 @@
+import sys
+import click
+import click_log
+import tempfile
+import pyproj
+
+import rhppandas  # Necessary import despite lack of explicit use
+
+import pandas as pd
+import geopandas as gpd
+
+from typing import Union
+from pathlib import Path
+
+import vector2dggs.constants as const
+import vector2dggs.common as common
+
+from vector2dggs import __version__
+
+
+def rhp_secondary_index(df: gpd.GeoDataFrame, parent_res: int) -> gpd.GeoDataFrame:
+    return df.rhp.rhp_to_parent(parent_res)
+
+
+def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
+    df_polygon = df[df.geom_type == "Polygon"]
+    if len(df_polygon.index) > 0:
+        df_polygon = df_polygon.rhp.polyfill_resample(
+            resolution, return_geometry=False
+        ).drop(columns=["index"])
+
+    df_multipolygon = df[df.geom_type == "MultiPolygon"]
+    if len(df_multipolygon.index) > 0:
+        df_multipolygon = df_multipolygon.rhp.polyfill_resample(
+            resolution, return_geometry=False
+        ).drop(columns=["index"])
+
+    # df_linestring = df[df.geom_type == "LineString"]
+    # if len(df_linestring.index) > 0:
+    #     df_linestring = (
+    #         df_linestring.h3.linetrace(resolution)
+    #         .explode("h3_linetrace")
+    #         .set_index("h3_linetrace")
+    #     )
+    # df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
+
+    return pd.concat(
+        map(
+            lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
+            [df_polygon, df_multipolygon],  # df_linestring],
+        )
+    )
+
+
+@click.command(context_settings={"show_default": True})
+@click_log.simple_verbosity_option(common.LOGGER)
+@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
+@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
+@click.option(
+    "-r",
+    "--resolution",
+    required=True,
+    type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
+    help="H3 resolution to index",
+    nargs=1,
+)
+@click.option(
+    "-pr",
+    "--parent_res",
+    required=False,
+    type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
+    help="H3 Parent resolution for the output partition. Defaults to resolution - 6",
+)
+@click.option(
+    "-id",
+    "--id_field",
+    required=False,
+    default=const.DEFAULTS["id"],
+    type=str,
+    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
+    nargs=1,
+)
+@click.option(
+    "-k",
+    "--keep_attributes",
+    is_flag=True,
+    show_default=True,
+    default=const.DEFAULTS["k"],
+    help="Retain attributes in output. The default is to create an output that only includes rHEALPix cell ID and the ID given by the -id field (or the default index ID).",
+)
+@click.option(
+    "-ch",
+    "--chunksize",
+    required=True,
+    type=int,
+    default=const.DEFAULTS["ch"],
+    help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
+    nargs=1,
+)
+@click.option(
+    "-s",
+    "--spatial_sorting",
+    type=click.Choice(["hilbert", "morton", "geohash"]),
+    default=const.DEFAULTS["s"],
+    help="Spatial sorting method when perfoming spatial partitioning.",
+)
+@click.option(
+    "-crs",
+    "--cut_crs",
+    required=False,
+    default=const.DEFAULTS["crs"],
+    type=int,
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    nargs=1,
+)
+@click.option(
+    "-c",
+    "--cut_threshold",
+    required=True,
+    default=const.DEFAULTS["c"],
+    type=int,
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    nargs=1,
+)
+@click.option(
+    "-t",
+    "--threads",
+    required=False,
+    default=const.DEFAULTS["t"],
+    type=int,
+    help="Amount of threads used for operation",
+    nargs=1,
+)
+@click.option(
+    "-tbl",
+    "--table",
+    required=False,
+    default=const.DEFAULTS["tbl"],
+    type=str,
+    help="Name of the table to read when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "-g",
+    "--geom_col",
+    required=False,
+    default=const.DEFAULTS["g"],
+    type=str,
+    help="Column name to use when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "--tempdir",
+    default=const.DEFAULTS["tempdir"],
+    type=click.Path(),
+    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
+)
+@click.option("-o", "--overwrite", is_flag=True)
+@click.version_option(version=__version__)
+def rhp(
+    vector_input: Union[str, Path],
+    output_directory: Union[str, Path],
+    resolution: str,
+    parent_res: str,
+    id_field: str,
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_crs: int,
+    cut_threshold: int,
+    threads: int,
+    table: str,
+    geom_col: str,
+    tempdir: Union[str, Path],
+    overwrite: bool,
+):
+    """
+    Ingest a vector dataset and index it to the rHEALPix DGGS.
+
+    VECTOR_INPUT is the path to input vector geospatial data.
+    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
+    """
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
+
+    common.check_resolutions(resolution, parent_res)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
+
+    if cut_crs is not None:
+        cut_crs = pyproj.CRS.from_user_input(cut_crs)
+
+    try:
+        common.index(
+            "rhp",
+            rhppolyfill,
+            rhp_secondary_index,
+            vector_input,
+            output_directory,
+            int(resolution),
+            parent_res,
+            keep_attributes,
+            chunksize,
+            spatial_sorting,
+            cut_threshold,
+            threads,
+            cut_crs=cut_crs,
+            id_field=id_field,
+            con=con,
+            table=table,
+            geom_col=geom_col,
+            overwrite=overwrite,
+        )
+    except:
+        raise
+    else:
+        sys.exit(0)
{vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/METADATA
CHANGED
@@ -1,35 +1,39 @@
 Metadata-Version: 2.1
 Name: vector2dggs
-Version: 0.6.0
+Version: 0.6.2
 Summary: CLI DGGS indexer for vector geospatial data
 Home-page: https://github.com/manaakiwhenua/vector2dggs
 License: LGPL-3.0-or-later
-Keywords: dggs,vector,h3,cli
+Keywords: dggs,vector,h3,rHEALPix,cli
 Author: James Ardo
 Author-email: ardoj@landcareresearch.co.nz
 Maintainer: Richard Law
 Maintainer-email: lawr@landcareresearch.co.nz
-Requires-Python: >=3.10,<4.0
+Requires-Python: >=3.11,<4.0
 Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Scientific/Engineering :: GIS
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
-Requires-Dist: click (>=8.1.
-Requires-Dist: dask (>=
-Requires-Dist: dask-geopandas (>=0.
-Requires-Dist: gdal (>=3.8
-Requires-Dist: geopandas (>=0.
-Requires-Dist: h3pandas (>=0.
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: click (>=8.1.7,<9.0.0)
 Requires-Dist: click-log (>=0.4.0,<0.5.0)
+Requires-Dist: dask (>=2025.1,<2026.0)
+Requires-Dist: dask-geopandas (>=0.4,<0.5)
+Requires-Dist: gdal (>=3.8,<4.0)
+Requires-Dist: geopandas (>=1.0.1,<2.0.0)
+Requires-Dist: h3pandas (>=0.3,<0.4)
+Requires-Dist: numpy (>=2,<3)
+Requires-Dist: pillow (>=11.2.1,<12.0.0)
+Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
+Requires-Dist: pyarrow (>=20.0,<21.0)
+Requires-Dist: pyproj (>=3.7,<4.0)
+Requires-Dist: rhealpixdggs (>=0.5.5,<0.6.0)
+Requires-Dist: rhppandas (>=0.1.2,<0.2.0)
+Requires-Dist: shapely (>=2.1,<3.0)
+Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
+Requires-Dist: tqdm (>=4.67,<5.0)
 Project-URL: Repository, https://github.com/manaakiwhenua/vector2dggs
 Description-Content-Type: text/markdown
 
@@ -159,6 +163,8 @@ In brief, to get started:
 
 If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
 
+Alternatively, it is also possible to install using pip with `pip install -e .`, and bypass Poetry.
+
 #### Code formatting
 
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -187,14 +193,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
   title={{vector2dggs}},
   author={Ardo, James and Law, Richard},
   url={https://github.com/manaakiwhenua/vector2dggs},
-  version={0.6.0},
+  version={0.6.2},
   date={2023-04-20}
 }
 ```
 
 APA/Harvard
 
-> Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
+> Ardo, J., & Law, R. (2023). vector2dggs (0.6.2) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
 
 [![manaakiwhenua-standards](https://github.com/manaakiwhenua/manaakiwhenua-standards/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)
 
vector2dggs-0.6.2.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+vector2dggs/__init__.py,sha256=w9t1Aj5a_f__PKPw_C7bWnZmWL3_GHrtgVrGYGX1wfk,27
+vector2dggs/cli.py,sha256=HoPp7Bwk2kZghAms6wNepx-bFhoAuHH7WXACMIy3MuM,652
+vector2dggs/common.py,sha256=DL3ohG-QQyI-phyxeO6Fi2BOwWnFct-I_Y87_XC2SRQ,10578
+vector2dggs/constants.py,sha256=u6n6XNvEVLUexn9Sb2rc22s2B4Rrg_VXFJaM7uEy-9Q,536
+vector2dggs/h3.py,sha256=GgiGOVbsXXNp95KWKKmJZvDxGFj91TTWl575OaPZ6yk,6145
+vector2dggs/katana.py,sha256=pgVWy032NkT5yilUO0d0IKH4NUvY7DJLjmfsxhBiF08,3407
+vector2dggs/rHP.py,sha256=Y36tPbtY-tYBUFILHD-xnUxa2yKlYotGP6043Bg5nZc,6450
+vector2dggs-0.6.2.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+vector2dggs-0.6.2.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
+vector2dggs-0.6.2.dist-info/METADATA,sha256=kNT2Iyd8irBMo2Tq0_CwnORVNeCc1ekjO1TlMwBp6qY,10014
+vector2dggs-0.6.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vector2dggs-0.6.2.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
+vector2dggs-0.6.2.dist-info/RECORD,,
vector2dggs-0.6.0.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
-vector2dggs/__init__.py,sha256=8xTECrwGH36hEfgoQ2Zcq4dfigWVZmIFK3OHqNOg-FQ,27
-vector2dggs/cli.py,sha256=tL4NJ99uQsqoVinwYadna1a4ko5v2sdZaFaeDAj6QNE,599
-vector2dggs/h3.py,sha256=kX3S630l-LOm04pe5YT-5g99DIV0t32GFYUEs0Hc5ZQ,14354
-vector2dggs/katana.py,sha256=xx5R9lDuraWLK5bfGkiDQDC2r2naj_sKZlYeB52_xwc,3320
-vector2dggs-0.6.0.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
-vector2dggs-0.6.0.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
-vector2dggs-0.6.0.dist-info/METADATA,sha256=d8dvx5_wXFO-ZMKcdXmc2TuXQv5B5UFRo1hq4fghe8w,9777
-vector2dggs-0.6.0.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
-vector2dggs-0.6.0.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-vector2dggs-0.6.0.dist-info/RECORD,,
{vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/COPYING: File without changes
{vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/COPYING.LESSER: File without changes
{vector2dggs-0.6.0.dist-info → vector2dggs-0.6.2.dist-info}/entry_points.txt: File without changes