vector2dggs 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector2dggs/__init__.py +1 -1
- vector2dggs/cli.py +4 -0
- vector2dggs/common.py +23 -27
- vector2dggs/constants.py +47 -0
- vector2dggs/geohash.py +240 -0
- vector2dggs/h3.py +8 -4
- vector2dggs/katana.py +1 -1
- vector2dggs/rHP.py +12 -14
- vector2dggs/s2.py +349 -0
- {vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/METADATA +64 -8
- vector2dggs-0.8.0.dist-info/RECORD +15 -0
- vector2dggs-0.6.2.dist-info/RECORD +0 -13
- {vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/COPYING +0 -0
- {vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/COPYING.LESSER +0 -0
- {vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/WHEEL +0 -0
- {vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/entry_points.txt +0 -0
vector2dggs/__init__.py
CHANGED
@@ -1 +1 @@
-__version__: str = "0.6.2"
+__version__: str = "0.8.0"
vector2dggs/cli.py
CHANGED
@@ -3,6 +3,8 @@ import click
 from vector2dggs import __version__
 from vector2dggs.h3 import h3
 from vector2dggs.rHP import rhp
+from vector2dggs.s2 import s2
+from vector2dggs.geohash import geohash

 # If the program does terminal interaction, make it output a short
 # notice like this when it starts in an interactive mode:
@@ -21,6 +23,8 @@ def cli():

 cli.add_command(h3)
 cli.add_command(rhp)
+cli.add_command(s2)
+cli.add_command(geohash)


 def main():
vector2dggs/common.py
CHANGED
@@ -104,7 +104,9 @@ def drop_condition(
     _diff = _before - _after
     if _diff:
         log_method = (
-            LOGGER.info if (_diff / float(_before)) < warning_threshold else LOGGER.warning
+            LOGGER.info
+            if (_diff / float(_before)) < warning_threshold
+            else LOGGER.warning
         )
         log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
     return df
@@ -118,22 +120,17 @@ def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):

     Used for intermediate re-partioning.
     """
-    if dggs == "h3":
-        return (
-            parent_res
-            if parent_res is not None
-            else max(const.MIN_H3, (resolution - const.DEFAULT_PARENT_OFFSET))
-        )
-    elif dggs == "rhp":
-        return (
-            parent_res
-            if parent_res is not None
-            else max(const.MIN_RHP, (resolution - const.DEFAULT_PARENT_OFFSET))
-        )
-    else:
+    if not dggs in const.DEFAULT_DGGS_PARENT_RES.keys():
         raise RuntimeError(
-            "Unknown dggs {dggs}) - must be one of [ 'h3', 'rhp' ]".format(dggs=dggs)
+            "Unknown dggs {dggs}) - must be one of [ {options} ]".format(
+                dggs=dggs, options=", ".join(const.DEFAULT_DGGS_PARENT_RES.keys())
+            )
         )
+    return (
+        parent_res
+        if parent_res is not None
+        else const.DEFAULT_DGGS_PARENT_RES[dggs](resolution)
+    )


 def parent_partitioning(
@@ -179,25 +176,23 @@ def polyfill(
 ) -> None:
     """
     Reads a geoparquet, performs polyfilling (for Polygon),
-    linetracing (for LineString), and writes out to parquet.
+    linetracing (for LineString), or indexing (for Point),
+    and writes out to parquet.
     """
     df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
     if len(df.index) == 0:
-        # Input is empty, nothing to polyfill
+        # Input is empty, nothing to convert
         return None

-    # DGGS specific
+    # DGGS specific conversion
     df = dggsfunc(df, resolution)

     if len(df.index) == 0:
-        #
+        # Conversion resulted in empty output (e.g. large cell, small feature)
         return None

     df.index.rename(f"{dggs}_{resolution:02}", inplace=True)
     parent_res: int = get_parent_res(dggs, parent_res, resolution)
-    # print(parent_res)
-    # print(df.index)
-    # print(df.columns)

     # Secondary (parent) index, used later for partitioning
     df = secondary_index_func(df, parent_res)
@@ -233,7 +228,7 @@ def index(
     overwrite: bool = False,
 ) -> Path:
     """
-    Performs multi-threaded polyfilling
+    Performs multi-threaded DGGS indexing on geometries (including multipart and collections).
     """

     if table and con:
@@ -291,7 +286,8 @@ def index(
             "index": lambda frame: frame[
                 (frame.geometry.geom_type != "Polygon")
                 & (frame.geometry.geom_type != "LineString")
-            ],
+                & (frame.geometry.geom_type != "Point")
+            ],
             "message": "Considering unsupported geometries",
         },
     ]
@@ -314,9 +310,9 @@ def index(

     filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))

-    # Multithreaded polyfilling
+    # Multithreaded DGGS indexing
     LOGGER.debug(
-        "Polyfilling by spatial partitions with resolution: %d",
+        "DGGS indexing by spatial partitions with resolution: %d",
         resolution,
     )
     with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
@@ -344,7 +340,7 @@ def index(

     parent_partitioning(
         dggs,
-        tmpdir2,
+        Path(tmpdir2),
         output_directory,
         resolution,
         parent_res,
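The substantive change in `common.py` is that `get_parent_res` now resolves per-DGGS default parent resolutions through the `const.DEFAULT_DGGS_PARENT_RES` lookup table (added in `constants.py`, next section) instead of an `if`/`elif` chain per DGGS. A minimal behavioural sketch, with the table inlined — this standalone framing is an illustration, not the packaged module:

```python
# Sketch of the refactored get_parent_res behaviour, with the constants.py
# lookup table inlined. Names follow the diff; illustration only.
DEFAULT_PARENT_OFFSET = 6
MIN_H3 = 0

DEFAULT_DGGS_PARENT_RES = {
    "h3": lambda resolution: max(MIN_H3, resolution - DEFAULT_PARENT_OFFSET),
}

def get_parent_res(dggs, parent_res, resolution):
    if dggs not in DEFAULT_DGGS_PARENT_RES:
        raise RuntimeError(f"Unknown dggs {dggs}")
    # An explicit parent resolution wins; otherwise use the per-DGGS default
    return (
        parent_res
        if parent_res is not None
        else DEFAULT_DGGS_PARENT_RES[dggs](resolution)
    )

assert get_parent_res("h3", None, 9) == 3  # 9 - 6
assert get_parent_res("h3", None, 4) == 0  # clamped at MIN_H3
assert get_parent_res("h3", 5, 9) == 5     # explicit value respected
```

Because the table holds lambdas, evaluation is deferred to call time — which is also why `constants.py` can define the table before `DEFAULT_PARENT_OFFSET` is assigned a few lines later.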
vector2dggs/constants.py
CHANGED
@@ -5,6 +5,8 @@ import tempfile

 MIN_H3, MAX_H3 = 0, 15
 MIN_RHP, MAX_RHP = 0, 15
+MIN_S2, MAX_S2 = 0, 30
+MIN_GEOHASH, MAX_GEOHASH = 0, 12

 DEFAULTS = {
     "id": None,
@@ -19,8 +21,53 @@ DEFAULTS = {
     "tempdir": tempfile.tempdir,
 }

+DEFAULT_DGGS_PARENT_RES = {
+    "h3": lambda resolution: max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET)),
+    "rhp": lambda resolution: max(MIN_RHP, (resolution - DEFAULT_PARENT_OFFSET)),
+    "geohash": lambda resolution: max(
+        MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET)
+    ),
+    "s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)),
+}
+
 DEFAULT_PARENT_OFFSET = 6

+# http://s2geometry.io/resources/s2cell_statistics.html
+S2_CELLS_MAX_AREA_M2_BY_LEVEL = {
+    0: 85011012.19 * 1e6,
+    1: 21252753.05 * 1e6,
+    2: 6026521.16 * 1e6,
+    3: 1646455.50 * 1e6,
+    4: 413918.15 * 1e6,
+    5: 104297.91 * 1e6,
+    6: 26113.30 * 1e6,
+    7: 6529.09 * 1e6,
+    8: 1632.45 * 1e6,
+    9: 408.12 * 1e6,
+    10: 102.03 * 1e6,
+    11: 25.51 * 1e6,
+    12: 6.38 * 1e6,
+    13: 1.59 * 1e6,
+    14: 0.40 * 1e6,
+    15: 99638.93,
+    16: 24909.73,
+    17: 6227.43,
+    18: 1556.86,
+    19: 389.22,
+    20: 97.30,
+    21: 24.33,
+    22: 6.08,
+    23: 1.52,
+    24: 0.38,
+    25: 950.23 * 1e-4,
+    26: 237.56 * 1e-4,
+    27: 59.39 * 1e-4,
+    28: 14.85 * 1e-4,
+    29: 3.71 * 1e-4,
+    30: 0.93 * 1e-4,
+}
+
+
 warnings.filterwarnings(
     "ignore"
 )  # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
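A note on the units in `S2_CELLS_MAX_AREA_M2_BY_LEVEL`: the upstream statistics table quotes levels 0–14 in km² (hence the `* 1e6` factors), levels 15–24 in plain m², and levels 25–30 in cm² (hence `* 1e-4`), all normalising to m². Average cell area shrinks roughly 4× per level, so a quick sanity check across the unit boundaries — a sketch, not part of the package:

```python
# Sanity check (illustrative): adjacent levels differ ~4x, including across
# the table's unit boundaries (km^2 -> m^2 at level 15, m^2 -> cm^2 at 25).
S2_AREA_M2 = {14: 0.40 * 1e6, 15: 99638.93, 16: 24909.73, 24: 0.38, 25: 950.23 * 1e-4}

for hi, lo in [(14, 15), (15, 16), (24, 25)]:
    ratio = S2_AREA_M2[hi] / S2_AREA_M2[lo]
    assert 3.5 < ratio < 4.5, (hi, lo, ratio)
```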
vector2dggs/geohash.py
ADDED
@@ -0,0 +1,240 @@
+import sys
+import click
+import click_log
+import tempfile
+import pyproj
+
+from geohash_polygon import polygon_to_geohashes  # rusty-polygon-geohasher
+from geohash import encode, decode  # python-geohash
+
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import Point, Polygon
+
+from typing import Union
+from pathlib import Path
+
+import vector2dggs.constants as const
+import vector2dggs.common as common
+
+from vector2dggs import __version__
+
+
+def gh_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
+    df[f"geohash_{parent_level:02}"] = df.index.to_series().str[:parent_level]
+    return df
+
+
+# NB this implements a point-inside hash, but geohash_polygon only supports "within" or "intersects" (on the basis of geohashes as _polygon_ geometries) which means we have to perform additional computation to support "polyfill" as defined by H3
+# A future version of vector2dggs may support within/intersects modality, at which point that would just be outer/inner with no further computation
+def _polygon_to_geohashes(polygon: Polygon, level: int) -> set[str]:
+    # Function to compute geohash set for one polygon geometry
+    outer: set[str] = polygon_to_geohashes(polygon, level, inner=False)
+    inner: set[str] = polygon_to_geohashes(polygon, level, inner=True)
+    edge: set[str] = {
+        h
+        for h in (outer - inner)  # All edge cells
+        if Point(*reversed(decode(h))).within(polygon)
+    }  # Edge cells with a center within the polygon
+    return edge | inner
+
+
+def gh_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
+    gh_col = f"geohash"
+    df_polygon = df[df.geom_type == "Polygon"].copy()
+    if not df_polygon.empty:
+        df_polygon = (
+            df_polygon.assign(
+                **{
+                    gh_col: df_polygon.geometry.apply(
+                        lambda geom: _polygon_to_geohashes(geom, level)
+                    )
+                }
+            )
+            .explode(gh_col, ignore_index=True)
+            .set_index(gh_col)
+        )
+
+    # TODO linestring support
+    # e.g. JS implementation https://github.com/alrico88/geohashes-along
+
+    df_point = df[df.geom_type == "Point"].copy()
+    if len(df_point.index) > 0:
+        df_point[gh_col] = df_point.geometry.apply(
+            lambda geom: encode(geom.y, geom.x, precision=level)
+        )
+        df_point = df_point.set_index(gh_col)
+
+    return pd.concat(
+        map(
+            lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
+            [df_polygon, df_point],
+        )
+    )
+
+
+@click.command(context_settings={"show_default": True})
+@click_log.simple_verbosity_option(common.LOGGER)
+@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
+@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
+@click.option(
+    "-r",
+    "--resolution",
+    "level",
+    required=True,
+    type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
+    help="Geohash level to index",
+    nargs=1,
+)
+@click.option(
+    "-pr",
+    "--parent_res",
+    "parent_level",
+    required=False,
+    type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
+    help="Geohash parent level for the output partition. Defaults to resolution - 6",
+)
+@click.option(
+    "-id",
+    "--id_field",
+    required=False,
+    default=const.DEFAULTS["id"],
+    type=str,
+    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
+    nargs=1,
+)
+@click.option(
+    "-k",
+    "--keep_attributes",
+    is_flag=True,
+    show_default=True,
+    default=const.DEFAULTS["k"],
+    help="Retain attributes in output. The default is to create an output that only includes Geohash cell ID and the ID given by the -id field (or the default index ID).",
+)
+@click.option(
+    "-ch",
+    "--chunksize",
+    required=True,
+    type=int,
+    default=const.DEFAULTS["ch"],
+    help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
+    nargs=1,
+)
+@click.option(
+    "-s",
+    "--spatial_sorting",
+    type=click.Choice(["hilbert", "morton", "geohash"]),
+    default=const.DEFAULTS["s"],
+    help="Spatial sorting method when perfoming spatial partitioning.",
+)
+@click.option(
+    "-crs",
+    "--cut_crs",
+    required=False,
+    default=const.DEFAULTS["crs"],
+    type=int,
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    nargs=1,
+)
+@click.option(
+    "-c",
+    "--cut_threshold",
+    required=True,
+    default=const.DEFAULTS["c"],
+    type=int,
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    nargs=1,
+)
+@click.option(
+    "-t",
+    "--threads",
+    required=False,
+    default=const.DEFAULTS["t"],
+    type=int,
+    help="Amount of threads used for operation",
+    nargs=1,
+)
+@click.option(
+    "-tbl",
+    "--table",
+    required=False,
+    default=const.DEFAULTS["tbl"],
+    type=str,
+    help="Name of the table to read when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "-g",
+    "--geom_col",
+    required=False,
+    default=const.DEFAULTS["g"],
+    type=str,
+    help="Column name to use when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "--tempdir",
+    default=const.DEFAULTS["tempdir"],
+    type=click.Path(),
+    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
+)
+@click.option("-o", "--overwrite", is_flag=True)
+@click.version_option(version=__version__)
+def geohash(
+    vector_input: Union[str, Path],
+    output_directory: Union[str, Path],
+    level: str,
+    parent_level: str,
+    id_field: str,
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_crs: int,
+    cut_threshold: int,
+    threads: int,
+    table: str,
+    geom_col: str,
+    tempdir: Union[str, Path],
+    overwrite: bool,
+):
+    """
+    Ingest a vector dataset and index it using the Geohash geocode system.
+
+    VECTOR_INPUT is the path to input vector geospatial data.
+    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
+    """
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
+
+    common.check_resolutions(level, parent_level)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
+
+    if cut_crs is not None:
+        cut_crs = pyproj.CRS.from_user_input(cut_crs)
+
+    try:
+        common.index(
+            "geohash",
+            gh_polyfill,
+            gh_secondary_index,
+            vector_input,
+            output_directory,
+            int(level),
+            parent_level,
+            keep_attributes,
+            chunksize,
+            spatial_sorting,
+            cut_threshold,
+            threads,
+            cut_crs=cut_crs,
+            id_field=id_field,
+            con=con,
+            table=table,
+            geom_col=geom_col,
+            overwrite=overwrite,
+        )
+    except:
+        raise
+    else:
+        sys.exit(0)
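The `_polygon_to_geohashes` helper above exists because `geohash_polygon` only offers "within" (`inner=True`) and "intersects" (`inner=False`) coverings, while vector2dggs wants the H3-style "cell centre inside" polyfill. A compact sketch of that rule in isolation — the library calls are the same ones used in the diff, but the square polygon is a made-up input:

```python
# "Centre inside" polyfill: keep all fully-contained cells, plus edge cells
# whose centre point lies within the polygon. Toy input; calls as in the diff.
from geohash_polygon import polygon_to_geohashes  # rusty-polygon-geohasher
from geohash import decode  # python-geohash; decode(h) returns (lat, lon)
from shapely.geometry import Point, Polygon

square = Polygon([(174.0, -41.0), (174.1, -41.0), (174.1, -40.9), (174.0, -40.9)])
level = 6

outer = polygon_to_geohashes(square, level, inner=False)  # "intersects" cover
inner = polygon_to_geohashes(square, level, inner=True)   # "within" cover
edge = {h for h in outer - inner if Point(*reversed(decode(h))).within(square)}
cells = inner | edge
```

`gh_secondary_index` is also worth noting: Geohash is hierarchical by prefix, so the parent cell at level *k* is simply the first *k* characters of the child's hash — no library call needed.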
vector2dggs/h3.py
CHANGED
@@ -18,13 +18,13 @@ import vector2dggs.common as common
 from vector2dggs import __version__


-def h3_secondary_index(df: pd.DataFrame, parent_res: int):
+def h3_secondary_index(df: pd.DataFrame, parent_res: int) -> pd.DataFrame:
     return df.h3.h3_to_parent(parent_res)


-def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
+def h3polyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
     df_polygon = df[df.geom_type == "Polygon"]
-    if len(df_polygon.index) > 0:
+    if not df_polygon.empty:
         df_polygon = df_polygon.h3.polyfill_resample(
             resolution, return_geometry=False
         ).drop(columns=["index"])
@@ -38,10 +38,14 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
     )
     df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]

+    df_point = df[df.geom_type == "Point"]
+    if len(df_point.index) > 0:
+        df_point = df_point.h3.geo_to_h3(resolution, set_index=True)
+
     return pd.concat(
         map(
             lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
-            [df_polygon, df_linestring],
+            [df_polygon, df_linestring, df_point],
         )
     )

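The `h3.py` change adds Point support by delegating to h3pandas' `geo_to_h3`, alongside the existing Polygon (`polyfill_resample`) and LineString branches. A minimal usage sketch — the GeoDataFrame and coordinates here are illustrative:

```python
# Sketch: indexing point geometries the way the new Point branch does.
import geopandas as gpd
import h3pandas  # noqa: F401 -- registers the .h3 accessor
from shapely.geometry import Point

pts = gpd.GeoDataFrame(
    {"name": ["wellington"]},
    geometry=[Point(174.7762, -41.2865)],
    crs="EPSG:4326",
)
indexed = pts.h3.geo_to_h3(9, set_index=True)  # index becomes the H3 cell ID
print(indexed.index[0])  # a resolution-9 H3 cell token
```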
vector2dggs/katana.py
CHANGED
@@ -33,7 +33,7 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
     if not geometry.is_valid:
         # print(explain_validity(geometry))
         geometry = make_valid(geometry)
-    if geometry.geometryType() == "GeometryCollection":
+    if geometry.geom_type == "GeometryCollection":
         geometry.normalize()
         geometry = geometry.buffer(0)
     bounds = geometry.bounds
vector2dggs/rHP.py
CHANGED
@@ -18,36 +18,34 @@ import vector2dggs.common as common
 from vector2dggs import __version__


-def rhp_secondary_index(df: pd.DataFrame, parent_res: int):
+def rhp_secondary_index(df: pd.date_range, parent_res: int) -> pd.DataFrame:
     return df.rhp.rhp_to_parent(parent_res)


-def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
+def rhppolyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
     df_polygon = df[df.geom_type == "Polygon"]
     if len(df_polygon.index) > 0:
         df_polygon = df_polygon.rhp.polyfill_resample(
             resolution, return_geometry=False
         ).drop(columns=["index"])

-    df_multipolygon = df[df.geom_type == "MultiPolygon"]
-    if len(df_multipolygon.index) > 0:
-        df_multipolygon = df_multipolygon.rhp.polyfill_resample(
-            resolution, return_geometry=False
-        ).drop(columns=["index"])
-
     # df_linestring = df[df.geom_type == "LineString"]
     # if len(df_linestring.index) > 0:
     #     df_linestring = (
-    #         df_linestring.
-    #         .explode("
-    #         .set_index("
+    #         df_linestring.rhp.linetrace(resolution)
+    #         .explode("rhp_linetrace")
+    #         .set_index("rhp_linetrace")
     #     )
     # df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]

+    df_point = df[df.geom_type == "Point"]
+    if len(df_point.index) > 0:
+        df_point = df_point.rhp.geo_to_rhp(resolution, set_index=True)
+
     return pd.concat(
         map(
             lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
-            [df_polygon, df_multipolygon],
+            [df_polygon, df_point],
         )
     )

@@ -61,7 +59,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
     "--resolution",
     required=True,
     type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
-    help="H3 resolution to index",
+    help="rHEALPix resolution to index",
     nargs=1,
 )
 @click.option(
@@ -69,7 +67,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
     "--parent_res",
     required=False,
     type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
-    help="Parent resolution for the output partition. Defaults to resolution - 6",
+    help="rHEALPix Parent resolution for the output partition. Defaults to resolution - 6",
 )
 @click.option(
     "-id",
vector2dggs/s2.py
ADDED
@@ -0,0 +1,349 @@
+import sys
+import click
+import click_log
+import tempfile
+import pyproj
+from math import ceil
+
+from s2geometry import pywraps2 as S2
+
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import box, Polygon, LineString, Point
+from shapely.ops import transform
+from pyproj import CRS, Transformer
+
+from typing import Union
+from pathlib import Path
+
+import vector2dggs.constants as const
+import vector2dggs.common as common
+
+from vector2dggs import __version__
+
+
+def s2_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
+    # NB also converts the index to S2 cell tokens
+    index_series = df.index.to_series().astype(object)
+    df[f"s2_{parent_level:02}"] = index_series.map(
+        lambda cell_id: cell_id.parent(parent_level).ToToken()
+    )
+    df.index = index_series.map(lambda cell_id: cell_id.ToToken())
+    return df
+
+
+def bbox_area_in_m2(
+    geom: Polygon,
+    src_crs: Union[str, CRS] = "EPSG:4326",
+    dst_crs: Union[str, CRS] = "EPSG:6933",
+) -> float:
+    """
+    Calculate the area of the bounding box of a geometry in square meters.
+    """
+    minx, miny, maxx, maxy = geom.bounds
+    bbox = box(minx, miny, maxx, maxy)
+    transformer = Transformer.from_crs(src_crs, dst_crs, always_xy=True)
+    projected_bbox = transform(transformer.transform, bbox)
+    return projected_bbox.area
+
+
+def max_cells_for_geom(
+    geom: Union[Polygon, LineString], level: int, margin: float = 1.02
+) -> int:
+    """
+    Calculate the maximum number of S2 cells that are appropriate for the given geometry and level.
+    This is based on the area of the geometry's bounding box,
+    and the maximum area of S2 cells at the given level.
+    """
+    area = bbox_area_in_m2(geom)
+    max_cells = ceil(max(1, area / const.S2_CELLS_MAX_AREA_M2_BY_LEVEL[level]))
+    return ceil(max_cells * margin)
+
+
+def cell_center_is_inside_polygon(cell: S2.S2CellId, polygon: S2.S2Polygon) -> bool:
+    """Determines if the center of the S2 cell is inside the polygon"""
+    cell_center = S2.S2Cell(cell).GetCenter()
+    return polygon.Contains(cell_center)
+
+
+def s2_polyfill_polygons(df: gpd.GeoDataFrame, level: int) -> gpd.GeoDataFrame:
+
+    def generate_s2_covering(
+        geom: Polygon, level: int, centroid_inside: bool = True
+    ) -> set[S2.S2CellId]:
+        # Prepare loops: first the exterior loop, then the interior loops
+        loops = []
+        # Exterior ring
+        latlngs = [
+            S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in geom.exterior.coords
+        ]
+        s2loop = S2.S2Loop([latlng.ToPoint() for latlng in latlngs])
+        s2loop.Normalize()
+        loops.append(s2loop)
+
+        # Interior rings (polygon holes)
+        for interior in geom.interiors:
+            interior_latlngs = [
+                S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in interior.coords
+            ]
+            s2interior_loop = S2.S2Loop(
+                [latlng.ToPoint() for latlng in interior_latlngs]
+            )
+            s2interior_loop.Normalize()
+            loops.append(s2interior_loop)
+
+        # Build an S2Polygon from the loops
+        s2polygon = S2.S2Polygon()
+        s2polygon.InitNested(loops)
+
+        # Use S2RegionCoverer to get the cell IDs at the specified level
+        coverer = S2.S2RegionCoverer()
+
+        max_cells = max_cells_for_geom(geom, level)
+        coverer.set_max_cells(max_cells)
+        coverer.set_min_level(level)
+        coverer.set_max_level(level)
+
+        covering: list[S2.S2CellId] = coverer.GetCovering(s2polygon)
+
+        if centroid_inside:
+            # Coverings are "intersects" modality, polyfill is "centre inside" modality
+            # ergo, filter out covering cells that are not inside the polygon
+            covering = {
+                cell
+                for cell in covering
+                if cell_center_is_inside_polygon(cell, s2polygon)
+            }
+        else:
+            set(covering)
+
+        return covering
+
+    df["s2index"] = df["geometry"].apply(lambda geom: generate_s2_covering(geom, level))
+    df = df[
+        df["s2index"].map(lambda x: len(x) > 0)
+    ]  # Remove rows with no covering at this level
+
+    return df
+
+
+def s2_cell_ids_from_linestring(
+    linestring: LineString, level: int
+) -> list[S2.S2CellId]:
+    latlngs = [S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in linestring.coords]
+    polyline = S2.S2Polyline(latlngs)
+
+    coverer = S2.S2RegionCoverer()
+    max_cells = max_cells_for_geom(linestring, level)
+    coverer.set_max_cells(max_cells)
+    coverer.set_min_level(level)
+    coverer.set_max_level(level)
+
+    return coverer.GetCovering(polyline)
+
+
+def s2_cell_id_from_point(geom: Point, level: int) -> S2.S2CellId:
+    """
+    Convert a point geometry to an S2 cell at the specified level.
+    """
+    latlng = S2.S2LatLng.FromDegrees(geom.y, geom.x)
+    return S2.S2CellId(latlng).parent(level)
+
+
+def s2_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
+
+    df_polygon = df[df.geom_type == "Polygon"].copy()
+    if len(df_polygon.index) > 0:
+        df_polygon = (
+            s2_polyfill_polygons(df_polygon, level)
+            .explode("s2index")
+            .set_index("s2index")
+        )
+
+    df_linestring = df[df.geom_type == "LineString"].copy()
+    if len(df_linestring.index) > 0:
+        df_linestring["s2index"] = df_linestring.geometry.apply(
+            lambda geom: s2_cell_ids_from_linestring(geom, level)
+        )
+        df_linestring = df_linestring.explode("s2index").set_index("s2index")
+
+    df_point = df[df.geom_type == "Point"].copy()
+    if len(df_point.index) > 0:
+        df_point["s2index"] = df_point.geometry.apply(
+            lambda geom: s2_cell_id_from_point(geom, level)
+        )
+        df_point = df_point.set_index("s2index")
+
+    return pd.concat(
+        map(
+            lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
+            [df_polygon, df_linestring, df_point],
+        )
+    )
+
+
+@click.command(context_settings={"show_default": True})
+@click_log.simple_verbosity_option(common.LOGGER)
+@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
+@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
+@click.option(
+    "-r",
+    "--resolution",
+    "level",
+    required=True,
+    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
+    help="S2 level to index",
+    nargs=1,
+)
+@click.option(
+    "-pr",
+    "--parent_res",
+    "parent_level",
+    required=False,
+    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
+    help="S2 parent level for the output partition. Defaults to resolution - 6",
+)
+@click.option(
+    "-id",
+    "--id_field",
+    required=False,
+    default=const.DEFAULTS["id"],
+    type=str,
+    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
+    nargs=1,
+)
+@click.option(
+    "-k",
+    "--keep_attributes",
+    is_flag=True,
+    show_default=True,
+    default=const.DEFAULTS["k"],
+    help="Retain attributes in output. The default is to create an output that only includes S2 cell ID and the ID given by the -id field (or the default index ID).",
+)
+@click.option(
+    "-ch",
+    "--chunksize",
+    required=True,
+    type=int,
+    default=const.DEFAULTS["ch"],
+    help="The number of rows per index partition to use when spatially partioning. Adjusting this number will trade off memory use and time.",
+    nargs=1,
+)
+@click.option(
+    "-s",
+    "--spatial_sorting",
+    type=click.Choice(["hilbert", "morton", "geohash"]),
+    default=const.DEFAULTS["s"],
+    help="Spatial sorting method when perfoming spatial partitioning.",
+)
+@click.option(
+    "-crs",
+    "--cut_crs",
+    required=False,
+    default=const.DEFAULTS["crs"],
+    type=int,
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    nargs=1,
+)
+@click.option(
+    "-c",
+    "--cut_threshold",
+    required=True,
+    default=const.DEFAULTS["c"],
+    type=int,
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    nargs=1,
+)
+@click.option(
+    "-t",
+    "--threads",
+    required=False,
+    default=const.DEFAULTS["t"],
+    type=int,
+    help="Amount of threads used for operation",
+    nargs=1,
+)
+@click.option(
+    "-tbl",
+    "--table",
+    required=False,
+    default=const.DEFAULTS["tbl"],
+    type=str,
+    help="Name of the table to read when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "-g",
+    "--geom_col",
+    required=False,
+    default=const.DEFAULTS["g"],
+    type=str,
+    help="Column name to use when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "--tempdir",
+    default=const.DEFAULTS["tempdir"],
+    type=click.Path(),
+    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
+)
+@click.option("-o", "--overwrite", is_flag=True)
+@click.version_option(version=__version__)
+def s2(
+    vector_input: Union[str, Path],
+    output_directory: Union[str, Path],
+    level: str,
+    parent_level: str,
+    id_field: str,
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_crs: int,
+    cut_threshold: int,
+    threads: int,
+    table: str,
+    geom_col: str,
+    tempdir: Union[str, Path],
+    overwrite: bool,
+):
+    """
+    Ingest a vector dataset and index it to the S2 DGGS.
+
+    VECTOR_INPUT is the path to input vector geospatial data.
+    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
+    """
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
+
+    common.check_resolutions(level, parent_level)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
+
+    if cut_crs is not None:
+        cut_crs = pyproj.CRS.from_user_input(cut_crs)
+
+    try:
+        common.index(
+            "s2",
+            s2_polyfill,
+            s2_secondary_index,
+            vector_input,
+            output_directory,
+            int(level),
+            parent_level,
+            keep_attributes,
+            chunksize,
+            spatial_sorting,
+            cut_threshold,
+            threads,
+            cut_crs=cut_crs,
+            id_field=id_field,
+            con=con,
+            table=table,
+            geom_col=geom_col,
+            overwrite=overwrite,
+        )
+    except:
+        raise
+    else:
+        sys.exit(0)
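`S2RegionCoverer` requires a `max_cells` budget, so `max_cells_for_geom` estimates one from the geometry's bounding-box area (reprojected to the equal-area EPSG:6933) divided by the largest cell area at the target level, padded by a small margin. The arithmetic, worked as a standalone sketch with one `constants.py` entry inlined — the 50 km² bounding box is an invented input:

```python
# Worked sketch of the max-cells budget used above; one constants.py entry
# inlined. The 50 km^2 bounding-box area is illustrative.
from math import ceil

S2_CELLS_MAX_AREA_M2_BY_LEVEL = {12: 6.38 * 1e6}  # level 12: ~6.38 km^2

def max_cells(bbox_area_m2: float, level: int, margin: float = 1.02) -> int:
    cells = ceil(max(1, bbox_area_m2 / S2_CELLS_MAX_AREA_M2_BY_LEVEL[level]))
    return ceil(cells * margin)  # slight headroom over the naive bound

print(max_cells(50_000_000, 12))  # -> 9 cells allowed for a 50 km^2 bbox
```

Also note `s2_secondary_index`: it converts both indexes from `S2CellId` objects to hex tokens via `ToToken()`, which is what lets the README example further below rebuild cell geometry with `s2sphere.CellId.from_token`.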
{vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vector2dggs
-Version: 0.6.2
+Version: 0.8.0
 Summary: CLI DGGS indexer for vector geospatial data
 Home-page: https://github.com/manaakiwhenua/vector2dggs
 License: LGPL-3.0-or-later
@@ -29,8 +29,11 @@ Requires-Dist: pillow (>=11.2.1,<12.0.0)
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
 Requires-Dist: pyarrow (>=20.0,<21.0)
 Requires-Dist: pyproj (>=3.7,<4.0)
+Requires-Dist: python-geohash (>=0.8.5,<0.9.0)
 Requires-Dist: rhealpixdggs (>=0.5.5,<0.6.0)
 Requires-Dist: rhppandas (>=0.1.2,<0.2.0)
+Requires-Dist: rusty-polygon-geohasher (>=0.2.3,<0.3.0)
+Requires-Dist: s2geometry (>=0.9.0,<0.10.0)
 Requires-Dist: shapely (>=2.1,<3.0)
 Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
 Requires-Dist: tqdm (>=4.67,<5.0)
@@ -45,9 +48,17 @@ Python-based CLI tool to index raster files to DGGS in parallel, writing out to

 This is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/raster2dggs).

-Currently
+Currently this tool supports the following DGGSs:

-
+- [H3](https://h3geo.org/)
+- [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system) (points, polygons)
+- [S2](https://s2geometry.io/)
+
+... and the following geocode systems:
+
+- [Geohash](https://en.wikipedia.org/wiki/Geohash) (points, polygons)
+
+Contributions (espeically for other DGGSs), suggestions, bug reports and strongly worded letters are all welcome.

 

@@ -59,6 +70,22 @@ pip install vector2dggs

 ## Usage

+```bash
+vector2dggs --help
+
+Usage: vector2dggs [OPTIONS] COMMAND [ARGS]...
+
+Options:
+  --version  Show the version and exit.
+  --help     Show this message and exit.
+
+Commands:
+  geohash  Ingest a vector dataset and index it using the Geohash geocode...
+  h3       Ingest a vector dataset and index it to the H3 DGGS.
+  rhp      Ingest a vector dataset and index it to the rHEALPix DGGS.
+  s2       Ingest a vector dataset and index it to the S2 DGGS.
+```
+
 ```bash
 vector2dggs h3 --help
 Usage: vector2dggs h3 [OPTIONS] VECTOR_INPUT OUTPUT_DIRECTORY
@@ -121,9 +148,9 @@ Options:

 Output is in the Apache Parquet format, a directory with one file per partition.

-For a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
+For a quick view of your output, you can read Apache Parquet with pandas, and then use tools like h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
 - Joining attribute data from the original feature-level data onto computer DGGS cells.
-- Joining other data to this output on the
+- Joining other data to this output on the DGGS cell ID. (The output has a column like `{dggs}_\d`, e.g. `h3_09` or `h3_12` according to the target resolution, zero-padded to account for the maximum resolution of the DGGS)

 Geoparquet output (hexagon boundaries):

@@ -150,6 +177,34 @@ h3_12
 >>> g.to_parquet('./output-data/parcels.12.geo.parquet')
 ```

+An example for S2 output (using `s2sphere`):
+
+
+```python
+import pandas as pd
+import geopandas as gpd
+import s2sphere
+from shapely.geometry import Polygon
+
+RES = 18
+df = pd.read_parquet(f'~/output-data/ponds-with-holes.s2.{RES}.pq')
+df = df.reset_index()
+
+def s2id_to_polygon(s2_id_hex):
+    cell_id = s2sphere.CellId.from_token(s2_id_hex)
+    cell = s2sphere.Cell(cell_id)
+    vertices = []
+    for i in range(4):
+        vertex = cell.get_vertex(i)
+        lat_lng = s2sphere.LatLng.from_point(vertex)
+        vertices.append((lat_lng.lng().degrees, lat_lng.lat().degrees))  # (lon, lat)
+    return Polygon(vertices)
+
+df['geometry'] = df[f's2_{RES}'].apply(s2id_to_polygon)
+df = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')  # WGS84
+df.to_parquet(f'sample-{RES}.parquet')
+```
+
 ### For development

 In brief, to get started:
@@ -157,8 +212,9 @@ In brief, to get started:
 - Install [Poetry](https://python-poetry.org/docs/basic-usage/)
 - Install [GDAL](https://gdal.org/)
   - If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
-  - On Linux, install GDAL 3.
+  - On Linux, install GDAL 3.8+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
 - Create the virtual environment with `poetry init`. This will install necessary dependencies.
+  - If the installation of `s2geometry` fails, you may require SWIG to build it. (A command like `conda install swig` or `sudo dnf install swig` depending on your platform).
 - Subsequently, the virtual environment can be re-activated with `poetry shell`.

 If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
@@ -193,14 +249,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
     title={{vector2dggs}},
     author={Ardo, James and Law, Richard},
     url={https://github.com/manaakiwhenua/vector2dggs},
-    version={0.6.2},
+    version={0.8.0},
     date={2023-04-20}
 }
 ```

 APA/Harvard

-> Ardo, J., & Law, R. (2023). vector2dggs (0.6.2) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
+> Ardo, J., & Law, R. (2023). vector2dggs (0.8.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs

 [![manaakiwhenua-standards](https://github.com/manaakiwhenua/manaakiwhenua-standards/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)

vector2dggs-0.8.0.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
+vector2dggs/__init__.py,sha256=MBs_zNH91Wn_gMAiPSI9xF7r-us5pgElKJBoafyySfc,27
+vector2dggs/cli.py,sha256=d_4skD62k6pXUWgDdVHbDwpe4A4yo62ZFx8Cp_6GpBA,767
+vector2dggs/common.py,sha256=yLYIWAbvlgY_tcTCRIzeqDjkT95LbIUqpHhrvm6TREE,10458
+vector2dggs/constants.py,sha256=I89XzsKO5vQlMkVFwDnVS2nO5MtLGRYeCOSunLU9Dfg,1694
+vector2dggs/geohash.py,sha256=BfBpgMYpZqH0j7hGf00qo0Q484DeRt7Afil5QJO8ZGI,7527
+vector2dggs/h3.py,sha256=LYZe4GgGjp0kBoqm_L14yYtgLLBS3lnkHi_pEbd7tYM,6303
+vector2dggs/katana.py,sha256=Z3RFB92DsIZ069Bz0mKyYBKtOxd2dPxYvRy6M-MyRsM,3412
+vector2dggs/rHP.py,sha256=oURBnbVTR5vZtcxN1LVI67A0vNbzPicogaYB_9Hwdvg,6353
+vector2dggs/s2.py,sha256=cyhvdXZjYKPCIgLJMG2ZChrbvsMKzLJcIa3bdwMdmVc,10828
+vector2dggs-0.8.0.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+vector2dggs-0.8.0.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
+vector2dggs-0.8.0.dist-info/METADATA,sha256=Dq8-j2-IeQZa92xRRmN7RC8-OJTTIKjZTv7WZL7Noz8,11586
+vector2dggs-0.8.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vector2dggs-0.8.0.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
+vector2dggs-0.8.0.dist-info/RECORD,,
vector2dggs-0.6.2.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-vector2dggs/__init__.py,sha256=w9t1Aj5a_f__PKPw_C7bWnZmWL3_GHrtgVrGYGX1wfk,27
-vector2dggs/cli.py,sha256=HoPp7Bwk2kZghAms6wNepx-bFhoAuHH7WXACMIy3MuM,652
-vector2dggs/common.py,sha256=DL3ohG-QQyI-phyxeO6Fi2BOwWnFct-I_Y87_XC2SRQ,10578
-vector2dggs/constants.py,sha256=u6n6XNvEVLUexn9Sb2rc22s2B4Rrg_VXFJaM7uEy-9Q,536
-vector2dggs/h3.py,sha256=GgiGOVbsXXNp95KWKKmJZvDxGFj91TTWl575OaPZ6yk,6145
-vector2dggs/katana.py,sha256=pgVWy032NkT5yilUO0d0IKH4NUvY7DJLjmfsxhBiF08,3407
-vector2dggs/rHP.py,sha256=Y36tPbtY-tYBUFILHD-xnUxa2yKlYotGP6043Bg5nZc,6450
-vector2dggs-0.6.2.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-vector2dggs-0.6.2.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
-vector2dggs-0.6.2.dist-info/METADATA,sha256=kNT2Iyd8irBMo2Tq0_CwnORVNeCc1ekjO1TlMwBp6qY,10014
-vector2dggs-0.6.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-vector2dggs-0.6.2.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
-vector2dggs-0.6.2.dist-info/RECORD,,
{vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/COPYING
File without changes
{vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/COPYING.LESSER
File without changes
{vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/WHEEL
File without changes
{vector2dggs-0.6.2.dist-info → vector2dggs-0.8.0.dist-info}/entry_points.txt
File without changes