vector2dggs 0.6.3__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vector2dggs/__init__.py CHANGED
@@ -1 +1 @@
-__version__: str = "0.6.3"
+__version__: str = "0.9.0"
vector2dggs/cli.py CHANGED
@@ -3,6 +3,8 @@ import click
 from vector2dggs import __version__
 from vector2dggs.h3 import h3
 from vector2dggs.rHP import rhp
+from vector2dggs.s2 import s2
+from vector2dggs.geohash import geohash
 
 # If the program does terminal interaction, make it output a short
 # notice like this when it starts in an interactive mode:
@@ -21,6 +23,8 @@ def cli():
 
 cli.add_command(h3)
 cli.add_command(rhp)
+cli.add_command(s2)
+cli.add_command(geohash)
 
 
 def main():
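The upshot of this change is that `s2` and `geohash` join `h3` and `rhp` as subcommands. A purely illustrative invocation (the file names are placeholders, not from the package):

```bash
# Index a GeoPackage to S2 level 18, and to Geohash precision 6
vector2dggs s2 -r 18 input.gpkg ./output/s2_18.parquet
vector2dggs geohash -r 6 input.gpkg ./output/gh_6.parquet
```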
vector2dggs/common.py CHANGED
@@ -104,13 +104,15 @@ def drop_condition(
     _diff = _before - _after
     if _diff:
         log_method = (
-            LOGGER.info if (_diff / float(_before)) < warning_threshold else LOGGER.warn
+            LOGGER.info
+            if (_diff / float(_before)) < warning_threshold
+            else LOGGER.warning
         )
         log_method(f"Dropped {_diff} rows ({_diff/float(_before)*100:.2f}%)")
     return df
 
 
-def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):
+def get_parent_res(dggs: str, parent_res: Union[None, str], resolution: int) -> int:
     """
     Uses a parent resolution,
     OR,
@@ -118,22 +120,17 @@ def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int):
 
     Used for intermediate re-partitioning.
     """
-    if dggs == "h3":
-        return (
-            parent_res
-            if parent_res is not None
-            else max(const.MIN_H3, (resolution - const.DEFAULT_PARENT_OFFSET))
-        )
-    elif dggs == "rhp":
-        return (
-            parent_res
-            if parent_res is not None
-            else max(const.MIN_RHP, (resolution - const.DEFAULT_PARENT_OFFSET))
-        )
-    else:
+    if dggs not in const.DEFAULT_DGGS_PARENT_RES.keys():
         raise RuntimeError(
-            "Unknown dggs {dggs}) - must be one of [ 'h3', 'rhp' ]".format(dggs=dggs)
+            "Unknown dggs {dggs} - must be one of [ {options} ]".format(
+                dggs=dggs, options=", ".join(const.DEFAULT_DGGS_PARENT_RES.keys())
+            )
         )
+    return (
+        int(parent_res)
+        if parent_res is not None
+        else const.DEFAULT_DGGS_PARENT_RES[dggs](resolution)
+    )
 
 
 def parent_partitioning(
@@ -141,10 +138,9 @@ def parent_partitioning(
     input_dir: Path,
     output_dir: Path,
     resolution: int,
-    parent_res: Union[None, int],
+    parent_res: int,
     **kwargs,
 ) -> None:
-    parent_res: int = get_parent_res(dggs, parent_res, resolution)
     partition_col = f"{dggs}_{parent_res:02}"
 
     with TqdmCallback(desc="Repartitioning"):
@@ -174,30 +170,29 @@ def polyfill(
     pq_in: Path,
     spatial_sort_col: str,
     resolution: int,
-    parent_res: Union[None, int],
+    parent_res: int,
     output_directory: str,
 ) -> None:
     """
     Reads a geoparquet, performs polyfilling (for Polygon),
-    linetracing (for LineString), and writes out to parquet.
+    linetracing (for LineString), or indexing (for Point),
+    and writes out to parquet.
     """
-    df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
+    df = gpd.read_parquet(pq_in).reset_index()
+    if spatial_sort_col != "none":
+        df = df.drop(columns=[spatial_sort_col])
     if len(df.index) == 0:
-        # Input is empty, nothing to polyfill
+        # Input is empty, nothing to convert
        return None
 
-    # DGGS specific polyfill
+    # DGGS specific conversion
    df = dggsfunc(df, resolution)
 
    if len(df.index) == 0:
-        # Polyfill resulted in empty output (e.g. large cell, small feature)
+        # Conversion resulted in empty output (e.g. large cell, small feature)
        return None
 
    df.index.rename(f"{dggs}_{resolution:02}", inplace=True)
-    parent_res: int = get_parent_res(dggs, parent_res, resolution)
-    # print(parent_res)
-    # print(df.index)
-    # print(df.columns)
 
    # Secondary (parent) index, used later for partitioning
    df = secondary_index_func(df, parent_res)
@@ -228,22 +223,23 @@ def index(
     id_field: str = None,
     cut_crs: pyproj.CRS = None,
     con: SQLConnectionType = None,
-    table: str = None,
+    layer: str = None,
     geom_col: str = "geom",
     overwrite: bool = False,
 ) -> Path:
     """
-    Performs multi-threaded polyfilling on (multi)polygons.
+    Performs multi-threaded DGGS indexing on geometries (including multipart and collections).
     """
+    parent_res = get_parent_res(dggs, parent_res, resolution)
 
-    if table and con:
+    if layer and con:
         # Database connection
         if keep_attributes:
-            q = sqlalchemy.text(f"SELECT * FROM {table}")
+            q = sqlalchemy.text(f"SELECT * FROM {layer}")
         elif id_field and not keep_attributes:
-            q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {table}")
+            q = sqlalchemy.text(f"SELECT {id_field}, {geom_col} FROM {layer}")
         else:
-            q = sqlalchemy.text(f"SELECT {geom_col} FROM {table}")
+            q = sqlalchemy.text(f"SELECT {geom_col} FROM {layer}")
         df = gpd.read_postgis(q, con.connect(), geom_col=geom_col).rename_geometry(
             "geometry"
         )
@@ -291,7 +287,8 @@ def index(
             "index": lambda frame: frame[
                 (frame.geometry.geom_type != "Polygon")
                 & (frame.geometry.geom_type != "LineString")
-            ],  # NB currently points and other types are lost; in principle, these could be indexed
+                & (frame.geometry.geom_type != "Point")
+            ],
             "message": "Considering unsupported geometries",
         },
     ]
@@ -300,11 +297,12 @@ def index(
 
     ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
 
-    LOGGER.debug("Spatially sorting and partitioning (%s)", spatial_sorting)
-    ddf = ddf.spatial_shuffle(by=spatial_sorting)
+    if spatial_sorting != "none":
+        LOGGER.debug("Spatially sorting and partitioning (%s)", spatial_sorting)
+        ddf = ddf.spatial_shuffle(by=spatial_sorting)
     spatial_sort_col = (
         spatial_sorting
-        if spatial_sorting == "geohash"
+        if (spatial_sorting == "geohash" or spatial_sorting == "none")
         else f"{spatial_sorting}_distance"
     )
 
@@ -314,9 +312,9 @@ def index(
 
     filepaths = list(map(lambda f: f.absolute(), Path(tmpdir).glob("*")))
 
-    # Multithreaded polyfilling
+    # Multithreaded DGGS indexing
     LOGGER.debug(
-        "Indexing on spatial partitions by polyfill with resolution: %d",
+        "DGGS indexing by spatial partitions with resolution: %d",
         resolution,
     )
     with tempfile.TemporaryDirectory(suffix=".parquet") as tmpdir2:
@@ -344,7 +342,7 @@ def index(
 
         parent_partitioning(
             dggs,
-            tmpdir2,
+            Path(tmpdir2),
             output_directory,
             resolution,
             parent_res,
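A quick sketch of the refactored `get_parent_res` behaviour above, using only names from this diff (parent resolutions now arrive from the CLI as strings, and per-DGGS defaults live in `constants.DEFAULT_DGGS_PARENT_RES`):

```python
import vector2dggs.constants as const
from vector2dggs.common import get_parent_res

# Default: resolution - DEFAULT_PARENT_OFFSET (6), floored at the DGGS minimum
assert get_parent_res("h3", None, 9) == 3       # max(MIN_H3=0, 9 - 6)
assert get_parent_res("geohash", None, 4) == 1  # max(MIN_GEOHASH=1, 4 - 6)

# Explicit: click.Choice yields strings, hence the int() coercion
assert get_parent_res("s2", "5", 18) == 5
```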
vector2dggs/constants.py CHANGED
@@ -5,22 +5,71 @@ import tempfile
 
 MIN_H3, MAX_H3 = 0, 15
 MIN_RHP, MAX_RHP = 0, 15
+MIN_S2, MAX_S2 = 0, 30
+MIN_GEOHASH, MAX_GEOHASH = 1, 12
 
 DEFAULTS = {
     "id": None,
     "k": False,
     "ch": 50,
-    "s": "hilbert",
+    "s": "none",
     "crs": None,
     "c": 5000,
     "t": (multiprocessing.cpu_count() - 1),
-    "tbl": None,
+    "lyr": None,
     "g": "geom",
     "tempdir": tempfile.tempdir,
 }
 
+SPATIAL_SORTING_METHODS = ["hilbert", "morton", "geohash", "none"]
+
+DEFAULT_DGGS_PARENT_RES = {
+    "h3": lambda resolution: max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET)),
+    "rhp": lambda resolution: max(MIN_RHP, (resolution - DEFAULT_PARENT_OFFSET)),
+    "geohash": lambda resolution: max(
+        MIN_GEOHASH, (resolution - DEFAULT_PARENT_OFFSET)
+    ),
+    "s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)),
+}
+
 DEFAULT_PARENT_OFFSET = 6
 
+# http://s2geometry.io/resources/s2cell_statistics.html
+S2_CELLS_MAX_AREA_M2_BY_LEVEL = {
+    0: 85011012.19 * 1e6,
+    1: 21252753.05 * 1e6,
+    2: 6026521.16 * 1e6,
+    3: 1646455.50 * 1e6,
+    4: 413918.15 * 1e6,
+    5: 104297.91 * 1e6,
+    6: 26113.30 * 1e6,
+    7: 6529.09 * 1e6,
+    8: 1632.45 * 1e6,
+    9: 408.12 * 1e6,
+    10: 102.03 * 1e6,
+    11: 25.51 * 1e6,
+    12: 6.38 * 1e6,
+    13: 1.59 * 1e6,
+    14: 0.40 * 1e6,
+    15: 99638.93,
+    16: 24909.73,
+    17: 6227.43,
+    18: 1556.86,
+    19: 389.22,
+    20: 97.30,
+    21: 24.33,
+    22: 6.08,
+    23: 1.52,
+    24: 0.38,
+    25: 950.23 * 1e-4,
+    26: 237.56 * 1e-4,
+    27: 59.39 * 1e-4,
+    28: 14.85 * 1e-4,
+    29: 3.71 * 1e-4,
+    30: 0.93 * 1e-4,
+}
+
+
 warnings.filterwarnings(
     "ignore"
 )  # This is to filter out the polyfill warnings when rows failed to get indexed at a resolution, can be commented out to find missing rows
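This cell-area table is consumed by `max_cells_for_geom` in the new `s2.py` (below) to size S2 coverings. A rough, illustrative use of the table:

```python
from math import ceil

import vector2dggs.constants as const

# A 1,000 km^2 bounding box needs at least ~157 level-12 S2 cells,
# since a level-12 cell is at most ~6.38 km^2
bbox_area_m2 = 1_000 * 1e6
min_cells = ceil(bbox_area_m2 / const.S2_CELLS_MAX_AREA_M2_BY_LEVEL[12])
print(min_cells)  # 157
```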
vector2dggs/geohash.py ADDED
@@ -0,0 +1,240 @@
+import sys
+import click
+import click_log
+import tempfile
+import pyproj
+
+from geohash_polygon import polygon_to_geohashes  # rusty-polygon-geohasher
+from geohash import encode, decode  # python-geohash
+
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import Point, Polygon
+
+from typing import Union
+from pathlib import Path
+
+import vector2dggs.constants as const
+import vector2dggs.common as common
+
+from vector2dggs import __version__
+
+
+def gh_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
+    df[f"geohash_{parent_level:02}"] = df.index.to_series().str[:parent_level]
+    return df
+
+
+# NB this implements a point-inside hash, but geohash_polygon only supports "within" or "intersects" (on the basis of geohashes as _polygon_ geometries) which means we have to perform additional computation to support "polyfill" as defined by H3
+# A future version of vector2dggs may support within/intersects modality, at which point that would just be outer/inner with no further computation
+def _polygon_to_geohashes(polygon: Polygon, level: int) -> set[str]:
+    # Function to compute geohash set for one polygon geometry
+    outer: set[str] = polygon_to_geohashes(polygon, level, inner=False)
+    inner: set[str] = polygon_to_geohashes(polygon, level, inner=True)
+    edge: set[str] = {
+        h
+        for h in (outer - inner)  # All edge cells
+        if Point(*reversed(decode(h))).within(polygon)
+    }  # Edge cells with a center within the polygon
+    return edge | inner
+
+
+def gh_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
+    gh_col = "geohash"
+    df_polygon = df[df.geom_type == "Polygon"].copy()
+    if not df_polygon.empty:
+        df_polygon = (
+            df_polygon.assign(
+                **{
+                    gh_col: df_polygon.geometry.apply(
+                        lambda geom: _polygon_to_geohashes(geom, level)
+                    )
+                }
+            )
+            .explode(gh_col, ignore_index=True)
+            .set_index(gh_col)
+        )
+
+    # TODO linestring support
+    # e.g. JS implementation https://github.com/alrico88/geohashes-along
+
+    df_point = df[df.geom_type == "Point"].copy()
+    if len(df_point.index) > 0:
+        df_point[gh_col] = df_point.geometry.apply(
+            lambda geom: encode(geom.y, geom.x, precision=level)
+        )
+        df_point = df_point.set_index(gh_col)
+
+    return pd.concat(
+        map(
+            lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
+            [df_polygon, df_point],
+        )
+    )
+
+
+@click.command(context_settings={"show_default": True})
+@click_log.simple_verbosity_option(common.LOGGER)
+@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
+@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
+@click.option(
+    "-r",
+    "--resolution",
+    "level",
+    required=True,
+    type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
+    help="Geohash level to index",
+    nargs=1,
+)
+@click.option(
+    "-pr",
+    "--parent_res",
+    "parent_level",
+    required=False,
+    type=click.Choice(list(map(str, range(const.MIN_GEOHASH, const.MAX_GEOHASH + 1)))),
+    help="Geohash parent level for the output partition. Defaults to resolution - 6",
+)
+@click.option(
+    "-id",
+    "--id_field",
+    required=False,
+    default=const.DEFAULTS["id"],
+    type=str,
+    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
+    nargs=1,
+)
+@click.option(
+    "-k",
+    "--keep_attributes",
+    is_flag=True,
+    show_default=True,
+    default=const.DEFAULTS["k"],
+    help="Retain attributes in output. The default is to create an output that only includes Geohash cell ID and the ID given by the -id field (or the default index ID).",
+)
+@click.option(
+    "-ch",
+    "--chunksize",
+    required=True,
+    type=int,
+    default=const.DEFAULTS["ch"],
+    help="The number of rows per index partition to use when spatially partitioning. Adjusting this number will trade off memory use and time.",
+    nargs=1,
+)
+@click.option(
+    "-s",
+    "--spatial_sorting",
+    type=click.Choice(const.SPATIAL_SORTING_METHODS),
+    default=const.DEFAULTS["s"],
+    help="Spatial sorting method when performing spatial partitioning.",
+)
+@click.option(
+    "-crs",
+    "--cut_crs",
+    required=False,
+    default=const.DEFAULTS["crs"],
+    type=int,
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cut_threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    nargs=1,
+)
+@click.option(
+    "-c",
+    "--cut_threshold",
+    required=True,
+    default=const.DEFAULTS["c"],
+    type=int,
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    nargs=1,
+)
+@click.option(
+    "-t",
+    "--threads",
+    required=False,
+    default=const.DEFAULTS["t"],
+    type=int,
+    help="Amount of threads used for operation",
+    nargs=1,
+)
+@click.option(
+    "-lyr",
+    "--layer",
+    required=False,
+    default=const.DEFAULTS["lyr"],
+    type=str,
+    help="Name of the layer or table to read when using an input that supports layers or tables",
+    nargs=1,
+)
+@click.option(
+    "-g",
+    "--geom_col",
+    required=False,
+    default=const.DEFAULTS["g"],
+    type=str,
+    help="Column name to use when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "--tempdir",
+    default=const.DEFAULTS["tempdir"],
+    type=click.Path(),
+    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
+)
+@click.option("-o", "--overwrite", is_flag=True)
+@click.version_option(version=__version__)
+def geohash(
+    vector_input: Union[str, Path],
+    output_directory: Union[str, Path],
+    level: str,
+    parent_level: str,
+    id_field: str,
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_crs: int,
+    cut_threshold: int,
+    threads: int,
+    layer: str,
+    geom_col: str,
+    tempdir: Union[str, Path],
+    overwrite: bool,
+):
+    """
+    Ingest a vector dataset and index it using the Geohash geocode system.
+
+    VECTOR_INPUT is the path to input vector geospatial data.
+    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
+    """
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
+
+    common.check_resolutions(level, parent_level)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
+
+    if cut_crs is not None:
+        cut_crs = pyproj.CRS.from_user_input(cut_crs)
+
+    try:
+        common.index(
+            "geohash",
+            gh_polyfill,
+            gh_secondary_index,
+            vector_input,
+            output_directory,
+            int(level),
+            parent_level,
+            keep_attributes,
+            chunksize,
+            spatial_sorting,
+            cut_threshold,
+            threads,
+            cut_crs=cut_crs,
+            id_field=id_field,
+            con=con,
+            layer=layer,
+            geom_col=geom_col,
+            overwrite=overwrite,
+        )
+    except:
+        raise
+    else:
+        sys.exit(0)
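A minimal sketch of the center-inside Geohash polyfill defined above, assuming `python-geohash` and `rusty-polygon-geohasher` are installed (the coordinates are arbitrary):

```python
import geopandas as gpd
from shapely.geometry import Polygon

from vector2dggs.geohash import gh_polyfill

# One ~50 km square polygon; precision-5 geohash cells are ~4.9 x 4.9 km,
# so a center-inside polyfill at level 5 yields a non-trivial cell set
gdf = gpd.GeoDataFrame(
    {"id": [1]},
    geometry=[Polygon([(174.0, -37.0), (174.5, -37.0), (174.5, -37.5), (174.0, -37.5)])],
    crs="EPSG:4326",
)
out = gh_polyfill(gdf, 5)
print(out.index[:5])  # 5-character geohash strings
```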
vector2dggs/h3.py CHANGED
@@ -18,13 +18,13 @@ import vector2dggs.common as common
 from vector2dggs import __version__
 
 
-def h3_secondary_index(df: gpd.GeoDataFrame, parent_res: int) -> gpd.GeoDataFrame:
+def h3_secondary_index(df: pd.DataFrame, parent_res: int) -> pd.DataFrame:
     return df.h3.h3_to_parent(parent_res)
 
 
-def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
+def h3polyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
     df_polygon = df[df.geom_type == "Polygon"]
-    if len(df_polygon.index) > 0:
+    if not df_polygon.empty:
         df_polygon = df_polygon.h3.polyfill_resample(
             resolution, return_geometry=False
         ).drop(columns=["index"])
@@ -38,10 +38,14 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
         )
         df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
 
+    df_point = df[df.geom_type == "Point"]
+    if len(df_point.index) > 0:
+        df_point = df_point.h3.geo_to_h3(resolution, set_index=True)
+
     return pd.concat(
         map(
             lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
-            [df_polygon, df_linestring],
+            [df_polygon, df_linestring, df_point],
         )
     )
 
@@ -94,7 +98,7 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
 @click.option(
     "-s",
     "--spatial_sorting",
-    type=click.Choice(["hilbert", "morton", "geohash"]),
+    type=click.Choice(const.SPATIAL_SORTING_METHODS),
     default=const.DEFAULTS["s"],
     help="Spatial sorting method when performing spatial partitioning.",
 )
@@ -126,12 +130,12 @@ def h3polyfill(df: gpd.GeoDataFrame, resolution: int):
     nargs=1,
 )
 @click.option(
-    "-tbl",
-    "--table",
+    "-lyr",
+    "--layer",
     required=False,
-    default=const.DEFAULTS["tbl"],
+    default=const.DEFAULTS["lyr"],
     type=str,
-    help="Name of the table to read when using a spatial database connection as input",
+    help="Name of the layer or table to read when using an input that supports layers or tables",
     nargs=1,
 )
 @click.option(
@@ -163,7 +167,7 @@ def h3(
     cut_crs: int,
     cut_threshold: int,
     threads: int,
-    table: str,
+    layer: str,
     geom_col: str,
     tempdir: Union[str, Path],
     overwrite: bool,
@@ -201,7 +205,7 @@ def h3(
         cut_crs=cut_crs,
         id_field=id_field,
         con=con,
-        table=table,
+        layer=layer,
         geom_col=geom_col,
         overwrite=overwrite,
     )
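A short sketch of the new Point handling in `h3polyfill`, assuming `h3pandas` is installed (the data is arbitrary):

```python
import geopandas as gpd
from shapely.geometry import Point

from vector2dggs.h3 import h3polyfill

gdf = gpd.GeoDataFrame(
    {"name": ["a", "b"]},
    geometry=[Point(174.76, -36.85), Point(174.80, -36.90)],
    crs="EPSG:4326",
)
out = h3polyfill(gdf, 9)  # points are now indexed via geo_to_h3, one cell each
print(out)
```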
vector2dggs/katana.py CHANGED
@@ -11,6 +11,7 @@ Redistribution and use in source and binary forms, with or without modification,
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
+from shapely import force_2d, has_z, has_m
 from shapely.geometry import (
     box,
     Polygon,
@@ -18,22 +19,34 @@ from shapely.geometry import (
     LineString,
     MultiLineString,
     GeometryCollection,
+    LinearRing,
 )
-from shapely.validation import explain_validity, make_valid
+from shapely.geometry.base import BaseGeometry
+from shapely.validation import make_valid
+from typing import Union, List
 
 
-def katana(geometry, threshold, count=0) -> GeometryCollection:
+def katana(
+    geometry: Union[BaseGeometry, None],
+    threshold: float,
+    count: int = 0,
+    check_2D: bool = True,
+) -> List[BaseGeometry]:
     """
-    Split a geometry into two parts across its shortest dimension.
+    Recursively split a geometry into two parts across its shortest dimension.
     Invalid input `geometry` will silently be made valid (if possible).
+    Any LinearRings will be converted to Polygons.
     """
     if geometry is None:
-        # Empty geometry collection
-        return GeometryCollection([])
+        return []
+    if isinstance(geometry, LinearRing):
+        geometry = Polygon(geometry)
+    if check_2D and (has_z(geometry) or has_m(geometry)):
+        geometry = force_2d(geometry)
+        check_2D = False  # No further 2D check needed
     if not geometry.is_valid:
-        # print(explain_validity(geometry))
         geometry = make_valid(geometry)
-    if geometry.type == "GeometryCollection":
+    if geometry.geom_type == "GeometryCollection":
         geometry.normalize()
         geometry = geometry.buffer(0)
     bounds = geometry.bounds
@@ -60,16 +73,9 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
         if not isinstance(c, GeometryCollection):
             c = GeometryCollection([c])
         for e in c.geoms:
-            if isinstance(e, (Polygon, MultiPolygon, LineString, MultiLineString)):
-                result.extend(katana(e, threshold, count + 1))
-    if count > 0:
-        return result
-    # convert multipart into singlepart
-    final_result = []
-    for g in result:
-        # if isinstance(g, MultiPolygon):
-        #     final_result.extend(g)
-        # else:
-        #     final_result.append(g)
-        final_result.append(g)
-    return final_result
+            if isinstance(
+                e, (Polygon, MultiPolygon, LineString, MultiLineString, LinearRing)
+            ):
+                result.extend(katana(e, threshold, count + 1, check_2D))
+
+    return result
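Illustrating the revised contract (a flat list of singlepart geometries is now always returned; `threshold` is in the geometry's own units):

```python
from shapely.geometry import box

from vector2dggs.katana import katana

# Recursively halve a 10 x 10 square until every piece fits within ~2.5 units
parts = katana(box(0, 0, 10, 10), threshold=2.5)
print(len(parts), {p.geom_type for p in parts})
```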
vector2dggs/rHP.py CHANGED
@@ -11,6 +11,7 @@ import geopandas as gpd
 
 from typing import Union
 from pathlib import Path
+from rhppandas.util.const import COLUMNS
 
 import vector2dggs.constants as const
 import vector2dggs.common as common
@@ -18,36 +19,34 @@ import vector2dggs.common as common
 from vector2dggs import __version__
 
 
-def rhp_secondary_index(df: gpd.GeoDataFrame, parent_res: int) -> gpd.GeoDataFrame:
+def rhp_secondary_index(df: pd.DataFrame, parent_res: int) -> pd.DataFrame:
     return df.rhp.rhp_to_parent(parent_res)
 
 
-def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
+def rhppolyfill(df: gpd.GeoDataFrame, resolution: int) -> pd.DataFrame:
     df_polygon = df[df.geom_type == "Polygon"]
     if len(df_polygon.index) > 0:
         df_polygon = df_polygon.rhp.polyfill_resample(
             resolution, return_geometry=False
         ).drop(columns=["index"])
 
-    df_multipolygon = df[df.geom_type == "MultiPolygon"]
-    if len(df_multipolygon.index) > 0:
-        df_multipolygon = df_multipolygon.rhp.polyfill_resample(
-            resolution, return_geometry=False
-        ).drop(columns=["index"])
+    df_linestring = df[df.geom_type == "LineString"]
+    if len(df_linestring.index) > 0:
+        df_linestring = (
+            df_linestring.rhp.linetrace(resolution)
+            .explode(COLUMNS["linetrace"])
+            .set_index(COLUMNS["linetrace"])
+        )
+        df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
 
-    # df_linestring = df[df.geom_type == "LineString"]
-    # if len(df_linestring.index) > 0:
-    #     df_linestring = (
-    #         df_linestring.h3.linetrace(resolution)
-    #         .explode("h3_linetrace")
-    #         .set_index("h3_linetrace")
-    #     )
-    #     df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
+    df_point = df[df.geom_type == "Point"]
+    if len(df_point.index) > 0:
+        df_point = df_point.rhp.geo_to_rhp(resolution, set_index=True)
 
     return pd.concat(
         map(
             lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
-            [df_polygon, df_multipolygon],  # df_linestring],
+            [df_polygon, df_linestring, df_point],
         )
     )
 
@@ -61,7 +60,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
     "--resolution",
     required=True,
     type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
-    help="H3 resolution to index",
+    help="rHEALPix resolution to index",
     nargs=1,
 )
 @click.option(
@@ -69,7 +68,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
     "--parent_res",
     required=False,
     type=click.Choice(list(map(str, range(const.MIN_RHP, const.MAX_RHP + 1)))),
-    help="H3 Parent resolution for the output partition. Defaults to resolution - 6",
+    help="rHEALPix parent resolution for the output partition. Defaults to resolution - 6",
 )
 @click.option(
     "-id",
@@ -100,7 +99,7 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
 @click.option(
     "-s",
     "--spatial_sorting",
-    type=click.Choice(["hilbert", "morton", "geohash"]),
+    type=click.Choice(const.SPATIAL_SORTING_METHODS),
     default=const.DEFAULTS["s"],
     help="Spatial sorting method when performing spatial partitioning.",
 )
@@ -132,12 +131,12 @@ def rhppolyfill(df: gpd.GeoDataFrame, resolution: int):
     nargs=1,
 )
 @click.option(
-    "-tbl",
-    "--table",
+    "-lyr",
+    "--layer",
     required=False,
-    default=const.DEFAULTS["tbl"],
+    default=const.DEFAULTS["lyr"],
     type=str,
-    help="Name of the table to read when using a spatial database connection as input",
+    help="Name of the layer or table to read when using an input that supports layers or tables",
     nargs=1,
 )
 @click.option(
@@ -169,7 +168,7 @@ def rhp(
     cut_crs: int,
     cut_threshold: int,
     threads: int,
-    table: str,
+    layer: str,
     geom_col: str,
     tempdir: Union[str, Path],
     overwrite: bool,
@@ -207,7 +206,7 @@ def rhp(
         cut_crs=cut_crs,
         id_field=id_field,
         con=con,
-        table=table,
+        layer=layer,
         geom_col=geom_col,
         overwrite=overwrite,
     )
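A sketch of the restored LineString support and new Point support, assuming `rhppandas` >= 0.2.0 as required by this release (the data is arbitrary):

```python
import geopandas as gpd
from shapely.geometry import LineString, Point

from vector2dggs.rHP import rhppolyfill

gdf = gpd.GeoDataFrame(
    {"name": ["road", "poi"]},
    geometry=[LineString([(174.7, -36.8), (174.8, -36.9)]), Point(174.76, -36.85)],
    crs="EPSG:4326",
)
out = rhppolyfill(gdf, 8)  # indexed by rHEALPix cell ID
print(out.head())
```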
vector2dggs/s2.py ADDED
@@ -0,0 +1,349 @@
+import sys
+import click
+import click_log
+import tempfile
+import pyproj
+from math import ceil
+
+from s2geometry import pywraps2 as S2
+
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import box, Polygon, LineString, Point
+from shapely.ops import transform
+from pyproj import CRS, Transformer
+
+from typing import Union
+from pathlib import Path
+
+import vector2dggs.constants as const
+import vector2dggs.common as common
+
+from vector2dggs import __version__
+
+
+def s2_secondary_index(df: pd.DataFrame, parent_level: int) -> pd.DataFrame:
+    # NB also converts the index to S2 cell tokens
+    index_series = df.index.to_series().astype(object)
+    df[f"s2_{parent_level:02}"] = index_series.map(
+        lambda cell_id: cell_id.parent(parent_level).ToToken()
+    )
+    df.index = index_series.map(lambda cell_id: cell_id.ToToken())
+    return df
+
+
+def bbox_area_in_m2(
+    geom: Polygon,
+    src_crs: Union[str, CRS] = "EPSG:4326",
+    dst_crs: Union[str, CRS] = "EPSG:6933",
+) -> float:
+    """
+    Calculate the area of the bounding box of a geometry in square meters.
+    """
+    minx, miny, maxx, maxy = geom.bounds
+    bbox = box(minx, miny, maxx, maxy)
+    transformer = Transformer.from_crs(src_crs, dst_crs, always_xy=True)
+    projected_bbox = transform(transformer.transform, bbox)
+    return projected_bbox.area
+
+
+def max_cells_for_geom(
+    geom: Union[Polygon, LineString], level: int, margin: float = 1.02
+) -> int:
+    """
+    Calculate the maximum number of S2 cells that are appropriate for the given geometry and level.
+    This is based on the area of the geometry's bounding box,
+    and the maximum area of S2 cells at the given level.
+    """
+    area = bbox_area_in_m2(geom)
+    max_cells = ceil(max(1, area / const.S2_CELLS_MAX_AREA_M2_BY_LEVEL[level]))
+    return ceil(max_cells * margin)
+
+
+def cell_center_is_inside_polygon(cell: S2.S2CellId, polygon: S2.S2Polygon) -> bool:
+    """Determines if the center of the S2 cell is inside the polygon"""
+    cell_center = S2.S2Cell(cell).GetCenter()
+    return polygon.Contains(cell_center)
+
+
+def s2_polyfill_polygons(df: gpd.GeoDataFrame, level: int) -> gpd.GeoDataFrame:
+
+    def generate_s2_covering(
+        geom: Polygon, level: int, centroid_inside: bool = True
+    ) -> set[S2.S2CellId]:
+        # Prepare loops: first the exterior loop, then the interior loops
+        loops = []
+        # Exterior ring
+        latlngs = [
+            S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in geom.exterior.coords
+        ]
+        s2loop = S2.S2Loop([latlng.ToPoint() for latlng in latlngs])
+        s2loop.Normalize()
+        loops.append(s2loop)
+
+        # Interior rings (polygon holes)
+        for interior in geom.interiors:
+            interior_latlngs = [
+                S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in interior.coords
+            ]
+            s2interior_loop = S2.S2Loop(
+                [latlng.ToPoint() for latlng in interior_latlngs]
+            )
+            s2interior_loop.Normalize()
+            loops.append(s2interior_loop)
+
+        # Build an S2Polygon from the loops
+        s2polygon = S2.S2Polygon()
+        s2polygon.InitNested(loops)
+
+        # Use S2RegionCoverer to get the cell IDs at the specified level
+        coverer = S2.S2RegionCoverer()
+
+        max_cells = max_cells_for_geom(geom, level)
+        coverer.set_max_cells(max_cells)
+        coverer.set_min_level(level)
+        coverer.set_max_level(level)
+
+        covering: list[S2.S2CellId] = coverer.GetCovering(s2polygon)
+
+        if centroid_inside:
+            # Coverings are "intersects" modality, polyfill is "centre inside" modality
+            # ergo, filter out covering cells that are not inside the polygon
+            covering = {
+                cell
+                for cell in covering
+                if cell_center_is_inside_polygon(cell, s2polygon)
+            }
+        else:
+            covering = set(covering)
+
+        return covering
+
+    df["s2index"] = df["geometry"].apply(lambda geom: generate_s2_covering(geom, level))
+    df = df[
+        df["s2index"].map(lambda x: len(x) > 0)
+    ]  # Remove rows with no covering at this level
+
+    return df
+
+
+def s2_cell_ids_from_linestring(
+    linestring: LineString, level: int
+) -> list[S2.S2CellId]:
+    latlngs = [S2.S2LatLng.FromDegrees(lat, lon) for lon, lat in linestring.coords]
+    polyline = S2.S2Polyline(latlngs)
+
+    coverer = S2.S2RegionCoverer()
+    max_cells = max_cells_for_geom(linestring, level)
+    coverer.set_max_cells(max_cells)
+    coverer.set_min_level(level)
+    coverer.set_max_level(level)
+
+    return coverer.GetCovering(polyline)
+
+
+def s2_cell_id_from_point(geom: Point, level: int) -> S2.S2CellId:
+    """
+    Convert a point geometry to an S2 cell at the specified level.
+    """
+    latlng = S2.S2LatLng.FromDegrees(geom.y, geom.x)
+    return S2.S2CellId(latlng).parent(level)
+
+
+def s2_polyfill(df: gpd.GeoDataFrame, level: int) -> pd.DataFrame:
+
+    df_polygon = df[df.geom_type == "Polygon"].copy()
+    if len(df_polygon.index) > 0:
+        df_polygon = (
+            s2_polyfill_polygons(df_polygon, level)
+            .explode("s2index")
+            .set_index("s2index")
+        )
+
+    df_linestring = df[df.geom_type == "LineString"].copy()
+    if len(df_linestring.index) > 0:
+        df_linestring["s2index"] = df_linestring.geometry.apply(
+            lambda geom: s2_cell_ids_from_linestring(geom, level)
+        )
+        df_linestring = df_linestring.explode("s2index").set_index("s2index")
+
+    df_point = df[df.geom_type == "Point"].copy()
+    if len(df_point.index) > 0:
+        df_point["s2index"] = df_point.geometry.apply(
+            lambda geom: s2_cell_id_from_point(geom, level)
+        )
+        df_point = df_point.set_index("s2index")
+
+    return pd.concat(
+        map(
+            lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
+            [df_polygon, df_linestring, df_point],
+        )
+    )
+
+
+@click.command(context_settings={"show_default": True})
+@click_log.simple_verbosity_option(common.LOGGER)
+@click.argument("vector_input", required=True, type=click.Path(), nargs=1)
+@click.argument("output_directory", required=True, type=click.Path(), nargs=1)
+@click.option(
+    "-r",
+    "--resolution",
+    "level",
+    required=True,
+    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
+    help="S2 level to index",
+    nargs=1,
+)
+@click.option(
+    "-pr",
+    "--parent_res",
+    "parent_level",
+    required=False,
+    type=click.Choice(list(map(str, range(const.MIN_S2, const.MAX_S2 + 1)))),
+    help="S2 parent level for the output partition. Defaults to resolution - 6",
+)
+@click.option(
+    "-id",
+    "--id_field",
+    required=False,
+    default=const.DEFAULTS["id"],
+    type=str,
+    help="Field to use as an ID; defaults to a constructed single 0...n index on the original feature order.",
+    nargs=1,
+)
+@click.option(
+    "-k",
+    "--keep_attributes",
+    is_flag=True,
+    show_default=True,
+    default=const.DEFAULTS["k"],
+    help="Retain attributes in output. The default is to create an output that only includes S2 cell ID and the ID given by the -id field (or the default index ID).",
+)
+@click.option(
+    "-ch",
+    "--chunksize",
+    required=True,
+    type=int,
+    default=const.DEFAULTS["ch"],
+    help="The number of rows per index partition to use when spatially partitioning. Adjusting this number will trade off memory use and time.",
+    nargs=1,
+)
+@click.option(
+    "-s",
+    "--spatial_sorting",
+    type=click.Choice(const.SPATIAL_SORTING_METHODS),
+    default=const.DEFAULTS["s"],
+    help="Spatial sorting method when performing spatial partitioning.",
+)
+@click.option(
+    "-crs",
+    "--cut_crs",
+    required=False,
+    default=const.DEFAULTS["crs"],
+    type=int,
+    help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cut_threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
+    nargs=1,
+)
+@click.option(
+    "-c",
+    "--cut_threshold",
+    required=True,
+    default=const.DEFAULTS["c"],
+    type=int,
+    help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
+    nargs=1,
+)
+@click.option(
+    "-t",
+    "--threads",
+    required=False,
+    default=const.DEFAULTS["t"],
+    type=int,
+    help="Amount of threads used for operation",
+    nargs=1,
+)
+@click.option(
+    "-lyr",
+    "--layer",
+    required=False,
+    default=const.DEFAULTS["lyr"],
+    type=str,
+    help="Name of the layer or table to read when using an input that supports layers or tables",
+    nargs=1,
+)
+@click.option(
+    "-g",
+    "--geom_col",
+    required=False,
+    default=const.DEFAULTS["g"],
+    type=str,
+    help="Column name to use when using a spatial database connection as input",
+    nargs=1,
+)
+@click.option(
+    "--tempdir",
+    default=const.DEFAULTS["tempdir"],
+    type=click.Path(),
+    help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
+)
+@click.option("-o", "--overwrite", is_flag=True)
+@click.version_option(version=__version__)
+def s2(
+    vector_input: Union[str, Path],
+    output_directory: Union[str, Path],
+    level: str,
+    parent_level: str,
+    id_field: str,
+    keep_attributes: bool,
+    chunksize: int,
+    spatial_sorting: str,
+    cut_crs: int,
+    cut_threshold: int,
+    threads: int,
+    layer: str,
+    geom_col: str,
+    tempdir: Union[str, Path],
+    overwrite: bool,
+):
+    """
+    Ingest a vector dataset and index it to the S2 DGGS.
+
+    VECTOR_INPUT is the path to input vector geospatial data.
+    OUTPUT_DIRECTORY should be a directory, not a file or database table, as it will instead be the write location for an Apache Parquet data store.
+    """
+    tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
+
+    common.check_resolutions(level, parent_level)
+
+    con, vector_input = common.db_conn_and_input_path(vector_input)
+    output_directory = common.resolve_output_path(output_directory, overwrite)
+
+    if cut_crs is not None:
+        cut_crs = pyproj.CRS.from_user_input(cut_crs)
+
+    try:
+        common.index(
+            "s2",
+            s2_polyfill,
+            s2_secondary_index,
+            vector_input,
+            output_directory,
+            int(level),
+            parent_level,
+            keep_attributes,
+            chunksize,
+            spatial_sorting,
+            cut_threshold,
+            threads,
+            cut_crs=cut_crs,
+            id_field=id_field,
+            con=con,
+            layer=layer,
+            geom_col=geom_col,
+            overwrite=overwrite,
+        )
+    except:
+        raise
+    else:
+        sys.exit(0)
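For orientation, the pywraps2 calls this module leans on can be exercised in isolation; a tiny sketch mirroring `s2_cell_id_from_point` and the token conversion in `s2_secondary_index` (coordinates are arbitrary):

```python
from s2geometry import pywraps2 as S2

# Index one point at level 18, then derive its level-12 parent token
latlng = S2.S2LatLng.FromDegrees(-36.85, 174.76)  # lat, lng order
cell = S2.S2CellId(latlng).parent(18)
print(cell.ToToken(), cell.parent(12).ToToken())
```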
{vector2dggs-0.6.3.dist-info → vector2dggs-0.9.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vector2dggs
-Version: 0.6.3
+Version: 0.9.0
 Summary: CLI DGGS indexer for vector geospatial data
 Home-page: https://github.com/manaakiwhenua/vector2dggs
 License: LGPL-3.0-or-later
@@ -29,8 +29,10 @@ Requires-Dist: pillow (>=11.2.1,<12.0.0)
 Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
 Requires-Dist: pyarrow (>=20.0,<21.0)
 Requires-Dist: pyproj (>=3.7,<4.0)
-Requires-Dist: rhealpixdggs (>=0.5.5,<0.6.0)
-Requires-Dist: rhppandas (>=0.1.2,<0.2.0)
+Requires-Dist: python-geohash (>=0.8.5,<0.9.0)
+Requires-Dist: rhppandas (>=0.2.0,<0.3.0)
+Requires-Dist: rusty-polygon-geohasher (>=0.2.3,<0.3.0)
+Requires-Dist: s2geometry (>=0.9.0,<0.10.0)
 Requires-Dist: shapely (>=2.1,<3.0)
 Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
 Requires-Dist: tqdm (>=4.67,<5.0)
@@ -47,8 +49,13 @@ This is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/
 
 Currently this tool supports the following DGGSs:
 
-- H3 (polygons, linestrings)
-- rHEALPix (polygons)
+- [H3](https://h3geo.org/)
+- [rHEALPix](https://datastore.landcareresearch.co.nz/dataset/rhealpix-discrete-global-grid-system)
+- [S2](https://s2geometry.io/)
+
+... and the following geocode systems:
+
+- [Geohash](https://en.wikipedia.org/wiki/Geohash) (points, polygons)
 
 Contributions (especially for other DGGSs), suggestions, bug reports and strongly worded letters are all welcome.
 
@@ -63,7 +70,8 @@ pip install vector2dggs
 ## Usage
 
 ```bash
-vector2dggs --help [11:22:14]
+vector2dggs --help
+
 Usage: vector2dggs [OPTIONS] COMMAND [ARGS]...
 
 Options:
@@ -71,8 +79,10 @@ Options:
   --help  Show this message and exit.
 
 Commands:
-  h3   Ingest a vector dataset and index it to the H3 DGGS.
-  rhp  Ingest a vector dataset and index it to the rHEALPix DGGS.
+  geohash  Ingest a vector dataset and index it using the Geohash geocode...
+  h3       Ingest a vector dataset and index it to the H3 DGGS.
+  rhp      Ingest a vector dataset and index it to the rHEALPix DGGS.
+  s2       Ingest a vector dataset and index it to the S2 DGGS.
 ```
 
 ```bash
@@ -119,8 +129,8 @@ Options:
                                   [default: 5000; required]
   -t, --threads INTEGER           Amount of threads used for operation
                                   [default: 7]
-  -tbl, --table TEXT              Name of the table to read when using a
-                                  spatial database connection as input
+  -lyr, --layer TEXT              Name of the layer or table to read when
+                                  using an input that supports layers or tables
   -g, --geom_col TEXT             Column name to use when using a spatial
                                   database connection as input  [default:
                                   geom]
@@ -137,9 +147,9 @@ Options:
 
 Output is in the Apache Parquet format, a directory with one file per partition.
 
-For a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
+For a quick view of your output, you can read Apache Parquet with pandas, and then use tools like h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:
 - Joining attribute data from the original feature-level data onto computed DGGS cells.
-- Joining other data to this output on the H3 cell ID. (The output has a column like `h3_\d{2}`, e.g. `h3_09` or `h3_12` according to the target resolution.)
+- Joining other data to this output on the DGGS cell ID. (The output has a column like `{dggs}_\d`, e.g. `h3_09` or `h3_12` according to the target resolution, zero-padded to account for the maximum resolution of the DGGS.)
 
 Geoparquet output (hexagon boundaries):
 
@@ -166,6 +176,34 @@ h3_12
 >>> g.to_parquet('./output-data/parcels.12.geo.parquet')
 ```
 
+An example for S2 output (using `s2sphere`):
+
+```python
+import pandas as pd
+import geopandas as gpd
+import s2sphere
+from shapely.geometry import Polygon
+
+RES = 18
+df = pd.read_parquet(f'~/output-data/ponds-with-holes.s2.{RES}.pq')
+df = df.reset_index()
+
+def s2id_to_polygon(s2_id_hex):
+    cell_id = s2sphere.CellId.from_token(s2_id_hex)
+    cell = s2sphere.Cell(cell_id)
+    vertices = []
+    for i in range(4):
+        vertex = cell.get_vertex(i)
+        lat_lng = s2sphere.LatLng.from_point(vertex)
+        vertices.append((lat_lng.lng().degrees, lat_lng.lat().degrees))  # (lon, lat)
+    return Polygon(vertices)
+
+df['geometry'] = df[f's2_{RES}'].apply(s2id_to_polygon)
+df = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')  # WGS84
+df.to_parquet(f'sample-{RES}.parquet')
+```
+
 ### For development
 
 In brief, to get started:
@@ -175,6 +213,7 @@ In brief, to get started:
 - If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
 - On Linux, install GDAL 3.8+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
 - Create the virtual environment with `poetry init`. This will install necessary dependencies.
+  - If the installation of `s2geometry` fails, you may require SWIG to build it. (A command like `conda install swig` or `sudo dnf install swig`, depending on your platform.)
 - Subsequently, the virtual environment can be re-activated with `poetry shell`.
 
 If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
@@ -199,7 +238,7 @@ vector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpk
 With a PostgreSQL/PostGIS connection:
 
 ```bash
-vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet
+vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -lyr topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet
 ```
 
 ## Citation
@@ -209,14 +248,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
   title={{vector2dggs}},
   author={Ardo, James and Law, Richard},
   url={https://github.com/manaakiwhenua/vector2dggs},
-  version={0.6.3},
+  version={0.9.0},
   date={2023-04-20}
 }
 ```
 
 APA/Harvard
 
-> Ardo, J., & Law, R. (2023). vector2dggs (0.6.3) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
+> Ardo, J., & Law, R. (2023). vector2dggs (0.9.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
 
 [![manaakiwhenua-standards](https://github.com/manaakiwhenua/vector2dggs/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)
 
vector2dggs-0.9.0.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+vector2dggs/__init__.py,sha256=L8qKCe-XFylNfRXefZ1yGESlLF24qwQQ87szPZJO6Zg,27
+vector2dggs/cli.py,sha256=d_4skD62k6pXUWgDdVHbDwpe4A4yo62ZFx8Cp_6GpBA,767
+vector2dggs/common.py,sha256=l5koOX1Ps0v5D7MgzHtK1t99hXnGA7b6I82n2rBOldE,10496
+vector2dggs/constants.py,sha256=_cj3Pf52gsXfWwvpsbekE8h1yD_1jS9xqzRg2mRCq3w,1759
+vector2dggs/geohash.py,sha256=t90FlZRQCH8lmtTHe2kPMcLTIf1nrrf2j-m95xk4xPc,7534
+vector2dggs/h3.py,sha256=Bu_4T1WIDuTv_tJWTS8BgPmHRiCozfUUh2CxBwk98Gw,6310
+vector2dggs/katana.py,sha256=v4BRzVCsroC6RzIYdxLfrr9eFOdmXb5S9jXBMs5tgSo,3571
+vector2dggs/rHP.py,sha256=tC4LvqRPMmgUd36BppkvYeq94pPBhO1vBDQ-aaiHUg4,6410
+vector2dggs/s2.py,sha256=HEpFTEL4UaZLjybKZ_q06QFjPuQ48MDLeg_qGc0NMEw,10835
+vector2dggs-0.9.0.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+vector2dggs-0.9.0.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
+vector2dggs-0.9.0.dist-info/METADATA,sha256=7y97ZXmDNqUQ-n8M-BgOE2XLG-pJ6f_aNGjzVlCUFzc,11534
+vector2dggs-0.9.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vector2dggs-0.9.0.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
+vector2dggs-0.9.0.dist-info/RECORD,,
vector2dggs-0.6.3.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
-vector2dggs/__init__.py,sha256=75x2g9bnxuIHCJ-8fmoB5K2n_V2ayIxdRNvYsNUaMhs,27
-vector2dggs/cli.py,sha256=HoPp7Bwk2kZghAms6wNepx-bFhoAuHH7WXACMIy3MuM,652
-vector2dggs/common.py,sha256=DL3ohG-QQyI-phyxeO6Fi2BOwWnFct-I_Y87_XC2SRQ,10578
-vector2dggs/constants.py,sha256=u6n6XNvEVLUexn9Sb2rc22s2B4Rrg_VXFJaM7uEy-9Q,536
-vector2dggs/h3.py,sha256=GgiGOVbsXXNp95KWKKmJZvDxGFj91TTWl575OaPZ6yk,6145
-vector2dggs/katana.py,sha256=pgVWy032NkT5yilUO0d0IKH4NUvY7DJLjmfsxhBiF08,3407
-vector2dggs/rHP.py,sha256=Y36tPbtY-tYBUFILHD-xnUxa2yKlYotGP6043Bg5nZc,6450
-vector2dggs-0.6.3.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-vector2dggs-0.6.3.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
-vector2dggs-0.6.3.dist-info/METADATA,sha256=sFXXSXOutJzbTLRviHXXn7RkN51SCfbD6ZRBeledcmY,10223
-vector2dggs-0.6.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-vector2dggs-0.6.3.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
-vector2dggs-0.6.3.dist-info/RECORD,,