vector2dggs 0.5.3__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vector2dggs/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__: str = "0.5.3"
1
+ __version__: str = "0.6.0"
vector2dggs/h3.py CHANGED
@@ -41,6 +41,7 @@ warnings.filterwarnings(
41
41
  DEFAULT_PARENT_OFFSET = 6
42
42
  DEFAULT_CHUNK_SIZE = 50
43
43
 
44
+
44
45
  class ParentResolutionException(Exception):
45
46
  pass
46
47
 
@@ -68,29 +69,45 @@ def polyfill(
68
69
  output_directory: str,
69
70
  ) -> None:
70
71
  """
71
- Reads a geoparquet, performs H3 polyfilling,
72
- and writes out to parquet.
72
+ Reads a geoparquet, performs H3 polyfilling (for polygons),
73
+ linetracing (for linestrings), and writes out to parquet.
73
74
  """
74
- df = (
75
- gpd.read_parquet(pq_in)
76
- .reset_index()
77
- .drop(columns=[spatial_sort_col])
78
- )
75
+ df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
79
76
  if len(df.index) == 0:
80
77
  # Input is empty, nothing to polyfill
81
78
  return None
82
- df = df.h3.polyfill_resample(resolution, return_geometry=False)
79
+
80
+ df_polygon = df[df.geom_type == "Polygon"]
81
+ if len(df_polygon.index) > 0:
82
+ df_polygon = df_polygon.h3.polyfill_resample(
83
+ resolution, return_geometry=False
84
+ ).drop(columns=["index"])
85
+
86
+ df_linestring = df[df.geom_type == "LineString"]
87
+ if len(df_linestring.index) > 0:
88
+ df_linestring = (
89
+ df_linestring.h3.linetrace(resolution)
90
+ .explode("h3_linetrace")
91
+ .set_index("h3_linetrace")
92
+ )
93
+ df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
94
+
95
+ df = pd.concat(
96
+ map(
97
+ lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
98
+ [df_polygon, df_linestring],
99
+ )
100
+ )
101
+
83
102
  if len(df.index) == 0:
84
103
  # Polyfill resulted in empty output (e.g. large cell, small feature)
85
104
  return None
86
- df = pd.DataFrame(df).drop(columns=["index", "geometry"])
105
+
87
106
  df.index.rename(f"h3_{resolution:02}", inplace=True)
88
107
  parent_res: int = _get_parent_res(parent_res, resolution)
89
108
  # Secondary (parent) H3 index, used later for partitioning
90
109
  df.h3.h3_to_parent(parent_res).to_parquet(
91
- PurePath(output_directory, pq_in.name),
92
- engine="auto",
93
- compression="ZSTD"
110
+ PurePath(output_directory, pq_in.name), engine="auto", compression="ZSTD"
94
111
  )
95
112
  return None
96
113
 
@@ -102,7 +119,7 @@ def polyfill_star(args) -> None:
102
119
  def _parent_partitioning(
103
120
  input_dir: Path,
104
121
  output_dir: Path,
105
- resolution,
122
+ resolution: int,
106
123
  parent_res: Union[None, int],
107
124
  **kwargs,
108
125
  ) -> None:
@@ -129,6 +146,25 @@ def _parent_partitioning(
129
146
  return
130
147
 
131
148
 
149
def drop_condition(
    df: pd.DataFrame,
    drop_index: pd.Index,
    log_statement: str,
    warning_threshold: float = 0.01,
) -> pd.DataFrame:
    """
    Drop the rows labelled by ``drop_index`` from ``df`` and log the loss.

    ``log_statement`` is always logged at INFO level before dropping. If any
    rows were removed, a summary (count and percentage of the input) is
    logged: at INFO level when the dropped fraction is below
    ``warning_threshold``, otherwise at WARNING level.

    :param df: frame to filter.
    :param drop_index: index labels of the rows to remove.
    :param log_statement: message describing what is being dropped.
    :param warning_threshold: dropped fraction (0-1) at or above which the
        summary is escalated to a warning.
    :return: ``df`` without the rows labelled by ``drop_index``.
    """
    LOGGER.info(log_statement)
    _before = len(df)
    df = df.drop(drop_index)
    _diff = _before - len(df)
    if _diff:
        # A non-zero difference implies _before > 0, so the division is safe.
        _fraction = _diff / float(_before)
        # Logger.warn is a deprecated alias; Logger.warning is the supported name.
        log_method = LOGGER.info if _fraction < warning_threshold else LOGGER.warning
        log_method(f"Dropped {_diff} rows ({_fraction*100:.2f}%)")
    return df
166
+
167
+
132
168
  def _index(
133
169
  input_file: Union[Path, str],
134
170
  output_directory: Union[Path, str],
@@ -193,11 +229,24 @@ def _index(
193
229
  .explode(index_parts=False) # Explode from GeometryCollection
194
230
  .explode(index_parts=False) # Explode multipolygons to polygons
195
231
  ).reset_index()
196
- LOGGER.info("Dropping empty or null geometries")
197
- df = (
198
- df.drop(df[(df.geometry.is_empty|df.geometry.isna())].index)
199
- .reset_index()
200
- )
232
+
233
+ drop_conditions = [
234
+ {
235
+ "index": lambda frame: frame[
236
+ (frame.geometry.is_empty | frame.geometry.isna())
237
+ ],
238
+ "message": "Dropping empty or null geometries",
239
+ },
240
+ {
241
+ "index": lambda frame: frame[
242
+ (frame.geometry.geom_type != "Polygon")
243
+ & (frame.geometry.geom_type != "LineString")
244
+ ], # NB currently points and other types are lost; in principle, these could be indexed
245
+ "message": "Dropping non-polygonal geometries",
246
+ },
247
+ ]
248
+ for condition in drop_conditions:
249
+ df = drop_condition(df, condition["index"](df).index, condition["message"])
201
250
 
202
251
  ddf = dgpd.from_geopandas(df, chunksize=max(1, chunksize), sort=True)
203
252
 
@@ -336,7 +385,7 @@ def _index(
336
385
  "--tempdir",
337
386
  default=tempfile.tempdir,
338
387
  type=click.Path(),
339
- help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written."
388
+ help="Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written.",
340
389
  )
341
390
  @click.option("-o", "--overwrite", is_flag=True)
342
391
  @click.version_option(version=__version__)
vector2dggs/katana.py CHANGED
@@ -26,7 +26,7 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
26
26
  if not geometry.is_valid:
27
27
  # print(explain_validity(geometry))
28
28
  geometry = make_valid(geometry)
29
- if geometry.type == 'GeometryCollection':
29
+ if geometry.type == "GeometryCollection":
30
30
  geometry.normalize()
31
31
  geometry = geometry.buffer(0)
32
32
  bounds = geometry.bounds
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vector2dggs
3
- Version: 0.5.3
3
+ Version: 0.6.0
4
4
  Summary: CLI DGGS indexer for vector geospatial data
5
5
  Home-page: https://github.com/manaakiwhenua/vector2dggs
6
6
  License: LGPL-3.0-or-later
@@ -21,12 +21,12 @@ Requires-Dist: click (>=8.1.3,<9.0.0)
21
21
  Requires-Dist: click-log (>=0.4.0,<0.5.0)
22
22
  Requires-Dist: dask (>=2023.3.0,<2024.0.0)
23
23
  Requires-Dist: dask-geopandas (>=0.3.0,<0.4.0)
24
- Requires-Dist: gdal (>=3.6.4,<4.0.0)
24
+ Requires-Dist: gdal (>=3.8.0,<4.0.0)
25
25
  Requires-Dist: geopandas (>=0.12.2,<0.13.0)
26
- Requires-Dist: h3pandas (>=0.2.3,<0.3.0)
26
+ Requires-Dist: h3pandas (>=0.2.6,<0.3.0)
27
27
  Requires-Dist: psycopg2 (>=2.9.6,<3.0.0)
28
- Requires-Dist: pyarrow (>=11.0.0,<12.0.0)
29
- Requires-Dist: pygeos (>=0.14,<0.15)
28
+ Requires-Dist: pyarrow (>=14.0.1,<15.0.0)
29
+ Requires-Dist: pygeos (>=0.13,<0.14)
30
30
  Requires-Dist: pyproj (>=3.5.0,<4.0.0)
31
31
  Requires-Dist: sqlalchemy (>=2.0.10,<3.0.0)
32
32
  Requires-Dist: tqdm (>=4.65.0,<5.0.0)
@@ -187,14 +187,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
187
187
  title={{vector2dggs}},
188
188
  author={Ardo, James and Law, Richard},
189
189
  url={https://github.com/manaakiwhenua/vector2dggs},
190
- version={0.5.3},
190
+ version={0.6.0},
191
191
  date={2023-04-20}
192
192
  }
193
193
  ```
194
194
 
195
195
  APA/Harvard
196
196
 
197
- > Ardo, J., & Law, R. (2023). vector2dggs (0.5.3) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
197
+ > Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
198
198
 
199
199
  [![manaakiwhenua-standards](https://github.com/manaakiwhenua/vector2dggs/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)
200
200
 
@@ -0,0 +1,10 @@
1
+ vector2dggs/__init__.py,sha256=8xTECrwGH36hEfgoQ2Zcq4dfigWVZmIFK3OHqNOg-FQ,27
2
+ vector2dggs/cli.py,sha256=tL4NJ99uQsqoVinwYadna1a4ko5v2sdZaFaeDAj6QNE,599
3
+ vector2dggs/h3.py,sha256=kX3S630l-LOm04pe5YT-5g99DIV0t32GFYUEs0Hc5ZQ,14354
4
+ vector2dggs/katana.py,sha256=xx5R9lDuraWLK5bfGkiDQDC2r2naj_sKZlYeB52_xwc,3320
5
+ vector2dggs-0.6.0.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
6
+ vector2dggs-0.6.0.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
7
+ vector2dggs-0.6.0.dist-info/METADATA,sha256=d8dvx5_wXFO-ZMKcdXmc2TuXQv5B5UFRo1hq4fghe8w,9777
8
+ vector2dggs-0.6.0.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
9
+ vector2dggs-0.6.0.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
10
+ vector2dggs-0.6.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.6.1
2
+ Generator: poetry-core 1.4.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,10 +0,0 @@
1
- vector2dggs/__init__.py,sha256=ricvdCMTDRNJiioloPx-XsgIV3l9wyNaFT5PqZCvlLI,27
2
- vector2dggs/cli.py,sha256=tL4NJ99uQsqoVinwYadna1a4ko5v2sdZaFaeDAj6QNE,599
3
- vector2dggs/h3.py,sha256=iEZGDaLYfblXXvmjWWhveQiaIxZ1xYvXx0phYBFgkLE,12726
4
- vector2dggs/katana.py,sha256=rxmNvXTdfvFs7Q17BNGPin_5H-jSLWGt63RKrEgYGOo,3320
5
- vector2dggs-0.5.3.dist-info/COPYING,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
6
- vector2dggs-0.5.3.dist-info/COPYING.LESSER,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
7
- vector2dggs-0.5.3.dist-info/METADATA,sha256=wyof6tDZ7wY9-dJQwkaY0V6f5Btfu3FGT8we50mtgIU,9777
8
- vector2dggs-0.5.3.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
9
- vector2dggs-0.5.3.dist-info/entry_points.txt,sha256=5h8LB9L2oOE5u_N7FRGtu4JDwa553iPs4u0XhcLeLZU,52
10
- vector2dggs-0.5.3.dist-info/RECORD,,