vector2dggs 0.6.0__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vector2dggs
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: CLI DGGS indexer for vector geospatial data
5
5
  Home-page: https://github.com/manaakiwhenua/vector2dggs
6
6
  License: LGPL-3.0-or-later
@@ -9,27 +9,27 @@ Author: James Ardo
9
9
  Author-email: ardoj@landcareresearch.co.nz
10
10
  Maintainer: Richard Law
11
11
  Maintainer-email: lawr@landcareresearch.co.nz
12
- Requires-Python: >=3.10,<4.0
12
+ Requires-Python: >=3.11,<4.0
13
13
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
14
14
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.10
16
15
  Classifier: Programming Language :: Python :: 3.11
17
16
  Classifier: Topic :: Scientific/Engineering
18
17
  Classifier: Topic :: Scientific/Engineering :: GIS
19
18
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
20
- Requires-Dist: click (>=8.1.3,<9.0.0)
19
+ Requires-Dist: click (>=8.1.7,<9.0.0)
21
20
  Requires-Dist: click-log (>=0.4.0,<0.5.0)
22
- Requires-Dist: dask (>=2023.3.0,<2024.0.0)
23
- Requires-Dist: dask-geopandas (>=0.3.0,<0.4.0)
24
- Requires-Dist: gdal (>=3.8.0,<4.0.0)
25
- Requires-Dist: geopandas (>=0.12.2,<0.13.0)
21
+ Requires-Dist: dask (>=2024.8.0,<2025.0.0)
22
+ Requires-Dist: dask-geopandas (>=0.4.1,<0.5.0)
23
+ Requires-Dist: gdal (==3.8.4)
24
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0)
26
25
  Requires-Dist: h3pandas (>=0.2.6,<0.3.0)
27
- Requires-Dist: psycopg2 (>=2.9.6,<3.0.0)
28
- Requires-Dist: pyarrow (>=14.0.1,<15.0.0)
29
- Requires-Dist: pygeos (>=0.13,<0.14)
30
- Requires-Dist: pyproj (>=3.5.0,<4.0.0)
31
- Requires-Dist: sqlalchemy (>=2.0.10,<3.0.0)
32
- Requires-Dist: tqdm (>=4.65.0,<5.0.0)
26
+ Requires-Dist: numpy (<2)
27
+ Requires-Dist: psycopg2 (>=2.9.9,<3.0.0)
28
+ Requires-Dist: pyarrow (>=17.0.0,<18.0.0)
29
+ Requires-Dist: pyproj (>=3.6.1,<4.0.0)
30
+ Requires-Dist: shapely (>=2.0.5,<3.0.0)
31
+ Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
32
+ Requires-Dist: tqdm (>=4.66.5,<5.0.0)
33
33
  Project-URL: Repository, https://github.com/manaakiwhenua/vector2dggs
34
34
  Description-Content-Type: text/markdown
35
35
 
@@ -159,6 +159,8 @@ In brief, to get started:
159
159
 
160
160
  If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
161
161
 
162
+ Alternatively, it is also possible to install using pip with `pip install -e .`, and bypass Poetry.
163
+
162
164
  #### Code formatting
163
165
 
164
166
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -187,14 +189,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
187
189
  title={{vector2dggs}},
188
190
  author={Ardo, James and Law, Richard},
189
191
  url={https://github.com/manaakiwhenua/vector2dggs},
190
- version={0.6.0},
192
+ version={0.6.1},
191
193
  date={2023-04-20}
192
194
  }
193
195
  ```
194
196
 
195
197
  APA/Harvard
196
198
 
197
- > Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
199
+ > Ardo, J., & Law, R. (2023). vector2dggs (0.6.1) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
198
200
 
199
201
  [![manaakiwhenua-standards](https://github.com/manaakiwhenua/vector2dggs/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)
200
202
 
@@ -124,6 +124,8 @@ In brief, to get started:
124
124
 
125
125
  If you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.
126
126
 
127
+ Alternatively, it is also possible to install using pip with `pip install -e .`, and bypass Poetry.
128
+
127
129
  #### Code formatting
128
130
 
129
131
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -152,13 +154,13 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
152
154
  title={{vector2dggs}},
153
155
  author={Ardo, James and Law, Richard},
154
156
  url={https://github.com/manaakiwhenua/vector2dggs},
155
- version={0.6.0},
157
+ version={0.6.1},
156
158
  date={2023-04-20}
157
159
  }
158
160
  ```
159
161
 
160
162
  APA/Harvard
161
163
 
162
- > Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
164
+ > Ardo, J., & Law, R. (2023). vector2dggs (0.6.1) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
163
165
 
164
166
  [![manaakiwhenua-standards](https://github.com/manaakiwhenua/vector2dggs/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "vector2dggs"
3
- version = "0.6.0"
3
+ version = "0.6.1"
4
4
  description = "CLI DGGS indexer for vector geospatial data"
5
5
  authors = ["James Ardo <ardoj@landcareresearch.co.nz>"]
6
6
  maintainers = ["Richard Law <lawr@landcareresearch.co.nz>"]
@@ -15,20 +15,21 @@ classifiers = [
15
15
  ]
16
16
 
17
17
  [tool.poetry.dependencies]
18
- python = "^3.10"
19
- gdal = "^3.8.0"
20
- geopandas = "^0.12.2"
18
+ python = "^3.11"
19
+ gdal = "3.8.4"
20
+ geopandas = "^1.0.1"
21
21
  h3pandas = "^0.2.6"
22
- dask-geopandas = "^0.3.0"
23
- dask = "^2023.3.0"
24
- click = "^8.1.3"
25
- tqdm = "^4.65.0"
22
+ dask-geopandas = "^0.4.1"
23
+ dask = "^2024.8.0"
24
+ click = "^8.1.7"
25
+ tqdm = "^4.66.5"
26
26
  click-log = "^0.4.0"
27
- pyarrow = "^14.0.1"
28
- pygeos = "^0.13"
29
- pyproj = "^3.5.0"
30
- sqlalchemy = "^2.0.10"
31
- psycopg2 = "^2.9.6"
27
+ pyarrow = "^17.0.0"
28
+ pyproj = "^3.6.1"
29
+ sqlalchemy = "^2.0.32"
30
+ psycopg2 = "^2.9.9"
31
+ shapely = "^2.0.5"
32
+ numpy = "<2"
32
33
 
33
34
  [tool.poetry.group.dev.dependencies]
34
35
  pytest = "^7.2.2"
@@ -0,0 +1 @@
1
+ __version__: str = "0.6.1"
@@ -11,8 +11,6 @@ from typing import Union
11
11
  from urllib.parse import urlparse
12
12
  import warnings
13
13
 
14
- os.environ["USE_PYGEOS"] = "0"
15
-
16
14
  import click
17
15
  import click_log
18
16
  import dask.dataframe as dd
@@ -69,8 +67,8 @@ def polyfill(
69
67
  output_directory: str,
70
68
  ) -> None:
71
69
  """
72
- Reads a geoparquet, performs H3 polyfilling (for polygons),
73
- linetracing (for linestrings), and writes out to parquet.
70
+ Reads a geoparquet, performs H3 polyfilling (for Polygon),
71
+ linetracing (for LineString), and writes out to parquet.
74
72
  """
75
73
  df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
76
74
  if len(df.index) == 0:
@@ -215,7 +213,7 @@ def _index(
215
213
  # Remove all attributes except the geometry
216
214
  df = df.loc[:, ["geometry"]]
217
215
 
218
- LOGGER.info("Watch out for ninjas! (Cutting polygons)")
216
+ LOGGER.info("Cutting large geometries")
219
217
  with tqdm(total=df.shape[0]) as pbar:
220
218
  for index, row in df.iterrows():
221
219
  df.loc[index, "geometry"] = GeometryCollection(
@@ -242,7 +240,7 @@ def _index(
242
240
  (frame.geometry.geom_type != "Polygon")
243
241
  & (frame.geometry.geom_type != "LineString")
244
242
  ], # NB currently points and other types are lost; in principle, these could be indexed
245
- "message": "Dropping non-polygonal geometries",
243
+ "message": "Dropping unsupported geometries",
246
244
  },
247
245
  ]
248
246
  for condition in drop_conditions:
@@ -342,7 +340,7 @@ def _index(
342
340
  required=False,
343
341
  default=None,
344
342
  type=int,
345
- help="Set the coordinate reference system (CRS) used for cutting large polygons (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
343
+ help="Set the coordinate reference system (CRS) used for cutting large geometries (see `--cur-threshold`). Defaults to the same CRS as the input. Should be a valid EPSG code.",
346
344
  nargs=1,
347
345
  )
348
346
  @click.option(
@@ -351,7 +349,7 @@ def _index(
351
349
  required=True,
352
350
  default=5000,
353
351
  type=int,
354
- help="Cutting up large polygons into smaller pieces based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
352
+ help="Cutting up large geometries into smaller geometries based on a target length. Units are assumed to match the input CRS units unless the `--cut_crs` is also given, in which case units match the units of the supplied CRS.",
355
353
  nargs=1,
356
354
  )
357
355
  @click.option(
@@ -11,13 +11,20 @@ Redistribution and use in source and binary forms, with or without modification,
11
11
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12
12
  """
13
13
 
14
- from shapely.geometry import box, Polygon, MultiPolygon, GeometryCollection
14
+ from shapely.geometry import (
15
+ box,
16
+ Polygon,
17
+ MultiPolygon,
18
+ LineString,
19
+ MultiLineString,
20
+ GeometryCollection,
21
+ )
15
22
  from shapely.validation import explain_validity, make_valid
16
23
 
17
24
 
18
25
  def katana(geometry, threshold, count=0) -> GeometryCollection:
19
26
  """
20
- Split a polygon into two parts across it's shortest dimension.
27
+ Split a geometry into two parts across its shortest dimension.
21
28
  Invalid input `geometry` will silently be made valid (if possible).
22
29
  """
23
30
  if geometry is None:
@@ -53,7 +60,7 @@ def katana(geometry, threshold, count=0) -> GeometryCollection:
53
60
  if not isinstance(c, GeometryCollection):
54
61
  c = GeometryCollection([c])
55
62
  for e in c.geoms:
56
- if isinstance(e, (Polygon, MultiPolygon)):
63
+ if isinstance(e, (Polygon, MultiPolygon, LineString, MultiLineString)):
57
64
  result.extend(katana(e, threshold, count + 1))
58
65
  if count > 0:
59
66
  return result
@@ -1,46 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- from setuptools import setup
3
-
4
- packages = \
5
- ['vector2dggs']
6
-
7
- package_data = \
8
- {'': ['*']}
9
-
10
- install_requires = \
11
- ['click-log>=0.4.0,<0.5.0',
12
- 'click>=8.1.3,<9.0.0',
13
- 'dask-geopandas>=0.3.0,<0.4.0',
14
- 'dask>=2023.3.0,<2024.0.0',
15
- 'gdal>=3.8.0,<4.0.0',
16
- 'geopandas>=0.12.2,<0.13.0',
17
- 'h3pandas>=0.2.6,<0.3.0',
18
- 'psycopg2>=2.9.6,<3.0.0',
19
- 'pyarrow>=14.0.1,<15.0.0',
20
- 'pygeos>=0.13,<0.14',
21
- 'pyproj>=3.5.0,<4.0.0',
22
- 'sqlalchemy>=2.0.10,<3.0.0',
23
- 'tqdm>=4.65.0,<5.0.0']
24
-
25
- entry_points = \
26
- {'console_scripts': ['vector2dggs = vector2dggs.cli:main']}
27
-
28
- setup_kwargs = {
29
- 'name': 'vector2dggs',
30
- 'version': '0.6.0',
31
- 'description': 'CLI DGGS indexer for vector geospatial data',
32
- 'long_description': '# vector2dggs\n\n[![pypi](https://img.shields.io/pypi/v/vector2dggs?label=vector2dggs)](https://pypi.org/project/vector2dggs/)\n\nPython-based CLI tool to index raster files to DGGS in parallel, writing out to Parquet.\n\nThis is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/raster2dggs).\n\nCurrently only supports H3 DGGS, and probably has other limitations since it has been developed for a specific internal use case, though it is intended as a general-purpose abstraction. Contributions, suggestions, bug reports and strongly worded letters are all welcome.\n\nCurrently only supports polygons; but both coverages (strictly non-overlapping polygons), and sets of polygons that do/may overlap, are supported. Overlapping polygons are captured by ensuring that DGGS cell IDs may be non-unique (repeated) in the output.\n\n![Example use case for vector2dggs, showing parcels indexed to a high H3 resolution](./docs/imgs/vector2dggs-example.png "Example use case for vector2dggs, showing parcels indexed to a high H3 resolution")\n\n## Installation\n\n```bash\npip install vector2dggs\n```\n\n## Usage\n\n```bash\nvector2dggs h3 --help\nUsage: vector2dggs h3 [OPTIONS] VECTOR_INPUT OUTPUT_DIRECTORY\n\n Ingest a vector dataset and index it to the H3 DGGS.\n\n VECTOR_INPUT is the path to input vector geospatial data. OUTPUT_DIRECTORY\n should be a directory, not a file or database table, as it will instead be\n the write location for an Apache Parquet data store.\n\nOptions:\n -v, --verbosity LVL Either CRITICAL, ERROR, WARNING, INFO or\n DEBUG [default: INFO]\n -r, --resolution [0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15]\n H3 resolution to index [required]\n -pr, --parent_res [0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15]\n H3 Parent resolution for the output\n partition. 
Defaults to resolution - 6\n -id, --id_field TEXT Field to use as an ID; defaults to a\n constructed single 0...n index on the\n original feature order.\n -k, --keep_attributes Retain attributes in output. The default is\n to create an output that only includes H3\n cell ID and the ID given by the -id field\n (or the default index ID).\n -ch, --chunksize INTEGER The number of rows per index partition to\n use when spatially partioning. Adjusting\n this number will trade off memory use and\n time. [default: 50; required]\n -s, --spatial_sorting [hilbert|morton|geohash]\n Spatial sorting method when perfoming\n spatial partitioning. [default: hilbert]\n -crs, --cut_crs INTEGER Set the coordinate reference system (CRS)\n used for cutting large polygons (see `--cur-\n threshold`). Defaults to the same CRS as the\n input. Should be a valid EPSG code.\n -c, --cut_threshold INTEGER Cutting up large polygons into smaller\n pieces based on a target length. Units are\n assumed to match the input CRS units unless\n the `--cut_crs` is also given, in which case\n units match the units of the supplied CRS.\n [default: 5000; required]\n -t, --threads INTEGER Amount of threads used for operation\n [default: 7]\n -tbl, --table TEXT Name of the table to read when using a\n spatial database connection as input\n -g, --geom_col TEXT Column name to use when using a spatial\n database connection as input [default:\n geom]\n --tempdir PATH Temporary data is created during the\n execution of this program. 
This parameter\n allows you to control where this data will\n be written.\n -o, --overwrite\n --version Show the version and exit.\n --help Show this message and exit.\n```\n\n## Visualising output\n\nOutput is in the Apache Parquet format, a directory with one file per partition.\n\nFor a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:\n- Joining attribute data from the original feature-level data onto computer DGGS cells.\n- Joining other data to this output on the H3 cell ID. (The output has a column like `h3_\\d{2}`, e.g. `h3_09` or `h3_12` according to the target resolution.)\n\nGeoparquet output (hexagon boundaries):\n\n```python\n>>> import pandas as pd\n>>> import h3pandas\n>>> g = pd.read_parquet(\'./output-data/nz-property-titles.12.parquet\').h3.h3_to_geo_boundary()\n>>> g\n title_no geometry\nh3_12 \n8cbb53a734553ff NA94D/635 POLYGON ((174.28483 -35.69315, 174.28482 -35.6...\n8cbb53a734467ff NA94D/635 POLYGON ((174.28454 -35.69333, 174.28453 -35.6...\n8cbb53a734445ff NA94D/635 POLYGON ((174.28416 -35.69368, 174.28415 -35.6...\n8cbb53a734551ff NA94D/635 POLYGON ((174.28496 -35.69329, 174.28494 -35.6...\n8cbb53a734463ff NA94D/635 POLYGON ((174.28433 -35.69335, 174.28432 -35.6...\n... ... 
...\n8cbb53a548b2dff NA62D/324 POLYGON ((174.30249 -35.69369, 174.30248 -35.6...\n8cbb53a548b61ff NA62D/324 POLYGON ((174.30232 -35.69402, 174.30231 -35.6...\n8cbb53a548b11ff NA57C/785 POLYGON ((174.30140 -35.69348, 174.30139 -35.6...\n8cbb53a548b15ff NA57C/785 POLYGON ((174.30161 -35.69346, 174.30160 -35.6...\n8cbb53a548b17ff NA57C/785 POLYGON ((174.30149 -35.69332, 174.30147 -35.6...\n\n[52736 rows x 2 columns]\n>>> g.to_parquet(\'./output-data/parcels.12.geo.parquet\')\n```\n\n### For development\n\nIn brief, to get started:\n\n- Install [Poetry](https://python-poetry.org/docs/basic-usage/)\n- Install [GDAL](https://gdal.org/)\n - If you\'re on Windows, `pip install gdal` may be necessary before running the subsequent commands.\n - On Linux, install GDAL 3.6+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.\n- Create the virtual environment with `poetry init`. This will install necessary dependencies.\n- Subsequently, the virtual environment can be re-activated with `poetry shell`.\n\nIf you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.\n\n#### Code formatting\n\n[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)\n\nPlease run `black .` before committing.\n\n## Example commands\n\nWith a local GPKG:\n\n```bash\nvector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpkg ~/Downloads/nz-property-titles.parquet\n\n```\n\nWith a PostgreSQL/PostGIS connection:\n\n```bash\nvector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet\n```\n\n## Citation\n\n```bibtex\n@software{vector2dggs,\n title={{vector2dggs}},\n author={Ardo, James and Law, Richard},\n url={https://github.com/manaakiwhenua/vector2dggs},\n 
version={0.6.0},\n date={2023-04-20}\n}\n```\n\nAPA/Harvard\n\n> Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs\n\n[![manaakiwhenua-standards](https://github.com/manaakiwhenua/vector2dggs/workflows/manaakiwhenua-standards/badge.svg)](https://github.com/manaakiwhenua/manaakiwhenua-standards)\n',
33
- 'author': 'James Ardo',
34
- 'author_email': 'ardoj@landcareresearch.co.nz',
35
- 'maintainer': 'Richard Law',
36
- 'maintainer_email': 'lawr@landcareresearch.co.nz',
37
- 'url': 'https://github.com/manaakiwhenua/vector2dggs',
38
- 'packages': packages,
39
- 'package_data': package_data,
40
- 'install_requires': install_requires,
41
- 'entry_points': entry_points,
42
- 'python_requires': '>=3.10,<4.0',
43
- }
44
-
45
-
46
- setup(**setup_kwargs)
@@ -1 +0,0 @@
1
- __version__: str = "0.6.0"