vector2dggs 0.5.4__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/PKG-INFO +7 -7
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/README.md +2 -2
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/pyproject.toml +5 -5
- vector2dggs-0.6.0/setup.py +46 -0
- vector2dggs-0.6.0/vector2dggs/__init__.py +1 -0
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/vector2dggs/h3.py +28 -5
- vector2dggs-0.5.4/vector2dggs/__init__.py +0 -1
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/vector2dggs/cli.py +0 -0
- {vector2dggs-0.5.4 → vector2dggs-0.6.0}/vector2dggs/katana.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vector2dggs
|
3
|
-
Version: 0.5.4
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: CLI DGGS indexer for vector geospatial data
|
5
5
|
Home-page: https://github.com/manaakiwhenua/vector2dggs
|
6
6
|
License: LGPL-3.0-or-later
|
@@ -21,12 +21,12 @@ Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
21
21
|
Requires-Dist: click-log (>=0.4.0,<0.5.0)
|
22
22
|
Requires-Dist: dask (>=2023.3.0,<2024.0.0)
|
23
23
|
Requires-Dist: dask-geopandas (>=0.3.0,<0.4.0)
|
24
|
-
Requires-Dist: gdal (>=3.
|
24
|
+
Requires-Dist: gdal (>=3.8.0,<4.0.0)
|
25
25
|
Requires-Dist: geopandas (>=0.12.2,<0.13.0)
|
26
|
-
Requires-Dist: h3pandas (>=0.2.
|
26
|
+
Requires-Dist: h3pandas (>=0.2.6,<0.3.0)
|
27
27
|
Requires-Dist: psycopg2 (>=2.9.6,<3.0.0)
|
28
|
-
Requires-Dist: pyarrow (>=
|
29
|
-
Requires-Dist: pygeos (>=0.
|
28
|
+
Requires-Dist: pyarrow (>=14.0.1,<15.0.0)
|
29
|
+
Requires-Dist: pygeos (>=0.13,<0.14)
|
30
30
|
Requires-Dist: pyproj (>=3.5.0,<4.0.0)
|
31
31
|
Requires-Dist: sqlalchemy (>=2.0.10,<3.0.0)
|
32
32
|
Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
@@ -187,14 +187,14 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
|
|
187
187
|
title={{vector2dggs}},
|
188
188
|
author={Ardo, James and Law, Richard},
|
189
189
|
url={https://github.com/manaakiwhenua/vector2dggs},
|
190
|
-
version={0.5.4},
|
190
|
+
version={0.6.0},
|
191
191
|
date={2023-04-20}
|
192
192
|
}
|
193
193
|
```
|
194
194
|
|
195
195
|
APA/Harvard
|
196
196
|
|
197
|
-
> Ardo, J., & Law, R. (2023). vector2dggs (0.5.4) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
197
|
+
> Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
198
198
|
|
199
199
|
[](https://github.com/manaakiwhenua/manaakiwhenua-standards)
|
200
200
|
|
@@ -152,13 +152,13 @@ vector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake
|
|
152
152
|
title={{vector2dggs}},
|
153
153
|
author={Ardo, James and Law, Richard},
|
154
154
|
url={https://github.com/manaakiwhenua/vector2dggs},
|
155
|
-
version={0.5.4},
|
155
|
+
version={0.6.0},
|
156
156
|
date={2023-04-20}
|
157
157
|
}
|
158
158
|
```
|
159
159
|
|
160
160
|
APA/Harvard
|
161
161
|
|
162
|
-
> Ardo, J., & Law, R. (2023). vector2dggs (0.5.4) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
162
|
+
> Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs
|
163
163
|
|
164
164
|
[](https://github.com/manaakiwhenua/manaakiwhenua-standards)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "vector2dggs"
|
3
|
-
version = "0.5.4"
|
3
|
+
version = "0.6.0"
|
4
4
|
description = "CLI DGGS indexer for vector geospatial data"
|
5
5
|
authors = ["James Ardo <ardoj@landcareresearch.co.nz>"]
|
6
6
|
maintainers = ["Richard Law <lawr@landcareresearch.co.nz>"]
|
@@ -16,16 +16,16 @@ classifiers = [
|
|
16
16
|
|
17
17
|
[tool.poetry.dependencies]
|
18
18
|
python = "^3.10"
|
19
|
-
gdal = "^3.
|
19
|
+
gdal = "^3.8.0"
|
20
20
|
geopandas = "^0.12.2"
|
21
|
-
h3pandas = "^0.2.
|
21
|
+
h3pandas = "^0.2.6"
|
22
22
|
dask-geopandas = "^0.3.0"
|
23
23
|
dask = "^2023.3.0"
|
24
24
|
click = "^8.1.3"
|
25
25
|
tqdm = "^4.65.0"
|
26
26
|
click-log = "^0.4.0"
|
27
|
-
pyarrow = "^
|
28
|
-
pygeos = "^0.
|
27
|
+
pyarrow = "^14.0.1"
|
28
|
+
pygeos = "^0.13"
|
29
29
|
pyproj = "^3.5.0"
|
30
30
|
sqlalchemy = "^2.0.10"
|
31
31
|
psycopg2 = "^2.9.6"
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
from setuptools import setup
|
3
|
+
|
4
|
+
packages = \
|
5
|
+
['vector2dggs']
|
6
|
+
|
7
|
+
package_data = \
|
8
|
+
{'': ['*']}
|
9
|
+
|
10
|
+
install_requires = \
|
11
|
+
['click-log>=0.4.0,<0.5.0',
|
12
|
+
'click>=8.1.3,<9.0.0',
|
13
|
+
'dask-geopandas>=0.3.0,<0.4.0',
|
14
|
+
'dask>=2023.3.0,<2024.0.0',
|
15
|
+
'gdal>=3.8.0,<4.0.0',
|
16
|
+
'geopandas>=0.12.2,<0.13.0',
|
17
|
+
'h3pandas>=0.2.6,<0.3.0',
|
18
|
+
'psycopg2>=2.9.6,<3.0.0',
|
19
|
+
'pyarrow>=14.0.1,<15.0.0',
|
20
|
+
'pygeos>=0.13,<0.14',
|
21
|
+
'pyproj>=3.5.0,<4.0.0',
|
22
|
+
'sqlalchemy>=2.0.10,<3.0.0',
|
23
|
+
'tqdm>=4.65.0,<5.0.0']
|
24
|
+
|
25
|
+
entry_points = \
|
26
|
+
{'console_scripts': ['vector2dggs = vector2dggs.cli:main']}
|
27
|
+
|
28
|
+
setup_kwargs = {
|
29
|
+
'name': 'vector2dggs',
|
30
|
+
'version': '0.6.0',
|
31
|
+
'description': 'CLI DGGS indexer for vector geospatial data',
|
32
|
+
'long_description': '# vector2dggs\n\n[](https://pypi.org/project/vector2dggs/)\n\nPython-based CLI tool to index raster files to DGGS in parallel, writing out to Parquet.\n\nThis is the vector equivalent of [raster2dggs](https://github.com/manaakiwhenua/raster2dggs).\n\nCurrently only supports H3 DGGS, and probably has other limitations since it has been developed for a specific internal use case, though it is intended as a general-purpose abstraction. Contributions, suggestions, bug reports and strongly worded letters are all welcome.\n\nCurrently only supports polygons; but both coverages (strictly non-overlapping polygons), and sets of polygons that do/may overlap, are supported. Overlapping polygons are captured by ensuring that DGGS cell IDs may be non-unique (repeated) in the output.\n\n\n\n## Installation\n\n```bash\npip install vector2dggs\n```\n\n## Usage\n\n```bash\nvector2dggs h3 --help\nUsage: vector2dggs h3 [OPTIONS] VECTOR_INPUT OUTPUT_DIRECTORY\n\n Ingest a vector dataset and index it to the H3 DGGS.\n\n VECTOR_INPUT is the path to input vector geospatial data. OUTPUT_DIRECTORY\n should be a directory, not a file or database table, as it will instead be\n the write location for an Apache Parquet data store.\n\nOptions:\n -v, --verbosity LVL Either CRITICAL, ERROR, WARNING, INFO or\n DEBUG [default: INFO]\n -r, --resolution [0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15]\n H3 resolution to index [required]\n -pr, --parent_res [0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15]\n H3 Parent resolution for the output\n partition. Defaults to resolution - 6\n -id, --id_field TEXT Field to use as an ID; defaults to a\n constructed single 0...n index on the\n original feature order.\n -k, --keep_attributes Retain attributes in output. The default is\n to create an output that only includes H3\n cell ID and the ID given by the -id field\n (or the default index ID).\n -ch, --chunksize INTEGER The number of rows per index partition to\n use when spatially partioning. 
Adjusting\n this number will trade off memory use and\n time. [default: 50; required]\n -s, --spatial_sorting [hilbert|morton|geohash]\n Spatial sorting method when perfoming\n spatial partitioning. [default: hilbert]\n -crs, --cut_crs INTEGER Set the coordinate reference system (CRS)\n used for cutting large polygons (see `--cur-\n threshold`). Defaults to the same CRS as the\n input. Should be a valid EPSG code.\n -c, --cut_threshold INTEGER Cutting up large polygons into smaller\n pieces based on a target length. Units are\n assumed to match the input CRS units unless\n the `--cut_crs` is also given, in which case\n units match the units of the supplied CRS.\n [default: 5000; required]\n -t, --threads INTEGER Amount of threads used for operation\n [default: 7]\n -tbl, --table TEXT Name of the table to read when using a\n spatial database connection as input\n -g, --geom_col TEXT Column name to use when using a spatial\n database connection as input [default:\n geom]\n --tempdir PATH Temporary data is created during the\n execution of this program. This parameter\n allows you to control where this data will\n be written.\n -o, --overwrite\n --version Show the version and exit.\n --help Show this message and exit.\n```\n\n## Visualising output\n\nOutput is in the Apache Parquet format, a directory with one file per partition.\n\nFor a quick view of your output, you can read Apache Parquet with pandas, and then use h3-pandas and geopandas to convert this into a GeoPackage or GeoParquet for visualisation in a desktop GIS, such as QGIS. The Apache Parquet output is indexed by an ID column (which you can specify), so it should be ready for two intended use-cases:\n- Joining attribute data from the original feature-level data onto computer DGGS cells.\n- Joining other data to this output on the H3 cell ID. (The output has a column like `h3_\\d{2}`, e.g. 
`h3_09` or `h3_12` according to the target resolution.)\n\nGeoparquet output (hexagon boundaries):\n\n```python\n>>> import pandas as pd\n>>> import h3pandas\n>>> g = pd.read_parquet(\'./output-data/nz-property-titles.12.parquet\').h3.h3_to_geo_boundary()\n>>> g\n title_no geometry\nh3_12 \n8cbb53a734553ff NA94D/635 POLYGON ((174.28483 -35.69315, 174.28482 -35.6...\n8cbb53a734467ff NA94D/635 POLYGON ((174.28454 -35.69333, 174.28453 -35.6...\n8cbb53a734445ff NA94D/635 POLYGON ((174.28416 -35.69368, 174.28415 -35.6...\n8cbb53a734551ff NA94D/635 POLYGON ((174.28496 -35.69329, 174.28494 -35.6...\n8cbb53a734463ff NA94D/635 POLYGON ((174.28433 -35.69335, 174.28432 -35.6...\n... ... ...\n8cbb53a548b2dff NA62D/324 POLYGON ((174.30249 -35.69369, 174.30248 -35.6...\n8cbb53a548b61ff NA62D/324 POLYGON ((174.30232 -35.69402, 174.30231 -35.6...\n8cbb53a548b11ff NA57C/785 POLYGON ((174.30140 -35.69348, 174.30139 -35.6...\n8cbb53a548b15ff NA57C/785 POLYGON ((174.30161 -35.69346, 174.30160 -35.6...\n8cbb53a548b17ff NA57C/785 POLYGON ((174.30149 -35.69332, 174.30147 -35.6...\n\n[52736 rows x 2 columns]\n>>> g.to_parquet(\'./output-data/parcels.12.geo.parquet\')\n```\n\n### For development\n\nIn brief, to get started:\n\n- Install [Poetry](https://python-poetry.org/docs/basic-usage/)\n- Install [GDAL](https://gdal.org/)\n - If you\'re on Windows, `pip install gdal` may be necessary before running the subsequent commands.\n - On Linux, install GDAL 3.6+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.\n- Create the virtual environment with `poetry init`. 
This will install necessary dependencies.\n- Subsequently, the virtual environment can be re-activated with `poetry shell`.\n\nIf you run `poetry install`, the CLI tool will be aliased so you can simply use `vector2dggs` rather than `poetry run vector2dggs`, which is the alternative if you do not `poetry install`.\n\n#### Code formatting\n\n[](https://github.com/psf/black)\n\nPlease run `black .` before committing.\n\n## Example commands\n\nWith a local GPKG:\n\n```bash\nvector2dggs h3 -v DEBUG -id title_no -r 12 -o ~/Downloads/nz-property-titles.gpkg ~/Downloads/nz-property-titles.parquet\n\n```\n\nWith a PostgreSQL/PostGIS connection:\n\n```bash\nvector2dggs h3 -v DEBUG -id ogc_fid -r 9 -p 5 -t 4 --overwrite -tbl topo50_lake postgresql://user:password@host:port/db ./topo50_lake.parquet\n```\n\n## Citation\n\n```bibtex\n@software{vector2dggs,\n title={{vector2dggs}},\n author={Ardo, James and Law, Richard},\n url={https://github.com/manaakiwhenua/vector2dggs},\n version={0.6.0},\n date={2023-04-20}\n}\n```\n\nAPA/Harvard\n\n> Ardo, J., & Law, R. (2023). vector2dggs (0.6.0) [Computer software]. https://github.com/manaakiwhenua/vector2dggs\n\n[](https://github.com/manaakiwhenua/manaakiwhenua-standards)\n',
|
33
|
+
'author': 'James Ardo',
|
34
|
+
'author_email': 'ardoj@landcareresearch.co.nz',
|
35
|
+
'maintainer': 'Richard Law',
|
36
|
+
'maintainer_email': 'lawr@landcareresearch.co.nz',
|
37
|
+
'url': 'https://github.com/manaakiwhenua/vector2dggs',
|
38
|
+
'packages': packages,
|
39
|
+
'package_data': package_data,
|
40
|
+
'install_requires': install_requires,
|
41
|
+
'entry_points': entry_points,
|
42
|
+
'python_requires': '>=3.10,<4.0',
|
43
|
+
}
|
44
|
+
|
45
|
+
|
46
|
+
setup(**setup_kwargs)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__: str = "0.6.0"
|
@@ -69,18 +69,40 @@ def polyfill(
|
|
69
69
|
output_directory: str,
|
70
70
|
) -> None:
|
71
71
|
"""
|
72
|
-
Reads a geoparquet, performs H3 polyfilling,
|
73
|
-
and writes out to parquet.
|
72
|
+
Reads a geoparquet, performs H3 polyfilling (for polygons),
|
73
|
+
linetracing (for linestrings), and writes out to parquet.
|
74
74
|
"""
|
75
75
|
df = gpd.read_parquet(pq_in).reset_index().drop(columns=[spatial_sort_col])
|
76
76
|
if len(df.index) == 0:
|
77
77
|
# Input is empty, nothing to polyfill
|
78
78
|
return None
|
79
|
-
|
79
|
+
|
80
|
+
df_polygon = df[df.geom_type == "Polygon"]
|
81
|
+
if len(df_polygon.index) > 0:
|
82
|
+
df_polygon = df_polygon.h3.polyfill_resample(
|
83
|
+
resolution, return_geometry=False
|
84
|
+
).drop(columns=["index"])
|
85
|
+
|
86
|
+
df_linestring = df[df.geom_type == "LineString"]
|
87
|
+
if len(df_linestring.index) > 0:
|
88
|
+
df_linestring = (
|
89
|
+
df_linestring.h3.linetrace(resolution)
|
90
|
+
.explode("h3_linetrace")
|
91
|
+
.set_index("h3_linetrace")
|
92
|
+
)
|
93
|
+
df_linestring = df_linestring[~df_linestring.index.duplicated(keep="first")]
|
94
|
+
|
95
|
+
df = pd.concat(
|
96
|
+
map(
|
97
|
+
lambda _df: pd.DataFrame(_df.drop(columns=[_df.geometry.name])),
|
98
|
+
[df_polygon, df_linestring],
|
99
|
+
)
|
100
|
+
)
|
101
|
+
|
80
102
|
if len(df.index) == 0:
|
81
103
|
# Polyfill resulted in empty output (e.g. large cell, small feature)
|
82
104
|
return None
|
83
|
-
|
105
|
+
|
84
106
|
df.index.rename(f"h3_{resolution:02}", inplace=True)
|
85
107
|
parent_res: int = _get_parent_res(parent_res, resolution)
|
86
108
|
# Secondary (parent) H3 index, used later for partitioning
|
@@ -218,7 +240,8 @@ def _index(
|
|
218
240
|
{
|
219
241
|
"index": lambda frame: frame[
|
220
242
|
(frame.geometry.geom_type != "Polygon")
|
221
|
-
|
243
|
+
& (frame.geometry.geom_type != "LineString")
|
244
|
+
], # NB currently points and other types are lost; in principle, these could be indexed
|
222
245
|
"message": "Dropping non-polygonal geometries",
|
223
246
|
},
|
224
247
|
]
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__: str = "0.5.4"
|
File without changes
|
File without changes
|