ssb-sgis 1.1.16__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/PKG-INFO +1 -1
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/pyproject.toml +1 -1
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/__init__.py +4 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/conf.py +56 -4
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/buffer_dissolve_explode.py +24 -47
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/conversion.py +18 -25
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/duplicates.py +47 -60
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/general.py +8 -84
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/overlay.py +190 -260
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/polygon_operations.py +67 -88
- ssb_sgis-1.2.0/src/sgis/geopandas_tools/runners.py +277 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/sfilter.py +40 -24
- ssb_sgis-1.2.0/src/sgis/geopandas_tools/utils.py +37 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/helpers.py +1 -1
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/io/dapla_functions.py +5 -7
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/map.py +3 -1
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/parallel/parallel.py +32 -24
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/image_collection.py +184 -162
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/LICENSE +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/README.md +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/debug_config.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/exceptions.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/bounds.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/centerlines.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/cleaning.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/geocoding.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/geometry_types.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/neighbors.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/point_operations.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/geopandas_tools/polygons_as_rings.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/io/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/io/_is_dapla.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/io/opener.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/io/read_parquet.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/examine.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/explore.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/httpserver.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/legend.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/maps.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/norge_i_bilder.json +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/thematicmap.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/tilesources.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/maps/wms.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/_get_route.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/_od_cost_matrix.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/_points.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/_service_area.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/closing_network_holes.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/cutting_lines.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/directednetwork.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/finding_isolated_networks.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/network.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/networkanalysis.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/networkanalysisrules.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/nodes.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/networkanalysis/traveling_salesman.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/parallel/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/py.typed +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/__init__.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/base.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/regex.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/sentinel_config.py +0 -0
- {ssb_sgis-1.1.16 → ssb_sgis-1.2.0}/src/sgis/raster/zonal.py +0 -0
|
@@ -76,6 +76,10 @@ from .geopandas_tools.polygon_operations import get_holes
|
|
|
76
76
|
from .geopandas_tools.polygon_operations import get_polygon_clusters
|
|
77
77
|
from .geopandas_tools.polygon_operations import split_polygons_by_lines
|
|
78
78
|
from .geopandas_tools.polygons_as_rings import PolygonsAsRings
|
|
79
|
+
from .geopandas_tools.runners import GridSizeOverlayRunner
|
|
80
|
+
from .geopandas_tools.runners import OverlayRunner
|
|
81
|
+
from .geopandas_tools.runners import RTreeQueryRunner
|
|
82
|
+
from .geopandas_tools.runners import UnionRunner
|
|
79
83
|
from .geopandas_tools.sfilter import sfilter
|
|
80
84
|
from .geopandas_tools.sfilter import sfilter_inverse
|
|
81
85
|
from .geopandas_tools.sfilter import sfilter_split
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
1
4
|
try:
|
|
2
5
|
from gcsfs import GCSFileSystem
|
|
3
6
|
|
|
@@ -66,7 +69,56 @@ except ImportError:
|
|
|
66
69
|
|
|
67
70
|
file_system = LocalFileSystem
|
|
68
71
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
72
|
+
from .geopandas_tools.runners import OverlayRunner
|
|
73
|
+
from .geopandas_tools.runners import RTreeQueryRunner
|
|
74
|
+
from .geopandas_tools.runners import UnionRunner
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Config:
    """Dict-like configuration container with a 'get_instance' method.

    Wraps a plain dict stored in the ``data`` attribute. Item access, item
    assignment, iteration, ``len`` and ``str`` are forwarded to that dict, and
    attributes not found on the Config itself (e.g. ``get``, ``items``,
    ``update``) are looked up on the dict.
    """

    def __init__(self, data: dict) -> None:
        """Initialise with dict.

        Args:
            data: The dict to wrap. It is stored as-is (not copied).
        """
        self.data = data

    def get_instance(self, key: str, *args, **kwargs) -> Any:
        """Get the dict value and call it if callable.

        Args:
            key: Key to look up in the wrapped dict.
            *args: Positional arguments passed on if the value is callable.
            **kwargs: Keyword arguments passed on if the value is callable.

        Returns:
            The result of calling the value with the given arguments if the
            value is callable, otherwise the value itself (the arguments are
            then ignored).

        Raises:
            KeyError: If 'key' is not in the wrapped dict.
        """
        x = self.data[key]
        if callable(x):
            return x(*args, **kwargs)
        return x

    def __getattr__(self, attr: str) -> Any:
        """Get dict attribute.

        Only called when normal attribute lookup fails. The wrapped dict is
        read straight from the instance ``__dict__`` so that a lookup on an
        instance without ``data`` (e.g. while ``copy`` or ``pickle`` is
        reconstructing the object) raises AttributeError instead of recursing
        into ``__getattr__`` until RecursionError.
        """
        try:
            data = self.__dict__["data"]
        except KeyError:
            raise AttributeError(attr) from None
        return getattr(data, attr)

    def __getitem__(self, key: str) -> Any:
        """Get dict value."""
        return self.data[key]

    def __setitem__(self, key: str, value) -> None:
        """Set dict value."""
        self.data[key] = value

    def __iter__(self) -> Iterable[str]:
        """Iterate over dict keys."""
        return iter(self.data)

    def __len__(self) -> int:
        """Length of dict."""
        return len(self.data)

    def __str__(self) -> str:
        """String representation of dict."""
        return str(self.data)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
config = Config(
|
|
117
|
+
{
|
|
118
|
+
"n_jobs": 1,
|
|
119
|
+
"file_system": file_system,
|
|
120
|
+
"rtree_runner": RTreeQueryRunner,
|
|
121
|
+
"overlay_runner": OverlayRunner,
|
|
122
|
+
"union_runner": UnionRunner,
|
|
123
|
+
}
|
|
124
|
+
)
|
|
@@ -23,12 +23,13 @@ from geopandas import GeoDataFrame
|
|
|
23
23
|
from geopandas import GeoSeries
|
|
24
24
|
from shapely import get_num_geometries
|
|
25
25
|
|
|
26
|
+
from ..conf import config
|
|
26
27
|
from ..parallel.parallel import Parallel
|
|
27
|
-
from .general import _parallel_unary_union
|
|
28
|
-
from .general import _unary_union_for_notna
|
|
29
28
|
from .geometry_types import make_all_singlepart
|
|
30
29
|
from .polygon_operations import get_cluster_mapper
|
|
31
30
|
from .polygon_operations import get_grouped_centroids
|
|
31
|
+
from .runners import UnionRunner
|
|
32
|
+
from .utils import _unary_union_for_notna
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
def _decide_ignore_index(kwargs: dict) -> tuple[dict, bool]:
|
|
@@ -53,8 +54,8 @@ def buffdissexp(
|
|
|
53
54
|
index_parts: bool = False,
|
|
54
55
|
copy: bool = True,
|
|
55
56
|
grid_size: float | int | None = None,
|
|
56
|
-
n_jobs: int = 1,
|
|
57
57
|
join_style: int | str = "round",
|
|
58
|
+
n_jobs: int = 1,
|
|
58
59
|
**dissolve_kwargs,
|
|
59
60
|
) -> GeoDataFrame:
|
|
60
61
|
"""Buffers and dissolves overlapping geometries.
|
|
@@ -187,27 +188,27 @@ def _dissolve(
|
|
|
187
188
|
gdf: GeoDataFrame,
|
|
188
189
|
aggfunc: str = "first",
|
|
189
190
|
grid_size: None | float = None,
|
|
190
|
-
n_jobs: int = 1,
|
|
191
191
|
as_index: bool = True,
|
|
192
|
+
n_jobs: int = 1,
|
|
193
|
+
union_runner: UnionRunner | None = None,
|
|
192
194
|
**dissolve_kwargs,
|
|
193
195
|
) -> GeoDataFrame:
|
|
194
|
-
|
|
195
196
|
if not len(gdf):
|
|
196
197
|
return gdf
|
|
197
198
|
|
|
198
|
-
|
|
199
|
+
if union_runner is None:
|
|
200
|
+
union_runner = config.get_instance("union_runner", n_jobs)
|
|
199
201
|
|
|
200
|
-
|
|
202
|
+
geom_col = gdf.geometry.name
|
|
203
|
+
by = dissolve_kwargs.pop("by", None)
|
|
204
|
+
by_was_none = not bool(by)
|
|
201
205
|
|
|
206
|
+
# make sure geometries are dissolved rowwise to make dissolving simpler later
|
|
202
207
|
more_than_one = get_num_geometries(gdf.geometry.values) > 1
|
|
203
208
|
gdf.loc[more_than_one, geom_col] = gdf.loc[more_than_one, geom_col].apply(
|
|
204
209
|
_unary_union_for_notna
|
|
205
210
|
)
|
|
206
211
|
|
|
207
|
-
by = dissolve_kwargs.pop("by", None)
|
|
208
|
-
|
|
209
|
-
by_was_none = not bool(by)
|
|
210
|
-
|
|
211
212
|
if by is None and dissolve_kwargs.get("level") is None:
|
|
212
213
|
by = np.zeros(len(gdf), dtype="int64")
|
|
213
214
|
other_cols = list(gdf.columns.difference({geom_col}))
|
|
@@ -215,30 +216,23 @@ def _dissolve(
|
|
|
215
216
|
if isinstance(by, str):
|
|
216
217
|
by = [by]
|
|
217
218
|
other_cols = list(gdf.columns.difference({geom_col} | set(by or {})))
|
|
218
|
-
|
|
219
219
|
try:
|
|
220
220
|
is_one_hit = (
|
|
221
221
|
gdf.groupby(by, as_index=True, **dissolve_kwargs).transform("size") == 1
|
|
222
222
|
)
|
|
223
223
|
except IndexError:
|
|
224
|
-
# if no rows
|
|
224
|
+
# if no rows after dropping na if dropna=True
|
|
225
225
|
original_by = [x for x in by]
|
|
226
226
|
query = gdf[by.pop(0)].notna()
|
|
227
227
|
for col in gdf[by]:
|
|
228
228
|
query &= gdf[col].notna()
|
|
229
229
|
gdf = gdf.loc[query]
|
|
230
230
|
assert not len(gdf), gdf
|
|
231
|
-
if not by_was_none
|
|
232
|
-
|
|
233
|
-
gdf = gdf.set_index(original_by)
|
|
234
|
-
except Exception as e:
|
|
235
|
-
print(gdf)
|
|
236
|
-
print(original_by)
|
|
237
|
-
raise e
|
|
238
|
-
|
|
231
|
+
if as_index and not by_was_none:
|
|
232
|
+
gdf = gdf.set_index(original_by)
|
|
239
233
|
return gdf
|
|
240
234
|
|
|
241
|
-
if not by_was_none
|
|
235
|
+
if as_index and not by_was_none:
|
|
242
236
|
one_hit = gdf[is_one_hit].set_index(by)
|
|
243
237
|
else:
|
|
244
238
|
one_hit = gdf[is_one_hit]
|
|
@@ -250,38 +244,21 @@ def _dissolve(
|
|
|
250
244
|
dissolved = many_hits.groupby(by, as_index=True, **dissolve_kwargs)[other_cols].agg(
|
|
251
245
|
aggfunc
|
|
252
246
|
)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
by=by,
|
|
260
|
-
grid_size=grid_size,
|
|
261
|
-
as_index=True,
|
|
262
|
-
**dissolve_kwargs,
|
|
263
|
-
)
|
|
264
|
-
dissolved[geom_col] = agged
|
|
265
|
-
return GeoDataFrame(dissolved, geometry=geom_col, crs=gdf.crs)
|
|
266
|
-
except Exception as e:
|
|
267
|
-
print(e, dissolved, agged, many_hits)
|
|
268
|
-
raise e
|
|
269
|
-
|
|
270
|
-
geoms_agged = many_hits.groupby(by, **dissolve_kwargs)[geom_col].agg(
|
|
271
|
-
lambda x: _unary_union_for_notna(x, grid_size=grid_size)
|
|
247
|
+
dissolved[geom_col] = union_runner.run(
|
|
248
|
+
many_hits,
|
|
249
|
+
by=by,
|
|
250
|
+
grid_size=grid_size,
|
|
251
|
+
as_index=True,
|
|
252
|
+
**dissolve_kwargs,
|
|
272
253
|
)
|
|
273
|
-
|
|
274
|
-
dissolved[geom_col] = geoms_agged
|
|
275
|
-
|
|
276
254
|
if not as_index:
|
|
277
255
|
dissolved = dissolved.reset_index()
|
|
278
|
-
|
|
279
256
|
try:
|
|
280
257
|
return GeoDataFrame(
|
|
281
258
|
pd.concat([dissolved, one_hit]).sort_index(), geometry=geom_col, crs=gdf.crs
|
|
282
259
|
)
|
|
283
260
|
except TypeError as e:
|
|
284
|
-
raise e.__class__(e
|
|
261
|
+
raise e.__class__(f"{e}. {dissolved.index}. {one_hit.index}") from e
|
|
285
262
|
|
|
286
263
|
|
|
287
264
|
def diss(
|
|
@@ -582,7 +559,7 @@ def buff(
|
|
|
582
559
|
if copy:
|
|
583
560
|
gdf = gdf.copy()
|
|
584
561
|
|
|
585
|
-
gdf[gdf.
|
|
562
|
+
gdf[gdf.geometry.name] = gdf.buffer(
|
|
586
563
|
distance, resolution=resolution, join_style=join_style, **buffer_kwargs
|
|
587
564
|
).make_valid()
|
|
588
565
|
|
|
@@ -146,31 +146,24 @@ def to_bbox(
|
|
|
146
146
|
except Exception:
|
|
147
147
|
pass
|
|
148
148
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
minx = float(np.min(obj["west_longitude"])) # type: ignore [index]
|
|
168
|
-
miny = float(np.min(obj["south_latitude"])) # type: ignore [index]
|
|
169
|
-
maxx = float(np.max(obj["east_longitude"])) # type: ignore [index]
|
|
170
|
-
maxy = float(np.max(obj["north_latitude"])) # type: ignore [index]
|
|
171
|
-
return minx, miny, maxx, maxy
|
|
172
|
-
except Exception:
|
|
173
|
-
pass
|
|
149
|
+
def to_int_if_possible(x):
|
|
150
|
+
if isinstance(x, int) or float(x).is_integer():
|
|
151
|
+
return int(x)
|
|
152
|
+
return float(x)
|
|
153
|
+
|
|
154
|
+
for attrs in [
|
|
155
|
+
("minx", "miny", "maxx", "maxy"),
|
|
156
|
+
("xmin", "ymin", "xmax", "xmax"),
|
|
157
|
+
("west_longitude", "south_latitude", "east_longitude", "north_latitude"),
|
|
158
|
+
]:
|
|
159
|
+
try:
|
|
160
|
+
return tuple(to_int_if_possible(obj[attr]) for attr in attrs)
|
|
161
|
+
except Exception:
|
|
162
|
+
pass
|
|
163
|
+
try:
|
|
164
|
+
return tuple(to_int_if_possible(getattr(obj, attr)) for attr in attrs)
|
|
165
|
+
except Exception:
|
|
166
|
+
pass
|
|
174
167
|
|
|
175
168
|
if hasattr(obj, "geometry"):
|
|
176
169
|
try:
|
|
@@ -6,21 +6,19 @@ from geopandas import GeoDataFrame
|
|
|
6
6
|
from geopandas import GeoSeries
|
|
7
7
|
from shapely import STRtree
|
|
8
8
|
from shapely import difference
|
|
9
|
-
from shapely import make_valid
|
|
10
9
|
from shapely import simplify
|
|
11
|
-
from shapely.errors import GEOSException
|
|
12
10
|
|
|
11
|
+
from ..conf import config
|
|
13
12
|
from .general import _determine_geom_type_args
|
|
14
|
-
from .general import _grouped_unary_union
|
|
15
|
-
from .general import _parallel_unary_union_geoseries
|
|
16
13
|
from .general import _push_geom_col
|
|
17
14
|
from .general import clean_geoms
|
|
18
15
|
from .geometry_types import get_geom_type
|
|
19
16
|
from .geometry_types import make_all_singlepart
|
|
20
17
|
from .geometry_types import to_single_geom_type
|
|
21
|
-
from .overlay import _run_overlay_dask
|
|
22
18
|
from .overlay import clean_overlay
|
|
23
|
-
from .
|
|
19
|
+
from .runners import OverlayRunner
|
|
20
|
+
from .runners import RTreeQueryRunner
|
|
21
|
+
from .runners import UnionRunner
|
|
24
22
|
from .sfilter import sfilter_inverse
|
|
25
23
|
|
|
26
24
|
PRECISION = 1e-3
|
|
@@ -31,8 +29,11 @@ def update_geometries(
|
|
|
31
29
|
geom_type: str | None = None,
|
|
32
30
|
keep_geom_type: bool | None = None,
|
|
33
31
|
grid_size: int | None = None,
|
|
34
|
-
n_jobs: int = 1,
|
|
35
32
|
predicate: str | None = "intersects",
|
|
33
|
+
n_jobs: int = 1,
|
|
34
|
+
union_runner: UnionRunner | None = None,
|
|
35
|
+
rtree_runner: RTreeQueryRunner | None = None,
|
|
36
|
+
overlay_runner: OverlayRunner | None = None,
|
|
36
37
|
) -> GeoDataFrame:
|
|
37
38
|
"""Puts geometries on top of each other rowwise.
|
|
38
39
|
|
|
@@ -50,8 +51,14 @@ def update_geometries(
|
|
|
50
51
|
"line" or "point".
|
|
51
52
|
grid_size: Precision grid size to round the geometries. Will use the highest
|
|
52
53
|
precision of the inputs by default.
|
|
53
|
-
n_jobs: Number of threads.
|
|
54
54
|
predicate: Spatial predicate for the spatial tree.
|
|
55
|
+
n_jobs: Number of workers.
|
|
56
|
+
union_runner: Optionally debug/manipulate the spatial union operations.
|
|
57
|
+
See the 'runners' module for example implementations.
|
|
58
|
+
rtree_runner: Optionally debug/manipulate the spatial indexing operations.
|
|
59
|
+
See the 'runners' module for example implementations.
|
|
60
|
+
overlay_runner: Optionally debug/manipulate the spatial overlay operations.
|
|
61
|
+
See the 'runners' module for example implementations.
|
|
55
62
|
|
|
56
63
|
Example:
|
|
57
64
|
--------
|
|
@@ -98,6 +105,16 @@ def update_geometries(
|
|
|
98
105
|
if len(gdf) <= 1:
|
|
99
106
|
return gdf
|
|
100
107
|
|
|
108
|
+
if rtree_runner is None:
|
|
109
|
+
rtree_runner = config.get_instance("rtree_runner", n_jobs)
|
|
110
|
+
if union_runner is None:
|
|
111
|
+
union_runner = config.get_instance("union_runner", n_jobs)
|
|
112
|
+
if overlay_runner is None:
|
|
113
|
+
overlay_runner = config.get_instance("overlay_runner", n_jobs)
|
|
114
|
+
|
|
115
|
+
if geom_type == "polygon" or get_geom_type(gdf) == "polygon":
|
|
116
|
+
gdf.geometry = gdf.buffer(0)
|
|
117
|
+
|
|
101
118
|
copied = make_all_singlepart(clean_geoms(gdf))
|
|
102
119
|
|
|
103
120
|
copied, geom_type, keep_geom_type = _determine_geom_type_args(
|
|
@@ -108,66 +125,35 @@ def update_geometries(
|
|
|
108
125
|
index_mapper = {i: idx for i, idx in enumerate(copied.index)}
|
|
109
126
|
copied = copied.reset_index(drop=True)
|
|
110
127
|
|
|
111
|
-
|
|
112
|
-
|
|
128
|
+
left, right = rtree_runner.run(
|
|
129
|
+
copied.geometry.values, copied.geometry.values, predicate=predicate
|
|
130
|
+
)
|
|
113
131
|
indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
|
|
114
132
|
|
|
115
133
|
# select geometries from 'right', index from 'left', dissolve by 'left'
|
|
116
134
|
erasers = pd.Series(copied.geometry.loc[indices.values].values, index=indices.index)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
n_jobs=n_jobs,
|
|
122
|
-
grid_size=grid_size,
|
|
123
|
-
)
|
|
124
|
-
erasers = pd.Series(erasers, index=indices.index.unique())
|
|
125
|
-
else:
|
|
126
|
-
only_one = erasers.groupby(level=0).transform("size") == 1
|
|
127
|
-
one_hit = erasers[only_one]
|
|
128
|
-
many_hits = _grouped_unary_union(
|
|
129
|
-
erasers[~only_one], level=0, grid_size=grid_size
|
|
130
|
-
)
|
|
131
|
-
erasers = pd.concat([one_hit, many_hits]).sort_index()
|
|
135
|
+
only_one = erasers.groupby(level=0).transform("size") == 1
|
|
136
|
+
one_hit = erasers[only_one]
|
|
137
|
+
many_hits = union_runner.run(erasers[~only_one], level=0, grid_size=grid_size)
|
|
138
|
+
erasers = pd.concat([one_hit, many_hits]).sort_index()
|
|
132
139
|
|
|
133
140
|
# match up the aggregated erasers by index
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
arr1, arr2, func=difference, n_jobs=n_jobs, grid_size=grid_size
|
|
140
|
-
)
|
|
141
|
-
except GEOSException:
|
|
142
|
-
arr1 = make_valid_and_keep_geom_type(
|
|
143
|
-
arr1, geom_type=geom_type, n_jobs=n_jobs
|
|
144
|
-
)
|
|
145
|
-
arr2 = make_valid_and_keep_geom_type(
|
|
146
|
-
arr2, geom_type=geom_type, n_jobs=n_jobs
|
|
147
|
-
)
|
|
148
|
-
erased = _run_overlay_dask(
|
|
149
|
-
arr1, arr2, func=difference, n_jobs=n_jobs, grid_size=grid_size
|
|
150
|
-
)
|
|
151
|
-
erased = GeoSeries(erased, index=erasers.index)
|
|
152
|
-
else:
|
|
153
|
-
erased = make_valid(
|
|
154
|
-
difference(
|
|
155
|
-
copied.geometry.loc[erasers.index],
|
|
156
|
-
erasers,
|
|
157
|
-
grid_size=grid_size,
|
|
158
|
-
)
|
|
159
|
-
)
|
|
141
|
+
arr1 = copied.geometry.loc[erasers.index].to_numpy()
|
|
142
|
+
arr2 = erasers.to_numpy()
|
|
143
|
+
erased = overlay_runner.run(
|
|
144
|
+
difference, arr1, arr2, grid_size=grid_size, geom_type=geom_type
|
|
145
|
+
)
|
|
160
146
|
|
|
147
|
+
erased = GeoSeries(erased, index=erasers.index)
|
|
161
148
|
copied.loc[erased.index, geom_col] = erased
|
|
162
|
-
|
|
163
149
|
copied = copied.loc[~copied.is_empty]
|
|
164
|
-
|
|
165
150
|
copied.index = copied.index.map(index_mapper)
|
|
166
|
-
|
|
167
151
|
copied = make_all_singlepart(copied)
|
|
168
152
|
|
|
169
153
|
# TODO check why polygons disappear in rare cases. For now, just add back the missing
|
|
170
|
-
dissapeared = sfilter_inverse(
|
|
154
|
+
dissapeared = sfilter_inverse(
|
|
155
|
+
gdf, copied.buffer(-PRECISION), rtree_runner=rtree_runner
|
|
156
|
+
)
|
|
171
157
|
copied = pd.concat([copied, dissapeared])
|
|
172
158
|
|
|
173
159
|
# TODO fix duplicates again with dissolve?
|
|
@@ -188,7 +174,7 @@ def get_intersections(
|
|
|
188
174
|
keep_geom_type: bool | None = None,
|
|
189
175
|
predicate: str | None = "intersects",
|
|
190
176
|
grid_size: float | None = None,
|
|
191
|
-
|
|
177
|
+
**kwargs,
|
|
192
178
|
) -> GeoDataFrame:
|
|
193
179
|
"""Find geometries that intersect in a GeoDataFrame.
|
|
194
180
|
|
|
@@ -211,6 +197,7 @@ def get_intersections(
|
|
|
211
197
|
precision of the inputs by default.
|
|
212
198
|
n_jobs: Number of threads.
|
|
213
199
|
predicate: Spatial predicate for the spatial tree.
|
|
200
|
+
**kwargs: Keyword arguments passed to clean_overlay.
|
|
214
201
|
|
|
215
202
|
Returns:
|
|
216
203
|
A GeoDataFrame of the overlapping polygons.
|
|
@@ -283,9 +270,9 @@ def get_intersections(
|
|
|
283
270
|
gdf,
|
|
284
271
|
geom_type,
|
|
285
272
|
keep_geom_type,
|
|
286
|
-
n_jobs=n_jobs,
|
|
287
273
|
grid_size=grid_size,
|
|
288
274
|
predicate=predicate,
|
|
275
|
+
**kwargs,
|
|
289
276
|
).pipe(clean_geoms)
|
|
290
277
|
|
|
291
278
|
duplicated_geoms.index = duplicated_geoms["orig_idx"].values
|
|
@@ -301,9 +288,9 @@ def _get_intersecting_geometries(
|
|
|
301
288
|
gdf: GeoDataFrame,
|
|
302
289
|
geom_type: str | None,
|
|
303
290
|
keep_geom_type: bool,
|
|
304
|
-
n_jobs: int,
|
|
305
291
|
grid_size: float | None = None,
|
|
306
292
|
predicate: str | None = None,
|
|
293
|
+
**kwargs,
|
|
307
294
|
) -> GeoDataFrame:
|
|
308
295
|
right = gdf[[gdf._geometry_column_name]]
|
|
309
296
|
right["idx_right"] = right.index
|
|
@@ -327,7 +314,7 @@ def _get_intersecting_geometries(
|
|
|
327
314
|
grid_size=grid_size,
|
|
328
315
|
geom_type=geom_type,
|
|
329
316
|
keep_geom_type=keep_geom_type,
|
|
330
|
-
|
|
317
|
+
**kwargs,
|
|
331
318
|
).loc[are_not_identical]
|
|
332
319
|
else:
|
|
333
320
|
if keep_geom_type:
|
|
@@ -347,7 +334,7 @@ def _get_intersecting_geometries(
|
|
|
347
334
|
grid_size=grid_size,
|
|
348
335
|
predicate=predicate,
|
|
349
336
|
geom_type=geom_type,
|
|
350
|
-
|
|
337
|
+
**kwargs,
|
|
351
338
|
)
|
|
352
339
|
]
|
|
353
340
|
intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
|
|
@@ -6,7 +6,6 @@ from collections.abc import Hashable
|
|
|
6
6
|
from collections.abc import Iterable
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
import joblib
|
|
10
9
|
import numpy as np
|
|
11
10
|
import pandas as pd
|
|
12
11
|
import pyproj
|
|
@@ -23,7 +22,6 @@ from shapely import get_parts
|
|
|
23
22
|
from shapely import linestrings
|
|
24
23
|
from shapely import make_valid
|
|
25
24
|
from shapely import points as shapely_points
|
|
26
|
-
from shapely import union_all
|
|
27
25
|
from shapely.geometry import LineString
|
|
28
26
|
from shapely.geometry import MultiPoint
|
|
29
27
|
from shapely.geometry import Point
|
|
@@ -333,7 +331,7 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
333
331
|
# using enumerate, then iloc on the sorted dict keys.
|
|
334
332
|
# to avoid creating a temporary area column (which doesn't work for GeoSeries).
|
|
335
333
|
area_mapper = dict(enumerate(gdf.area.values))
|
|
336
|
-
sorted_areas = dict(reversed(sorted(area_mapper.items(), key=
|
|
334
|
+
sorted_areas = dict(reversed(sorted(area_mapper.items(), key=_get_dict_value)))
|
|
337
335
|
return gdf.iloc[list(sorted_areas)]
|
|
338
336
|
|
|
339
337
|
|
|
@@ -349,9 +347,7 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
349
347
|
# using enumerate, then iloc on the sorted dict keys.
|
|
350
348
|
# to avoid creating a temporary area column (which doesn't work for GeoSeries).
|
|
351
349
|
length_mapper = dict(enumerate(gdf.length.values))
|
|
352
|
-
sorted_lengths = dict(
|
|
353
|
-
reversed(sorted(length_mapper.items(), key=lambda item: item[1]))
|
|
354
|
-
)
|
|
350
|
+
sorted_lengths = dict(reversed(sorted(length_mapper.items(), key=_get_dict_value)))
|
|
355
351
|
return gdf.iloc[list(sorted_lengths)]
|
|
356
352
|
|
|
357
353
|
|
|
@@ -367,7 +363,7 @@ def sort_short_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
367
363
|
# using enumerate, then iloc on the sorted dict keys.
|
|
368
364
|
# to avoid creating a temporary area column (which doesn't work for GeoSeries).
|
|
369
365
|
length_mapper = dict(enumerate(gdf.length.values))
|
|
370
|
-
sorted_lengths = dict(sorted(length_mapper.items(), key=
|
|
366
|
+
sorted_lengths = dict(sorted(length_mapper.items(), key=_get_dict_value))
|
|
371
367
|
return gdf.iloc[list(sorted_lengths)]
|
|
372
368
|
|
|
373
369
|
|
|
@@ -384,10 +380,14 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
384
380
|
# using enumerate, then iloc on the sorted dict keys.
|
|
385
381
|
# to avoid creating a temporary area column (which doesn't work for GeoSeries).
|
|
386
382
|
area_mapper = dict(enumerate(gdf.area.values))
|
|
387
|
-
sorted_areas = dict(sorted(area_mapper.items(), key=
|
|
383
|
+
sorted_areas = dict(sorted(area_mapper.items(), key=_get_dict_value))
|
|
388
384
|
return gdf.iloc[list(sorted_areas)]
|
|
389
385
|
|
|
390
386
|
|
|
387
|
+
def _get_dict_value(item: tuple[Hashable, Any]) -> Any:
    """Return the value (second element) of a (key, value) pair; used as a sort key."""
    return item[1]
|
|
389
|
+
|
|
390
|
+
|
|
391
391
|
def make_lines_between_points(
|
|
392
392
|
*arrs: NDArray[Point] | GeometryArray | GeoSeries,
|
|
393
393
|
) -> NDArray[LineString]:
|
|
@@ -1121,79 +1121,3 @@ def _determine_geom_type_args(
|
|
|
1121
1121
|
if geom_type == "mixed":
|
|
1122
1122
|
raise ValueError("Cannot set keep_geom_type=True with mixed geometries")
|
|
1123
1123
|
return gdf, geom_type, keep_geom_type
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
def _unary_union_for_notna(geoms, **kwargs):
|
|
1127
|
-
try:
|
|
1128
|
-
return make_valid(union_all(geoms, **kwargs))
|
|
1129
|
-
except TypeError:
|
|
1130
|
-
return union_all([geom for geom in geoms.dropna().values], **kwargs)
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
def _grouped_unary_union(
|
|
1134
|
-
df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
|
|
1135
|
-
by: str | list[str] | None = None,
|
|
1136
|
-
level: int | None = None,
|
|
1137
|
-
as_index: bool = True,
|
|
1138
|
-
grid_size: float | int | None = None,
|
|
1139
|
-
dropna: bool = False,
|
|
1140
|
-
**kwargs,
|
|
1141
|
-
) -> GeoSeries | GeoDataFrame:
|
|
1142
|
-
"""Vectorized unary_union for groups.
|
|
1143
|
-
|
|
1144
|
-
Experimental. Messy code.
|
|
1145
|
-
"""
|
|
1146
|
-
try:
|
|
1147
|
-
geom_col = df._geometry_column_name
|
|
1148
|
-
except AttributeError:
|
|
1149
|
-
try:
|
|
1150
|
-
geom_col = df.name
|
|
1151
|
-
if geom_col is None:
|
|
1152
|
-
geom_col = "geometry"
|
|
1153
|
-
except AttributeError:
|
|
1154
|
-
geom_col = "geometry"
|
|
1155
|
-
|
|
1156
|
-
if isinstance(df, pd.Series):
|
|
1157
|
-
return GeoSeries(
|
|
1158
|
-
df.groupby(level=level, as_index=as_index, **kwargs).agg(
|
|
1159
|
-
lambda x: _unary_union_for_notna(x, grid_size=grid_size)
|
|
1160
|
-
)
|
|
1161
|
-
)
|
|
1162
|
-
|
|
1163
|
-
return GeoSeries(
|
|
1164
|
-
df.groupby(by, level=level, as_index=as_index, **kwargs)[geom_col].agg(
|
|
1165
|
-
lambda x: _unary_union_for_notna(x, grid_size=grid_size)
|
|
1166
|
-
)
|
|
1167
|
-
)
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
def _parallel_unary_union(
|
|
1171
|
-
gdf: GeoDataFrame, n_jobs: int = 1, by=None, grid_size=None, **kwargs
|
|
1172
|
-
) -> list[Geometry]:
|
|
1173
|
-
try:
|
|
1174
|
-
geom_col = gdf._geometry_column_name
|
|
1175
|
-
except AttributeError:
|
|
1176
|
-
geom_col = "geometry"
|
|
1177
|
-
|
|
1178
|
-
with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
|
|
1179
|
-
delayed_operations = []
|
|
1180
|
-
for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
|
|
1181
|
-
delayed_operations.append(
|
|
1182
|
-
joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
|
|
1183
|
-
)
|
|
1184
|
-
|
|
1185
|
-
return parallel(delayed_operations)
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
def _parallel_unary_union_geoseries(
|
|
1189
|
-
ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
|
|
1190
|
-
) -> list[Geometry]:
|
|
1191
|
-
|
|
1192
|
-
with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
|
|
1193
|
-
delayed_operations = []
|
|
1194
|
-
for _, geoms in ser.groupby(**kwargs):
|
|
1195
|
-
delayed_operations.append(
|
|
1196
|
-
joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
|
|
1197
|
-
)
|
|
1198
|
-
|
|
1199
|
-
return parallel(delayed_operations)
|