ssb-sgis 1.1.17__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +4 -0
- sgis/conf.py +56 -4
- sgis/geopandas_tools/buffer_dissolve_explode.py +24 -47
- sgis/geopandas_tools/conversion.py +18 -25
- sgis/geopandas_tools/duplicates.py +44 -60
- sgis/geopandas_tools/general.py +8 -84
- sgis/geopandas_tools/overlay.py +177 -256
- sgis/geopandas_tools/polygon_operations.py +67 -88
- sgis/geopandas_tools/runners.py +277 -0
- sgis/geopandas_tools/sfilter.py +40 -24
- sgis/geopandas_tools/utils.py +37 -0
- sgis/helpers.py +1 -1
- sgis/io/dapla_functions.py +5 -7
- sgis/maps/map.py +3 -1
- sgis/parallel/parallel.py +32 -24
- sgis/raster/image_collection.py +184 -162
- sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.0.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.0.dist-info}/RECORD +21 -19
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.0.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.0.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/overlay.py
CHANGED
|
@@ -9,34 +9,29 @@ version of the solution from GH 2792.
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
import functools
|
|
12
|
-
from collections.abc import Callable
|
|
13
12
|
|
|
14
13
|
import geopandas as gpd
|
|
15
|
-
import joblib
|
|
16
14
|
import numpy as np
|
|
17
15
|
import pandas as pd
|
|
18
16
|
from geopandas import GeoDataFrame
|
|
19
|
-
from geopandas import GeoSeries
|
|
20
17
|
from pandas import DataFrame
|
|
21
18
|
from shapely import Geometry
|
|
22
|
-
from shapely import STRtree
|
|
23
19
|
from shapely import box
|
|
24
20
|
from shapely import difference
|
|
25
21
|
from shapely import intersection
|
|
22
|
+
from shapely import is_empty
|
|
26
23
|
from shapely import make_valid
|
|
27
|
-
from shapely import
|
|
28
|
-
from shapely.errors import GEOSException
|
|
29
|
-
|
|
30
|
-
try:
|
|
31
|
-
import dask.array as da
|
|
32
|
-
except ImportError:
|
|
33
|
-
pass
|
|
24
|
+
from shapely import union_all
|
|
34
25
|
|
|
26
|
+
from ..conf import config
|
|
35
27
|
from .general import _determine_geom_type_args
|
|
36
28
|
from .general import clean_geoms
|
|
37
29
|
from .geometry_types import get_geom_type
|
|
38
30
|
from .geometry_types import make_all_singlepart
|
|
39
31
|
from .geometry_types import to_single_geom_type
|
|
32
|
+
from .runners import OverlayRunner
|
|
33
|
+
from .runners import RTreeQueryRunner
|
|
34
|
+
from .runners import UnionRunner
|
|
40
35
|
|
|
41
36
|
DEFAULT_GRID_SIZE = None
|
|
42
37
|
DEFAULT_LSUFFIX = "_1"
|
|
@@ -51,9 +46,12 @@ def clean_overlay(
|
|
|
51
46
|
geom_type: str | None = None,
|
|
52
47
|
predicate: str | None = "intersects",
|
|
53
48
|
grid_size: float | None = None,
|
|
54
|
-
n_jobs: int = 1,
|
|
55
49
|
lsuffix: str = DEFAULT_LSUFFIX,
|
|
56
50
|
rsuffix: str = DEFAULT_RSUFFIX,
|
|
51
|
+
n_jobs: int = 1,
|
|
52
|
+
rtree_runner: RTreeQueryRunner | None = None,
|
|
53
|
+
union_runner: UnionRunner | None = None,
|
|
54
|
+
overlay_runner: OverlayRunner | None = None,
|
|
57
55
|
) -> GeoDataFrame:
|
|
58
56
|
"""Fixes and explodes geometries before doing a shapely overlay, then cleans up.
|
|
59
57
|
|
|
@@ -74,10 +72,16 @@ def clean_overlay(
|
|
|
74
72
|
"point".
|
|
75
73
|
grid_size: Precision grid size to round the geometries. Will use the highest
|
|
76
74
|
precision of the inputs by default.
|
|
77
|
-
n_jobs: number of threads.
|
|
78
75
|
predicate: Spatial predicate in the spatial tree.
|
|
79
76
|
lsuffix: Suffix of columns in df1 that are also in df2.
|
|
80
77
|
rsuffix: Suffix of columns in df2 that are also in df1.
|
|
78
|
+
n_jobs: number of jobs. Defaults to 1.
|
|
79
|
+
union_runner: Optionally debug/manipulate the spatial union operations.
|
|
80
|
+
See the 'runners' module for example implementations.
|
|
81
|
+
rtree_runner: Optionally debug/manipulate the spatial indexing operations.
|
|
82
|
+
See the 'runners' module for example implementations.
|
|
83
|
+
overlay_runner: Optionally debug/manipulate the spatial overlay operations.
|
|
84
|
+
See the 'runners' module for example implementations.
|
|
81
85
|
|
|
82
86
|
Returns:
|
|
83
87
|
GeoDataFrame with overlayed and fixed geometries and columns from both
|
|
@@ -104,6 +108,13 @@ def clean_overlay(
|
|
|
104
108
|
if df1.crs != df2.crs:
|
|
105
109
|
raise ValueError(f"'crs' mismatch. Got {df1.crs} and {df2.crs}")
|
|
106
110
|
|
|
111
|
+
if rtree_runner is None:
|
|
112
|
+
rtree_runner = config.get_instance("rtree_runner", n_jobs)
|
|
113
|
+
if union_runner is None:
|
|
114
|
+
union_runner = config.get_instance("union_runner", n_jobs)
|
|
115
|
+
if overlay_runner is None:
|
|
116
|
+
overlay_runner = config.get_instance("overlay_runner", n_jobs)
|
|
117
|
+
|
|
107
118
|
crs = df1.crs
|
|
108
119
|
|
|
109
120
|
# original_geom_type = geom_type
|
|
@@ -148,13 +159,16 @@ def clean_overlay(
|
|
|
148
159
|
box1 = box(*df1.total_bounds)
|
|
149
160
|
box2 = box(*df2.total_bounds)
|
|
150
161
|
|
|
151
|
-
if not
|
|
162
|
+
if not grid_size and (
|
|
163
|
+
(not len(df1) or not len(df2))
|
|
164
|
+
or (not box1.intersects(box2) and how == "intersection")
|
|
165
|
+
):
|
|
152
166
|
return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
|
|
153
167
|
|
|
154
|
-
if df1.
|
|
168
|
+
if df1.geometry.name != "geometry":
|
|
155
169
|
df1 = df1.rename_geometry("geometry")
|
|
156
170
|
|
|
157
|
-
if df2.
|
|
171
|
+
if df2.geometry.name != "geometry":
|
|
158
172
|
df2 = df2.rename_geometry("geometry")
|
|
159
173
|
|
|
160
174
|
# to pandas because GeoDataFrame constructor is expensive
|
|
@@ -171,8 +185,10 @@ def clean_overlay(
|
|
|
171
185
|
lsuffix=lsuffix,
|
|
172
186
|
rsuffix=rsuffix,
|
|
173
187
|
geom_type=geom_type,
|
|
174
|
-
n_jobs=n_jobs,
|
|
175
188
|
predicate=predicate,
|
|
189
|
+
rtree_runner=rtree_runner,
|
|
190
|
+
overlay_runner=overlay_runner,
|
|
191
|
+
union_runner=union_runner,
|
|
176
192
|
),
|
|
177
193
|
geometry="geometry",
|
|
178
194
|
crs=crs,
|
|
@@ -188,9 +204,9 @@ def clean_overlay(
|
|
|
188
204
|
|
|
189
205
|
|
|
190
206
|
def _join_and_get_no_rows(df1, df2, lsuffix, rsuffix):
|
|
191
|
-
geom_col = df1.
|
|
207
|
+
geom_col = df1.geometry.name
|
|
192
208
|
df1_cols = df1.columns.difference({geom_col})
|
|
193
|
-
df2_cols = df2.columns.difference({df2.
|
|
209
|
+
df2_cols = df2.columns.difference({df2.geometry.name})
|
|
194
210
|
cols_with_suffix = [
|
|
195
211
|
f"{col}{lsuffix}" if col in df2_cols else col for col in df1_cols
|
|
196
212
|
] + [f"{col}{rsuffix}" if col in df1_cols else col for col in df2_cols]
|
|
@@ -215,7 +231,7 @@ def _no_intersections_return(
|
|
|
215
231
|
if how == "identity":
|
|
216
232
|
# add suffixes and return df1
|
|
217
233
|
df_template = _join_and_get_no_rows(df1, df2, lsuffix, rsuffix)
|
|
218
|
-
df2_cols = df2.columns.difference({df2.
|
|
234
|
+
df2_cols = df2.columns.difference({df2.geometry.name})
|
|
219
235
|
df1.columns = [f"{col}{lsuffix}" if col in df2_cols else col for col in df1]
|
|
220
236
|
return pd.concat([df_template, df1], ignore_index=True)
|
|
221
237
|
|
|
@@ -237,33 +253,41 @@ def _shapely_pd_overlay(
|
|
|
237
253
|
df1: DataFrame,
|
|
238
254
|
df2: DataFrame,
|
|
239
255
|
how: str,
|
|
240
|
-
grid_size: float
|
|
241
|
-
predicate: str
|
|
242
|
-
lsuffix: str
|
|
243
|
-
rsuffix: str
|
|
244
|
-
geom_type: str | None
|
|
245
|
-
|
|
256
|
+
grid_size: float,
|
|
257
|
+
predicate: str,
|
|
258
|
+
lsuffix: str,
|
|
259
|
+
rsuffix: str,
|
|
260
|
+
geom_type: str | None,
|
|
261
|
+
rtree_runner: RTreeQueryRunner,
|
|
262
|
+
overlay_runner: OverlayRunner,
|
|
263
|
+
union_runner: UnionRunner,
|
|
246
264
|
) -> DataFrame:
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
tree = STRtree(df2.geometry.values)
|
|
251
|
-
left, right = tree.query(df1.geometry.values, predicate=predicate)
|
|
252
|
-
|
|
265
|
+
left, right = rtree_runner.run(
|
|
266
|
+
df1.geometry.values, df2.geometry.values, predicate=predicate
|
|
267
|
+
)
|
|
253
268
|
pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
|
|
254
|
-
assert pairs
|
|
255
|
-
assert pairs
|
|
269
|
+
assert pairs["geometry"].notna().all(), pairs.geometry[lambda x: x.isna()]
|
|
270
|
+
assert pairs["geom_right"].notna().all(), pairs.geom_right[lambda x: x.isna()]
|
|
256
271
|
|
|
257
272
|
if how == "intersection":
|
|
258
273
|
overlayed = [
|
|
259
274
|
_intersection(
|
|
260
|
-
pairs,
|
|
275
|
+
pairs,
|
|
276
|
+
grid_size=grid_size,
|
|
277
|
+
geom_type=geom_type,
|
|
278
|
+
overlay_runner=overlay_runner,
|
|
261
279
|
)
|
|
262
280
|
]
|
|
263
281
|
|
|
264
282
|
elif how == "difference":
|
|
265
283
|
overlayed = _difference(
|
|
266
|
-
pairs,
|
|
284
|
+
pairs,
|
|
285
|
+
df1,
|
|
286
|
+
left,
|
|
287
|
+
grid_size=grid_size,
|
|
288
|
+
geom_type=geom_type,
|
|
289
|
+
overlay_runner=overlay_runner,
|
|
290
|
+
union_runner=union_runner,
|
|
267
291
|
)
|
|
268
292
|
|
|
269
293
|
elif how == "symmetric_difference":
|
|
@@ -276,12 +300,19 @@ def _shapely_pd_overlay(
|
|
|
276
300
|
grid_size=grid_size,
|
|
277
301
|
rsuffix=rsuffix,
|
|
278
302
|
geom_type=geom_type,
|
|
279
|
-
|
|
303
|
+
overlay_runner=overlay_runner,
|
|
304
|
+
union_runner=union_runner,
|
|
280
305
|
)
|
|
281
306
|
|
|
282
307
|
elif how == "identity":
|
|
283
308
|
overlayed = _identity(
|
|
284
|
-
pairs,
|
|
309
|
+
pairs,
|
|
310
|
+
df1,
|
|
311
|
+
left,
|
|
312
|
+
grid_size=grid_size,
|
|
313
|
+
geom_type=geom_type,
|
|
314
|
+
overlay_runner=overlay_runner,
|
|
315
|
+
union_runner=union_runner,
|
|
285
316
|
)
|
|
286
317
|
|
|
287
318
|
elif how == "union":
|
|
@@ -294,7 +325,8 @@ def _shapely_pd_overlay(
|
|
|
294
325
|
grid_size=grid_size,
|
|
295
326
|
rsuffix=rsuffix,
|
|
296
327
|
geom_type=geom_type,
|
|
297
|
-
|
|
328
|
+
overlay_runner=overlay_runner,
|
|
329
|
+
union_runner=union_runner,
|
|
298
330
|
)
|
|
299
331
|
|
|
300
332
|
elif how == "update":
|
|
@@ -304,8 +336,9 @@ def _shapely_pd_overlay(
|
|
|
304
336
|
df2,
|
|
305
337
|
left=left,
|
|
306
338
|
grid_size=grid_size,
|
|
307
|
-
n_jobs=n_jobs,
|
|
308
339
|
geom_type=geom_type,
|
|
340
|
+
overlay_runner=overlay_runner,
|
|
341
|
+
union_runner=union_runner,
|
|
309
342
|
)
|
|
310
343
|
|
|
311
344
|
assert isinstance(overlayed, list)
|
|
@@ -323,8 +356,9 @@ def _shapely_pd_overlay(
|
|
|
323
356
|
overlayed = _add_suffix_left(overlayed, df1, df2, lsuffix)
|
|
324
357
|
|
|
325
358
|
overlayed["geometry"] = make_valid(overlayed["geometry"])
|
|
326
|
-
|
|
327
|
-
|
|
359
|
+
overlayed = overlayed.loc[
|
|
360
|
+
lambda x: (x["geometry"].notna().values) & (~is_empty(x["geometry"].values))
|
|
361
|
+
]
|
|
328
362
|
|
|
329
363
|
return overlayed
|
|
330
364
|
|
|
@@ -336,115 +370,38 @@ def _update(
|
|
|
336
370
|
left: np.ndarray,
|
|
337
371
|
grid_size: float | None | int,
|
|
338
372
|
geom_type: str | None,
|
|
339
|
-
|
|
373
|
+
overlay_runner: OverlayRunner,
|
|
374
|
+
union_runner: UnionRunner,
|
|
340
375
|
) -> GeoDataFrame:
|
|
341
376
|
overlayed = _difference(
|
|
342
|
-
pairs,
|
|
377
|
+
pairs,
|
|
378
|
+
df1,
|
|
379
|
+
left,
|
|
380
|
+
grid_size=grid_size,
|
|
381
|
+
geom_type=geom_type,
|
|
382
|
+
overlay_runner=overlay_runner,
|
|
383
|
+
union_runner=union_runner,
|
|
343
384
|
)
|
|
344
385
|
|
|
345
386
|
return overlayed + [df2]
|
|
346
387
|
|
|
347
388
|
|
|
348
|
-
def _run_overlay_dask(
|
|
349
|
-
arr1: np.ndarray,
|
|
350
|
-
arr2: np.ndarray,
|
|
351
|
-
func: Callable,
|
|
352
|
-
n_jobs: int,
|
|
353
|
-
grid_size: float | int | None,
|
|
354
|
-
) -> np.ndarray:
|
|
355
|
-
if len(arr1) // n_jobs <= 1:
|
|
356
|
-
try:
|
|
357
|
-
return func(arr1, arr2, grid_size=grid_size)
|
|
358
|
-
except TypeError as e:
|
|
359
|
-
raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
|
|
360
|
-
arr1 = da.from_array(arr1, chunks=len(arr1) // n_jobs)
|
|
361
|
-
arr2 = da.from_array(arr2, chunks=len(arr2) // n_jobs)
|
|
362
|
-
res = arr1.map_blocks(func, arr2, grid_size=grid_size, dtype=float)
|
|
363
|
-
return res.compute(scheduler="threads", optimize_graph=False, num_workers=n_jobs)
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
def _run_overlay_joblib_threading(
|
|
367
|
-
arr1: np.ndarray,
|
|
368
|
-
arr2: np.ndarray,
|
|
369
|
-
func: Callable,
|
|
370
|
-
n_jobs: int,
|
|
371
|
-
grid_size: int | float | None,
|
|
372
|
-
) -> list[Geometry]:
|
|
373
|
-
if len(arr1) // n_jobs <= 1:
|
|
374
|
-
try:
|
|
375
|
-
return func(arr1, arr2, grid_size=grid_size)
|
|
376
|
-
except TypeError as e:
|
|
377
|
-
raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
|
|
378
|
-
with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
|
|
379
|
-
return parallel(
|
|
380
|
-
joblib.delayed(func)(g1, g2, grid_size=grid_size)
|
|
381
|
-
for g1, g2 in zip(arr1, arr2, strict=True)
|
|
382
|
-
)
|
|
383
|
-
|
|
384
|
-
|
|
385
389
|
def _intersection(
|
|
386
390
|
pairs: pd.DataFrame,
|
|
387
391
|
grid_size: None | float | int,
|
|
388
392
|
geom_type: str | None,
|
|
389
|
-
|
|
393
|
+
overlay_runner: OverlayRunner,
|
|
390
394
|
) -> GeoDataFrame:
|
|
391
395
|
if not len(pairs):
|
|
392
396
|
return pairs.drop(columns="geom_right")
|
|
393
|
-
|
|
394
397
|
intersections = pairs.copy()
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
arr1,
|
|
403
|
-
arr2,
|
|
404
|
-
func=intersection,
|
|
405
|
-
n_jobs=n_jobs,
|
|
406
|
-
grid_size=grid_size,
|
|
407
|
-
)
|
|
408
|
-
except GEOSException:
|
|
409
|
-
arr1 = make_valid_and_keep_geom_type(
|
|
410
|
-
arr1, geom_type=geom_type, n_jobs=n_jobs
|
|
411
|
-
)
|
|
412
|
-
arr2 = make_valid_and_keep_geom_type(
|
|
413
|
-
arr2, geom_type=geom_type, n_jobs=n_jobs
|
|
414
|
-
)
|
|
415
|
-
arr1 = arr1.loc[lambda x: x.index.isin(arr2.index)]
|
|
416
|
-
arr2 = arr2.loc[lambda x: x.index.isin(arr1.index)]
|
|
417
|
-
|
|
418
|
-
res = _run_overlay_joblib_threading(
|
|
419
|
-
arr1.to_numpy(),
|
|
420
|
-
arr2.to_numpy(),
|
|
421
|
-
func=intersection,
|
|
422
|
-
n_jobs=n_jobs,
|
|
423
|
-
grid_size=grid_size,
|
|
424
|
-
)
|
|
425
|
-
intersections["geometry"] = res
|
|
426
|
-
return intersections.drop(columns="geom_right")
|
|
427
|
-
|
|
428
|
-
try:
|
|
429
|
-
intersections["geometry"] = intersection(
|
|
430
|
-
intersections["geometry"].to_numpy(),
|
|
431
|
-
intersections["geom_right"].to_numpy(),
|
|
432
|
-
grid_size=grid_size,
|
|
433
|
-
)
|
|
434
|
-
except GEOSException:
|
|
435
|
-
left = make_valid_and_keep_geom_type(
|
|
436
|
-
intersections["geometry"].to_numpy(), geom_type, n_jobs=n_jobs
|
|
437
|
-
)
|
|
438
|
-
right = make_valid_and_keep_geom_type(
|
|
439
|
-
intersections["geom_right"].to_numpy(), geom_type, n_jobs=n_jobs
|
|
440
|
-
)
|
|
441
|
-
left = left.loc[lambda x: x.index.isin(right.index)]
|
|
442
|
-
right = right.loc[lambda x: x.index.isin(left.index)]
|
|
443
|
-
|
|
444
|
-
intersections["geometry"] = intersection(
|
|
445
|
-
left.to_numpy(), right.to_numpy(), grid_size=grid_size
|
|
446
|
-
)
|
|
447
|
-
|
|
398
|
+
intersections["geometry"] = overlay_runner.run(
|
|
399
|
+
intersection,
|
|
400
|
+
intersections["geometry"].to_numpy(),
|
|
401
|
+
intersections["geom_right"].to_numpy(),
|
|
402
|
+
grid_size=grid_size,
|
|
403
|
+
geom_type=geom_type,
|
|
404
|
+
)
|
|
448
405
|
return intersections.drop(columns="geom_right")
|
|
449
406
|
|
|
450
407
|
|
|
@@ -457,12 +414,16 @@ def _union(
|
|
|
457
414
|
grid_size: int | float | None,
|
|
458
415
|
rsuffix: str,
|
|
459
416
|
geom_type: str | None,
|
|
460
|
-
|
|
417
|
+
overlay_runner: OverlayRunner,
|
|
418
|
+
union_runner: UnionRunner,
|
|
461
419
|
) -> list[GeoDataFrame]:
|
|
462
420
|
merged = []
|
|
463
421
|
if len(left):
|
|
464
422
|
intersections = _intersection(
|
|
465
|
-
pairs,
|
|
423
|
+
pairs,
|
|
424
|
+
grid_size=grid_size,
|
|
425
|
+
geom_type=geom_type,
|
|
426
|
+
overlay_runner=overlay_runner,
|
|
466
427
|
)
|
|
467
428
|
merged.append(intersections)
|
|
468
429
|
symmdiff = _symmetric_difference(
|
|
@@ -474,7 +435,8 @@ def _union(
|
|
|
474
435
|
grid_size=grid_size,
|
|
475
436
|
rsuffix=rsuffix,
|
|
476
437
|
geom_type=geom_type,
|
|
477
|
-
|
|
438
|
+
overlay_runner=overlay_runner,
|
|
439
|
+
union_runner=union_runner,
|
|
478
440
|
)
|
|
479
441
|
merged += symmdiff
|
|
480
442
|
return merged
|
|
@@ -486,15 +448,27 @@ def _identity(
|
|
|
486
448
|
left: np.ndarray,
|
|
487
449
|
grid_size: int | float | None,
|
|
488
450
|
geom_type: str | None,
|
|
489
|
-
|
|
451
|
+
overlay_runner: OverlayRunner,
|
|
452
|
+
union_runner: UnionRunner,
|
|
490
453
|
) -> list[GeoDataFrame]:
|
|
491
454
|
merged = []
|
|
492
455
|
if len(left):
|
|
493
456
|
intersections = _intersection(
|
|
494
|
-
pairs,
|
|
457
|
+
pairs,
|
|
458
|
+
grid_size=grid_size,
|
|
459
|
+
geom_type=geom_type,
|
|
460
|
+
overlay_runner=overlay_runner,
|
|
495
461
|
)
|
|
496
462
|
merged.append(intersections)
|
|
497
|
-
diff = _difference(
|
|
463
|
+
diff = _difference(
|
|
464
|
+
pairs,
|
|
465
|
+
df1,
|
|
466
|
+
left,
|
|
467
|
+
geom_type=geom_type,
|
|
468
|
+
grid_size=grid_size,
|
|
469
|
+
overlay_runner=overlay_runner,
|
|
470
|
+
union_runner=union_runner,
|
|
471
|
+
)
|
|
498
472
|
merged += diff
|
|
499
473
|
return merged
|
|
500
474
|
|
|
@@ -508,12 +482,19 @@ def _symmetric_difference(
|
|
|
508
482
|
grid_size: int | float | None,
|
|
509
483
|
rsuffix: str,
|
|
510
484
|
geom_type: str | None,
|
|
511
|
-
|
|
485
|
+
overlay_runner: OverlayRunner,
|
|
486
|
+
union_runner: UnionRunner,
|
|
512
487
|
) -> list[GeoDataFrame]:
|
|
513
488
|
merged = []
|
|
514
489
|
|
|
515
490
|
difference_left = _difference(
|
|
516
|
-
pairs,
|
|
491
|
+
pairs,
|
|
492
|
+
df1,
|
|
493
|
+
left,
|
|
494
|
+
grid_size=grid_size,
|
|
495
|
+
geom_type=geom_type,
|
|
496
|
+
overlay_runner=overlay_runner,
|
|
497
|
+
union_runner=union_runner,
|
|
517
498
|
)
|
|
518
499
|
merged += difference_left
|
|
519
500
|
|
|
@@ -525,7 +506,8 @@ def _symmetric_difference(
|
|
|
525
506
|
grid_size=grid_size,
|
|
526
507
|
rsuffix=rsuffix,
|
|
527
508
|
geom_type=geom_type,
|
|
528
|
-
|
|
509
|
+
overlay_runner=overlay_runner,
|
|
510
|
+
union_runner=union_runner,
|
|
529
511
|
)
|
|
530
512
|
merged.append(clip_right)
|
|
531
513
|
|
|
@@ -539,9 +521,10 @@ def _difference(
|
|
|
539
521
|
pairs: pd.DataFrame,
|
|
540
522
|
df1: pd.DataFrame,
|
|
541
523
|
left: np.ndarray,
|
|
542
|
-
grid_size: int | float | None
|
|
543
|
-
geom_type: str | None
|
|
544
|
-
|
|
524
|
+
grid_size: int | float | None,
|
|
525
|
+
geom_type: str | None,
|
|
526
|
+
overlay_runner: OverlayRunner,
|
|
527
|
+
union_runner: UnionRunner,
|
|
545
528
|
) -> list[GeoDataFrame]:
|
|
546
529
|
merged = []
|
|
547
530
|
if len(left):
|
|
@@ -550,7 +533,8 @@ def _difference(
|
|
|
550
533
|
df1=df1,
|
|
551
534
|
grid_size=grid_size,
|
|
552
535
|
geom_type=geom_type,
|
|
553
|
-
|
|
536
|
+
overlay_runner=overlay_runner,
|
|
537
|
+
union_runner=union_runner,
|
|
554
538
|
)
|
|
555
539
|
merged.append(clip_left)
|
|
556
540
|
diff_left = _add_indices_from_left(df1, left)
|
|
@@ -618,7 +602,8 @@ def _shapely_diffclip_left(
|
|
|
618
602
|
df1: pd.DataFrame,
|
|
619
603
|
grid_size: int | float | None,
|
|
620
604
|
geom_type: str | None,
|
|
621
|
-
|
|
605
|
+
overlay_runner: OverlayRunner,
|
|
606
|
+
union_runner: UnionRunner,
|
|
622
607
|
) -> pd.DataFrame:
|
|
623
608
|
"""Aggregate areas in right by unique values from left, then erases those from left."""
|
|
624
609
|
keep_cols = list(df1.columns.difference({"_overlay_index_right"})) + ["geom_right"]
|
|
@@ -675,12 +660,14 @@ def _shapely_diffclip_left(
|
|
|
675
660
|
}
|
|
676
661
|
)
|
|
677
662
|
|
|
678
|
-
agged =
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
)
|
|
663
|
+
agged = union_runner.run(agger["geom_right"], level=0)
|
|
664
|
+
# agged = pd.Series(
|
|
665
|
+
|
|
666
|
+
# {
|
|
667
|
+
# i: agg_geoms_partial(geoms)
|
|
668
|
+
# for i, geoms in agger.groupby(level=0)["geom_right"]
|
|
669
|
+
# }
|
|
670
|
+
# )
|
|
684
671
|
many_hits_agged["geom_right"] = inverse_index_mapper.map(agged)
|
|
685
672
|
many_hits_agged = many_hits_agged.drop(columns=["_right_indices"])
|
|
686
673
|
|
|
@@ -688,15 +675,19 @@ def _shapely_diffclip_left(
|
|
|
688
675
|
except IndexError:
|
|
689
676
|
clip_left = pairs.loc[:, list(keep_cols)]
|
|
690
677
|
|
|
691
|
-
assert clip_left["geometry"].notna().all()
|
|
692
|
-
|
|
678
|
+
assert clip_left["geometry"].notna().all(), clip_left["geometry"][
|
|
679
|
+
lambda x: x.isna()
|
|
680
|
+
]
|
|
681
|
+
assert clip_left["geom_right"].notna().all(), clip_left["geom_right"][
|
|
682
|
+
lambda x: x.isna()
|
|
683
|
+
]
|
|
693
684
|
|
|
694
|
-
clip_left["geometry"] =
|
|
685
|
+
clip_left["geometry"] = overlay_runner.run(
|
|
686
|
+
difference,
|
|
695
687
|
clip_left["geometry"].to_numpy(),
|
|
696
688
|
clip_left["geom_right"].to_numpy(),
|
|
697
689
|
grid_size=grid_size,
|
|
698
690
|
geom_type=geom_type,
|
|
699
|
-
n_jobs=n_jobs,
|
|
700
691
|
)
|
|
701
692
|
|
|
702
693
|
return clip_left.drop(columns="geom_right")
|
|
@@ -709,7 +700,8 @@ def _shapely_diffclip_right(
|
|
|
709
700
|
grid_size: int | float | None,
|
|
710
701
|
rsuffix: str,
|
|
711
702
|
geom_type: str | None,
|
|
712
|
-
|
|
703
|
+
overlay_runner: OverlayRunner,
|
|
704
|
+
union_runner: UnionRunner,
|
|
713
705
|
) -> pd.DataFrame:
|
|
714
706
|
agg_geoms_partial = functools.partial(_agg_geoms, grid_size=grid_size)
|
|
715
707
|
|
|
@@ -720,16 +712,22 @@ def _shapely_diffclip_right(
|
|
|
720
712
|
one_hit = pairs[only_one].set_index("_overlay_index_right")[
|
|
721
713
|
["geom_left", "geometry"]
|
|
722
714
|
]
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
"geom_left": agg_geoms_partial,
|
|
729
|
-
"geometry": "first",
|
|
730
|
-
}
|
|
731
|
-
)
|
|
715
|
+
many_hits_ungrouped = pairs[~only_one].set_index("_overlay_index_right")
|
|
716
|
+
many_hits = pd.DataFrame(index=many_hits_ungrouped.index.unique())
|
|
717
|
+
many_hits["geometry"] = many_hits_ungrouped.groupby(level=0)["geometry"].first()
|
|
718
|
+
many_hits["geom_left"] = union_runner.run(
|
|
719
|
+
many_hits_ungrouped["geom_left"], level=0
|
|
732
720
|
)
|
|
721
|
+
# many_hits = (
|
|
722
|
+
# pairs[~only_one]
|
|
723
|
+
# .groupby("_overlay_index_right")
|
|
724
|
+
# .agg(
|
|
725
|
+
# {
|
|
726
|
+
# "geom_left": agg_geoms_partial,
|
|
727
|
+
# "geometry": "first",
|
|
728
|
+
# }
|
|
729
|
+
# )
|
|
730
|
+
# )
|
|
733
731
|
clip_right = (
|
|
734
732
|
pd.concat([one_hit, many_hits])
|
|
735
733
|
.join(df2.drop(columns=["geometry"]))
|
|
@@ -748,10 +746,15 @@ def _shapely_diffclip_right(
|
|
|
748
746
|
}
|
|
749
747
|
)
|
|
750
748
|
|
|
751
|
-
assert clip_right["geometry"].notna().all()
|
|
752
|
-
|
|
749
|
+
assert clip_right["geometry"].notna().all(), clip_right["geometry"][
|
|
750
|
+
lambda x: x.isna()
|
|
751
|
+
]
|
|
752
|
+
assert clip_right["geom_left"].notna().all(), clip_right["geom_left"][
|
|
753
|
+
lambda x: x.isna()
|
|
754
|
+
]
|
|
753
755
|
|
|
754
|
-
clip_right["geometry"] =
|
|
756
|
+
clip_right["geometry"] = overlay_runner.run(
|
|
757
|
+
difference,
|
|
755
758
|
clip_right["geometry"].to_numpy(),
|
|
756
759
|
clip_right["geom_left"].to_numpy(),
|
|
757
760
|
grid_size=grid_size,
|
|
@@ -761,87 +764,5 @@ def _shapely_diffclip_right(
|
|
|
761
764
|
return clip_right.drop(columns="geom_left")
|
|
762
765
|
|
|
763
766
|
|
|
764
|
-
def _try_difference(
|
|
765
|
-
left: np.ndarray,
|
|
766
|
-
right: np.ndarray,
|
|
767
|
-
grid_size: int | float | None,
|
|
768
|
-
geom_type: str | None,
|
|
769
|
-
n_jobs: int = 1,
|
|
770
|
-
) -> np.ndarray:
|
|
771
|
-
"""Try difference overlay, then make_valid and retry."""
|
|
772
|
-
if n_jobs > 1 and len(left) / n_jobs > 10:
|
|
773
|
-
try:
|
|
774
|
-
return _run_overlay_joblib_threading(
|
|
775
|
-
left,
|
|
776
|
-
right,
|
|
777
|
-
func=difference,
|
|
778
|
-
n_jobs=n_jobs,
|
|
779
|
-
grid_size=grid_size,
|
|
780
|
-
)
|
|
781
|
-
except GEOSException:
|
|
782
|
-
left = make_valid_and_keep_geom_type(
|
|
783
|
-
left, geom_type=geom_type, n_jobs=n_jobs
|
|
784
|
-
)
|
|
785
|
-
right = make_valid_and_keep_geom_type(
|
|
786
|
-
right, geom_type=geom_type, n_jobs=n_jobs
|
|
787
|
-
)
|
|
788
|
-
left = left.loc[lambda x: x.index.isin(right.index)]
|
|
789
|
-
right = right.loc[lambda x: x.index.isin(left.index)]
|
|
790
|
-
|
|
791
|
-
return _run_overlay_joblib_threading(
|
|
792
|
-
left.to_numpy(),
|
|
793
|
-
right.to_numpy(),
|
|
794
|
-
func=difference,
|
|
795
|
-
n_jobs=n_jobs,
|
|
796
|
-
grid_size=grid_size,
|
|
797
|
-
)
|
|
798
|
-
|
|
799
|
-
try:
|
|
800
|
-
return difference(
|
|
801
|
-
left,
|
|
802
|
-
right,
|
|
803
|
-
grid_size=grid_size,
|
|
804
|
-
)
|
|
805
|
-
except GEOSException:
|
|
806
|
-
left = make_valid_and_keep_geom_type(left, geom_type, n_jobs=n_jobs)
|
|
807
|
-
right = make_valid_and_keep_geom_type(right, geom_type, n_jobs=n_jobs)
|
|
808
|
-
left = left.loc[lambda x: x.index.isin(right.index)]
|
|
809
|
-
right = right.loc[lambda x: x.index.isin(left.index)]
|
|
810
|
-
try:
|
|
811
|
-
return difference(
|
|
812
|
-
left.to_numpy(),
|
|
813
|
-
right.to_numpy(),
|
|
814
|
-
grid_size=grid_size,
|
|
815
|
-
)
|
|
816
|
-
except GEOSException as e:
|
|
817
|
-
raise e.__class__(e, f"{grid_size=}", f"{left=}", f"{right=}") from e
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
def make_valid_and_keep_geom_type(
|
|
821
|
-
geoms: np.ndarray, geom_type: str, n_jobs: int
|
|
822
|
-
) -> GeoSeries:
|
|
823
|
-
"""Make GeometryCollections into (Multi)Polygons, (Multi)LineStrings or (Multi)Points.
|
|
824
|
-
|
|
825
|
-
Because GeometryCollections might appear after dissolving (unary_union).
|
|
826
|
-
And this makes shapely difference/intersection fail.
|
|
827
|
-
|
|
828
|
-
Args:
|
|
829
|
-
geoms: Array of geometries.
|
|
830
|
-
geom_type: geometry type to be kept.
|
|
831
|
-
n_jobs: Number of treads.
|
|
832
|
-
"""
|
|
833
|
-
geoms = GeoSeries(geoms)
|
|
834
|
-
geoms.index = range(len(geoms))
|
|
835
|
-
geoms.loc[:] = make_valid(geoms.to_numpy())
|
|
836
|
-
geoms_with_correct_type = geoms.explode(index_parts=False).pipe(
|
|
837
|
-
to_single_geom_type, geom_type
|
|
838
|
-
)
|
|
839
|
-
only_one = geoms_with_correct_type.groupby(level=0).transform("size") == 1
|
|
840
|
-
one_hit = geoms_with_correct_type[only_one]
|
|
841
|
-
many_hits = geoms_with_correct_type[~only_one].groupby(level=0).agg(unary_union)
|
|
842
|
-
geoms_with_wrong_type = geoms.loc[~geoms.index.isin(geoms_with_correct_type.index)]
|
|
843
|
-
return pd.concat([one_hit, many_hits, geoms_with_wrong_type]).sort_index()
|
|
844
|
-
|
|
845
|
-
|
|
846
767
|
def _agg_geoms(g: np.ndarray, grid_size: int | float | None = None) -> Geometry:
|
|
847
|
-
return make_valid(
|
|
768
|
+
return make_valid(union_all(g, grid_size=grid_size))
|