ssb-sgis 1.1.17-py3-none-any.whl → 1.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +5 -0
- sgis/conf.py +18 -0
- sgis/geopandas_tools/buffer_dissolve_explode.py +25 -47
- sgis/geopandas_tools/conversion.py +18 -25
- sgis/geopandas_tools/duplicates.py +45 -60
- sgis/geopandas_tools/general.py +69 -114
- sgis/geopandas_tools/neighbors.py +25 -4
- sgis/geopandas_tools/overlay.py +178 -256
- sgis/geopandas_tools/polygon_operations.py +68 -88
- sgis/geopandas_tools/runners.py +326 -0
- sgis/geopandas_tools/sfilter.py +42 -24
- sgis/geopandas_tools/utils.py +37 -0
- sgis/helpers.py +1 -1
- sgis/io/dapla_functions.py +96 -107
- sgis/maps/map.py +3 -1
- sgis/parallel/parallel.py +32 -24
- sgis/raster/image_collection.py +184 -162
- sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/RECORD +22 -20
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/overlay.py
CHANGED
@@ -9,34 +9,30 @@ version of the solution from GH 2792.
 """
 
 import functools
-from collections.abc import Callable
 
 import geopandas as gpd
-import joblib
 import numpy as np
 import pandas as pd
 from geopandas import GeoDataFrame
-from geopandas import GeoSeries
 from pandas import DataFrame
 from shapely import Geometry
-from shapely import STRtree
 from shapely import box
 from shapely import difference
 from shapely import intersection
+from shapely import is_empty
 from shapely import make_valid
-from shapely import unary_union
-from shapely.errors import GEOSException
-
-try:
-    import dask.array as da
-except ImportError:
-    pass
+from shapely import union_all
 
+from ..conf import _get_instance
+from ..conf import config
 from .general import _determine_geom_type_args
 from .general import clean_geoms
 from .geometry_types import get_geom_type
 from .geometry_types import make_all_singlepart
 from .geometry_types import to_single_geom_type
+from .runners import OverlayRunner
+from .runners import RTreeQueryRunner
+from .runners import UnionRunner
 
 DEFAULT_GRID_SIZE = None
 DEFAULT_LSUFFIX = "_1"
@@ -51,9 +47,12 @@ def clean_overlay(
     geom_type: str | None = None,
     predicate: str | None = "intersects",
     grid_size: float | None = None,
-    n_jobs: int = 1,
     lsuffix: str = DEFAULT_LSUFFIX,
     rsuffix: str = DEFAULT_RSUFFIX,
+    n_jobs: int = 1,
+    rtree_runner: RTreeQueryRunner | None = None,
+    union_runner: UnionRunner | None = None,
+    overlay_runner: OverlayRunner | None = None,
 ) -> GeoDataFrame:
     """Fixes and explodes geometries before doing a shapely overlay, then cleans up.
 
@@ -74,10 +73,16 @@ def clean_overlay(
             "point".
         grid_size: Precision grid size to round the geometries. Will use the highest
             precision of the inputs by default.
-        n_jobs: number of threads.
         predicate: Spatial predicate in the spatial tree.
         lsuffix: Suffix of columns in df1 that are also in df2.
         rsuffix: Suffix of columns in df2 that are also in df1.
+        n_jobs: number of jobs. Defaults to 1.
+        union_runner: Optionally debug/manipulate the spatial union operations.
+            See the 'runners' module for example implementations.
+        rtree_runner: Optionally debug/manipulate the spatial indexing operations.
+            See the 'runners' module for example implementations.
+        overlay_runner: Optionally debug/manipulate the spatial overlay operations.
+            See the 'runners' module for example implementations.
 
     Returns:
         GeoDataFrame with overlayed and fixed geometries and columns from both
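The three new runner arguments replace the old `n_jobs`-only parallelism switch: callers can now inject their own objects for the spatial-index, union and overlay steps. A minimal usage sketch follows; `clean_overlay` and the module path `sgis.geopandas_tools.runners` are taken from this diff, while the `RTreeQueryRunner(n_jobs=...)` constructor signature is an assumption.

import geopandas as gpd
from shapely.geometry import box

import sgis as sg
from sgis.geopandas_tools.runners import RTreeQueryRunner

df1 = gpd.GeoDataFrame({"a": [1]}, geometry=[box(0, 0, 2, 2)], crs=25833)
df2 = gpd.GeoDataFrame({"b": [1]}, geometry=[box(1, 1, 3, 3)], crs=25833)

# Default behaviour: runners are resolved from sgis' config (see the next hunk).
res = sg.clean_overlay(df1, df2, how="intersection", n_jobs=4)

# Explicit runner, e.g. to debug or tune the spatial-index step;
# the constructor arguments shown here are assumed, not confirmed by this diff.
res = sg.clean_overlay(
    df1, df2, how="intersection", rtree_runner=RTreeQueryRunner(n_jobs=4)
)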
@@ -104,6 +109,13 @@ def clean_overlay(
     if df1.crs != df2.crs:
         raise ValueError(f"'crs' mismatch. Got {df1.crs} and {df2.crs}")
 
+    if rtree_runner is None:
+        rtree_runner = _get_instance(config, "rtree_runner", n_jobs=n_jobs)
+    if union_runner is None:
+        union_runner = _get_instance(config, "union_runner", n_jobs=n_jobs)
+    if overlay_runner is None:
+        overlay_runner = _get_instance(config, "overlay_runner", n_jobs=n_jobs)
+
     crs = df1.crs
 
     # original_geom_type = geom_type
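`config` and `_get_instance` come from the new `sgis/conf.py` (+18 lines), which is not part of this diff, so their exact behaviour is not visible here. Judging only from the call sites above (a string key plus runtime keyword arguments such as `n_jobs`), the lookup could look roughly like the sketch below; every name and detail in it is an assumption, not the shipped implementation.

from typing import Any

# Hypothetical registry mapping config keys to runner classes or instances.
config: dict[str, Any] = {}


def _get_instance(conf: dict[str, Any], key: str, **kwargs) -> Any:
    """Return an instance of the runner configured under `key` (sketch only)."""
    configured = conf[key]
    # A configured class/factory is called with the runtime kwargs (e.g. n_jobs);
    # an already-constructed object would be returned unchanged.
    return configured(**kwargs) if callable(configured) else configured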
@@ -148,13 +160,16 @@ def clean_overlay(
     box1 = box(*df1.total_bounds)
     box2 = box(*df2.total_bounds)
 
-    if not
+    if not grid_size and (
+        (not len(df1) or not len(df2))
+        or (not box1.intersects(box2) and how == "intersection")
+    ):
         return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
 
-    if df1.
+    if df1.geometry.name != "geometry":
         df1 = df1.rename_geometry("geometry")
 
-    if df2.
+    if df2.geometry.name != "geometry":
         df2 = df2.rename_geometry("geometry")
 
     # to pandas because GeoDataFrame constructor is expensive
@@ -171,8 +186,10 @@ def clean_overlay(
            lsuffix=lsuffix,
            rsuffix=rsuffix,
            geom_type=geom_type,
-           n_jobs=n_jobs,
            predicate=predicate,
+           rtree_runner=rtree_runner,
+           overlay_runner=overlay_runner,
+           union_runner=union_runner,
        ),
        geometry="geometry",
        crs=crs,
@@ -188,9 +205,9 @@ def clean_overlay(
 
 
 def _join_and_get_no_rows(df1, df2, lsuffix, rsuffix):
-    geom_col = df1.
+    geom_col = df1.geometry.name
     df1_cols = df1.columns.difference({geom_col})
-    df2_cols = df2.columns.difference({df2.
+    df2_cols = df2.columns.difference({df2.geometry.name})
     cols_with_suffix = [
         f"{col}{lsuffix}" if col in df2_cols else col for col in df1_cols
     ] + [f"{col}{rsuffix}" if col in df1_cols else col for col in df2_cols]
@@ -215,7 +232,7 @@ def _no_intersections_return(
     if how == "identity":
         # add suffixes and return df1
         df_template = _join_and_get_no_rows(df1, df2, lsuffix, rsuffix)
-        df2_cols = df2.columns.difference({df2.
+        df2_cols = df2.columns.difference({df2.geometry.name})
         df1.columns = [f"{col}{lsuffix}" if col in df2_cols else col for col in df1]
         return pd.concat([df_template, df1], ignore_index=True)
 
@@ -237,33 +254,41 @@ def _shapely_pd_overlay(
     df1: DataFrame,
     df2: DataFrame,
     how: str,
-    grid_size: float
-    predicate: str
-    lsuffix: str
-    rsuffix: str
-    geom_type: str | None
-
+    grid_size: float,
+    predicate: str,
+    lsuffix: str,
+    rsuffix: str,
+    geom_type: str | None,
+    rtree_runner: RTreeQueryRunner,
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> DataFrame:
-
-
-
-    tree = STRtree(df2.geometry.values)
-    left, right = tree.query(df1.geometry.values, predicate=predicate)
-
+    left, right = rtree_runner.run(
+        df1.geometry.values, df2.geometry.values, predicate=predicate
+    )
     pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
-    assert pairs
-    assert pairs
+    assert pairs["geometry"].notna().all(), pairs.geometry[lambda x: x.isna()]
+    assert pairs["geom_right"].notna().all(), pairs.geom_right[lambda x: x.isna()]
 
     if how == "intersection":
         overlayed = [
             _intersection(
-                pairs,
+                pairs,
+                grid_size=grid_size,
+                geom_type=geom_type,
+                overlay_runner=overlay_runner,
             )
         ]
 
     elif how == "difference":
         overlayed = _difference(
-            pairs,
+            pairs,
+            df1,
+            left,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
        )
 
     elif how == "symmetric_difference":
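The inline `STRtree` build-and-query above is now delegated to `rtree_runner.run(left_geoms, right_geoms, predicate=...)`. The shipped class lives in the new `runners.py` (+326 lines) and is not shown in this diff; a minimal runner that reproduces the removed behaviour behind that interface could look like this, with the class layout assumed:

import numpy as np
from shapely import STRtree


class SketchRTreeQueryRunner:
    """Illustrative stand-in for sgis.geopandas_tools.runners.RTreeQueryRunner."""

    def __init__(self, n_jobs: int = 1) -> None:
        self.n_jobs = n_jobs  # kept for API symmetry; unused in this sketch

    def run(
        self, left: np.ndarray, right: np.ndarray, predicate: str | None = None
    ) -> np.ndarray:
        # Same as the removed inline code: index the right-hand geometries and
        # query them with the left-hand ones. Returns a (2, n) array of
        # (left index, right index) pairs, which the caller unpacks.
        tree = STRtree(right)
        return tree.query(left, predicate=predicate)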
@@ -276,12 +301,19 @@ def _shapely_pd_overlay(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "identity":
         overlayed = _identity(
-            pairs,
+            pairs,
+            df1,
+            left,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "union":
@@ -294,7 +326,8 @@ def _shapely_pd_overlay(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "update":
@@ -304,8 +337,9 @@ def _shapely_pd_overlay(
             df2,
             left=left,
             grid_size=grid_size,
-            n_jobs=n_jobs,
             geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     assert isinstance(overlayed, list)
@@ -323,8 +357,9 @@ def _shapely_pd_overlay(
     overlayed = _add_suffix_left(overlayed, df1, df2, lsuffix)
 
     overlayed["geometry"] = make_valid(overlayed["geometry"])
-
-
+    overlayed = overlayed.loc[
+        lambda x: (x["geometry"].notna().values) & (~is_empty(x["geometry"].values))
+    ]
 
     return overlayed
 
@@ -336,115 +371,38 @@ def _update(
     left: np.ndarray,
     grid_size: float | None | int,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> GeoDataFrame:
     overlayed = _difference(
-        pairs,
+        pairs,
+        df1,
+        left,
+        grid_size=grid_size,
+        geom_type=geom_type,
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
 
     return overlayed + [df2]
 
 
-def _run_overlay_dask(
-    arr1: np.ndarray,
-    arr2: np.ndarray,
-    func: Callable,
-    n_jobs: int,
-    grid_size: float | int | None,
-) -> np.ndarray:
-    if len(arr1) // n_jobs <= 1:
-        try:
-            return func(arr1, arr2, grid_size=grid_size)
-        except TypeError as e:
-            raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
-    arr1 = da.from_array(arr1, chunks=len(arr1) // n_jobs)
-    arr2 = da.from_array(arr2, chunks=len(arr2) // n_jobs)
-    res = arr1.map_blocks(func, arr2, grid_size=grid_size, dtype=float)
-    return res.compute(scheduler="threads", optimize_graph=False, num_workers=n_jobs)
-
-
-def _run_overlay_joblib_threading(
-    arr1: np.ndarray,
-    arr2: np.ndarray,
-    func: Callable,
-    n_jobs: int,
-    grid_size: int | float | None,
-) -> list[Geometry]:
-    if len(arr1) // n_jobs <= 1:
-        try:
-            return func(arr1, arr2, grid_size=grid_size)
-        except TypeError as e:
-            raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
-    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
-        return parallel(
-            joblib.delayed(func)(g1, g2, grid_size=grid_size)
-            for g1, g2 in zip(arr1, arr2, strict=True)
-        )
-
-
 def _intersection(
     pairs: pd.DataFrame,
     grid_size: None | float | int,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
 ) -> GeoDataFrame:
     if not len(pairs):
         return pairs.drop(columns="geom_right")
-
     intersections = pairs.copy()
-
-
-
-
-
-
-
-            arr1,
-            arr2,
-            func=intersection,
-            n_jobs=n_jobs,
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        arr1 = make_valid_and_keep_geom_type(
-            arr1, geom_type=geom_type, n_jobs=n_jobs
-        )
-        arr2 = make_valid_and_keep_geom_type(
-            arr2, geom_type=geom_type, n_jobs=n_jobs
-        )
-        arr1 = arr1.loc[lambda x: x.index.isin(arr2.index)]
-        arr2 = arr2.loc[lambda x: x.index.isin(arr1.index)]
-
-        res = _run_overlay_joblib_threading(
-            arr1.to_numpy(),
-            arr2.to_numpy(),
-            func=intersection,
-            n_jobs=n_jobs,
-            grid_size=grid_size,
-        )
-        intersections["geometry"] = res
-        return intersections.drop(columns="geom_right")
-
-    try:
-        intersections["geometry"] = intersection(
-            intersections["geometry"].to_numpy(),
-            intersections["geom_right"].to_numpy(),
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        left = make_valid_and_keep_geom_type(
-            intersections["geometry"].to_numpy(), geom_type, n_jobs=n_jobs
-        )
-        right = make_valid_and_keep_geom_type(
-            intersections["geom_right"].to_numpy(), geom_type, n_jobs=n_jobs
-        )
-        left = left.loc[lambda x: x.index.isin(right.index)]
-        right = right.loc[lambda x: x.index.isin(left.index)]
-
-        intersections["geometry"] = intersection(
-            left.to_numpy(), right.to_numpy(), grid_size=grid_size
-        )
-
+    intersections["geometry"] = overlay_runner.run(
+        intersection,
+        intersections["geometry"].to_numpy(),
+        intersections["geom_right"].to_numpy(),
+        grid_size=grid_size,
+        geom_type=geom_type,
+    )
     return intersections.drop(columns="geom_right")
 
 
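In `_intersection` above (and in the difference/clip helpers further down), `overlay_runner.run(func, arr1, arr2, grid_size=..., geom_type=...)` now stands in for the plain shapely call plus the removed `GEOSException` → `make_valid` retry and the joblib/dask threading. A stripped-down runner matching that call signature might look like the following; the shipped `OverlayRunner` presumably also uses `geom_type` and `n_jobs`, which this sketch ignores:

from collections.abc import Callable

import numpy as np
from shapely import make_valid
from shapely.errors import GEOSException


class SketchOverlayRunner:
    """Illustrative stand-in for sgis.geopandas_tools.runners.OverlayRunner."""

    def __init__(self, n_jobs: int = 1) -> None:
        self.n_jobs = n_jobs

    def run(
        self,
        func: Callable,  # e.g. shapely.intersection or shapely.difference
        arr1: np.ndarray,
        arr2: np.ndarray,
        grid_size: float | None = None,
        geom_type: str | None = None,  # ignored in this sketch
    ) -> np.ndarray:
        try:
            return func(arr1, arr2, grid_size=grid_size)
        except GEOSException:
            # Mirror the removed fallback: repair invalid geometries and retry.
            return func(make_valid(arr1), make_valid(arr2), grid_size=grid_size)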
@@ -457,12 +415,16 @@ def _union(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
         intersections = _intersection(
-            pairs,
+            pairs,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
         )
         merged.append(intersections)
         symmdiff = _symmetric_difference(
@@ -474,7 +436,8 @@ def _union(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
         merged += symmdiff
     return merged
@@ -486,15 +449,27 @@ def _identity(
     left: np.ndarray,
     grid_size: int | float | None,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
         intersections = _intersection(
-            pairs,
+            pairs,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
         )
         merged.append(intersections)
-    diff = _difference(
+    diff = _difference(
+        pairs,
+        df1,
+        left,
+        geom_type=geom_type,
+        grid_size=grid_size,
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
+    )
     merged += diff
     return merged
 
@@ -508,12 +483,19 @@ def _symmetric_difference(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
 
     difference_left = _difference(
-        pairs,
+        pairs,
+        df1,
+        left,
+        grid_size=grid_size,
+        geom_type=geom_type,
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
     merged += difference_left
 
@@ -525,7 +507,8 @@ def _symmetric_difference(
         grid_size=grid_size,
         rsuffix=rsuffix,
         geom_type=geom_type,
-
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
     merged.append(clip_right)
 
@@ -539,9 +522,10 @@ def _difference(
     pairs: pd.DataFrame,
     df1: pd.DataFrame,
     left: np.ndarray,
-    grid_size: int | float | None
-    geom_type: str | None
-
+    grid_size: int | float | None,
+    geom_type: str | None,
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
@@ -550,7 +534,8 @@ def _difference(
             df1=df1,
             grid_size=grid_size,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
         merged.append(clip_left)
         diff_left = _add_indices_from_left(df1, left)
@@ -618,7 +603,8 @@ def _shapely_diffclip_left(
     df1: pd.DataFrame,
     grid_size: int | float | None,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> pd.DataFrame:
     """Aggregate areas in right by unique values from left, then erases those from left."""
     keep_cols = list(df1.columns.difference({"_overlay_index_right"})) + ["geom_right"]
@@ -675,12 +661,14 @@ def _shapely_diffclip_left(
         }
     )
 
-    agged =
-
-
-
-
-    )
+    agged = union_runner.run(agger["geom_right"], level=0)
+    # agged = pd.Series(
+
+    #     {
+    #         i: agg_geoms_partial(geoms)
+    #         for i, geoms in agger.groupby(level=0)["geom_right"]
+    #     }
+    # )
     many_hits_agged["geom_right"] = inverse_index_mapper.map(agged)
     many_hits_agged = many_hits_agged.drop(columns=["_right_indices"])
 
@@ -688,15 +676,19 @@ def _shapely_diffclip_left(
     except IndexError:
         clip_left = pairs.loc[:, list(keep_cols)]
 
-    assert clip_left["geometry"].notna().all()
-
+    assert clip_left["geometry"].notna().all(), clip_left["geometry"][
+        lambda x: x.isna()
+    ]
+    assert clip_left["geom_right"].notna().all(), clip_left["geom_right"][
+        lambda x: x.isna()
+    ]
 
-    clip_left["geometry"] =
+    clip_left["geometry"] = overlay_runner.run(
+        difference,
         clip_left["geometry"].to_numpy(),
         clip_left["geom_right"].to_numpy(),
         grid_size=grid_size,
         geom_type=geom_type,
-        n_jobs=n_jobs,
     )
 
     return clip_left.drop(columns="geom_right")
@@ -709,7 +701,8 @@ def _shapely_diffclip_right(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> pd.DataFrame:
     agg_geoms_partial = functools.partial(_agg_geoms, grid_size=grid_size)
 
@@ -720,16 +713,22 @@ def _shapely_diffclip_right(
     one_hit = pairs[only_one].set_index("_overlay_index_right")[
         ["geom_left", "geometry"]
     ]
-
-
-
-
-
-            "geom_left": agg_geoms_partial,
-            "geometry": "first",
-        }
-    )
+    many_hits_ungrouped = pairs[~only_one].set_index("_overlay_index_right")
+    many_hits = pd.DataFrame(index=many_hits_ungrouped.index.unique())
+    many_hits["geometry"] = many_hits_ungrouped.groupby(level=0)["geometry"].first()
+    many_hits["geom_left"] = union_runner.run(
+        many_hits_ungrouped["geom_left"], level=0
     )
+    # many_hits = (
+    #     pairs[~only_one]
+    #     .groupby("_overlay_index_right")
+    #     .agg(
+    #         {
+    #             "geom_left": agg_geoms_partial,
+    #             "geometry": "first",
+    #         }
+    #     )
+    # )
     clip_right = (
         pd.concat([one_hit, many_hits])
         .join(df2.drop(columns=["geometry"]))
@@ -748,10 +747,15 @@ def _shapely_diffclip_right(
         }
     )
 
-    assert clip_right["geometry"].notna().all()
-
+    assert clip_right["geometry"].notna().all(), clip_right["geometry"][
+        lambda x: x.isna()
+    ]
+    assert clip_right["geom_left"].notna().all(), clip_right["geom_left"][
+        lambda x: x.isna()
+    ]
 
-    clip_right["geometry"] =
+    clip_right["geometry"] = overlay_runner.run(
+        difference,
         clip_right["geometry"].to_numpy(),
         clip_right["geom_left"].to_numpy(),
         grid_size=grid_size,
@@ -761,87 +765,5 @@ def _shapely_diffclip_right(
     return clip_right.drop(columns="geom_left")
 
 
-def _try_difference(
-    left: np.ndarray,
-    right: np.ndarray,
-    grid_size: int | float | None,
-    geom_type: str | None,
-    n_jobs: int = 1,
-) -> np.ndarray:
-    """Try difference overlay, then make_valid and retry."""
-    if n_jobs > 1 and len(left) / n_jobs > 10:
-        try:
-            return _run_overlay_joblib_threading(
-                left,
-                right,
-                func=difference,
-                n_jobs=n_jobs,
-                grid_size=grid_size,
-            )
-        except GEOSException:
-            left = make_valid_and_keep_geom_type(
-                left, geom_type=geom_type, n_jobs=n_jobs
-            )
-            right = make_valid_and_keep_geom_type(
-                right, geom_type=geom_type, n_jobs=n_jobs
-            )
-            left = left.loc[lambda x: x.index.isin(right.index)]
-            right = right.loc[lambda x: x.index.isin(left.index)]
-
-            return _run_overlay_joblib_threading(
-                left.to_numpy(),
-                right.to_numpy(),
-                func=difference,
-                n_jobs=n_jobs,
-                grid_size=grid_size,
-            )
-
-    try:
-        return difference(
-            left,
-            right,
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        left = make_valid_and_keep_geom_type(left, geom_type, n_jobs=n_jobs)
-        right = make_valid_and_keep_geom_type(right, geom_type, n_jobs=n_jobs)
-        left = left.loc[lambda x: x.index.isin(right.index)]
-        right = right.loc[lambda x: x.index.isin(left.index)]
-        try:
-            return difference(
-                left.to_numpy(),
-                right.to_numpy(),
-                grid_size=grid_size,
-            )
-        except GEOSException as e:
-            raise e.__class__(e, f"{grid_size=}", f"{left=}", f"{right=}") from e
-
-
-def make_valid_and_keep_geom_type(
-    geoms: np.ndarray, geom_type: str, n_jobs: int
-) -> GeoSeries:
-    """Make GeometryCollections into (Multi)Polygons, (Multi)LineStrings or (Multi)Points.
-
-    Because GeometryCollections might appear after dissolving (unary_union).
-    And this makes shapely difference/intersection fail.
-
-    Args:
-        geoms: Array of geometries.
-        geom_type: geometry type to be kept.
-        n_jobs: Number of treads.
-    """
-    geoms = GeoSeries(geoms)
-    geoms.index = range(len(geoms))
-    geoms.loc[:] = make_valid(geoms.to_numpy())
-    geoms_with_correct_type = geoms.explode(index_parts=False).pipe(
-        to_single_geom_type, geom_type
-    )
-    only_one = geoms_with_correct_type.groupby(level=0).transform("size") == 1
-    one_hit = geoms_with_correct_type[only_one]
-    many_hits = geoms_with_correct_type[~only_one].groupby(level=0).agg(unary_union)
-    geoms_with_wrong_type = geoms.loc[~geoms.index.isin(geoms_with_correct_type.index)]
-    return pd.concat([one_hit, many_hits, geoms_with_wrong_type]).sort_index()
-
-
 def _agg_geoms(g: np.ndarray, grid_size: int | float | None = None) -> Geometry:
-    return make_valid(
+    return make_valid(union_all(g, grid_size=grid_size))