ssb-sgis 1.1.17__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,34 +9,29 @@ version of the solution from GH 2792.
9
9
  """
10
10
 
11
11
  import functools
12
- from collections.abc import Callable
13
12
 
14
13
  import geopandas as gpd
15
- import joblib
16
14
  import numpy as np
17
15
  import pandas as pd
18
16
  from geopandas import GeoDataFrame
19
- from geopandas import GeoSeries
20
17
  from pandas import DataFrame
21
18
  from shapely import Geometry
22
- from shapely import STRtree
23
19
  from shapely import box
24
20
  from shapely import difference
25
21
  from shapely import intersection
22
+ from shapely import is_empty
26
23
  from shapely import make_valid
27
- from shapely import unary_union
28
- from shapely.errors import GEOSException
29
-
30
- try:
31
- import dask.array as da
32
- except ImportError:
33
- pass
24
+ from shapely import union_all
34
25
 
26
+ from ..conf import config
35
27
  from .general import _determine_geom_type_args
36
28
  from .general import clean_geoms
37
29
  from .geometry_types import get_geom_type
38
30
  from .geometry_types import make_all_singlepart
39
31
  from .geometry_types import to_single_geom_type
32
+ from .runners import OverlayRunner
33
+ from .runners import RTreeQueryRunner
34
+ from .runners import UnionRunner
40
35
 
41
36
  DEFAULT_GRID_SIZE = None
42
37
  DEFAULT_LSUFFIX = "_1"
@@ -51,9 +46,12 @@ def clean_overlay(
51
46
  geom_type: str | None = None,
52
47
  predicate: str | None = "intersects",
53
48
  grid_size: float | None = None,
54
- n_jobs: int = 1,
55
49
  lsuffix: str = DEFAULT_LSUFFIX,
56
50
  rsuffix: str = DEFAULT_RSUFFIX,
51
+ n_jobs: int = 1,
52
+ rtree_runner: RTreeQueryRunner | None = None,
53
+ union_runner: UnionRunner | None = None,
54
+ overlay_runner: OverlayRunner | None = None,
57
55
  ) -> GeoDataFrame:
58
56
  """Fixes and explodes geometries before doing a shapely overlay, then cleans up.
59
57
 
@@ -74,10 +72,16 @@ def clean_overlay(
74
72
  "point".
75
73
  grid_size: Precision grid size to round the geometries. Will use the highest
76
74
  precision of the inputs by default.
77
- n_jobs: number of threads.
78
75
  predicate: Spatial predicate in the spatial tree.
79
76
  lsuffix: Suffix of columns in df1 that are also in df2.
80
77
  rsuffix: Suffix of columns in df2 that are also in df1.
78
+ n_jobs: number of jobs. Defaults to 1.
79
+ union_runner: Optionally debug/manipulate the spatial union operations.
80
+ See the 'runners' module for example implementations.
81
+ rtree_runner: Optionally debug/manipulate the spatial indexing operations.
82
+ See the 'runners' module for example implementations.
83
+ overlay_runner: Optionally debug/manipulate the spatial overlay operations.
84
+ See the 'runners' module for example implementations.
81
85
 
82
86
  Returns:
83
87
  GeoDataFrame with overlayed and fixed geometries and columns from both
@@ -104,6 +108,13 @@ def clean_overlay(
104
108
  if df1.crs != df2.crs:
105
109
  raise ValueError(f"'crs' mismatch. Got {df1.crs} and {df2.crs}")
106
110
 
111
+ if rtree_runner is None:
112
+ rtree_runner = config.get_instance("rtree_runner", n_jobs)
113
+ if union_runner is None:
114
+ union_runner = config.get_instance("union_runner", n_jobs)
115
+ if overlay_runner is None:
116
+ overlay_runner = config.get_instance("overlay_runner", n_jobs)
117
+
107
118
  crs = df1.crs
108
119
 
109
120
  # original_geom_type = geom_type
@@ -148,13 +159,16 @@ def clean_overlay(
148
159
  box1 = box(*df1.total_bounds)
149
160
  box2 = box(*df2.total_bounds)
150
161
 
151
- if not len(df1) or not len(df1) or not box1.intersects(box2):
162
+ if not grid_size and (
163
+ (not len(df1) or not len(df2))
164
+ or (not box1.intersects(box2) and how == "intersection")
165
+ ):
152
166
  return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
153
167
 
154
- if df1._geometry_column_name != "geometry":
168
+ if df1.geometry.name != "geometry":
155
169
  df1 = df1.rename_geometry("geometry")
156
170
 
157
- if df2._geometry_column_name != "geometry":
171
+ if df2.geometry.name != "geometry":
158
172
  df2 = df2.rename_geometry("geometry")
159
173
 
160
174
  # to pandas because GeoDataFrame constructor is expensive
@@ -171,8 +185,10 @@ def clean_overlay(
171
185
  lsuffix=lsuffix,
172
186
  rsuffix=rsuffix,
173
187
  geom_type=geom_type,
174
- n_jobs=n_jobs,
175
188
  predicate=predicate,
189
+ rtree_runner=rtree_runner,
190
+ overlay_runner=overlay_runner,
191
+ union_runner=union_runner,
176
192
  ),
177
193
  geometry="geometry",
178
194
  crs=crs,
@@ -188,9 +204,9 @@ def clean_overlay(
188
204
 
189
205
 
190
206
  def _join_and_get_no_rows(df1, df2, lsuffix, rsuffix):
191
- geom_col = df1._geometry_column_name
207
+ geom_col = df1.geometry.name
192
208
  df1_cols = df1.columns.difference({geom_col})
193
- df2_cols = df2.columns.difference({df2._geometry_column_name})
209
+ df2_cols = df2.columns.difference({df2.geometry.name})
194
210
  cols_with_suffix = [
195
211
  f"{col}{lsuffix}" if col in df2_cols else col for col in df1_cols
196
212
  ] + [f"{col}{rsuffix}" if col in df1_cols else col for col in df2_cols]
@@ -215,7 +231,7 @@ def _no_intersections_return(
215
231
  if how == "identity":
216
232
  # add suffixes and return df1
217
233
  df_template = _join_and_get_no_rows(df1, df2, lsuffix, rsuffix)
218
- df2_cols = df2.columns.difference({df2._geometry_column_name})
234
+ df2_cols = df2.columns.difference({df2.geometry.name})
219
235
  df1.columns = [f"{col}{lsuffix}" if col in df2_cols else col for col in df1]
220
236
  return pd.concat([df_template, df1], ignore_index=True)
221
237
 
@@ -237,33 +253,41 @@ def _shapely_pd_overlay(
237
253
  df1: DataFrame,
238
254
  df2: DataFrame,
239
255
  how: str,
240
- grid_size: float = DEFAULT_GRID_SIZE,
241
- predicate: str = "intersects",
242
- lsuffix: str = DEFAULT_LSUFFIX,
243
- rsuffix: str = DEFAULT_RSUFFIX,
244
- geom_type: str | None = None,
245
- n_jobs: int = 1,
256
+ grid_size: float,
257
+ predicate: str,
258
+ lsuffix: str,
259
+ rsuffix: str,
260
+ geom_type: str | None,
261
+ rtree_runner: RTreeQueryRunner,
262
+ overlay_runner: OverlayRunner,
263
+ union_runner: UnionRunner,
246
264
  ) -> DataFrame:
247
- if not grid_size and not len(df1) or not len(df2):
248
- return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
249
-
250
- tree = STRtree(df2.geometry.values)
251
- left, right = tree.query(df1.geometry.values, predicate=predicate)
252
-
265
+ left, right = rtree_runner.run(
266
+ df1.geometry.values, df2.geometry.values, predicate=predicate
267
+ )
253
268
  pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
254
- assert pairs.geometry.notna().all(), pairs.geometry
255
- assert pairs.geom_right.notna().all(), pairs.geom_right
269
+ assert pairs["geometry"].notna().all(), pairs.geometry[lambda x: x.isna()]
270
+ assert pairs["geom_right"].notna().all(), pairs.geom_right[lambda x: x.isna()]
256
271
 
257
272
  if how == "intersection":
258
273
  overlayed = [
259
274
  _intersection(
260
- pairs, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
275
+ pairs,
276
+ grid_size=grid_size,
277
+ geom_type=geom_type,
278
+ overlay_runner=overlay_runner,
261
279
  )
262
280
  ]
263
281
 
264
282
  elif how == "difference":
265
283
  overlayed = _difference(
266
- pairs, df1, left, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
284
+ pairs,
285
+ df1,
286
+ left,
287
+ grid_size=grid_size,
288
+ geom_type=geom_type,
289
+ overlay_runner=overlay_runner,
290
+ union_runner=union_runner,
267
291
  )
268
292
 
269
293
  elif how == "symmetric_difference":
@@ -276,12 +300,19 @@ def _shapely_pd_overlay(
276
300
  grid_size=grid_size,
277
301
  rsuffix=rsuffix,
278
302
  geom_type=geom_type,
279
- n_jobs=n_jobs,
303
+ overlay_runner=overlay_runner,
304
+ union_runner=union_runner,
280
305
  )
281
306
 
282
307
  elif how == "identity":
283
308
  overlayed = _identity(
284
- pairs, df1, left, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
309
+ pairs,
310
+ df1,
311
+ left,
312
+ grid_size=grid_size,
313
+ geom_type=geom_type,
314
+ overlay_runner=overlay_runner,
315
+ union_runner=union_runner,
285
316
  )
286
317
 
287
318
  elif how == "union":
@@ -294,7 +325,8 @@ def _shapely_pd_overlay(
294
325
  grid_size=grid_size,
295
326
  rsuffix=rsuffix,
296
327
  geom_type=geom_type,
297
- n_jobs=n_jobs,
328
+ overlay_runner=overlay_runner,
329
+ union_runner=union_runner,
298
330
  )
299
331
 
300
332
  elif how == "update":
@@ -304,8 +336,9 @@ def _shapely_pd_overlay(
304
336
  df2,
305
337
  left=left,
306
338
  grid_size=grid_size,
307
- n_jobs=n_jobs,
308
339
  geom_type=geom_type,
340
+ overlay_runner=overlay_runner,
341
+ union_runner=union_runner,
309
342
  )
310
343
 
311
344
  assert isinstance(overlayed, list)
@@ -323,8 +356,9 @@ def _shapely_pd_overlay(
323
356
  overlayed = _add_suffix_left(overlayed, df1, df2, lsuffix)
324
357
 
325
358
  overlayed["geometry"] = make_valid(overlayed["geometry"])
326
- # None and empty are falsy
327
- overlayed = overlayed.loc[lambda x: x["geometry"].notna()]
359
+ overlayed = overlayed.loc[
360
+ lambda x: (x["geometry"].notna().values) & (~is_empty(x["geometry"].values))
361
+ ]
328
362
 
329
363
  return overlayed
330
364
 
@@ -336,115 +370,38 @@ def _update(
336
370
  left: np.ndarray,
337
371
  grid_size: float | None | int,
338
372
  geom_type: str | None,
339
- n_jobs: int,
373
+ overlay_runner: OverlayRunner,
374
+ union_runner: UnionRunner,
340
375
  ) -> GeoDataFrame:
341
376
  overlayed = _difference(
342
- pairs, df1, left, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
377
+ pairs,
378
+ df1,
379
+ left,
380
+ grid_size=grid_size,
381
+ geom_type=geom_type,
382
+ overlay_runner=overlay_runner,
383
+ union_runner=union_runner,
343
384
  )
344
385
 
345
386
  return overlayed + [df2]
346
387
 
347
388
 
348
- def _run_overlay_dask(
349
- arr1: np.ndarray,
350
- arr2: np.ndarray,
351
- func: Callable,
352
- n_jobs: int,
353
- grid_size: float | int | None,
354
- ) -> np.ndarray:
355
- if len(arr1) // n_jobs <= 1:
356
- try:
357
- return func(arr1, arr2, grid_size=grid_size)
358
- except TypeError as e:
359
- raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
360
- arr1 = da.from_array(arr1, chunks=len(arr1) // n_jobs)
361
- arr2 = da.from_array(arr2, chunks=len(arr2) // n_jobs)
362
- res = arr1.map_blocks(func, arr2, grid_size=grid_size, dtype=float)
363
- return res.compute(scheduler="threads", optimize_graph=False, num_workers=n_jobs)
364
-
365
-
366
- def _run_overlay_joblib_threading(
367
- arr1: np.ndarray,
368
- arr2: np.ndarray,
369
- func: Callable,
370
- n_jobs: int,
371
- grid_size: int | float | None,
372
- ) -> list[Geometry]:
373
- if len(arr1) // n_jobs <= 1:
374
- try:
375
- return func(arr1, arr2, grid_size=grid_size)
376
- except TypeError as e:
377
- raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
378
- with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
379
- return parallel(
380
- joblib.delayed(func)(g1, g2, grid_size=grid_size)
381
- for g1, g2 in zip(arr1, arr2, strict=True)
382
- )
383
-
384
-
385
389
  def _intersection(
386
390
  pairs: pd.DataFrame,
387
391
  grid_size: None | float | int,
388
392
  geom_type: str | None,
389
- n_jobs: int = 1,
393
+ overlay_runner: OverlayRunner,
390
394
  ) -> GeoDataFrame:
391
395
  if not len(pairs):
392
396
  return pairs.drop(columns="geom_right")
393
-
394
397
  intersections = pairs.copy()
395
-
396
- arr1 = intersections["geometry"].to_numpy()
397
- arr2 = intersections["geom_right"].to_numpy()
398
-
399
- if n_jobs > 1 and len(arr1) / n_jobs > 10:
400
- try:
401
- res = _run_overlay_joblib_threading(
402
- arr1,
403
- arr2,
404
- func=intersection,
405
- n_jobs=n_jobs,
406
- grid_size=grid_size,
407
- )
408
- except GEOSException:
409
- arr1 = make_valid_and_keep_geom_type(
410
- arr1, geom_type=geom_type, n_jobs=n_jobs
411
- )
412
- arr2 = make_valid_and_keep_geom_type(
413
- arr2, geom_type=geom_type, n_jobs=n_jobs
414
- )
415
- arr1 = arr1.loc[lambda x: x.index.isin(arr2.index)]
416
- arr2 = arr2.loc[lambda x: x.index.isin(arr1.index)]
417
-
418
- res = _run_overlay_joblib_threading(
419
- arr1.to_numpy(),
420
- arr2.to_numpy(),
421
- func=intersection,
422
- n_jobs=n_jobs,
423
- grid_size=grid_size,
424
- )
425
- intersections["geometry"] = res
426
- return intersections.drop(columns="geom_right")
427
-
428
- try:
429
- intersections["geometry"] = intersection(
430
- intersections["geometry"].to_numpy(),
431
- intersections["geom_right"].to_numpy(),
432
- grid_size=grid_size,
433
- )
434
- except GEOSException:
435
- left = make_valid_and_keep_geom_type(
436
- intersections["geometry"].to_numpy(), geom_type, n_jobs=n_jobs
437
- )
438
- right = make_valid_and_keep_geom_type(
439
- intersections["geom_right"].to_numpy(), geom_type, n_jobs=n_jobs
440
- )
441
- left = left.loc[lambda x: x.index.isin(right.index)]
442
- right = right.loc[lambda x: x.index.isin(left.index)]
443
-
444
- intersections["geometry"] = intersection(
445
- left.to_numpy(), right.to_numpy(), grid_size=grid_size
446
- )
447
-
398
+ intersections["geometry"] = overlay_runner.run(
399
+ intersection,
400
+ intersections["geometry"].to_numpy(),
401
+ intersections["geom_right"].to_numpy(),
402
+ grid_size=grid_size,
403
+ geom_type=geom_type,
404
+ )
448
405
  return intersections.drop(columns="geom_right")
449
406
 
450
407
 
@@ -457,12 +414,16 @@ def _union(
457
414
  grid_size: int | float | None,
458
415
  rsuffix: str,
459
416
  geom_type: str | None,
460
- n_jobs: int = 1,
417
+ overlay_runner: OverlayRunner,
418
+ union_runner: UnionRunner,
461
419
  ) -> list[GeoDataFrame]:
462
420
  merged = []
463
421
  if len(left):
464
422
  intersections = _intersection(
465
- pairs, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
423
+ pairs,
424
+ grid_size=grid_size,
425
+ geom_type=geom_type,
426
+ overlay_runner=overlay_runner,
466
427
  )
467
428
  merged.append(intersections)
468
429
  symmdiff = _symmetric_difference(
@@ -474,7 +435,8 @@ def _union(
474
435
  grid_size=grid_size,
475
436
  rsuffix=rsuffix,
476
437
  geom_type=geom_type,
477
- n_jobs=n_jobs,
438
+ overlay_runner=overlay_runner,
439
+ union_runner=union_runner,
478
440
  )
479
441
  merged += symmdiff
480
442
  return merged
@@ -486,15 +448,27 @@ def _identity(
486
448
  left: np.ndarray,
487
449
  grid_size: int | float | None,
488
450
  geom_type: str | None,
489
- n_jobs: int = 1,
451
+ overlay_runner: OverlayRunner,
452
+ union_runner: UnionRunner,
490
453
  ) -> list[GeoDataFrame]:
491
454
  merged = []
492
455
  if len(left):
493
456
  intersections = _intersection(
494
- pairs, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
457
+ pairs,
458
+ grid_size=grid_size,
459
+ geom_type=geom_type,
460
+ overlay_runner=overlay_runner,
495
461
  )
496
462
  merged.append(intersections)
497
- diff = _difference(pairs, df1, left, grid_size=grid_size, n_jobs=n_jobs)
463
+ diff = _difference(
464
+ pairs,
465
+ df1,
466
+ left,
467
+ geom_type=geom_type,
468
+ grid_size=grid_size,
469
+ overlay_runner=overlay_runner,
470
+ union_runner=union_runner,
471
+ )
498
472
  merged += diff
499
473
  return merged
500
474
 
@@ -508,12 +482,19 @@ def _symmetric_difference(
508
482
  grid_size: int | float | None,
509
483
  rsuffix: str,
510
484
  geom_type: str | None,
511
- n_jobs: int = 1,
485
+ overlay_runner: OverlayRunner,
486
+ union_runner: UnionRunner,
512
487
  ) -> list[GeoDataFrame]:
513
488
  merged = []
514
489
 
515
490
  difference_left = _difference(
516
- pairs, df1, left, grid_size=grid_size, geom_type=geom_type, n_jobs=n_jobs
491
+ pairs,
492
+ df1,
493
+ left,
494
+ grid_size=grid_size,
495
+ geom_type=geom_type,
496
+ overlay_runner=overlay_runner,
497
+ union_runner=union_runner,
517
498
  )
518
499
  merged += difference_left
519
500
 
@@ -525,7 +506,8 @@ def _symmetric_difference(
525
506
  grid_size=grid_size,
526
507
  rsuffix=rsuffix,
527
508
  geom_type=geom_type,
528
- n_jobs=n_jobs,
509
+ overlay_runner=overlay_runner,
510
+ union_runner=union_runner,
529
511
  )
530
512
  merged.append(clip_right)
531
513
 
@@ -539,9 +521,10 @@ def _difference(
539
521
  pairs: pd.DataFrame,
540
522
  df1: pd.DataFrame,
541
523
  left: np.ndarray,
542
- grid_size: int | float | None = None,
543
- geom_type: str | None = None,
544
- n_jobs: int = 1,
524
+ grid_size: int | float | None,
525
+ geom_type: str | None,
526
+ overlay_runner: OverlayRunner,
527
+ union_runner: UnionRunner,
545
528
  ) -> list[GeoDataFrame]:
546
529
  merged = []
547
530
  if len(left):
@@ -550,7 +533,8 @@ def _difference(
550
533
  df1=df1,
551
534
  grid_size=grid_size,
552
535
  geom_type=geom_type,
553
- n_jobs=n_jobs,
536
+ overlay_runner=overlay_runner,
537
+ union_runner=union_runner,
554
538
  )
555
539
  merged.append(clip_left)
556
540
  diff_left = _add_indices_from_left(df1, left)
@@ -618,7 +602,8 @@ def _shapely_diffclip_left(
618
602
  df1: pd.DataFrame,
619
603
  grid_size: int | float | None,
620
604
  geom_type: str | None,
621
- n_jobs: int,
605
+ overlay_runner: OverlayRunner,
606
+ union_runner: UnionRunner,
622
607
  ) -> pd.DataFrame:
623
608
  """Aggregate areas in right by unique values from left, then erases those from left."""
624
609
  keep_cols = list(df1.columns.difference({"_overlay_index_right"})) + ["geom_right"]
@@ -675,12 +660,14 @@ def _shapely_diffclip_left(
675
660
  }
676
661
  )
677
662
 
678
- agged = pd.Series(
679
- {
680
- i: agg_geoms_partial(geoms)
681
- for i, geoms in agger.groupby(level=0)["geom_right"]
682
- }
683
- )
663
+ agged = union_runner.run(agger["geom_right"], level=0)
664
+ # agged = pd.Series(
665
+
666
+ # {
667
+ # i: agg_geoms_partial(geoms)
668
+ # for i, geoms in agger.groupby(level=0)["geom_right"]
669
+ # }
670
+ # )
684
671
  many_hits_agged["geom_right"] = inverse_index_mapper.map(agged)
685
672
  many_hits_agged = many_hits_agged.drop(columns=["_right_indices"])
686
673
 
@@ -688,15 +675,19 @@ def _shapely_diffclip_left(
688
675
  except IndexError:
689
676
  clip_left = pairs.loc[:, list(keep_cols)]
690
677
 
691
- assert clip_left["geometry"].notna().all()
692
- assert clip_left["geom_right"].notna().all()
678
+ assert clip_left["geometry"].notna().all(), clip_left["geometry"][
679
+ lambda x: x.isna()
680
+ ]
681
+ assert clip_left["geom_right"].notna().all(), clip_left["geom_right"][
682
+ lambda x: x.isna()
683
+ ]
693
684
 
694
- clip_left["geometry"] = _try_difference(
685
+ clip_left["geometry"] = overlay_runner.run(
686
+ difference,
695
687
  clip_left["geometry"].to_numpy(),
696
688
  clip_left["geom_right"].to_numpy(),
697
689
  grid_size=grid_size,
698
690
  geom_type=geom_type,
699
- n_jobs=n_jobs,
700
691
  )
701
692
 
702
693
  return clip_left.drop(columns="geom_right")
@@ -709,7 +700,8 @@ def _shapely_diffclip_right(
709
700
  grid_size: int | float | None,
710
701
  rsuffix: str,
711
702
  geom_type: str | None,
712
- n_jobs: int,
703
+ overlay_runner: OverlayRunner,
704
+ union_runner: UnionRunner,
713
705
  ) -> pd.DataFrame:
714
706
  agg_geoms_partial = functools.partial(_agg_geoms, grid_size=grid_size)
715
707
 
@@ -720,16 +712,22 @@ def _shapely_diffclip_right(
720
712
  one_hit = pairs[only_one].set_index("_overlay_index_right")[
721
713
  ["geom_left", "geometry"]
722
714
  ]
723
- many_hits = (
724
- pairs[~only_one]
725
- .groupby("_overlay_index_right")
726
- .agg(
727
- {
728
- "geom_left": agg_geoms_partial,
729
- "geometry": "first",
730
- }
731
- )
715
+ many_hits_ungrouped = pairs[~only_one].set_index("_overlay_index_right")
716
+ many_hits = pd.DataFrame(index=many_hits_ungrouped.index.unique())
717
+ many_hits["geometry"] = many_hits_ungrouped.groupby(level=0)["geometry"].first()
718
+ many_hits["geom_left"] = union_runner.run(
719
+ many_hits_ungrouped["geom_left"], level=0
732
720
  )
721
+ # many_hits = (
722
+ # pairs[~only_one]
723
+ # .groupby("_overlay_index_right")
724
+ # .agg(
725
+ # {
726
+ # "geom_left": agg_geoms_partial,
727
+ # "geometry": "first",
728
+ # }
729
+ # )
730
+ # )
733
731
  clip_right = (
734
732
  pd.concat([one_hit, many_hits])
735
733
  .join(df2.drop(columns=["geometry"]))
@@ -748,10 +746,15 @@ def _shapely_diffclip_right(
748
746
  }
749
747
  )
750
748
 
751
- assert clip_right["geometry"].notna().all()
752
- assert clip_right["geom_left"].notna().all()
749
+ assert clip_right["geometry"].notna().all(), clip_right["geometry"][
750
+ lambda x: x.isna()
751
+ ]
752
+ assert clip_right["geom_left"].notna().all(), clip_right["geom_left"][
753
+ lambda x: x.isna()
754
+ ]
753
755
 
754
- clip_right["geometry"] = _try_difference(
756
+ clip_right["geometry"] = overlay_runner.run(
757
+ difference,
755
758
  clip_right["geometry"].to_numpy(),
756
759
  clip_right["geom_left"].to_numpy(),
757
760
  grid_size=grid_size,
@@ -761,87 +764,5 @@ def _shapely_diffclip_right(
761
764
  return clip_right.drop(columns="geom_left")
762
765
 
763
766
 
764
- def _try_difference(
765
- left: np.ndarray,
766
- right: np.ndarray,
767
- grid_size: int | float | None,
768
- geom_type: str | None,
769
- n_jobs: int = 1,
770
- ) -> np.ndarray:
771
- """Try difference overlay, then make_valid and retry."""
772
- if n_jobs > 1 and len(left) / n_jobs > 10:
773
- try:
774
- return _run_overlay_joblib_threading(
775
- left,
776
- right,
777
- func=difference,
778
- n_jobs=n_jobs,
779
- grid_size=grid_size,
780
- )
781
- except GEOSException:
782
- left = make_valid_and_keep_geom_type(
783
- left, geom_type=geom_type, n_jobs=n_jobs
784
- )
785
- right = make_valid_and_keep_geom_type(
786
- right, geom_type=geom_type, n_jobs=n_jobs
787
- )
788
- left = left.loc[lambda x: x.index.isin(right.index)]
789
- right = right.loc[lambda x: x.index.isin(left.index)]
790
-
791
- return _run_overlay_joblib_threading(
792
- left.to_numpy(),
793
- right.to_numpy(),
794
- func=difference,
795
- n_jobs=n_jobs,
796
- grid_size=grid_size,
797
- )
798
-
799
- try:
800
- return difference(
801
- left,
802
- right,
803
- grid_size=grid_size,
804
- )
805
- except GEOSException:
806
- left = make_valid_and_keep_geom_type(left, geom_type, n_jobs=n_jobs)
807
- right = make_valid_and_keep_geom_type(right, geom_type, n_jobs=n_jobs)
808
- left = left.loc[lambda x: x.index.isin(right.index)]
809
- right = right.loc[lambda x: x.index.isin(left.index)]
810
- try:
811
- return difference(
812
- left.to_numpy(),
813
- right.to_numpy(),
814
- grid_size=grid_size,
815
- )
816
- except GEOSException as e:
817
- raise e.__class__(e, f"{grid_size=}", f"{left=}", f"{right=}") from e
818
-
819
-
820
- def make_valid_and_keep_geom_type(
821
- geoms: np.ndarray, geom_type: str, n_jobs: int
822
- ) -> GeoSeries:
823
- """Make GeometryCollections into (Multi)Polygons, (Multi)LineStrings or (Multi)Points.
824
-
825
- Because GeometryCollections might appear after dissolving (unary_union).
826
- And this makes shapely difference/intersection fail.
827
-
828
- Args:
829
- geoms: Array of geometries.
830
- geom_type: geometry type to be kept.
831
- n_jobs: Number of treads.
832
- """
833
- geoms = GeoSeries(geoms)
834
- geoms.index = range(len(geoms))
835
- geoms.loc[:] = make_valid(geoms.to_numpy())
836
- geoms_with_correct_type = geoms.explode(index_parts=False).pipe(
837
- to_single_geom_type, geom_type
838
- )
839
- only_one = geoms_with_correct_type.groupby(level=0).transform("size") == 1
840
- one_hit = geoms_with_correct_type[only_one]
841
- many_hits = geoms_with_correct_type[~only_one].groupby(level=0).agg(unary_union)
842
- geoms_with_wrong_type = geoms.loc[~geoms.index.isin(geoms_with_correct_type.index)]
843
- return pd.concat([one_hit, many_hits, geoms_with_wrong_type]).sort_index()
844
-
845
-
846
767
  def _agg_geoms(g: np.ndarray, grid_size: int | float | None = None) -> Geometry:
847
- return make_valid(unary_union(g, grid_size=grid_size))
768
+ return make_valid(union_all(g, grid_size=grid_size))