ssb-sgis 1.1.17__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,17 +26,15 @@ from shapely.errors import GEOSException
26
26
  from shapely.geometry import LinearRing
27
27
  from shapely.ops import SplitOp
28
28
 
29
+ from ..conf import _get_instance
30
+ from ..conf import config
29
31
  from ..debug_config import _DEBUG_CONFIG
30
32
  from ..debug_config import _try_debug_print
31
33
  from ..maps.maps import explore_locals
32
34
  from .conversion import to_gdf
33
35
  from .conversion import to_geoseries
34
36
  from .duplicates import _get_intersecting_geometries
35
- from .general import _grouped_unary_union
36
- from .general import _parallel_unary_union
37
- from .general import _parallel_unary_union_geoseries
38
37
  from .general import _push_geom_col
39
- from .general import _unary_union_for_notna
40
38
  from .general import clean_geoms
41
39
  from .general import extend_lines
42
40
  from .general import get_grouped_centroids
@@ -46,11 +44,13 @@ from .geometry_types import get_geom_type
46
44
  from .geometry_types import make_all_singlepart
47
45
  from .geometry_types import to_single_geom_type
48
46
  from .neighbors import get_neighbor_indices
49
- from .overlay import _try_difference
50
47
  from .overlay import clean_overlay
51
48
  from .polygons_as_rings import PolygonsAsRings
49
+ from .runners import OverlayRunner
50
+ from .runners import UnionRunner
52
51
  from .sfilter import sfilter
53
52
  from .sfilter import sfilter_inverse
53
+ from .utils import _unary_union_for_notna
54
54
 
55
55
  PRECISION = 1e-3
56
56
  _BUFFER = False
@@ -232,6 +232,8 @@ def eliminate_by_longest(
232
232
  aggfunc: str | dict | list | None = None,
233
233
  grid_size=None,
234
234
  n_jobs: int = 1,
235
+ union_runner: UnionRunner | None = None,
236
+ overlay_runner: OverlayRunner | None = None,
235
237
  **kwargs,
236
238
  ) -> tuple[GeoDataFrame]:
237
239
  """Dissolves selected polygons with the longest bordering neighbor polygon.
@@ -259,6 +261,10 @@ def eliminate_by_longest(
259
261
  (if aggfunc="first").
260
262
  grid_size: Rounding of the coordinates. Defaults to None.
261
263
  n_jobs: Number of threads to use. Defaults to 1.
264
+ union_runner: Optionally debug/manipulate the spatial union operations.
265
+ See the 'runners' module for example implementations.
266
+ overlay_runner: Optionally debug/manipulate the spatial overlay operations.
267
+ See the 'runners' module for example implementations.
262
268
  **kwargs: Keyword arguments passed to the dissolve method.
263
269
 
264
270
  Returns:
@@ -350,6 +356,7 @@ def eliminate_by_longest(
350
356
  keep_geom_type=False,
351
357
  grid_size=grid_size,
352
358
  n_jobs=n_jobs,
359
+ overlay_runner=overlay_runner,
353
360
  ).loc[lambda x: x["_eliminate_idx"].notna()]
354
361
 
355
362
  borders["_length"] = borders.length
@@ -390,6 +397,8 @@ def eliminate_by_longest(
390
397
  fix_double,
391
398
  grid_size=grid_size,
392
399
  n_jobs=n_jobs,
400
+ union_runner=union_runner,
401
+ overlay_runner=overlay_runner,
393
402
  **kwargs,
394
403
  )
395
404
 
@@ -434,6 +443,8 @@ def eliminate_by_longest(
434
443
  ignore_index=ignore_index,
435
444
  aggfunc=aggfunc,
436
445
  grid_size=grid_size,
446
+ union_runner=union_runner,
447
+ overlay_runner=overlay_runner,
437
448
  n_jobs=n_jobs,
438
449
  )
439
450
 
@@ -494,6 +505,8 @@ def eliminate_by_largest(
494
505
  predicate: str = "intersects",
495
506
  grid_size=None,
496
507
  n_jobs: int = 1,
508
+ union_runner: UnionRunner | None = None,
509
+ overlay_runner: OverlayRunner | None = None,
497
510
  **kwargs,
498
511
  ) -> tuple[GeoDataFrame]:
499
512
  """Dissolves selected polygons with the largest neighbor polygon.
@@ -522,6 +535,10 @@ def eliminate_by_largest(
522
535
  predicate: Binary predicate passed to sjoin. Defaults to "intersects".
523
536
  grid_size: Rounding of the coordinates. Defaults to None.
524
537
  n_jobs: Number of threads to use. Defaults to 1.
538
+ union_runner: Optionally debug/manipulate the spatial union operations.
539
+ See the 'runners' module for example implementations.
540
+ overlay_runner: Optionally debug/manipulate the spatial overlay operations.
541
+ See the 'runners' module for example implementations.
525
542
  **kwargs: Keyword arguments passed to the dissolve method.
526
543
 
527
544
  Returns:
@@ -566,6 +583,8 @@ def eliminate_by_largest(
566
583
  fix_double=fix_double,
567
584
  grid_size=grid_size,
568
585
  n_jobs=n_jobs,
586
+ union_runner=union_runner,
587
+ overlay_runner=overlay_runner,
569
588
  **kwargs,
570
589
  )
571
590
 
@@ -581,6 +600,8 @@ def eliminate_by_smallest(
581
600
  fix_double: bool = True,
582
601
  grid_size=None,
583
602
  n_jobs: int = 1,
603
+ union_runner: UnionRunner | None = None,
604
+ overlay_runner: OverlayRunner | None = None,
584
605
  **kwargs,
585
606
  ) -> tuple[GeoDataFrame]:
586
607
  return _eliminate_by_area(
@@ -594,6 +615,8 @@ def eliminate_by_smallest(
594
615
  fix_double=fix_double,
595
616
  grid_size=grid_size,
596
617
  n_jobs=n_jobs,
618
+ union_runner=union_runner,
619
+ overlay_runner=overlay_runner,
597
620
  **kwargs,
598
621
  )
599
622
 
@@ -603,12 +626,14 @@ def _eliminate_by_area(
603
626
  to_eliminate: GeoDataFrame,
604
627
  max_distance: int | float | None,
605
628
  sort_ascending: bool,
606
- ignore_index: bool = False,
607
- aggfunc: str | dict | list | None = None,
608
- predicate="intersects",
609
- fix_double: bool = True,
610
- grid_size=None,
611
- n_jobs: int = 1,
629
+ ignore_index: bool,
630
+ aggfunc: str | dict | list | None,
631
+ predicate: str,
632
+ fix_double: bool,
633
+ grid_size,
634
+ n_jobs: int,
635
+ union_runner: UnionRunner,
636
+ overlay_runner: OverlayRunner,
612
637
  **kwargs,
613
638
  ) -> GeoDataFrame:
614
639
  _recurse = kwargs.pop("_recurse", False)
@@ -667,6 +692,8 @@ def _eliminate_by_area(
667
692
  fix_double=fix_double,
668
693
  grid_size=grid_size,
669
694
  n_jobs=n_jobs,
695
+ union_runner=union_runner,
696
+ overlay_runner=overlay_runner,
670
697
  **kwargs,
671
698
  )
672
699
 
@@ -717,18 +744,14 @@ def _eliminate_by_area(
717
744
  ignore_index=ignore_index,
718
745
  aggfunc=aggfunc,
719
746
  grid_size=grid_size,
747
+ union_runner=union_runner,
748
+ overlay_runner=overlay_runner,
720
749
  n_jobs=n_jobs,
721
750
  )
722
751
 
723
752
  if not was_multiple_gdfs:
724
753
  return out, isolated
725
754
 
726
- for k, v in locals().items():
727
- try:
728
- print(k, v.columns)
729
- except Exception:
730
- pass
731
-
732
755
  gdfs = ()
733
756
  for i, cols in enumerate(original_cols):
734
757
  df = out.loc[out["_df_idx"] == i, cols]
@@ -738,11 +761,26 @@ def _eliminate_by_area(
738
761
 
739
762
 
740
763
  def _eliminate(
741
- gdf, to_eliminate, aggfunc, crs, fix_double, grid_size, n_jobs, **kwargs
764
+ gdf,
765
+ to_eliminate,
766
+ aggfunc,
767
+ crs,
768
+ fix_double,
769
+ grid_size,
770
+ n_jobs,
771
+ overlay_runner,
772
+ union_runner,
773
+ **kwargs,
742
774
  ):
775
+
743
776
  if not len(to_eliminate):
744
777
  return gdf
745
778
 
779
+ if union_runner is None:
780
+ union_runner = _get_instance(config, "union_runner", n_jobs=n_jobs)
781
+ if overlay_runner is None:
782
+ overlay_runner = _get_instance(config, "overlay_runner", n_jobs=n_jobs)
783
+
746
784
  gdf["_range_idx_elim"] = range(len(gdf))
747
785
 
748
786
  in_to_eliminate = gdf["_dissolve_idx"].isin(to_eliminate["_dissolve_idx"])
@@ -798,16 +836,6 @@ def _eliminate(
798
836
  # all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
799
837
  all_geoms: pd.Series = gdf.geometry
800
838
 
801
- # more_than_one = get_num_geometries(all_geoms.values) > 1
802
- # all_geoms.loc[more_than_one] = all_geoms.loc[more_than_one].apply(
803
- # _unary_union_for_notna
804
- # )
805
-
806
- # more_than_one = get_num_geometries(to_be_eliminated.values) > 1
807
- # to_be_eliminated.loc[more_than_one, "geometry"] = to_be_eliminated.loc[
808
- # more_than_one, "geometry"
809
- # ].apply(_unary_union_for_notna)
810
-
811
839
  # create DataFrame of intersection pairs
812
840
  tree = STRtree(all_geoms.values)
813
841
  left, right = tree.query(
@@ -819,8 +847,6 @@ def _eliminate(
819
847
  dict(enumerate(to_be_eliminated.index))
820
848
  )
821
849
 
822
- # pairs = pairs.loc[lambda x: x["right"] != x["_dissolve_idx"]]
823
-
824
850
  soon_erased = to_be_eliminated.iloc[pairs.index]
825
851
  intersecting = all_geoms.iloc[pairs["right"]]
826
852
 
@@ -829,61 +855,31 @@ def _eliminate(
829
855
  intersecting = intersecting[shoud_not_erase]
830
856
 
831
857
  missing = to_be_eliminated.loc[
832
- # (~to_be_eliminated.index.isin(soon_erased.index))
833
- # |
834
- (~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])),
835
- # | (~to_be_eliminated["_row_idx"].isin(soon_erased.index)),
836
- "geometry",
858
+ (~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])), "geometry"
837
859
  ]
838
860
 
839
861
  # allign and aggregate by dissolve index to not get duplicates in difference
840
862
  intersecting.index = soon_erased.index
841
863
 
842
- soon_erased = _grouped_unary_union(soon_erased, level=0, grid_size=grid_size)
843
- intersecting = _grouped_unary_union(intersecting, level=0, grid_size=grid_size)
864
+ soon_erased = union_runner.run(soon_erased, level=0, grid_size=grid_size)
865
+ intersecting = union_runner.run(intersecting, level=0, grid_size=grid_size)
844
866
 
845
867
  assert soon_erased.index.equals(soon_erased.index)
846
868
 
847
- # soon_erased = soon_erased.geometry.groupby(level=0).agg(
848
- # lambda x: unary_union(x, grid_size=grid_size)
849
- # )
850
- # intersecting = intersecting.groupby(level=0).agg(
851
- # lambda x: unary_union(x, grid_size=grid_size)
852
- # )
853
-
854
- # explore_locals(center=_DEBUG_CONFIG["center"])
855
-
856
- soon_erased.loc[:] = _try_difference(
869
+ soon_erased.loc[:] = overlay_runner.run(
870
+ difference,
857
871
  soon_erased.to_numpy(),
858
872
  intersecting.to_numpy(),
859
873
  grid_size=grid_size,
860
- n_jobs=n_jobs,
861
874
  geom_type="polygon",
862
875
  )
863
876
 
864
- missing = _grouped_unary_union(missing, level=0, grid_size=grid_size)
877
+ missing = union_runner.run(missing, level=0, grid_size=grid_size)
865
878
 
866
879
  missing = make_all_singlepart(missing).loc[lambda x: x.area > 0]
867
880
 
868
881
  soon_erased = make_all_singlepart(soon_erased).loc[lambda x: x.area > 0]
869
882
 
870
- if 0:
871
- tree = STRtree(soon_erased.values)
872
- left, right = tree.query(missing.values, predicate="intersects")
873
- explore_locals(
874
- missing2=to_gdf(missing.to_numpy()[left], 25833),
875
- soon_erased2=to_gdf(soon_erased.to_numpy()[right], 25833),
876
- center=_DEBUG_CONFIG["center"],
877
- )
878
- missing = pd.Series(
879
- difference(
880
- missing.to_numpy()[left],
881
- soon_erased.to_numpy()[right],
882
- grid_size=grid_size,
883
- ),
884
- index=left,
885
- ).loc[lambda x: (x.notna()) & (~is_empty(x))]
886
-
887
883
  soon_eliminated = pd.concat([eliminators, soon_erased, missing])
888
884
  more_than_one = get_num_geometries(soon_eliminated.values) > 1
889
885
 
@@ -891,29 +887,13 @@ def _eliminate(
891
887
  _unary_union_for_notna
892
888
  )
893
889
 
894
- if n_jobs > 1:
895
- eliminated["geometry"] = GeoSeries(
896
- _parallel_unary_union_geoseries(
897
- soon_eliminated,
898
- level=0,
899
- grid_size=grid_size,
900
- n_jobs=n_jobs,
901
- ),
902
- index=eliminated.index,
903
- )
904
- else:
905
- eliminated["geometry"] = _grouped_unary_union(soon_eliminated, level=0)
906
- # eliminated["geometry"] = soon_eliminated.groupby(level=0).agg(
907
- # lambda x: make_valid(unary_union(x))
908
- # )
909
-
890
+ eliminated["geometry"] = union_runner.run(
891
+ soon_eliminated, level=0, grid_size=grid_size
892
+ )
910
893
  else:
911
- if n_jobs > 1:
912
- eliminated["geometry"] = _parallel_unary_union(
913
- many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
914
- )
915
- else:
916
- eliminated["geometry"] = _grouped_unary_union(many_hits, by="_dissolve_idx")
894
+ eliminated["geometry"] = union_runner.run(
895
+ many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
896
+ )
917
897
 
918
898
  # setting crs on the GeometryArrays to avoid warning in concat
919
899
  not_to_dissolve.geometry.values.crs = crs
@@ -0,0 +1,326 @@
1
+ import functools
2
+ from abc import ABC
3
+ from abc import abstractmethod
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+ import joblib
9
+ import numpy as np
10
+ import pandas as pd
11
+ from geopandas import GeoDataFrame
12
+ from geopandas import GeoSeries
13
+ from shapely import Geometry
14
+ from shapely import STRtree
15
+ from shapely import get_parts
16
+ from shapely import make_valid
17
+ from shapely import union_all
18
+ from shapely.errors import GEOSException
19
+
20
+ from .utils import _unary_union_for_notna
21
+ from .utils import make_valid_and_keep_geom_type
22
+
23
+
24
@dataclass
class AbstractRunner(ABC):
    """Base class for the 'runner' family.

    A runner encapsulates how a (potentially parallel) operation is executed.
    Concrete subclasses provide a 'run' method; this base only carries the
    worker configuration and a readable string form.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
    """

    n_jobs: int
    backend: str | None = None

    @abstractmethod
    def run(self, *args, **kwargs) -> Any:
        """Execute the runner's operation. Implemented by subclasses."""

    def __str__(self) -> str:
        """Readable representation including worker configuration."""
        name = self.__class__.__name__
        return f"{name}(n_jobs={self.n_jobs}, backend='{self.backend}')"
47
+
48
+
49
@dataclass
class UnionRunner(AbstractRunner):
    """Run shapely.union_all groupwise with pandas.groupby.

    The 'run' method takes the arguments 'df' (GeoDataFrame or GeoSeries),
    'by' (optional column(s) to group by), 'grid_size' (passed to
    shapely.union_all) and **kwargs passed to pandas groupby. With n_jobs > 1
    the per-group unions are dispatched to joblib workers; otherwise a plain
    groupby-agg is used.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
    """

    n_jobs: int
    backend: str | None = None

    def run(
        self,
        df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
        by: str | list[str] | None = None,
        grid_size: float | int | None = None,
        **kwargs,
    ) -> GeoSeries | GeoDataFrame:
        """Union geometries group-wise, in parallel if n_jobs > 1.

        Args:
            df: Frame or series holding the geometries.
            by: Column(s) to group by. If None and no 'level' kwarg is given,
                all rows are unioned into a single group.
            grid_size: Precision grid passed to the union operation.
            **kwargs: Forwarded to groupby (e.g. 'level', 'as_index').

        Returns:
            GeoSeries of one unioned geometry per group (GeoDataFrame with the
            index reset if as_index=False was passed).
        """
        # assume geometry column is 'geometry' if input is pandas.Series or pandas.DataFrame
        try:
            geom_col: str = df.geometry.name
        except AttributeError:
            try:
                # a Series: use its name, falling back to 'geometry' for unnamed series
                geom_col: str | None = df.name
                if geom_col is None:
                    geom_col = "geometry"
            except AttributeError:
                geom_col = "geometry"
        # plain pandas objects have no crs; treat as unset
        try:
            crs = df.crs
        except AttributeError:
            crs = None

        # bind grid_size once so the same callable can be used by agg and joblib
        unary_union_for_grid_size = functools.partial(
            _unary_union_for_notna, grid_size=grid_size
        )

        # pop as_index here: it is handled after aggregation, not by groupby itself
        as_index = kwargs.pop("as_index", True)
        if by is None and "level" not in kwargs:
            # no grouping key given: put every row in one group (key 0)
            by = np.zeros(len(df), dtype="int64")

        try:
            # (Geo)DataFrame: select the geometry column after grouping
            groupby_obj = df.groupby(by, **kwargs)[geom_col]
        except KeyError:
            # (Geo)Series: no column selection possible
            groupby_obj = df.groupby(by, **kwargs)

        if self.n_jobs is None or self.n_jobs == 1:
            # sequential path: one union per group via agg
            results = groupby_obj.agg(unary_union_for_grid_size)
            index = results.index
        else:
            backend = self.backend or "loky"
            # parallel path: one joblib task per group
            with joblib.Parallel(n_jobs=self.n_jobs, backend=backend) as parallel:
                results = parallel(
                    joblib.delayed(unary_union_for_grid_size)(geoms)
                    for _, geoms in groupby_obj
                )
            # results is an ordered list; recover the matching group index
            index = groupby_obj.size().index
        agged = GeoSeries(results, index=index, name=geom_col, crs=crs)
        if not as_index:
            return agged.reset_index()
        return agged
121
+
122
+
123
def _strtree_query(
    arr1: np.ndarray,
    arr2: np.ndarray,
    method: str,
    indices1: np.ndarray | None = None,
    indices2: np.ndarray | None = None,
    **kwargs,
):
    """Query an STRtree built from arr2 with the geometries in arr1.

    Args:
        arr1: Query geometries.
        arr2: Geometries to build the tree from.
        method: Name of the STRtree method to call (e.g. 'query').
        indices1: Optional original indices for arr1 positions (used when arr1
            is a chunk of a larger array).
        indices2: Optional original indices for arr2 positions.
        **kwargs: Passed to the tree method (chiefly 'predicate', 'distance').

    Returns:
        Tuple of (left, right) index arrays for the matching pairs.
    """
    tree = STRtree(arr2)
    func = getattr(tree, method)
    left, right = func(arr1, **kwargs)
    # The tree returns positional indices; translate them back to the caller's
    # original indices with vectorized fancy indexing (equivalent to the
    # previous per-element dict lookup, but done in one C-level pass).
    if indices1 is not None:
        left = np.asarray(indices1)[left]
    if indices2 is not None:
        right = np.asarray(indices2)[right]
    return left, right
141
+
142
+
143
@dataclass
class RTreeQueryRunner(AbstractRunner):
    """Run shapely.STRtree queries chunkwise in parallel.

    The 'run' method takes two numpy.ndarrays of geometries and **kwargs
    passed to the query method, chiefly 'predicate' and 'distance'. It returns
    a tuple of two arrays representing the spatial index pairs of the left and
    right input arrays. If one side is large enough, it is split into n_jobs
    chunks that are queried in parallel and the results concatenated.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
    """

    n_jobs: int
    backend: str = "loky"

    def run(
        self, arr1: np.ndarray, arr2: np.ndarray, method: str = "query", **kwargs
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run a spatial rtree query and return indices of hits from arr1 and arr2 in a tuple of two arrays."""
        # NOTE(review): the third clause below is a bare ratio used as a
        # boolean — it is truthy for any non-zero ratio, so it almost never
        # filters anything; it looks like a comparison (e.g. '> 1', meaning
        # "arr1 is the larger side") was intended. It also raises
        # ZeroDivisionError when the denominator array is empty — confirm
        # callers never pass empty arrays.
        if (
            (self.n_jobs or 1) > 1
            and len(arr1) / self.n_jobs > 10_000
            and len(arr1) / len(arr2)
        ):
            # chunk the query side (arr1); each chunk keeps its original
            # positions via indices1 so results can be stitched back together
            chunks = np.array_split(np.arange(len(arr1)), self.n_jobs)
            assert sum(len(x) for x in chunks) == len(arr1)
            with joblib.Parallel(self.n_jobs, backend=self.backend) as parallel:
                results = parallel(
                    joblib.delayed(_strtree_query)(
                        arr1[chunk],
                        arr2,
                        method=method,
                        indices1=chunk,
                        **kwargs,
                    )
                    for chunk in chunks
                )
            left = np.concatenate([x[0] for x in results])
            right = np.concatenate([x[1] for x in results])
            return left, right
        elif (
            (self.n_jobs or 1) > 1
            and len(arr2) / self.n_jobs > 10_000
            and len(arr2) / len(arr1)
        ):
            # chunk the tree side (arr2) instead; indices2 maps positions back
            chunks = np.array_split(np.arange(len(arr2)), self.n_jobs)
            with joblib.Parallel(self.n_jobs, backend=self.backend) as parallel:
                results = parallel(
                    joblib.delayed(_strtree_query)(
                        arr1,
                        arr2[chunk],
                        method=method,
                        indices2=chunk,
                        **kwargs,
                    )
                    for chunk in chunks
                )
            left = np.concatenate([x[0] for x in results])
            right = np.concatenate([x[1] for x in results])
            return left, right

        # small inputs or single worker: one sequential query
        return _strtree_query(arr1, arr2, method=method, **kwargs)
209
+
210
+
211
@dataclass
class OverlayRunner(AbstractRunner):
    """Run a vectorized shapely overlay operation on two equal-length numpy arrays.

    The 'run' method takes an overlay function (shapely.intersection,
    shapely.difference etc.) as 0th argument and two numpy.ndarrays of same
    length as 1st and 2nd argument, plus 'grid_size' (passed to the overlay
    function) and 'geom_type' (used to keep only relevant geometries —
    polygon, line or point — in cases of GEOSExceptions caused by geometry
    type mismatch). Runs sequentially (no n_jobs) because the vectorized
    shapely functions are usually faster than any attempt to parallelize.
    """

    n_jobs: None = None
    backend: None = None

    def run(
        self,
        func: Callable,
        arr1: np.ndarray,
        arr2: np.ndarray,
        grid_size: int | float | None,
        geom_type: str | None,
    ) -> np.ndarray:
        """Run the overlay operation (func) with fallback.

        First tries to run func; on GEOSException the geometries are made
        valid, only parts with the correct geom_type are kept in
        GeometryCollections, and the operation is retried on the index-aligned
        remainder.
        """
        try:
            return func(arr1, arr2, grid_size=grid_size)
        except GEOSException:
            fixed1 = make_valid_and_keep_geom_type(arr1, geom_type=geom_type)
            fixed2 = make_valid_and_keep_geom_type(arr2, geom_type=geom_type)
            # Bug fix: the previous code converted the first series to numpy
            # before filtering the second by its index, which raised
            # AttributeError (ndarray has no .index). Compute both masks on
            # the pandas objects first, then convert.
            keep1 = fixed1.index.isin(fixed2.index)
            keep2 = fixed2.index.isin(fixed1.index)
            return func(
                fixed1[keep1].to_numpy(),
                fixed2[keep2].to_numpy(),
                grid_size=grid_size,
            )
249
+
250
+
251
@dataclass
class GridSizeOverlayRunner(OverlayRunner):
    """Run a shapely overlay operation rowwise for different grid_sizes until success.

    Args:
        n_jobs: Number of workers (threads).
        backend: Backend for the workers.
            NOTE(review): this field is currently unused — 'run' hardcodes the
            "threading" backend; confirm whether it should be honored.
        grid_sizes: Grid sizes to try, in order, after a GEOSException.
    """

    n_jobs: int
    backend: str | None
    grid_sizes: list[float | int] | None = None

    def __post_init__(self) -> None:
        """Check that grid_sizes is passed."""
        if self.grid_sizes is None:
            raise ValueError(
                f"must set 'grid_sizes' in the {self.__class__.__name__} initialiser."
            )

    def run(
        self,
        func: Callable,
        arr1: np.ndarray,
        arr2: np.ndarray,
        grid_size: int | float | None = None,
        geom_type: str | None = None,
    ) -> np.ndarray:
        """Run the overlay operation rowwise with fallback.

        The overlay operation (func) is looped for each row in arr1 and arr2
        as 0th and 1st argument to 'func' and 'grid_size' as keyword argument.
        If a GEOSException is thrown, geometries are made valid and
        GeometryCollections are forced to either (Multi)Point, (Multi)Polygon
        or (Multi)LineString, depending on the value in "geom_type". Then, if
        another GEOSException is thrown, the overlay operation is looped for
        the grid_sizes given in the instance's 'grid_sizes' attribute.

        NOTE(review): despite the annotation, the return value is the list
        produced by joblib.Parallel, not an ndarray — callers appear to rely
        only on iteration/assignment, so this is presumably harmless; confirm.
        """
        # geom_type is lowercased here before being forwarded rowwise
        kwargs = dict(
            grid_size=grid_size,
            geom_type=geom_type.lower() if geom_type is not None else None,
            grid_sizes=self.grid_sizes,
        )
        # threads (not processes): each task is one cheap shapely call, so
        # process startup/pickling would dominate with other backends
        with joblib.Parallel(self.n_jobs, backend="threading") as parallel:
            return parallel(
                joblib.delayed(_run_overlay_rowwise)(func, g1, g2, **kwargs)
                for g1, g2 in zip(arr1, arr2, strict=True)
            )
294
+
295
+
296
def _fix_gemetry_fast(geom: Geometry, geom_type: str | None) -> Geometry:
    """Make geom valid and keep only the parts matching geom_type.

    Args:
        geom: Geometry to repair.
        geom_type: Lowercased geometry type to keep ("polygon", "linestring",
            "point"), or None to keep everything.

    Returns:
        The valid geometry, reduced to parts of the wanted type when given.
    """
    geom = make_valid(geom)
    # Bug fix: callers pass geom_type lowercased (e.g. "polygon") while
    # shapely's geom_type is CamelCase (e.g. "Polygon"/"MultiPolygon"), so the
    # previous case-sensitive checks never matched and the part filter dropped
    # everything. Compare case-insensitively instead.
    if geom_type is None:
        return geom
    wanted = geom_type.lower()
    if geom.geom_type.lower() == wanted:
        return geom
    return union_all([g for g in get_parts(geom) if wanted in g.geom_type.lower()])
301
+
302
+
303
+ def _run_overlay_rowwise(
304
+ func: Callable,
305
+ geom1: Geometry,
306
+ geom2: Geometry,
307
+ grid_size: float | int | None,
308
+ geom_type: str | None,
309
+ grid_sizes: list[float | int],
310
+ ) -> Geometry:
311
+ try:
312
+ return func(geom1, geom2, grid_size=grid_size)
313
+ except GEOSException:
314
+ pass
315
+ geom1 = _fix_gemetry_fast(geom1, geom_type)
316
+ geom2 = _fix_gemetry_fast(geom2, geom_type)
317
+ try:
318
+ return func(geom1, geom2)
319
+ except GEOSException:
320
+ pass
321
+ for i, grid_size in enumerate(grid_sizes):
322
+ try:
323
+ return func(geom1, geom2, grid_size=grid_size)
324
+ except GEOSException as e:
325
+ if i == len(grid_sizes) - 1:
326
+ raise e