ssb-sgis 1.1.16__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgis/__init__.py CHANGED
@@ -76,6 +76,10 @@ from .geopandas_tools.polygon_operations import get_holes
 from .geopandas_tools.polygon_operations import get_polygon_clusters
 from .geopandas_tools.polygon_operations import split_polygons_by_lines
 from .geopandas_tools.polygons_as_rings import PolygonsAsRings
+from .geopandas_tools.runners import GridSizeOverlayRunner
+from .geopandas_tools.runners import OverlayRunner
+from .geopandas_tools.runners import RTreeQueryRunner
+from .geopandas_tools.runners import UnionRunner
 from .geopandas_tools.sfilter import sfilter
 from .geopandas_tools.sfilter import sfilter_inverse
 from .geopandas_tools.sfilter import sfilter_split
sgis/conf.py CHANGED
@@ -1,3 +1,6 @@
+from collections.abc import Iterable
+from typing import Any
+
 try:
     from gcsfs import GCSFileSystem
 
@@ -66,7 +69,56 @@ except ImportError:
 
     file_system = LocalFileSystem
 
-config = {
-    "n_jobs": 1,
-    "file_system": file_system,
-}
+from .geopandas_tools.runners import OverlayRunner
+from .geopandas_tools.runners import RTreeQueryRunner
+from .geopandas_tools.runners import UnionRunner
+
+
+class Config:
+    """Dictlike config with a 'get_instance' method."""
+
+    def __init__(self, data: dict) -> None:
+        """Initialise with dict."""
+        self.data = data
+
+    def get_instance(self, key: str, *args, **kwargs) -> Any:
+        """Get the dict value and call it if callable."""
+        x = self.data[key]
+        if callable(x):
+            return x(*args, **kwargs)
+        return x
+
+    def __getattr__(self, attr: str) -> Any:
+        """Get dict attribute."""
+        return getattr(self.data, attr)
+
+    def __getitem__(self, key: str) -> Any:
+        """Get dict value."""
+        return self.data[key]
+
+    def __setitem__(self, key: str, value) -> None:
+        """Set dict value."""
+        self.data[key] = value
+
+    def __iter__(self) -> Iterable[str]:
+        """Iterate over dict keys."""
+        return iter(self.data)
+
+    def __len__(self) -> int:
+        """Length of dict."""
+        return len(self.data)
+
+    def __str__(self) -> str:
+        """String representation of dict."""
+        return str(self.data)
+
+
+config = Config(
+    {
+        "n_jobs": 1,
+        "file_system": file_system,
+        "rtree_runner": RTreeQueryRunner,
+        "overlay_runner": OverlayRunner,
+        "union_runner": UnionRunner,
+    }
+)
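The practical effect of the new Config class: registered values may be classes (or any callable), and get_instance instantiates them on demand with the caller's arguments, while plain values pass through unchanged. A minimal self-contained sketch of the pattern, using a stand-in runner class rather than the real sgis runners:

    from typing import Any


    class Config:
        """Dict-like config whose callable values are instantiated on demand."""

        def __init__(self, data: dict) -> None:
            self.data = data

        def get_instance(self, key: str, *args: Any, **kwargs: Any) -> Any:
            x = self.data[key]
            # Classes and functions are called; plain values are returned as-is.
            return x(*args, **kwargs) if callable(x) else x


    class DummyRunner:
        def __init__(self, n_jobs: int = 1) -> None:
            self.n_jobs = n_jobs


    config = Config({"n_jobs": 1, "union_runner": DummyRunner})
    assert config.get_instance("union_runner", 4).n_jobs == 4  # DummyRunner(4)
    assert config.get_instance("n_jobs") == 1  # not callable, returned directly

This is how the functions changed below obtain their default runners: config.get_instance("union_runner", n_jobs) constructs a UnionRunner for the requested worker count unless the caller passed a runner explicitly.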
@@ -23,12 +23,13 @@ from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from shapely import get_num_geometries
 
+from ..conf import config
 from ..parallel.parallel import Parallel
-from .general import _parallel_unary_union
-from .general import _unary_union_for_notna
 from .geometry_types import make_all_singlepart
 from .polygon_operations import get_cluster_mapper
 from .polygon_operations import get_grouped_centroids
+from .runners import UnionRunner
+from .utils import _unary_union_for_notna
 
 
 def _decide_ignore_index(kwargs: dict) -> tuple[dict, bool]:
@@ -53,8 +54,8 @@ def buffdissexp(
     index_parts: bool = False,
     copy: bool = True,
     grid_size: float | int | None = None,
-    n_jobs: int = 1,
     join_style: int | str = "round",
+    n_jobs: int = 1,
     **dissolve_kwargs,
 ) -> GeoDataFrame:
     """Buffers and dissolves overlapping geometries.
@@ -187,27 +188,27 @@ def _dissolve(
     gdf: GeoDataFrame,
     aggfunc: str = "first",
     grid_size: None | float = None,
-    n_jobs: int = 1,
     as_index: bool = True,
+    n_jobs: int = 1,
+    union_runner: UnionRunner | None = None,
     **dissolve_kwargs,
 ) -> GeoDataFrame:
-
     if not len(gdf):
         return gdf
 
-    geom_col = gdf._geometry_column_name
+    if union_runner is None:
+        union_runner = config.get_instance("union_runner", n_jobs)
 
-    gdf[geom_col] = gdf[geom_col].make_valid()
+    geom_col = gdf.geometry.name
+    by = dissolve_kwargs.pop("by", None)
+    by_was_none = not bool(by)
 
+    # make sure geometries are dissolved rowwise to make dissolving simpler later
     more_than_one = get_num_geometries(gdf.geometry.values) > 1
     gdf.loc[more_than_one, geom_col] = gdf.loc[more_than_one, geom_col].apply(
         _unary_union_for_notna
     )
 
-    by = dissolve_kwargs.pop("by", None)
-
-    by_was_none = not bool(by)
-
     if by is None and dissolve_kwargs.get("level") is None:
         by = np.zeros(len(gdf), dtype="int64")
         other_cols = list(gdf.columns.difference({geom_col}))
@@ -215,30 +216,23 @@ def _dissolve(
     if isinstance(by, str):
         by = [by]
     other_cols = list(gdf.columns.difference({geom_col} | set(by or {})))
-
     try:
         is_one_hit = (
             gdf.groupby(by, as_index=True, **dissolve_kwargs).transform("size") == 1
         )
     except IndexError:
-        # if no rows when dropna=True
+        # if no rows after dropping na if dropna=True
         original_by = [x for x in by]
         query = gdf[by.pop(0)].notna()
         for col in gdf[by]:
             query &= gdf[col].notna()
         gdf = gdf.loc[query]
         assert not len(gdf), gdf
-        if not by_was_none and as_index:
-            try:
-                gdf = gdf.set_index(original_by)
-            except Exception as e:
-                print(gdf)
-                print(original_by)
-                raise e
-
+        if as_index and not by_was_none:
+            gdf = gdf.set_index(original_by)
         return gdf
 
-    if not by_was_none and as_index:
+    if as_index and not by_was_none:
         one_hit = gdf[is_one_hit].set_index(by)
     else:
         one_hit = gdf[is_one_hit]
@@ -250,38 +244,21 @@ def _dissolve(
     dissolved = many_hits.groupby(by, as_index=True, **dissolve_kwargs)[other_cols].agg(
         aggfunc
     )
-
-    if n_jobs > 1:
-        try:
-            agged = _parallel_unary_union(
-                many_hits,
-                n_jobs=n_jobs,
-                by=by,
-                grid_size=grid_size,
-                as_index=True,
-                **dissolve_kwargs,
-            )
-            dissolved[geom_col] = agged
-            return GeoDataFrame(dissolved, geometry=geom_col, crs=gdf.crs)
-        except Exception as e:
-            print(e, dissolved, agged, many_hits)
-            raise e
-
-    geoms_agged = many_hits.groupby(by, **dissolve_kwargs)[geom_col].agg(
-        lambda x: _unary_union_for_notna(x, grid_size=grid_size)
+    dissolved[geom_col] = union_runner.run(
+        many_hits,
+        by=by,
+        grid_size=grid_size,
+        as_index=True,
+        **dissolve_kwargs,
     )
-
-    dissolved[geom_col] = geoms_agged
-
 
     if not as_index:
         dissolved = dissolved.reset_index()
-
     try:
         return GeoDataFrame(
             pd.concat([dissolved, one_hit]).sort_index(), geometry=geom_col, crs=gdf.crs
         )
     except TypeError as e:
-        raise e.__class__(e, dissolved.index, one_hit.index) from e
+        raise e.__class__(f"{e}. {dissolved.index}. {one_hit.index}") from e
 
 
 def diss(
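Here the old parallel branch (_parallel_unary_union) and the serial groupby(...).agg(...) branch collapse into one union_runner.run(...) call. Based on the two call sites in this diff (a grouped GeoDataFrame with by=... here, and a GeoSeries with level=0 in update_geometries below), a UnionRunner appears to satisfy roughly this contract; a serial sketch, not the actual sgis implementation:

    import pandas as pd
    from shapely import make_valid, union_all


    class SketchUnionRunner:
        """Stand-in for sgis' UnionRunner: unions geometries per group."""

        def __init__(self, n_jobs: int = 1) -> None:
            self.n_jobs = n_jobs  # real runners may parallelize; this sketch is serial

        def run(self, obj, by=None, level=None, grid_size=None, as_index=True, **kwargs):
            grouped = obj.groupby(by=by, level=level, as_index=as_index, **kwargs)
            if isinstance(obj, pd.DataFrame):
                grouped = grouped[obj.geometry.name]
            # Mirrors _unary_union_for_notna: drop missing geometries, union, validate.
            return grouped.agg(
                lambda geoms: make_valid(
                    union_all(geoms.dropna().values, grid_size=grid_size)
                )
            )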
@@ -582,7 +559,7 @@ def buff(
     if copy:
         gdf = gdf.copy()
 
-    gdf[gdf._geometry_column_name] = gdf.buffer(
+    gdf[gdf.geometry.name] = gdf.buffer(
         distance, resolution=resolution, join_style=join_style, **buffer_kwargs
     ).make_valid()
 
@@ -146,31 +146,24 @@ def to_bbox(
     except Exception:
         pass
 
-    try:
-        minx = float(np.min(obj["minx"]))  # type: ignore [index]
-        miny = float(np.min(obj["miny"]))  # type: ignore [index]
-        maxx = float(np.max(obj["maxx"]))  # type: ignore [index]
-        maxy = float(np.max(obj["maxy"]))  # type: ignore [index]
-        return minx, miny, maxx, maxy
-    except Exception:
-        pass
-    try:
-        minx = float(np.min(obj.minx))  # type: ignore [union-attr]
-        miny = float(np.min(obj.miny))  # type: ignore [union-attr]
-        maxx = float(np.max(obj.maxx))  # type: ignore [union-attr]
-        maxy = float(np.max(obj.maxy))  # type: ignore [union-attr]
-        return minx, miny, maxx, maxy
-    except Exception:
-        pass
-
-    try:
-        minx = float(np.min(obj["west_longitude"]))  # type: ignore [index]
-        miny = float(np.min(obj["south_latitude"]))  # type: ignore [index]
-        maxx = float(np.max(obj["east_longitude"]))  # type: ignore [index]
-        maxy = float(np.max(obj["north_latitude"]))  # type: ignore [index]
-        return minx, miny, maxx, maxy
-    except Exception:
-        pass
+    def to_int_if_possible(x):
+        if isinstance(x, int) or float(x).is_integer():
+            return int(x)
+        return float(x)
+
+    for attrs in [
+        ("minx", "miny", "maxx", "maxy"),
+        ("xmin", "ymin", "xmax", "xmax"),
+        ("west_longitude", "south_latitude", "east_longitude", "north_latitude"),
+    ]:
+        try:
+            return tuple(to_int_if_possible(obj[attr]) for attr in attrs)
+        except Exception:
+            pass
+        try:
+            return tuple(to_int_if_possible(getattr(obj, attr)) for attr in attrs)
+        except Exception:
+            pass
 
     if hasattr(obj, "geometry"):
         try:
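The rewrite folds three copy-pasted try/except blocks into a loop over naming conventions, trying mapping access first and attribute access second, and it now preserves integer coordinates instead of forcing everything to float. One thing worth flagging upstream: the second tuple reads ("xmin", "ymin", "xmax", "xmax"), repeating "xmax" where "ymax" is presumably intended, so objects using that convention would get xmax as their fourth value. A stand-alone sketch of the fallback pattern (not the sgis function itself), with the tuple as presumably intended:

    class Box:
        minx, miny, maxx, maxy = 0, 0, 10.5, 20


    def sketch_to_bbox(obj):
        for attrs in [
            ("minx", "miny", "maxx", "maxy"),
            ("xmin", "ymin", "xmax", "ymax"),  # the diff has "xmax" twice here
        ]:
            try:
                return tuple(obj[attr] for attr in attrs)  # mapping-style access
            except Exception:
                pass
            try:
                return tuple(getattr(obj, attr) for attr in attrs)  # attribute access
            except Exception:
                pass
        raise TypeError(f"Could not get bbox from {obj!r}")


    assert sketch_to_bbox(Box()) == (0, 0, 10.5, 20)
    assert sketch_to_bbox({"xmin": 1, "ymin": 2, "xmax": 3, "ymax": 4}) == (1, 2, 3, 4)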
@@ -6,21 +6,19 @@ from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from shapely import STRtree
 from shapely import difference
-from shapely import make_valid
 from shapely import simplify
-from shapely.errors import GEOSException
 
+from ..conf import config
 from .general import _determine_geom_type_args
-from .general import _grouped_unary_union
-from .general import _parallel_unary_union_geoseries
 from .general import _push_geom_col
 from .general import clean_geoms
 from .geometry_types import get_geom_type
 from .geometry_types import make_all_singlepart
 from .geometry_types import to_single_geom_type
-from .overlay import _run_overlay_dask
 from .overlay import clean_overlay
-from .overlay import make_valid_and_keep_geom_type
+from .runners import OverlayRunner
+from .runners import RTreeQueryRunner
+from .runners import UnionRunner
 from .sfilter import sfilter_inverse
 
 PRECISION = 1e-3
@@ -31,8 +29,11 @@ def update_geometries(
     geom_type: str | None = None,
     keep_geom_type: bool | None = None,
     grid_size: int | None = None,
-    n_jobs: int = 1,
     predicate: str | None = "intersects",
+    n_jobs: int = 1,
+    union_runner: UnionRunner | None = None,
+    rtree_runner: RTreeQueryRunner | None = None,
+    overlay_runner: OverlayRunner | None = None,
 ) -> GeoDataFrame:
     """Puts geometries on top of each other rowwise.
 
@@ -50,8 +51,14 @@ def update_geometries(
             "line" or "point".
         grid_size: Precision grid size to round the geometries. Will use the highest
             precision of the inputs by default.
-        n_jobs: Number of threads.
         predicate: Spatial predicate for the spatial tree.
+        n_jobs: Number of workers.
+        union_runner: Optionally debug/manipulate the spatial union operations.
+            See the 'runners' module for example implementations.
+        rtree_runner: Optionally debug/manipulate the spatial indexing operations.
+            See the 'runners' module for example implementations.
+        overlay_runner: Optionally debug/manipulate the spatial overlay operations.
+            See the 'runners' module for example implementations.
 
     Example:
     --------
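For callers, the new parameters mean runners can be injected per call or swapped globally via the config. A hedged usage sketch: it assumes the runner classes are importable from the top level (as the __init__.py hunk above suggests) and take n_jobs as their first argument (as the config.get_instance("union_runner", n_jobs) calls below imply):

    import sgis as sg
    from sgis.conf import config

    gdf = sg.to_gdf([(0, 0), (1, 1)]).pipe(sg.buff, 1)

    # Inject a runner for a single call.
    updated = sg.update_geometries(gdf, union_runner=sg.UnionRunner(4))

    # Or swap the default globally: the registered class is instantiated
    # on demand by config.get_instance.
    config["union_runner"] = sg.UnionRunner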
@@ -98,6 +105,16 @@ def update_geometries(
     if len(gdf) <= 1:
         return gdf
 
+    if rtree_runner is None:
+        rtree_runner = config.get_instance("rtree_runner", n_jobs)
+    if union_runner is None:
+        union_runner = config.get_instance("union_runner", n_jobs)
+    if overlay_runner is None:
+        overlay_runner = config.get_instance("overlay_runner", n_jobs)
+
+    if geom_type == "polygon" or get_geom_type(gdf) == "polygon":
+        gdf.geometry = gdf.buffer(0)
+
     copied = make_all_singlepart(clean_geoms(gdf))
 
     copied, geom_type, keep_geom_type = _determine_geom_type_args(
@@ -108,66 +125,35 @@ def update_geometries(
     index_mapper = {i: idx for i, idx in enumerate(copied.index)}
     copied = copied.reset_index(drop=True)
 
-    tree = STRtree(copied.geometry.values)
-    left, right = tree.query(copied.geometry.values, predicate=predicate)
+    left, right = rtree_runner.run(
+        copied.geometry.values, copied.geometry.values, predicate=predicate
+    )
     indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
 
     # select geometries from 'right', index from 'left', dissolve by 'left'
     erasers = pd.Series(copied.geometry.loc[indices.values].values, index=indices.index)
-    if n_jobs > 1:
-        erasers = _parallel_unary_union_geoseries(
-            erasers,
-            level=0,
-            n_jobs=n_jobs,
-            grid_size=grid_size,
-        )
-        erasers = pd.Series(erasers, index=indices.index.unique())
-    else:
-        only_one = erasers.groupby(level=0).transform("size") == 1
-        one_hit = erasers[only_one]
-        many_hits = _grouped_unary_union(
-            erasers[~only_one], level=0, grid_size=grid_size
-        )
-        erasers = pd.concat([one_hit, many_hits]).sort_index()
+    only_one = erasers.groupby(level=0).transform("size") == 1
+    one_hit = erasers[only_one]
+    many_hits = union_runner.run(erasers[~only_one], level=0, grid_size=grid_size)
+    erasers = pd.concat([one_hit, many_hits]).sort_index()
 
     # match up the aggregated erasers by index
-    if n_jobs > 1:
-        arr1 = copied.geometry.loc[erasers.index].to_numpy()
-        arr2 = erasers.to_numpy()
-        try:
-            erased = _run_overlay_dask(
-                arr1, arr2, func=difference, n_jobs=n_jobs, grid_size=grid_size
-            )
-        except GEOSException:
-            arr1 = make_valid_and_keep_geom_type(
-                arr1, geom_type=geom_type, n_jobs=n_jobs
-            )
-            arr2 = make_valid_and_keep_geom_type(
-                arr2, geom_type=geom_type, n_jobs=n_jobs
-            )
-            erased = _run_overlay_dask(
-                arr1, arr2, func=difference, n_jobs=n_jobs, grid_size=grid_size
-            )
-        erased = GeoSeries(erased, index=erasers.index)
-    else:
-        erased = make_valid(
-            difference(
-                copied.geometry.loc[erasers.index],
-                erasers,
-                grid_size=grid_size,
-            )
-        )
+    arr1 = copied.geometry.loc[erasers.index].to_numpy()
+    arr2 = erasers.to_numpy()
+    erased = overlay_runner.run(
+        difference, arr1, arr2, grid_size=grid_size, geom_type=geom_type
+    )
 
+    erased = GeoSeries(erased, index=erasers.index)
     copied.loc[erased.index, geom_col] = erased
-
     copied = copied.loc[~copied.is_empty]
-
     copied.index = copied.index.map(index_mapper)
-
     copied = make_all_singlepart(copied)
 
     # TODO check why polygons dissappear in rare cases. For now, just add back the missing
-    dissapeared = sfilter_inverse(gdf, copied.buffer(-PRECISION))
+    dissapeared = sfilter_inverse(
+        gdf, copied.buffer(-PRECISION), rtree_runner=rtree_runner
+    )
     copied = pd.concat([copied, dissapeared])
 
     # TODO fix dupliates again with dissolve?
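update_geometries now delegates its three heavy steps to runner objects, and the call sites pin down their signatures: rtree_runner.run(left_geoms, right_geoms, predicate=...) returns the (left, right) index arrays the inlined STRtree code used to produce, and overlay_runner.run(func, arr1, arr2, grid_size=..., geom_type=...) applies a shapely set operation pairwise, absorbing the make_valid/GEOSException retry logic deleted above. Minimal serial sketches consistent with those call sites (the real runners also handle parallelism and geometry-type filtering):

    from shapely import STRtree, make_valid
    from shapely.errors import GEOSException


    class SketchRTreeRunner:
        def run(self, left, right, predicate="intersects"):
            # Index the right-hand geometries and query with the left-hand ones,
            # mirroring the STRtree code this hunk removes.
            return STRtree(right).query(left, predicate=predicate)


    class SketchOverlayRunner:
        def run(self, func, arr1, arr2, grid_size=None, geom_type=None):
            try:
                return func(arr1, arr2, grid_size=grid_size)
            except GEOSException:
                # Retry with validated inputs, like the removed fallback did
                # (the real runner also filters to geom_type after make_valid).
                return func(make_valid(arr1), make_valid(arr2), grid_size=grid_size)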
@@ -188,7 +174,7 @@ def get_intersections(
     keep_geom_type: bool | None = None,
     predicate: str | None = "intersects",
     grid_size: float | None = None,
-    n_jobs: int = 1,
+    **kwargs,
 ) -> GeoDataFrame:
     """Find geometries that intersect in a GeoDataFrame.
 
@@ -211,6 +197,7 @@ def get_intersections(
             precision of the inputs by default.
         n_jobs: Number of threads.
         predicate: Spatial predicate for the spatial tree.
+        **kwargs: Keyword arguments passed to clean_overlay.
 
     Returns:
         A GeoDataFrame of the overlapping polygons.
@@ -283,9 +270,9 @@ def get_intersections(
         gdf,
         geom_type,
         keep_geom_type,
-        n_jobs=n_jobs,
         grid_size=grid_size,
         predicate=predicate,
+        **kwargs,
     ).pipe(clean_geoms)
 
     duplicated_geoms.index = duplicated_geoms["orig_idx"].values
@@ -301,9 +288,9 @@ def _get_intersecting_geometries(
     gdf: GeoDataFrame,
     geom_type: str | None,
     keep_geom_type: bool,
-    n_jobs: int,
     grid_size: float | None = None,
     predicate: str | None = None,
+    **kwargs,
 ) -> GeoDataFrame:
     right = gdf[[gdf._geometry_column_name]]
     right["idx_right"] = right.index
@@ -327,7 +314,7 @@ def _get_intersecting_geometries(
             grid_size=grid_size,
             geom_type=geom_type,
             keep_geom_type=keep_geom_type,
-            n_jobs=n_jobs,
+            **kwargs,
         ).loc[are_not_identical]
     else:
         if keep_geom_type:
@@ -347,7 +334,7 @@ def _get_intersecting_geometries(
                 grid_size=grid_size,
                 predicate=predicate,
                 geom_type=geom_type,
-                n_jobs=n_jobs,
+                **kwargs,
             )
         ]
     intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
@@ -6,7 +6,6 @@ from collections.abc import Hashable
 from collections.abc import Iterable
 from typing import Any
 
-import joblib
 import numpy as np
 import pandas as pd
 import pyproj
@@ -23,7 +22,6 @@ from shapely import get_parts
 from shapely import linestrings
 from shapely import make_valid
 from shapely import points as shapely_points
-from shapely import union_all
 from shapely.geometry import LineString
 from shapely.geometry import MultiPoint
 from shapely.geometry import Point
@@ -333,7 +331,7 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     # using enumerate, then iloc on the sorted dict keys.
     # to avoid creating a temporary area column (which doesn't work for GeoSeries).
     area_mapper = dict(enumerate(gdf.area.values))
-    sorted_areas = dict(reversed(sorted(area_mapper.items(), key=lambda item: item[1])))
+    sorted_areas = dict(reversed(sorted(area_mapper.items(), key=_get_dict_value)))
     return gdf.iloc[list(sorted_areas)]
 
 
@@ -349,9 +347,7 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     # using enumerate, then iloc on the sorted dict keys.
     # to avoid creating a temporary area column (which doesn't work for GeoSeries).
     length_mapper = dict(enumerate(gdf.length.values))
-    sorted_lengths = dict(
-        reversed(sorted(length_mapper.items(), key=lambda item: item[1]))
-    )
+    sorted_lengths = dict(reversed(sorted(length_mapper.items(), key=_get_dict_value)))
     return gdf.iloc[list(sorted_lengths)]
 
 
@@ -367,7 +363,7 @@ def sort_short_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     # using enumerate, then iloc on the sorted dict keys.
     # to avoid creating a temporary area column (which doesn't work for GeoSeries).
     length_mapper = dict(enumerate(gdf.length.values))
-    sorted_lengths = dict(sorted(length_mapper.items(), key=lambda item: item[1]))
+    sorted_lengths = dict(sorted(length_mapper.items(), key=_get_dict_value))
     return gdf.iloc[list(sorted_lengths)]
 
 
@@ -384,10 +380,14 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     # using enumerate, then iloc on the sorted dict keys.
     # to avoid creating a temporary area column (which doesn't work for GeoSeries).
     area_mapper = dict(enumerate(gdf.area.values))
-    sorted_areas = dict(sorted(area_mapper.items(), key=lambda item: item[1]))
+    sorted_areas = dict(sorted(area_mapper.items(), key=_get_dict_value))
     return gdf.iloc[list(sorted_areas)]
 
 
+def _get_dict_value(item: tuple[Hashable, Any]) -> Any:
+    return item[1]
+
+
 def make_lines_between_points(
     *arrs: NDArray[Point] | GeometryArray | GeoSeries,
 ) -> NDArray[LineString]:
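The four sorting lambdas become one shared module-level _get_dict_value. Besides deduplication, a plausible (unstated) motivation is that module-level functions are picklable while lambdas are not, which matters if a sort key ever crosses a process boundary:

    import pickle


    def _get_dict_value(item):  # same helper the diff adds
        return item[1]


    pickle.dumps(_get_dict_value)  # fine: a named, importable function
    try:
        pickle.dumps(lambda item: item[1])
    except (pickle.PicklingError, AttributeError) as err:
        print(type(err).__name__)  # lambdas cannot be pickled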
@@ -1121,79 +1121,3 @@ def _determine_geom_type_args(
     if geom_type == "mixed":
         raise ValueError("Cannot set keep_geom_type=True with mixed geometries")
     return gdf, geom_type, keep_geom_type
-
-
-def _unary_union_for_notna(geoms, **kwargs):
-    try:
-        return make_valid(union_all(geoms, **kwargs))
-    except TypeError:
-        return union_all([geom for geom in geoms.dropna().values], **kwargs)
-
-
-def _grouped_unary_union(
-    df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
-    by: str | list[str] | None = None,
-    level: int | None = None,
-    as_index: bool = True,
-    grid_size: float | int | None = None,
-    dropna: bool = False,
-    **kwargs,
-) -> GeoSeries | GeoDataFrame:
-    """Vectorized unary_union for groups.
-
-    Experimental. Messy code.
-    """
-    try:
-        geom_col = df._geometry_column_name
-    except AttributeError:
-        try:
-            geom_col = df.name
-            if geom_col is None:
-                geom_col = "geometry"
-        except AttributeError:
-            geom_col = "geometry"
-
-    if isinstance(df, pd.Series):
-        return GeoSeries(
-            df.groupby(level=level, as_index=as_index, **kwargs).agg(
-                lambda x: _unary_union_for_notna(x, grid_size=grid_size)
-            )
-        )
-
-    return GeoSeries(
-        df.groupby(by, level=level, as_index=as_index, **kwargs)[geom_col].agg(
-            lambda x: _unary_union_for_notna(x, grid_size=grid_size)
-        )
-    )
-
-
-def _parallel_unary_union(
-    gdf: GeoDataFrame, n_jobs: int = 1, by=None, grid_size=None, **kwargs
-) -> list[Geometry]:
-    try:
-        geom_col = gdf._geometry_column_name
-    except AttributeError:
-        geom_col = "geometry"
-
-    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
-        delayed_operations = []
-        for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
-            delayed_operations.append(
-                joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
-            )
-
-        return parallel(delayed_operations)
-
-
-def _parallel_unary_union_geoseries(
-    ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
-) -> list[Geometry]:
-
-    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
-        delayed_operations = []
-        for _, geoms in ser.groupby(**kwargs):
-            delayed_operations.append(
-                joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
-            )
-
-        return parallel(delayed_operations)