ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,10 @@
3
3
  import networkx as nx
4
4
  import numpy as np
5
5
  import pandas as pd
6
+ import shapely
6
7
  from geopandas import GeoDataFrame, GeoSeries
7
8
  from shapely import (
9
+ STRtree,
8
10
  area,
9
11
  box,
10
12
  buffer,
@@ -20,6 +22,7 @@ from shapely import (
20
22
  )
21
23
  from shapely.errors import GEOSException
22
24
 
25
+ from .duplicates import get_intersections
23
26
  from .general import _push_geom_col, clean_geoms, get_grouped_centroids, to_lines
24
27
  from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
25
28
  from .neighbors import get_neighbor_indices
@@ -152,20 +155,7 @@ def get_polygon_clusters(
152
155
  if not len(concated):
153
156
  return concated.drop("i__", axis=1).assign(**{cluster_col: []})
154
157
 
155
- neighbors = get_neighbor_indices(concated, concated, predicate=predicate)
156
-
157
- edges = [(source, target) for source, target in neighbors.items()]
158
-
159
- graph = nx.Graph()
160
- graph.add_edges_from(edges)
161
-
162
- component_mapper = {
163
- j: i
164
- for i, component in enumerate(nx.connected_components(graph))
165
- for j in component
166
- }
167
-
168
- concated[cluster_col] = component_mapper
158
+ concated[cluster_col] = get_cluster_mapper(concated, predicate)
169
159
 
170
160
  if as_string:
171
161
  concated[cluster_col] = get_grouped_centroids(concated, groupby=cluster_col)
@@ -188,15 +178,33 @@ def get_polygon_clusters(
188
178
  return unconcated
189
179
 
190
180
 
181
def get_cluster_mapper(gdf, predicate="intersects"):
    """Assign an integer cluster number to every geometry linked to a neighbor.

    The neighbor pairs of 'gdf' against itself are used as edges of an
    undirected graph, and each connected component of that graph is given
    its own number (0, 1, 2, ...).

    Args:
        gdf: Geometries to cluster. The index must be unique.
        predicate: Spatial predicate passed on to get_neighbor_indices.
            Defaults to "intersects".

    Returns:
        A dict mapping each node of the neighbor graph (the source and target
        values returned by get_neighbor_indices, presumably index values of
        'gdf') to the number of the component it belongs to.

    Raises:
        ValueError: If the index of 'gdf' contains duplicates.
    """
    if gdf.index.has_duplicates:
        raise ValueError("Index must be unique")

    neighbor_pairs = get_neighbor_indices(gdf, gdf, predicate=predicate)

    # every (source, target) pair becomes an edge; linked nodes share a cluster
    graph = nx.Graph()
    graph.add_edges_from(list(neighbor_pairs.items()))

    cluster_of = {}
    for cluster_number, members in enumerate(nx.connected_components(graph)):
        for member in members:
            cluster_of[member] = cluster_number

    return cluster_of
196
+
197
+
191
198
  def eliminate_by_longest(
192
- gdf: GeoDataFrame,
199
+ gdf: GeoDataFrame | list[GeoDataFrame],
193
200
  to_eliminate: GeoDataFrame,
194
201
  *,
195
202
  remove_isolated: bool = False,
203
+ fix_double: bool = True,
196
204
  ignore_index: bool = False,
197
205
  aggfunc: str | dict | list | None = None,
198
206
  **kwargs,
199
- ) -> GeoDataFrame:
207
+ ) -> GeoDataFrame | tuple[GeoDataFrame]:
200
208
  """Dissolves selected polygons with the longest bordering neighbor polygon.
201
209
 
202
210
  Eliminates selected geometries by dissolving them with the neighboring
@@ -206,11 +214,14 @@ def eliminate_by_longest(
206
214
  Note that this might be a lot slower than eliminate_by_largest.
207
215
 
208
216
  Args:
209
- gdf: GeoDataFrame with polygon geometries.
217
+ gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
210
218
  to_eliminate: The geometries to be eliminated by 'gdf'.
211
219
  remove_isolated: If False (default), polygons in 'to_eliminate' that share
212
220
  no border with any polygon in 'gdf' will be kept. If True, the isolated
213
221
  polygons will be removed.
222
+ fix_double: If True, geometries to be eliminated will be erased by overlapping
223
+ geometries to not get double surfaces if the geometries in 'to_eliminate'
224
+ overlaps with multiple geometries in 'gdf'.
214
225
  ignore_index: If False (default), the resulting GeoDataFrame will keep the
215
226
  index of the large polygons. If True, the resulting axis will be labeled
216
227
  0, 1, …, n - 1.
@@ -223,8 +234,17 @@ def eliminate_by_longest(
223
234
  kwargs: Keyword arguments passed to the dissolve method.
224
235
 
225
236
  Returns:
226
- The GeoDataFrame with the small polygons dissolved into the large polygons.
237
+ The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
238
+ If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
227
239
  """
240
+ if isinstance(gdf, (list, tuple)):
241
+ # concat, then break up the dataframes in the end
242
+ was_multiple_gdfs = True
243
+ original_cols = [df.columns for df in gdf]
244
+ gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
245
+ else:
246
+ was_multiple_gdfs = False
247
+
228
248
  crs = gdf.crs
229
249
  geom_type = get_geom_type(gdf)
230
250
 
@@ -234,70 +254,95 @@ def eliminate_by_longest(
234
254
 
235
255
  gdf = gdf.reset_index(drop=True)
236
256
 
237
- gdf["poly_idx"] = gdf.index
238
- to_eliminate = to_eliminate.assign(eliminate_idx=lambda x: range(len(x)))
257
+ gdf["_dissolve_idx"] = gdf.index
258
+ to_eliminate = to_eliminate.assign(_eliminate_idx=lambda x: range(len(x)))
239
259
 
240
260
  # convert to lines to get the borders
241
- lines_gdf = to_lines(gdf[["poly_idx", "geometry"]], copy=False)
242
- lines_eliminate = to_lines(to_eliminate[["eliminate_idx", "geometry"]], copy=False)
261
+ lines_eliminate = to_lines(to_eliminate[["_eliminate_idx", "geometry"]])
243
262
 
244
- borders = lines_gdf.overlay(lines_eliminate, keep_geom_type=True).loc[
245
- lambda x: x["eliminate_idx"].notna()
246
- ]
263
+ borders = (
264
+ gdf[["_dissolve_idx", "geometry"]]
265
+ .overlay(lines_eliminate, keep_geom_type=False)
266
+ .loc[lambda x: x["_eliminate_idx"].notna()]
267
+ )
247
268
 
248
269
  borders["_length"] = borders.length
249
270
 
250
271
  # as DataFrame because GeoDataFrame constructor is expensive
251
272
  borders = pd.DataFrame(borders)
252
- gdf = pd.DataFrame(gdf)
253
273
 
254
274
  longest_border = borders.sort_values("_length", ascending=False).drop_duplicates(
255
- "eliminate_idx"
275
+ "_eliminate_idx"
256
276
  )
257
277
 
258
- to_poly_idx = longest_border.set_index("eliminate_idx")["poly_idx"]
259
- to_eliminate["_dissolve_idx"] = to_eliminate["eliminate_idx"].map(to_poly_idx)
278
+ to_dissolve_idx = longest_border.set_index("_eliminate_idx")["_dissolve_idx"]
279
+ to_eliminate["_dissolve_idx"] = to_eliminate["_eliminate_idx"].map(to_dissolve_idx)
260
280
 
261
- gdf = gdf.rename(columns={"poly_idx": "_dissolve_idx"}, errors="raise")
281
+ actually_eliminate = to_eliminate.loc[to_eliminate["_dissolve_idx"].notna()]
262
282
 
263
- actually_eliminate = to_eliminate.loc[lambda x: x["_dissolve_idx"].notna()]
283
+ isolated = to_eliminate.loc[to_eliminate["_dissolve_idx"].isna()]
284
+ containing_eliminators = (
285
+ pd.DataFrame(
286
+ isolated.drop(columns="_dissolve_idx").sjoin(
287
+ gdf[["_dissolve_idx", "geometry"]], predicate="contains"
288
+ )
289
+ )
290
+ .drop(columns="index_right")
291
+ .drop_duplicates("_eliminate_idx")
292
+ )
264
293
 
265
- eliminated = _eliminate(gdf, actually_eliminate, aggfunc, crs, **kwargs)
294
+ eliminated = _eliminate(
295
+ pd.DataFrame(gdf),
296
+ pd.concat([actually_eliminate, containing_eliminators]),
297
+ aggfunc,
298
+ crs,
299
+ fix_double,
300
+ **kwargs,
301
+ )
266
302
 
267
- if ignore_index:
268
- eliminated = eliminated.reset_index(drop=True)
269
- else:
303
+ if not ignore_index:
270
304
  eliminated.index = eliminated.index.map(idx_mapper)
271
305
  eliminated.index.name = idx_name
272
306
 
273
- if not remove_isolated:
274
- isolated = to_eliminate.loc[to_eliminate["_dissolve_idx"].isna()]
275
- if len(isolated):
276
- eliminated = pd.concat([eliminated, isolated])
307
+ if not remove_isolated and len(isolated):
308
+ eliminated = pd.concat([eliminated, isolated])
277
309
 
278
310
  eliminated = eliminated.drop(
279
- ["_dissolve_idx", "_length", "eliminate_idx", "poly_idx"],
311
+ ["_dissolve_idx", "_length", "_eliminate_idx", "_dissolve_idx"],
280
312
  axis=1,
281
313
  errors="ignore",
282
314
  )
283
315
 
284
316
  out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
317
+
285
318
  if geom_type != "mixed":
286
- return to_single_geom_type(out, geom_type)
287
- return out
319
+ out = to_single_geom_type(out, geom_type)
320
+
321
+ out = out.reset_index(drop=True) if ignore_index else out
322
+
323
+ if not was_multiple_gdfs:
324
+ return out
325
+
326
+ gdfs = ()
327
+ for i, cols in enumerate(original_cols):
328
+ df = out.loc[out["_df_idx"] == i, cols]
329
+ gdfs += (df,)
330
+
331
+ return gdfs
288
332
 
289
333
 
290
334
  def eliminate_by_largest(
291
- gdf: GeoDataFrame,
335
+ gdf: GeoDataFrame | list[GeoDataFrame],
292
336
  to_eliminate: GeoDataFrame,
293
337
  *,
294
338
  max_distance: int | float | None = None,
295
339
  remove_isolated: bool = False,
340
+ fix_double: bool = False,
296
341
  ignore_index: bool = False,
297
342
  aggfunc: str | dict | list | None = None,
298
343
  predicate: str = "intersects",
299
344
  **kwargs,
300
- ) -> GeoDataFrame:
345
+ ) -> GeoDataFrame | tuple[GeoDataFrame]:
301
346
  """Dissolves selected polygons with the largest neighbor polygon.
302
347
 
303
348
  Eliminates selected geometries by dissolving them with the neighboring
@@ -305,11 +350,14 @@ def eliminate_by_largest(
305
350
  large polygons will be kept, unless else is specified.
306
351
 
307
352
  Args:
308
- gdf: GeoDataFrame with polygon geometries.
353
+ gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
309
354
  to_eliminate: The geometries to be eliminated by 'gdf'.
310
355
  remove_isolated: If False (default), polygons in 'to_eliminate' that share
311
356
  no border with any polygon in 'gdf' will be kept. If True, the isolated
312
357
  polygons will be removed.
358
+ fix_double: If True, geometries to be eliminated will be erased by overlapping
359
+ geometries to not get double surfaces if the geometries in 'to_eliminate'
360
+ overlaps with multiple geometries in 'gdf'.
313
361
  ignore_index: If False (default), the resulting GeoDataFrame will keep the
314
362
  index of the large polygons. If True, the resulting axis will be labeled
315
363
  0, 1, …, n - 1.
@@ -323,8 +371,9 @@ def eliminate_by_largest(
323
371
  kwargs: Keyword arguments passed to the dissolve method.
324
372
 
325
373
  Returns:
326
- The GeoDataFrame with the selected polygons dissolved into the polygons of
327
- 'gdf'.
374
+ The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
375
+ If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
376
+
328
377
  """
329
378
  return _eliminate_by_area(
330
379
  gdf,
@@ -335,12 +384,13 @@ def eliminate_by_largest(
335
384
  sort_ascending=False,
336
385
  aggfunc=aggfunc,
337
386
  predicate=predicate,
387
+ fix_double=fix_double,
338
388
  **kwargs,
339
389
  )
340
390
 
341
391
 
342
392
  def eliminate_by_smallest(
343
- gdf: GeoDataFrame,
393
+ gdf: GeoDataFrame | list[GeoDataFrame],
344
394
  to_eliminate: GeoDataFrame,
345
395
  *,
346
396
  max_distance: int | float | None = None,
@@ -348,8 +398,9 @@ def eliminate_by_smallest(
348
398
  ignore_index: bool = False,
349
399
  aggfunc: str | dict | list | None = None,
350
400
  predicate: str = "intersects",
401
+ fix_double: bool = False,
351
402
  **kwargs,
352
- ) -> GeoDataFrame:
403
+ ) -> GeoDataFrame | tuple[GeoDataFrame]:
353
404
  return _eliminate_by_area(
354
405
  gdf,
355
406
  to_eliminate=to_eliminate,
@@ -359,6 +410,7 @@ def eliminate_by_smallest(
359
410
  sort_ascending=True,
360
411
  aggfunc=aggfunc,
361
412
  predicate=predicate,
413
+ fix_double=fix_double,
362
414
  **kwargs,
363
415
  )
364
416
 
@@ -372,8 +424,16 @@ def _eliminate_by_area(
372
424
  ignore_index: bool = False,
373
425
  aggfunc: str | dict | list | None = None,
374
426
  predicate="intersects",
427
+ fix_double: bool = False,
375
428
  **kwargs,
376
429
  ) -> GeoDataFrame:
430
+ if isinstance(gdf, (list, tuple)):
431
+ was_multiple_gdfs = True
432
+ original_cols = [df.columns for df in gdf]
433
+ gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
434
+ else:
435
+ was_multiple_gdfs = False
436
+
377
437
  crs = gdf.crs
378
438
  geom_type = get_geom_type(gdf)
379
439
 
@@ -408,11 +468,9 @@ def _eliminate_by_area(
408
468
 
409
469
  notna = joined.loc[lambda x: x["_dissolve_idx"].notna()]
410
470
 
411
- eliminated = _eliminate(gdf, notna, aggfunc, crs, **kwargs)
471
+ eliminated = _eliminate(gdf, notna, aggfunc, crs, fix_double=fix_double, **kwargs)
412
472
 
413
- if ignore_index:
414
- eliminated = eliminated.reset_index(drop=True)
415
- else:
473
+ if not ignore_index:
416
474
  eliminated.index = eliminated.index.map(idx_mapper)
417
475
  eliminated.index.name = idx_name
418
476
 
@@ -422,7 +480,7 @@ def _eliminate_by_area(
422
480
  eliminated = pd.concat([eliminated, isolated])
423
481
 
424
482
  eliminated = eliminated.drop(
425
- ["_dissolve_idx", "_area", "eliminate_idx", "poly_idx"],
483
+ ["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
426
484
  axis=1,
427
485
  errors="ignore",
428
486
  )
@@ -430,11 +488,22 @@ def _eliminate_by_area(
430
488
  out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
431
489
 
432
490
  if geom_type != "mixed":
433
- return to_single_geom_type(out, geom_type)
434
- return out
491
+ out = to_single_geom_type(out, geom_type)
435
492
 
493
+ out = out.reset_index(drop=True) if ignore_index else out
436
494
 
437
- def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
495
+ if not was_multiple_gdfs:
496
+ return out
497
+
498
+ gdfs = ()
499
+ for i, cols in enumerate(original_cols):
500
+ df = out.loc[out["_df_idx"] == i, cols]
501
+ gdfs += (df,)
502
+
503
+ return gdfs
504
+
505
+
506
+ def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
438
507
  if not len(to_eliminate):
439
508
  return gdf
440
509
 
@@ -442,9 +511,11 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
442
511
  to_dissolve = gdf.loc[in_to_eliminate]
443
512
  not_to_dissolve = gdf.loc[~in_to_eliminate].set_index("_dissolve_idx")
444
513
 
514
+ to_eliminate["_to_eliminate"] = 1
515
+
445
516
  if aggfunc is None:
446
517
  concatted = pd.concat(
447
- [to_dissolve, to_eliminate[["_dissolve_idx", "geometry"]]]
518
+ [to_dissolve, to_eliminate[["_to_eliminate", "_dissolve_idx", "geometry"]]]
448
519
  )
449
520
  aggfunc = "first"
450
521
  else:
@@ -464,6 +535,7 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
464
535
  if not len(many_hits):
465
536
  return one_hit
466
537
 
538
+ # aggregate all columns except geometry
467
539
  kwargs.pop("as_index", None)
468
540
  eliminated = (
469
541
  many_hits.drop(columns="geometry")
@@ -472,11 +544,145 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
472
544
  .drop(["_area"], axis=1, errors="ignore")
473
545
  )
474
546
 
475
- eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
476
- lambda x: make_valid(unary_union(x.values))
477
- )
547
+ # aggregate geometry
548
+ if fix_double:
549
+ assert eliminated.index.is_unique
550
+
551
+ many_hits = many_hits.set_index("_dissolve_idx")
552
+ many_hits["_row_idx"] = range(len(many_hits))
553
+
554
+ # TODO kan dette fikses trygt med .duplicated og ~x.duplicated?
555
+ eliminators: pd.Series = many_hits.loc[
556
+ many_hits["_to_eliminate"] != 1, "geometry"
557
+ ]
558
+ to_be_eliminated = many_hits.loc[many_hits["_to_eliminate"] == 1]
559
+
560
+ if 0:
561
+ tree = STRtree(eliminators.values)
562
+ left, right = tree.query(
563
+ to_be_eliminated.geometry.values, predicate="intersects"
564
+ )
565
+ pairs = pd.Series(right, index=left).to_frame("right")
566
+ pairs["_dissolve_idx"] = pairs.index.map(
567
+ dict(enumerate(to_be_eliminated.index))
568
+ )
569
+
570
+ soon_erased = to_be_eliminated.iloc[pairs.index]
571
+ intersecting = eliminators.iloc[pairs["right"]]
572
+
573
+ intersecting.index = soon_erased.index
574
+ soon_erased = soon_erased.geometry.groupby(level=0).agg(unary_union)
575
+ intersecting = intersecting.groupby(level=0).agg(unary_union)
576
+
577
+ soon_erased.loc[:] = difference(
578
+ soon_erased.values,
579
+ intersecting.values,
580
+ )
581
+ intersecting.loc[:] = difference(
582
+ intersecting.values,
583
+ soon_erased.values,
584
+ )
585
+
586
+ eliminated["geometry"] = (
587
+ pd.concat([intersecting, soon_erased])
588
+ .groupby(level=0)
589
+ .agg(lambda x: make_valid(unary_union(x.dropna().values)))
590
+ )
591
+ from ..maps.maps import explore, explore_locals
592
+
593
+ explore_locals()
594
+
595
+ # all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
596
+ all_geoms: pd.Series = gdf.geometry
597
+
598
+ tree = STRtree(all_geoms.values)
599
+ left, right = tree.query(
600
+ to_be_eliminated.geometry.values, predicate="intersects"
601
+ )
602
+ pairs = pd.Series(right, index=left).to_frame("right")
603
+ pairs["_dissolve_idx"] = pairs.index.map(
604
+ dict(enumerate(to_be_eliminated.index))
605
+ )
606
+
607
+ # pairs = pairs.loc[lambda x: x["right"] != x["_dissolve_idx"]]
608
+
609
+ soon_erased = to_be_eliminated.iloc[pairs.index]
610
+ intersecting = all_geoms.iloc[pairs["right"]]
611
+
612
+ shoud_not_erase = soon_erased.index != intersecting.index
613
+ soon_erased = soon_erased[shoud_not_erase]
614
+ intersecting = intersecting[shoud_not_erase]
615
+
616
+ missing = to_be_eliminated.loc[
617
+ # (~to_be_eliminated.index.isin(soon_erased.index))
618
+ # |
619
+ (~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])),
620
+ # | (~to_be_eliminated["_row_idx"].isin(soon_erased.index)),
621
+ "geometry",
622
+ ]
623
+
624
+ if 0:
625
+ from ..geopandas_tools.conversion import to_gdf
626
+ from ..maps.maps import explore, explore_locals
627
+
628
+ display(pairs)
629
+ display(soon_erased.index.unique())
630
+ display(soon_erased._row_idx.unique())
631
+ display(to_be_eliminated.index.unique())
632
+ display(to_be_eliminated._row_idx.unique())
633
+ display(missing.index.unique())
634
+
635
+ display(soon_erased)
636
+ display(to_be_eliminated)
637
+ display(missing)
478
638
 
479
- # setting crs on geometryarray to avoid warning in concat
639
+ explore(
640
+ to_gdf(soon_erased, 25833), intersecting=to_gdf(intersecting, 25833)
641
+ )
642
+ for j, ((i, g), (i2, g2)) in enumerate(
643
+ zip(intersecting.items(), soon_erased.geometry.items())
644
+ ):
645
+ explore(
646
+ to_gdf(g, 25833).assign(ii=i, j=j),
647
+ g2=to_gdf(g2, 25833).assign(ii=i2, j=j),
648
+ )
649
+
650
+ if 0:
651
+ explore(to_gdf(to_be_eliminated.iloc[[16]]))
652
+ explore(to_gdf(to_be_eliminated.iloc[[15]]))
653
+ explore(to_gdf(to_be_eliminated.iloc[[0]]))
654
+ print("hei")
655
+ explore(to_gdf(soon_erased.loc[soon_erased.index == 16]))
656
+ explore(to_gdf(soon_erased.loc[soon_erased.index == 36]))
657
+
658
+ explore(to_gdf(soon_erased.loc[soon_erased._row_idx == 16]))
659
+ explore(to_gdf(soon_erased.loc[soon_erased._row_idx == 36]))
660
+
661
+ # align and aggregate by dissolve index to not get duplicates in difference
662
+ intersecting.index = soon_erased.index
663
+ soon_erased = soon_erased.geometry.groupby(level=0).agg(unary_union)
664
+ intersecting = intersecting.groupby(level=0).agg(unary_union)
665
+
666
+ # from ..maps.maps import explore_locals
667
+ # explore_locals()
668
+
669
+ soon_erased.loc[:] = difference(
670
+ soon_erased.values,
671
+ intersecting.values,
672
+ )
673
+
674
+ eliminated["geometry"] = (
675
+ pd.concat([eliminators, soon_erased, missing])
676
+ .groupby(level=0)
677
+ .agg(lambda x: make_valid(unary_union(x.dropna().values)))
678
+ )
679
+
680
+ else:
681
+ eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
682
+ lambda x: make_valid(unary_union(x.values))
683
+ )
684
+
685
+ # setting crs on the GeometryArrays to avoid warning in concat
480
686
  not_to_dissolve.geometry.values.crs = crs
481
687
  eliminated.geometry.values.crs = crs
482
688
  one_hit.geometry.values.crs = crs
@@ -485,7 +691,7 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
485
691
 
486
692
  assert all(df.index.name == "_dissolve_idx" for df in to_concat)
487
693
 
488
- return pd.concat(to_concat).sort_index()
694
+ return pd.concat(to_concat).sort_index().drop(columns="_to_eliminate")
489
695
 
490
696
 
491
697
  def close_thin_holes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
@@ -601,33 +807,6 @@ def close_all_holes(
601
807
  return gdf.map(lambda x: _close_all_holes_no_islands(x, all_geoms))
602
808
 
603
809
 
604
- def _close_thin_holes(
605
- gdf: GeoDataFrame | GeoSeries,
606
- tolerance: int | float,
607
- *,
608
- ignore_islands: bool = False,
609
- copy: bool = True,
610
- ) -> GeoDataFrame | GeoSeries:
611
- holes = get_holes(gdf)
612
-
613
- if not len(holes):
614
- return gdf
615
-
616
- if not ignore_islands:
617
- inside_holes = sfilter(gdf, holes, predicate="within")
618
-
619
- def is_thin(x):
620
- return x.buffer(-tolerance).is_empty
621
-
622
- in_between = clean_overlay(
623
- holes, inside_holes, how="difference", grid_size=None
624
- ).loc[is_thin]
625
-
626
- holes = pd.concat([holes, in_between])
627
-
628
- thin_holes = holes.loc[is_thin]
629
-
630
-
631
810
  def close_small_holes(
632
811
  gdf: GeoDataFrame | GeoSeries,
633
812
  max_area: int | float,
@@ -802,6 +981,9 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
802
981
  include_interiors: If False (default), the holes inside individual polygons
803
982
  will not be included as gaps.
804
983
 
984
+ Note:
985
+ See get_holes to find holes inside singlepart polygons.
986
+
805
987
  Returns:
806
988
  GeoDataFrame of polygons with only a geometry column.
807
989
  """
@@ -815,14 +997,14 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
815
997
  {"geometry": [box(*tuple(gdf.total_bounds)).buffer(1)]}, crs=gdf.crs
816
998
  )
817
999
 
818
- gaps = make_all_singlepart(
1000
+ bbox_diff = make_all_singlepart(
819
1001
  clean_overlay(bbox, gdf, how="difference", geom_type="polygon")
820
1002
  )
821
1003
 
822
1004
  # remove the outer "gap", i.e. the surrounding area
823
- return sfilter_inverse(gaps, get_exterior_ring(bbox.geometry.values)).reset_index(
824
- drop=True
825
- )
1005
+ bbox_ring = get_exterior_ring(bbox.geometry.values)
1006
+ without_outer_ring = sfilter_inverse(bbox_diff, bbox_ring)
1007
+ return without_outer_ring.reset_index(drop=True)
826
1008
 
827
1009
 
828
1010
  def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
@@ -833,22 +1015,21 @@ def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
833
1015
  as_polygons: If True (default), the holes will be returned as polygons.
834
1016
  If False, they will be returned as LinearRings.
835
1017
 
1018
+ Note:
1019
+ See get_gaps to find holes/gaps between undissolved polygons.
1020
+
836
1021
  Returns:
837
1022
  GeoDataFrame of polygons or linearrings with only a geometry column.
838
1023
  """
839
1024
  if not len(gdf):
840
- return GeoDataFrame({"geometry": []}, crs=gdf.crs)
1025
+ return GeoDataFrame({"geometry": []}, index=gdf.index, crs=gdf.crs)
841
1026
 
842
1027
  def as_linearring(x):
843
1028
  return x
844
1029
 
845
1030
  astype = polygons if as_polygons else as_linearring
846
1031
 
847
- geoms = (
848
- make_all_singlepart(gdf.geometry).to_numpy()
849
- if isinstance(gdf, GeoDataFrame)
850
- else make_all_singlepart(gdf).to_numpy()
851
- )
1032
+ geoms = make_all_singlepart(gdf.geometry).to_numpy()
852
1033
 
853
1034
  rings = [
854
1035
  GeoSeries(astype(get_interior_ring(geoms, i)), crs=gdf.crs)