ssb-sgis 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. sgis/__init__.py +10 -3
  2. sgis/debug_config.py +24 -0
  3. sgis/geopandas_tools/bounds.py +16 -21
  4. sgis/geopandas_tools/buffer_dissolve_explode.py +112 -30
  5. sgis/geopandas_tools/centerlines.py +4 -91
  6. sgis/geopandas_tools/cleaning.py +1576 -583
  7. sgis/geopandas_tools/conversion.py +24 -14
  8. sgis/geopandas_tools/duplicates.py +27 -6
  9. sgis/geopandas_tools/general.py +259 -100
  10. sgis/geopandas_tools/geometry_types.py +1 -1
  11. sgis/geopandas_tools/neighbors.py +16 -12
  12. sgis/geopandas_tools/overlay.py +2 -2
  13. sgis/geopandas_tools/point_operations.py +3 -3
  14. sgis/geopandas_tools/polygon_operations.py +505 -100
  15. sgis/geopandas_tools/polygons_as_rings.py +40 -8
  16. sgis/geopandas_tools/sfilter.py +26 -9
  17. sgis/io/dapla_functions.py +238 -19
  18. sgis/maps/examine.py +11 -10
  19. sgis/maps/explore.py +227 -155
  20. sgis/maps/legend.py +13 -4
  21. sgis/maps/map.py +22 -13
  22. sgis/maps/maps.py +100 -29
  23. sgis/maps/thematicmap.py +25 -18
  24. sgis/networkanalysis/_service_area.py +6 -1
  25. sgis/networkanalysis/cutting_lines.py +12 -5
  26. sgis/networkanalysis/finding_isolated_networks.py +13 -6
  27. sgis/networkanalysis/networkanalysis.py +10 -12
  28. sgis/parallel/parallel.py +27 -10
  29. sgis/raster/base.py +208 -0
  30. sgis/raster/cube.py +3 -3
  31. sgis/raster/image_collection.py +1419 -722
  32. sgis/raster/indices.py +10 -7
  33. sgis/raster/raster.py +7 -7
  34. sgis/raster/sentinel_config.py +33 -17
  35. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.4.dist-info}/METADATA +6 -7
  36. ssb_sgis-1.0.4.dist-info/RECORD +62 -0
  37. ssb_sgis-1.0.3.dist-info/RECORD +0 -61
  38. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.4.dist-info}/LICENSE +0 -0
  39. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.4.dist-info}/WHEEL +0 -0
@@ -1,5 +1,7 @@
1
1
  """Functions for polygon geometries."""
2
2
 
3
+ from collections.abc import Callable
4
+
3
5
  import networkx as nx
4
6
  import numpy as np
5
7
  import pandas as pd
@@ -10,21 +12,35 @@ from shapely import area
10
12
  from shapely import box
11
13
  from shapely import buffer
12
14
  from shapely import difference
15
+ from shapely import extract_unique_points
13
16
  from shapely import get_exterior_ring
14
17
  from shapely import get_interior_ring
18
+ from shapely import get_num_geometries
15
19
  from shapely import get_num_interior_rings
16
20
  from shapely import get_parts
17
21
  from shapely import is_empty
18
22
  from shapely import make_valid
19
23
  from shapely import polygons
20
- from shapely import unary_union
24
+ from shapely import union_all
21
25
  from shapely.errors import GEOSException
22
-
26
+ from shapely.geometry import LinearRing
27
+ from shapely.ops import SplitOp
28
+
29
+ from ..debug_config import _DEBUG_CONFIG
30
+ from ..debug_config import _try_debug_print
31
+ from ..maps.maps import explore_locals
32
+ from .conversion import to_gdf
33
+ from .conversion import to_geoseries
34
+ from .duplicates import _get_intersecting_geometries
35
+ from .general import _grouped_unary_union
23
36
  from .general import _parallel_unary_union
24
37
  from .general import _parallel_unary_union_geoseries
25
38
  from .general import _push_geom_col
39
+ from .general import _unary_union_for_notna
26
40
  from .general import clean_geoms
41
+ from .general import extend_lines
27
42
  from .general import get_grouped_centroids
43
+ from .general import get_line_segments
28
44
  from .general import to_lines
29
45
  from .geometry_types import get_geom_type
30
46
  from .geometry_types import make_all_singlepart
@@ -36,6 +52,9 @@ from .polygons_as_rings import PolygonsAsRings
36
52
  from .sfilter import sfilter
37
53
  from .sfilter import sfilter_inverse
38
54
 
55
# Distance used for the small positive/negative buffers applied in this module
# (eliminate_by_longest when _BUFFER is enabled, _eliminate and clean_dissexp).
PRECISION = 1e-3
# Toggle for the buffer-out/buffer-in steps in eliminate_by_longest.
_BUFFER = False
57
+
39
58
 
40
59
  def get_polygon_clusters(
41
60
  *gdfs: GeoDataFrame | GeoSeries,
@@ -204,17 +223,16 @@ def get_cluster_mapper(
204
223
 
205
224
 
206
225
  def eliminate_by_longest(
207
- gdf: GeoDataFrame | list[GeoDataFrame],
226
+ gdf: GeoDataFrame | tuple[GeoDataFrame],
208
227
  to_eliminate: GeoDataFrame,
209
228
  *,
210
- remove_isolated: bool = False,
211
229
  fix_double: bool = True,
212
230
  ignore_index: bool = False,
213
231
  aggfunc: str | dict | list | None = None,
214
232
  grid_size=None,
215
233
  n_jobs: int = 1,
216
234
  **kwargs,
217
- ) -> GeoDataFrame | tuple[GeoDataFrame]:
235
+ ) -> tuple[GeoDataFrame]:
218
236
  """Dissolves selected polygons with the longest bordering neighbor polygon.
219
237
 
220
238
  Eliminates selected geometries by dissolving them with the neighboring
@@ -226,9 +244,6 @@ def eliminate_by_longest(
226
244
  Args:
227
245
  gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
228
246
  to_eliminate: The geometries to be eliminated by 'gdf'.
229
- remove_isolated: If False (default), polygons in 'to_eliminate' that share
230
- no border with any polygon in 'gdf' will be kept. If True, the isolated
231
- polygons will be removed.
232
247
  fix_double: If True, geometries to be eliminated will be erased by overlapping
233
248
  geometries to not get double surfaces if the geometries in 'to_eliminate'
234
249
  overlaps with multiple geometries in 'gdf'.
@@ -246,8 +261,11 @@ def eliminate_by_longest(
246
261
  **kwargs: Keyword arguments passed to the dissolve method.
247
262
 
248
263
  Returns:
249
- The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
250
- If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
264
+ A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
265
+ dissolved in and a GeoDataFrame with the potentially isolated
266
+ polygons that could not be eliminated. If multiple GeoDataFrame
267
+ are passed as 'gdf', the returned tuple will contain each frame
268
+ plus the isolated polygons as the last item.
251
269
 
252
270
  Examples:
253
271
  ---------
@@ -264,19 +282,21 @@ def eliminate_by_longest(
264
282
  Using multiple GeoDataFrame as input, the sliver is eliminated into the small
265
283
  polygon (because it has the longest border with sliver).
266
284
 
267
- >>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_longest(
285
+ >>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_longest(
268
286
  ... [small_poly, large_poly], sliver
269
287
  ... )
270
288
 
271
289
  With only one input GeoDataFrame:
272
290
 
273
291
  >>> polys = pd.concat([small_poly, large_poly])
274
- >>> eliminated = sg.eliminate_by_longest(polys, sliver)
275
-
276
-
292
+ >>> eliminated, isolated = sg.eliminate_by_longest(polys, sliver)
277
293
  """
278
- if not len(to_eliminate):
279
- return gdf
294
+ _recurse = kwargs.pop("_recurse", False)
295
+
296
+ if not len(to_eliminate) or not len(gdf):
297
+ if isinstance(gdf, (list, tuple)):
298
+ return (*gdf, to_eliminate)
299
+ return gdf, to_eliminate
280
300
 
281
301
  if isinstance(gdf, (list, tuple)):
282
302
  # concat, then break up the dataframes in the end
@@ -295,6 +315,28 @@ def eliminate_by_longest(
295
315
 
296
316
  gdf = gdf.reset_index(drop=True)
297
317
 
318
+ # TODO: is it ok to singlepart here?
319
+ gdf = make_all_singlepart(gdf, ignore_index=True).pipe(
320
+ to_single_geom_type, "polygon"
321
+ )
322
+
323
+ if _BUFFER:
324
+ gdf.geometry = gdf.buffer(
325
+ PRECISION,
326
+ resolution=1,
327
+ join_style=2,
328
+ )
329
+
330
+ to_eliminate.geometry = to_eliminate.buffer(
331
+ PRECISION,
332
+ resolution=1,
333
+ join_style=2,
334
+ )
335
+
336
+ # more_than_one = get_num_geometries(to_eliminate.geometry.values) > 1
337
+ # filt = more_than_one, to_eliminate._geometry_column_name
338
+ # to_eliminate.loc[*filt] = to_eliminate.loc[*filt].apply(_unary_union_for_notna)
339
+
298
340
  gdf["_dissolve_idx"] = gdf.index
299
341
  to_eliminate = to_eliminate.assign(_eliminate_idx=lambda x: range(len(x)))
300
342
 
@@ -333,6 +375,11 @@ def eliminate_by_longest(
333
375
  .drop(columns="index_right")
334
376
  .drop_duplicates("_eliminate_idx")
335
377
  )
378
+ isolated = isolated.drop(
379
+ ["_dissolve_idx", "_length", "_eliminate_idx"],
380
+ axis=1,
381
+ errors="ignore",
382
+ )
336
383
 
337
384
  eliminated = _eliminate(
338
385
  pd.DataFrame(gdf),
@@ -349,31 +396,96 @@ def eliminate_by_longest(
349
396
  eliminated.index = eliminated.index.map(idx_mapper)
350
397
  eliminated.index.name = idx_name
351
398
 
352
- if not remove_isolated and len(isolated):
353
- eliminated = pd.concat([eliminated, isolated])
354
-
355
399
  eliminated = eliminated.drop(
356
- ["_dissolve_idx", "_length", "_eliminate_idx", "_dissolve_idx"],
400
+ ["_dissolve_idx", "_length", "_eliminate_idx"],
357
401
  axis=1,
358
402
  errors="ignore",
359
403
  )
360
404
 
361
405
  out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
362
406
 
407
+ if _BUFFER:
408
+ out.geometry = out.buffer(
409
+ -PRECISION,
410
+ resolution=1,
411
+ join_style=2,
412
+ )
413
+ isolated.geometry = isolated.buffer(
414
+ -PRECISION,
415
+ resolution=1,
416
+ join_style=2,
417
+ )
418
+
363
419
  if geom_type != "mixed":
364
420
  out = to_single_geom_type(out, geom_type)
365
421
 
366
422
  out = out.reset_index(drop=True) if ignore_index else out
367
423
 
424
+ _try_debug_print("inni eliminate_by_longest")
425
+ explore_locals(center=_DEBUG_CONFIG["center"])
426
+
427
+ if not _recurse and len(isolated):
428
+ if 0:
429
+ isolated.geometry = isolated.buffer(
430
+ -PRECISION,
431
+ resolution=1,
432
+ join_style=2,
433
+ )
434
+ out, isolated = _recursively_eliminate_new_neighbors(
435
+ out,
436
+ isolated,
437
+ func=eliminate_by_longest,
438
+ fix_double=fix_double,
439
+ ignore_index=ignore_index,
440
+ aggfunc=aggfunc,
441
+ grid_size=grid_size,
442
+ n_jobs=n_jobs,
443
+ )
444
+
445
+ _try_debug_print("inni eliminate_by_longest 2")
446
+ explore_locals(center=_DEBUG_CONFIG["center"])
447
+
448
+ # assert (
449
+ # out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]]
450
+ # .notna()
451
+ # .all()
452
+ # .all()
453
+ # ), out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]].sort_values(
454
+ # ["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]
455
+ # )
456
+
368
457
  if not was_multiple_gdfs:
369
- return out
458
+ return out, isolated
370
459
 
371
460
  gdfs = ()
372
461
  for i, cols in enumerate(original_cols):
373
462
  df = out.loc[out["_df_idx"] == i, cols]
374
463
  gdfs += (df,)
375
464
 
376
- return gdfs
465
+ return (*gdfs, isolated)
466
+
467
+
468
def _recursively_eliminate_new_neighbors(
    df: GeoDataFrame,
    isolated: GeoDataFrame,
    func: Callable,
    **kwargs,
):
    """Call 'func' repeatedly until no more isolated polygons get eliminated.

    Args:
        df: The geometries that 'isolated' should be eliminated into.
        isolated: The geometries that were not eliminated in an earlier pass.
        func: Callable invoked as func(df, isolated, _recurse=True, **kwargs).
            It must return a (df, isolated) pair.
        **kwargs: Additional keyword arguments passed on to 'func'.

    Returns:
        A tuple of the latest 'df' and the remaining 'isolated' geometries.
    """
    previous_count = len(isolated)
    while previous_count:
        _try_debug_print(f"recurse len({len(isolated)})")
        df, isolated = func(df, isolated, _recurse=True, **kwargs)

        remaining = len(isolated)
        # stop when a pass made no progress, to avoid looping forever
        if remaining == previous_count:
            break
        previous_count = remaining

    return df, isolated
377
489
 
378
490
 
379
491
  def eliminate_by_largest(
@@ -381,15 +493,14 @@ def eliminate_by_largest(
381
493
  to_eliminate: GeoDataFrame,
382
494
  *,
383
495
  max_distance: int | float | None = None,
384
- remove_isolated: bool = False,
385
- fix_double: bool = False,
496
+ fix_double: bool = True,
386
497
  ignore_index: bool = False,
387
498
  aggfunc: str | dict | list | None = None,
388
499
  predicate: str = "intersects",
389
500
  grid_size=None,
390
501
  n_jobs: int = 1,
391
502
  **kwargs,
392
- ) -> GeoDataFrame | tuple[GeoDataFrame]:
503
+ ) -> tuple[GeoDataFrame]:
393
504
  """Dissolves selected polygons with the largest neighbor polygon.
394
505
 
395
506
  Eliminates selected geometries by dissolving them with the neighboring
@@ -401,9 +512,6 @@ def eliminate_by_largest(
401
512
  to_eliminate: The geometries to be eliminated by 'gdf'.
402
513
  max_distance: Max distance to search for neighbors. Defaults to None, meaning
403
514
  0.
404
- remove_isolated: If False (default), polygons in 'to_eliminate' that share
405
- no border with any polygon in 'gdf' will be kept. If True, the isolated
406
- polygons will be removed.
407
515
  fix_double: If True, geometries to be eliminated will be erased by overlapping
408
516
  geometries to not get double surfaces if the geometries in 'to_eliminate'
409
517
  overlaps with multiple geometries in 'gdf'.
@@ -422,8 +530,11 @@ def eliminate_by_largest(
422
530
  **kwargs: Keyword arguments passed to the dissolve method.
423
531
 
424
532
  Returns:
425
- The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
426
- If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
533
+ A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
534
+ dissolved in and a GeoDataFrame with the potentially isolated
535
+ polygons that could not be eliminated. If multiple GeoDataFrame
536
+ are passed as 'gdf', the returned tuple will contain each frame
537
+ plus the isolated polygons as the last item.
427
538
 
428
539
  Examples:
429
540
  ---------
@@ -440,20 +551,18 @@ def eliminate_by_largest(
440
551
  Using multiple GeoDataFrame as input, the sliver is eliminated into
441
552
  the large polygon.
442
553
 
443
- >>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_largest(
554
+ >>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_largest(
444
555
  ... [small_poly, large_poly], sliver
445
556
  ... )
446
557
 
447
558
  With only one input GeoDataFrame:
448
559
 
449
560
  >>> polys = pd.concat([small_poly, large_poly])
450
- >>> eliminated = sg.eliminate_by_largest(polys, sliver)
451
-
561
+ >>> eliminated, isolated = sg.eliminate_by_largest(polys, sliver)
452
562
  """
453
563
  return _eliminate_by_area(
454
564
  gdf,
455
565
  to_eliminate=to_eliminate,
456
- remove_isolated=remove_isolated,
457
566
  max_distance=max_distance,
458
567
  ignore_index=ignore_index,
459
568
  sort_ascending=False,
@@ -471,19 +580,17 @@ def eliminate_by_smallest(
471
580
  to_eliminate: GeoDataFrame,
472
581
  *,
473
582
  max_distance: int | float | None = None,
474
- remove_isolated: bool = False,
475
583
  ignore_index: bool = False,
476
584
  aggfunc: str | dict | list | None = None,
477
585
  predicate: str = "intersects",
478
- fix_double: bool = False,
586
+ fix_double: bool = True,
479
587
  grid_size=None,
480
588
  n_jobs: int = 1,
481
589
  **kwargs,
482
- ) -> GeoDataFrame | tuple[GeoDataFrame]:
590
+ ) -> tuple[GeoDataFrame]:
483
591
  return _eliminate_by_area(
484
592
  gdf,
485
593
  to_eliminate=to_eliminate,
486
- remove_isolated=remove_isolated,
487
594
  max_distance=max_distance,
488
595
  ignore_index=ignore_index,
489
596
  sort_ascending=True,
@@ -499,19 +606,21 @@ def eliminate_by_smallest(
499
606
  def _eliminate_by_area(
500
607
  gdf: GeoDataFrame,
501
608
  to_eliminate: GeoDataFrame,
502
- remove_isolated: bool,
503
609
  max_distance: int | float | None,
504
610
  sort_ascending: bool,
505
611
  ignore_index: bool = False,
506
612
  aggfunc: str | dict | list | None = None,
507
613
  predicate="intersects",
508
- fix_double: bool = False,
614
+ fix_double: bool = True,
509
615
  grid_size=None,
510
616
  n_jobs: int = 1,
511
617
  **kwargs,
512
618
  ) -> GeoDataFrame:
513
- if not len(to_eliminate):
514
- return gdf
619
+ _recurse = kwargs.pop("_recurse", False)
620
+
621
+ if not len(to_eliminate) or not len(gdf):
622
+ return gdf, to_eliminate
623
+
515
624
  if isinstance(gdf, (list, tuple)):
516
625
  was_multiple_gdfs = True
517
626
  original_cols = [df.columns for df in gdf]
@@ -525,6 +634,8 @@ def _eliminate_by_area(
525
634
  if not ignore_index:
526
635
  idx_mapper = dict(enumerate(gdf.index))
527
636
  idx_name = gdf.index.name
637
+ idx_mapper_to_eliminate = dict(enumerate(to_eliminate.index))
638
+ idx_name_to_eliminate = to_eliminate.index.name
528
639
 
529
640
  gdf = make_all_singlepart(gdf).reset_index(drop=True)
530
641
  to_eliminate = make_all_singlepart(to_eliminate).reset_index(drop=True)
@@ -564,37 +675,71 @@ def _eliminate_by_area(
564
675
  **kwargs,
565
676
  )
566
677
 
567
- if not ignore_index:
568
- eliminated.index = eliminated.index.map(idx_mapper)
569
- eliminated.index.name = idx_name
570
-
571
- if not remove_isolated:
572
- isolated = joined.loc[joined["_dissolve_idx"].isna()]
573
- if len(isolated):
574
- eliminated = pd.concat([eliminated, isolated])
575
-
576
678
  eliminated = eliminated.drop(
577
679
  ["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
578
680
  axis=1,
579
681
  errors="ignore",
580
682
  )
581
683
 
582
- out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
684
+ out = GeoDataFrame(
685
+ eliminated,
686
+ geometry="geometry",
687
+ crs=crs,
688
+ ).pipe(clean_geoms)
689
+
690
+ isolated = (
691
+ GeoDataFrame(
692
+ joined.loc[joined["_dissolve_idx"].isna()], geometry="geometry", crs=crs
693
+ )
694
+ .drop(
695
+ ["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
696
+ axis=1,
697
+ errors="ignore",
698
+ )
699
+ .pipe(clean_geoms)
700
+ )
701
+
702
+ if not ignore_index:
703
+ out.index = out.index.map(idx_mapper)
704
+ out.index.name = idx_name
705
+ isolated.index = isolated.index.map(idx_mapper_to_eliminate)
706
+ isolated.index.name = idx_name_to_eliminate
583
707
 
584
708
  if geom_type != "mixed":
585
709
  out = to_single_geom_type(out, geom_type)
586
710
 
587
711
  out = out.reset_index(drop=True) if ignore_index else out
588
712
 
713
+ if not _recurse and len(isolated):
714
+ out, isolated = _recursively_eliminate_new_neighbors(
715
+ out,
716
+ isolated,
717
+ func=_eliminate_by_area,
718
+ max_distance=max_distance,
719
+ sort_ascending=sort_ascending,
720
+ fix_double=fix_double,
721
+ predicate=predicate,
722
+ ignore_index=ignore_index,
723
+ aggfunc=aggfunc,
724
+ grid_size=grid_size,
725
+ n_jobs=n_jobs,
726
+ )
727
+
589
728
  if not was_multiple_gdfs:
590
- return out
729
+ return out, isolated
730
+
731
+ for k, v in locals().items():
732
+ try:
733
+ print(k, v.columns)
734
+ except Exception:
735
+ pass
591
736
 
592
737
  gdfs = ()
593
738
  for i, cols in enumerate(original_cols):
594
739
  df = out.loc[out["_df_idx"] == i, cols]
595
740
  gdfs += (df,)
596
741
 
597
- return gdfs
742
+ return (*gdfs, isolated)
598
743
 
599
744
 
600
745
  def _eliminate(
@@ -603,6 +748,8 @@ def _eliminate(
603
748
  if not len(to_eliminate):
604
749
  return gdf
605
750
 
751
+ gdf["_range_idx_elim"] = range(len(gdf))
752
+
606
753
  in_to_eliminate = gdf["_dissolve_idx"].isin(to_eliminate["_dissolve_idx"])
607
754
  to_dissolve = gdf.loc[in_to_eliminate]
608
755
  not_to_dissolve = gdf.loc[~in_to_eliminate].set_index("_dissolve_idx")
@@ -656,10 +803,22 @@ def _eliminate(
656
803
  # all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
657
804
  all_geoms: pd.Series = gdf.geometry
658
805
 
806
+ # more_than_one = get_num_geometries(all_geoms.values) > 1
807
+ # all_geoms.loc[more_than_one] = all_geoms.loc[more_than_one].apply(
808
+ # _unary_union_for_notna
809
+ # )
810
+
811
+ # more_than_one = get_num_geometries(to_be_eliminated.values) > 1
812
+ # to_be_eliminated.loc[more_than_one, "geometry"] = to_be_eliminated.loc[
813
+ # more_than_one, "geometry"
814
+ # ].apply(_unary_union_for_notna)
815
+
816
+ # create DataFrame of intersection pairs
659
817
  tree = STRtree(all_geoms.values)
660
818
  left, right = tree.query(
661
819
  to_be_eliminated.geometry.values, predicate="intersects"
662
820
  )
821
+
663
822
  pairs = pd.Series(right, index=left).to_frame("right")
664
823
  pairs["_dissolve_idx"] = pairs.index.map(
665
824
  dict(enumerate(to_be_eliminated.index))
@@ -684,15 +843,20 @@ def _eliminate(
684
843
 
685
844
  # align and aggregate by dissolve index to not get duplicates in difference
686
845
  intersecting.index = soon_erased.index
687
- soon_erased = soon_erased.geometry.groupby(level=0).agg(
688
- lambda x: unary_union(x, grid_size=grid_size)
689
- )
690
- intersecting = intersecting.groupby(level=0).agg(
691
- lambda x: unary_union(x, grid_size=grid_size)
692
- )
693
846
 
694
- # from ..maps.maps import explore_locals
695
- # explore_locals()
847
+ soon_erased = _grouped_unary_union(soon_erased, level=0, grid_size=grid_size)
848
+ intersecting = _grouped_unary_union(intersecting, level=0, grid_size=grid_size)
849
+
850
+ assert soon_erased.index.equals(soon_erased.index)
851
+
852
+ # soon_erased = soon_erased.geometry.groupby(level=0).agg(
853
+ # lambda x: unary_union(x, grid_size=grid_size)
854
+ # )
855
+ # intersecting = intersecting.groupby(level=0).agg(
856
+ # lambda x: unary_union(x, grid_size=grid_size)
857
+ # )
858
+
859
+ # explore_locals(center=_DEBUG_CONFIG["center"])
696
860
 
697
861
  soon_erased.loc[:] = _try_difference(
698
862
  soon_erased.to_numpy(),
@@ -702,10 +866,40 @@ def _eliminate(
702
866
  geom_type="polygon",
703
867
  )
704
868
 
869
+ missing = _grouped_unary_union(missing, level=0, grid_size=grid_size)
870
+
871
+ missing = make_all_singlepart(missing).loc[lambda x: x.area > 0]
872
+
873
+ soon_erased = make_all_singlepart(soon_erased).loc[lambda x: x.area > 0]
874
+
875
+ if 0:
876
+ tree = STRtree(soon_erased.values)
877
+ left, right = tree.query(missing.values, predicate="intersects")
878
+ explore_locals(
879
+ missing2=to_gdf(missing.to_numpy()[left], 25833),
880
+ soon_erased2=to_gdf(soon_erased.to_numpy()[right], 25833),
881
+ center=_DEBUG_CONFIG["center"],
882
+ )
883
+ missing = pd.Series(
884
+ difference(
885
+ missing.to_numpy()[left],
886
+ soon_erased.to_numpy()[right],
887
+ grid_size=grid_size,
888
+ ),
889
+ index=left,
890
+ ).loc[lambda x: (x.notna()) & (~is_empty(x))]
891
+
892
+ soon_eliminated = pd.concat([eliminators, soon_erased, missing])
893
+ more_than_one = get_num_geometries(soon_eliminated.values) > 1
894
+
895
+ soon_eliminated.loc[more_than_one] = soon_eliminated.loc[more_than_one].apply(
896
+ _unary_union_for_notna
897
+ )
898
+
705
899
  if n_jobs > 1:
706
900
  eliminated["geometry"] = GeoSeries(
707
901
  _parallel_unary_union_geoseries(
708
- pd.concat([eliminators, soon_erased, missing]),
902
+ soon_eliminated,
709
903
  level=0,
710
904
  grid_size=grid_size,
711
905
  n_jobs=n_jobs,
@@ -713,15 +907,10 @@ def _eliminate(
713
907
  index=eliminated.index,
714
908
  )
715
909
  else:
716
- eliminated["geometry"] = (
717
- pd.concat([eliminators, soon_erased, missing])
718
- .groupby(level=0)
719
- .agg(
720
- lambda x: make_valid(
721
- unary_union(x.dropna().values, grid_size=grid_size)
722
- )
723
- )
724
- )
910
+ eliminated["geometry"] = _grouped_unary_union(soon_eliminated, level=0)
911
+ # eliminated["geometry"] = soon_eliminated.groupby(level=0).agg(
912
+ # lambda x: make_valid(unary_union(x))
913
+ # )
725
914
 
726
915
  else:
727
916
  if n_jobs > 1:
@@ -729,9 +918,7 @@ def _eliminate(
729
918
  many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
730
919
  )
731
920
  else:
732
- eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
733
- lambda x: make_valid(unary_union(x.values, grid_size=grid_size))
734
- )
921
+ eliminated["geometry"] = _grouped_unary_union(many_hits, by="_dissolve_idx")
735
922
 
736
923
  # setting crs on the GeometryArrays to avoid warning in concat
737
924
  not_to_dissolve.geometry.values.crs = crs
@@ -745,31 +932,170 @@ def _eliminate(
745
932
 
746
933
  assert all(df.index.name == "_dissolve_idx" for df in to_concat)
747
934
 
748
- return pd.concat(to_concat).sort_index().drop(columns="_to_eliminate")
935
+ out = pd.concat(to_concat).sort_index()
936
+
937
+ duplicated_geoms = _get_intersecting_geometries(
938
+ GeoDataFrame(
939
+ {
940
+ "geometry": out.geometry.values,
941
+ "_range_idx_elim_dups": out["_range_idx_elim"].values,
942
+ },
943
+ ),
944
+ geom_type="polygon",
945
+ keep_geom_type=True,
946
+ n_jobs=n_jobs,
947
+ predicate="intersects",
948
+ ).pipe(clean_geoms)
949
+ duplicated_geoms.geometry = duplicated_geoms.buffer(-PRECISION)
950
+ duplicated_geoms = duplicated_geoms.pipe(clean_geoms)
951
+
952
+ if len(duplicated_geoms):
953
+ hits_in_original_df = duplicated_geoms.sjoin(
954
+ GeoDataFrame(
955
+ {
956
+ "geometry": gdf.geometry.values,
957
+ "_range_idx_elim": gdf["_range_idx_elim"].values,
958
+ },
959
+ ),
960
+ how="inner",
961
+ )
962
+
963
+ should_be_erased = hits_in_original_df.loc[
964
+ lambda x: x["_range_idx_elim"] != x["_range_idx_elim_dups"]
965
+ ]
966
+
967
+ should_be_erased_idx = list(
968
+ sorted(should_be_erased["_range_idx_elim_dups"].unique())
969
+ )
970
+ should_erase = (
971
+ should_be_erased.groupby("_range_idx_elim_dups")["geometry"]
972
+ .agg(lambda x: make_valid(union_all(x)))
973
+ .sort_index()
974
+ )
975
+
976
+ # aligning out with "should_erase" before rowwise difference
977
+ out = out.sort_values("_range_idx_elim")
978
+ assert out["_range_idx_elim"].is_unique
979
+ to_be_erased_idx = out["_range_idx_elim"].isin(should_be_erased_idx)
980
+
981
+ out.loc[to_be_erased_idx, "geometry"] = make_valid(
982
+ difference(
983
+ out.loc[
984
+ to_be_erased_idx,
985
+ "geometry",
986
+ ].values,
987
+ should_erase.values,
988
+ )
989
+ )
990
+
991
+ from ..maps.maps import explore
992
+
993
+ # display(hits_in_original_df)
994
+ # display(should_be_erased.assign(area=lambda x: x.area))
995
+
996
+ explore(
997
+ gdf=to_gdf(gdf, 25833),
998
+ out=to_gdf(out, 25833),
999
+ should_be_erased=to_gdf(should_be_erased, 25833),
1000
+ duplicated_geoms=duplicated_geoms.set_crs(25833),
1001
+ eli=GeoDataFrame(
1002
+ {
1003
+ "geometry": out.geometry.values,
1004
+ "_range_idx_elim": out["_range_idx_elim"].values,
1005
+ },
1006
+ crs=25833,
1007
+ ),
1008
+ center=_DEBUG_CONFIG["center"],
1009
+ )
1010
+
1011
+ _try_debug_print("inni _eliminate")
1012
+ _try_debug_print(duplicated_geoms)
1013
+ explore_locals(center=_DEBUG_CONFIG["center"])
1014
+
1015
+ return out.drop(columns=["_to_eliminate", "_range_idx_elim"])
1016
+
1017
+
1018
def clean_dissexp(df: GeoDataFrame, dissolve_func: Callable, **kwargs) -> GeoDataFrame:
    """Experimental: dissolve slightly buffered geometries and snap vertices back.

    The geometries are buffered out by PRECISION, passed to 'dissolve_func',
    buffered back in by PRECISION, stripped of empty geometries and exploded.
    The rings of the result are then run through _snap_points_back, which moves
    vertices onto the nearest original vertex within PRECISION.

    Args:
        df: Geometries to dissolve. The GeoSeries fallback in the code suggests
            that a GeoSeries is accepted as well.
        dissolve_func: Callable that receives the buffered geometries as first
            argument and returns the dissolved geometries.
        **kwargs: Keyword arguments passed on to 'dissolve_func'.

    Returns:
        The dissolved and exploded geometries with snapped ring vertices.
    """

    def _replace_geometry(obj, new_geoms) -> None:
        # fall back to .loc when setting .geometry raises AttributeError
        # on a GeoSeries; any other object re-raises the error
        try:
            obj.geometry = new_geoms
        except AttributeError as e:
            if isinstance(obj, GeoSeries):
                obj.loc[:] = new_geoms
            else:
                raise e

    # unique vertices of the input geometries, collected before anything is changed
    source_vertices = GeoDataFrame(
        {"geometry": get_parts(extract_unique_points(df.geometry.values))}
    )
    source_vertices = source_vertices[lambda x: ~x.geometry.duplicated()]

    dissolved = df.copy()
    _replace_geometry(
        dissolved, dissolved.buffer(PRECISION, resolution=1, join_style=2)
    )

    dissolved = dissolve_func(dissolved, **kwargs)
    _replace_geometry(
        dissolved, dissolved.buffer(-PRECISION, resolution=1, join_style=2)
    )

    not_empty = ~dissolved.geometry.is_empty
    dissolved = dissolved.loc[not_empty].explode(ignore_index=True)

    # NOTE(review): presumably drops vertices that fall inside the shrunken
    # result, leaving only candidates near the new boundaries - confirm against
    # sfilter_inverse
    source_vertices = sfilter_inverse(source_vertices, dissolved.buffer(-PRECISION))

    rings = PolygonsAsRings(dissolved.geometry)
    snapped = rings.apply_numpy_func(
        _snap_points_back,
        kwargs={"snap_to": source_vertices, "tolerance": PRECISION},
    ).to_numpy()

    _replace_geometry(dissolved, snapped)

    return dissolved
1069
+
1070
+
1071
def _snap_points_back(rings, snap_to, tolerance):
    """Move ring vertices onto the nearest point in 'snap_to' within 'tolerance'.

    Args:
        rings: Array-like of ring geometries whose unique vertices are extracted.
        snap_to: GeoDataFrame with the point geometries to snap to. It is not
            modified; a copy with a helper column is used internally.
        tolerance: Max distance between a vertex and the point it is snapped to.
            Vertices with no point within this distance are kept as they are.

    Returns:
        A pandas Series of LinearRings built from the (possibly moved) vertices,
        grouped by the position of the source ring in 'rings'.
    """
    points = GeoDataFrame({"geometry": extract_unique_points(rings)})
    points = points.explode(index_parts=True)

    # assign returns a new frame, so the caller's 'snap_to' is left untouched
    # (the original wrote the 'geom_right' column into the passed-in frame)
    snap_to = snap_to.assign(geom_right=snap_to.geometry)
    nearest = points.sjoin_nearest(snap_to, max_distance=tolerance)
    points.loc[nearest.index, points.geometry.name] = nearest["geom_right"]

    # first index level identifies the source ring after the explode above
    new_rings = points.groupby(level=0)[points.geometry.name].agg(LinearRing)
    return new_rings
749
1081
 
750
1082
 
751
1083
  def close_thin_holes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
752
1084
  gdf = make_all_singlepart(gdf)
753
1085
  holes = get_holes(gdf)
754
- inside_holes = sfilter(gdf, holes, predicate="within").unary_union
1086
+ inside_holes = union_all(sfilter(gdf, holes, predicate="within").geometry.values)
755
1087
 
756
1088
  def to_none_if_thin(geoms):
1089
+ if not len(geoms):
1090
+ return geoms
757
1091
  try:
758
- buffered_in = buffer(
759
- difference(polygons(geoms), inside_holes), -(tolerance / 2)
760
- )
761
- return np.where(is_empty(buffered_in), None, geoms)
1092
+ polys = polygons(geoms)
762
1093
  except GEOSException:
763
- buffered_in = buffer(
764
- difference(make_valid(polygons(make_valid(geoms))), inside_holes),
765
- -(tolerance / 2),
766
- )
767
- return np.where(is_empty(buffered_in), None, geoms)
768
-
769
- except ValueError as e:
770
- if not len(geoms):
771
- return geoms
772
- raise e
1094
+ polys = make_valid(polygons(make_valid(geoms)))
1095
+ if inside_holes is not None:
1096
+ polys = difference(polys, inside_holes)
1097
+ buffered_in = buffer(polys, -(tolerance / 2))
1098
+ return np.where(is_empty(buffered_in), None, geoms)
773
1099
 
774
1100
  if not (gdf.geom_type == "Polygon").all():
775
1101
  raise ValueError(gdf.geom_type.value_counts())
@@ -848,7 +1174,7 @@ def close_all_holes(
848
1174
  else:
849
1175
  return holes_closed
850
1176
 
851
- all_geoms = make_valid(gdf.unary_union)
1177
+ all_geoms = make_valid(union_all(gdf.geometry.values))
852
1178
  if isinstance(gdf, GeoDataFrame):
853
1179
  gdf.geometry = gdf.geometry.map(
854
1180
  lambda x: _close_all_holes_no_islands(x, all_geoms)
@@ -934,7 +1260,7 @@ def close_small_holes(
934
1260
  gdf = make_all_singlepart(gdf)
935
1261
 
936
1262
  if not ignore_islands:
937
- all_geoms = make_valid(gdf.unary_union)
1263
+ all_geoms = make_valid(union_all(gdf.geometry.values))
938
1264
 
939
1265
  if isinstance(gdf, GeoDataFrame):
940
1266
  gdf.geometry = gdf.geometry.map(
@@ -990,14 +1316,14 @@ def _close_small_holes_no_islands(poly, max_area, all_geoms):
990
1316
  for n in range(n_interior_rings):
991
1317
  hole = polygons(get_interior_ring(part, n))
992
1318
  try:
993
- no_islands = unary_union(hole.difference(all_geoms))
1319
+ no_islands = union_all(hole.difference(all_geoms))
994
1320
  except GEOSException:
995
- no_islands = make_valid(unary_union(hole.difference(all_geoms)))
1321
+ no_islands = make_valid(union_all(hole.difference(all_geoms)))
996
1322
 
997
1323
  if area(no_islands) < max_area:
998
1324
  holes_closed.append(no_islands)
999
1325
 
1000
- return make_valid(unary_union(holes_closed))
1326
+ return make_valid(union_all(holes_closed))
1001
1327
 
1002
1328
 
1003
1329
  def _close_all_holes_no_islands(poly, all_geoms):
@@ -1012,13 +1338,13 @@ def _close_all_holes_no_islands(poly, all_geoms):
1012
1338
  for n in range(n_interior_rings):
1013
1339
  hole = polygons(get_interior_ring(part, n))
1014
1340
  try:
1015
- no_islands = unary_union(hole.difference(all_geoms))
1341
+ no_islands = union_all(hole.difference(all_geoms))
1016
1342
  except GEOSException:
1017
- no_islands = make_valid(unary_union(hole.difference(all_geoms)))
1343
+ no_islands = make_valid(union_all(hole.difference(all_geoms)))
1018
1344
 
1019
1345
  holes_closed.append(no_islands)
1020
1346
 
1021
- return make_valid(unary_union(holes_closed))
1347
+ return make_valid(union_all(holes_closed))
1022
1348
 
1023
1349
 
1024
1350
  def get_gaps(
@@ -1062,7 +1388,7 @@ def get_gaps(
1062
1388
  return without_outer_ring.reset_index(drop=True)
1063
1389
 
1064
1390
 
1065
- def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
1391
+ def get_holes(gdf: GeoDataFrame, as_polygons: bool = True) -> GeoDataFrame:
1066
1392
  """Get the holes inside polygons.
1067
1393
 
1068
1394
  Args:
@@ -1096,3 +1422,82 @@ def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
1096
1422
  if rings
1097
1423
  else GeoDataFrame({"geometry": []}, crs=gdf.crs)
1098
1424
  )
1425
+
1426
+
1427
+ def split_polygons_by_lines(polygons: GeoSeries, lines: GeoSeries) -> GeoSeries:
1428
+ idx_mapper = dict(enumerate(polygons.index))
1429
+ idx_name = polygons.index.name
1430
+ polygons = polygons.copy()
1431
+ polygons.index = range(len(polygons))
1432
+
1433
+ # use pandas to explode faster (from list instead of GeoSeries.explode)
1434
+ splitted = pd.Series(polygons.geometry.to_numpy())
1435
+ lines = to_geoseries(lines)
1436
+ lines.index = range(len(lines))
1437
+
1438
+ # find intersection pairs to split relevant polygon for each line
1439
+ tree = STRtree(splitted.values)
1440
+ left, right = tree.query(lines.values, predicate="intersects")
1441
+ pairs = pd.Series(right, index=left)
1442
+
1443
+ lines = lines.loc[lambda x: x.index.isin(pairs.index)]
1444
+
1445
+ for i, line in lines.items():
1446
+ intersecting = pairs.loc[[i]].values
1447
+ try:
1448
+ splitted.loc[intersecting] = splitted.loc[intersecting].apply(
1449
+ lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
1450
+ )
1451
+ except TypeError:
1452
+ # if we got multipolygon
1453
+ splitted = splitted.apply(get_parts).explode()
1454
+ splitted.loc[intersecting] = splitted.loc[intersecting].apply(
1455
+ lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
1456
+ )
1457
+ splitted = splitted.explode()
1458
+
1459
+ if isinstance(polygons, GeoDataFrame):
1460
+ polygons = polygons.loc[splitted.index]
1461
+ polygons.geometry = splitted
1462
+ polygons.index = polygons.index.map(idx_mapper)
1463
+ polygons.index.name = idx_name
1464
+ return polygons
1465
+ else:
1466
+ splitted.index = splitted.index.map(idx_mapper)
1467
+ splitted.index.name = idx_name
1468
+ return splitted
1469
+
1470
+
1471
def split_by_neighbors(
    df: GeoDataFrame,
    split_by: GeoDataFrame,
    tolerance: int | float,
    grid_size: float | int | None = None,
) -> GeoDataFrame:
    """Split the geometries of 'df' by extended line segments from 'split_by'.

    The output of to_lines(split_by) is intersected with 'df' buffered by
    'tolerance', broken up with get_line_segments, and each segment is
    extended by three times the tolerance before being passed to
    split_polygons_by_lines together with 'df'.

    Args:
        df: Geometries to split. Returned as is if empty.
        split_by: Geometries whose lines are used for splitting.
        tolerance: Buffer distance used to find relevant lines. The segments
            are extended by 'tolerance * 3'.
        grid_size: Passed on to clean_overlay.

    Returns:
        The result of split_polygons_by_lines for the singlepart 'df'.
    """
    if len(df) == 0:
        return df

    df = make_all_singlepart(df)
    split_by = split_by.copy()

    candidate_lines = to_lines(split_by)
    search_area = df.buffer(tolerance).to_frame("geometry")
    clipped = clean_overlay(
        candidate_lines,
        search_area,
        how="intersection",
        grid_size=grid_size,
    )
    segments = get_line_segments(clipped).reset_index(drop=True)

    # each segment index occurs once per boundary point after the explode
    endpoints = segments.boundary.explode(index_parts=False)
    is_first = ~endpoints.index.duplicated(keep="first")
    is_last = ~endpoints.index.duplicated(keep="last")

    lines = extend_lines(
        endpoints.loc[is_first].values,
        endpoints.loc[is_last].values,
        distance=tolerance * 3,
    )

    return split_polygons_by_lines(df, lines)