ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import re
 import warnings
 from typing import Callable
 
+import networkx as nx
 import numpy as np
 import pandas as pd
 import shapely
@@ -46,7 +47,7 @@ from .general import (
     to_lines,
 )
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
-from .neighbors import get_k_nearest_neighbors
+from .neighbors import get_k_nearest_neighbors, get_neighbor_indices
 from .overlay import clean_overlay
 from .polygon_operations import (
     close_all_holes,
@@ -126,7 +127,7 @@ def coverage_clean(
     it might be a good idea to buffer the gaps, slivers and double surfaces
     before elimination to make sure the polygons are properly dissolved.
 
-    >>> def _small<_buffer(df):
+    >>> def _small_buffer(df):
     ...     df.geometry = df.buffer(0.001)
     ...     return df
     ...
@@ -151,7 +152,10 @@ def coverage_clean(
         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
     ]
 
-    gdf = safe_simplify(gdf, PRECISION)
+    try:
+        gdf = safe_simplify(gdf, PRECISION)
+    except GEOSException:
+        pass
 
     gdf = (
         clean_geoms(gdf)
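
The initial simplification is now best-effort: a GEOSException no longer aborts the whole clean, and the unsimplified input is kept instead. A minimal sketch of the same pattern (the toy GeoDataFrame is illustrative only; GEOSException lives in shapely.errors in shapely 2.x):

    import geopandas as gpd
    from shapely.errors import GEOSException
    from shapely.geometry import Polygon

    gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])])
    try:
        # optional optimization: if GEOS rejects the geometry,
        # fall through and keep the unsimplified input
        gdf.geometry = gdf.geometry.simplify(0.01)
    except GEOSException:
        pass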
@@ -188,9 +192,7 @@ def coverage_clean(
     gdf["_poly_idx"] = range(len(gdf))
 
     thin_gaps_and_double = pd.concat([gaps, double]).loc[
-        lambda x: (
-            shapely.simplify(x.geometry, PRECISION).buffer(-tolerance / 2).is_empty
-        )
+        lambda x: (x.buffer(-tolerance / 2).is_empty)
     ]
 
     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
@@ -209,7 +211,6 @@ def coverage_clean(
         raise ValueError("Large double surfaces.")
 
     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
-    to_eliminate = safe_simplify(to_eliminate, PRECISION)
 
     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
 
@@ -309,7 +310,10 @@ def coverage_clean(
         one_hit = cleaned[only_one].drop(columns="_poly_idx")
         many_hits = cleaned[~only_one]
     except IndexError:
-        one_hit = cleaned[lambda x: x.index == min(x.index) - 1]
+        assert not cleaned["_poly_idx"].notna().any(), cleaned
+        one_hit = cleaned[lambda x: x.index == min(x.index) - 1].drop(
+            columns="_poly_idx", errors="ignore"
+        )
         many_hits = cleaned
 
     for i, grid_size in enumerate(grid_sizes):
@@ -336,10 +340,12 @@ def coverage_clean(
 
     cleaned = pd.concat([many_hits, one_hit], ignore_index=True)
 
+    gdf = gdf.drop(columns="_poly_idx")
+
     for i, grid_size in enumerate(grid_sizes):
         try:
             cleaned = clean_overlay(
-                gdf.drop(columns="_poly_idx"),
+                gdf,
                 cleaned,
                 how="update",
                 geom_type="polygon",
@@ -347,16 +353,6 @@ def coverage_clean(
             )
             break
         except GEOSException as e:
-            if 1 == 0:
-                try:
-                    cleaned = update_geometries(
-                        sort_small_first(cleaned),
-                        geom_type="polygon",
-                        grid_size=grid_size,
-                        n_jobs=n_jobs,
-                    )
-                except GEOSException:
-                    pass
             if i == len(grid_sizes) - 1:
                 explore_geosexception(
                     e,
@@ -369,7 +365,7 @@ def coverage_clean(
                 )
                 raise e
 
-    cleaned = sort_small_first(cleaned)
+    cleaned = sort_large_first(cleaned)
 
     # slivers on bottom
     cleaned = pd.concat(split_out_slivers(cleaned, tolerance))
@@ -396,13 +392,17 @@ def coverage_clean(
                 )
                 raise e
 
-    cleaned = safe_simplify(cleaned, PRECISION)
-    cleaned.geometry = shapely.make_valid(cleaned.geometry)
+    # cleaned = safe_simplify(cleaned, PRECISION)
+    # cleaned.geometry = shapely.make_valid(cleaned.geometry)
+
+    # TODO check why polygons dissappear in rare cases. For now, just add back the missing
+    dissapeared_polygons = sfilter_inverse(gdf, cleaned.buffer(-PRECISION))
+    cleaned = pd.concat([cleaned, dissapeared_polygons])
 
-    return cleaned
+    return to_single_geom_type(cleaned, "polygon")
 
 
-def safe_simplify(gdf, tolerance: float | int):
+def safe_simplify(gdf, tolerance: float | int, **kwargs):
     """Simplify only if the resulting area is no more than 1 percent larger.
 
     Because simplifying can result in holes being filled.
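
Instead of a final simplify/make_valid pass, 1.0.1 re-adds any input polygons that are no longer covered by the cleaned result (see the TODO above). sfilter_inverse is sgis's inverse spatial filter; a rough stand-in with plain geopandas, assuming it keeps the rows of the first frame that intersect nothing in the second:

    import geopandas as gpd
    import pandas as pd

    def add_back_missing(gdf, cleaned, precision=1e-7):
        # shrink slightly so boundary-only touches don't count as coverage
        shrunk = gpd.GeoDataFrame(geometry=cleaned.buffer(-precision), crs=cleaned.crs)
        joined = gdf.sjoin(shrunk, how="left", predicate="intersects")
        # input rows with no match in the shrunk cover have "disappeared"
        disappeared = gdf.loc[joined["index_right"].isna().groupby(joined.index).all()]
        return pd.concat([cleaned, disappeared])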
@@ -412,21 +412,28 @@ def safe_simplify(gdf, tolerance: float | int):
     copied.geometry = shapely.make_valid(
         shapely.simplify(copied.geometry.values, tolerance=tolerance)
     )
-    copied.loc[copied.area > length_then * 1.01, copied._geometry_column_name] = (
-        gdf.loc[copied.area > length_then * 1.01, copied._geometry_column_name]
-    )
+    filt = (copied.area > length_then * 1.01) | (copied.geometry.is_empty)
+    copied.loc[filt, copied._geometry_column_name] = gdf.loc[
+        filt, copied._geometry_column_name
+    ]
 
     return copied
 
 
-def simplify_and_put_small_on_top(gdf, tolerance: float | int, grid_size=None):
-    copied = sort_small_first(gdf)
-    copied.geometry = shapely.make_valid(
-        shapely.simplify(
-            shapely.segmentize(copied.geometry.values, tolerance), tolerance=tolerance
-        )
+def remove_interior_slivers(gdf, tolerance):
+    gdf, slivers = split_out_slivers(gdf, tolerance)
+    slivers["_idx"] = range(len(slivers))
+    without_thick = clean_overlay(
+        to_lines(slivers), buff(gdf, PRECISION), how="difference"
+    )
+    return pd.concat(
+        [
+            gdf,
+            slivers[lambda x: x["_idx"].isin(without_thick["_idx"])].drop(
+                columns="_idx"
+            ),
+        ]
     )
-    return update_geometries(copied, geom_type="polygon", grid_size=grid_size)
 
 
 def remove_spikes(
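
safe_simplify now also reverts geometries that simplification collapsed to empty, in addition to those whose area grew by more than 1 percent. The idea in isolation (a sketch; sgis stores the pre-simplification area in length_then, here recomputed inline):

    import geopandas as gpd
    from shapely.geometry import Polygon

    gdf = gpd.GeoDataFrame(geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])])
    simplified = gdf.geometry.simplify(0.5)
    # revert where simplifying inflated the area by >1 % or emptied the geometry
    revert = (simplified.area > gdf.area * 1.01) | simplified.is_empty
    simplified.loc[revert] = gdf.geometry.loc[revert]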
@@ -501,10 +508,8 @@ def try_for_grid_size(
     args: tuple | None = None,
     kwargs: dict | None = None,
 ):
-    if args is None:
-        args = ()
-    if kwargs is None:
-        kwargs = {}
+    args = args or ()
+    kwargs = kwargs or {}
     for i, grid_size in enumerate(grid_sizes):
         try:
             return func(*args, grid_size=grid_size, **kwargs)
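
The or idiom matches the removed is-None checks for this signature: the only falsy values a caller could pass are an empty tuple or dict, which collapse to the same defaults anyway.

    def defaults(args=None, kwargs=None):
        args = args or ()
        kwargs = kwargs or {}
        return args, kwargs

    assert defaults() == ((), {})
    assert defaults(args=(), kwargs={}) == ((), {})  # falsy inputs, same result
    assert defaults(args=(1,)) == ((1,), {})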
@@ -589,7 +594,10 @@ def split_by_neighbors(df, split_by, tolerance, grid_size=None):
 
     intersecting_lines = (
         clean_overlay(
-            to_lines(split_by), buff(df, tolerance), how="identity", grid_size=grid_size
+            to_lines(split_by),
+            buff(df, tolerance),
+            how="intersection",
+            grid_size=grid_size,
         )
         .pipe(get_line_segments)
         .reset_index(drop=True)
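
Switching how="identity" to how="intersection" means neighbor-line segments outside the buffered df are now dropped rather than carried through. In standard overlay terms (sketched with geopandas.overlay, assuming sgis's clean_overlay follows the usual semantics for these keywords):

    import geopandas as gpd
    from shapely.geometry import LineString, Point

    lines = gpd.GeoDataFrame(geometry=[LineString([(0, 0), (10, 0)])])
    area = gpd.GeoDataFrame(geometry=[Point(2, 0).buffer(1)])

    # intersection: only the part of the line inside the buffer survives
    clipped = gpd.overlay(lines, area, how="intersection", keep_geom_type=False)

    # identity: the whole line is kept, split into inside and outside parts
    split = gpd.overlay(lines, area, how="identity", keep_geom_type=False)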
@@ -656,468 +664,6 @@ def make_lines_between_points(
     return linestrings(coords.values, indices=coords.index)
 
 
-def explore_geosexception(e: GEOSException, *gdfs, logger=None):
-    from ..maps.maps import Explore, explore
-    from .conversion import to_gdf
-
-    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
-
-    matches = re.findall(pattern, str(e))
-    coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
-    exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
-    if len(exception_point):
-        exception_point["wkt"] = exception_point.to_wkt()
-        if logger:
-            logger.error(
-                e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
-            )
-        else:
-            explore(exception_point, *gdfs, mask=exception_point.buffer(100))
-    else:
-        if logger:
-            logger.error(e, Explore(*gdfs))
-        else:
-            explore(*gdfs)
-
-
-def snap_to_mask(
-    gdf: GeoDataFrame, tolerance: int | float, mask: GeoDataFrame | GeoSeries | Geometry
-):
-    return snap_polygons(
-        gdf,
-        mask=mask,
-        tolerance=tolerance,
-        snap_to_nodes=False,
-    )
-
-
-def snap_polygons(
-    gdf: GeoDataFrame,
-    tolerance: int | float,
-    mask: GeoDataFrame | GeoSeries | Geometry | None = None,
-    snap_to_nodes: bool = True,
-    **kwargs,
-):
-    if not len(gdf):
-        return gdf
-
-    geom_type = "polygon"
-
-    gdf = safe_simplify(gdf, PRECISION)
-
-    gdf = (
-        clean_geoms(gdf)
-        .pipe(make_all_singlepart, ignore_index=True)
-        .pipe(to_single_geom_type, geom_type)
-    )
-
-    gdf = close_thin_holes(gdf, tolerance)
-
-    if mask is None:
-        mask: GeoDataFrame = close_all_holes(dissexp_by_cluster(gdf)).dissolve()
-    else:
-        try:
-            mask: GeoDataFrame = mask[["geometry"]]
-        except Exception:
-            mask: GeoDataFrame = to_geoseries(mask).to_frame("geometry")
-
-    gdf_copy = gdf.copy()
-
-    gdf.geometry = (
-        PolygonsAsRings(gdf.geometry.values)
-        .apply_numpy_func(
-            _snap_linearrings,
-            kwargs=dict(tolerance=tolerance, mask=mask, snap_to_nodes=snap_to_nodes),
-        )
-        .to_numpy()
-    )
-
-    gdf = to_single_geom_type(make_all_singlepart(clean_geoms(gdf)), geom_type)
-
-    if snap_to_nodes:
-        missing = clean_overlay(gdf_copy, gdf, how="difference")
-
-        missing, isolated = sfilter_split(missing, gdf)
-        isolated.geometry = isolated.buffer(PRECISION * 10)
-        gdf = eliminate_by_longest(
-            gdf, pd.concat([missing, isolated]), remove_isolated=False
-        )
-
-    missing = clean_overlay(mask, gdf, how="difference")
-
-    gdf = eliminate_by_longest(
-        gdf, missing.buffer(PRECISION * 10).to_frame("geometry"), remove_isolated=False
-    ).pipe(clean_clip, mask, geom_type="polygon")
-
-    gdf = update_geometries(
-        sort_small_first(close_small_holes(gdf, PRECISION)), geom_type="polygon"
-    )
-
-    return gdf
-
-
-def _snap_to_anchors(
-    points: GeoDataFrame,
-    tolerance: int | float,
-    anchors: GeoDataFrame | None = None,
-    custom_func: Callable | None = None,
-) -> GeoDataFrame:
-    if not len(points):
-        try:
-            return points, anchors[["geometry"]]
-        except TypeError:
-            return points, points[["geometry"]]
-
-    assert points.index.is_unique
-
-    tree = STRtree(points.geometry.values)
-    left, right = tree.query(
-        points.geometry.values,
-        predicate="dwithin",
-        distance=tolerance,
-    )
-    indices = pd.Series(right, index=left, name="_right_idx")
-
-    geom_idx_left = indices.index.map(dict(enumerate(points["_geom_idx"])))
-    geom_idx_right = indices.map(dict(enumerate(points["_geom_idx"])))
-
-    left_on_top = indices.loc[geom_idx_left < geom_idx_right].sort_index()
-
-    # keep only indices from left if they have not already appeared in right
-    # these shouldn't be anchors, but instead be snapped
-    new_indices = []
-    values = []
-    right_indices = set()
-    for left, right in left_on_top.items():
-        if left not in right_indices:
-            new_indices.append(left)
-            values.append(right)
-            right_indices.add(right)
-
-    snap_indices = pd.Series(values, index=new_indices)
-
-    if custom_func:
-        snap_indices = custom_func(snap_indices)
-
-    new_anchors = points.loc[
-        points.index.isin(snap_indices.index), ["geometry", "_geom_idx"]
-    ]
-    new_anchors["_cluster"] = get_cluster_mapper(new_anchors.buffer(0.1))
-
-    assert new_anchors["_geom_idx"].notna().all()
-
-    no_longer_anchors: pd.Index = new_anchors.loc[
-        lambda x: (x["_cluster"].duplicated())  # & (x["_geom_idx"] >= idx_start)
-    ].index
-    new_anchors = new_anchors.loc[lambda x: ~x.index.isin(no_longer_anchors)]
-
-    if anchors is not None:
-        anchors = pd.concat([anchors, new_anchors]).loc[
-            lambda x: ~x.geometry.duplicated()
-        ]
-    else:
-        anchors = new_anchors
-        anchors["_was_anchor"] = 0
-
-    should_be_snapped = (points.index.isin(snap_indices.values)) | (
-        points.index.isin(no_longer_anchors)
-    )
-    if anchors is not None:
-        should_be_snapped |= points.index.isin(
-            sfilter(points, anchors.buffer(tolerance)).index
-        )
-
-    to_be_snapped = points.loc[should_be_snapped]
-
-    anchors["_right_geom"] = anchors.geometry
-
-    snapped = (
-        to_be_snapped.sjoin_nearest(anchors, max_distance=tolerance)
-        .sort_values("index_right")["_right_geom"]
-        .loc[lambda x: ~x.index.duplicated()]
-    )
-
-    # explore(
-    #     anchors,
-    #     to_be_snapped,
-    #     snapped=snapped,
-    #     left_on_top=points.loc[lambda x: (~x.index.isin(left_on_top.values))],
-    #     indices=points.loc[lambda x: (~x.index.isin(indices.values))],
-    #     points_i_snap_to=points.set_crs(25833),
-    # )
-
-    points.loc[snapped.index, "geometry"] = snapped
-
-    return points, anchors[["geometry"]]
-
-
-def _snap_linearrings(
-    geoms: NDArray[LinearRing],
-    tolerance: int | float,
-    mask: GeoDataFrame | None = None,
-    snap_to_nodes: bool = True,
-    gaps=None,
-):
-    if not len(geoms):
-        return geoms
-
-    if mask is None:
-        idx_start = 0
-    else:
-        mask: GeoSeries = make_all_singlepart(mask).geometry
-        mask_nodes = GeoDataFrame(
-            {
-                "geometry": extract_unique_points(mask.geometry),
-                "_geom_idx": range(len(mask)),
-            }
-        ).explode(ignore_index=True)
-
-        idx_start = len(mask)
-
-    gdf = GeoDataFrame(
-        {"geometry": geoms, "_geom_idx": np.arange(idx_start, len(geoms) + idx_start)}
-    )
-
-    is_thin = GeoSeries(polygons(gdf.geometry)).buffer(-tolerance / 2).is_empty
-
-    gdf["_is_thin"] = is_thin
-
-    thin = is_thin[lambda x: x == True]
-    thin.loc[:] = None
-    thin.index = thin.index.map(gdf["_geom_idx"])
-
-    # points_from_thin = (
-    #     extract_unique_points(gdf.loc[is_thin, "geometry"])
-    #     .to_frame("geometry")
-    #     .explode(ignore_index=True)
-    #     .pipe(sfilter_inverse, gdf.buffer(PRECISION))
-    # )
-
-    gdf = gdf.loc[is_thin == False]
-
-    points: GeoDataFrame = gdf.assign(
-        geometry=lambda x: extract_unique_points(x.geometry.values)
-    ).explode(ignore_index=True)
-
-    # step 1: add vertices nearest to mask nodes
-
-    segments = points_to_line_segments(points.set_index("_geom_idx"))
-    segments["_geom_idx"] = segments.index
-    segments.index = points.index
-
-    mask_nodes["rgeom"] = mask_nodes.geometry
-    joined = segments.sjoin_nearest(mask_nodes, max_distance=tolerance)
-
-    midpoints = shapely.get_point(
-        shapely.shortest_line(joined.geometry.values, joined["rgeom"].values), 0
-    )
-
-    boundaries_groupby = joined.boundary.explode(index_parts=False).groupby(level=0)
-
-    with_new_midpoints = (
-        pd.concat(
-            [
-                # first point
-                GeoSeries(boundaries_groupby.nth(0)),
-                GeoSeries(midpoints, index=joined.index),
-                # last point
-                GeoSeries(boundaries_groupby.nth(-1)),
-            ]
-        )
-        .groupby(level=0)
-        .agg(lambda x: MultiPoint(x.values))
-    )
-
-    segments.loc[with_new_midpoints.index, "geometry"] = with_new_midpoints
-
-    segments.geometry = extract_unique_points(segments.geometry)
-    points = segments.explode(ignore_index=True)
-
-    # step 2: snap to mask nodes
-
-    points_by_mask_nodes = sfilter(
-        points.loc[lambda x: x["_geom_idx"] >= idx_start], mask_nodes.buffer(tolerance)
-    )
-
-    relevant_mask_nodes = sfilter(
-        mask_nodes,
-        points_by_mask_nodes.buffer(tolerance),
-        predicate="within",
-    )
-    # explore(
-    #     relevant_mask_nodes,
-    #     points_by_mask_nodes,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    # explore(
-    #     mask,
-    #     gdf,
-    #     relevant_mask_nodes,
-    #     points_by_mask_nodes,
-    #     segments,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    if len(relevant_mask_nodes):
-        mask_nodes["_right_geom"] = mask_nodes.geometry
-        snapped = points_by_mask_nodes.sjoin_nearest(mask_nodes, max_distance=tolerance)
-
-        anchors = GeoDataFrame(
-            {"geometry": snapped.drop_duplicates("index_right")["_right_geom"].values}
-        )
-
-        snapmapper = snapped["_right_geom"].loc[lambda x: ~x.index.duplicated()]
-
-        points.loc[snapmapper.index, "geometry"] = snapmapper
-    else:
-        anchors = None
-
-    if snap_to_nodes:
-        snapped, anchors = _snap_to_anchors(
-            points, tolerance, anchors=mask_nodes
-        )  # anchors)
-    else:
-        snapped = points
-
-    # remove duplicates
-    snapped = pd.concat(
-        snapped.loc[lambda x: x["_geom_idx"] == i].loc[lambda x: ~x.duplicated()]
-        for i in snapped.loc[
-            lambda x: (x["_geom_idx"] >= idx_start), "_geom_idx"
-        ].unique()
-    )
-
-    assert (snapped["_geom_idx"] >= idx_start).all()
-
-    as_rings = (
-        snapped.sort_index()
-        .set_index("_geom_idx")
-        # .pipe(_remove_legit_spikes)
-        .loc[lambda x: x.groupby(level=0).size() > 2]
-        .groupby(level=0)["geometry"]
-        .agg(LinearRing)
-    )
-
-    as_polygons = GeoDataFrame(
-        {"geometry": polygons(as_rings.values), "_geom_idx": as_rings.index}
-    )
-
-    slivers = as_polygons.loc[lambda x: x.buffer(-tolerance / 2).is_empty]
-    snapped = snapped.loc[lambda x: ~x["_geom_idx"].isin(slivers["_geom_idx"])]
-
-    as_polygons = update_geometries(sort_small_first(as_polygons))
-
-    missing_mask_nodes = sfilter_inverse(
-        mask_nodes, as_polygons.buffer(PRECISION)
-    ).pipe(sfilter, as_polygons.buffer(PRECISION + tolerance))
-
-    # explore(
-    #     mask,
-    #     gdf,
-    #     anchors,
-    #     missing_mask_nodes,
-    #     snapped,
-    #     as_polygons,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    if snap_to_nodes or len(missing_mask_nodes):
-        thin_gaps = get_gaps(as_polygons, include_interiors=True).loc[
-            lambda x: x.buffer(-tolerance / 2).is_empty
-        ]
-        thin_gaps.geometry = thin_gaps.buffer(-PRECISION).buffer(PRECISION)
-
-        assert snapped.index.is_unique
-        segments = points_to_line_segments(snapped.set_index("_geom_idx"))
-        segments["_geom_idx"] = segments.index
-        segments.index = snapped.index
-
-        assert segments.index.is_unique
-
-        segs_by_gaps = sfilter(
-            segments,
-            pd.concat([thin_gaps, slivers]).buffer(PRECISION),
-        )
-        # gap_nodes = pd.concat(
-        #     [
-        #         missing_mask_nodes,
-        #         extract_unique_points(thin_gaps.geometry).to_frame("geometry"),
-        #     ]
-        # )
-
-        # explore(
-        #     # missing_mask_polygons,
-        #     missing_mask_nodes,
-        #     segs_by_gaps,
-        #     thin_gaps,
-        #     as_polygons=as_polygons,
-        #     anchors=anchors.set_crs(25833),
-        # )
-
-        # segs_by_gaps = _add_midpoints_to_segments2(
-        #     segs_by_gaps, points=gap_nodes, tolerance=tolerance
-        # )
-
-        segs_by_gaps.geometry = segmentize(segs_by_gaps.geometry, tolerance)
-        segs_by_gaps.geometry = extract_unique_points(segs_by_gaps.geometry)
-        assert segs_by_gaps.index.is_unique
-
-        snapped = pd.concat(
-            [snapped.loc[lambda x: ~x.index.isin(segs_by_gaps.index)], segs_by_gaps]
-        ).sort_index()
-
-        snapped = pd.concat(
-            snapped.loc[lambda x: x["_geom_idx"] == i].loc[lambda x: ~x.duplicated()]
-            for i in snapped["_geom_idx"].unique()
-        ).explode(ignore_index=True)
-
-        snapped, _ = _snap_to_anchors(snapped, tolerance, anchors=mask_nodes)
-
-    as_rings = (
-        snapped.loc[lambda x: (x["_geom_idx"] >= idx_start)]
-        .sort_index()
-        .set_index("_geom_idx")
-        # .pipe(_remove_legit_spikes)
-        .loc[lambda x: x.groupby(level=0).size() > 2]
-        .groupby(level=0)["geometry"]
-        .agg(LinearRing)
-    )
-
-    missing = gdf.set_index("_geom_idx")["geometry"].loc[
-        lambda x: (~x.index.isin(as_rings.index)) & (x.index >= idx_start)
-    ]
-    missing.loc[:] = None
-
-    return pd.concat([as_rings, thin, missing]).sort_index()
-
-
-def _remove_legit_spikes(df):
-    """Remove points where the next and previous points are the same.
-
-    The lines these points make are as spiky as they come,
-    hence the term "legit spikes".
-    """
-    df["next"] = df.groupby(level=0)["geometry"].shift(-1)
-    df["prev"] = df.groupby(level=0)["geometry"].shift(1)
-
-    first_points = df.loc[lambda x: ~x.index.duplicated(keep="first"), "geometry"]
-    is_last_point = df["next"].isna()
-    df.loc[is_last_point, "next"] = first_points
-
-    last_points = df.loc[lambda x: ~x.index.duplicated(keep="last"), "geometry"]
-    is_first_point = df["prev"].isna()
-    df.loc[is_first_point, "prev"] = last_points
-
-    assert df["next"].notna().all()
-    assert df["prev"].notna().all()
-
-    return df.loc[lambda x: x["next"] != x["prev"]]
-
-
 def get_line_segments(lines) -> GeoDataFrame:
     assert lines.index.is_unique
     if isinstance(lines, GeoDataFrame):
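
This hunk deletes the whole polygon-snapping machinery from the module: explore_geosexception is only moved (it reappears below), but snap_to_mask, snap_polygons, _snap_to_anchors, _snap_linearrings and _remove_legit_spikes are gone in 1.0.1, so code importing them from here will break. For simple vertex snapping, shapely's built-in snap can serve as a partial stand-in (a sketch only, not a drop-in replacement for the removed logic):

    import shapely
    from shapely.geometry import Polygon

    poly = Polygon([(0, 0), (1, 0), (1, 1.02), (0, 1)])
    mask = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])

    # move vertices of poly onto vertices of mask within the tolerance
    snapped = shapely.snap(poly, mask, tolerance=0.05)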
@@ -1188,99 +734,25 @@ def points_to_line_segments(points: GeoDataFrame) -> GeoDataFrame:
     )
 
 
-def split_spiky_polygons(
-    gdf: GeoDataFrame, tolerance: int | float, grid_sizes: tuple[None | int] = (None,)
-) -> GeoDataFrame:
-    if not len(gdf):
-        return gdf
-
-    gdf = to_single_geom_type(make_all_singlepart(gdf), "polygon")
-
-    if not gdf.index.is_unique:
-        gdf = gdf.reset_index(drop=True)
-
-    # remove both inwards and outwards spikes
-    polygons_without_spikes = (
-        gdf.buffer(-tolerance / 2, join_style=2)
-        .buffer(tolerance, join_style=2)
-        .buffer(-tolerance / 2, join_style=2)
-    )
-
-    donuts_around_polygons = to_lines(
-        polygons_without_spikes.to_frame("geometry")
-    ).pipe(buff, 1e-3, copy=False)
-
-    # donuts_around_polygons["_poly_idx"] = donuts_around_polygons.index
-
-    def _remove_spikes(df):
-        df = df.to_frame("geometry")
-        # df = df.reset_index(drop=True)
-        df["_poly_idx"] = df.index
-        df["_ring_idx"] = range(len(df))
-
-        points = df.copy()
-        points.geometry = extract_unique_points(points.geometry)
-        points = points.explode(index_parts=False).explode(index_parts=False)
-        points["_idx"] = range(len(points))
-
-        # keep only matches from same polygon
-        not_spikes = points.sjoin(donuts_around_polygons).loc[
-            lambda x: x["_poly_idx"] == x["index_right"]
-        ]
-        can_be_polygons = not_spikes.iloc[
-            (not_spikes.groupby("_ring_idx").transform("size") >= 3).values
-        ]
-
-        without_spikes = (
-            can_be_polygons.sort_values("_idx")
-            .groupby("_ring_idx")["geometry"]
-            .agg(LinearRing)
-        )
-
-        missing = df.loc[
-            ~df["_ring_idx"].isin(without_spikes.index), df._geometry_column_name
-        ]
-        return pd.concat(
-            [without_spikes, missing]
-        ).sort_index()  # .to_frame("geometry")
-
-    without_spikes = GeoDataFrame(
-        {
-            "geometry": PolygonsAsRings(gdf.geometry)
-            .apply_geoseries_func(_remove_spikes)
-            .to_numpy()
-        },
-        crs=gdf.crs,
-    ).pipe(to_single_geom_type, "polygon")
-    without_spikes.index = gdf.index
-
-    is_thin = without_spikes.buffer(-tolerance / 2).is_empty
-    without_spikes = pd.concat(
-        [
-            split_by_neighbors(
-                without_spikes[is_thin], without_spikes, tolerance=tolerance
-            ),
-            without_spikes[~is_thin],
-        ]
-    )
+def explore_geosexception(e: GEOSException, *gdfs, logger=None):
+    from ..maps.maps import Explore, explore
+    from .conversion import to_gdf
 
-    # for _ in range(2):
-    if 1:
-        for i, grid_size in enumerate(grid_sizes):
-            try:
-                without_spikes = update_geometries(
-                    sort_small_first(without_spikes), geom_type="polygon"
-                )
-                break
-            except GEOSException as e:
-                if i == len(grid_sizes) - 1:
-                    raise e
+    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
 
-    for i, grid_size in enumerate(grid_sizes):
-        try:
-            return clean_overlay(
-                gdf, without_spikes, how="identity", grid_size=grid_size
+    matches = re.findall(pattern, str(e))
+    coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
+    exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
+    if len(exception_point):
+        exception_point["wkt"] = exception_point.to_wkt()
+        if logger:
+            logger.error(
+                e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
             )
-        except GEOSException as e:
-            if i == len(grid_sizes) - 1:
-                raise e
+        else:
+            explore(exception_point, *gdfs, mask=exception_point.buffer(100))
+    else:
+        if logger:
+            logger.error(e, Explore(*gdfs))
+        else:
+            explore(*gdfs)
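
explore_geosexception is otherwise unchanged by the move: it still extracts coordinate pairs from the GEOS error text with the same regex and plots them. What that regex picks up, on a made-up error string:

    import re

    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
    msg = "TopologyException: side location conflict at 261849.5 6648814.25"
    coords = [(float(x), float(y)) for x, y in re.findall(pattern, msg)]
    assert coords == [(261849.5, 6648814.25)]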