ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to the supported registries. It is provided for informational purposes only.
- sgis/__init__.py +6 -4
- sgis/geopandas_tools/bounds.py +2 -6
- sgis/geopandas_tools/buffer_dissolve_explode.py +149 -45
- sgis/geopandas_tools/cleaning.py +66 -594
- sgis/geopandas_tools/conversion.py +92 -12
- sgis/geopandas_tools/duplicates.py +53 -23
- sgis/geopandas_tools/general.py +35 -0
- sgis/geopandas_tools/neighbors.py +31 -1
- sgis/geopandas_tools/overlay.py +143 -63
- sgis/geopandas_tools/polygons_as_rings.py +1 -1
- sgis/io/dapla_functions.py +7 -14
- sgis/maps/explore.py +29 -3
- sgis/maps/map.py +16 -4
- sgis/maps/maps.py +95 -49
- sgis/parallel/parallel.py +73 -35
- sgis/raster/torchgeo.py +30 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/METADATA +6 -6
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/RECORD +20 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/cleaning.py
CHANGED
@@ -2,6 +2,7 @@ import re
 import warnings
 from typing import Callable

+import networkx as nx
 import numpy as np
 import pandas as pd
 import shapely
@@ -46,7 +47,7 @@ from .general import (
     to_lines,
 )
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
-from .neighbors import get_k_nearest_neighbors
+from .neighbors import get_k_nearest_neighbors, get_neighbor_indices
 from .overlay import clean_overlay
 from .polygon_operations import (
     close_all_holes,
@@ -126,7 +127,7 @@ def coverage_clean(
     it might be a good idea to buffer the gaps, slivers and double surfaces
     before elimination to make sure the polygons are properly dissolved.

-    >>> def
+    >>> def _small_buffer(df):
     ...     df.geometry = df.buffer(0.001)
     ...     return df
     ...
@@ -151,7 +152,10 @@ def coverage_clean(
         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
     ]

-
+    try:
+        gdf = safe_simplify(gdf, PRECISION)
+    except GEOSException:
+        pass

     gdf = (
         clean_geoms(gdf)
@@ -188,9 +192,7 @@ def coverage_clean(
     gdf["_poly_idx"] = range(len(gdf))

     thin_gaps_and_double = pd.concat([gaps, double]).loc[
-        lambda x: (
-            shapely.simplify(x.geometry, PRECISION).buffer(-tolerance / 2).is_empty
-        )
+        lambda x: (x.buffer(-tolerance / 2).is_empty)
     ]

     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
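Note: the `lambda x: (x.buffer(-tolerance / 2).is_empty)` filter above is the module's thinness test: a polygon narrower than `tolerance` collapses to nothing when shrunk by half the tolerance. A minimal illustration with plain Shapely (the example geometries are made up):

    import shapely

    tolerance = 1.0
    thin = shapely.box(0, 0, 100, 0.5)   # a 100 x 0.5 sliver
    thick = shapely.box(0, 0, 100, 10)   # a 100 x 10 polygon

    # Shrinking by tolerance / 2 empties anything narrower than `tolerance`.
    print(thin.buffer(-tolerance / 2).is_empty)   # True
    print(thick.buffer(-tolerance / 2).is_empty)  # False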
@@ -209,7 +211,6 @@ def coverage_clean(
         raise ValueError("Large double surfaces.")

     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
-    to_eliminate = safe_simplify(to_eliminate, PRECISION)

     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]

@@ -309,7 +310,10 @@ def coverage_clean(
         one_hit = cleaned[only_one].drop(columns="_poly_idx")
         many_hits = cleaned[~only_one]
     except IndexError:
-
+        assert not cleaned["_poly_idx"].notna().any(), cleaned
+        one_hit = cleaned[lambda x: x.index == min(x.index) - 1].drop(
+            columns="_poly_idx", errors="ignore"
+        )
         many_hits = cleaned

     for i, grid_size in enumerate(grid_sizes):
@@ -336,10 +340,12 @@ def coverage_clean(

     cleaned = pd.concat([many_hits, one_hit], ignore_index=True)

+    gdf = gdf.drop(columns="_poly_idx")
+
     for i, grid_size in enumerate(grid_sizes):
         try:
             cleaned = clean_overlay(
-                gdf
+                gdf,
                 cleaned,
                 how="update",
                 geom_type="polygon",
@@ -347,16 +353,6 @@ def coverage_clean(
             )
             break
         except GEOSException as e:
-            if 1 == 0:
-                try:
-                    cleaned = update_geometries(
-                        sort_small_first(cleaned),
-                        geom_type="polygon",
-                        grid_size=grid_size,
-                        n_jobs=n_jobs,
-                    )
-                except GEOSException:
-                    pass
             if i == len(grid_sizes) - 1:
                 explore_geosexception(
                     e,
@@ -369,7 +365,7 @@ def coverage_clean(
                 )
                 raise e

-    cleaned =
+    cleaned = sort_large_first(cleaned)

     # slivers on bottom
     cleaned = pd.concat(split_out_slivers(cleaned, tolerance))
@@ -396,13 +392,17 @@ def coverage_clean(
                 )
                 raise e

-    cleaned = safe_simplify(cleaned, PRECISION)
-    cleaned.geometry = shapely.make_valid(cleaned.geometry)
+    # cleaned = safe_simplify(cleaned, PRECISION)
+    # cleaned.geometry = shapely.make_valid(cleaned.geometry)
+
+    # TODO check why polygons dissappear in rare cases. For now, just add back the missing
+    dissapeared_polygons = sfilter_inverse(gdf, cleaned.buffer(-PRECISION))
+    cleaned = pd.concat([cleaned, dissapeared_polygons])

-    return cleaned
+    return to_single_geom_type(cleaned, "polygon")


-def safe_simplify(gdf, tolerance: float | int):
+def safe_simplify(gdf, tolerance: float | int, **kwargs):
     """Simplify only if the resulting area is no more than 1 percent larger.

     Because simplifying can result in holes being filled.
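Note on the hunk above: instead of simplifying the final result, 1.0.1 re-adds any input polygon that has no counterpart in the cleaned output (the `sfilter_inverse(gdf, cleaned.buffer(-PRECISION))` line) and forces the return value to single-part polygons. A rough stand-alone sketch of that guard using plain GeoPandas/Shapely; the function name and the intersects-based filter are illustrative stand-ins for `sfilter_inverse`, not the sgis implementation:

    import geopandas as gpd
    import pandas as pd
    import shapely

    def add_back_missing(original: gpd.GeoDataFrame, cleaned: gpd.GeoDataFrame, precision: float = 1e-7):
        """Re-attach input polygons that have no counterpart in the cleaned result."""
        # Shrink the cleaned surface slightly so polygons that merely touch its
        # edge do not count as covered.
        shrunk = shapely.union_all(cleaned.buffer(-precision).values)
        missing = original[~original.intersects(shrunk)]
        return pd.concat([cleaned, missing])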
@@ -412,21 +412,28 @@ def safe_simplify(gdf, tolerance: float | int):
     copied.geometry = shapely.make_valid(
         shapely.simplify(copied.geometry.values, tolerance=tolerance)
     )
-    copied.
-
-
+    filt = (copied.area > length_then * 1.01) | (copied.geometry.is_empty)
+    copied.loc[filt, copied._geometry_column_name] = gdf.loc[
+        filt, copied._geometry_column_name
+    ]

     return copied


-def
-
-
-
-
-
+def remove_interior_slivers(gdf, tolerance):
+    gdf, slivers = split_out_slivers(gdf, tolerance)
+    slivers["_idx"] = range(len(slivers))
+    without_thick = clean_overlay(
+        to_lines(slivers), buff(gdf, PRECISION), how="difference"
+    )
+    return pd.concat(
+        [
+            gdf,
+            slivers[lambda x: x["_idx"].isin(without_thick["_idx"])].drop(
+                columns="_idx"
+            ),
+        ]
     )
-    return update_geometries(copied, geom_type="polygon", grid_size=grid_size)


 def remove_spikes(
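The reworked `safe_simplify` body boils down to: simplify, then revert every geometry whose area grew by more than one percent or that came out empty (`length_then` is the pre-simplification area computed earlier in the function, outside this hunk). A self-contained sketch of the same pattern; the helper name is invented:

    import shapely

    def cautious_simplify(gdf, tolerance):
        """Simplify, but keep the original geometry where simplification backfires."""
        area_before = gdf.area
        simplified = gdf.copy()
        simplified.geometry = shapely.make_valid(
            shapely.simplify(simplified.geometry.values, tolerance=tolerance)
        )
        # Holes that get filled by simplification show up as an area increase.
        revert = (simplified.area > area_before * 1.01) | simplified.geometry.is_empty
        simplified.loc[revert, simplified.geometry.name] = gdf.loc[revert, gdf.geometry.name]
        return simplified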
@@ -501,10 +508,8 @@ def try_for_grid_size(
     args: tuple | None = None,
     kwargs: dict | None = None,
 ):
-
-
-    if kwargs is None:
-        kwargs = {}
+    args = args or ()
+    kwargs = kwargs or {}
     for i, grid_size in enumerate(grid_sizes):
         try:
             return func(*args, grid_size=grid_size, **kwargs)
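The `args = args or ()` / `kwargs = kwargs or {}` rewrite above is equivalent to the old `if kwargs is None` check for this function, since an explicitly passed empty tuple or dict is simply replaced by an equally empty one:

    def _normalize(args=None, kwargs=None):
        args = args or ()       # None and () both end up as ()
        kwargs = kwargs or {}   # None and {} both end up as {}
        return args, kwargs

    assert _normalize() == _normalize((), {}) == ((), {})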
@@ -589,7 +594,10 @@ def split_by_neighbors(df, split_by, tolerance, grid_size=None):

     intersecting_lines = (
         clean_overlay(
-            to_lines(split_by),
+            to_lines(split_by),
+            buff(df, tolerance),
+            how="intersection",
+            grid_size=grid_size,
         )
         .pipe(get_line_segments)
         .reset_index(drop=True)
@@ -656,468 +664,6 @@ def make_lines_between_points(
     return linestrings(coords.values, indices=coords.index)


-def explore_geosexception(e: GEOSException, *gdfs, logger=None):
-    from ..maps.maps import Explore, explore
-    from .conversion import to_gdf
-
-    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
-
-    matches = re.findall(pattern, str(e))
-    coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
-    exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
-    if len(exception_point):
-        exception_point["wkt"] = exception_point.to_wkt()
-        if logger:
-            logger.error(
-                e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
-            )
-        else:
-            explore(exception_point, *gdfs, mask=exception_point.buffer(100))
-    else:
-        if logger:
-            logger.error(e, Explore(*gdfs))
-        else:
-            explore(*gdfs)
-
-
-def snap_to_mask(
-    gdf: GeoDataFrame, tolerance: int | float, mask: GeoDataFrame | GeoSeries | Geometry
-):
-    return snap_polygons(
-        gdf,
-        mask=mask,
-        tolerance=tolerance,
-        snap_to_nodes=False,
-    )
-
-
-def snap_polygons(
-    gdf: GeoDataFrame,
-    tolerance: int | float,
-    mask: GeoDataFrame | GeoSeries | Geometry | None = None,
-    snap_to_nodes: bool = True,
-    **kwargs,
-):
-    if not len(gdf):
-        return gdf
-
-    geom_type = "polygon"
-
-    gdf = safe_simplify(gdf, PRECISION)
-
-    gdf = (
-        clean_geoms(gdf)
-        .pipe(make_all_singlepart, ignore_index=True)
-        .pipe(to_single_geom_type, geom_type)
-    )
-
-    gdf = close_thin_holes(gdf, tolerance)
-
-    if mask is None:
-        mask: GeoDataFrame = close_all_holes(dissexp_by_cluster(gdf)).dissolve()
-    else:
-        try:
-            mask: GeoDataFrame = mask[["geometry"]]
-        except Exception:
-            mask: GeoDataFrame = to_geoseries(mask).to_frame("geometry")
-
-    gdf_copy = gdf.copy()
-
-    gdf.geometry = (
-        PolygonsAsRings(gdf.geometry.values)
-        .apply_numpy_func(
-            _snap_linearrings,
-            kwargs=dict(tolerance=tolerance, mask=mask, snap_to_nodes=snap_to_nodes),
-        )
-        .to_numpy()
-    )
-
-    gdf = to_single_geom_type(make_all_singlepart(clean_geoms(gdf)), geom_type)
-
-    if snap_to_nodes:
-        missing = clean_overlay(gdf_copy, gdf, how="difference")
-
-        missing, isolated = sfilter_split(missing, gdf)
-        isolated.geometry = isolated.buffer(PRECISION * 10)
-        gdf = eliminate_by_longest(
-            gdf, pd.concat([missing, isolated]), remove_isolated=False
-        )
-
-    missing = clean_overlay(mask, gdf, how="difference")
-
-    gdf = eliminate_by_longest(
-        gdf, missing.buffer(PRECISION * 10).to_frame("geometry"), remove_isolated=False
-    ).pipe(clean_clip, mask, geom_type="polygon")
-
-    gdf = update_geometries(
-        sort_small_first(close_small_holes(gdf, PRECISION)), geom_type="polygon"
-    )
-
-    return gdf
-
-
-def _snap_to_anchors(
-    points: GeoDataFrame,
-    tolerance: int | float,
-    anchors: GeoDataFrame | None = None,
-    custom_func: Callable | None = None,
-) -> GeoDataFrame:
-    if not len(points):
-        try:
-            return points, anchors[["geometry"]]
-        except TypeError:
-            return points, points[["geometry"]]
-
-    assert points.index.is_unique
-
-    tree = STRtree(points.geometry.values)
-    left, right = tree.query(
-        points.geometry.values,
-        predicate="dwithin",
-        distance=tolerance,
-    )
-    indices = pd.Series(right, index=left, name="_right_idx")
-
-    geom_idx_left = indices.index.map(dict(enumerate(points["_geom_idx"])))
-    geom_idx_right = indices.map(dict(enumerate(points["_geom_idx"])))
-
-    left_on_top = indices.loc[geom_idx_left < geom_idx_right].sort_index()
-
-    # keep only indices from left if they have not already appeared in right
-    # these shouldn't be anchors, but instead be snapped
-    new_indices = []
-    values = []
-    right_indices = set()
-    for left, right in left_on_top.items():
-        if left not in right_indices:
-            new_indices.append(left)
-            values.append(right)
-        right_indices.add(right)
-
-    snap_indices = pd.Series(values, index=new_indices)
-
-    if custom_func:
-        snap_indices = custom_func(snap_indices)
-
-    new_anchors = points.loc[
-        points.index.isin(snap_indices.index), ["geometry", "_geom_idx"]
-    ]
-    new_anchors["_cluster"] = get_cluster_mapper(new_anchors.buffer(0.1))
-
-    assert new_anchors["_geom_idx"].notna().all()
-
-    no_longer_anchors: pd.Index = new_anchors.loc[
-        lambda x: (x["_cluster"].duplicated())  # & (x["_geom_idx"] >= idx_start)
-    ].index
-    new_anchors = new_anchors.loc[lambda x: ~x.index.isin(no_longer_anchors)]
-
-    if anchors is not None:
-        anchors = pd.concat([anchors, new_anchors]).loc[
-            lambda x: ~x.geometry.duplicated()
-        ]
-    else:
-        anchors = new_anchors
-        anchors["_was_anchor"] = 0
-
-    should_be_snapped = (points.index.isin(snap_indices.values)) | (
-        points.index.isin(no_longer_anchors)
-    )
-    if anchors is not None:
-        should_be_snapped |= points.index.isin(
-            sfilter(points, anchors.buffer(tolerance)).index
-        )
-
-    to_be_snapped = points.loc[should_be_snapped]
-
-    anchors["_right_geom"] = anchors.geometry
-
-    snapped = (
-        to_be_snapped.sjoin_nearest(anchors, max_distance=tolerance)
-        .sort_values("index_right")["_right_geom"]
-        .loc[lambda x: ~x.index.duplicated()]
-    )
-
-    # explore(
-    #     anchors,
-    #     to_be_snapped,
-    #     snapped=snapped,
-    #     left_on_top=points.loc[lambda x: (~x.index.isin(left_on_top.values))],
-    #     indices=points.loc[lambda x: (~x.index.isin(indices.values))],
-    #     points_i_snap_to=points.set_crs(25833),
-    # )
-
-    points.loc[snapped.index, "geometry"] = snapped
-
-    return points, anchors[["geometry"]]
-
-
-def _snap_linearrings(
-    geoms: NDArray[LinearRing],
-    tolerance: int | float,
-    mask: GeoDataFrame | None = None,
-    snap_to_nodes: bool = True,
-    gaps=None,
-):
-    if not len(geoms):
-        return geoms
-
-    if mask is None:
-        idx_start = 0
-    else:
-        mask: GeoSeries = make_all_singlepart(mask).geometry
-        mask_nodes = GeoDataFrame(
-            {
-                "geometry": extract_unique_points(mask.geometry),
-                "_geom_idx": range(len(mask)),
-            }
-        ).explode(ignore_index=True)
-
-        idx_start = len(mask)
-
-    gdf = GeoDataFrame(
-        {"geometry": geoms, "_geom_idx": np.arange(idx_start, len(geoms) + idx_start)}
-    )
-
-    is_thin = GeoSeries(polygons(gdf.geometry)).buffer(-tolerance / 2).is_empty
-
-    gdf["_is_thin"] = is_thin
-
-    thin = is_thin[lambda x: x == True]
-    thin.loc[:] = None
-    thin.index = thin.index.map(gdf["_geom_idx"])
-
-    # points_from_thin = (
-    #     extract_unique_points(gdf.loc[is_thin, "geometry"])
-    #     .to_frame("geometry")
-    #     .explode(ignore_index=True)
-    #     .pipe(sfilter_inverse, gdf.buffer(PRECISION))
-    # )
-
-    gdf = gdf.loc[is_thin == False]
-
-    points: GeoDataFrame = gdf.assign(
-        geometry=lambda x: extract_unique_points(x.geometry.values)
-    ).explode(ignore_index=True)
-
-    # step 1: add vertices nearest to mask nodes
-
-    segments = points_to_line_segments(points.set_index("_geom_idx"))
-    segments["_geom_idx"] = segments.index
-    segments.index = points.index
-
-    mask_nodes["rgeom"] = mask_nodes.geometry
-    joined = segments.sjoin_nearest(mask_nodes, max_distance=tolerance)
-
-    midpoints = shapely.get_point(
-        shapely.shortest_line(joined.geometry.values, joined["rgeom"].values), 0
-    )
-
-    boundaries_groupby = joined.boundary.explode(index_parts=False).groupby(level=0)
-
-    with_new_midpoints = (
-        pd.concat(
-            [
-                # first point
-                GeoSeries(boundaries_groupby.nth(0)),
-                GeoSeries(midpoints, index=joined.index),
-                # last point
-                GeoSeries(boundaries_groupby.nth(-1)),
-            ]
-        )
-        .groupby(level=0)
-        .agg(lambda x: MultiPoint(x.values))
-    )
-
-    segments.loc[with_new_midpoints.index, "geometry"] = with_new_midpoints
-
-    segments.geometry = extract_unique_points(segments.geometry)
-    points = segments.explode(ignore_index=True)
-
-    # step 2: snap to mask nodes
-
-    points_by_mask_nodes = sfilter(
-        points.loc[lambda x: x["_geom_idx"] >= idx_start], mask_nodes.buffer(tolerance)
-    )
-
-    relevant_mask_nodes = sfilter(
-        mask_nodes,
-        points_by_mask_nodes.buffer(tolerance),
-        predicate="within",
-    )
-    # explore(
-    #     relevant_mask_nodes,
-    #     points_by_mask_nodes,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    # explore(
-    #     mask,
-    #     gdf,
-    #     relevant_mask_nodes,
-    #     points_by_mask_nodes,
-    #     segments,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    if len(relevant_mask_nodes):
-        mask_nodes["_right_geom"] = mask_nodes.geometry
-        snapped = points_by_mask_nodes.sjoin_nearest(mask_nodes, max_distance=tolerance)
-
-        anchors = GeoDataFrame(
-            {"geometry": snapped.drop_duplicates("index_right")["_right_geom"].values}
-        )
-
-        snapmapper = snapped["_right_geom"].loc[lambda x: ~x.index.duplicated()]
-
-        points.loc[snapmapper.index, "geometry"] = snapmapper
-    else:
-        anchors = None
-
-    if snap_to_nodes:
-        snapped, anchors = _snap_to_anchors(
-            points, tolerance, anchors=mask_nodes
-        )  # anchors)
-    else:
-        snapped = points
-
-    # remove duplicates
-    snapped = pd.concat(
-        snapped.loc[lambda x: x["_geom_idx"] == i].loc[lambda x: ~x.duplicated()]
-        for i in snapped.loc[
-            lambda x: (x["_geom_idx"] >= idx_start), "_geom_idx"
-        ].unique()
-    )
-
-    assert (snapped["_geom_idx"] >= idx_start).all()
-
-    as_rings = (
-        snapped.sort_index()
-        .set_index("_geom_idx")
-        # .pipe(_remove_legit_spikes)
-        .loc[lambda x: x.groupby(level=0).size() > 2]
-        .groupby(level=0)["geometry"]
-        .agg(LinearRing)
-    )
-
-    as_polygons = GeoDataFrame(
-        {"geometry": polygons(as_rings.values), "_geom_idx": as_rings.index}
-    )
-
-    slivers = as_polygons.loc[lambda x: x.buffer(-tolerance / 2).is_empty]
-    snapped = snapped.loc[lambda x: ~x["_geom_idx"].isin(slivers["_geom_idx"])]
-
-    as_polygons = update_geometries(sort_small_first(as_polygons))
-
-    missing_mask_nodes = sfilter_inverse(
-        mask_nodes, as_polygons.buffer(PRECISION)
-    ).pipe(sfilter, as_polygons.buffer(PRECISION + tolerance))
-
-    # explore(
-    #     mask,
-    #     gdf,
-    #     anchors,
-    #     missing_mask_nodes,
-    #     snapped,
-    #     as_polygons,
-    #     points=points.set_crs(25833),
-    #     mask=to_gdf([5.37166432, 59.00987036], 4326).to_crs(25833).buffer(100),
-    # )
-
-    if snap_to_nodes or len(missing_mask_nodes):
-        thin_gaps = get_gaps(as_polygons, include_interiors=True).loc[
-            lambda x: x.buffer(-tolerance / 2).is_empty
-        ]
-        thin_gaps.geometry = thin_gaps.buffer(-PRECISION).buffer(PRECISION)
-
-        assert snapped.index.is_unique
-        segments = points_to_line_segments(snapped.set_index("_geom_idx"))
-        segments["_geom_idx"] = segments.index
-        segments.index = snapped.index
-
-        assert segments.index.is_unique
-
-        segs_by_gaps = sfilter(
-            segments,
-            pd.concat([thin_gaps, slivers]).buffer(PRECISION),
-        )
-        # gap_nodes = pd.concat(
-        #     [
-        #         missing_mask_nodes,
-        #         extract_unique_points(thin_gaps.geometry).to_frame("geometry"),
-        #     ]
-        # )
-
-        # explore(
-        #     # missing_mask_polygons,
-        #     missing_mask_nodes,
-        #     segs_by_gaps,
-        #     thin_gaps,
-        #     as_polygons=as_polygons,
-        #     anchors=anchors.set_crs(25833),
-        # )
-
-        # segs_by_gaps = _add_midpoints_to_segments2(
-        #     segs_by_gaps, points=gap_nodes, tolerance=tolerance
-        # )
-
-        segs_by_gaps.geometry = segmentize(segs_by_gaps.geometry, tolerance)
-        segs_by_gaps.geometry = extract_unique_points(segs_by_gaps.geometry)
-        assert segs_by_gaps.index.is_unique
-
-        snapped = pd.concat(
-            [snapped.loc[lambda x: ~x.index.isin(segs_by_gaps.index)], segs_by_gaps]
-        ).sort_index()
-
-        snapped = pd.concat(
-            snapped.loc[lambda x: x["_geom_idx"] == i].loc[lambda x: ~x.duplicated()]
-            for i in snapped["_geom_idx"].unique()
-        ).explode(ignore_index=True)
-
-        snapped, _ = _snap_to_anchors(snapped, tolerance, anchors=mask_nodes)
-
-    as_rings = (
-        snapped.loc[lambda x: (x["_geom_idx"] >= idx_start)]
-        .sort_index()
-        .set_index("_geom_idx")
-        # .pipe(_remove_legit_spikes)
-        .loc[lambda x: x.groupby(level=0).size() > 2]
-        .groupby(level=0)["geometry"]
-        .agg(LinearRing)
-    )
-
-    missing = gdf.set_index("_geom_idx")["geometry"].loc[
-        lambda x: (~x.index.isin(as_rings.index)) & (x.index >= idx_start)
-    ]
-    missing.loc[:] = None
-
-    return pd.concat([as_rings, thin, missing]).sort_index()
-
-
-def _remove_legit_spikes(df):
-    """Remove points where the next and previous points are the same.
-
-    The lines these points make are as spiky as they come,
-    hence the term "legit spikes".
-    """
-    df["next"] = df.groupby(level=0)["geometry"].shift(-1)
-    df["prev"] = df.groupby(level=0)["geometry"].shift(1)
-
-    first_points = df.loc[lambda x: ~x.index.duplicated(keep="first"), "geometry"]
-    is_last_point = df["next"].isna()
-    df.loc[is_last_point, "next"] = first_points
-
-    last_points = df.loc[lambda x: ~x.index.duplicated(keep="last"), "geometry"]
-    is_first_point = df["prev"].isna()
-    df.loc[is_first_point, "prev"] = last_points
-
-    assert df["next"].notna().all()
-    assert df["prev"].notna().all()
-
-    return df.loc[lambda x: x["next"] != x["prev"]]
-
-
 def get_line_segments(lines) -> GeoDataFrame:
     assert lines.index.is_unique
     if isinstance(lines, GeoDataFrame):
@@ -1188,99 +734,25 @@ def points_to_line_segments(points: GeoDataFrame) -> GeoDataFrame:
     )


-def
-
-
-    if not len(gdf):
-        return gdf
-
-    gdf = to_single_geom_type(make_all_singlepart(gdf), "polygon")
-
-    if not gdf.index.is_unique:
-        gdf = gdf.reset_index(drop=True)
-
-    # remove both inwards and outwards spikes
-    polygons_without_spikes = (
-        gdf.buffer(-tolerance / 2, join_style=2)
-        .buffer(tolerance, join_style=2)
-        .buffer(-tolerance / 2, join_style=2)
-    )
-
-    donuts_around_polygons = to_lines(
-        polygons_without_spikes.to_frame("geometry")
-    ).pipe(buff, 1e-3, copy=False)
-
-    # donuts_around_polygons["_poly_idx"] = donuts_around_polygons.index
-
-    def _remove_spikes(df):
-        df = df.to_frame("geometry")
-        # df = df.reset_index(drop=True)
-        df["_poly_idx"] = df.index
-        df["_ring_idx"] = range(len(df))
-
-        points = df.copy()
-        points.geometry = extract_unique_points(points.geometry)
-        points = points.explode(index_parts=False).explode(index_parts=False)
-        points["_idx"] = range(len(points))
-
-        # keep only matches from same polygon
-        not_spikes = points.sjoin(donuts_around_polygons).loc[
-            lambda x: x["_poly_idx"] == x["index_right"]
-        ]
-        can_be_polygons = not_spikes.iloc[
-            (not_spikes.groupby("_ring_idx").transform("size") >= 3).values
-        ]
-
-        without_spikes = (
-            can_be_polygons.sort_values("_idx")
-            .groupby("_ring_idx")["geometry"]
-            .agg(LinearRing)
-        )
-
-        missing = df.loc[
-            ~df["_ring_idx"].isin(without_spikes.index), df._geometry_column_name
-        ]
-        return pd.concat(
-            [without_spikes, missing]
-        ).sort_index()  # .to_frame("geometry")
-
-    without_spikes = GeoDataFrame(
-        {
-            "geometry": PolygonsAsRings(gdf.geometry)
-            .apply_geoseries_func(_remove_spikes)
-            .to_numpy()
-        },
-        crs=gdf.crs,
-    ).pipe(to_single_geom_type, "polygon")
-    without_spikes.index = gdf.index
-
-    is_thin = without_spikes.buffer(-tolerance / 2).is_empty
-    without_spikes = pd.concat(
-        [
-            split_by_neighbors(
-                without_spikes[is_thin], without_spikes, tolerance=tolerance
-            ),
-            without_spikes[~is_thin],
-        ]
-    )
+def explore_geosexception(e: GEOSException, *gdfs, logger=None):
+    from ..maps.maps import Explore, explore
+    from .conversion import to_gdf

-
-    if 1:
-        for i, grid_size in enumerate(grid_sizes):
-            try:
-                without_spikes = update_geometries(
-                    sort_small_first(without_spikes), geom_type="polygon"
-                )
-                break
-            except GEOSException as e:
-                if i == len(grid_sizes) - 1:
-                    raise e
+    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"

-
-
-
-
+    matches = re.findall(pattern, str(e))
+    coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
+    exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
+    if len(exception_point):
+        exception_point["wkt"] = exception_point.to_wkt()
+        if logger:
+            logger.error(
+                e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
             )
-
-
-
+        else:
+            explore(exception_point, *gdfs, mask=exception_point.buffer(100))
+    else:
+        if logger:
+            logger.error(e, Explore(*gdfs))
+        else:
+            explore(*gdfs)