ssb-sgis 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sgis/__init__.py +20 -9
  2. sgis/debug_config.py +24 -0
  3. sgis/exceptions.py +2 -2
  4. sgis/geopandas_tools/bounds.py +33 -36
  5. sgis/geopandas_tools/buffer_dissolve_explode.py +136 -35
  6. sgis/geopandas_tools/centerlines.py +4 -91
  7. sgis/geopandas_tools/cleaning.py +1576 -583
  8. sgis/geopandas_tools/conversion.py +38 -19
  9. sgis/geopandas_tools/duplicates.py +29 -8
  10. sgis/geopandas_tools/general.py +263 -100
  11. sgis/geopandas_tools/geometry_types.py +4 -4
  12. sgis/geopandas_tools/neighbors.py +19 -15
  13. sgis/geopandas_tools/overlay.py +2 -2
  14. sgis/geopandas_tools/point_operations.py +5 -5
  15. sgis/geopandas_tools/polygon_operations.py +510 -105
  16. sgis/geopandas_tools/polygons_as_rings.py +40 -8
  17. sgis/geopandas_tools/sfilter.py +29 -12
  18. sgis/helpers.py +3 -3
  19. sgis/io/dapla_functions.py +238 -19
  20. sgis/io/read_parquet.py +1 -1
  21. sgis/maps/examine.py +27 -12
  22. sgis/maps/explore.py +450 -65
  23. sgis/maps/legend.py +177 -76
  24. sgis/maps/map.py +206 -103
  25. sgis/maps/maps.py +178 -105
  26. sgis/maps/thematicmap.py +243 -83
  27. sgis/networkanalysis/_service_area.py +6 -1
  28. sgis/networkanalysis/closing_network_holes.py +2 -2
  29. sgis/networkanalysis/cutting_lines.py +15 -8
  30. sgis/networkanalysis/directednetwork.py +1 -1
  31. sgis/networkanalysis/finding_isolated_networks.py +15 -8
  32. sgis/networkanalysis/networkanalysis.py +17 -19
  33. sgis/networkanalysis/networkanalysisrules.py +1 -1
  34. sgis/networkanalysis/traveling_salesman.py +1 -1
  35. sgis/parallel/parallel.py +64 -27
  36. sgis/raster/__init__.py +0 -6
  37. sgis/raster/base.py +208 -0
  38. sgis/raster/cube.py +54 -8
  39. sgis/raster/image_collection.py +3257 -0
  40. sgis/raster/indices.py +17 -5
  41. sgis/raster/raster.py +138 -243
  42. sgis/raster/sentinel_config.py +120 -0
  43. sgis/raster/zonal.py +0 -1
  44. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/METADATA +6 -7
  45. ssb_sgis-1.0.4.dist-info/RECORD +62 -0
  46. sgis/raster/methods_as_functions.py +0 -0
  47. sgis/raster/torchgeo.py +0 -171
  48. ssb_sgis-1.0.2.dist-info/RECORD +0 -61
  49. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/LICENSE +0 -0
  50. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/WHEEL +0 -0
@@ -1,4 +1,4 @@
- import re
+ # %%
  import warnings
  from collections.abc import Callable
  from typing import Any
@@ -8,36 +8,49 @@ import pandas as pd
  import shapely
  from geopandas import GeoDataFrame
  from geopandas import GeoSeries
- from geopandas.array import GeometryArray
  from numpy.typing import NDArray
+ from shapely import Geometry
+ from shapely import STRtree
  from shapely import extract_unique_points
  from shapely import get_coordinates
- from shapely import get_parts
- from shapely import linestrings
+ from shapely import linearrings
+ from shapely import polygons
  from shapely.errors import GEOSException
+ from shapely.geometry import LinearRing
  from shapely.geometry import LineString
  from shapely.geometry import Point

- from .buffer_dissolve_explode import buff
- from .buffer_dissolve_explode import dissexp
- from .conversion import coordinate_array
+ try:
+     import numba
+ except ImportError:
+
+     class numba:
+         """Placeholder."""
+
+         @staticmethod
+         def njit(func) -> Callable:
+             """Placeholder that does nothing."""
+
+             def wrapper(*args, **kwargs):
+                 return func(*args, **kwargs)
+
+             return wrapper
+
+
+ from ..debug_config import _DEBUG_CONFIG
+ from ..maps.maps import explore
  from .conversion import to_gdf
- from .duplicates import get_intersections
+ from .conversion import to_geoseries
  from .duplicates import update_geometries
-
- # from .general import sort_large_first as _sort_large_first
  from .general import clean_geoms
- from .general import sort_large_first
- from .general import sort_small_first
- from .general import to_lines
  from .geometry_types import make_all_singlepart
  from .geometry_types import to_single_geom_type
  from .overlay import clean_overlay
  from .polygon_operations import eliminate_by_longest
- from .polygon_operations import get_cluster_mapper
- from .polygon_operations import get_gaps
+ from .polygon_operations import split_by_neighbors
+ from .polygons_as_rings import PolygonsAsRings
+ from .sfilter import sfilter
  from .sfilter import sfilter_inverse
- from .sfilter import sfilter_split

  warnings.simplefilter(action="ignore", category=UserWarning)
  warnings.simplefilter(action="ignore", category=RuntimeWarning)
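
The try/except added in this hunk makes numba an optional dependency: if the import fails, `numba.njit` degrades to a decorator that returns the function unchanged, so the `@numba.njit`-decorated helpers further down run as plain Python. A minimal sketch of the effect (the decorated function is hypothetical, not part of the package):

    import numpy as np

    @numba.njit  # compiled if numba is installed, plain Python otherwise
    def ring_length(coords: np.ndarray) -> float:
        # Loop-heavy code that numba would JIT-compile when available.
        total = 0.0
        for i in range(len(coords) - 1):
            dx = coords[i + 1, 0] - coords[i, 0]
            dy = coords[i + 1, 1] - coords[i, 1]
            total += (dx**2 + dy**2) ** 0.5
        return total

    ring_length(np.array([[0.0, 0.0], [3.0, 4.0]]))  # -> 5.0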
@@ -47,12 +60,31 @@ PRECISION = 1e-3
  BUFFER_RES = 50


+ # def explore(*args, **kwargs):
+ #     pass
+
+
+ # def explore_locals(*args, **kwargs):
+ #     pass
+
+
+ # def no_njit(func):
+ #     def wrapper(*args, **kwargs):
+ #         result = func(*args, **kwargs)
+ #         return result
+
+ #     return wrapper
+
+
+ # numba.njit = no_njit
+
+
  def coverage_clean(
      gdf: GeoDataFrame,
      tolerance: int | float,
-     duplicate_action: str = "fix",
-     grid_sizes: tuple[None | int] = (None,),
-     n_jobs: int = 1,
+     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
+     snap_to_anchors: bool = True,
+     **kwargs,
  ) -> GeoDataFrame:
      """Fix thin gaps, holes, slivers and double surfaces.

@@ -78,15 +110,10 @@ def coverage_clean(
              for polygons to be eliminated. Any gap, hole, sliver or double
              surface that are empty after a negative buffer of tolerance / 2
              are eliminated into the neighbor with the longest shared border.
-         duplicate_action: Either "fix", "error" or "ignore".
-             If "fix" (default), double surfaces thicker than the
-             tolerance will be updated from top to bottom (function update_geometries)
-             and then dissolved into the neighbor polygon with the longest shared border.
-             If "error", an Exception is raised if there are any double surfaces thicker
-             than the tolerance. If "ignore", double surfaces are kept as is.
-         grid_sizes: One or more grid_sizes used in overlay and dissolve operations that
-             might raise a GEOSException. Defaults to (None,), meaning no grid_sizes.
-         n_jobs: Number of threads.
+         mask: Mask to clip gdf to.
+         snap_to_anchors: If True (default), snaps to anchor nodes in gdf. If False,
+             only snaps to mask nodes (mask cannot be None in this case).
+         **kwargs: Temporary backwards compatibility to avoid TypeErrors.

      Returns:
          A GeoDataFrame with cleaned polygons.
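
A hedged usage sketch of the new 1.0.4 signature (the geometries, CRS and tolerance below are illustrative, not from the package):

    from geopandas import GeoDataFrame
    from shapely.geometry import Polygon

    from sgis.geopandas_tools.cleaning import coverage_clean

    gdf = GeoDataFrame(
        {
            "geometry": [
                Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]),
                # neighbor polygon leaving a 1 mm sliver gap along the shared edge
                Polygon([(1.001, 0), (1.001, 1), (2, 1), (2, 0)]),
            ]
        },
        crs=25833,
    )
    cleaned = coverage_clean(gdf, tolerance=0.1)  # gaps thinner than 0.1 m are closed
    # With a boundary mask, vertices can be snapped to the mask nodes instead:
    # cleaned = coverage_clean(gdf, 0.1, mask=some_boundary, snap_to_anchors=False)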
@@ -94,648 +121,1614 @@
      if not len(gdf):
          return gdf

-     _cleaning_checks(gdf, tolerance, duplicate_action)
+     gdf_original = gdf.copy()

-     if not gdf.index.is_unique:
-         gdf = gdf.reset_index(drop=True)
+     # more_than_one = get_num_geometries(gdf.geometry.values) > 1
+     # gdf.loc[more_than_one, gdf._geometry_column_name] = gdf.loc[
+     #     more_than_one, gdf._geometry_column_name
+     # ].apply(_unary_union_for_notna)

-     gdf = make_all_singlepart(gdf).loc[
-         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
-     ]
-
-     try:
-         gdf = _safe_simplify(gdf, PRECISION)
-     except GEOSException:
-         pass
+     if mask is not None:
+         try:
+             mask: GeoDataFrame = mask[["geometry"]].pipe(make_all_singlepart)
+         except Exception:
+             mask: GeoDataFrame = (
+                 to_geoseries(mask).to_frame("geometry").pipe(make_all_singlepart)
+             )

-     gdf = (
-         clean_geoms(gdf)
-         .pipe(make_all_singlepart)
-         .loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
+     # mask: GeoDataFrame = close_all_holes(
+     #     dissexp_by_cluster(gdf[["geometry"]])
+     # ).pipe(make_all_singlepart)
+     # mask = GeoDataFrame(
+     #     {
+     #         "geometry": [
+     #             mask.union_all()
+     #             .buffer(
+     #                 PRECISION,
+     #                 resolution=1,
+     #                 join_style=2,
+     #             )
+     #             .buffer(
+     #                 -PRECISION,
+     #                 resolution=1,
+     #                 join_style=2,
+     #             )
+     #         ]
+     #     },
+     #     crs=gdf.crs,
+     # ).pipe(make_all_singlepart)
+     # # gaps = shapely.union_all(get_gaps(mask).geometry.values)
+     # # mask = shapely.get_parts(extract_unique_points(mask.geometry.values))
+     # # not_by_gaps = shapely.distance(mask, gaps) > PRECISION
+     # # mask = GeoDataFrame({"geometry": mask[not_by_gaps]})
+
+     gdf = snap_polygons(gdf, tolerance, mask=mask, snap_to_anchors=snap_to_anchors)
+
+     if mask is not None:
+         missing_from_mask = clean_overlay(
+             mask, gdf, how="difference", geom_type="polygon"
+         ).loc[lambda x: x.buffer(-tolerance + PRECISION).is_empty]
+         gdf, _ = eliminate_by_longest(gdf, missing_from_mask)
+
+     missing_from_gdf = sfilter_inverse(gdf_original, gdf.buffer(-PRECISION)).loc[
+         lambda x: (~x.buffer(-PRECISION).is_empty)
+     ]
+     return pd.concat([gdf, missing_from_gdf], ignore_index=True).pipe(
+         update_geometries, geom_type="polygon"
      )

-     try:
-         gaps = get_gaps(gdf, include_interiors=True)
-     except GEOSException:
-         for i, grid_size in enumerate(grid_sizes):
-             try:
-                 gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
-                 if grid_size:
-                     # in order to not get more gaps
-                     gaps.geometry = gaps.buffer(grid_size)
-                 break
-             except GEOSException as e:
-                 if i == len(grid_sizes) - 1:
-                     explore_geosexception(e, gdf)
-                     raise e
-
-     gaps["_was_gap"] = 1
-
-     if duplicate_action == "ignore":
-         double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
-         double["_double_idx"] = None
-     else:
-         double = get_intersections(gdf, n_jobs=n_jobs)
-         double["_double_idx"] = range(len(double))

-     gdf, slivers = split_out_slivers(gdf, tolerance)
+ def snap_polygons(
+     gdf: GeoDataFrame,
+     tolerance: int | float,
+     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
+     snap_to_anchors: bool = True,
+ ) -> GeoDataFrame:
+     if not len(gdf):
+         return gdf.copy()

-     gdf["_poly_idx"] = range(len(gdf))
+     gdf_orig = gdf.copy()

-     thin_gaps_and_double = pd.concat([gaps, double]).loc[
-         lambda x: (x.buffer(-tolerance / 2).is_empty)
-     ]
+     crs = gdf.crs

-     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+     gdf = (
+         clean_geoms(gdf)
+         .pipe(make_all_singlepart, ignore_index=True)
+         .pipe(to_single_geom_type, "polygon")
+     )

-     if not all_are_thin and duplicate_action == "fix":
-         gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
-             gdf,
-             double,
-             slivers,
-             thin_gaps_and_double,
-             tolerance,
-             n_jobs=n_jobs,
+     gdf.crs = None
+
+     gdf = gdf[lambda x: ~x.buffer(-tolerance / 2 - PRECISION).is_empty]
+     # gdf = gdf[lambda x: ~x.buffer(-tolerance / 3).is_empty]
+
+     # donuts_without_spikes = (
+     #     gdf.geometry.buffer(tolerance / 2, resolution=1, join_style=2)
+     #     .buffer(-tolerance, resolution=1, join_style=2)
+     #     .buffer(tolerance / 2, resolution=1, join_style=2)
+     #     .pipe(to_lines)
+     #     .buffer(tolerance)
+     # )
+
+     gdf.geometry = (
+         PolygonsAsRings(gdf.geometry.values)
+         .apply_numpy_func(
+             _snap_linearrings,
+             kwargs=dict(
+                 tolerance=tolerance,
+                 mask=mask,
+                 snap_to_anchors=snap_to_anchors,
+             ),
          )
-
-     elif not all_are_thin and duplicate_action == "error":
-         raise ValueError("Large double surfaces.")
-
-     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
-
-     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
-
-     to_eliminate = try_for_grid_size(
-         split_by_neighbors,
-         grid_sizes=grid_sizes,
-         args=(to_eliminate, gdf),
-         kwargs=dict(tolerance=tolerance),
+         .to_numpy()
      )

-     to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
-
-     to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))
+     gdf = (
+         to_single_geom_type(make_all_singlepart(clean_geoms(gdf)), "polygon")
+         .reset_index(drop=True)
+         .set_crs(crs)
+     )

-     gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
+     missing = clean_overlay(gdf_orig, gdf, how="difference").loc[
+         lambda x: ~x.buffer(-tolerance / 2).is_empty
+     ]

-     poly_idx_mapper = clean_overlay(
-         buff(
-             to_eliminate[["_eliminate_idx", "geometry"]],
-             tolerance,
-             resolution=BUFFER_RES,
+     if mask is None:
+         mask = GeoDataFrame({"geometry": []})
+     explore(
+         gdf,
+         # gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
          ),
-         gdf_geoms_idx,
-         geom_type="polygon",
-         n_jobs=n_jobs,
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36765872, 59.01199837, 1),
      )
-     poly_idx_mapper["_area_per_poly"] = poly_idx_mapper.area
-     poly_idx_mapper["_area_per_poly"] = poly_idx_mapper.groupby("_poly_idx")[
-         "_area_per_poly"
-     ].transform("sum")
-
-     poly_idx_mapper: pd.Series = (
-         poly_idx_mapper.sort_values("_area_per_poly", ascending=False)
-         .drop_duplicates("_eliminate_idx")
-         .set_index("_eliminate_idx")["_poly_idx"]
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36820681, 59.01182298, 2),
      )
-     to_eliminate["_poly_idx"] = to_eliminate["_eliminate_idx"].map(poly_idx_mapper)
-     isolated = to_eliminate[lambda x: x["_poly_idx"].isna()]
-     intersecting = to_eliminate[lambda x: x["_poly_idx"].notna()]
-
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             without_double = update_geometries(
-                 intersecting,
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             ).drop(columns=["_eliminate_idx", "_double_idx"])
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(e, gdf, intersecting, isolated)
-                 raise e
-
-     not_really_isolated = isolated[["geometry", "_eliminate_idx", "_cluster"]].merge(
-         without_double.drop(columns=["geometry"]),
-         on="_cluster",
-         how="inner",
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37327042, 59.01099359, 5),
      )
-
-     really_isolated = isolated.loc[
-         lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
-     ]
-
-     is_gap = really_isolated["_was_gap"] == 1
-     isolated_gaps = really_isolated.loc[is_gap, ["geometry"]].sjoin_nearest(
-         gdf, max_distance=PRECISION
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36853688, 59.01169013, 5),
      )
-     really_isolated = really_isolated[~is_gap]
-
-     really_isolated["_poly_idx"] = (
-         really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37142966, 59.009799, 0.01),
+         max_zoom=40,
      )
-
-     cleaned = pd.concat(
-         [
-             gdf,
-             without_double,
-             not_really_isolated,
-             really_isolated,
-             isolated_gaps,
-         ],
-     ).drop(
-         columns=[
-             "_cluster",
-             "_was_gap",
-             "_eliminate_idx",
-             "index_right",
-             "_double_idx",
-             "_area_per_poly",
-         ],
-         errors="ignore",
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36866312, 59.00842846, 0.01),
+         max_zoom=40,
      )

-     try:
-         only_one = cleaned.groupby("_poly_idx").transform("size") == 1
-         one_hit = cleaned[only_one].drop(columns="_poly_idx")
-         many_hits = cleaned[~only_one]
-     except IndexError:
-         assert not cleaned["_poly_idx"].notna().any(), cleaned
-         one_hit = cleaned[lambda x: x.index == min(x.index) - 1].drop(
-             columns="_poly_idx", errors="ignore"
-         )
-         many_hits = cleaned
-
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             many_hits = (
-                 dissexp(
-                     many_hits,
-                     by="_poly_idx",
-                     aggfunc="first",
-                     dropna=True,
-                     grid_size=grid_size,
-                     n_jobs=n_jobs,
-                 )
-                 .sort_index()
-                 .reset_index(drop=True)
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(e, gdf, without_double, isolated, really_isolated)
-                 raise e
-
-     cleaned = pd.concat([many_hits, one_hit], ignore_index=True)
-
-     gdf = gdf.drop(columns="_poly_idx")
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37707146, 59.01065274, 0.4),
+         max_zoom=40,
+     )

-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             cleaned = clean_overlay(
-                 gdf,
-                 cleaned,
-                 how="update",
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(
-                     e,
-                     gdf,
-                     cleaned,
-                     without_double,
-                     isolated,
-                     really_isolated,
-                 )
-                 raise e
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(-52074.0241, 6580847.4464, 0.1),
+         max_zoom=40,
+     )

-     cleaned = sort_large_first(cleaned)
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.38389153, 59.00548223, 1),
+         max_zoom=40,
+     )

-     # slivers on bottom
-     cleaned = pd.concat(split_out_slivers(cleaned, tolerance))
+     # explore(
+     #     gdf_orig,
+     #     gdf,
+     #     dups=get_intersections(gdf, geom_type="polygon"),
+     #     msk=mask,
+     #     gaps=get_gaps(gdf),
+     #     updated=update_geometries(gdf, geom_type="polygon"),
+     #     # browser=False,
+     # )
+
+     # gdf = update_geometries(gdf, geom_type="polygon")
+
+     return gdf  # .pipe(clean_clip, mask, geom_type="polygon")
+
+
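The snapping helpers that follow do not operate on GeoDataFrames but on a flat representation: an (n, 2) float array of ring vertices plus a parallel integer array mapping each vertex to its ring. A small round-trip sketch of that representation (illustrative only, built from shapely alone):

    import shapely

    ring = shapely.linearrings([(0, 0), (1, 0), (1, 1), (0, 0)])
    coords, indices = shapely.get_coordinates([ring], return_index=True)
    # coords has shape (4, 2); indices is [0, 0, 0, 0]
    rebuilt = shapely.linearrings(coords, indices=indices)[0]
    assert rebuilt.equals(ring)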
+ # @numba.njit
+ def _snap_to_anchors(
+     geoms,
+     indices: NDArray[np.int32],
+     anchors,
+     anchor_indices,
+     mask,
+     mask_indices,
+     was_midpoint,
+     was_midpoint_mask,
+     tolerance: int | float,
+ ) -> tuple[NDArray, NDArray, NDArray]:
+
+     coords, all_distances = _snap_to_anchors_inner(
+         geoms,
+         indices,
+         anchors,
+         anchor_indices,
+         mask,
+         mask_indices,
+         was_midpoint,
+         was_midpoint_mask,
+         tolerance,
+     )

-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             cleaned = update_geometries(
-                 cleaned,
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(
-                     e,
-                     gdf,
-                     cleaned,
-                     without_double,
-                     isolated,
-                     really_isolated,
+     not_inf = coords[:, 0] != np.inf
+     all_distances = all_distances[not_inf]
+     indices = indices[not_inf]
+     coords = coords[not_inf]
+
+     is_snapped = np.full(len(coords), False)
+
+     n_coords = len(coords)
+
+     range_indices = np.arange(len(coords))
+
+     range_index = -1
+     for index in np.unique(indices):
+         cond = indices == index
+         these_coords = coords[cond]
+
+         # explore(ll=to_gdf(LineString(shapely.points(these_coords)), 25833))
+
+         # assert np.array_equal(these_coords[0], these_coords[-1]), these_coords
+
+         these_range_indices = range_indices[cond]
+         these_distances = all_distances[cond]
+         for i in range(len(these_coords)):
+             range_index += 1
+             if is_snapped[range_index]:
+                 print(i, "000")
+                 continue
+             # distances = all_distances[range_index]
+             distances = these_distances[i]
+             # distances = these_distances[:, i]
+             min_dist = np.min(distances)
+             if min_dist > tolerance:  # or min_dist == 0:
+                 print(i, "111", min_dist)
+                 continue
+
+             is_snapped_now = False
+
+             for j in np.argsort(distances):
+                 if distances[j] > tolerance:  # TODO or distances[j] == 0:
+                     break
+
+                 if was_midpoint_mask[j]:
+                     continue
+
+                 anchor = anchors[j]
+                 ring = these_coords.copy()
+                 ring[i] = anchor
+
+                 # snap the nexts points to same anchor if neighboring points have same anchor
+                 # in order to properly check if the ring will be simple after snapping
+                 indices_with_same_anchor = [range_index]
+                 # these_coords = coords[indices==index]
+
+                 pos_counter = 0
+                 # has_same_anchor_pos = True
+                 # has_same_anchor_neg = True
+                 while (
+                     pos_counter + i < len(these_distances) - 1
+                 ):  # has_same_anchor_pos or has_same_anchor_neg:
+                     pos_counter += 1
+
+                     # if indices[i + pos_counter] != index:
+                     #     break
+                     # next_distances = all_distances[range_index + pos_counter]
+                     next_distances = these_distances[i + pos_counter]
+                     has_same_anchor_pos = False
+                     for j2 in np.argsort(next_distances):
+                         if was_midpoint_mask[j2]:
+                             continue
+                         if next_distances[j2] > tolerance:
+                             break
+
+                         has_same_anchor_pos = j2 == j
+                         # print(
+                         #     "pos c",
+                         #     i,
+                         #     j,
+                         #     j2,
+                         #     pos_counter,
+                         #     has_same_anchor_pos,
+                         #     distances[j],
+                         #     next_distances[j2],
+                         # )
+                         break
+                     if has_same_anchor_pos:
+                         ring[i + pos_counter] = anchor
+                         indices_with_same_anchor.append(range_index + pos_counter)
+                     else:
+                         break
+
+                 # for j4 in np.arange(
+                 #     indices_with_same_anchor[0], indices_with_same_anchor[-1]
+                 # ):
+                 #     ring[j4 - range_index + i] = anchor
+                 #     indices_with_same_anchor.append(j4)
+
+                 if i == 0:
+                     # snap points at the end of the line if same anchor
+                     neg_counter = 0
+                     # has_same_anchor_neg = True
+                     while True:  # has_same_anchor_pos or has_same_anchor_neg:
+                         neg_counter -= 1
+
+                         # if indices[i + pos_counter] != index:
+                         #     break
+                         this_range_index = these_range_indices[neg_counter]
+                         # next_distances = all_distances[this_range_index]
+                         next_distances = these_distances[neg_counter]
+                         has_same_anchor_neg = False
+                         for j3 in np.argsort(next_distances):
+                             if was_midpoint_mask[j3]:
+                                 continue
+                             if next_distances[j3] > tolerance:
+                                 break
+
+                             has_same_anchor_neg = j3 == j
+                             # print(
+                             #     "neg c",
+                             #     i,
+                             #     j,
+                             #     j3,
+                             #     pos_counter,
+                             #     # has_same_anchor,
+                             #     distances[j],
+                             #     next_distances[j3],
+                             # )
+                             break
+                         if has_same_anchor_neg:
+                             ring[neg_counter] = anchor
+                             indices_with_same_anchor.append(this_range_index)
+                         else:
+                             break
+
+                 # for j5 in np.arange(0, indices_with_same_anchor[-1]):
+                 #     ring[j5 - range_index + i] = anchor
+                 #     indices_with_same_anchor.append(j5)
+
+                 indices_with_same_anchor = np.unique(indices_with_same_anchor)
+
+                 line_is_simple: bool = LineString(ring).is_simple
+
+                 # if i in [67, 68, 69, 173, 174, 175, 176, 177]:  # or
+                 if Point(these_coords[i]).intersects(
+                     to_gdf([12.08375303, 67.50052183], 4326)
+                     .to_crs(25833)
+                     .buffer(10)
+                     .union_all()
+                 ):
+                     # for xxx, yyy in locals().items():
+                     #     if len(str(yyy)) > 50:
+                     #         continue
+                     #     print(xxx)
+                     #     print(yyy)
+
+                     # print("prev:", was_midpoint_mask[j - 1])
+                     # print(distances[np.argsort(distances)])
+                     # print(anchors[np.argsort(distances)])
+                     # print(ring)
+                     explore(
+                         out_coords=to_gdf(
+                             shapely.linestrings(coords, indices=indices), 25833
+                         ),
+                         llll=to_gdf(LineString(ring), 25833),
+                         # this=to_gdf(this),
+                         # next_=to_gdf(next_),
+                         # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                         geom=to_gdf(these_coords[i], 25833),
+                         prev=to_gdf(these_coords[i - 1], 25833),
+                         nxt=to_gdf(these_coords[i + 1], 25833),
+                         nxt2=to_gdf(these_coords[i + 2], 25833),
+                         anchor=to_gdf(anchor, 25833),
+                         # browser=True,
+                     )
+
+                     print(
+                         "line_is_simple", line_is_simple, range_index, i, index, j
+                     )  # , j2, j3, x)
+
+                 if not line_is_simple:
+                     # for j4 in range(len(ring)):
+                     #     this_p = ring[j4]
+                     #     for j5 in range(len(ring)):
+                     #         that_p = ring[j5]
+                     #         dist_ = np.sqrt(
+                     #             (this_p[0] - that_p[0]) ** 2
+                     #             + (this_p[1] - that_p[1]) ** 2
+                     #         )
+                     #         if dist_ > 0 and dist_ < 1e-5:
+                     #             print(this_p)
+                     #             print(that_p)
+                     #             ring[j5] = this_p
+
+                     print(LineString(ring).wkt)
+                     # explore(
+                     #     out_coords=to_gdf(
+                     #         shapely.linestrings(coords, indices=indices), 25833
+                     #     ),
+                     #     llll=to_gdf(LineString(ring), 25833),
+                     #     # this=to_gdf(this),
+                     #     # next_=to_gdf(next_),
+                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     #     geom=to_gdf(these_coords[i], 25833),
+                     #     prev=to_gdf(these_coords[i - 1], 25833),
+                     #     nxt=to_gdf(these_coords[i + 1], 25833),
+                     #     nxt2=to_gdf(these_coords[i + 2], 25833),
+                     #     anchor=to_gdf(anchor, 25833),
+                     #     # browser=True,
+                     # )
+
+                     line_is_simple: bool = LineString(ring).is_simple
+
+                 if line_is_simple:
+                     # coords[i] = anchors[j]
+                     # is_snapped_to[j] = True
+                     # is_snapped[i] = True
+                     # explore(
+                     #     out_coords=to_gdf(
+                     #         shapely.linestrings(coords, indices=indices), 25833
+                     #     ),
+                     #     llll=to_gdf(LineString(ring), 25833),
+                     #     # this=to_gdf(this),
+                     #     # next_=to_gdf(next_),
+                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     #     anc=to_gdf(anchors[j]),
+                     #     geom=to_gdf(coords[i], 25833),
+                     #     these=to_gdf(coords[i : i + n_points_with_same_anchor ], 25833),
+                     #     prev=to_gdf(coords[i - 1], 25833),
+                     #     prev2=to_gdf(coords[i - 2], 25833),
+                     #     nxt=to_gdf(coords[i + n_points_with_same_anchor + 1], 25833),
+                     #     nxt2=to_gdf(coords[i + n_points_with_same_anchor + 2], 25833),
+                     #     nxt3=to_gdf(coords[i + n_points_with_same_anchor + 3], 25833),
+                     # )
+                     # print(coords[i : i + n_points_with_same_anchor + 1])
+                     for (
+                         x
+                     ) in indices_with_same_anchor:  # range(n_points_with_same_anchor):
+                         # print(range_index, i, index, j, j2, j3, x)
+                         coords[x] = anchor  # s[j]
+                         is_snapped[x] = True
+                         # coords[i + x] = anchors[j]
+                         # is_snapped[i + x] = True
+                     # print(coords[i : i + n_points_with_same_anchor + 1])
+
+                     is_snapped_now = True
+                     break
+                 # else:
+
+             if not is_snapped_now:
+                 coords[range_index] = anchors[np.argmin(distances)]
+                 # is_snapped_to[np.argmin(distances)] = True
+
+             if 0 and index == 0:  # i > 30 and i < 40:
+                 print(i)
+                 explore(
+                     out_coords=to_gdf(
+                         shapely.linestrings(coords, indices=indices), 25833
+                     ),
+                     llll=to_gdf(LineString(ring), 25833),
+                     pppp=to_gdf(shapely.points(ring), 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # this=to_gdf(this),
+                     # next_=to_gdf(next_),
+                     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     anc=to_gdf(anchors[j]).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     geom=to_gdf(these_coords[i], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # these=to_gdf(
+                     #     these_coords[i : i + n_points_with_same_anchor], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     prev=to_gdf(these_coords[i - 1], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     prev2=to_gdf(these_coords[i - 2], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt=to_gdf(these_coords[i + 1], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt2=to_gdf(these_coords[i + 2], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt3=to_gdf(these_coords[i + 3], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # browser=True,
+                     # nxt_n=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 1], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     # nxt_n2=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 2], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     # nxt_n3=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 3], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
                  )
-                 raise e
-
-     # cleaned = _safe_simplify(cleaned, PRECISION)
-     # cleaned.geometry = shapely.make_valid(cleaned.geometry)
-
-     # TODO check why polygons dissappear in rare cases. For now, just add back the missing
-     dissapeared_polygons = sfilter_inverse(gdf, cleaned.buffer(-PRECISION))
-     cleaned = pd.concat([cleaned, dissapeared_polygons])
+             # if (
+             #     indices[i] == 48
+             # ):  # and int(out_coords[i][0]) == 375502 and int(out_coords[i][1]) == 7490104:
+             #     print(geom, out_coords[i], out_coords[-3:])
+             # xxx += 1
+             # if xxx > 100 and i >= 2106:
+             #     print(locals())
+             #     explore(
+             #         geom=to_gdf(geom, 25833),
+             #         out=to_gdf(out_coords[i], 25833),
+             #         anc=to_gdf(shapely.points(anchors), 25833),
+             #         llll=to_gdf(
+             #             shapely.geometry.LineString(
+             #                 np.array(out_coords)[indices[: len(out_coords)] == 48]
+             #             ),
+             #             25833,
+             #         ),
+             #     )
+
+     return coords, indices
+
+
+ @numba.njit
+ def _snap_to_anchors_inner(
+     geoms,
+     indices: NDArray[np.int32],
+     anchors,
+     anchor_indices,
+     mask,
+     mask_indices,
+     was_midpoint,
+     was_midpoint_mask,
+     tolerance: int | float,
+ ) -> tuple[NDArray, NDArray, NDArray]:
+     # def orientation(p, q, r):
+     #     # Calculate orientation of the triplet (p, q, r).
+     #     # 0 -> collinear, 1 -> clockwise, 2 -> counterclockwise
+     #     val = (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
+     #     if val == 0:
+     #         return 0
+     #     return 1 if val > 0 else 2
+
+     # def on_segment(p, q, r):
+     #     # Check if point q lies on line segment pr
+     #     if min(p[0], r[0]) <= q[0] <= max(p[0], r[0]) and min(p[1], r[1]) <= q[
+     #         1
+     #     ] <= max(p[1], r[1]):
+     #         return True
+     #     return False
+
+     # def check_intersection(line1, line2):
+     #     """
+     #     Check if two line segments intersect.
+
+     #     Parameters:
+     #     line1 : np.array : 2x2 array with endpoints of the first line segment [[x1, y1], [x2, y2]]
+     #     line2 : np.array : 2x2 array with endpoints of the second line segment [[x3, y3], [x4, y4]]
+
+     #     Returns:
+     #     bool : True if the lines intersect, False otherwise.
+     #     """
+
+     #     p1, q1 = line1
+     #     p2, q2 = line2
+
+     #     # Find the four orientations needed for the general and special cases
+     #     o1 = orientation(p1, q1, p2)
+     #     o2 = orientation(p1, q1, q2)
+     #     o3 = orientation(p2, q2, p1)
+     #     o4 = orientation(p2, q2, q1)
+
+     #     # General case
+     #     if o1 != o2 and o3 != o4:
+     #         return True
+
+     #     # Special cases
+     #     # p1, q1, p2 are collinear and p2 lies on segment p1q1
+     #     if o1 == 0 and on_segment(p1, p2, q1):
+     #         return True
+
+     #     # p1, q1, q2 are collinear and q2 lies on segment p1q1
+     #     if o2 == 0 and on_segment(p1, q2, q1):
+     #         return True
+
+     #     # p2, q2, p1 are collinear and p1 lies on segment p2q2
+     #     if o3 == 0 and on_segment(p2, p1, q2):
+     #         return True
+
+     #     # p2, q2, q1 are collinear and q1 lies on segment p2q2
+     #     if o4 == 0 and on_segment(p2, q1, q2):
+     #         return True
+
+     #     return False
+
+     out_coords = geoms.copy()
+     # is_snapped = np.full(len(geoms), False)
+
+     n_anchors = len(anchors)
+     mask_n_minus_1 = len(mask) - 1
+     is_snapped_to = np.full(len(anchors), False)
+     out_distances = np.full((len(geoms), n_anchors), tolerance * 3)
+
+     for i in range(len(geoms)):
+         # if is_snapped[i]:
+         #     continue
+         geom = geoms[i]
+         index = indices[i]
+         # if i == 0 or index != indices[i - 1]:
+         #     i_for_this_index = 0
+         # else:
+         #     i_for_this_index += 1
+
+         is_snapped = False
+         for j in range(len(mask)):
+             mask_index = mask_indices[j]
+
+             is_last = j == mask_n_minus_1 or mask_index != mask_indices[j + 1]
+             if is_last:
+                 continue
+
+             mask_point0 = mask[j]
+
+             # if (
+             #     not mask_is_snapped_to[j]
+             #     and np.sqrt(
+             #         (geom[0] - mask_point0[0]) ** 2 + (geom[1] - mask_point0[1]) ** 2
+             #     )
+             #     <= tolerance
+             # ):
+             #     out_coords[i] = mask_point0
+             #     mask_is_snapped_to[j] = True
+             #     is_snapped = True
+             #     break
+
+             mask_point1 = mask[j + 1]
+
+             segment_vector = mask_point1 - mask_point0
+             point_vector = geom - mask_point0
+             segment_length_squared = np.dot(segment_vector, segment_vector)
+             if segment_length_squared == 0:
+                 closest_point = mask_point0
+             else:
+                 factor = np.dot(point_vector, segment_vector) / segment_length_squared
+                 factor = max(0, min(1, factor))
+                 closest_point = mask_point0 + factor * segment_vector
+
+             if np.linalg.norm(geom - closest_point) == 0 and was_midpoint[i]:
+                 out_coords[i] = np.array([np.inf, np.inf])
+                 is_snapped = True
+                 break

-     return to_single_geom_type(cleaned, "polygon")
+         if is_snapped:
+             continue

+         distances = np.full(n_anchors, tolerance * 3)
+         for j2 in range(n_anchors):
+             anchor = anchors[j2]

- def _safe_simplify(gdf: GeoDataFrame, tolerance: float | int, **kwargs) -> GeoDataFrame:
-     """Simplify only if the resulting area is no more than 1 percent larger.
+             # if anchor_indices[j] == index:
+             #     continue

-     Because simplifying can result in holes being filled.
-     """
-     length_then = gdf.length
-     copied = gdf.copy()
-     copied.geometry = shapely.make_valid(
-         shapely.simplify(copied.geometry.values, tolerance=tolerance)
-     )
-     filt = (copied.area > length_then * 1.01) | (copied.geometry.is_empty)
-     copied.loc[filt, copied._geometry_column_name] = gdf.loc[
-         filt, copied._geometry_column_name
-     ]
+             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
+             distances[j2] = dist
+             out_distances[i, j2] = dist
+             if dist == 0 and not was_midpoint_mask[j2]:
+                 break

-     return copied
+     return out_coords, out_distances
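The segment test in `_snap_to_anchors_inner` is the standard clamped dot-product projection: the closest point on segment AB to a point P is A + clamp(dot(AP, AB) / |AB|², 0, 1) · AB. A standalone sketch of that step (mirrors the logic above, not imported from the module):

    import numpy as np

    def closest_point_on_segment(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        ab = b - a
        length_sq = np.dot(ab, ab)
        if length_sq == 0:  # degenerate segment: both endpoints coincide
            return a
        factor = np.dot(p - a, ab) / length_sq
        return a + min(1.0, max(0.0, factor)) * ab  # clamp keeps the point on the segment

    p = np.array([2.0, 1.0])
    a, b = np.array([0.0, 0.0]), np.array([4.0, 0.0])
    print(closest_point_on_segment(p, a, b))  # [2. 0.]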
 

- def _remove_interior_slivers(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
-     gdf, slivers = split_out_slivers(gdf, tolerance)
-     slivers["_idx"] = range(len(slivers))
-     without_thick = clean_overlay(
-         to_lines(slivers), buff(gdf, PRECISION), how="difference"
-     )
-     return pd.concat(
-         [
-             gdf,
-             slivers[lambda x: x["_idx"].isin(without_thick["_idx"])].drop(
-                 columns="_idx"
-             ),
-         ]
-     )
-
+ @numba.njit
+ def _build_anchors(
+     geoms: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     mask_coords: NDArray[np.float64],
+     mask_indices: NDArray[np.int32],
+     was_midpoint_mask: NDArray[bool],
+     tolerance: int | float,
+ ):
+     anchors = list(mask_coords)
+     anchor_indices = list(mask_indices)
+     is_anchor_arr = np.full(len(geoms), False)
+     was_midpoint_mask = list(was_midpoint_mask)
+     for i in np.arange(len(geoms)):
+         geom = geoms[i]
+         index = indices[i]
+         # distances = []
+         # for j, anchor in zip(anchor_indices, anchors):
+
+         is_anchor = True
+         for j in range(len(anchors)):
+             # if indices[i] != indices[j]:
+             # if i != j and indices[i] != indices[j]:
+             anchor = anchors[j]
+             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
+             if dist <= tolerance:
+                 is_anchor = False
+                 break
+             # distances.append(dist)
+         # distances = np.array(distances)
+         is_anchor_arr[i] = is_anchor
+         if is_anchor:  # not len(distances) or np.min(distances) > tolerance:
+             anchors.append(geom)
+             anchor_indices.append(index)
+             was_midpoint_mask.append(True)
+     return anchors, anchor_indices, is_anchor_arr, was_midpoint_mask
+
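`_build_anchors` thins the vertex cloud greedily: a vertex becomes a new anchor only when it lies farther than the tolerance from every anchor collected so far, so the resulting anchors are mutually at least a tolerance apart. A pure-Python sketch of the same rule (illustrative, without the mask and index bookkeeping):

    import numpy as np

    def build_anchors(points: np.ndarray, tolerance: float) -> list[np.ndarray]:
        anchors: list[np.ndarray] = []
        for pt in points:
            # keep pt only if no previously chosen anchor is within tolerance
            if all(np.linalg.norm(pt - a) > tolerance for a in anchors):
                anchors.append(pt)
        return anchors

    pts = np.array([[0.0, 0.0], [0.5, 0.0], [2.0, 0.0], [2.4, 0.0]])
    print(build_anchors(pts, tolerance=1.0))  # keeps (0, 0) and (2, 0)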
+
+ @numba.njit
+ def _add_last_points_to_end(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+ ) -> tuple[
+     NDArray[np.float64],
+     NDArray[np.int32],
+ ]:
+     out_coords, out_indices = [coords[0]], [indices[0]]
+     last_coords = []
+     prev = coords[0]
+     first_coords = prev
+     n_minus_1 = len(coords) - 1
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if idx != indices[i - 1]:
+             first_coords = xy
+             out_coords.append(xy)
+             out_indices.append(idx)
+         elif not distance_to_prev:
+             if i == n_minus_1 or idx != indices[i + 1]:
+                 last_coords.append(xy)
+             prev = xy
+             continue
+         elif i == n_minus_1 or idx != indices[i + 1]:
+             out_coords.append(xy)
+             out_coords.append(first_coords)
+             out_indices.append(idx)
+             out_indices.append(idx)
+             last_coords.append(xy)
+         else:
+             out_coords.append(xy)
+             out_indices.append(idx)
+
+         prev = xy
+
+     return (out_coords, out_indices)
+
+
+ @numba.njit
+ def _add_last_points_to_end_with_third_arr(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     third_arr: NDArray[Any],
+ ) -> tuple[
+     NDArray[np.float64],
+     NDArray[np.int32],
+     NDArray[Any],
+ ]:
+     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
+     last_coords = []
+     prev = coords[0]
+     first_coords = prev
+     n_minus_1 = len(coords) - 1
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if idx != indices[i - 1]:
+             first_coords = xy
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+         elif not distance_to_prev:
+             if i == n_minus_1 or idx != indices[i + 1]:
+                 last_coords.append(xy)
+             prev = xy
+             continue
+         elif i == n_minus_1 or idx != indices[i + 1]:
+             out_coords.append(xy)
+             out_coords.append(first_coords)
+             out_indices.append(idx)
+             out_indices.append(idx)
+             last_coords.append(xy)
+             out_third_arr.append(third_arr[i])
+             out_third_arr.append(third_arr[i])
+         else:
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+
+         prev = xy
+
+     return (out_coords, out_indices, out_third_arr)
+
+
+ @numba.njit
+ def _remove_duplicate_points(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     third_arr: NDArray[Any],
+ ):
+     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
+     prev = coords[0]
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if not distance_to_prev and idx == indices[i - 1]:
+             prev = xy
+             continue

- def remove_spikes(
-     gdf: GeoDataFrame, tolerance: int | float, n_jobs: int = 1
- ) -> GeoDataFrame:
-     """Remove thin spikes from polygons.
+         if idx != indices[i - 1]:
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+             prev = xy
+             continue

-     Args:
-         gdf: A GeoDataFrame.
-         tolerance: Spike tolerance.
-         n_jobs: Number of threads.
+         out_coords.append(xy)
+         out_indices.append(idx)
+         out_third_arr.append(third_arr[i])
+         prev = xy

-     Returns:
-         A GeoDataFrame.
-     """
-     return clean_overlay(
-         gdf, gdf[["geometry"]], how="intersection", grid_size=tolerance, n_jobs=n_jobs
-     )
+     return out_coords, out_indices, out_third_arr

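`_remove_duplicate_points` drops a vertex only when it has zero distance to the immediately preceding vertex and belongs to the same ring index; the first vertex of a new ring is always kept, and the parallel third array stays aligned. A minimal illustration of that rule (assumed semantics, pure Python):

    import numpy as np

    coords = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    indices = np.array([0, 0, 0, 1])  # last point starts a new ring
    out = [0]
    prev = coords[0]
    for i in range(1, len(coords)):
        dist = np.linalg.norm(coords[i] - prev)
        if dist == 0 and indices[i] == indices[i - 1]:
            prev = coords[i]
            continue  # consecutive duplicate within the same ring: skip
        out.append(i)
        prev = coords[i]
    print(coords[out])  # duplicate (0, 0) removed; (1, 0) kept at the new ring index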
 
- def _properly_fix_duplicates(
-     gdf: GeoDataFrame,
-     double: GeoDataFrame,
-     slivers: GeoDataFrame,
-     thin_gaps_and_double: GeoDataFrame,
+ def _snap_linearrings(
+     geoms: NDArray[LinearRing],
      tolerance: int | float,
-     n_jobs: int,
- ) -> GeoDataFrame:
-     gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double, n_jobs)
-     gdf, more_slivers = split_out_slivers(gdf, tolerance)
-     slivers = pd.concat([slivers, more_slivers], ignore_index=True)
-     gaps = get_gaps(gdf, include_interiors=True)
-     gaps["_was_gap"] = 1
-     assert "_double_idx" not in gaps
-     double = get_intersections(gdf)
-     double["_double_idx"] = range(len(double))
-     thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
-         lambda x: x.buffer(-tolerance / 2).is_empty
-     ]
-
-     return gdf, thin_gaps_and_double, slivers
+     mask: GeoDataFrame | None,
+     snap_to_anchors: bool = True,
+ ):
+     if not len(geoms):
+         return geoms

+     points = GeoDataFrame(
+         {
+             "geometry": extract_unique_points(geoms),
+             "_geom_idx": np.arange(len(geoms)),
+         }
+     ).explode(ignore_index=True)
+     coords = get_coordinates(points.geometry.values)
+     indices = points["_geom_idx"].values
+
+     if mask is not None:
+         mask_coords, mask_indices = get_coordinates(
+             mask.geometry.values, return_index=True
+         )
+         is_anchor = np.full(len(mask_coords), False)

- def _dissolve_thick_double_and_update(
-     gdf: GeoDataFrame, double: GeoDataFrame, thin_double: GeoDataFrame, n_jobs: int
- ) -> GeoDataFrame:
-     large = (
-         double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])].drop(
-             columns="_double_idx"
+         mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
+             mask_coords, mask_indices, is_anchor
          )
-         # .pipe(sort_large_first)
-         # .sort_values("_poly_idx")
-         .pipe(update_geometries, geom_type="polygon", n_jobs=n_jobs)
-     )
-     return (
-         clean_overlay(gdf, large, how="update", geom_type="polygon", n_jobs=n_jobs)
-         # .pipe(sort_large_first)
-         # .sort_values("_poly_idx")
-         .pipe(update_geometries, geom_type="polygon", n_jobs=n_jobs)
-     )
+         mask_coords, mask_indices = _add_last_points_to_end(mask_coords, mask_indices)
+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)

+         is_anchor = np.full(len(mask_coords), False)
+         mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
+             mask_coords, mask_indices, is_anchor
+         )
+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)

- def _cleaning_checks(
-     gdf: GeoDataFrame, tolerance: int | float, duplicate_action: bool
- ) -> GeoDataFrame:  # , spike_action):
-     if not len(gdf) or not tolerance:
-         return gdf
-     if tolerance < PRECISION:
-         raise ValueError(
-             f"'tolerance' must be larger than {PRECISION} to avoid "
-             "problems with floating point precision."
+         original_mask_buffered = shapely.buffer(
+             shapely.linearrings(mask_coords, indices=mask_indices),
+             tolerance * 1.1,
+         )
+         mask_coords, mask_indices, was_midpoint_mask, _ = (
+             _add_midpoints_to_segments_numba(
+                 mask_coords,
+                 mask_indices,
+                 get_coordinates(
+                     sfilter(
+                         points.geometry.drop_duplicates(),
+                         original_mask_buffered,
+                     )
+                 ),
+                 tolerance * 1.1,
+             )
          )
-     if duplicate_action not in ["fix", "error", "ignore"]:
-         raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
-
-
- def split_out_slivers(
-     gdf: GeoDataFrame | GeoSeries, tolerance: float | int
- ) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
-     is_sliver = gdf.buffer(-tolerance / 2).is_empty
-     slivers = gdf.loc[is_sliver]
-     gdf = gdf.loc[~is_sliver]
-     slivers, isolated = sfilter_split(slivers, gdf.buffer(PRECISION))
-     gdf = pd.concat([gdf, isolated])
-     return gdf, slivers
-
-
- def try_for_grid_size(
-     func: Callable,
-     grid_sizes: tuple[None, float | int],
-     args: tuple | None = None,
-     kwargs: dict | None = None,
- ) -> Any:
-     args = args or ()
-     kwargs = kwargs or {}
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             return func(*args, grid_size=grid_size, **kwargs)
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 raise e

+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)
+         mask_indices = (mask_indices + 1) * -1
+
+     is_anchor = np.full(len(coords), False)
+     coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
+
+     coords, indices = _add_last_points_to_end(coords, indices)
+     coords = np.array(coords)
+     indices = np.array(indices)
+
+     is_anchor = np.full(len(coords), False)
+     coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
+     coords = np.array(coords)
+     indices = np.array(indices)
+
+     # if 0:
+     #     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         tolerance * 1.1,  # + PRECISION * 100,
+     #     )
+
+     #     was_midpoint = np.array(was_midpoint)
+
+     #     coords, is_snapped_to = _snap_to_anchors(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         was_midpoint,
+     #         was_midpoint_mask,
+     #         tolerance + PRECISION * 20,
+     #     )
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+
+     #     indices = indices[coords[:, 0] != np.inf]
+     #     coords = coords[coords[:, 0] != np.inf]
+
+     if snap_to_anchors:
+         if mask is None:
+             mask_coords = [coords[0]]
+             mask_indices = [indices[0]]
+             was_midpoint_mask = [False]
+         anchors, anchor_indices, is_anchor, was_midpoint_anchors = _build_anchors(
+             coords,
+             indices,
+             mask_coords,
+             mask_indices,
+             was_midpoint_mask,
+             tolerance + PRECISION,  # * 100
+         )
+         anchors = np.array(anchors)
+         anchor_indices = np.array(anchor_indices)

- def split_and_eliminate_by_longest(
-     gdf: GeoDataFrame | list[GeoDataFrame],
-     to_eliminate: GeoDataFrame,
-     tolerance: int | float,
-     grid_sizes: tuple[None | float | int] = (None,),
-     n_jobs: int = 1,
-     **kwargs,
- ) -> GeoDataFrame | tuple[GeoDataFrame]:
-     if not len(to_eliminate):
-         return gdf
+         # anchors = np.round(anchors, 3)

-     if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
-         as_gdf = pd.concat(gdf, ignore_index=True)
      else:
-         as_gdf = gdf
-
-     splitted = try_for_grid_size(
-         split_by_neighbors,
-         grid_sizes=grid_sizes,
-         args=(to_eliminate, as_gdf, tolerance),
-     ).pipe(sort_small_first)
-
-     splitted = try_for_grid_size(
-         update_geometries,
-         grid_sizes=grid_sizes,
-         args=(splitted,),
-         kwargs=dict(geom_type="polygon", n_jobs=n_jobs),
-     )
+         anchors, anchor_indices, was_midpoint_anchors = (
+             mask_coords,
+             mask_indices,
+             was_midpoint_mask,
+         )

-     gdf = try_for_grid_size(
-         eliminate_by_longest,
-         grid_sizes=grid_sizes,
-         args=(
-             gdf,
-             splitted,
-         ),
-         kwargs=kwargs | {"n_jobs": n_jobs},
+     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+         coords,
+         indices,
+         anchors,
+         tolerance * 1.1,
      )

-     if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
-         as_gdf = pd.concat(gdf, ignore_index=True)
-     else:
-         as_gdf = gdf
-
-     missing = try_for_grid_size(
-         clean_overlay,
-         grid_sizes=grid_sizes,
-         args=(
-             to_eliminate,
-             as_gdf,
-         ),
-         kwargs=dict(
-             how="difference",
-             geom_type="polygon",
-             n_jobs=n_jobs,
-         ),
-     ).pipe(lambda x: dissexp(x, n_jobs=n_jobs))
+     was_midpoint = np.array(was_midpoint)

-     return try_for_grid_size(
-         eliminate_by_longest,
-         grid_sizes=grid_sizes,
-         args=(gdf, missing),
-         kwargs=kwargs | {"n_jobs": n_jobs},
+     coords_up_here000 = (
+         pd.Series(_coords_to_rings(np.array(coords), np.array(indices), geoms))
+         .loc[lambda x: x.notna()]
+         .values
      )
+     coords_up_here000 = to_gdf(polygons(coords_up_here000), 25833)

-
- def split_by_neighbors(
-     df: GeoDataFrame,
-     split_by: GeoDataFrame,
-     tolerance: int | float,
-     grid_size: float | int | None = None,
- ) -> GeoDataFrame:
-     if not len(df):
-         return df
-
-     split_by = split_by.copy()
-     split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
-
-     intersecting_lines = (
-         clean_overlay(
-             to_lines(split_by),
-             buff(df, tolerance),
-             how="intersection",
-             grid_size=grid_size,
-         )
-         .pipe(get_line_segments)
-         .reset_index(drop=True)
+     coords, indices, was_midpoint = _add_last_points_to_end_with_third_arr(
+         coords, indices, was_midpoint
      )

-     endpoints = intersecting_lines.boundary.explode(index_parts=False)
-
-     extended_lines = GeoDataFrame(
-         {
-             "geometry": extend_lines(
-                 endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
-                 endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
-                 distance=tolerance * 3,
-             )
-         },
-         crs=df.crs,
+     coords, indices, was_midpoint = _remove_duplicate_points(
+         coords, indices, was_midpoint
      )

-     buffered = buff(extended_lines, tolerance, single_sided=True)
-
-     return clean_overlay(df, buffered, how="identity", grid_size=grid_size)
-
+     coords = np.array(coords)
+     indices = np.array(indices)
+     was_midpoint = np.array(was_midpoint)

- def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
-     if len(arr1) != len(arr2):
-         raise ValueError
-     if not len(arr1):
-         return arr1
-
-     arr1, arr2 = arr2, arr1  # TODO fix
-
-     coords1 = coordinate_array(arr1)
-     coords2 = coordinate_array(arr2)
-
-     dx = coords2[:, 0] - coords1[:, 0]
-     dy = coords2[:, 1] - coords1[:, 1]
-     len_xy = np.sqrt((dx**2.0) + (dy**2.0))
-     x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
-     y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
+     coords_up_here = (
+         pd.Series(_coords_to_rings(coords, indices, geoms))
+         .loc[lambda x: x.notna()]
+         .values
+     )
+     coords_up_here = to_gdf(polygons(coords_up_here), 25833)

-     new_points = np.array([None for _ in range(len(arr1))])
-     new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
+     explore(
+         coords=to_gdf(shapely.points(coords), 25833).assign(
+             idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         anchors=to_gdf(shapely.points(anchors), 25833).assign(
+             idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+         ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+         coords_up_here000=coords_up_here000,
+         coords_up_here=coords_up_here,
+         geoms=to_gdf(polygons(geoms), 25833),
+         msk=to_gdf(shapely.points(mask_coords), 25833).assign(
+             was_midpoint_mask=was_midpoint_mask
+         ),
+         # center=_DEBUG_CONFIG["center"],
+     )

-     new_points[~np.isnan(x)] = make_lines_between_points(
-         arr2[~np.isnan(x)], new_points[~np.isnan(x)]
+     coords, indices = _snap_to_anchors(
+         coords,
+         indices,
+         anchors,
+         anchor_indices,
+         mask_coords,
+         mask_indices,
+         was_midpoint,
+         was_midpoint_anchors,
+         tolerance + PRECISION * 100,
      )
-     return new_points
+     indices = np.array(indices)
+     coords = np.array(coords)
+     indices = indices[coords[:, 0] != np.inf]
+     coords = coords[coords[:, 0] != np.inf]
+
+     # coords_up_here111 = (
+     #     pd.Series(_coords_to_rings(coords, indices, geoms))
+     #     .loc[lambda x: x.notna()]
+     #     .values
+     # )
+     # coords_up_here111 = to_gdf(polygons(coords_up_here111), 25833)
+
+     # if 0:
+     #     # coords = get_coordinates(points.geometry.values)
+     #     # indices = points["_geom_idx"].values
+
+     #     is_anchor = np.full(len(coords), False)
+     #     coords, indices, is_anchor = _remove_duplicate_points(
+     #         coords, indices, is_anchor
+     #     )
+     #     coords, indices = _add_last_points_to_end(coords, indices)
+     #     coords = np.array(coords)
+     #     indices = np.array(indices)
+     #     is_anchor = np.full(len(coords), False)
+     #     coords, indices, is_anchor = _remove_duplicate_points(
+     #         coords, indices, is_anchor
+     #     )
+     #     coords = np.array(coords)
+     #     indices = np.array(indices)
+
+     # display(pd.DataFrame(coords, index=indices, columns=[*"xy"]))
+
+     # if 0:
+     #     mask_coords, mask_indices, , dist_to_closest_geom = (
+     #         _add_midpoints_to_segments_numba(
+     #             mask_coords,
+     #             mask_indices,
+     #             # coords,
+     #             get_coordinates(
+     #                 sfilter(
+     #                     GeoSeries(shapely.points(coords)).drop_duplicates(),
+     #                     original_mask_buffered,
+     #                 )
+     #             ),
+     #             tolerance * 1.1,
+     #         )
+     #     )
+
+     #     mask_coords = np.array(mask_coords)
+     #     mask_indices = np.array(mask_indices)
+
+     #     anchors, anchor_indices, is_anchor = _build_anchors(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         # is_anchor,
+     #         tolerance + PRECISION,  # * 100
+     #     )
+     #     anchors = np.array(anchors)
+     #     anchor_indices = np.array(anchor_indices)
+
+     #     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+     #         coords,
+     #         indices,
+     #         anchors,
+     #         tolerance * 1.1,  # + PRECISION * 100,
+     #         # GeoDataFrame({"geometry": shapely.points(coords), "_geom_idx": indices}),
+     #         # GeoDataFrame({"geometry": shapely.points(anchors)}),
+     #         # tolerance,  # + PRECISION * 100,
+     #         # None,
+     #     )
+     #     print(len(coords), len(anchors), len(was_midpoint))
+
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+
+     #     was_midpoint = np.array(was_midpoint)
+
+     #     coords, is_snapped_to = _snap_to_anchors(
+     #         coords,
+     #         indices,
+     #         anchors,
+     #         anchor_indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         was_midpoint,
+     #         was_midpoint_anchors,
+     #         tolerance + PRECISION * 20,
+     #     )
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+     #     indices = indices[coords[:, 0] != np.inf]
+     #     coords = coords[coords[:, 0] != np.inf]
+
+     # coords = np.array(coords)
+
+     # indices = np.array(indices)
+
+     coords_down_here = (
+         pd.Series(_coords_to_rings(coords, indices, geoms))
+         .loc[lambda x: x.notna()]
+         .values
+     )
+     lines_down_here = to_gdf(shapely.buffer(coords_down_here, 0.1), 25833)
+     coords_down_here = to_gdf(polygons(coords_down_here), 25833)

+     try:
+         explore(
+             coords=to_gdf(shapely.points(coords), 25833).assign(
+                 idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),
+             anchors=to_gdf(shapely.points(anchors), 25833).assign(
+                 idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+             coords_up_here000=coords_up_here000,
+             coords_up_here=coords_up_here,
+             coords_down_here=coords_down_here,
+             lines_down_here=lines_down_here,
+             geoms=to_gdf(polygons(geoms), 25833),
+             msk=to_gdf(shapely.points(mask_coords), 25833).assign(
+                 was_midpoint_mask=was_midpoint_mask
+             ),
+         )

- def make_lines_between_points(
-     arr1: NDArray[Point], arr2: NDArray[Point]
- ) -> NDArray[LineString]:
-     if arr1.shape != arr2.shape:
-         raise ValueError(
-             f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
+         explore(
+             coords=to_gdf(shapely.points(coords), 25833).assign(
+                 idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),
+             anchors=to_gdf(shapely.points(anchors), 25833).assign(
+                 idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+             coords_up_here000=coords_up_here000,
+             coords_up_here=coords_up_here,
1360
+ coords_down_here=coords_down_here,
1361
+ lines_down_here=lines_down_here,
1362
+ geoms=to_gdf(polygons(geoms), 25833),
1363
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1364
+ was_midpoint_mask=was_midpoint_mask
1365
+ ),
1366
+ center=(5.37707159, 59.01065276, 1),
627
1367
  )
628
- coords: pd.DataFrame = pd.concat(
629
- [
630
- pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
631
- pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
632
- ]
633
- ).sort_index()
634
-
635
- return linestrings(coords.values, indices=coords.index)
636
-
637
-
638
- def get_line_segments(lines: GeoDataFrame | GeoSeries) -> GeoDataFrame:
639
- assert lines.index.is_unique
640
- if isinstance(lines, GeoDataFrame):
641
- geom_col = lines._geometry_column_name
642
- multipoints = lines.assign(
643
- **{geom_col: extract_unique_points(lines.geometry.values)}
1368
+ explore(
1369
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1370
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1371
+ ),
1372
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1373
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1374
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1375
+ coords_up_here000=coords_up_here000,
1376
+ coords_up_here=coords_up_here,
1377
+ coords_down_here=coords_down_here,
1378
+ lines_down_here=lines_down_here,
1379
+ geoms=to_gdf(polygons(geoms), 25833),
1380
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1381
+ was_midpoint_mask=was_midpoint_mask
1382
+ ),
1383
+ center=(5.37419946, 59.01138812, 15),
644
1384
  )
645
- segments = multipoints_to_line_segments(multipoints.geometry)
646
- return segments.join(lines.drop(columns=geom_col))
647
1385
 
648
- multipoints = GeoSeries(extract_unique_points(lines.values), index=lines.index)
1386
+ explore(
1387
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1388
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1389
+ ),
1390
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1391
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1392
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1393
+ coords_up_here000=coords_up_here000,
1394
+ coords_up_here=coords_up_here,
1395
+ lines_down_here=lines_down_here,
1396
+ coords_down_here=coords_down_here,
1397
+ geoms=to_gdf(polygons(geoms), 25833),
1398
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1399
+ was_midpoint_mask=was_midpoint_mask
1400
+ ),
1401
+ center=(5.38389153, 59.00548223, 1),
1402
+ )
1403
+ explore(
1404
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1405
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1406
+ ),
1407
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1408
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1409
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1410
+ coords_up_here000=coords_up_here000,
1411
+ coords_up_here=coords_up_here,
1412
+ coords_down_here=coords_down_here,
1413
+ lines_down_here=lines_down_here,
1414
+ geoms=to_gdf(polygons(geoms), 25833),
1415
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1416
+ was_midpoint_mask=was_midpoint_mask
1417
+ ),
1418
+ center=_DEBUG_CONFIG["center"],
1419
+ )
649
1420
 
650
- return multipoints_to_line_segments(multipoints)
1421
+ except GEOSException as e:
1422
+ print(e)
651
1423
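The whole visualization block above is wrapped in `try`/`except GEOSException` so that a rendering failure is printed rather than allowed to abort the cleaning; the function still falls through to its return value. The guard, reduced to its shape (the plotting call is a hypothetical stand-in):

```python
# Best-effort debug plotting: a GEOSException is printed, not raised.
from shapely.errors import GEOSException

try:
    render_debug_maps()  # hypothetical stand-in for the explore(...) calls
except GEOSException as e:
    print(e)
```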
 
1424
+ return _coords_to_rings(coords, indices, geoms)
652
1425
 
653
- def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
654
- if not len(multipoints):
655
- return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)
656
1426
 
657
- try:
658
- crs = multipoints.crs
659
- except AttributeError:
660
- crs = None
1427
+ def _coords_to_rings(
1428
+ coords: NDArray[np.float64],
1429
+ indices: NDArray[np.int32],
1430
+ original_geoms: NDArray[LinearRing],
1431
+ ) -> NDArray[LinearRing]:
1432
+ df = pd.DataFrame({"x": coords[:, 0], "y": coords[:, 1]}, index=indices).loc[
1433
+ lambda x: x.groupby(level=0).size() > 2
1434
+ ]
1435
+ to_int_idx = {idx: i for i, idx in enumerate(df.index.unique())}
1436
+ rings = pd.Series(
1437
+ linearrings(df.values, indices=df.index.map(to_int_idx)),
1438
+ index=df.index.unique(),
1439
+ )
661
1440
 
662
- try:
663
- point_df = multipoints.explode(index_parts=False)
664
- except AttributeError:
665
- points, indices = get_parts(multipoints, return_index=True)
666
- if isinstance(multipoints.index, pd.MultiIndex):
667
- indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)
1441
+ missing = pd.Series(
1442
+ index=pd.Index(range(len(original_geoms))).difference(rings.index)
1443
+ )
668
1444
 
669
- point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)
1445
+ return pd.concat([rings, missing]).sort_index().values
670
1446
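`_coords_to_rings` rebuilds one `LinearRing` per index group, drops groups that have collapsed to two or fewer points, and pads the result with NaN placeholders so the output array stays positionally aligned with `original_geoms`. A usage sketch (note that `shapely.linearrings` closes an open coordinate sequence automatically):

```python
# Usage sketch for _coords_to_rings: ring 0 survives, ring 1 has only
# two points left and comes back as NaN so positions stay aligned.
import numpy as np

coords = np.array(
    [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0],  # ring 0: 4 points
     [5.0, 5.0], [6.0, 5.0]]                           # ring 1: collapsed
)
indices = np.array([0, 0, 0, 0, 1, 1])
rings = _coords_to_rings(coords, indices, original_geoms=np.empty(2, dtype=object))
# rings[0] is a LinearRing; rings[1] is NaN
```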
 
671
- try:
672
- point_df = point_df.to_frame("geometry")
673
- except AttributeError:
674
- pass
675
1447
 
676
- point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
1448
+ @numba.njit
1449
+ def _add_midpoints_to_segments_numba(
1450
+ geoms: NDArray[np.float64],
1451
+ indices: NDArray[np.int32],
1452
+ anchors: NDArray[np.float64],
1453
+ tolerance: int | float,
1454
+ ):
1455
+ n_minus_1 = len(geoms) - 1
1456
+ out_coords = []
1457
+ out_indices = []
1458
+ was_midpoint = []
1459
+ out_distances = []
1460
+ for i in range(len(geoms)):
1461
+ index = indices[i]
1462
+
1463
+ is_last = i == n_minus_1 or index != indices[i + 1]
1464
+ if is_last:
1465
+ continue
1466
+
1467
+ geom0 = geoms[i]
1468
+ geom1 = geoms[i + 1]
1469
+
1470
+ closest_points = np.full((len(anchors) + 2, 2), np.inf)
1471
+ these_out_distances = np.full(len(anchors) + 2, np.inf)
1472
+ closest_points[-1] = geom1
1473
+ closest_points[-2] = geom0
1474
+ these_out_distances[-1] = 0
1475
+ these_out_distances[-2] = 0
1476
+
1477
+ segment_vector = geom1 - geom0
1478
+ segment_length_squared = np.dot(segment_vector, segment_vector)
1479
+ for j in range(len(anchors)):
1480
+ anchor = anchors[j]
1481
+
1482
+ if segment_length_squared == 0:
1483
+ closest_point = geom0
1484
+ else:
1485
+ point_vector = anchor - geom0
1486
+ factor = np.dot(point_vector, segment_vector) / segment_length_squared
1487
+ factor = max(0, min(1, factor))
1488
+ if factor < 1e-6:
1489
+ closest_point = geom0
1490
+ elif factor > 1 - 1e-6:
1491
+ closest_point = geom1
1492
+ else:
1493
+ closest_point = geom0 + factor * segment_vector
1494
+
1495
+ dist = np.linalg.norm(anchor - closest_point)
1496
+ if dist <= tolerance and dist > PRECISION:
1497
+ closest_points[j] = closest_point
1498
+ these_out_distances[j] = dist
1499
+
1500
+ # if (
1501
+ # closest_point[0] == 905049.3317999999
1502
+ # ): # and int(closest_point[1]) == 7877676:
1503
+ # print()
1504
+ # for xxx in closest_point:
1505
+ # print(xxx)
1506
+ # for xxx in geom0:
1507
+ # print(xxx)
1508
+ # for xxx in geom1:
1509
+ # print(xxx)
1510
+ # for xxx, yyy in locals().items():
1511
+ # print(xxx, yyy)
1512
+ # ssss
1513
+
1514
+ not_inf = closest_points[:, 0] != np.inf
1515
+ arr = closest_points[not_inf]
1516
+ these_out_distances = these_out_distances[not_inf]
1517
+
1518
+ # sort by first and second column
1519
+ # could have used np.lexsort, but it's not numba compatible
1520
+ arr = arr[np.argsort(arr[:, 0])]
1521
+ any_unsorted = True
1522
+ while any_unsorted:
1523
+ any_unsorted = False
1524
+ for i in range(len(arr) - 1):
1525
+ if arr[i, 0] < arr[i + 1, 0]:
1526
+ continue
1527
+ if arr[i, 1] > arr[i + 1, 1]:
1528
+ copied = arr[i].copy()
1529
+ arr[i] = arr[i + 1]
1530
+ arr[i + 1] = copied
1531
+
1532
+ copied = these_out_distances[i]
1533
+ these_out_distances[i] = these_out_distances[i + 1]
1534
+ these_out_distances[i + 1] = copied
1535
+
1536
+ any_unsorted = True
1537
+
1538
+ with_midpoints = []
1539
+ these_out_distances2 = []
1540
+ first_is_added = False
1541
+ last_is_added = False
1542
+ is_reverse = False
1543
+ for y in range(len(arr)):
1544
+ point = arr[y]
1545
+ if (
1546
+ not first_is_added
1547
+ and np.sqrt((geom0[0] - point[0]) ** 2 + (geom0[1] - point[1]) ** 2)
1548
+ == 0
1549
+ ):
1550
+ first_is_added = True
1551
+ with_midpoints.append(point)
1552
+ these_out_distances2.append(these_out_distances[y])
1553
+ if last_is_added:
1554
+ is_reverse = True
1555
+ break
1556
+ else:
1557
+ continue
1558
+ elif (
1559
+ not last_is_added
1560
+ and np.sqrt((geom1[0] - point[0]) ** 2 + (geom1[1] - point[1]) ** 2)
1561
+ == 0
1562
+ ):
1563
+ last_is_added = True
1564
+ with_midpoints.append(point)
1565
+ these_out_distances2.append(these_out_distances[y])
1566
+ if not first_is_added:
1567
+ is_reverse = True
1568
+ continue
1569
+ else:
1570
+ with_midpoints.append(point)
1571
+ break
1572
+ if first_is_added or last_is_added:
1573
+ with_midpoints.append(point)
1574
+ these_out_distances2.append(these_out_distances[y])
1575
+
1576
+ # these_out_distances2.append(these_out_distances[y])
1577
+ # these_anchors2.append(these_anchors[y])
1578
+
1579
+ # with_midpoints = np.array(with_midpoints)
1580
+
1581
+ if is_reverse:
1582
+ with_midpoints = with_midpoints[::-1]
1583
+ these_out_distances2 = these_out_distances2[::-1]
1584
+ # these_anchors2 = these_anchors2[::-1]
1585
+
1586
+ # print(index, is_reverse, arr)
1587
+ # print(with_midpoints)
1588
+ # print(to_gdf(LineString([geom0, geom1]), 25833))
1589
+ # print(to_gdf(shapely.points(closest_points)))
1590
+ # explore(
1591
+ # to_gdf(shapely.points(with_midpoints)).assign(
1592
+ # idx=lambda x: range(len(x))
1593
+ # ),
1594
+ # "idx",
1595
+ # )
1596
+ # explore(
1597
+ # l=to_gdf(LineString([geom0, geom1]), 25833),
1598
+ # # anchors=to_gdf(shapely.points(anchors)),
1599
+ # # anchors_in_dist=to_gdf(shapely.points(these_anchors)),
1600
+ # # closest_points=to_gdf(shapely.points(closest_points)),
1601
+ # with_midpoints=to_gdf(shapely.points(with_midpoints)),
1602
+ # anchors=to_gdf(shapely.points(anchors)),
1603
+ # arr=to_gdf(shapely.points(arr)),
1604
+ # # center=(-0.07034028, 1.80337784, 0.4),
1605
+ # )
1606
+
1607
+ with_midpoints_no_dups = []
1608
+ these_out_distances_no_dups = []
1609
+
1610
+ for y2 in range(len(with_midpoints)):
1611
+ point = with_midpoints[y2]
1612
+ should_be_added = True
1613
+ for z in range(len(with_midpoints_no_dups)):
1614
+ out_point = with_midpoints_no_dups[z]
1615
+ if (
1616
+ np.sqrt(
1617
+ (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1618
+ )
1619
+ == 0
1620
+ ):
1621
+ should_be_added = False
1622
+ break
1623
+ if should_be_added:
1624
+ with_midpoints_no_dups.append(point)
1625
+ these_out_distances_no_dups.append(these_out_distances2[y2])
1626
+
1627
+ n_minus_1_midpoints = len(with_midpoints_no_dups) - 1
1628
+ for y3 in range(len(with_midpoints_no_dups)):
1629
+ point = with_midpoints_no_dups[y3]
1630
+ should_be_added = True
1631
+
1632
+ for z2 in np.arange(len(out_coords))[::-1]:
1633
+ if out_indices[z2] != index:
1634
+ continue
1635
+ out_point = out_coords[z2]
1636
+
1637
+ if (
1638
+ np.sqrt(
1639
+ (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1640
+ )
1641
+ == 0
1642
+ ):
1643
+ should_be_added = False
1644
+ break
1645
+
1646
+ if not should_be_added:
1647
+ continue
1648
+
1649
+ out_coords.append(point)
1650
+ out_indices.append(index)
1651
+ out_distances.append(these_out_distances_no_dups[y3])
1652
+ if y3 == 0 or y3 == n_minus_1_midpoints:
1653
+ was_midpoint.append(False)
1654
+ else:
1655
+ was_midpoint.append(True)
677
1656
 
678
- first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
679
- is_last_point = point_df["next"].isna()
1657
+ return (
1658
+ out_coords,
1659
+ out_indices,
1660
+ was_midpoint,
1661
+ out_distances,
1662
+ )
680
1663
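At its core, the numba loop performs a standard point-to-segment projection: the anchor is projected onto each segment via dot products, the parameter is clamped to [0, 1], and near-endpoint hits snap back to the endpoints. The same geometry in plain NumPy:

```python
# Plain-NumPy version of the projection used inside the numba loop above.
import numpy as np

def closest_point_on_segment(anchor, start, end, eps=1e-6):
    v = end - start
    length_sq = np.dot(v, v)
    if length_sq == 0:          # degenerate zero-length segment
        return start
    t = np.dot(anchor - start, v) / length_sq
    t = max(0.0, min(1.0, t))   # clamp onto the segment
    if t < eps:                 # mirror the near-endpoint snapping above
        return start
    if t > 1 - eps:
        return end
    return start + t * v

p = closest_point_on_segment(np.array([0.5, 1.0]), np.array([0.0, 0.0]), np.array([1.0, 0.0]))
assert np.allclose(p, [0.5, 0.0])
```

The hand-rolled swap loop after the projection exists because `np.lexsort` is not supported inside `njit` functions, as the inline comment notes.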
 
681
- point_df.loc[is_last_point, "next"] = first_points
682
- assert point_df["next"].notna().all()
683
1664
 
684
- point_df["geometry"] = [
685
- LineString([x1, x2])
686
- for x1, x2 in zip(point_df["geometry"], point_df["next"], strict=False)
687
- ]
688
- return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
1665
+ def _separate_single_neighbored_from_multi_neighoured_geometries(
1666
+ gdf: GeoDataFrame, neighbors: GeoDataFrame
1667
+ ) -> tuple[GeoDataFrame, GeoDataFrame]:
1668
+ """Split GeoDataFrame in two: those with 0 or 1 neighbors and those with 2 or more.
689
1669
 
1670
+ Because single-neighbored polygons do not need splitting.
1671
+ """
1672
+ tree = STRtree(neighbors.geometry.values)
1673
+ left, right = tree.query(gdf.geometry.values, predicate="intersects")
1674
+ pairs = pd.Series(right, index=left)
1675
+ has_more_than_one_neighbor = (
1676
+ pairs.groupby(level=0).size().loc[lambda x: x > 1].index
1677
+ )
690
1678
 
691
- def points_to_line_segments(points: GeoDataFrame) -> GeoDataFrame:
692
- points = points.copy()
693
- points["next"] = points.groupby(level=0)["geometry"].shift(-1)
1679
+ more_than_one_neighbor = gdf.iloc[has_more_than_one_neighbor]
1680
+ one_or_zero_neighbors = gdf.iloc[
1681
+ pd.Index(range(len(gdf))).difference(has_more_than_one_neighbor)
1682
+ ]
694
1683
 
695
- first_points = points.loc[lambda x: ~x.index.duplicated(), "geometry"]
696
- is_last_point = points["next"].isna()
1684
+ return one_or_zero_neighbors, more_than_one_neighbor
697
1685
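The split relies on one bulk `STRtree.query` with the `intersects` predicate, then counts hits per left-hand geometry; positional indices with two or more hits go to the `more_than_one_neighbor` frame. Self-contained:

```python
# The neighbor-count pattern above, self-contained.
import pandas as pd
from shapely import STRtree
from shapely.geometry import box

geoms = [box(0, 0, 1, 1), box(10, 10, 11, 11)]
neighbors = [box(0.5, 0.5, 1.5, 1.5), box(0.9, -0.5, 2, 0.5), box(10.5, 10.5, 12, 12)]

tree = STRtree(neighbors)
left, right = tree.query(geoms, predicate="intersects")
pairs = pd.Series(right, index=left)
multi = pairs.groupby(level=0).size().loc[lambda x: x > 1].index
print(list(multi))  # [0]: only the first geometry has 2+ neighbors
```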
 
698
- points.loc[is_last_point, "next"] = first_points
699
- assert points["next"].notna().all()
700
1686
 
701
- points["geometry"] = [
702
- LineString([x1, x2])
703
- for x1, x2 in zip(points["geometry"], points["next"], strict=False)
704
- ]
705
- return GeoDataFrame(
706
- points.drop(columns=["next"]), geometry="geometry", crs=points.crs
707
- )
1687
+ def split_and_eliminate_by_longest(
1688
+ gdf: GeoDataFrame | tuple[GeoDataFrame],
1689
+ to_eliminate: GeoDataFrame,
1690
+ tolerance: float | int,
1691
+ ignore_index: bool = False,
1692
+ **kwargs,
1693
+ ) -> tuple[GeoDataFrame]:
1694
+ if isinstance(gdf, (list, tuple)):
1695
+ # concat, then break up the dataframes in the end
1696
+ was_multiple_gdfs = True
1697
+ original_cols = [df.columns for df in gdf]
1698
+ gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
1699
+ else:
1700
+ was_multiple_gdfs = False
1701
+
1702
+ if 0:
1703
+ to_eliminate.geometry = to_eliminate.buffer(
1704
+ -PRECISION,
1705
+ resolution=1,
1706
+ join_style=2,
1707
+ ).buffer(
1708
+ PRECISION,
1709
+ resolution=1,
1710
+ join_style=2,
1711
+ )
1712
+ to_eliminate = to_eliminate.loc[lambda x: ~x.is_empty]
708
1713
 
1714
+ # now to split polygons to be eliminated to avoid weird shapes
1715
+ # split only the polygons with multiple neighbors
1716
+ single_neighbored, multi_neighbored = (
1717
+ _separate_single_neighbored_from_multi_neighoured_geometries(to_eliminate, gdf)
1718
+ )
1719
+ multi_neighbored = split_by_neighbors(multi_neighbored, gdf, tolerance=tolerance)
1720
+ to_eliminate = pd.concat([multi_neighbored, single_neighbored])
1721
+ gdf, isolated = eliminate_by_longest(
1722
+ gdf, to_eliminate, ignore_index=ignore_index, **kwargs
1723
+ )
709
1724
 
710
- def explore_geosexception(
711
- e: GEOSException, *gdfs: GeoDataFrame, logger: Any | None = None
712
- ) -> None:
713
- """Extract the coordinates of a GEOSException and show in map.
1725
+ if not was_multiple_gdfs:
1726
+ return gdf, isolated
714
1727
 
715
- Args:
716
- e: The exception thrown by a GEOS operation, which potentially contains coordinates information.
717
- *gdfs: One or more GeoDataFrames to display for context in the map.
718
- logger: An optional logger to log the error with visualization. If None, uses standard output.
1728
+ gdfs = ()
1729
+ for i, cols in enumerate(original_cols):
1730
+ df = gdf.loc[gdf["_df_idx"] == i, cols]
1731
+ gdfs += (df,)
1732
+ gdfs += (isolated,)
719
1733
 
720
- """
721
- from ..maps.maps import Explore
722
- from ..maps.maps import explore
723
-
724
- pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
725
-
726
- matches = re.findall(pattern, str(e))
727
- coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
728
- exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
729
- if len(exception_point):
730
- exception_point["wkt"] = exception_point.to_wkt()
731
- if logger:
732
- logger.error(
733
- e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
734
- )
735
- else:
736
- explore(exception_point, *gdfs, mask=exception_point.buffer(100))
737
- else:
738
- if logger:
739
- logger.error(e, Explore(*gdfs))
740
- else:
741
- explore(*gdfs)
1734
+ return gdfs
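
When a tuple of GeoDataFrames is passed, each frame is tagged with a temporary `_df_idx` column, everything is processed as one frame, and the result is split back using the remembered column sets, with the eliminated-but-isolated geometries appended last. The bookkeeping in isolation:

```python
# The tag/concat/split-back bookkeeping used above, in isolation.
import pandas as pd

def process_together(frames, func):
    original_cols = [df.columns for df in frames]
    combined = pd.concat(df.assign(_df_idx=i) for i, df in enumerate(frames))
    combined = func(combined)  # must preserve the _df_idx column
    return tuple(
        combined.loc[combined["_df_idx"] == i, cols]
        for i, cols in enumerate(original_cols)
    )
```

`eliminate_by_longest` itself (from this package's polygon tools) merges each polygon to be eliminated into the neighbor it shares the longest border with; splitting the multi-neighbored slivers first keeps those merges from producing oddly shaped unions.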