PyPI - ssb-sgis - Versions diffs - 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl - Mend

ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

sgis/__init__.py +5 -2
sgis/geopandas_tools/buffer_dissolve_explode.py +13 -9
sgis/geopandas_tools/centerlines.py +110 -47
sgis/geopandas_tools/cleaning.py +331 -0
sgis/geopandas_tools/conversion.py +9 -3
sgis/geopandas_tools/duplicates.py +67 -49
sgis/geopandas_tools/general.py +15 -1
sgis/geopandas_tools/neighbors.py +12 -0
sgis/geopandas_tools/overlay.py +26 -17
sgis/geopandas_tools/polygon_operations.py +281 -100
sgis/geopandas_tools/polygons_as_rings.py +72 -10
sgis/geopandas_tools/sfilter.py +8 -8
sgis/helpers.py +20 -3
sgis/io/dapla_functions.py +28 -6
sgis/io/write_municipality_data.py +11 -5
sgis/maps/examine.py +10 -7
sgis/maps/explore.py +102 -25
sgis/maps/map.py +32 -6
sgis/maps/maps.py +40 -58
sgis/maps/tilesources.py +61 -0
sgis/networkanalysis/closing_network_holes.py +89 -62
sgis/networkanalysis/cutting_lines.py +1 -1
sgis/networkanalysis/nodes.py +1 -1
sgis/networkanalysis/traveling_salesman.py +8 -4
sgis/parallel/parallel.py +63 -10
sgis/raster/raster.py +29 -27
{ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/METADATA +4 -1
ssb_sgis-0.3.9.dist-info/RECORD +59 -0
{ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/WHEEL +1 -1
sgis/geopandas_tools/snap_polygons.py +0 -0
ssb_sgis-0.3.8.dist-info/RECORD +0 -58
{ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/LICENSE +0 -0

sgis/geopandas_tools/cleaning.py ADDED Viewed

@@ -0,0 +1,331 @@
+import warnings
+import pandas as pd
+from geopandas import GeoDataFrame, GeoSeries
+from numpy.typing import NDArray
+from shapely import (
+    extract_unique_points,
+    get_coordinates,
+    get_exterior_ring,
+    linearrings,
+    make_valid,
+    polygons,
+)
+from shapely.geometry import LinearRing
+from ..networkanalysis.closing_network_holes import get_angle
+from .buffer_dissolve_explode import buff, dissexp
+from .conversion import coordinate_array, to_geoseries
+from .duplicates import get_intersections, update_geometries
+from .general import sort_large_first, sort_long_first
+from .geometry_types import get_geom_type
+from .overlay import clean_overlay
+from .polygon_operations import close_all_holes, close_thin_holes, get_gaps
+from .polygons_as_rings import PolygonsAsRings
+from .sfilter import sfilter, sfilter_inverse
+# NOTE(review): these filters are installed when the module is imported and are
+# not wrapped in a context manager, so they apply to the whole process.
+warnings.simplefilter(action="ignore", category=UserWarning)
+warnings.simplefilter(action="ignore", category=RuntimeWarning)
+# Tolerances below this value are rejected by _cleaning_checks. coverage_clean
+# also drops geometries that are empty after a negative buffer of PRECISION / 10.
+PRECISION = 1e-4
+# Passed as the 'resolution' argument to the buffer calls in this module.
+BUFFER_RES = 50
+def get_angle_between_indexed_points(point_df: GeoDataFrame):
+    """Add angle columns for consecutive points that share an index value.
+    Points are grouped by the first index level. Each point is paired with the
+    next point of its group, and 'get_angle' is applied to the two coordinate
+    arrays. The frame is modified in place and also returned.
+    Args:
+        point_df: GeoDataFrame of points with a 'geometry' column.
+    Returns:
+        The same frame with the columns 'next', 'angle', 'prev_angle' and
+        'angle_diff' (angle minus the previous angle within the group).
+    """
+    following_point = point_df.groupby(level=0)["geometry"].shift(-1)
+    point_df["next"] = following_point
+    # the last point of each group has no successor and gets no angle
+    has_next = point_df["next"].notna()
+    start_coords = coordinate_array(point_df.loc[has_next, "geometry"].values)
+    end_coords = coordinate_array(point_df.loc[has_next, "next"].values)
+    point_df.loc[has_next, "angle"] = get_angle(start_coords, end_coords)
+    angle_by_group = point_df.groupby(level=0)["angle"]
+    point_df["prev_angle"] = angle_by_group.shift(1)
+    point_df["angle_diff"] = point_df["angle"].sub(point_df["prev_angle"])
+    return point_df
+def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+    """Remove thin spikes in polygons.
+    Note that this function might be slow. It should only be used if necessary.
+    The geometry column of 'gdf' is overwritten in place and the same object
+    is returned.
+    Args:
+        gdf: GeoDataFrame of polygons.
+        tolerance: distance (usually meters) used as the minimum thickness
+            for polygons to be eliminated. Any spike thinner than the tolerance
+            will be removed.
+    Returns:
+        The GeoDataFrame with spikes thinner than the tolerance removed.
+    """
+    # Applied to the rings of the polygons through PolygonsAsRings.apply_numpy_func.
+    def _remove_spikes(geoms: NDArray[LinearRing]) -> NDArray[LinearRing]:
+        if not len(geoms):
+            return geoms
+        # give every ring a positional index so that points can be traced back to it
+        geoms = to_geoseries(geoms).reset_index(drop=True)
+        points = (
+            extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
+        )
+        points = get_angle_between_indexed_points(points)
+        # a ring is treated as spiky if any of its points has an 'angle_diff'
+        # in the interval [180, 180.01)
+        indices_with_spikes = points[
+            lambda x: (x["angle_diff"] >= 180) & (x["angle_diff"] < 180.01)
+        ].index.unique()
+        rings_with_spikes = geoms[geoms.index.isin(indices_with_spikes)]
+        rings_without_spikes = geoms[~geoms.index.isin(indices_with_spikes)]
+        # Polygonize the exterior rings, shrink them by the tolerance, close holes,
+        # then buffer the remaining exterior rings by ten times the tolerance.
+        def to_buffered_rings_without_spikes(x):
+            polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
+            return (
+                polys.buffer(-tolerance, resolution=BUFFER_RES)
+                .explode(index_parts=False)
+                .pipe(close_all_holes)
+                .pipe(get_exterior_ring)
+                .buffer(tolerance * 10)
+            )
+        buffered = to_buffered_rings_without_spikes(
+            rings_with_spikes.buffer(tolerance / 2, resolution=BUFFER_RES)
+        )
+        # keep only the points of the spiky rings that 'sfilter' selects against
+        # the buffered outlines (presumably the points intersecting them - TODO confirm)
+        points_without_spikes = (
+            extract_unique_points(rings_with_spikes)
+            .explode(index_parts=False)
+            .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
+        )
+        # linearrings require at least 4 coordinate pairs, or three unique
+        points_without_spikes = points_without_spikes.loc[
+            lambda x: x.groupby(level=0).size() >= 3
+        ]
+        # need an index from 0 to n-1 in 'linearrings'
+        to_int_index = {
+            ring_idx: i
+            for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
+        }
+        int_indices = points_without_spikes.index.map(to_int_index)
+        # rebuild one ring per original ring index from the remaining points
+        as_lines = pd.Series(
+            linearrings(
+                get_coordinates(points_without_spikes.geometry.values),
+                indices=int_indices,
+            ),
+            index=points_without_spikes.index.unique(),
+        )
+        as_lines = pd.concat([as_lines, rings_without_spikes])
+        # the missing polygons are thin and/or spiky. Let's remove them
+        missing = geoms.loc[~geoms.index.isin(as_lines.index)]
+        # None placeholders keep one entry per input ring
+        missing = pd.Series(
+            [None] * len(missing),
+            index=missing.index.values,
+        )
+        # sorting restores the positional order of the input rings
+        return pd.concat([as_lines, missing]).sort_index()
+    gdf.geometry = (
+        PolygonsAsRings(gdf.geometry).apply_numpy_func(_remove_spikes).to_numpy()
+    )
+    return gdf
+def coverage_clean(
+    gdf: GeoDataFrame,
+    tolerance: int | float,
+    duplicate_action: str = "fix",
+    remove_isolated: bool = False,
+) -> GeoDataFrame:
+    """Fix thin gaps, holes, slivers and double surfaces.
+    Rules:
+    - Holes (interiors) thinner than the tolerance are closed.
+    - Gaps between polygons are filled if thinner than the tolerance.
+    - Sliver polygons thinner than the tolerance are eliminated
+    into the neighbor polygon with the longest shared border.
+    - Double surfaces thinner than the tolerance are eliminated.
+    If duplicate_action is "fix", thicker double surfaces will
+    be updated from top to bottom of the GeoDataFrame's rows.
+    - Line and point geometries are removed.
+    - MultiPolygons are exploded to Polygons.
+    - Index is reset.
+    Args:
+        gdf: GeoDataFrame to be cleaned.
+        tolerance: distance (usually meters) used as the minimum thickness
+            for polygons to be eliminated. Any gap, hole, sliver or double
+            surface that is empty after a negative buffer of tolerance / 2
+            is eliminated into the neighbor with the longest shared border.
+        duplicate_action: Either "fix", "error" or "ignore".
+            If "fix" (default), double surfaces thicker than the
+            tolerance will be updated from top to bottom (function update_geometries)
+            and then dissolved into the neighbor polygon with the longest shared border.
+            If "error", an Exception is raised if there are any double surfaces thicker
+            than the tolerance. If "ignore", double surfaces are kept as is.
+        remove_isolated: If False (default), thin gaps, double surfaces and slivers
+            that do not match any polygon in the spatial join are appended to the
+            result. If True, they are left out.
+    Returns:
+        A GeoDataFrame with cleaned polygons.
+    Raises:
+        ValueError: If the input checks in _cleaning_checks fail, or if
+            duplicate_action is "error" and there are double surfaces
+            thicker than the tolerance.
+    """
+    # NOTE(review): _cleaning_checks returns 'gdf' for empty input or a falsy
+    # tolerance, but the return value is ignored here, so cleaning continues.
+    _cleaning_checks(gdf, tolerance, duplicate_action)
+    if not gdf.index.is_unique:
+        gdf = gdf.reset_index(drop=True)
+    gdf = close_thin_holes(gdf, tolerance)
+    gaps = get_gaps(gdf, include_interiors=True)
+    double = get_intersections(gdf)
+    # tag every double surface so that thin and thick ones can be told apart later
+    double["_double_idx"] = range(len(double))
+    gdf, slivers = split_out_slivers(gdf, tolerance)
+    # 'thin' means empty after a negative buffer of half the tolerance
+    thin_gaps_and_double = pd.concat([gaps, double]).loc[
+        lambda x: x.buffer(-tolerance / 2).is_empty
+    ]
+    all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+    if not all_are_thin and duplicate_action == "fix":
+        gdf, thin_gaps_and_double = _properly_fix_duplicates(
+            gdf, double, slivers, thin_gaps_and_double, tolerance
+        )
+        # gaps = pd.concat([gaps, more_gaps], ignore_index=True)
+        # double = pd.concat([double, more_double], ignore_index=True)
+    elif not all_are_thin and duplicate_action == "error":
+        raise ValueError("Large double surfaces.")
+    # skip candidates that vanish after a negative buffer of PRECISION / 10
+    to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True).loc[
+        lambda x: ~x.buffer(-PRECISION / 10).is_empty
+    ]
+    to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
+    gdf["_poly_idx"] = range(len(gdf))
+    gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
+    # left join: candidates without a matching polygon get a missing '_poly_idx'
+    joined = to_eliminate.sjoin(gdf_geoms_idx, how="left")
+    isolated = joined[lambda x: x["_poly_idx"].isna()]
+    intersecting = joined[lambda x: x["_poly_idx"].notna()]
+    # Map each candidate to a single polygon: overlay the candidates with the
+    # polygons buffered by the tolerance and keep the first row per candidate
+    # after 'sort_long_first' (presumably the longest overlap - TODO confirm).
+    poly_idx_mapper: pd.Series = (
+        clean_overlay(
+            intersecting[["_eliminate_idx", "geometry"]],
+            buff(gdf_geoms_idx, tolerance, resolution=BUFFER_RES),
+            geom_type="polygon",
+        )
+        .pipe(sort_long_first)
+        .drop_duplicates("_eliminate_idx")
+        .set_index("_eliminate_idx")["_poly_idx"]
+    )
+    intersecting["_poly_idx"] = intersecting["_eliminate_idx"].map(poly_idx_mapper)
+    without_double = update_geometries(intersecting).drop(
+        columns=["_eliminate_idx", "_double_idx", "index_right"]
+    )
+    # merge the candidates into their target polygon; attributes are taken from
+    # the first row of each '_poly_idx' group
+    cleaned = (
+        dissexp(pd.concat([gdf, without_double]), by="_poly_idx", aggfunc="first")
+        .reset_index(drop=True)
+        .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+    )
+    if not remove_isolated:
+        cleaned = pd.concat(
+            [
+                cleaned,
+                isolated.drop(
+                    columns=[
+                        "_double_idx",
+                        "_eliminate_idx",
+                        "_poly_idx",
+                        "index_right",
+                    ]
+                ),
+            ]
+        )
+    # Add back the part of each input polygon that 'sfilter_inverse' reports for
+    # its representative point (presumably the points not covered by 'cleaned'
+    # - TODO confirm), erased by what is already in 'cleaned'.
+    missing_indices: pd.Index = sfilter_inverse(
+        gdf.representative_point(), cleaned
+    ).index
+    missing = clean_overlay(
+        gdf.loc[missing_indices].drop(columns="_poly_idx"),
+        cleaned,
+        how="difference",
+        geom_type="polygon",
+    )
+    return pd.concat([cleaned, missing], ignore_index=True)
+def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
+    """Resolve thick double surfaces, retrying up to four times.
+    Each pass dissolves the double surfaces that are not in
+    'thin_gaps_and_double' into 'gdf', splits out new slivers and recomputes
+    the gaps and double surfaces. The loop stops as soon as every remaining
+    double surface is empty after a negative buffer of tolerance / 2.
+    Args:
+        gdf: GeoDataFrame of the polygons being cleaned.
+        double: double surfaces of 'gdf' with a '_double_idx' column.
+        slivers: slivers already split out of 'gdf'.
+        thin_gaps_and_double: the gaps and double surfaces that are thin.
+        tolerance: thickness threshold used to decide what is thin.
+    Returns:
+        A tuple of the updated 'gdf' and the recomputed thin gaps and double
+        surfaces.
+    Raises:
+        ValueError: If thick double surfaces remain after the last pass.
+    """
+    # NOTE(review): slivers found inside the loop are accumulated here but are not
+    # part of the return value, so the caller never sees them - confirm intended.
+    for _ in range(4):
+        gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
+        gdf, more_slivers = split_out_slivers(gdf, tolerance)
+        slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+        gaps = get_gaps(gdf, include_interiors=True)
+        double = get_intersections(gdf)
+        double["_double_idx"] = range(len(double))
+        thin_gaps_and_double = pd.concat([gaps, double]).loc[
+            lambda x: x.buffer(-tolerance / 2).is_empty
+        ]
+        all_are_thin = (
+            double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+        )
+        if all_are_thin:
+            return gdf, thin_gaps_and_double
+    # Row-wise mask of the double surfaces that are still thick. The mask must not
+    # be reduced with .all(): a scalar cannot select rows and made this lookup
+    # fail with a KeyError before the ValueError below could be raised.
+    is_thick = ~double["_double_idx"].isin(thin_gaps_and_double["_double_idx"])
+    not_thin = double.loc[is_thick]
+    raise ValueError("Failed to properly fix thick double surfaces", not_thin.geometry)
+def _dissolve_thick_double_and_update(gdf, double, thin_double):
+    """Overlay the thick double surfaces onto 'gdf' and remove the overlaps.
+    Args:
+        gdf: GeoDataFrame of polygons.
+        double: double surfaces with a '_double_idx' column.
+        thin_double: frame whose '_double_idx' values mark the thin double surfaces.
+    Returns:
+        The result of an "update" overlay of 'gdf' with the thick double surfaces,
+        passed through 'sort_large_first' and 'update_geometries'.
+    """
+    # everything not listed as thin counts as thick
+    is_thin = double["_double_idx"].isin(thin_double["_double_idx"])
+    thick = double.loc[~is_thin].drop(columns="_double_idx")
+    thick = update_geometries(sort_large_first(thick))
+    updated = clean_overlay(gdf, thick, how="update")
+    return update_geometries(sort_large_first(updated))
+def _cleaning_checks(gdf, tolerance, duplicate_action):
+    """Validate the arguments of the cleaning functions.
+    Empty input or a falsy tolerance skips every check and hands 'gdf' back.
+    Otherwise nothing is returned when all checks pass.
+    Raises:
+        ValueError: If the geometries are not polygons, if 'tolerance' is below
+            PRECISION, or if 'duplicate_action' is not a supported option.
+    """
+    nothing_to_check = len(gdf) == 0 or not tolerance
+    if nothing_to_check:
+        return gdf
+    geom_type = get_geom_type(gdf)
+    if geom_type != "polygon":
+        raise ValueError("Must be polygons.")
+    if tolerance < PRECISION:
+        message = (
+            f"'tolerance' must be larger than {PRECISION} to avoid "
+            "problems with floating point precision."
+        )
+        raise ValueError(message)
+    valid_actions = ("fix", "error", "ignore")
+    if duplicate_action not in valid_actions:
+        raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
+def split_out_slivers(
+    gdf: GeoDataFrame | GeoSeries, tolerance: float | int
+) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
+    """Separate the geometries that vanish when shrunk by half the tolerance.
+    Args:
+        gdf: GeoDataFrame or GeoSeries to split.
+        tolerance: a geometry counts as a sliver if it is empty after a
+            negative buffer of tolerance / 2.
+    Returns:
+        A tuple of the non-sliver rows and the sliver rows, in that order.
+    """
+    shrunk = gdf.buffer(-tolerance / 2)
+    vanishes = shrunk.is_empty
+    return gdf.loc[~vanishes], gdf.loc[vanishes]

sgis/geopandas_tools/conversion.py CHANGED Viewed

@@ -22,7 +22,12 @@ def to_geoseries(obj: Any, crs: Any | None = None) -> GeoSeries:
             pass
     try:
-        index = obj.index.values
+        if hasattr(obj.index, "values"):
+            # pandas objects
+            index = obj.index
+        else:
+            # list
+            index = None
     except AttributeError:
         index = None
@@ -296,9 +301,10 @@ def to_gdf(
     if geom_col in obj.keys():
         if isinstance(obj, pd.DataFrame):
             notna = obj[geom_col].notna()
-            obj.loc[notna, geom_col] = GeoSeries(
-                make_shapely_geoms(obj.loc[notna, geom_col]), index=index
+            obj.loc[notna, geom_col] = list(
+                make_shapely_geoms(obj.loc[notna, geom_col])
             )
+            obj[geom_col] = GeoSeries(obj[geom_col])
             return GeoDataFrame(obj, geometry=geom_col, crs=crs, **kwargs)
         if isinstance(obj[geom_col], Geometry):
             return GeoDataFrame(

sgis/geopandas_tools/duplicates.py CHANGED Viewed

@@ -14,9 +14,9 @@ from .overlay import clean_overlay
 def update_geometries(
     gdf: GeoDataFrame,
+    geom_type: str | None = None,
     keep_geom_type: bool = True,
     grid_size: int | None = None,
-    copy: bool = True,
 ) -> GeoDataFrame:
     """Puts geometries on top of each other rowwise.
@@ -29,9 +29,11 @@ def update_geometries(
             of intersection resulting in multiple geometry types or
             GeometryCollections. If False, return all resulting geometries
             (potentially mixed types).
+        geom_type: Optionally specify what geometry type to keep.,
+            if there are mixed geometry types. Must be either "polygon",
+            "line" or "point".
         grid_size: Precision grid size to round the geometries. Will use the highest
             precision of the inputs by default.
-        copy: Defaults to True.
     Example
     ------
@@ -78,56 +80,51 @@ def update_geometries(
     if len(gdf) <= 1:
         return gdf
-    df = pd.DataFrame(gdf, copy=copy)
-    unioned = Polygon()
-    out_rows, indices, geometries = [], [], []
-    if keep_geom_type:
+    if geom_type:
+        gdf = to_single_geom_type(gdf, geom_type)
+        keep_geom_type = True
+    elif keep_geom_type:
         geom_type = get_geom_type(gdf)
         if geom_type == "mixed":
             raise ValueError("Cannot have mixed geometries when keep_geom_type is True")
-    for i, row in df.iterrows():
-        geom = row.pop("geometry")
-        if any(geom.equals(geom2) for geom2 in geometries):
-            continue
-        try:
-            new = difference(geom, unioned, grid_size=grid_size)
-        except GEOSException:
-            try:
-                geom = make_valid(geom)
-                new = difference(geom, unioned, grid_size=grid_size)
-            except GEOSException:
-                unioned = to_single_geom_type(unioned, geom_type=geom_type)
-                new = difference(geom, unioned, grid_size=grid_size)
+    geom_col = gdf._geometry_column_name
+    index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
+    gdf = gdf.reset_index(drop=True)
-        if not new:
-            continue
+    tree = STRtree(gdf.geometry.values)
+    left, right = tree.query(gdf.geometry.values, predicate="intersects")
+    indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
+    # select geometries from 'right', index from 'left', dissolve by 'left'
+    erasers = (
+        pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
+        .groupby(level=0)
+        .agg(unary_union)
+    )
-        try:
-            unioned = unary_union([new, unioned], grid_size=grid_size)
-        except GEOSException:
-            new = make_valid(new)
-            unioned = unary_union([new, unioned], grid_size=grid_size)
+    # match up the aggregated erasers by index
+    erased = difference(
+        gdf.geometry.loc[erasers.index],
+        erasers,
+        grid_size=grid_size,
+    )
-        unioned = make_valid(unioned)
+    gdf.loc[erased.index, geom_col] = erased
-        out_rows.append(row)
-        geometries.append(new)
-        indices.append(i)
+    gdf = gdf.loc[~gdf.is_empty]
-    out = GeoDataFrame(out_rows, geometry=geometries, index=indices, crs=gdf.crs)
+    gdf.index = gdf.index.map(index_mapper)
     if keep_geom_type:
-        out = to_single_geom_type(out, geom_type)
+        gdf = to_single_geom_type(gdf, geom_type)
-    return out
+    return gdf
-def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDataFrame:
+def get_intersections(
+    gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool = True
+) -> GeoDataFrame:
     """Find geometries that intersect in a GeoDataFrame.
     Does an intersection with itself and keeps only the geometries that appear
@@ -140,6 +137,11 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDat
     Args:
         gdf: GeoDataFrame of polygons.
+        geom_type: Optionally specify which geometry type to keep.
+            Either "polygon", "line" or "point".
+        keep_geom_type: Whether to keep the original geometry type.
+            If mixed geometry types and keep_geom_type=True,
+            an exception is raised.
     Returns:
         A GeoDataFrame of the overlapping polygons.
@@ -197,20 +199,27 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDat
     """
     if isinstance(gdf, GeoSeries):
         gdf = GeoDataFrame({"geometry": gdf}, crs=gdf.crs)
+        was_geoseries = True
+    else:
+        was_geoseries = False
     idx_name = gdf.index.name
     gdf = gdf.assign(orig_idx=gdf.index).reset_index(drop=True)
-    duplicated_geoms = _get_intersecting_geometries(gdf, geom_type=geom_type).pipe(
-        clean_geoms
-    )
+    duplicated_geoms = _get_intersecting_geometries(
+        gdf, geom_type, keep_geom_type
+    ).pipe(clean_geoms)
     duplicated_geoms.index = duplicated_geoms["orig_idx"].values
     duplicated_geoms.index.name = idx_name
+    if was_geoseries:
+        return duplicated_geoms.geometry
     return duplicated_geoms.drop(columns="orig_idx")
-def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
+def _get_intersecting_geometries(
+    gdf: GeoDataFrame, geom_type, keep_geom_type
+) -> GeoDataFrame:
     right = gdf[[gdf._geometry_column_name]]
     right["idx_right"] = right.index
@@ -221,9 +230,22 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
     )
     left["idx_left"] = left.index
-    not_identical = lambda x: x["idx_left"] != x["idx_right"]
+    def are_not_identical(df):
+        return df["idx_left"] != df["idx_right"]
-    if geom_type is None and get_geom_type(gdf) == "mixed":
+    if geom_type or get_geom_type(gdf) != "mixed":
+        intersected = clean_overlay(
+            left,
+            right,
+            how="intersection",
+            geom_type=geom_type,
+            keep_geom_type=keep_geom_type,
+        ).loc[are_not_identical]
+    else:
+        if keep_geom_type:
+            raise ValueError(
+                "Cannot set keep_geom_type=True when the geom_type is mixed."
+            )
         gdf = make_all_singlepart(gdf)
         intersected = []
         for geom_type in ["polygon", "line", "point"]:
@@ -232,11 +254,7 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
             intersected += [
                 clean_overlay(left, right, how="intersection", geom_type=geom_type)
             ]
-        intersected = pd.concat(intersected, ignore_index=True).loc[not_identical]
-    else:
-        intersected = clean_overlay(
-            left, right, how="intersection", geom_type=geom_type
-        ).loc[not_identical]
+        intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
     # make sure it's correct by sjoining a point inside the polygons
     points_joined = intersected.representative_point().to_frame().sjoin(intersected)

sgis/geopandas_tools/general.py CHANGED Viewed

@@ -25,6 +25,12 @@ from shapely.ops import unary_union
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
+def split_geom_types(
+    gdf: GeoDataFrame | GeoSeries,
+) -> tuple[GeoDataFrame | GeoSeries, ...]:
+    """Split the input into one object per distinct geometry type.
+    Args:
+        gdf: GeoDataFrame or GeoSeries, possibly with mixed geometry types.
+    Returns:
+        A tuple with one subset of 'gdf' per unique value of 'geom_type',
+        in order of first appearance.
+    """
+    # compute the geometry types once instead of once per unique type
+    geom_types = gdf.geom_type
+    return tuple(gdf.loc[geom_types == geom_type] for geom_type in geom_types.unique())
 def get_common_crs(
     iterable: Iterable[Hashable], strict: bool = False
 ) -> pyproj.CRS | None:
@@ -453,6 +459,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
     >>> sg.qtm(lines, "l")
     """
+    if not all(isinstance(gdf, (GeoSeries, GeoDataFrame)) for gdf in gdfs):
+        raise TypeError("gdf must be GeoDataFrame or GeoSeries")
     if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
         raise ValueError("Cannot convert points to lines.")
@@ -486,7 +495,12 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
         if copy:
             gdf = gdf.copy()
-        gdf.geometry = gdf.geometry.map(_shapely_geometry_to_lines)
+        mapped = gdf.geometry.map(_shapely_geometry_to_lines)
+        try:
+            gdf.geometry = mapped
+        except AttributeError:
+            # geoseries
+            gdf.loc[:] = mapped
         gdf = to_single_geom_type(gdf, "line")

sgis/geopandas_tools/neighbors.py CHANGED Viewed

@@ -97,6 +97,9 @@ def get_neighbor_indices(
     if gdf.crs != neighbors.crs:
         raise ValueError(f"'crs' mismatch. Got {gdf.crs} and {neighbors.crs}")
+    if isinstance(neighbors, GeoSeries):
+        neighbors = neighbors.to_frame()
     # buffer and keep only geometry column
     if max_distance and predicate != "nearest":
         gdf = gdf.buffer(max_distance).to_frame()
@@ -116,6 +119,15 @@ def get_neighbor_indices(
     return joined["neighbor_index"]
+def get_neighbor_dfs(
+    df: GeoDataFrame | DataFrame,
+    neighbor_mapper: Series,
+) -> list[GeoDataFrame | DataFrame]:
+    """Get one subset of 'df' per unique index value in 'neighbor_mapper'.
+    Args:
+        df: (Geo)DataFrame whose index holds the values found in 'neighbor_mapper'.
+        neighbor_mapper: Series whose values are index labels of 'df'. Rows of the
+            Series that share an index value belong to the same group.
+    Returns:
+        A list with one (Geo)DataFrame per unique index value of 'neighbor_mapper',
+        in order of first appearance.
+    """
+    # .loc with a list always gives a Series, also when the index value occurs
+    # only once. A scalar would make 'isin' raise a TypeError.
+    return [
+        df[df.index.isin(neighbor_mapper.loc[[i]])]
+        for i in neighbor_mapper.index.unique()
+    ]
 def get_all_distances(
     gdf: GeoDataFrame | GeoSeries, neighbors: GeoDataFrame | GeoSeries
 ) -> DataFrame:

sgis/geopandas_tools/overlay.py CHANGED Viewed

@@ -28,6 +28,11 @@ from .general import clean_geoms
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
+DEFAULT_GRID_SIZE = None
+DEFAULT_LSUFFIX = "_1"
+DEFAULT_RSUFFIX = "_2"
 def clean_overlay(
     df1: GeoDataFrame,
     df2: GeoDataFrame,
@@ -35,8 +40,8 @@ def clean_overlay(
     keep_geom_type: bool = True,
     geom_type: str | None = None,
     grid_size: float | None = None,
-    lsuffix: str = "_1",
-    rsuffix: str = "_2",
+    lsuffix: str = DEFAULT_LSUFFIX,
+    rsuffix: str = DEFAULT_RSUFFIX,
 ) -> GeoDataFrame:
     """Fixes and explodes geometries before doing a shapely overlay, then cleans up.
@@ -132,18 +137,22 @@ def clean_overlay(
     df1 = DataFrame(df1).reset_index(drop=True)
     df2 = DataFrame(df2).reset_index(drop=True)
-    overlayed = gpd.GeoDataFrame(
-        _shapely_pd_overlay(
-            df1,
-            df2,
-            how=how,
-            grid_size=grid_size,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
-        ),
-        geometry="geometry",
-        crs=crs,
-    ).pipe(clean_geoms)
+    overlayed = (
+        gpd.GeoDataFrame(
+            _shapely_pd_overlay(
+                df1,
+                df2,
+                how=how,
+                grid_size=grid_size,
+                lsuffix=lsuffix,
+                rsuffix=rsuffix,
+            ),
+            geometry="geometry",
+            crs=crs,
+        )
+        .pipe(clean_geoms)
+        .pipe(make_all_singlepart, ignore_index=True)
+    )
     if keep_geom_type:
         overlayed = to_single_geom_type(overlayed, geom_type)
@@ -200,9 +209,9 @@ def _shapely_pd_overlay(
     df1: DataFrame,
     df2: DataFrame,
     how: str,
-    grid_size: float,
-    lsuffix,
-    rsuffix,
+    grid_size: float = DEFAULT_GRID_SIZE,
+    lsuffix=DEFAULT_LSUFFIX,
+    rsuffix=DEFAULT_RSUFFIX,
 ) -> DataFrame:
     if not grid_size and not len(df1) or not len(df2):
         return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)

ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl