PyPI - ssb-sgis - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

ssb-sgis 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

sgis/__init__.py CHANGED Viewed

@@ -38,6 +38,9 @@ from .geopandas_tools.point_operations import snap_all, snap_within_distance
 from .geopandas_tools.polygon_operations import (
     close_all_holes,
     close_small_holes,
+    eliminate_by_largest,
+    eliminate_by_longest,
+    eliminate_by_smallest,
     get_overlapping_polygon_indices,
     get_overlapping_polygon_product,
     get_overlapping_polygons,

sgis/geopandas_tools/polygon_operations.py CHANGED Viewed

@@ -15,11 +15,173 @@ from shapely import (
 )
 from shapely.ops import unary_union
-from .general import _push_geom_col
+from .general import _push_geom_col, to_lines
 from .neighbors import get_neighbor_indices
 from .overlay import clean_overlay
+def eliminate_by_longest(
+    gdf: GeoDataFrame,
+    min_area: int | float,
+    ignore_index: bool = False,
+    aggfunc: str | dict | list = "first",
+    **kwargs,
+) -> GeoDataFrame:
+    """Dissolves small polygons with the longest bordering neighbor polygon.
+    Eliminates small geometries by dissolving them with the neighboring
+    polygon with the longest shared border. The index and column values of the
+    large polygons will be kept, unless else is specified.
+    Args:
+        gdf: GeoDataFrame with polygon geometries.
+        min_area: minimum area for the polygons to be eliminated.
+        ignore_index: If False (default), the resulting GeoDataFrame will keep the
+            index of the large polygons. If True, the resulting axis will be labeled
+            0, 1, …, n - 1.
+        aggfunc: Aggregation function(s) to use when dissolving. Defaults to 'first',
+            meaning the column values of the large polygons are kept.
+        kwargs: Keyword arguments passed to the dissolve method.
+    Returns:
+        The GeoDataFrame with the small polygons dissolved into the large polygons.
+    """
+    if not ignore_index:
+        idx_mapper = {i: idx for i, idx in enumerate(gdf.index)}
+        idx_name = gdf.index.name
+    gdf = gdf.reset_index(drop=True)
+    small = gdf.loc[gdf.area <= min_area].assign(small_idx=lambda x: x.index)
+    large = gdf.loc[gdf.area > min_area].assign(large_idx=lambda x: x.index)
+    lines = to_lines(small[["small_idx", "geometry"]], large[["large_idx", "geometry"]])
+    lines = lines[lines["small_idx"].notna()]
+    lines["length__"] = lines.length
+    longest = lines.sort_values("length__", ascending=False).drop_duplicates(
+        "small_idx"
+    )
+    small_to_large = longest.set_index("small_idx")["large_idx"]
+    small["dissolve_idx"] = small["small_idx"].map(small_to_large)
+    large["dissolve_idx"] = large["large_idx"]
+    kwargs.pop("as_index", None)
+    eliminated = (
+        pd.concat([large, small])
+        .dissolve("dissolve_idx", aggfunc=aggfunc, **kwargs)
+        .drop(
+            ["length__", "small_idx", "large_idx"],
+            axis=1,
+            errors="ignore",
+        )
+    )
+    if ignore_index:
+        return eliminated.reset_index(drop=True)
+    eliminated.index = eliminated.index.map(idx_mapper)
+    eliminated.index.name = idx_name
+    return eliminated
+def eliminate_by_largest(
+    gdf: GeoDataFrame,
+    min_area: int | float,
+    ignore_index: bool = False,
+    aggfunc: str | dict | list = "first",
+    **kwargs,
+) -> GeoDataFrame:
+    """Dissolves small polygons with the largest neighbor polygon.
+    Eliminates small geometries by dissolving them with the neighboring
+    polygon with the largest area. The index and column values of the
+    large polygons will be kept, unless else is specified.
+    Args:
+        gdf: GeoDataFrame with polygon geometries.
+        min_area: minimum area for the polygons to be eliminated.
+        ignore_index: If False (default), the resulting GeoDataFrame will keep the
+            index of the large polygons. If True, the resulting axis will be labeled
+            0, 1, …, n - 1.
+        aggfunc: Aggregation function(s) to use when dissolving. Defaults to 'first',
+            meaning the column values of the large polygons are kept.
+        kwargs: Keyword arguments passed to the dissolve method.
+    Returns:
+        The GeoDataFrame with the small polygons dissolved into the large polygons.
+    """
+    return _eliminate_by_area(
+        gdf,
+        min_area=min_area,
+        ignore_index=ignore_index,
+        sort_ascending=False,
+        aggfunc=aggfunc,
+        **kwargs,
+    )
+def eliminate_by_smallest(
+    gdf: GeoDataFrame,
+    min_area: int | float,
+    ignore_index: bool = False,
+    aggfunc: str | dict | list = "first",
+    **kwargs,
+) -> GeoDataFrame:
+    return _eliminate_by_area(
+        gdf,
+        min_area=min_area,
+        ignore_index=ignore_index,
+        sort_ascending=True,
+        aggfunc=aggfunc,
+        **kwargs,
+    )
+def _eliminate_by_area(
+    gdf: GeoDataFrame,
+    min_area: int | float,
+    sort_ascending: bool,
+    ignore_index: bool = False,
+    aggfunc="first",
+    **kwargs,
+) -> GeoDataFrame:
+    if not ignore_index:
+        idx_mapper = {i: idx for i, idx in enumerate(gdf.index)}
+        idx_name = gdf.index.name
+    gdf = gdf.reset_index(drop=True)
+    small = gdf.loc[gdf.area <= min_area]
+    large = gdf.loc[gdf.area > min_area]
+    large["area__"] = large.area
+    joined = small.sjoin(
+        large[["area__", "geometry"]], predicate="touches"
+    ).sort_values("area__", ascending=sort_ascending)
+    largest = joined[~joined.index.duplicated()]
+    large = large.assign(index_right=lambda x: x.index)
+    kwargs.pop("as_index", None)
+    eliminated = (
+        pd.concat([large, largest])
+        .dissolve("index_right", aggfunc=aggfunc, **kwargs)
+        .drop(["area__"], axis=1, errors="ignore")
+    )
+    if ignore_index:
+        return eliminated.reset_index(drop=True)
+    eliminated.index = eliminated.index.map(idx_mapper)
+    eliminated.index.name = idx_name
+    return eliminated
 def get_polygon_clusters(
     *gdfs: GeoDataFrame | GeoSeries,
     cluster_col: str = "cluster",
@@ -161,7 +323,7 @@ def get_polygon_clusters(
 def get_overlapping_polygons(
-    gdf: GeoDataFrame | GeoSeries, ignore_index=False
+    gdf: GeoDataFrame | GeoSeries, ignore_index: bool = False
 ) -> GeoDataFrame | GeoSeries:
     """Find the areas that overlap.

{ssb_sgis-0.2.2.dist-info → ssb_sgis-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 0.2.2
+Version: 0.2.3
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT

{ssb_sgis-0.2.2.dist-info → ssb_sgis-0.2.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-sgis/__init__.py,sha256=npPhiQqWptYQF0cwL6WPqSNWyKWzgvNlxGF_cgEuNss,2230
+sgis/__init__.py,sha256=NItNaPnNtu0K8nr0jICLJCytdk2bSNW172GjORimssw,2309
 sgis/dapla.py,sha256=t0NXKeEKnOBcFCVbHYbqvKY7f8UtmVnBsf7CmaHNIEY,3243
 sgis/exceptions.py,sha256=ztMp4sB9xxPvwj2IEsO5kOaB4FmHuU_7-M2pZ7qaxTs,576
 sgis/geopandas_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -8,7 +8,7 @@ sgis/geopandas_tools/geometry_types.py,sha256=u6HjPgzL1IFhhIiJqShyG-SSfrCpOKevR5
 sgis/geopandas_tools/neighbors.py,sha256=tv8bmYgq4VNFbXmT2wcmJsFH8946NwbIBMQXAi3n8L4,14520
 sgis/geopandas_tools/overlay.py,sha256=DLvvMw-4LRst1QWPeQYHJe8OLwdxO_aoT38_x4BCL7A,11815
 sgis/geopandas_tools/point_operations.py,sha256=3JynroucouAbpON4DWG32S3MQQGmfIJuY7D6gkqtk70,6888
-sgis/geopandas_tools/polygon_operations.py,sha256=hCcfi8QnJyHrnuafBQy0LdRww1JoiCEV_FgMOj30T9A,13161
+sgis/geopandas_tools/polygon_operations.py,sha256=bFHh8o1rIG8ymuAvXLyGvmS8pSPculB9Jq8e_xbwVVA,18319
 sgis/geopandas_tools/to_geodataframe.py,sha256=4jOy0YvXBIiOEqQx7_ept5xfd39R1XKPN_OVK8kxhp8,9722
 sgis/helpers.py,sha256=OqTojkSl-JVKlJzqqB-d_0CH6mk7_LS1DkiIjp1gD8E,2674
 sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -33,7 +33,7 @@ sgis/networkanalysis/networkanalysisrules.py,sha256=BhhaSXIyBRNzxSOUP2kVBIR--TRq
 sgis/networkanalysis/nodes.py,sha256=fFagSB88Kj4yHCnxDtD3ALpGrAtkVPvGd7F8MOe7vuk,6740
 sgis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sgis/read_parquet.py,sha256=GSW2NDy4-XosbamPEzB1xhWxFAPHuGEJZglfQ-V6DzY,3774
-ssb_sgis-0.2.2.dist-info/LICENSE,sha256=lL2h0dNKGTKAE0CjTy62SDbRennVD1xPgM5LzGqhKeo,1074
-ssb_sgis-0.2.2.dist-info/METADATA,sha256=XvUd974KNd9OD1iXBkmvCA_rbkwJu8BAJfbz5Ykny6w,8831
-ssb_sgis-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-ssb_sgis-0.2.2.dist-info/RECORD,,
+ssb_sgis-0.2.3.dist-info/LICENSE,sha256=lL2h0dNKGTKAE0CjTy62SDbRennVD1xPgM5LzGqhKeo,1074
+ssb_sgis-0.2.3.dist-info/METADATA,sha256=0hWJYp0rfk73CN0jX8zAw_5Oz_fx604w1igH6ucQJsM,8831
+ssb_sgis-0.2.3.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+ssb_sgis-0.2.3.dist-info/RECORD,,

{ssb_sgis-0.2.2.dist-info → ssb_sgis-0.2.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{ssb_sgis-0.2.2.dist-info → ssb_sgis-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

ssb-sgis 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

ssb-sgis 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl