ssb-sgis 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgis/__init__.py CHANGED
@@ -38,6 +38,9 @@ from .geopandas_tools.point_operations import snap_all, snap_within_distance
38
38
  from .geopandas_tools.polygon_operations import (
39
39
  close_all_holes,
40
40
  close_small_holes,
41
+ eliminate_by_largest,
42
+ eliminate_by_longest,
43
+ eliminate_by_smallest,
41
44
  get_overlapping_polygon_indices,
42
45
  get_overlapping_polygon_product,
43
46
  get_overlapping_polygons,
@@ -15,11 +15,173 @@ from shapely import (
15
15
  )
16
16
  from shapely.ops import unary_union
17
17
 
18
- from .general import _push_geom_col
18
+ from .general import _push_geom_col, to_lines
19
19
  from .neighbors import get_neighbor_indices
20
20
  from .overlay import clean_overlay
21
21
 
22
22
 
23
def eliminate_by_longest(
    gdf: GeoDataFrame,
    min_area: int | float,
    ignore_index: bool = False,
    aggfunc: str | dict | list = "first",
    **kwargs,
) -> GeoDataFrame:
    """Dissolve small polygons into the neighbor sharing the longest border.

    Polygons with an area of 'min_area' or less are considered small. Each
    small polygon is dissolved with the large polygon (area above 'min_area')
    that it shares the longest border with. The index and column values of the
    large polygons are kept, unless otherwise specified. Small polygons that
    share no border with any large polygon are kept as they are instead of
    being silently dropped.

    Args:
        gdf: GeoDataFrame with polygon geometries.
        min_area: Area threshold. Polygons with an area less than or equal to
            this value are eliminated.
        ignore_index: If False (default), the resulting GeoDataFrame will keep
            the index of the large polygons. If True, the resulting axis will
            be labeled 0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving. Defaults to
            'first', meaning the column values of the large polygons are kept.
        kwargs: Keyword arguments passed to the dissolve method. 'as_index' is
            discarded, since the dissolve key must end up as the index.

    Returns:
        The GeoDataFrame with the small polygons dissolved into the large
        polygons.
    """
    if not ignore_index:
        # Positional label -> original label, used to restore the index after
        # the work below has been done on a 0..n-1 index.
        idx_mapper = dict(enumerate(gdf.index))
        idx_name = gdf.index.name

    gdf = gdf.reset_index(drop=True)

    area = gdf.area
    small = gdf.loc[area <= min_area].assign(small_idx=lambda x: x.index)
    large = gdf.loc[area > min_area].assign(large_idx=lambda x: x.index)

    lines = to_lines(small[["small_idx", "geometry"]], large[["large_idx", "geometry"]])

    # A line piece can only be a shared border if it belongs to both a small
    # and a large polygon. NOTE(review): this presumes to_lines leaves the id
    # column of the non-participating side missing -- confirm against to_lines.
    # Rows without a large id could never be a valid dissolve target anyway.
    shared = lines.loc[lines["small_idx"].notna() & lines["large_idx"].notna()]
    shared = shared.assign(length__=shared.length)

    # Longest shared border first, then keep one row per small polygon.
    longest = shared.sort_values("length__", ascending=False).drop_duplicates(
        "small_idx"
    )
    small_to_large = longest.set_index("small_idx")["large_idx"]

    # Small polygons without a large neighbor fall back to their own positional
    # label. A missing dissolve key would make dissolve drop the row.
    small = small.assign(
        dissolve_idx=small["small_idx"]
        .map(small_to_large)
        .fillna(small["small_idx"])
        .astype(int)
    )
    large = large.assign(dissolve_idx=large["large_idx"])

    kwargs.pop("as_index", None)

    # The large polygons come first, so that aggfunc='first' keeps their values.
    eliminated = (
        pd.concat([large, small])
        .dissolve("dissolve_idx", aggfunc=aggfunc, **kwargs)
        .drop(
            ["length__", "small_idx", "large_idx"],
            axis=1,
            errors="ignore",
        )
    )

    if ignore_index:
        return eliminated.reset_index(drop=True)

    eliminated.index = eliminated.index.map(idx_mapper)
    eliminated.index.name = idx_name

    return eliminated
88
+
89
+
90
def eliminate_by_largest(
    gdf: GeoDataFrame,
    min_area: int | float,
    ignore_index: bool = False,
    aggfunc: str | dict | list = "first",
    **kwargs,
) -> GeoDataFrame:
    """Dissolve small polygons into their largest neighboring polygon.

    Small geometries are eliminated by merging each of them with the
    neighboring polygon that has the largest area. Unless otherwise specified,
    the index and column values of the large polygons are kept.

    Args:
        gdf: GeoDataFrame with polygon geometries.
        min_area: Area threshold for the polygons to be eliminated.
        ignore_index: If False (default), the resulting GeoDataFrame will keep
            the index of the large polygons. If True, the resulting axis will
            be labeled 0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving. Defaults to
            'first', meaning the column values of the large polygons are kept.
        kwargs: Keyword arguments passed to the dissolve method.

    Returns:
        The GeoDataFrame with the small polygons dissolved into the large
        polygons.
    """
    # Descending sort on neighbor area puts the largest neighbor first.
    largest_first = False
    return _eliminate_by_area(
        gdf, min_area, largest_first, ignore_index, aggfunc, **kwargs
    )
124
+
125
+
126
def eliminate_by_smallest(
    gdf: GeoDataFrame,
    min_area: int | float,
    ignore_index: bool = False,
    aggfunc: str | dict | list = "first",
    **kwargs,
) -> GeoDataFrame:
    """Dissolves small polygons with the smallest neighbor polygon.

    Eliminates small geometries by dissolving them with the neighboring
    polygon with the smallest area among the large polygons. The index and
    column values of the large polygons will be kept, unless otherwise
    specified.

    Args:
        gdf: GeoDataFrame with polygon geometries.
        min_area: Area threshold. Polygons with an area less than or equal to
            this value are eliminated.
        ignore_index: If False (default), the resulting GeoDataFrame will keep
            the index of the large polygons. If True, the resulting axis will
            be labeled 0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving. Defaults to
            'first', meaning the column values of the large polygons are kept.
        kwargs: Keyword arguments passed to the dissolve method.

    Returns:
        The GeoDataFrame with the small polygons dissolved into the large
        polygons.
    """
    return _eliminate_by_area(
        gdf,
        min_area=min_area,
        ignore_index=ignore_index,
        # Ascending sort on neighbor area puts the smallest neighbor first.
        sort_ascending=True,
        aggfunc=aggfunc,
        **kwargs,
    )
141
+
142
+
143
def _eliminate_by_area(
    gdf: GeoDataFrame,
    min_area: int | float,
    sort_ascending: bool,
    ignore_index: bool = False,
    aggfunc: str | dict | list = "first",
    **kwargs,
) -> GeoDataFrame:
    """Dissolve small polygons into a touching large polygon chosen by area.

    Polygons with an area of 'min_area' or less are considered small. Each
    small polygon is dissolved with one of the large polygons it touches: the
    one with the smallest area if 'sort_ascending' is True, otherwise the one
    with the largest area. Small polygons that touch no large polygon are kept
    as they are instead of being silently dropped.

    Args:
        gdf: GeoDataFrame with polygon geometries.
        min_area: Area threshold. Polygons with an area less than or equal to
            this value are eliminated.
        sort_ascending: Sort order of the neighbor areas. True selects the
            smallest touching neighbor, False the largest.
        ignore_index: If False (default), the result keeps the index of the
            large polygons. If True, the result is labeled 0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving.
        kwargs: Keyword arguments passed to the dissolve method. 'as_index' is
            discarded, since the dissolve key must end up as the index.

    Returns:
        The GeoDataFrame with the small polygons dissolved into the large
        polygons.
    """
    if not ignore_index:
        # Positional label -> original label, used to restore the index after
        # the work below has been done on a 0..n-1 index.
        idx_mapper = dict(enumerate(gdf.index))
        idx_name = gdf.index.name

    gdf = gdf.reset_index(drop=True)

    area = gdf.area
    small = gdf.loc[area <= min_area]
    # assign returns a new frame, so no column is written to a slice of gdf.
    large = gdf.loc[area > min_area].assign(area__=lambda x: x.area)

    # One row per (small polygon, touching large polygon) pair. The positional
    # label of the large polygon ends up in the 'index_right' column.
    joined = small.sjoin(
        large[["area__", "geometry"]], predicate="touches"
    ).sort_values("area__", ascending=sort_ascending)

    # After sorting, the first occurrence of each small polygon is the wanted pair.
    chosen = joined.loc[~joined.index.duplicated()]

    # The inner join leaves out small polygons without a touching large polygon.
    # Give them their own label as dissolve key so they survive the dissolve.
    isolated = small.loc[~small.index.isin(chosen.index)].assign(
        index_right=lambda x: x.index
    )

    large = large.assign(index_right=lambda x: x.index)

    kwargs.pop("as_index", None)

    # The large polygons come first, so that aggfunc='first' keeps their values.
    eliminated = (
        pd.concat([large, chosen, isolated])
        .dissolve("index_right", aggfunc=aggfunc, **kwargs)
        .drop(["area__"], axis=1, errors="ignore")
    )

    if ignore_index:
        return eliminated.reset_index(drop=True)

    eliminated.index = eliminated.index.map(idx_mapper)
    eliminated.index.name = idx_name

    return eliminated
183
+
184
+
23
185
  def get_polygon_clusters(
24
186
  *gdfs: GeoDataFrame | GeoSeries,
25
187
  cluster_col: str = "cluster",
@@ -161,7 +323,7 @@ def get_polygon_clusters(
161
323
 
162
324
 
163
325
  def get_overlapping_polygons(
164
- gdf: GeoDataFrame | GeoSeries, ignore_index=False
326
+ gdf: GeoDataFrame | GeoSeries, ignore_index: bool = False
165
327
  ) -> GeoDataFrame | GeoSeries:
166
328
  """Find the areas that overlap.
167
329
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- sgis/__init__.py,sha256=npPhiQqWptYQF0cwL6WPqSNWyKWzgvNlxGF_cgEuNss,2230
1
+ sgis/__init__.py,sha256=NItNaPnNtu0K8nr0jICLJCytdk2bSNW172GjORimssw,2309
2
2
  sgis/dapla.py,sha256=t0NXKeEKnOBcFCVbHYbqvKY7f8UtmVnBsf7CmaHNIEY,3243
3
3
  sgis/exceptions.py,sha256=ztMp4sB9xxPvwj2IEsO5kOaB4FmHuU_7-M2pZ7qaxTs,576
4
4
  sgis/geopandas_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -8,7 +8,7 @@ sgis/geopandas_tools/geometry_types.py,sha256=u6HjPgzL1IFhhIiJqShyG-SSfrCpOKevR5
8
8
  sgis/geopandas_tools/neighbors.py,sha256=tv8bmYgq4VNFbXmT2wcmJsFH8946NwbIBMQXAi3n8L4,14520
9
9
  sgis/geopandas_tools/overlay.py,sha256=DLvvMw-4LRst1QWPeQYHJe8OLwdxO_aoT38_x4BCL7A,11815
10
10
  sgis/geopandas_tools/point_operations.py,sha256=3JynroucouAbpON4DWG32S3MQQGmfIJuY7D6gkqtk70,6888
11
- sgis/geopandas_tools/polygon_operations.py,sha256=hCcfi8QnJyHrnuafBQy0LdRww1JoiCEV_FgMOj30T9A,13161
11
+ sgis/geopandas_tools/polygon_operations.py,sha256=bFHh8o1rIG8ymuAvXLyGvmS8pSPculB9Jq8e_xbwVVA,18319
12
12
  sgis/geopandas_tools/to_geodataframe.py,sha256=4jOy0YvXBIiOEqQx7_ept5xfd39R1XKPN_OVK8kxhp8,9722
13
13
  sgis/helpers.py,sha256=OqTojkSl-JVKlJzqqB-d_0CH6mk7_LS1DkiIjp1gD8E,2674
14
14
  sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -33,7 +33,7 @@ sgis/networkanalysis/networkanalysisrules.py,sha256=BhhaSXIyBRNzxSOUP2kVBIR--TRq
33
33
  sgis/networkanalysis/nodes.py,sha256=fFagSB88Kj4yHCnxDtD3ALpGrAtkVPvGd7F8MOe7vuk,6740
34
34
  sgis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  sgis/read_parquet.py,sha256=GSW2NDy4-XosbamPEzB1xhWxFAPHuGEJZglfQ-V6DzY,3774
36
- ssb_sgis-0.2.2.dist-info/LICENSE,sha256=lL2h0dNKGTKAE0CjTy62SDbRennVD1xPgM5LzGqhKeo,1074
37
- ssb_sgis-0.2.2.dist-info/METADATA,sha256=XvUd974KNd9OD1iXBkmvCA_rbkwJu8BAJfbz5Ykny6w,8831
38
- ssb_sgis-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
39
- ssb_sgis-0.2.2.dist-info/RECORD,,
36
+ ssb_sgis-0.2.3.dist-info/LICENSE,sha256=lL2h0dNKGTKAE0CjTy62SDbRennVD1xPgM5LzGqhKeo,1074
37
+ ssb_sgis-0.2.3.dist-info/METADATA,sha256=0hWJYp0rfk73CN0jX8zAw_5Oz_fx604w1igH6ucQJsM,8831
38
+ ssb_sgis-0.2.3.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
39
+ ssb_sgis-0.2.3.dist-info/RECORD,,