ssb-sgis 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/PKG-INFO +1 -1
  2. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/pyproject.toml +1 -1
  3. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/__init__.py +3 -0
  4. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/polygon_operations.py +190 -46
  5. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/LICENSE +0 -0
  6. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/README.md +0 -0
  7. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/dapla.py +0 -0
  8. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/exceptions.py +0 -0
  9. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/__init__.py +0 -0
  10. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/buffer_dissolve_explode.py +0 -0
  11. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/general.py +0 -0
  12. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/geometry_types.py +0 -0
  13. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/neighbors.py +0 -0
  14. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/overlay.py +0 -0
  15. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/point_operations.py +0 -0
  16. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/geopandas_tools/to_geodataframe.py +0 -0
  17. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/helpers.py +0 -0
  18. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/__init__.py +0 -0
  19. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/explore.py +0 -0
  20. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/httpserver.py +0 -0
  21. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/legend.py +0 -0
  22. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/map.py +0 -0
  23. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/maps.py +0 -0
  24. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/maps/thematicmap.py +0 -0
  25. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/__init__.py +0 -0
  26. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/_get_route.py +0 -0
  27. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/_od_cost_matrix.py +0 -0
  28. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/_points.py +0 -0
  29. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/_service_area.py +0 -0
  30. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/closing_network_holes.py +0 -0
  31. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/cutting_lines.py +0 -0
  32. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/directednetwork.py +0 -0
  33. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/finding_isolated_networks.py +0 -0
  34. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/network.py +0 -0
  35. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/networkanalysis.py +0 -0
  36. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/networkanalysisrules.py +0 -0
  37. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/networkanalysis/nodes.py +0 -0
  38. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/py.typed +0 -0
  39. {ssb_sgis-0.2.1 → ssb_sgis-0.2.3}/src/sgis/read_parquet.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-sgis"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  description = "GIS functions used at Statistics Norway."
5
5
  authors = ["Statistics Norway <ort@ssb.no>"]
6
6
  license = "MIT"
@@ -38,6 +38,9 @@ from .geopandas_tools.point_operations import snap_all, snap_within_distance
38
38
  from .geopandas_tools.polygon_operations import (
39
39
  close_all_holes,
40
40
  close_small_holes,
41
+ eliminate_by_largest,
42
+ eliminate_by_longest,
43
+ eliminate_by_smallest,
41
44
  get_overlapping_polygon_indices,
42
45
  get_overlapping_polygon_product,
43
46
  get_overlapping_polygons,
@@ -15,13 +15,177 @@ from shapely import (
15
15
  )
16
16
  from shapely.ops import unary_union
17
17
 
18
- from .general import _push_geom_col
18
+ from .general import _push_geom_col, to_lines
19
19
  from .neighbors import get_neighbor_indices
20
20
  from .overlay import clean_overlay
21
21
 
22
22
 
23
+ def eliminate_by_longest(
24
+ gdf: GeoDataFrame,
25
+ min_area: int | float,
26
+ ignore_index: bool = False,
27
+ aggfunc: str | dict | list = "first",
28
+ **kwargs,
29
+ ) -> GeoDataFrame:
30
+ """Dissolves small polygons with the longest bordering neighbor polygon.
31
+
32
+ Eliminates small geometries by dissolving them with the neighboring
33
+ polygon with the longest shared border. The index and column values of the
34
+ large polygons will be kept, unless otherwise specified.
35
+
36
+ Args:
37
+ gdf: GeoDataFrame with polygon geometries.
38
+ min_area: Area threshold. Polygons with an area less than or equal to this are eliminated.
39
+ ignore_index: If False (default), the resulting GeoDataFrame will keep the
40
+ index of the large polygons. If True, the resulting axis will be labeled
41
+ 0, 1, …, n - 1.
42
+ aggfunc: Aggregation function(s) to use when dissolving. Defaults to 'first',
43
+ meaning the column values of the large polygons are kept.
44
+ kwargs: Keyword arguments passed to the dissolve method.
45
+
46
+ Returns:
47
+ The GeoDataFrame with the small polygons dissolved into the large polygons.
48
+ """
49
+ if not ignore_index:
50
+ idx_mapper = {i: idx for i, idx in enumerate(gdf.index)}
51
+ idx_name = gdf.index.name
52
+
53
+ gdf = gdf.reset_index(drop=True)
54
+
55
+ small = gdf.loc[gdf.area <= min_area].assign(small_idx=lambda x: x.index)
56
+ large = gdf.loc[gdf.area > min_area].assign(large_idx=lambda x: x.index)
57
+
58
+ lines = to_lines(small[["small_idx", "geometry"]], large[["large_idx", "geometry"]])
59
+ lines = lines[lines["small_idx"].notna()]
60
+ lines["length__"] = lines.length
61
+
62
+ longest = lines.sort_values("length__", ascending=False).drop_duplicates(
63
+ "small_idx"
64
+ )
65
+
66
+ small_to_large = longest.set_index("small_idx")["large_idx"]
67
+ small["dissolve_idx"] = small["small_idx"].map(small_to_large)
68
+ large["dissolve_idx"] = large["large_idx"]
69
+
70
+ kwargs.pop("as_index", None)
71
+ eliminated = (
72
+ pd.concat([large, small])
73
+ .dissolve("dissolve_idx", aggfunc=aggfunc, **kwargs)
74
+ .drop(
75
+ ["length__", "small_idx", "large_idx"],
76
+ axis=1,
77
+ errors="ignore",
78
+ )
79
+ )
80
+
81
+ if ignore_index:
82
+ return eliminated.reset_index(drop=True)
83
+
84
+ eliminated.index = eliminated.index.map(idx_mapper)
85
+ eliminated.index.name = idx_name
86
+
87
+ return eliminated
88
+
89
+
90
+ def eliminate_by_largest(
91
+ gdf: GeoDataFrame,
92
+ min_area: int | float,
93
+ ignore_index: bool = False,
94
+ aggfunc: str | dict | list = "first",
95
+ **kwargs,
96
+ ) -> GeoDataFrame:
97
+ """Dissolves small polygons with the largest neighbor polygon.
98
+
99
+ Eliminates small geometries by dissolving them with the neighboring
100
+ polygon with the largest area. The index and column values of the
101
+ large polygons will be kept, unless otherwise specified.
102
+
103
+ Args:
104
+ gdf: GeoDataFrame with polygon geometries.
105
+ min_area: minimum area for the polygons to be eliminated.
106
+ ignore_index: If False (default), the resulting GeoDataFrame will keep the
107
+ index of the large polygons. If True, the resulting axis will be labeled
108
+ 0, 1, …, n - 1.
109
+ aggfunc: Aggregation function(s) to use when dissolving. Defaults to 'first',
110
+ meaning the column values of the large polygons are kept.
111
+ kwargs: Keyword arguments passed to the dissolve method.
112
+
113
+ Returns:
114
+ The GeoDataFrame with the small polygons dissolved into the large polygons.
115
+ """
116
+ return _eliminate_by_area(
117
+ gdf,
118
+ min_area=min_area,
119
+ ignore_index=ignore_index,
120
+ sort_ascending=False,
121
+ aggfunc=aggfunc,
122
+ **kwargs,
123
+ )
124
+
125
+
126
+ def eliminate_by_smallest(
127
+ gdf: GeoDataFrame,
128
+ min_area: int | float,
129
+ ignore_index: bool = False,
130
+ aggfunc: str | dict | list = "first",
131
+ **kwargs,
132
+ ) -> GeoDataFrame:
133
+ return _eliminate_by_area(
134
+ gdf,
135
+ min_area=min_area,
136
+ ignore_index=ignore_index,
137
+ sort_ascending=True,
138
+ aggfunc=aggfunc,
139
+ **kwargs,
140
+ )
141
+
142
+
143
+ def _eliminate_by_area(
144
+ gdf: GeoDataFrame,
145
+ min_area: int | float,
146
+ sort_ascending: bool,
147
+ ignore_index: bool = False,
148
+ aggfunc="first",
149
+ **kwargs,
150
+ ) -> GeoDataFrame:
151
+ if not ignore_index:
152
+ idx_mapper = {i: idx for i, idx in enumerate(gdf.index)}
153
+ idx_name = gdf.index.name
154
+
155
+ gdf = gdf.reset_index(drop=True)
156
+
157
+ small = gdf.loc[gdf.area <= min_area]
158
+ large = gdf.loc[gdf.area > min_area]
159
+ large["area__"] = large.area
160
+
161
+ joined = small.sjoin(
162
+ large[["area__", "geometry"]], predicate="touches"
163
+ ).sort_values("area__", ascending=sort_ascending)
164
+
165
+ largest = joined[~joined.index.duplicated()]
166
+
167
+ large = large.assign(index_right=lambda x: x.index)
168
+
169
+ kwargs.pop("as_index", None)
170
+ eliminated = (
171
+ pd.concat([large, largest])
172
+ .dissolve("index_right", aggfunc=aggfunc, **kwargs)
173
+ .drop(["area__"], axis=1, errors="ignore")
174
+ )
175
+
176
+ if ignore_index:
177
+ return eliminated.reset_index(drop=True)
178
+
179
+ eliminated.index = eliminated.index.map(idx_mapper)
180
+ eliminated.index.name = idx_name
181
+
182
+ return eliminated
183
+
184
+
23
185
  def get_polygon_clusters(
24
- *gdfs: GeoDataFrame | GeoSeries, cluster_col: str = "cluster", explode: bool = True
186
+ *gdfs: GeoDataFrame | GeoSeries,
187
+ cluster_col: str = "cluster",
188
+ allow_multipart: bool = False,
25
189
  ) -> GeoDataFrame | tuple[GeoDataFrame]:
26
190
  """Find which polygons overlap without dissolving.
27
191
 
@@ -38,8 +202,8 @@ def get_polygon_clusters(
38
202
  Args:
39
203
  gdfs: One or more GeoDataFrames of polygons.
40
204
  cluster_col: Name of the resulting cluster column.
41
- explode: Whether to explode the geometries to singlepart before the spatial
42
- join. Defaults to True. Index will be preserved.
205
+ allow_multipart: Whether to allow multipart geometries in the gdfs.
206
+ Defaults to False to avoid confusing results.
43
207
 
44
208
  Returns:
45
209
  One or more GeoDataFrames (same amount as was given) with a new cluster column.
@@ -47,8 +211,7 @@ def get_polygon_clusters(
47
211
  Examples
48
212
  --------
49
213
 
50
- Create polygon geometries where row 0, 1 and 2 overlap, 3 and 4 overlap
51
- and 6 is on its own.
214
+ Create geometries with three clusters of overlapping polygons.
52
215
 
53
216
  >>> import sgis as sg
54
217
  >>> gdf = sg.to_gdf([(0, 0), (1, 1), (0, 1), (4, 4), (4, 3), (7, 7)])
@@ -62,7 +225,7 @@ def get_polygon_clusters(
62
225
  4 POLYGON ((5.00000 3.00000, 4.99951 2.96859, 4....
63
226
  5 POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....
64
227
 
65
- This will add a cluster column to the GeoDataFrame:
228
+ Add a cluster column to the GeoDataFrame:
66
229
 
67
230
  >>> gdf = sg.get_polygon_clusters(gdf, cluster_col="cluster")
68
231
  >>> gdf
@@ -80,7 +243,7 @@ def get_polygon_clusters(
80
243
  >>> gdf2 = sg.to_gdf([(0, 0), (7, 7)])
81
244
  >>> gdf, gdf2 = sg.get_polygon_clusters(gdf, gdf2, cluster_col="cluster")
82
245
  >>> gdf2
83
- cluster geometry
246
+ cluster geometry
84
247
  0 0 POINT (0.00000 0.00000)
85
248
  1 2 POINT (7.00000 7.00000)
86
249
  >>> gdf
@@ -101,28 +264,12 @@ def get_polygon_clusters(
101
264
  0 0 POLYGON ((0.99951 -0.03141, 0.99803 -0.06279, ...
102
265
  1 1 POLYGON ((4.99951 2.96859, 4.99803 2.93721, 4....
103
266
  2 2 POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....
104
-
105
- Which is equivelen to this in straigt geopandas:
106
-
107
- >>> dissolved2 = gdf.dissolve().explode(ignore_index=True).assign(cluster=lambda x: x.index)
108
- >>> dissolved2
109
- cluster geometry
110
- 0 0 POLYGON ((0.99803 -0.06279, 0.99556 -0.09411, ...
111
- 1 1 POLYGON ((4.99803 2.93721, 4.99556 2.90589, 4....
112
- 2 2 POLYGON ((7.99556 6.90589, 7.99211 6.87467, 7....
113
-
114
- Note that the order of the coordinates is different, and there is
115
- some deviations in the rounding on microscopic levels.
116
-
117
- >>> dissolved.area.sum()
118
- 15.016909720698278
119
- >>> dissolved2.area.sum()
120
- 15.016909720698285
121
267
  """
122
268
  if isinstance(gdfs[-1], str):
123
269
  *gdfs, cluster_col = gdfs
124
270
 
125
271
  concated = pd.DataFrame()
272
+ orig_indices = ()
126
273
  for i, gdf in enumerate(gdfs):
127
274
  if isinstance(gdf, GeoSeries):
128
275
  gdf = gdf.to_frame()
@@ -130,11 +277,15 @@ def get_polygon_clusters(
130
277
  if not isinstance(gdf, GeoDataFrame):
131
278
  raise TypeError("'gdfs' should be one or more GeoDataFrames or GeoSeries.")
132
279
 
133
- if explode:
134
- gdf = gdf.explode(index_parts=False)
280
+ if not allow_multipart and len(gdf) != len(gdf.explode(index_parts=False)):
281
+ raise ValueError(
282
+ "All geometries should be exploded to singlepart "
283
+ "in order to get correct polygon clusters. "
284
+ "To allow multipart geometries, set allow_multipart=True"
285
+ )
135
286
 
136
- gdf["orig_idx___"] = gdf.index
137
- gdf["_i___"] = i
287
+ orig_indices = orig_indices + (gdf.index,)
288
+ gdf["i__"] = i
138
289
 
139
290
  concated = pd.concat([concated, gdf], ignore_index=True)
140
291
 
@@ -151,27 +302,28 @@ def get_polygon_clusters(
151
302
  for j in component
152
303
  }
153
304
 
154
- concated[cluster_col] = concated.index.map(component_mapper)
155
-
156
- concated.index = concated["orig_idx___"].values
305
+ concated[cluster_col] = component_mapper
157
306
 
158
307
  concated = _push_geom_col(concated)
159
308
 
160
- _i___ = concated["_i___"].unique()
309
+ n_gdfs = concated["i__"].unique()
161
310
 
162
- if len(_i___) == 1:
163
- return concated.drop(["_i___", "orig_idx___"], axis=1)
311
+ if len(n_gdfs) == 1:
312
+ concated.index = orig_indices[0]
313
+ return concated.drop(["i__"], axis=1)
164
314
 
165
315
  unconcated = ()
166
- for i in _i___:
167
- gdf = concated[concated["_i___"] == i].drop(["_i___", "orig_idx___"], axis=1)
316
+ for i in n_gdfs:
317
+ gdf = concated[concated["i__"] == i]
318
+ gdf.index = orig_indices[i]
319
+ gdf = gdf.drop(["i__"], axis=1)
168
320
  unconcated = unconcated + (gdf,)
169
321
 
170
322
  return unconcated
171
323
 
172
324
 
173
325
  def get_overlapping_polygons(
174
- gdf: GeoDataFrame | GeoSeries, ignore_index=False
326
+ gdf: GeoDataFrame | GeoSeries, ignore_index: bool = False
175
327
  ) -> GeoDataFrame | GeoSeries:
176
328
  """Find the areas that overlap.
177
329
 
@@ -210,14 +362,6 @@ def get_overlapping_polygons(
210
362
 
211
363
 
212
364
  def get_overlapping_polygon_indices(gdf: GeoDataFrame | GeoSeries) -> pd.Index:
213
- """Get the index of the rows that contain overlapping geometries.
214
-
215
- Args:
216
- gdf: GeoDataFrame of polygons.
217
-
218
- Returns:
219
- A pandas Index with the overlapping polygon indices.
220
- """
221
365
  if not gdf.index.is_unique:
222
366
  raise ValueError(
223
367
  "Index must be unique in order to correctly find "
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes