ssb-sgis 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +6 -3
- sgis/geopandas_tools/buffer_dissolve_explode.py +13 -9
- sgis/geopandas_tools/centerlines.py +110 -47
- sgis/geopandas_tools/cleaning.py +331 -0
- sgis/geopandas_tools/conversion.py +17 -7
- sgis/geopandas_tools/duplicates.py +67 -49
- sgis/geopandas_tools/general.py +15 -1
- sgis/geopandas_tools/neighbors.py +12 -0
- sgis/geopandas_tools/overlay.py +26 -17
- sgis/geopandas_tools/polygon_operations.py +281 -100
- sgis/geopandas_tools/polygons_as_rings.py +72 -10
- sgis/geopandas_tools/sfilter.py +8 -8
- sgis/helpers.py +20 -3
- sgis/io/{dapla.py → dapla_functions.py} +28 -6
- sgis/io/write_municipality_data.py +13 -7
- sgis/maps/examine.py +10 -7
- sgis/maps/explore.py +102 -25
- sgis/maps/map.py +32 -6
- sgis/maps/maps.py +40 -58
- sgis/maps/tilesources.py +61 -0
- sgis/networkanalysis/closing_network_holes.py +89 -62
- sgis/networkanalysis/cutting_lines.py +11 -5
- sgis/networkanalysis/finding_isolated_networks.py +1 -1
- sgis/networkanalysis/nodes.py +1 -1
- sgis/networkanalysis/traveling_salesman.py +8 -4
- sgis/parallel/parallel.py +66 -12
- sgis/raster/raster.py +29 -27
- {ssb_sgis-0.3.7.dist-info → ssb_sgis-0.3.9.dist-info}/METADATA +6 -3
- ssb_sgis-0.3.9.dist-info/RECORD +59 -0
- {ssb_sgis-0.3.7.dist-info → ssb_sgis-0.3.9.dist-info}/WHEEL +1 -1
- sgis/geopandas_tools/snap_polygons.py +0 -0
- ssb_sgis-0.3.7.dist-info/RECORD +0 -58
- {ssb_sgis-0.3.7.dist-info → ssb_sgis-0.3.9.dist-info}/LICENSE +0 -0
sgis/geopandas_tools/overlay.py
CHANGED
|
@@ -28,6 +28,11 @@ from .general import clean_geoms
|
|
|
28
28
|
from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
DEFAULT_GRID_SIZE = None
|
|
32
|
+
DEFAULT_LSUFFIX = "_1"
|
|
33
|
+
DEFAULT_RSUFFIX = "_2"
|
|
34
|
+
|
|
35
|
+
|
|
31
36
|
def clean_overlay(
|
|
32
37
|
df1: GeoDataFrame,
|
|
33
38
|
df2: GeoDataFrame,
|
|
@@ -35,8 +40,8 @@ def clean_overlay(
|
|
|
35
40
|
keep_geom_type: bool = True,
|
|
36
41
|
geom_type: str | None = None,
|
|
37
42
|
grid_size: float | None = None,
|
|
38
|
-
lsuffix: str =
|
|
39
|
-
rsuffix: str =
|
|
43
|
+
lsuffix: str = DEFAULT_LSUFFIX,
|
|
44
|
+
rsuffix: str = DEFAULT_RSUFFIX,
|
|
40
45
|
) -> GeoDataFrame:
|
|
41
46
|
"""Fixes and explodes geometries before doing a shapely overlay, then cleans up.
|
|
42
47
|
|
|
@@ -132,18 +137,22 @@ def clean_overlay(
|
|
|
132
137
|
df1 = DataFrame(df1).reset_index(drop=True)
|
|
133
138
|
df2 = DataFrame(df2).reset_index(drop=True)
|
|
134
139
|
|
|
135
|
-
overlayed =
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
140
|
+
overlayed = (
|
|
141
|
+
gpd.GeoDataFrame(
|
|
142
|
+
_shapely_pd_overlay(
|
|
143
|
+
df1,
|
|
144
|
+
df2,
|
|
145
|
+
how=how,
|
|
146
|
+
grid_size=grid_size,
|
|
147
|
+
lsuffix=lsuffix,
|
|
148
|
+
rsuffix=rsuffix,
|
|
149
|
+
),
|
|
150
|
+
geometry="geometry",
|
|
151
|
+
crs=crs,
|
|
152
|
+
)
|
|
153
|
+
.pipe(clean_geoms)
|
|
154
|
+
.pipe(make_all_singlepart, ignore_index=True)
|
|
155
|
+
)
|
|
147
156
|
|
|
148
157
|
if keep_geom_type:
|
|
149
158
|
overlayed = to_single_geom_type(overlayed, geom_type)
|
|
@@ -200,9 +209,9 @@ def _shapely_pd_overlay(
|
|
|
200
209
|
df1: DataFrame,
|
|
201
210
|
df2: DataFrame,
|
|
202
211
|
how: str,
|
|
203
|
-
grid_size: float,
|
|
204
|
-
lsuffix,
|
|
205
|
-
rsuffix,
|
|
212
|
+
grid_size: float = DEFAULT_GRID_SIZE,
|
|
213
|
+
lsuffix=DEFAULT_LSUFFIX,
|
|
214
|
+
rsuffix=DEFAULT_RSUFFIX,
|
|
206
215
|
) -> DataFrame:
|
|
207
216
|
if not grid_size and not len(df1) or not len(df2):
|
|
208
217
|
return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
|
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
import networkx as nx
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
|
+
import shapely
|
|
6
7
|
from geopandas import GeoDataFrame, GeoSeries
|
|
7
8
|
from shapely import (
|
|
9
|
+
STRtree,
|
|
8
10
|
area,
|
|
9
11
|
box,
|
|
10
12
|
buffer,
|
|
@@ -20,6 +22,7 @@ from shapely import (
|
|
|
20
22
|
)
|
|
21
23
|
from shapely.errors import GEOSException
|
|
22
24
|
|
|
25
|
+
from .duplicates import get_intersections
|
|
23
26
|
from .general import _push_geom_col, clean_geoms, get_grouped_centroids, to_lines
|
|
24
27
|
from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
|
|
25
28
|
from .neighbors import get_neighbor_indices
|
|
@@ -152,20 +155,7 @@ def get_polygon_clusters(
|
|
|
152
155
|
if not len(concated):
|
|
153
156
|
return concated.drop("i__", axis=1).assign(**{cluster_col: []})
|
|
154
157
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
edges = [(source, target) for source, target in neighbors.items()]
|
|
158
|
-
|
|
159
|
-
graph = nx.Graph()
|
|
160
|
-
graph.add_edges_from(edges)
|
|
161
|
-
|
|
162
|
-
component_mapper = {
|
|
163
|
-
j: i
|
|
164
|
-
for i, component in enumerate(nx.connected_components(graph))
|
|
165
|
-
for j in component
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
concated[cluster_col] = component_mapper
|
|
158
|
+
concated[cluster_col] = get_cluster_mapper(concated, predicate)
|
|
169
159
|
|
|
170
160
|
if as_string:
|
|
171
161
|
concated[cluster_col] = get_grouped_centroids(concated, groupby=cluster_col)
|
|
@@ -188,15 +178,33 @@ def get_polygon_clusters(
|
|
|
188
178
|
return unconcated
|
|
189
179
|
|
|
190
180
|
|
|
181
|
+
def get_cluster_mapper(gdf, predicate="intersects"):
|
|
182
|
+
if not gdf.index.is_unique:
|
|
183
|
+
raise ValueError("Index must be unique")
|
|
184
|
+
neighbors = get_neighbor_indices(gdf, gdf, predicate=predicate)
|
|
185
|
+
|
|
186
|
+
edges = [(source, target) for source, target in neighbors.items()]
|
|
187
|
+
|
|
188
|
+
graph = nx.Graph()
|
|
189
|
+
graph.add_edges_from(edges)
|
|
190
|
+
|
|
191
|
+
return {
|
|
192
|
+
j: i
|
|
193
|
+
for i, component in enumerate(nx.connected_components(graph))
|
|
194
|
+
for j in component
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
191
198
|
def eliminate_by_longest(
|
|
192
|
-
gdf: GeoDataFrame,
|
|
199
|
+
gdf: GeoDataFrame | list[GeoDataFrame],
|
|
193
200
|
to_eliminate: GeoDataFrame,
|
|
194
201
|
*,
|
|
195
202
|
remove_isolated: bool = False,
|
|
203
|
+
fix_double: bool = True,
|
|
196
204
|
ignore_index: bool = False,
|
|
197
205
|
aggfunc: str | dict | list | None = None,
|
|
198
206
|
**kwargs,
|
|
199
|
-
) -> GeoDataFrame:
|
|
207
|
+
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
200
208
|
"""Dissolves selected polygons with the longest bordering neighbor polygon.
|
|
201
209
|
|
|
202
210
|
Eliminates selected geometries by dissolving them with the neighboring
|
|
@@ -206,11 +214,14 @@ def eliminate_by_longest(
|
|
|
206
214
|
Note that this might be a lot slower than eliminate_by_largest.
|
|
207
215
|
|
|
208
216
|
Args:
|
|
209
|
-
gdf: GeoDataFrame with polygon geometries.
|
|
217
|
+
gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
|
|
210
218
|
to_eliminate: The geometries to be eliminated by 'gdf'.
|
|
211
219
|
remove_isolated: If False (default), polygons in 'to_eliminate' that share
|
|
212
220
|
no border with any polygon in 'gdf' will be kept. If True, the isolated
|
|
213
221
|
polygons will be removed.
|
|
222
|
+
fix_double: If True, geometries to be eliminated will be erased by overlapping
|
|
223
|
+
geometries to not get double surfaces if the geometries in 'to_eliminate'
|
|
224
|
+
overlaps with multiple geometries in 'gdf'.
|
|
214
225
|
ignore_index: If False (default), the resulting GeoDataFrame will keep the
|
|
215
226
|
index of the large polygons. If True, the resulting axis will be labeled
|
|
216
227
|
0, 1, …, n - 1.
|
|
@@ -223,8 +234,17 @@ def eliminate_by_longest(
|
|
|
223
234
|
kwargs: Keyword arguments passed to the dissolve method.
|
|
224
235
|
|
|
225
236
|
Returns:
|
|
226
|
-
The GeoDataFrame with the
|
|
237
|
+
The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
|
|
238
|
+
If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
|
|
227
239
|
"""
|
|
240
|
+
if isinstance(gdf, (list, tuple)):
|
|
241
|
+
# concat, then break up the dataframes in the end
|
|
242
|
+
was_multiple_gdfs = True
|
|
243
|
+
original_cols = [df.columns for df in gdf]
|
|
244
|
+
gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
|
|
245
|
+
else:
|
|
246
|
+
was_multiple_gdfs = False
|
|
247
|
+
|
|
228
248
|
crs = gdf.crs
|
|
229
249
|
geom_type = get_geom_type(gdf)
|
|
230
250
|
|
|
@@ -234,70 +254,95 @@ def eliminate_by_longest(
|
|
|
234
254
|
|
|
235
255
|
gdf = gdf.reset_index(drop=True)
|
|
236
256
|
|
|
237
|
-
gdf["
|
|
238
|
-
to_eliminate = to_eliminate.assign(
|
|
257
|
+
gdf["_dissolve_idx"] = gdf.index
|
|
258
|
+
to_eliminate = to_eliminate.assign(_eliminate_idx=lambda x: range(len(x)))
|
|
239
259
|
|
|
240
260
|
# convert to lines to get the borders
|
|
241
|
-
|
|
242
|
-
lines_eliminate = to_lines(to_eliminate[["eliminate_idx", "geometry"]], copy=False)
|
|
261
|
+
lines_eliminate = to_lines(to_eliminate[["_eliminate_idx", "geometry"]])
|
|
243
262
|
|
|
244
|
-
borders =
|
|
245
|
-
|
|
246
|
-
|
|
263
|
+
borders = (
|
|
264
|
+
gdf[["_dissolve_idx", "geometry"]]
|
|
265
|
+
.overlay(lines_eliminate, keep_geom_type=False)
|
|
266
|
+
.loc[lambda x: x["_eliminate_idx"].notna()]
|
|
267
|
+
)
|
|
247
268
|
|
|
248
269
|
borders["_length"] = borders.length
|
|
249
270
|
|
|
250
271
|
# as DataFrame because GeoDataFrame constructor is expensive
|
|
251
272
|
borders = pd.DataFrame(borders)
|
|
252
|
-
gdf = pd.DataFrame(gdf)
|
|
253
273
|
|
|
254
274
|
longest_border = borders.sort_values("_length", ascending=False).drop_duplicates(
|
|
255
|
-
"
|
|
275
|
+
"_eliminate_idx"
|
|
256
276
|
)
|
|
257
277
|
|
|
258
|
-
|
|
259
|
-
to_eliminate["_dissolve_idx"] = to_eliminate["
|
|
278
|
+
to_dissolve_idx = longest_border.set_index("_eliminate_idx")["_dissolve_idx"]
|
|
279
|
+
to_eliminate["_dissolve_idx"] = to_eliminate["_eliminate_idx"].map(to_dissolve_idx)
|
|
260
280
|
|
|
261
|
-
|
|
281
|
+
actually_eliminate = to_eliminate.loc[to_eliminate["_dissolve_idx"].notna()]
|
|
262
282
|
|
|
263
|
-
|
|
283
|
+
isolated = to_eliminate.loc[to_eliminate["_dissolve_idx"].isna()]
|
|
284
|
+
containing_eliminators = (
|
|
285
|
+
pd.DataFrame(
|
|
286
|
+
isolated.drop(columns="_dissolve_idx").sjoin(
|
|
287
|
+
gdf[["_dissolve_idx", "geometry"]], predicate="contains"
|
|
288
|
+
)
|
|
289
|
+
)
|
|
290
|
+
.drop(columns="index_right")
|
|
291
|
+
.drop_duplicates("_eliminate_idx")
|
|
292
|
+
)
|
|
264
293
|
|
|
265
|
-
eliminated = _eliminate(
|
|
294
|
+
eliminated = _eliminate(
|
|
295
|
+
pd.DataFrame(gdf),
|
|
296
|
+
pd.concat([actually_eliminate, containing_eliminators]),
|
|
297
|
+
aggfunc,
|
|
298
|
+
crs,
|
|
299
|
+
fix_double,
|
|
300
|
+
**kwargs,
|
|
301
|
+
)
|
|
266
302
|
|
|
267
|
-
if ignore_index:
|
|
268
|
-
eliminated = eliminated.reset_index(drop=True)
|
|
269
|
-
else:
|
|
303
|
+
if not ignore_index:
|
|
270
304
|
eliminated.index = eliminated.index.map(idx_mapper)
|
|
271
305
|
eliminated.index.name = idx_name
|
|
272
306
|
|
|
273
|
-
if not remove_isolated:
|
|
274
|
-
|
|
275
|
-
if len(isolated):
|
|
276
|
-
eliminated = pd.concat([eliminated, isolated])
|
|
307
|
+
if not remove_isolated and len(isolated):
|
|
308
|
+
eliminated = pd.concat([eliminated, isolated])
|
|
277
309
|
|
|
278
310
|
eliminated = eliminated.drop(
|
|
279
|
-
["_dissolve_idx", "_length", "
|
|
311
|
+
["_dissolve_idx", "_length", "_eliminate_idx", "_dissolve_idx"],
|
|
280
312
|
axis=1,
|
|
281
313
|
errors="ignore",
|
|
282
314
|
)
|
|
283
315
|
|
|
284
316
|
out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
|
|
317
|
+
|
|
285
318
|
if geom_type != "mixed":
|
|
286
|
-
|
|
287
|
-
|
|
319
|
+
out = to_single_geom_type(out, geom_type)
|
|
320
|
+
|
|
321
|
+
out = out.reset_index(drop=True) if ignore_index else out
|
|
322
|
+
|
|
323
|
+
if not was_multiple_gdfs:
|
|
324
|
+
return out
|
|
325
|
+
|
|
326
|
+
gdfs = ()
|
|
327
|
+
for i, cols in enumerate(original_cols):
|
|
328
|
+
df = out.loc[out["_df_idx"] == i, cols]
|
|
329
|
+
gdfs += (df,)
|
|
330
|
+
|
|
331
|
+
return gdfs
|
|
288
332
|
|
|
289
333
|
|
|
290
334
|
def eliminate_by_largest(
|
|
291
|
-
gdf: GeoDataFrame,
|
|
335
|
+
gdf: GeoDataFrame | list[GeoDataFrame],
|
|
292
336
|
to_eliminate: GeoDataFrame,
|
|
293
337
|
*,
|
|
294
338
|
max_distance: int | float | None = None,
|
|
295
339
|
remove_isolated: bool = False,
|
|
340
|
+
fix_double: bool = False,
|
|
296
341
|
ignore_index: bool = False,
|
|
297
342
|
aggfunc: str | dict | list | None = None,
|
|
298
343
|
predicate: str = "intersects",
|
|
299
344
|
**kwargs,
|
|
300
|
-
) -> GeoDataFrame:
|
|
345
|
+
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
301
346
|
"""Dissolves selected polygons with the largest neighbor polygon.
|
|
302
347
|
|
|
303
348
|
Eliminates selected geometries by dissolving them with the neighboring
|
|
@@ -305,11 +350,14 @@ def eliminate_by_largest(
|
|
|
305
350
|
large polygons will be kept, unless else is specified.
|
|
306
351
|
|
|
307
352
|
Args:
|
|
308
|
-
gdf: GeoDataFrame with polygon geometries.
|
|
353
|
+
gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
|
|
309
354
|
to_eliminate: The geometries to be eliminated by 'gdf'.
|
|
310
355
|
remove_isolated: If False (default), polygons in 'to_eliminate' that share
|
|
311
356
|
no border with any polygon in 'gdf' will be kept. If True, the isolated
|
|
312
357
|
polygons will be removed.
|
|
358
|
+
fix_double: If True, geometries to be eliminated will be erased by overlapping
|
|
359
|
+
geometries to not get double surfaces if the geometries in 'to_eliminate'
|
|
360
|
+
overlaps with multiple geometries in 'gdf'.
|
|
313
361
|
ignore_index: If False (default), the resulting GeoDataFrame will keep the
|
|
314
362
|
index of the large polygons. If True, the resulting axis will be labeled
|
|
315
363
|
0, 1, …, n - 1.
|
|
@@ -323,8 +371,9 @@ def eliminate_by_largest(
|
|
|
323
371
|
kwargs: Keyword arguments passed to the dissolve method.
|
|
324
372
|
|
|
325
373
|
Returns:
|
|
326
|
-
The GeoDataFrame with the
|
|
327
|
-
'gdf'.
|
|
374
|
+
The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
|
|
375
|
+
If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
|
|
376
|
+
|
|
328
377
|
"""
|
|
329
378
|
return _eliminate_by_area(
|
|
330
379
|
gdf,
|
|
@@ -335,12 +384,13 @@ def eliminate_by_largest(
|
|
|
335
384
|
sort_ascending=False,
|
|
336
385
|
aggfunc=aggfunc,
|
|
337
386
|
predicate=predicate,
|
|
387
|
+
fix_double=fix_double,
|
|
338
388
|
**kwargs,
|
|
339
389
|
)
|
|
340
390
|
|
|
341
391
|
|
|
342
392
|
def eliminate_by_smallest(
|
|
343
|
-
gdf: GeoDataFrame,
|
|
393
|
+
gdf: GeoDataFrame | list[GeoDataFrame],
|
|
344
394
|
to_eliminate: GeoDataFrame,
|
|
345
395
|
*,
|
|
346
396
|
max_distance: int | float | None = None,
|
|
@@ -348,8 +398,9 @@ def eliminate_by_smallest(
|
|
|
348
398
|
ignore_index: bool = False,
|
|
349
399
|
aggfunc: str | dict | list | None = None,
|
|
350
400
|
predicate: str = "intersects",
|
|
401
|
+
fix_double: bool = False,
|
|
351
402
|
**kwargs,
|
|
352
|
-
) -> GeoDataFrame:
|
|
403
|
+
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
353
404
|
return _eliminate_by_area(
|
|
354
405
|
gdf,
|
|
355
406
|
to_eliminate=to_eliminate,
|
|
@@ -359,6 +410,7 @@ def eliminate_by_smallest(
|
|
|
359
410
|
sort_ascending=True,
|
|
360
411
|
aggfunc=aggfunc,
|
|
361
412
|
predicate=predicate,
|
|
413
|
+
fix_double=fix_double,
|
|
362
414
|
**kwargs,
|
|
363
415
|
)
|
|
364
416
|
|
|
@@ -372,8 +424,16 @@ def _eliminate_by_area(
|
|
|
372
424
|
ignore_index: bool = False,
|
|
373
425
|
aggfunc: str | dict | list | None = None,
|
|
374
426
|
predicate="intersects",
|
|
427
|
+
fix_double: bool = False,
|
|
375
428
|
**kwargs,
|
|
376
429
|
) -> GeoDataFrame:
|
|
430
|
+
if isinstance(gdf, (list, tuple)):
|
|
431
|
+
was_multiple_gdfs = True
|
|
432
|
+
original_cols = [df.columns for df in gdf]
|
|
433
|
+
gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
|
|
434
|
+
else:
|
|
435
|
+
was_multiple_gdfs = False
|
|
436
|
+
|
|
377
437
|
crs = gdf.crs
|
|
378
438
|
geom_type = get_geom_type(gdf)
|
|
379
439
|
|
|
@@ -408,11 +468,9 @@ def _eliminate_by_area(
|
|
|
408
468
|
|
|
409
469
|
notna = joined.loc[lambda x: x["_dissolve_idx"].notna()]
|
|
410
470
|
|
|
411
|
-
eliminated = _eliminate(gdf, notna, aggfunc, crs, **kwargs)
|
|
471
|
+
eliminated = _eliminate(gdf, notna, aggfunc, crs, fix_double=fix_double, **kwargs)
|
|
412
472
|
|
|
413
|
-
if ignore_index:
|
|
414
|
-
eliminated = eliminated.reset_index(drop=True)
|
|
415
|
-
else:
|
|
473
|
+
if not ignore_index:
|
|
416
474
|
eliminated.index = eliminated.index.map(idx_mapper)
|
|
417
475
|
eliminated.index.name = idx_name
|
|
418
476
|
|
|
@@ -422,7 +480,7 @@ def _eliminate_by_area(
|
|
|
422
480
|
eliminated = pd.concat([eliminated, isolated])
|
|
423
481
|
|
|
424
482
|
eliminated = eliminated.drop(
|
|
425
|
-
["_dissolve_idx", "_area", "
|
|
483
|
+
["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
|
|
426
484
|
axis=1,
|
|
427
485
|
errors="ignore",
|
|
428
486
|
)
|
|
@@ -430,11 +488,22 @@ def _eliminate_by_area(
|
|
|
430
488
|
out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
|
|
431
489
|
|
|
432
490
|
if geom_type != "mixed":
|
|
433
|
-
|
|
434
|
-
return out
|
|
491
|
+
out = to_single_geom_type(out, geom_type)
|
|
435
492
|
|
|
493
|
+
out = out.reset_index(drop=True) if ignore_index else out
|
|
436
494
|
|
|
437
|
-
|
|
495
|
+
if not was_multiple_gdfs:
|
|
496
|
+
return out
|
|
497
|
+
|
|
498
|
+
gdfs = ()
|
|
499
|
+
for i, cols in enumerate(original_cols):
|
|
500
|
+
df = out.loc[out["_df_idx"] == i, cols]
|
|
501
|
+
gdfs += (df,)
|
|
502
|
+
|
|
503
|
+
return gdfs
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
|
|
438
507
|
if not len(to_eliminate):
|
|
439
508
|
return gdf
|
|
440
509
|
|
|
@@ -442,9 +511,11 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
|
|
|
442
511
|
to_dissolve = gdf.loc[in_to_eliminate]
|
|
443
512
|
not_to_dissolve = gdf.loc[~in_to_eliminate].set_index("_dissolve_idx")
|
|
444
513
|
|
|
514
|
+
to_eliminate["_to_eliminate"] = 1
|
|
515
|
+
|
|
445
516
|
if aggfunc is None:
|
|
446
517
|
concatted = pd.concat(
|
|
447
|
-
[to_dissolve, to_eliminate[["_dissolve_idx", "geometry"]]]
|
|
518
|
+
[to_dissolve, to_eliminate[["_to_eliminate", "_dissolve_idx", "geometry"]]]
|
|
448
519
|
)
|
|
449
520
|
aggfunc = "first"
|
|
450
521
|
else:
|
|
@@ -464,6 +535,7 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
|
|
|
464
535
|
if not len(many_hits):
|
|
465
536
|
return one_hit
|
|
466
537
|
|
|
538
|
+
# aggregate all columns except geometry
|
|
467
539
|
kwargs.pop("as_index", None)
|
|
468
540
|
eliminated = (
|
|
469
541
|
many_hits.drop(columns="geometry")
|
|
@@ -472,11 +544,145 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
|
|
|
472
544
|
.drop(["_area"], axis=1, errors="ignore")
|
|
473
545
|
)
|
|
474
546
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
547
|
+
# aggregate geometry
|
|
548
|
+
if fix_double:
|
|
549
|
+
assert eliminated.index.is_unique
|
|
550
|
+
|
|
551
|
+
many_hits = many_hits.set_index("_dissolve_idx")
|
|
552
|
+
many_hits["_row_idx"] = range(len(many_hits))
|
|
553
|
+
|
|
554
|
+
# TODO kan dette fikses trygt med .duplicated og ~x.duplicated?
|
|
555
|
+
eliminators: pd.Series = many_hits.loc[
|
|
556
|
+
many_hits["_to_eliminate"] != 1, "geometry"
|
|
557
|
+
]
|
|
558
|
+
to_be_eliminated = many_hits.loc[many_hits["_to_eliminate"] == 1]
|
|
559
|
+
|
|
560
|
+
if 0:
|
|
561
|
+
tree = STRtree(eliminators.values)
|
|
562
|
+
left, right = tree.query(
|
|
563
|
+
to_be_eliminated.geometry.values, predicate="intersects"
|
|
564
|
+
)
|
|
565
|
+
pairs = pd.Series(right, index=left).to_frame("right")
|
|
566
|
+
pairs["_dissolve_idx"] = pairs.index.map(
|
|
567
|
+
dict(enumerate(to_be_eliminated.index))
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
soon_erased = to_be_eliminated.iloc[pairs.index]
|
|
571
|
+
intersecting = eliminators.iloc[pairs["right"]]
|
|
572
|
+
|
|
573
|
+
intersecting.index = soon_erased.index
|
|
574
|
+
soon_erased = soon_erased.geometry.groupby(level=0).agg(unary_union)
|
|
575
|
+
intersecting = intersecting.groupby(level=0).agg(unary_union)
|
|
576
|
+
|
|
577
|
+
soon_erased.loc[:] = difference(
|
|
578
|
+
soon_erased.values,
|
|
579
|
+
intersecting.values,
|
|
580
|
+
)
|
|
581
|
+
intersecting.loc[:] = difference(
|
|
582
|
+
intersecting.values,
|
|
583
|
+
soon_erased.values,
|
|
584
|
+
)
|
|
585
|
+
|
|
586
|
+
eliminated["geometry"] = (
|
|
587
|
+
pd.concat([intersecting, soon_erased])
|
|
588
|
+
.groupby(level=0)
|
|
589
|
+
.agg(lambda x: make_valid(unary_union(x.dropna().values)))
|
|
590
|
+
)
|
|
591
|
+
from ..maps.maps import explore, explore_locals
|
|
592
|
+
|
|
593
|
+
explore_locals()
|
|
594
|
+
|
|
595
|
+
# all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
|
|
596
|
+
all_geoms: pd.Series = gdf.geometry
|
|
597
|
+
|
|
598
|
+
tree = STRtree(all_geoms.values)
|
|
599
|
+
left, right = tree.query(
|
|
600
|
+
to_be_eliminated.geometry.values, predicate="intersects"
|
|
601
|
+
)
|
|
602
|
+
pairs = pd.Series(right, index=left).to_frame("right")
|
|
603
|
+
pairs["_dissolve_idx"] = pairs.index.map(
|
|
604
|
+
dict(enumerate(to_be_eliminated.index))
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
# pairs = pairs.loc[lambda x: x["right"] != x["_dissolve_idx"]]
|
|
608
|
+
|
|
609
|
+
soon_erased = to_be_eliminated.iloc[pairs.index]
|
|
610
|
+
intersecting = all_geoms.iloc[pairs["right"]]
|
|
611
|
+
|
|
612
|
+
shoud_not_erase = soon_erased.index != intersecting.index
|
|
613
|
+
soon_erased = soon_erased[shoud_not_erase]
|
|
614
|
+
intersecting = intersecting[shoud_not_erase]
|
|
615
|
+
|
|
616
|
+
missing = to_be_eliminated.loc[
|
|
617
|
+
# (~to_be_eliminated.index.isin(soon_erased.index))
|
|
618
|
+
# |
|
|
619
|
+
(~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])),
|
|
620
|
+
# | (~to_be_eliminated["_row_idx"].isin(soon_erased.index)),
|
|
621
|
+
"geometry",
|
|
622
|
+
]
|
|
623
|
+
|
|
624
|
+
if 0:
|
|
625
|
+
from ..geopandas_tools.conversion import to_gdf
|
|
626
|
+
from ..maps.maps import explore, explore_locals
|
|
627
|
+
|
|
628
|
+
display(pairs)
|
|
629
|
+
display(soon_erased.index.unique())
|
|
630
|
+
display(soon_erased._row_idx.unique())
|
|
631
|
+
display(to_be_eliminated.index.unique())
|
|
632
|
+
display(to_be_eliminated._row_idx.unique())
|
|
633
|
+
display(missing.index.unique())
|
|
634
|
+
|
|
635
|
+
display(soon_erased)
|
|
636
|
+
display(to_be_eliminated)
|
|
637
|
+
display(missing)
|
|
478
638
|
|
|
479
|
-
|
|
639
|
+
explore(
|
|
640
|
+
to_gdf(soon_erased, 25833), intersecting=to_gdf(intersecting, 25833)
|
|
641
|
+
)
|
|
642
|
+
for j, ((i, g), (i2, g2)) in enumerate(
|
|
643
|
+
zip(intersecting.items(), soon_erased.geometry.items())
|
|
644
|
+
):
|
|
645
|
+
explore(
|
|
646
|
+
to_gdf(g, 25833).assign(ii=i, j=j),
|
|
647
|
+
g2=to_gdf(g2, 25833).assign(ii=i2, j=j),
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
if 0:
|
|
651
|
+
explore(to_gdf(to_be_eliminated.iloc[[16]]))
|
|
652
|
+
explore(to_gdf(to_be_eliminated.iloc[[15]]))
|
|
653
|
+
explore(to_gdf(to_be_eliminated.iloc[[0]]))
|
|
654
|
+
print("hei")
|
|
655
|
+
explore(to_gdf(soon_erased.loc[soon_erased.index == 16]))
|
|
656
|
+
explore(to_gdf(soon_erased.loc[soon_erased.index == 36]))
|
|
657
|
+
|
|
658
|
+
explore(to_gdf(soon_erased.loc[soon_erased._row_idx == 16]))
|
|
659
|
+
explore(to_gdf(soon_erased.loc[soon_erased._row_idx == 36]))
|
|
660
|
+
|
|
661
|
+
# allign and aggregate by dissolve index to not get duplicates in difference
|
|
662
|
+
intersecting.index = soon_erased.index
|
|
663
|
+
soon_erased = soon_erased.geometry.groupby(level=0).agg(unary_union)
|
|
664
|
+
intersecting = intersecting.groupby(level=0).agg(unary_union)
|
|
665
|
+
|
|
666
|
+
# from ..maps.maps import explore_locals
|
|
667
|
+
# explore_locals()
|
|
668
|
+
|
|
669
|
+
soon_erased.loc[:] = difference(
|
|
670
|
+
soon_erased.values,
|
|
671
|
+
intersecting.values,
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
eliminated["geometry"] = (
|
|
675
|
+
pd.concat([eliminators, soon_erased, missing])
|
|
676
|
+
.groupby(level=0)
|
|
677
|
+
.agg(lambda x: make_valid(unary_union(x.dropna().values)))
|
|
678
|
+
)
|
|
679
|
+
|
|
680
|
+
else:
|
|
681
|
+
eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
|
|
682
|
+
lambda x: make_valid(unary_union(x.values))
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
# setting crs on the GeometryArrays to avoid warning in concat
|
|
480
686
|
not_to_dissolve.geometry.values.crs = crs
|
|
481
687
|
eliminated.geometry.values.crs = crs
|
|
482
688
|
one_hit.geometry.values.crs = crs
|
|
@@ -485,7 +691,7 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, **kwargs):
|
|
|
485
691
|
|
|
486
692
|
assert all(df.index.name == "_dissolve_idx" for df in to_concat)
|
|
487
693
|
|
|
488
|
-
return pd.concat(to_concat).sort_index()
|
|
694
|
+
return pd.concat(to_concat).sort_index().drop(columns="_to_eliminate")
|
|
489
695
|
|
|
490
696
|
|
|
491
697
|
def close_thin_holes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
|
|
@@ -601,33 +807,6 @@ def close_all_holes(
|
|
|
601
807
|
return gdf.map(lambda x: _close_all_holes_no_islands(x, all_geoms))
|
|
602
808
|
|
|
603
809
|
|
|
604
|
-
def _close_thin_holes(
|
|
605
|
-
gdf: GeoDataFrame | GeoSeries,
|
|
606
|
-
tolerance: int | float,
|
|
607
|
-
*,
|
|
608
|
-
ignore_islands: bool = False,
|
|
609
|
-
copy: bool = True,
|
|
610
|
-
) -> GeoDataFrame | GeoSeries:
|
|
611
|
-
holes = get_holes(gdf)
|
|
612
|
-
|
|
613
|
-
if not len(holes):
|
|
614
|
-
return gdf
|
|
615
|
-
|
|
616
|
-
if not ignore_islands:
|
|
617
|
-
inside_holes = sfilter(gdf, holes, predicate="within")
|
|
618
|
-
|
|
619
|
-
def is_thin(x):
|
|
620
|
-
return x.buffer(-tolerance).is_empty
|
|
621
|
-
|
|
622
|
-
in_between = clean_overlay(
|
|
623
|
-
holes, inside_holes, how="difference", grid_size=None
|
|
624
|
-
).loc[is_thin]
|
|
625
|
-
|
|
626
|
-
holes = pd.concat([holes, in_between])
|
|
627
|
-
|
|
628
|
-
thin_holes = holes.loc[is_thin]
|
|
629
|
-
|
|
630
|
-
|
|
631
810
|
def close_small_holes(
|
|
632
811
|
gdf: GeoDataFrame | GeoSeries,
|
|
633
812
|
max_area: int | float,
|
|
@@ -802,6 +981,9 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
|
|
|
802
981
|
include_interiors: If False (default), the holes inside individual polygons
|
|
803
982
|
will not be included as gaps.
|
|
804
983
|
|
|
984
|
+
Note:
|
|
985
|
+
See get_holes to find holes inside singlepart polygons.
|
|
986
|
+
|
|
805
987
|
Returns:
|
|
806
988
|
GeoDataFrame of polygons with only a geometry column.
|
|
807
989
|
"""
|
|
@@ -815,14 +997,14 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
|
|
|
815
997
|
{"geometry": [box(*tuple(gdf.total_bounds)).buffer(1)]}, crs=gdf.crs
|
|
816
998
|
)
|
|
817
999
|
|
|
818
|
-
|
|
1000
|
+
bbox_diff = make_all_singlepart(
|
|
819
1001
|
clean_overlay(bbox, gdf, how="difference", geom_type="polygon")
|
|
820
1002
|
)
|
|
821
1003
|
|
|
822
1004
|
# remove the outer "gap", i.e. the surrounding area
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
)
|
|
1005
|
+
bbox_ring = get_exterior_ring(bbox.geometry.values)
|
|
1006
|
+
without_outer_ring = sfilter_inverse(bbox_diff, bbox_ring)
|
|
1007
|
+
return without_outer_ring.reset_index(drop=True)
|
|
826
1008
|
|
|
827
1009
|
|
|
828
1010
|
def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
|
|
@@ -833,22 +1015,21 @@ def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
|
|
|
833
1015
|
as_polygons: If True (default), the holes will be returned as polygons.
|
|
834
1016
|
If False, they will be returned as LinearRings.
|
|
835
1017
|
|
|
1018
|
+
Note:
|
|
1019
|
+
See get_gaps to find holes/gaps between undissolved polygons.
|
|
1020
|
+
|
|
836
1021
|
Returns:
|
|
837
1022
|
GeoDataFrame of polygons or linearrings with only a geometry column.
|
|
838
1023
|
"""
|
|
839
1024
|
if not len(gdf):
|
|
840
|
-
return GeoDataFrame({"geometry": []}, crs=gdf.crs)
|
|
1025
|
+
return GeoDataFrame({"geometry": []}, index=gdf.index, crs=gdf.crs)
|
|
841
1026
|
|
|
842
1027
|
def as_linearring(x):
|
|
843
1028
|
return x
|
|
844
1029
|
|
|
845
1030
|
astype = polygons if as_polygons else as_linearring
|
|
846
1031
|
|
|
847
|
-
geoms = (
|
|
848
|
-
make_all_singlepart(gdf.geometry).to_numpy()
|
|
849
|
-
if isinstance(gdf, GeoDataFrame)
|
|
850
|
-
else make_all_singlepart(gdf).to_numpy()
|
|
851
|
-
)
|
|
1032
|
+
geoms = make_all_singlepart(gdf.geometry).to_numpy()
|
|
852
1033
|
|
|
853
1034
|
rings = [
|
|
854
1035
|
GeoSeries(astype(get_interior_ring(geoms, i)), crs=gdf.crs)
|