ssb-sgis 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +10 -3
- sgis/debug_config.py +24 -0
- sgis/geopandas_tools/bounds.py +16 -21
- sgis/geopandas_tools/buffer_dissolve_explode.py +112 -30
- sgis/geopandas_tools/centerlines.py +4 -91
- sgis/geopandas_tools/cleaning.py +1576 -583
- sgis/geopandas_tools/conversion.py +24 -14
- sgis/geopandas_tools/duplicates.py +27 -6
- sgis/geopandas_tools/general.py +259 -100
- sgis/geopandas_tools/geometry_types.py +1 -1
- sgis/geopandas_tools/neighbors.py +16 -12
- sgis/geopandas_tools/overlay.py +7 -3
- sgis/geopandas_tools/point_operations.py +3 -3
- sgis/geopandas_tools/polygon_operations.py +505 -100
- sgis/geopandas_tools/polygons_as_rings.py +40 -8
- sgis/geopandas_tools/sfilter.py +26 -9
- sgis/io/dapla_functions.py +238 -19
- sgis/maps/examine.py +11 -10
- sgis/maps/explore.py +227 -155
- sgis/maps/legend.py +13 -4
- sgis/maps/map.py +22 -13
- sgis/maps/maps.py +100 -29
- sgis/maps/thematicmap.py +25 -18
- sgis/networkanalysis/_service_area.py +6 -1
- sgis/networkanalysis/cutting_lines.py +12 -5
- sgis/networkanalysis/finding_isolated_networks.py +13 -6
- sgis/networkanalysis/networkanalysis.py +10 -12
- sgis/parallel/parallel.py +27 -10
- sgis/raster/base.py +208 -0
- sgis/raster/cube.py +3 -3
- sgis/raster/image_collection.py +1421 -724
- sgis/raster/indices.py +10 -7
- sgis/raster/raster.py +7 -7
- sgis/raster/sentinel_config.py +33 -17
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/METADATA +6 -7
- ssb_sgis-1.0.5.dist-info/RECORD +62 -0
- ssb_sgis-1.0.3.dist-info/RECORD +0 -61
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Functions for polygon geometries."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
3
5
|
import networkx as nx
|
|
4
6
|
import numpy as np
|
|
5
7
|
import pandas as pd
|
|
@@ -10,21 +12,35 @@ from shapely import area
|
|
|
10
12
|
from shapely import box
|
|
11
13
|
from shapely import buffer
|
|
12
14
|
from shapely import difference
|
|
15
|
+
from shapely import extract_unique_points
|
|
13
16
|
from shapely import get_exterior_ring
|
|
14
17
|
from shapely import get_interior_ring
|
|
18
|
+
from shapely import get_num_geometries
|
|
15
19
|
from shapely import get_num_interior_rings
|
|
16
20
|
from shapely import get_parts
|
|
17
21
|
from shapely import is_empty
|
|
18
22
|
from shapely import make_valid
|
|
19
23
|
from shapely import polygons
|
|
20
|
-
from shapely import
|
|
24
|
+
from shapely import union_all
|
|
21
25
|
from shapely.errors import GEOSException
|
|
22
|
-
|
|
26
|
+
from shapely.geometry import LinearRing
|
|
27
|
+
from shapely.ops import SplitOp
|
|
28
|
+
|
|
29
|
+
from ..debug_config import _DEBUG_CONFIG
|
|
30
|
+
from ..debug_config import _try_debug_print
|
|
31
|
+
from ..maps.maps import explore_locals
|
|
32
|
+
from .conversion import to_gdf
|
|
33
|
+
from .conversion import to_geoseries
|
|
34
|
+
from .duplicates import _get_intersecting_geometries
|
|
35
|
+
from .general import _grouped_unary_union
|
|
23
36
|
from .general import _parallel_unary_union
|
|
24
37
|
from .general import _parallel_unary_union_geoseries
|
|
25
38
|
from .general import _push_geom_col
|
|
39
|
+
from .general import _unary_union_for_notna
|
|
26
40
|
from .general import clean_geoms
|
|
41
|
+
from .general import extend_lines
|
|
27
42
|
from .general import get_grouped_centroids
|
|
43
|
+
from .general import get_line_segments
|
|
28
44
|
from .general import to_lines
|
|
29
45
|
from .geometry_types import get_geom_type
|
|
30
46
|
from .geometry_types import make_all_singlepart
|
|
@@ -36,6 +52,9 @@ from .polygons_as_rings import PolygonsAsRings
|
|
|
36
52
|
from .sfilter import sfilter
|
|
37
53
|
from .sfilter import sfilter_inverse
|
|
38
54
|
|
|
55
|
+
PRECISION = 1e-3
|
|
56
|
+
_BUFFER = False
|
|
57
|
+
|
|
39
58
|
|
|
40
59
|
def get_polygon_clusters(
|
|
41
60
|
*gdfs: GeoDataFrame | GeoSeries,
|
|
@@ -204,17 +223,16 @@ def get_cluster_mapper(
|
|
|
204
223
|
|
|
205
224
|
|
|
206
225
|
def eliminate_by_longest(
|
|
207
|
-
gdf: GeoDataFrame |
|
|
226
|
+
gdf: GeoDataFrame | tuple[GeoDataFrame],
|
|
208
227
|
to_eliminate: GeoDataFrame,
|
|
209
228
|
*,
|
|
210
|
-
remove_isolated: bool = False,
|
|
211
229
|
fix_double: bool = True,
|
|
212
230
|
ignore_index: bool = False,
|
|
213
231
|
aggfunc: str | dict | list | None = None,
|
|
214
232
|
grid_size=None,
|
|
215
233
|
n_jobs: int = 1,
|
|
216
234
|
**kwargs,
|
|
217
|
-
) ->
|
|
235
|
+
) -> tuple[GeoDataFrame]:
|
|
218
236
|
"""Dissolves selected polygons with the longest bordering neighbor polygon.
|
|
219
237
|
|
|
220
238
|
Eliminates selected geometries by dissolving them with the neighboring
|
|
@@ -226,9 +244,6 @@ def eliminate_by_longest(
|
|
|
226
244
|
Args:
|
|
227
245
|
gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
|
|
228
246
|
to_eliminate: The geometries to be eliminated by 'gdf'.
|
|
229
|
-
remove_isolated: If False (default), polygons in 'to_eliminate' that share
|
|
230
|
-
no border with any polygon in 'gdf' will be kept. If True, the isolated
|
|
231
|
-
polygons will be removed.
|
|
232
247
|
fix_double: If True, geometries to be eliminated will be erased by overlapping
|
|
233
248
|
geometries to not get double surfaces if the geometries in 'to_eliminate'
|
|
234
249
|
overlaps with multiple geometries in 'gdf'.
|
|
@@ -246,8 +261,11 @@ def eliminate_by_longest(
|
|
|
246
261
|
**kwargs: Keyword arguments passed to the dissolve method.
|
|
247
262
|
|
|
248
263
|
Returns:
|
|
249
|
-
|
|
250
|
-
|
|
264
|
+
A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
|
|
265
|
+
dissolved in and a GeoDataFrame with the potentionally isolated
|
|
266
|
+
polygons that could not be eliminated. If multiple GeoDataFrame
|
|
267
|
+
are passed as 'gdf', the returned tuple will contain each frame
|
|
268
|
+
plus the isolated polygons as the last item.
|
|
251
269
|
|
|
252
270
|
Examples:
|
|
253
271
|
---------
|
|
@@ -264,19 +282,21 @@ def eliminate_by_longest(
|
|
|
264
282
|
Using multiple GeoDataFrame as input, the sliver is eliminated into the small
|
|
265
283
|
polygon (because it has the longest border with sliver).
|
|
266
284
|
|
|
267
|
-
>>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_longest(
|
|
285
|
+
>>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_longest(
|
|
268
286
|
... [small_poly, large_poly], sliver
|
|
269
287
|
... )
|
|
270
288
|
|
|
271
289
|
With only one input GeoDataFrame:
|
|
272
290
|
|
|
273
291
|
>>> polys = pd.concat([small_poly, large_poly])
|
|
274
|
-
>>> eliminated = sg.eliminate_by_longest(polys, sliver)
|
|
275
|
-
|
|
276
|
-
|
|
292
|
+
>>> eliminated, isolated = sg.eliminate_by_longest(polys, sliver)
|
|
277
293
|
"""
|
|
278
|
-
|
|
279
|
-
|
|
294
|
+
_recurse = kwargs.pop("_recurse", False)
|
|
295
|
+
|
|
296
|
+
if not len(to_eliminate) or not len(gdf):
|
|
297
|
+
if isinstance(gdf, (list, tuple)):
|
|
298
|
+
return (*gdf, to_eliminate)
|
|
299
|
+
return gdf, to_eliminate
|
|
280
300
|
|
|
281
301
|
if isinstance(gdf, (list, tuple)):
|
|
282
302
|
# concat, then break up the dataframes in the end
|
|
@@ -295,6 +315,28 @@ def eliminate_by_longest(
|
|
|
295
315
|
|
|
296
316
|
gdf = gdf.reset_index(drop=True)
|
|
297
317
|
|
|
318
|
+
# TODO: is it ok to singlepart here?
|
|
319
|
+
gdf = make_all_singlepart(gdf, ignore_index=True).pipe(
|
|
320
|
+
to_single_geom_type, "polygon"
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
if _BUFFER:
|
|
324
|
+
gdf.geometry = gdf.buffer(
|
|
325
|
+
PRECISION,
|
|
326
|
+
resolution=1,
|
|
327
|
+
join_style=2,
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
to_eliminate.geometry = to_eliminate.buffer(
|
|
331
|
+
PRECISION,
|
|
332
|
+
resolution=1,
|
|
333
|
+
join_style=2,
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
# more_than_one = get_num_geometries(to_eliminate.geometry.values) > 1
|
|
337
|
+
# filt = more_than_one, to_eliminate._geometry_column_name
|
|
338
|
+
# to_eliminate.loc[*filt] = to_eliminate.loc[*filt].apply(_unary_union_for_notna)
|
|
339
|
+
|
|
298
340
|
gdf["_dissolve_idx"] = gdf.index
|
|
299
341
|
to_eliminate = to_eliminate.assign(_eliminate_idx=lambda x: range(len(x)))
|
|
300
342
|
|
|
@@ -333,6 +375,11 @@ def eliminate_by_longest(
|
|
|
333
375
|
.drop(columns="index_right")
|
|
334
376
|
.drop_duplicates("_eliminate_idx")
|
|
335
377
|
)
|
|
378
|
+
isolated = isolated.drop(
|
|
379
|
+
["_dissolve_idx", "_length", "_eliminate_idx"],
|
|
380
|
+
axis=1,
|
|
381
|
+
errors="ignore",
|
|
382
|
+
)
|
|
336
383
|
|
|
337
384
|
eliminated = _eliminate(
|
|
338
385
|
pd.DataFrame(gdf),
|
|
@@ -349,31 +396,96 @@ def eliminate_by_longest(
|
|
|
349
396
|
eliminated.index = eliminated.index.map(idx_mapper)
|
|
350
397
|
eliminated.index.name = idx_name
|
|
351
398
|
|
|
352
|
-
if not remove_isolated and len(isolated):
|
|
353
|
-
eliminated = pd.concat([eliminated, isolated])
|
|
354
|
-
|
|
355
399
|
eliminated = eliminated.drop(
|
|
356
|
-
["_dissolve_idx", "_length", "_eliminate_idx"
|
|
400
|
+
["_dissolve_idx", "_length", "_eliminate_idx"],
|
|
357
401
|
axis=1,
|
|
358
402
|
errors="ignore",
|
|
359
403
|
)
|
|
360
404
|
|
|
361
405
|
out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)
|
|
362
406
|
|
|
407
|
+
if _BUFFER:
|
|
408
|
+
out.geometry = out.buffer(
|
|
409
|
+
-PRECISION,
|
|
410
|
+
resolution=1,
|
|
411
|
+
join_style=2,
|
|
412
|
+
)
|
|
413
|
+
isolated.geometry = isolated.buffer(
|
|
414
|
+
-PRECISION,
|
|
415
|
+
resolution=1,
|
|
416
|
+
join_style=2,
|
|
417
|
+
)
|
|
418
|
+
|
|
363
419
|
if geom_type != "mixed":
|
|
364
420
|
out = to_single_geom_type(out, geom_type)
|
|
365
421
|
|
|
366
422
|
out = out.reset_index(drop=True) if ignore_index else out
|
|
367
423
|
|
|
424
|
+
_try_debug_print("inni eliminate_by_longest")
|
|
425
|
+
explore_locals(center=_DEBUG_CONFIG["center"])
|
|
426
|
+
|
|
427
|
+
if not _recurse and len(isolated):
|
|
428
|
+
if 0:
|
|
429
|
+
isolated.geometry = isolated.buffer(
|
|
430
|
+
-PRECISION,
|
|
431
|
+
resolution=1,
|
|
432
|
+
join_style=2,
|
|
433
|
+
)
|
|
434
|
+
out, isolated = _recursively_eliminate_new_neighbors(
|
|
435
|
+
out,
|
|
436
|
+
isolated,
|
|
437
|
+
func=eliminate_by_longest,
|
|
438
|
+
fix_double=fix_double,
|
|
439
|
+
ignore_index=ignore_index,
|
|
440
|
+
aggfunc=aggfunc,
|
|
441
|
+
grid_size=grid_size,
|
|
442
|
+
n_jobs=n_jobs,
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
_try_debug_print("inni eliminate_by_longest 2")
|
|
446
|
+
explore_locals(center=_DEBUG_CONFIG["center"])
|
|
447
|
+
|
|
448
|
+
# assert (
|
|
449
|
+
# out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]]
|
|
450
|
+
# .notna()
|
|
451
|
+
# .all()
|
|
452
|
+
# .all()
|
|
453
|
+
# ), out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]].sort_values(
|
|
454
|
+
# ["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]
|
|
455
|
+
# )
|
|
456
|
+
|
|
368
457
|
if not was_multiple_gdfs:
|
|
369
|
-
return out
|
|
458
|
+
return out, isolated
|
|
370
459
|
|
|
371
460
|
gdfs = ()
|
|
372
461
|
for i, cols in enumerate(original_cols):
|
|
373
462
|
df = out.loc[out["_df_idx"] == i, cols]
|
|
374
463
|
gdfs += (df,)
|
|
375
464
|
|
|
376
|
-
return gdfs
|
|
465
|
+
return (*gdfs, isolated)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _recursively_eliminate_new_neighbors(
|
|
469
|
+
df: GeoDataFrame,
|
|
470
|
+
isolated: GeoDataFrame,
|
|
471
|
+
func: Callable,
|
|
472
|
+
**kwargs,
|
|
473
|
+
):
|
|
474
|
+
len_now = len(isolated)
|
|
475
|
+
while len(isolated):
|
|
476
|
+
_try_debug_print(f"recurse len({len(isolated)})")
|
|
477
|
+
df, isolated = func(
|
|
478
|
+
df,
|
|
479
|
+
isolated,
|
|
480
|
+
_recurse=True,
|
|
481
|
+
**kwargs,
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
if len_now == len(isolated):
|
|
485
|
+
break
|
|
486
|
+
len_now = len(isolated)
|
|
487
|
+
|
|
488
|
+
return df, isolated
|
|
377
489
|
|
|
378
490
|
|
|
379
491
|
def eliminate_by_largest(
|
|
@@ -381,15 +493,14 @@ def eliminate_by_largest(
|
|
|
381
493
|
to_eliminate: GeoDataFrame,
|
|
382
494
|
*,
|
|
383
495
|
max_distance: int | float | None = None,
|
|
384
|
-
|
|
385
|
-
fix_double: bool = False,
|
|
496
|
+
fix_double: bool = True,
|
|
386
497
|
ignore_index: bool = False,
|
|
387
498
|
aggfunc: str | dict | list | None = None,
|
|
388
499
|
predicate: str = "intersects",
|
|
389
500
|
grid_size=None,
|
|
390
501
|
n_jobs: int = 1,
|
|
391
502
|
**kwargs,
|
|
392
|
-
) ->
|
|
503
|
+
) -> tuple[GeoDataFrame]:
|
|
393
504
|
"""Dissolves selected polygons with the largest neighbor polygon.
|
|
394
505
|
|
|
395
506
|
Eliminates selected geometries by dissolving them with the neighboring
|
|
@@ -401,9 +512,6 @@ def eliminate_by_largest(
|
|
|
401
512
|
to_eliminate: The geometries to be eliminated by 'gdf'.
|
|
402
513
|
max_distance: Max distance to search for neighbors. Defaults to None, meaning
|
|
403
514
|
0.
|
|
404
|
-
remove_isolated: If False (default), polygons in 'to_eliminate' that share
|
|
405
|
-
no border with any polygon in 'gdf' will be kept. If True, the isolated
|
|
406
|
-
polygons will be removed.
|
|
407
515
|
fix_double: If True, geometries to be eliminated will be erased by overlapping
|
|
408
516
|
geometries to not get double surfaces if the geometries in 'to_eliminate'
|
|
409
517
|
overlaps with multiple geometries in 'gdf'.
|
|
@@ -422,8 +530,11 @@ def eliminate_by_largest(
|
|
|
422
530
|
**kwargs: Keyword arguments passed to the dissolve method.
|
|
423
531
|
|
|
424
532
|
Returns:
|
|
425
|
-
|
|
426
|
-
|
|
533
|
+
A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
|
|
534
|
+
dissolved in and a GeoDataFrame with the potentionally isolated
|
|
535
|
+
polygons that could not be eliminated. If multiple GeoDataFrame
|
|
536
|
+
are passed as 'gdf', the returned tuple will contain each frame
|
|
537
|
+
plus the isolated polygons as the last item.
|
|
427
538
|
|
|
428
539
|
Examples:
|
|
429
540
|
---------
|
|
@@ -440,20 +551,18 @@ def eliminate_by_largest(
|
|
|
440
551
|
Using multiple GeoDataFrame as input, the sliver is eliminated into
|
|
441
552
|
the large polygon.
|
|
442
553
|
|
|
443
|
-
>>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_largest(
|
|
554
|
+
>>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_largest(
|
|
444
555
|
... [small_poly, large_poly], sliver
|
|
445
556
|
... )
|
|
446
557
|
|
|
447
558
|
With only one input GeoDataFrame:
|
|
448
559
|
|
|
449
560
|
>>> polys = pd.concat([small_poly, large_poly])
|
|
450
|
-
>>> eliminated = sg.eliminate_by_largest(polys, sliver)
|
|
451
|
-
|
|
561
|
+
>>> eliminated, isolated = sg.eliminate_by_largest(polys, sliver)
|
|
452
562
|
"""
|
|
453
563
|
return _eliminate_by_area(
|
|
454
564
|
gdf,
|
|
455
565
|
to_eliminate=to_eliminate,
|
|
456
|
-
remove_isolated=remove_isolated,
|
|
457
566
|
max_distance=max_distance,
|
|
458
567
|
ignore_index=ignore_index,
|
|
459
568
|
sort_ascending=False,
|
|
@@ -471,19 +580,17 @@ def eliminate_by_smallest(
|
|
|
471
580
|
to_eliminate: GeoDataFrame,
|
|
472
581
|
*,
|
|
473
582
|
max_distance: int | float | None = None,
|
|
474
|
-
remove_isolated: bool = False,
|
|
475
583
|
ignore_index: bool = False,
|
|
476
584
|
aggfunc: str | dict | list | None = None,
|
|
477
585
|
predicate: str = "intersects",
|
|
478
|
-
fix_double: bool =
|
|
586
|
+
fix_double: bool = True,
|
|
479
587
|
grid_size=None,
|
|
480
588
|
n_jobs: int = 1,
|
|
481
589
|
**kwargs,
|
|
482
|
-
) ->
|
|
590
|
+
) -> tuple[GeoDataFrame]:
|
|
483
591
|
return _eliminate_by_area(
|
|
484
592
|
gdf,
|
|
485
593
|
to_eliminate=to_eliminate,
|
|
486
|
-
remove_isolated=remove_isolated,
|
|
487
594
|
max_distance=max_distance,
|
|
488
595
|
ignore_index=ignore_index,
|
|
489
596
|
sort_ascending=True,
|
|
@@ -499,19 +606,21 @@ def eliminate_by_smallest(
|
|
|
499
606
|
def _eliminate_by_area(
|
|
500
607
|
gdf: GeoDataFrame,
|
|
501
608
|
to_eliminate: GeoDataFrame,
|
|
502
|
-
remove_isolated: bool,
|
|
503
609
|
max_distance: int | float | None,
|
|
504
610
|
sort_ascending: bool,
|
|
505
611
|
ignore_index: bool = False,
|
|
506
612
|
aggfunc: str | dict | list | None = None,
|
|
507
613
|
predicate="intersects",
|
|
508
|
-
fix_double: bool =
|
|
614
|
+
fix_double: bool = True,
|
|
509
615
|
grid_size=None,
|
|
510
616
|
n_jobs: int = 1,
|
|
511
617
|
**kwargs,
|
|
512
618
|
) -> GeoDataFrame:
|
|
513
|
-
|
|
514
|
-
|
|
619
|
+
_recurse = kwargs.pop("_recurse", False)
|
|
620
|
+
|
|
621
|
+
if not len(to_eliminate) or not len(gdf):
|
|
622
|
+
return gdf, to_eliminate
|
|
623
|
+
|
|
515
624
|
if isinstance(gdf, (list, tuple)):
|
|
516
625
|
was_multiple_gdfs = True
|
|
517
626
|
original_cols = [df.columns for df in gdf]
|
|
@@ -525,6 +634,8 @@ def _eliminate_by_area(
|
|
|
525
634
|
if not ignore_index:
|
|
526
635
|
idx_mapper = dict(enumerate(gdf.index))
|
|
527
636
|
idx_name = gdf.index.name
|
|
637
|
+
idx_mapper_to_eliminate = dict(enumerate(to_eliminate.index))
|
|
638
|
+
idx_name_to_eliminate = to_eliminate.index.name
|
|
528
639
|
|
|
529
640
|
gdf = make_all_singlepart(gdf).reset_index(drop=True)
|
|
530
641
|
to_eliminate = make_all_singlepart(to_eliminate).reset_index(drop=True)
|
|
@@ -564,37 +675,71 @@ def _eliminate_by_area(
|
|
|
564
675
|
**kwargs,
|
|
565
676
|
)
|
|
566
677
|
|
|
567
|
-
if not ignore_index:
|
|
568
|
-
eliminated.index = eliminated.index.map(idx_mapper)
|
|
569
|
-
eliminated.index.name = idx_name
|
|
570
|
-
|
|
571
|
-
if not remove_isolated:
|
|
572
|
-
isolated = joined.loc[joined["_dissolve_idx"].isna()]
|
|
573
|
-
if len(isolated):
|
|
574
|
-
eliminated = pd.concat([eliminated, isolated])
|
|
575
|
-
|
|
576
678
|
eliminated = eliminated.drop(
|
|
577
679
|
["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
|
|
578
680
|
axis=1,
|
|
579
681
|
errors="ignore",
|
|
580
682
|
)
|
|
581
683
|
|
|
582
|
-
out = GeoDataFrame(
|
|
684
|
+
out = GeoDataFrame(
|
|
685
|
+
eliminated,
|
|
686
|
+
geometry="geometry",
|
|
687
|
+
crs=crs,
|
|
688
|
+
).pipe(clean_geoms)
|
|
689
|
+
|
|
690
|
+
isolated = (
|
|
691
|
+
GeoDataFrame(
|
|
692
|
+
joined.loc[joined["_dissolve_idx"].isna()], geometry="geometry", crs=crs
|
|
693
|
+
)
|
|
694
|
+
.drop(
|
|
695
|
+
["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
|
|
696
|
+
axis=1,
|
|
697
|
+
errors="ignore",
|
|
698
|
+
)
|
|
699
|
+
.pipe(clean_geoms)
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
if not ignore_index:
|
|
703
|
+
out.index = out.index.map(idx_mapper)
|
|
704
|
+
out.index.name = idx_name
|
|
705
|
+
isolated.index = isolated.index.map(idx_mapper_to_eliminate)
|
|
706
|
+
isolated.index.name = idx_name_to_eliminate
|
|
583
707
|
|
|
584
708
|
if geom_type != "mixed":
|
|
585
709
|
out = to_single_geom_type(out, geom_type)
|
|
586
710
|
|
|
587
711
|
out = out.reset_index(drop=True) if ignore_index else out
|
|
588
712
|
|
|
713
|
+
if not _recurse and len(isolated):
|
|
714
|
+
out, isolated = _recursively_eliminate_new_neighbors(
|
|
715
|
+
out,
|
|
716
|
+
isolated,
|
|
717
|
+
func=_eliminate_by_area,
|
|
718
|
+
max_distance=max_distance,
|
|
719
|
+
sort_ascending=sort_ascending,
|
|
720
|
+
fix_double=fix_double,
|
|
721
|
+
predicate=predicate,
|
|
722
|
+
ignore_index=ignore_index,
|
|
723
|
+
aggfunc=aggfunc,
|
|
724
|
+
grid_size=grid_size,
|
|
725
|
+
n_jobs=n_jobs,
|
|
726
|
+
)
|
|
727
|
+
|
|
589
728
|
if not was_multiple_gdfs:
|
|
590
|
-
return out
|
|
729
|
+
return out, isolated
|
|
730
|
+
|
|
731
|
+
for k, v in locals().items():
|
|
732
|
+
try:
|
|
733
|
+
print(k, v.columns)
|
|
734
|
+
except Exception:
|
|
735
|
+
pass
|
|
591
736
|
|
|
592
737
|
gdfs = ()
|
|
593
738
|
for i, cols in enumerate(original_cols):
|
|
594
739
|
df = out.loc[out["_df_idx"] == i, cols]
|
|
595
740
|
gdfs += (df,)
|
|
596
741
|
|
|
597
|
-
return gdfs
|
|
742
|
+
return (*gdfs, isolated)
|
|
598
743
|
|
|
599
744
|
|
|
600
745
|
def _eliminate(
|
|
@@ -603,6 +748,8 @@ def _eliminate(
|
|
|
603
748
|
if not len(to_eliminate):
|
|
604
749
|
return gdf
|
|
605
750
|
|
|
751
|
+
gdf["_range_idx_elim"] = range(len(gdf))
|
|
752
|
+
|
|
606
753
|
in_to_eliminate = gdf["_dissolve_idx"].isin(to_eliminate["_dissolve_idx"])
|
|
607
754
|
to_dissolve = gdf.loc[in_to_eliminate]
|
|
608
755
|
not_to_dissolve = gdf.loc[~in_to_eliminate].set_index("_dissolve_idx")
|
|
@@ -656,10 +803,22 @@ def _eliminate(
|
|
|
656
803
|
# all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
|
|
657
804
|
all_geoms: pd.Series = gdf.geometry
|
|
658
805
|
|
|
806
|
+
# more_than_one = get_num_geometries(all_geoms.values) > 1
|
|
807
|
+
# all_geoms.loc[more_than_one] = all_geoms.loc[more_than_one].apply(
|
|
808
|
+
# _unary_union_for_notna
|
|
809
|
+
# )
|
|
810
|
+
|
|
811
|
+
# more_than_one = get_num_geometries(to_be_eliminated.values) > 1
|
|
812
|
+
# to_be_eliminated.loc[more_than_one, "geometry"] = to_be_eliminated.loc[
|
|
813
|
+
# more_than_one, "geometry"
|
|
814
|
+
# ].apply(_unary_union_for_notna)
|
|
815
|
+
|
|
816
|
+
# create DataFrame of intersection pairs
|
|
659
817
|
tree = STRtree(all_geoms.values)
|
|
660
818
|
left, right = tree.query(
|
|
661
819
|
to_be_eliminated.geometry.values, predicate="intersects"
|
|
662
820
|
)
|
|
821
|
+
|
|
663
822
|
pairs = pd.Series(right, index=left).to_frame("right")
|
|
664
823
|
pairs["_dissolve_idx"] = pairs.index.map(
|
|
665
824
|
dict(enumerate(to_be_eliminated.index))
|
|
@@ -684,15 +843,20 @@ def _eliminate(
|
|
|
684
843
|
|
|
685
844
|
# allign and aggregate by dissolve index to not get duplicates in difference
|
|
686
845
|
intersecting.index = soon_erased.index
|
|
687
|
-
soon_erased = soon_erased.geometry.groupby(level=0).agg(
|
|
688
|
-
lambda x: unary_union(x, grid_size=grid_size)
|
|
689
|
-
)
|
|
690
|
-
intersecting = intersecting.groupby(level=0).agg(
|
|
691
|
-
lambda x: unary_union(x, grid_size=grid_size)
|
|
692
|
-
)
|
|
693
846
|
|
|
694
|
-
|
|
695
|
-
|
|
847
|
+
soon_erased = _grouped_unary_union(soon_erased, level=0, grid_size=grid_size)
|
|
848
|
+
intersecting = _grouped_unary_union(intersecting, level=0, grid_size=grid_size)
|
|
849
|
+
|
|
850
|
+
assert soon_erased.index.equals(soon_erased.index)
|
|
851
|
+
|
|
852
|
+
# soon_erased = soon_erased.geometry.groupby(level=0).agg(
|
|
853
|
+
# lambda x: unary_union(x, grid_size=grid_size)
|
|
854
|
+
# )
|
|
855
|
+
# intersecting = intersecting.groupby(level=0).agg(
|
|
856
|
+
# lambda x: unary_union(x, grid_size=grid_size)
|
|
857
|
+
# )
|
|
858
|
+
|
|
859
|
+
# explore_locals(center=_DEBUG_CONFIG["center"])
|
|
696
860
|
|
|
697
861
|
soon_erased.loc[:] = _try_difference(
|
|
698
862
|
soon_erased.to_numpy(),
|
|
@@ -702,10 +866,40 @@ def _eliminate(
|
|
|
702
866
|
geom_type="polygon",
|
|
703
867
|
)
|
|
704
868
|
|
|
869
|
+
missing = _grouped_unary_union(missing, level=0, grid_size=grid_size)
|
|
870
|
+
|
|
871
|
+
missing = make_all_singlepart(missing).loc[lambda x: x.area > 0]
|
|
872
|
+
|
|
873
|
+
soon_erased = make_all_singlepart(soon_erased).loc[lambda x: x.area > 0]
|
|
874
|
+
|
|
875
|
+
if 0:
|
|
876
|
+
tree = STRtree(soon_erased.values)
|
|
877
|
+
left, right = tree.query(missing.values, predicate="intersects")
|
|
878
|
+
explore_locals(
|
|
879
|
+
missing2=to_gdf(missing.to_numpy()[left], 25833),
|
|
880
|
+
soon_erased2=to_gdf(soon_erased.to_numpy()[right], 25833),
|
|
881
|
+
center=_DEBUG_CONFIG["center"],
|
|
882
|
+
)
|
|
883
|
+
missing = pd.Series(
|
|
884
|
+
difference(
|
|
885
|
+
missing.to_numpy()[left],
|
|
886
|
+
soon_erased.to_numpy()[right],
|
|
887
|
+
grid_size=grid_size,
|
|
888
|
+
),
|
|
889
|
+
index=left,
|
|
890
|
+
).loc[lambda x: (x.notna()) & (~is_empty(x))]
|
|
891
|
+
|
|
892
|
+
soon_eliminated = pd.concat([eliminators, soon_erased, missing])
|
|
893
|
+
more_than_one = get_num_geometries(soon_eliminated.values) > 1
|
|
894
|
+
|
|
895
|
+
soon_eliminated.loc[more_than_one] = soon_eliminated.loc[more_than_one].apply(
|
|
896
|
+
_unary_union_for_notna
|
|
897
|
+
)
|
|
898
|
+
|
|
705
899
|
if n_jobs > 1:
|
|
706
900
|
eliminated["geometry"] = GeoSeries(
|
|
707
901
|
_parallel_unary_union_geoseries(
|
|
708
|
-
|
|
902
|
+
soon_eliminated,
|
|
709
903
|
level=0,
|
|
710
904
|
grid_size=grid_size,
|
|
711
905
|
n_jobs=n_jobs,
|
|
@@ -713,15 +907,10 @@ def _eliminate(
|
|
|
713
907
|
index=eliminated.index,
|
|
714
908
|
)
|
|
715
909
|
else:
|
|
716
|
-
eliminated["geometry"] = (
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
lambda x: make_valid(
|
|
721
|
-
unary_union(x.dropna().values, grid_size=grid_size)
|
|
722
|
-
)
|
|
723
|
-
)
|
|
724
|
-
)
|
|
910
|
+
eliminated["geometry"] = _grouped_unary_union(soon_eliminated, level=0)
|
|
911
|
+
# eliminated["geometry"] = soon_eliminated.groupby(level=0).agg(
|
|
912
|
+
# lambda x: make_valid(unary_union(x))
|
|
913
|
+
# )
|
|
725
914
|
|
|
726
915
|
else:
|
|
727
916
|
if n_jobs > 1:
|
|
@@ -729,9 +918,7 @@ def _eliminate(
|
|
|
729
918
|
many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
|
|
730
919
|
)
|
|
731
920
|
else:
|
|
732
|
-
eliminated["geometry"] = many_hits
|
|
733
|
-
lambda x: make_valid(unary_union(x.values, grid_size=grid_size))
|
|
734
|
-
)
|
|
921
|
+
eliminated["geometry"] = _grouped_unary_union(many_hits, by="_dissolve_idx")
|
|
735
922
|
|
|
736
923
|
# setting crs on the GeometryArrays to avoid warning in concat
|
|
737
924
|
not_to_dissolve.geometry.values.crs = crs
|
|
@@ -745,31 +932,170 @@ def _eliminate(
|
|
|
745
932
|
|
|
746
933
|
assert all(df.index.name == "_dissolve_idx" for df in to_concat)
|
|
747
934
|
|
|
748
|
-
|
|
935
|
+
out = pd.concat(to_concat).sort_index()
|
|
936
|
+
|
|
937
|
+
duplicated_geoms = _get_intersecting_geometries(
|
|
938
|
+
GeoDataFrame(
|
|
939
|
+
{
|
|
940
|
+
"geometry": out.geometry.values,
|
|
941
|
+
"_range_idx_elim_dups": out["_range_idx_elim"].values,
|
|
942
|
+
},
|
|
943
|
+
),
|
|
944
|
+
geom_type="polygon",
|
|
945
|
+
keep_geom_type=True,
|
|
946
|
+
n_jobs=n_jobs,
|
|
947
|
+
predicate="intersects",
|
|
948
|
+
).pipe(clean_geoms)
|
|
949
|
+
duplicated_geoms.geometry = duplicated_geoms.buffer(-PRECISION)
|
|
950
|
+
duplicated_geoms = duplicated_geoms.pipe(clean_geoms)
|
|
951
|
+
|
|
952
|
+
if len(duplicated_geoms):
|
|
953
|
+
hits_in_original_df = duplicated_geoms.sjoin(
|
|
954
|
+
GeoDataFrame(
|
|
955
|
+
{
|
|
956
|
+
"geometry": gdf.geometry.values,
|
|
957
|
+
"_range_idx_elim": gdf["_range_idx_elim"].values,
|
|
958
|
+
},
|
|
959
|
+
),
|
|
960
|
+
how="inner",
|
|
961
|
+
)
|
|
962
|
+
|
|
963
|
+
should_be_erased = hits_in_original_df.loc[
|
|
964
|
+
lambda x: x["_range_idx_elim"] != x["_range_idx_elim_dups"]
|
|
965
|
+
]
|
|
966
|
+
|
|
967
|
+
should_be_erased_idx = list(
|
|
968
|
+
sorted(should_be_erased["_range_idx_elim_dups"].unique())
|
|
969
|
+
)
|
|
970
|
+
should_erase = (
|
|
971
|
+
should_be_erased.groupby("_range_idx_elim_dups")["geometry"]
|
|
972
|
+
.agg(lambda x: make_valid(union_all(x)))
|
|
973
|
+
.sort_index()
|
|
974
|
+
)
|
|
975
|
+
|
|
976
|
+
# aligining out with "should_erase" before rowwise difference
|
|
977
|
+
out = out.sort_values("_range_idx_elim")
|
|
978
|
+
assert out["_range_idx_elim"].is_unique
|
|
979
|
+
to_be_erased_idx = out["_range_idx_elim"].isin(should_be_erased_idx)
|
|
980
|
+
|
|
981
|
+
out.loc[to_be_erased_idx, "geometry"] = make_valid(
|
|
982
|
+
difference(
|
|
983
|
+
out.loc[
|
|
984
|
+
to_be_erased_idx,
|
|
985
|
+
"geometry",
|
|
986
|
+
].values,
|
|
987
|
+
should_erase.values,
|
|
988
|
+
)
|
|
989
|
+
)
|
|
990
|
+
|
|
991
|
+
from ..maps.maps import explore
|
|
992
|
+
|
|
993
|
+
# display(hits_in_original_df)
|
|
994
|
+
# display(should_be_erased.assign(area=lambda x: x.area))
|
|
995
|
+
|
|
996
|
+
explore(
|
|
997
|
+
gdf=to_gdf(gdf, 25833),
|
|
998
|
+
out=to_gdf(out, 25833),
|
|
999
|
+
should_be_erased=to_gdf(should_be_erased, 25833),
|
|
1000
|
+
duplicated_geoms=duplicated_geoms.set_crs(25833),
|
|
1001
|
+
eli=GeoDataFrame(
|
|
1002
|
+
{
|
|
1003
|
+
"geometry": out.geometry.values,
|
|
1004
|
+
"_range_idx_elim": out["_range_idx_elim"].values,
|
|
1005
|
+
},
|
|
1006
|
+
crs=25833,
|
|
1007
|
+
),
|
|
1008
|
+
center=_DEBUG_CONFIG["center"],
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
_try_debug_print("inni _eliminate")
|
|
1012
|
+
_try_debug_print(duplicated_geoms)
|
|
1013
|
+
explore_locals(center=_DEBUG_CONFIG["center"])
|
|
1014
|
+
|
|
1015
|
+
return out.drop(columns=["_to_eliminate", "_range_idx_elim"])
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
def clean_dissexp(df: GeoDataFrame, dissolve_func: Callable, **kwargs) -> GeoDataFrame:
|
|
1019
|
+
"""Experimental."""
|
|
1020
|
+
original_points = GeoDataFrame(
|
|
1021
|
+
{"geometry": get_parts(extract_unique_points(df.geometry.values))}
|
|
1022
|
+
)[lambda x: ~x.geometry.duplicated()]
|
|
1023
|
+
|
|
1024
|
+
dissolved = df.copy()
|
|
1025
|
+
|
|
1026
|
+
try:
|
|
1027
|
+
dissolved.geometry = dissolved.buffer(PRECISION, resolution=1, join_style=2)
|
|
1028
|
+
except AttributeError as e:
|
|
1029
|
+
if isinstance(dissolved, GeoSeries):
|
|
1030
|
+
dissolved.loc[:] = dissolved.buffer(PRECISION, resolution=1, join_style=2)
|
|
1031
|
+
else:
|
|
1032
|
+
raise e
|
|
1033
|
+
|
|
1034
|
+
dissolved = dissolve_func(dissolved, **kwargs)
|
|
1035
|
+
|
|
1036
|
+
try:
|
|
1037
|
+
dissolved.geometry = dissolved.buffer(-PRECISION, resolution=1, join_style=2)
|
|
1038
|
+
except AttributeError as e:
|
|
1039
|
+
if isinstance(dissolved, GeoSeries):
|
|
1040
|
+
dissolved.loc[:] = dissolved.buffer(-PRECISION, resolution=1, join_style=2)
|
|
1041
|
+
else:
|
|
1042
|
+
raise e
|
|
1043
|
+
|
|
1044
|
+
dissolved = dissolved.loc[lambda x: ~x.geometry.is_empty]
|
|
1045
|
+
dissolved = dissolved.explode(ignore_index=True)
|
|
1046
|
+
|
|
1047
|
+
original_points = sfilter_inverse(original_points, dissolved.buffer(-PRECISION))
|
|
1048
|
+
|
|
1049
|
+
snapped = (
|
|
1050
|
+
PolygonsAsRings(
|
|
1051
|
+
dissolved.geometry,
|
|
1052
|
+
)
|
|
1053
|
+
.apply_numpy_func(
|
|
1054
|
+
_snap_points_back,
|
|
1055
|
+
kwargs={"snap_to": original_points, "tolerance": PRECISION},
|
|
1056
|
+
)
|
|
1057
|
+
.to_numpy()
|
|
1058
|
+
)
|
|
1059
|
+
|
|
1060
|
+
try:
|
|
1061
|
+
dissolved.geometry = snapped
|
|
1062
|
+
except AttributeError as e:
|
|
1063
|
+
if isinstance(dissolved, GeoSeries):
|
|
1064
|
+
dissolved.loc[:] = snapped
|
|
1065
|
+
else:
|
|
1066
|
+
raise e
|
|
1067
|
+
|
|
1068
|
+
return dissolved
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
def _snap_points_back(rings, snap_to, tolerance):
|
|
1072
|
+
points = GeoDataFrame({"geometry": extract_unique_points(rings)})
|
|
1073
|
+
points = points.explode(index_parts=True)
|
|
1074
|
+
|
|
1075
|
+
snap_to["geom_right"] = snap_to.geometry
|
|
1076
|
+
nearest = points.sjoin_nearest(snap_to, max_distance=tolerance)
|
|
1077
|
+
points.loc[nearest.index, points.geometry.name] = nearest["geom_right"]
|
|
1078
|
+
|
|
1079
|
+
new_rings = points.groupby(level=0)[points.geometry.name].agg(LinearRing)
|
|
1080
|
+
return new_rings
|
|
749
1081
|
|
|
750
1082
|
|
|
751
1083
|
def close_thin_holes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
|
|
752
1084
|
gdf = make_all_singlepart(gdf)
|
|
753
1085
|
holes = get_holes(gdf)
|
|
754
|
-
inside_holes = sfilter(gdf, holes, predicate="within").
|
|
1086
|
+
inside_holes = union_all(sfilter(gdf, holes, predicate="within").geometry.values)
|
|
755
1087
|
|
|
756
1088
|
def to_none_if_thin(geoms):
|
|
1089
|
+
if not len(geoms):
|
|
1090
|
+
return geoms
|
|
757
1091
|
try:
|
|
758
|
-
|
|
759
|
-
difference(polygons(geoms), inside_holes), -(tolerance / 2)
|
|
760
|
-
)
|
|
761
|
-
return np.where(is_empty(buffered_in), None, geoms)
|
|
1092
|
+
polys = polygons(geoms)
|
|
762
1093
|
except GEOSException:
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
except ValueError as e:
|
|
770
|
-
if not len(geoms):
|
|
771
|
-
return geoms
|
|
772
|
-
raise e
|
|
1094
|
+
polys = make_valid(polygons(make_valid(geoms)))
|
|
1095
|
+
if inside_holes is not None:
|
|
1096
|
+
polys = difference(polys, inside_holes)
|
|
1097
|
+
buffered_in = buffer(polys, -(tolerance / 2))
|
|
1098
|
+
return np.where(is_empty(buffered_in), None, geoms)
|
|
773
1099
|
|
|
774
1100
|
if not (gdf.geom_type == "Polygon").all():
|
|
775
1101
|
raise ValueError(gdf.geom_type.value_counts())
|
|
@@ -848,7 +1174,7 @@ def close_all_holes(
|
|
|
848
1174
|
else:
|
|
849
1175
|
return holes_closed
|
|
850
1176
|
|
|
851
|
-
all_geoms = make_valid(gdf.
|
|
1177
|
+
all_geoms = make_valid(union_all(gdf.geometry.values))
|
|
852
1178
|
if isinstance(gdf, GeoDataFrame):
|
|
853
1179
|
gdf.geometry = gdf.geometry.map(
|
|
854
1180
|
lambda x: _close_all_holes_no_islands(x, all_geoms)
|
|
@@ -934,7 +1260,7 @@ def close_small_holes(
|
|
|
934
1260
|
gdf = make_all_singlepart(gdf)
|
|
935
1261
|
|
|
936
1262
|
if not ignore_islands:
|
|
937
|
-
all_geoms = make_valid(gdf.
|
|
1263
|
+
all_geoms = make_valid(union_all(gdf.geometry.values))
|
|
938
1264
|
|
|
939
1265
|
if isinstance(gdf, GeoDataFrame):
|
|
940
1266
|
gdf.geometry = gdf.geometry.map(
|
|
@@ -990,14 +1316,14 @@ def _close_small_holes_no_islands(poly, max_area, all_geoms):
|
|
|
990
1316
|
for n in range(n_interior_rings):
|
|
991
1317
|
hole = polygons(get_interior_ring(part, n))
|
|
992
1318
|
try:
|
|
993
|
-
no_islands =
|
|
1319
|
+
no_islands = union_all(hole.difference(all_geoms))
|
|
994
1320
|
except GEOSException:
|
|
995
|
-
no_islands = make_valid(
|
|
1321
|
+
no_islands = make_valid(union_all(hole.difference(all_geoms)))
|
|
996
1322
|
|
|
997
1323
|
if area(no_islands) < max_area:
|
|
998
1324
|
holes_closed.append(no_islands)
|
|
999
1325
|
|
|
1000
|
-
return make_valid(
|
|
1326
|
+
return make_valid(union_all(holes_closed))
|
|
1001
1327
|
|
|
1002
1328
|
|
|
1003
1329
|
def _close_all_holes_no_islands(poly, all_geoms):
|
|
@@ -1012,13 +1338,13 @@ def _close_all_holes_no_islands(poly, all_geoms):
|
|
|
1012
1338
|
for n in range(n_interior_rings):
|
|
1013
1339
|
hole = polygons(get_interior_ring(part, n))
|
|
1014
1340
|
try:
|
|
1015
|
-
no_islands =
|
|
1341
|
+
no_islands = union_all(hole.difference(all_geoms))
|
|
1016
1342
|
except GEOSException:
|
|
1017
|
-
no_islands = make_valid(
|
|
1343
|
+
no_islands = make_valid(union_all(hole.difference(all_geoms)))
|
|
1018
1344
|
|
|
1019
1345
|
holes_closed.append(no_islands)
|
|
1020
1346
|
|
|
1021
|
-
return make_valid(
|
|
1347
|
+
return make_valid(union_all(holes_closed))
|
|
1022
1348
|
|
|
1023
1349
|
|
|
1024
1350
|
def get_gaps(
|
|
@@ -1062,7 +1388,7 @@ def get_gaps(
|
|
|
1062
1388
|
return without_outer_ring.reset_index(drop=True)
|
|
1063
1389
|
|
|
1064
1390
|
|
|
1065
|
-
def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
|
|
1391
|
+
def get_holes(gdf: GeoDataFrame, as_polygons: bool = True) -> GeoDataFrame:
|
|
1066
1392
|
"""Get the holes inside polygons.
|
|
1067
1393
|
|
|
1068
1394
|
Args:
|
|
@@ -1096,3 +1422,82 @@ def get_holes(gdf: GeoDataFrame, as_polygons=True) -> GeoDataFrame:
|
|
|
1096
1422
|
if rings
|
|
1097
1423
|
else GeoDataFrame({"geometry": []}, crs=gdf.crs)
|
|
1098
1424
|
)
|
|
1425
|
+
|
|
1426
|
+
|
|
1427
|
+
def split_polygons_by_lines(polygons: GeoSeries, lines: GeoSeries) -> GeoSeries:
    """Split each polygon by every line that intersects it.

    Args:
        polygons: Polygon geometries to split. NOTE(review): the annotation
            says GeoSeries, but the isinstance check at the bottom suggests
            GeoDataFrame input is also expected — confirm.
        lines: Line geometries used as cutting edges.

    Returns:
        The input with each intersected polygon replaced by its split parts;
        the original index (and index name) is restored at the end.
    """
    # Remember the original index so it can be restored after the
    # positional reindexing below.
    idx_mapper = dict(enumerate(polygons.index))
    idx_name = polygons.index.name
    polygons = polygons.copy()
    polygons.index = range(len(polygons))

    # use pandas to explode faster (from list instead of GeoSeries.explode)
    splitted = pd.Series(polygons.geometry.to_numpy())
    lines = to_geoseries(lines)
    lines.index = range(len(lines))

    # find intersection pairs to split relevant polygon for each line
    tree = STRtree(splitted.values)
    left, right = tree.query(lines.values, predicate="intersects")
    # Maps line position (index) -> positions of the polygons it intersects;
    # a line hitting several polygons yields duplicate index labels.
    pairs = pd.Series(right, index=left)

    # Only lines that actually intersect something need processing.
    lines = lines.loc[lambda x: x.index.isin(pairs.index)]

    for i, line in lines.items():
        # [[i]] keeps a Series even for a single match.
        intersecting = pairs.loc[[i]].values
        try:
            # 'or poly': keep the original polygon when the split helper
            # returns a falsy result (i.e. no actual split happened).
            splitted.loc[intersecting] = splitted.loc[intersecting].apply(
                lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
            )
        except TypeError:
            # if we got multipolygon
            splitted = splitted.apply(get_parts).explode()
            splitted.loc[intersecting] = splitted.loc[intersecting].apply(
                lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
            )
            splitted = splitted.explode()

    if isinstance(polygons, GeoDataFrame):
        # Align rows with the (possibly exploded) split results, then map
        # the positional index back to the caller's original labels.
        polygons = polygons.loc[splitted.index]
        polygons.geometry = splitted
        polygons.index = polygons.index.map(idx_mapper)
        polygons.index.name = idx_name
        return polygons
    else:
        splitted.index = splitted.index.map(idx_mapper)
        splitted.index.name = idx_name
        return splitted
|
|
1469
|
+
|
|
1470
|
+
|
|
1471
|
+
def split_by_neighbors(
    df: GeoDataFrame,
    split_by: GeoDataFrame,
    tolerance: int | float,
    grid_size: float | int | None = None,
) -> GeoDataFrame:
    """Split the polygons in 'df' along the edges of nearby 'split_by' geometries.

    Args:
        df: Polygons to be split.
        split_by: Geometries whose boundary lines are used as cutting edges.
        tolerance: Distance around 'df' within which neighbor edges are used.
        grid_size: Optional precision grid size passed to the overlay.

    Returns:
        'df' with its polygons split by the extended neighbor line segments.
    """
    if not len(df):
        return df

    df = make_all_singlepart(df)
    split_by = split_by.copy()

    # Clip the neighbor boundary lines to a tolerance-wide zone around df.
    buffered = df.buffer(tolerance).to_frame("geometry")
    clipped = clean_overlay(
        to_lines(split_by),
        buffered,
        how="intersection",
        grid_size=grid_size,
    )
    segments = get_line_segments(clipped).reset_index(drop=True)

    # Each segment's boundary gives its two endpoints (same index label).
    endpoints = segments.boundary.explode(index_parts=False)
    start_points = endpoints.loc[lambda x: ~x.index.duplicated(keep="first")]
    end_points = endpoints.loc[lambda x: ~x.index.duplicated(keep="last")]

    # Extend each segment well past the tolerance so it fully crosses
    # the polygons it is meant to split.
    extended = extend_lines(
        start_points.values,
        end_points.values,
        distance=tolerance * 3,
    )

    return split_polygons_by_lines(df, extended)
|