ssb-sgis 1.1.17__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +5 -0
- sgis/conf.py +18 -0
- sgis/geopandas_tools/buffer_dissolve_explode.py +25 -47
- sgis/geopandas_tools/conversion.py +18 -25
- sgis/geopandas_tools/duplicates.py +45 -60
- sgis/geopandas_tools/general.py +69 -114
- sgis/geopandas_tools/neighbors.py +25 -4
- sgis/geopandas_tools/overlay.py +178 -256
- sgis/geopandas_tools/polygon_operations.py +68 -88
- sgis/geopandas_tools/runners.py +326 -0
- sgis/geopandas_tools/sfilter.py +42 -24
- sgis/geopandas_tools/utils.py +37 -0
- sgis/helpers.py +1 -1
- sgis/io/dapla_functions.py +96 -107
- sgis/maps/map.py +3 -1
- sgis/parallel/parallel.py +32 -24
- sgis/raster/image_collection.py +184 -162
- sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/RECORD +22 -20
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/WHEEL +0 -0
|
@@ -26,17 +26,15 @@ from shapely.errors import GEOSException
|
|
|
26
26
|
from shapely.geometry import LinearRing
|
|
27
27
|
from shapely.ops import SplitOp
|
|
28
28
|
|
|
29
|
+
from ..conf import _get_instance
|
|
30
|
+
from ..conf import config
|
|
29
31
|
from ..debug_config import _DEBUG_CONFIG
|
|
30
32
|
from ..debug_config import _try_debug_print
|
|
31
33
|
from ..maps.maps import explore_locals
|
|
32
34
|
from .conversion import to_gdf
|
|
33
35
|
from .conversion import to_geoseries
|
|
34
36
|
from .duplicates import _get_intersecting_geometries
|
|
35
|
-
from .general import _grouped_unary_union
|
|
36
|
-
from .general import _parallel_unary_union
|
|
37
|
-
from .general import _parallel_unary_union_geoseries
|
|
38
37
|
from .general import _push_geom_col
|
|
39
|
-
from .general import _unary_union_for_notna
|
|
40
38
|
from .general import clean_geoms
|
|
41
39
|
from .general import extend_lines
|
|
42
40
|
from .general import get_grouped_centroids
|
|
@@ -46,11 +44,13 @@ from .geometry_types import get_geom_type
|
|
|
46
44
|
from .geometry_types import make_all_singlepart
|
|
47
45
|
from .geometry_types import to_single_geom_type
|
|
48
46
|
from .neighbors import get_neighbor_indices
|
|
49
|
-
from .overlay import _try_difference
|
|
50
47
|
from .overlay import clean_overlay
|
|
51
48
|
from .polygons_as_rings import PolygonsAsRings
|
|
49
|
+
from .runners import OverlayRunner
|
|
50
|
+
from .runners import UnionRunner
|
|
52
51
|
from .sfilter import sfilter
|
|
53
52
|
from .sfilter import sfilter_inverse
|
|
53
|
+
from .utils import _unary_union_for_notna
|
|
54
54
|
|
|
55
55
|
PRECISION = 1e-3
|
|
56
56
|
_BUFFER = False
|
|
@@ -232,6 +232,8 @@ def eliminate_by_longest(
|
|
|
232
232
|
aggfunc: str | dict | list | None = None,
|
|
233
233
|
grid_size=None,
|
|
234
234
|
n_jobs: int = 1,
|
|
235
|
+
union_runner: UnionRunner | None = None,
|
|
236
|
+
overlay_runner: OverlayRunner | None = None,
|
|
235
237
|
**kwargs,
|
|
236
238
|
) -> tuple[GeoDataFrame]:
|
|
237
239
|
"""Dissolves selected polygons with the longest bordering neighbor polygon.
|
|
@@ -259,6 +261,10 @@ def eliminate_by_longest(
|
|
|
259
261
|
(if aggfunc="first").
|
|
260
262
|
grid_size: Rounding of the coordinates. Defaults to None.
|
|
261
263
|
n_jobs: Number of threads to use. Defaults to 1.
|
|
264
|
+
union_runner: Optionally debug/manipulate the spatial union operations.
|
|
265
|
+
See the 'runners' module for example implementations.
|
|
266
|
+
overlay_runner: Optionally debug/manipulate the spatial overlay operations.
|
|
267
|
+
See the 'runners' module for example implementations.
|
|
262
268
|
**kwargs: Keyword arguments passed to the dissolve method.
|
|
263
269
|
|
|
264
270
|
Returns:
|
|
@@ -350,6 +356,7 @@ def eliminate_by_longest(
|
|
|
350
356
|
keep_geom_type=False,
|
|
351
357
|
grid_size=grid_size,
|
|
352
358
|
n_jobs=n_jobs,
|
|
359
|
+
overlay_runner=overlay_runner,
|
|
353
360
|
).loc[lambda x: x["_eliminate_idx"].notna()]
|
|
354
361
|
|
|
355
362
|
borders["_length"] = borders.length
|
|
@@ -390,6 +397,8 @@ def eliminate_by_longest(
|
|
|
390
397
|
fix_double,
|
|
391
398
|
grid_size=grid_size,
|
|
392
399
|
n_jobs=n_jobs,
|
|
400
|
+
union_runner=union_runner,
|
|
401
|
+
overlay_runner=overlay_runner,
|
|
393
402
|
**kwargs,
|
|
394
403
|
)
|
|
395
404
|
|
|
@@ -434,6 +443,8 @@ def eliminate_by_longest(
|
|
|
434
443
|
ignore_index=ignore_index,
|
|
435
444
|
aggfunc=aggfunc,
|
|
436
445
|
grid_size=grid_size,
|
|
446
|
+
union_runner=union_runner,
|
|
447
|
+
overlay_runner=overlay_runner,
|
|
437
448
|
n_jobs=n_jobs,
|
|
438
449
|
)
|
|
439
450
|
|
|
@@ -494,6 +505,8 @@ def eliminate_by_largest(
|
|
|
494
505
|
predicate: str = "intersects",
|
|
495
506
|
grid_size=None,
|
|
496
507
|
n_jobs: int = 1,
|
|
508
|
+
union_runner: UnionRunner | None = None,
|
|
509
|
+
overlay_runner: OverlayRunner | None = None,
|
|
497
510
|
**kwargs,
|
|
498
511
|
) -> tuple[GeoDataFrame]:
|
|
499
512
|
"""Dissolves selected polygons with the largest neighbor polygon.
|
|
@@ -522,6 +535,10 @@ def eliminate_by_largest(
|
|
|
522
535
|
predicate: Binary predicate passed to sjoin. Defaults to "intersects".
|
|
523
536
|
grid_size: Rounding of the coordinates. Defaults to None.
|
|
524
537
|
n_jobs: Number of threads to use. Defaults to 1.
|
|
538
|
+
union_runner: Optionally debug/manipulate the spatial union operations.
|
|
539
|
+
See the 'runners' module for example implementations.
|
|
540
|
+
overlay_runner: Optionally debug/manipulate the spatial overlay operations.
|
|
541
|
+
See the 'runners' module for example implementations.
|
|
525
542
|
**kwargs: Keyword arguments passed to the dissolve method.
|
|
526
543
|
|
|
527
544
|
Returns:
|
|
@@ -566,6 +583,8 @@ def eliminate_by_largest(
|
|
|
566
583
|
fix_double=fix_double,
|
|
567
584
|
grid_size=grid_size,
|
|
568
585
|
n_jobs=n_jobs,
|
|
586
|
+
union_runner=union_runner,
|
|
587
|
+
overlay_runner=overlay_runner,
|
|
569
588
|
**kwargs,
|
|
570
589
|
)
|
|
571
590
|
|
|
@@ -581,6 +600,8 @@ def eliminate_by_smallest(
|
|
|
581
600
|
fix_double: bool = True,
|
|
582
601
|
grid_size=None,
|
|
583
602
|
n_jobs: int = 1,
|
|
603
|
+
union_runner: UnionRunner | None = None,
|
|
604
|
+
overlay_runner: OverlayRunner | None = None,
|
|
584
605
|
**kwargs,
|
|
585
606
|
) -> tuple[GeoDataFrame]:
|
|
586
607
|
return _eliminate_by_area(
|
|
@@ -594,6 +615,8 @@ def eliminate_by_smallest(
|
|
|
594
615
|
fix_double=fix_double,
|
|
595
616
|
grid_size=grid_size,
|
|
596
617
|
n_jobs=n_jobs,
|
|
618
|
+
union_runner=union_runner,
|
|
619
|
+
overlay_runner=overlay_runner,
|
|
597
620
|
**kwargs,
|
|
598
621
|
)
|
|
599
622
|
|
|
@@ -603,12 +626,14 @@ def _eliminate_by_area(
|
|
|
603
626
|
to_eliminate: GeoDataFrame,
|
|
604
627
|
max_distance: int | float | None,
|
|
605
628
|
sort_ascending: bool,
|
|
606
|
-
ignore_index: bool
|
|
607
|
-
aggfunc: str | dict | list | None
|
|
608
|
-
predicate
|
|
609
|
-
fix_double: bool
|
|
610
|
-
grid_size
|
|
611
|
-
n_jobs: int
|
|
629
|
+
ignore_index: bool,
|
|
630
|
+
aggfunc: str | dict | list | None,
|
|
631
|
+
predicate: str,
|
|
632
|
+
fix_double: bool,
|
|
633
|
+
grid_size,
|
|
634
|
+
n_jobs: int,
|
|
635
|
+
union_runner: UnionRunner,
|
|
636
|
+
overlay_runner: OverlayRunner,
|
|
612
637
|
**kwargs,
|
|
613
638
|
) -> GeoDataFrame:
|
|
614
639
|
_recurse = kwargs.pop("_recurse", False)
|
|
@@ -667,6 +692,8 @@ def _eliminate_by_area(
|
|
|
667
692
|
fix_double=fix_double,
|
|
668
693
|
grid_size=grid_size,
|
|
669
694
|
n_jobs=n_jobs,
|
|
695
|
+
union_runner=union_runner,
|
|
696
|
+
overlay_runner=overlay_runner,
|
|
670
697
|
**kwargs,
|
|
671
698
|
)
|
|
672
699
|
|
|
@@ -717,18 +744,14 @@ def _eliminate_by_area(
|
|
|
717
744
|
ignore_index=ignore_index,
|
|
718
745
|
aggfunc=aggfunc,
|
|
719
746
|
grid_size=grid_size,
|
|
747
|
+
union_runner=union_runner,
|
|
748
|
+
overlay_runner=overlay_runner,
|
|
720
749
|
n_jobs=n_jobs,
|
|
721
750
|
)
|
|
722
751
|
|
|
723
752
|
if not was_multiple_gdfs:
|
|
724
753
|
return out, isolated
|
|
725
754
|
|
|
726
|
-
for k, v in locals().items():
|
|
727
|
-
try:
|
|
728
|
-
print(k, v.columns)
|
|
729
|
-
except Exception:
|
|
730
|
-
pass
|
|
731
|
-
|
|
732
755
|
gdfs = ()
|
|
733
756
|
for i, cols in enumerate(original_cols):
|
|
734
757
|
df = out.loc[out["_df_idx"] == i, cols]
|
|
@@ -738,11 +761,26 @@ def _eliminate_by_area(
|
|
|
738
761
|
|
|
739
762
|
|
|
740
763
|
def _eliminate(
|
|
741
|
-
gdf,
|
|
764
|
+
gdf,
|
|
765
|
+
to_eliminate,
|
|
766
|
+
aggfunc,
|
|
767
|
+
crs,
|
|
768
|
+
fix_double,
|
|
769
|
+
grid_size,
|
|
770
|
+
n_jobs,
|
|
771
|
+
overlay_runner,
|
|
772
|
+
union_runner,
|
|
773
|
+
**kwargs,
|
|
742
774
|
):
|
|
775
|
+
|
|
743
776
|
if not len(to_eliminate):
|
|
744
777
|
return gdf
|
|
745
778
|
|
|
779
|
+
if union_runner is None:
|
|
780
|
+
union_runner = _get_instance(config, "union_runner", n_jobs=n_jobs)
|
|
781
|
+
if overlay_runner is None:
|
|
782
|
+
overlay_runner = _get_instance(config, "overlay_runner", n_jobs=n_jobs)
|
|
783
|
+
|
|
746
784
|
gdf["_range_idx_elim"] = range(len(gdf))
|
|
747
785
|
|
|
748
786
|
in_to_eliminate = gdf["_dissolve_idx"].isin(to_eliminate["_dissolve_idx"])
|
|
@@ -798,16 +836,6 @@ def _eliminate(
|
|
|
798
836
|
# all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
|
|
799
837
|
all_geoms: pd.Series = gdf.geometry
|
|
800
838
|
|
|
801
|
-
# more_than_one = get_num_geometries(all_geoms.values) > 1
|
|
802
|
-
# all_geoms.loc[more_than_one] = all_geoms.loc[more_than_one].apply(
|
|
803
|
-
# _unary_union_for_notna
|
|
804
|
-
# )
|
|
805
|
-
|
|
806
|
-
# more_than_one = get_num_geometries(to_be_eliminated.values) > 1
|
|
807
|
-
# to_be_eliminated.loc[more_than_one, "geometry"] = to_be_eliminated.loc[
|
|
808
|
-
# more_than_one, "geometry"
|
|
809
|
-
# ].apply(_unary_union_for_notna)
|
|
810
|
-
|
|
811
839
|
# create DataFrame of intersection pairs
|
|
812
840
|
tree = STRtree(all_geoms.values)
|
|
813
841
|
left, right = tree.query(
|
|
@@ -819,8 +847,6 @@ def _eliminate(
|
|
|
819
847
|
dict(enumerate(to_be_eliminated.index))
|
|
820
848
|
)
|
|
821
849
|
|
|
822
|
-
# pairs = pairs.loc[lambda x: x["right"] != x["_dissolve_idx"]]
|
|
823
|
-
|
|
824
850
|
soon_erased = to_be_eliminated.iloc[pairs.index]
|
|
825
851
|
intersecting = all_geoms.iloc[pairs["right"]]
|
|
826
852
|
|
|
@@ -829,61 +855,31 @@ def _eliminate(
|
|
|
829
855
|
intersecting = intersecting[shoud_not_erase]
|
|
830
856
|
|
|
831
857
|
missing = to_be_eliminated.loc[
|
|
832
|
-
|
|
833
|
-
# |
|
|
834
|
-
(~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])),
|
|
835
|
-
# | (~to_be_eliminated["_row_idx"].isin(soon_erased.index)),
|
|
836
|
-
"geometry",
|
|
858
|
+
(~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])), "geometry"
|
|
837
859
|
]
|
|
838
860
|
|
|
839
861
|
# allign and aggregate by dissolve index to not get duplicates in difference
|
|
840
862
|
intersecting.index = soon_erased.index
|
|
841
863
|
|
|
842
|
-
soon_erased =
|
|
843
|
-
intersecting =
|
|
864
|
+
soon_erased = union_runner.run(soon_erased, level=0, grid_size=grid_size)
|
|
865
|
+
intersecting = union_runner.run(intersecting, level=0, grid_size=grid_size)
|
|
844
866
|
|
|
845
867
|
assert soon_erased.index.equals(soon_erased.index)
|
|
846
868
|
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
# )
|
|
850
|
-
# intersecting = intersecting.groupby(level=0).agg(
|
|
851
|
-
# lambda x: unary_union(x, grid_size=grid_size)
|
|
852
|
-
# )
|
|
853
|
-
|
|
854
|
-
# explore_locals(center=_DEBUG_CONFIG["center"])
|
|
855
|
-
|
|
856
|
-
soon_erased.loc[:] = _try_difference(
|
|
869
|
+
soon_erased.loc[:] = overlay_runner.run(
|
|
870
|
+
difference,
|
|
857
871
|
soon_erased.to_numpy(),
|
|
858
872
|
intersecting.to_numpy(),
|
|
859
873
|
grid_size=grid_size,
|
|
860
|
-
n_jobs=n_jobs,
|
|
861
874
|
geom_type="polygon",
|
|
862
875
|
)
|
|
863
876
|
|
|
864
|
-
missing =
|
|
877
|
+
missing = union_runner.run(missing, level=0, grid_size=grid_size)
|
|
865
878
|
|
|
866
879
|
missing = make_all_singlepart(missing).loc[lambda x: x.area > 0]
|
|
867
880
|
|
|
868
881
|
soon_erased = make_all_singlepart(soon_erased).loc[lambda x: x.area > 0]
|
|
869
882
|
|
|
870
|
-
if 0:
|
|
871
|
-
tree = STRtree(soon_erased.values)
|
|
872
|
-
left, right = tree.query(missing.values, predicate="intersects")
|
|
873
|
-
explore_locals(
|
|
874
|
-
missing2=to_gdf(missing.to_numpy()[left], 25833),
|
|
875
|
-
soon_erased2=to_gdf(soon_erased.to_numpy()[right], 25833),
|
|
876
|
-
center=_DEBUG_CONFIG["center"],
|
|
877
|
-
)
|
|
878
|
-
missing = pd.Series(
|
|
879
|
-
difference(
|
|
880
|
-
missing.to_numpy()[left],
|
|
881
|
-
soon_erased.to_numpy()[right],
|
|
882
|
-
grid_size=grid_size,
|
|
883
|
-
),
|
|
884
|
-
index=left,
|
|
885
|
-
).loc[lambda x: (x.notna()) & (~is_empty(x))]
|
|
886
|
-
|
|
887
883
|
soon_eliminated = pd.concat([eliminators, soon_erased, missing])
|
|
888
884
|
more_than_one = get_num_geometries(soon_eliminated.values) > 1
|
|
889
885
|
|
|
@@ -891,29 +887,13 @@ def _eliminate(
|
|
|
891
887
|
_unary_union_for_notna
|
|
892
888
|
)
|
|
893
889
|
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
soon_eliminated,
|
|
898
|
-
level=0,
|
|
899
|
-
grid_size=grid_size,
|
|
900
|
-
n_jobs=n_jobs,
|
|
901
|
-
),
|
|
902
|
-
index=eliminated.index,
|
|
903
|
-
)
|
|
904
|
-
else:
|
|
905
|
-
eliminated["geometry"] = _grouped_unary_union(soon_eliminated, level=0)
|
|
906
|
-
# eliminated["geometry"] = soon_eliminated.groupby(level=0).agg(
|
|
907
|
-
# lambda x: make_valid(unary_union(x))
|
|
908
|
-
# )
|
|
909
|
-
|
|
890
|
+
eliminated["geometry"] = union_runner.run(
|
|
891
|
+
soon_eliminated, level=0, grid_size=grid_size
|
|
892
|
+
)
|
|
910
893
|
else:
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
)
|
|
915
|
-
else:
|
|
916
|
-
eliminated["geometry"] = _grouped_unary_union(many_hits, by="_dissolve_idx")
|
|
894
|
+
eliminated["geometry"] = union_runner.run(
|
|
895
|
+
many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
|
|
896
|
+
)
|
|
917
897
|
|
|
918
898
|
# setting crs on the GeometryArrays to avoid warning in concat
|
|
919
899
|
not_to_dissolve.geometry.values.crs = crs
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import joblib
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from geopandas import GeoDataFrame
|
|
12
|
+
from geopandas import GeoSeries
|
|
13
|
+
from shapely import Geometry
|
|
14
|
+
from shapely import STRtree
|
|
15
|
+
from shapely import get_parts
|
|
16
|
+
from shapely import make_valid
|
|
17
|
+
from shapely import union_all
|
|
18
|
+
from shapely.errors import GEOSException
|
|
19
|
+
|
|
20
|
+
from .utils import _unary_union_for_notna
|
|
21
|
+
from .utils import make_valid_and_keep_geom_type
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class AbstractRunner(ABC):
|
|
26
|
+
"""Blueprint for 'runner' classes.
|
|
27
|
+
|
|
28
|
+
Subclasses must implement a 'run' method.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
n_jobs: Number of workers.
|
|
32
|
+
backend: Backend for the workers.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
n_jobs: int
|
|
36
|
+
backend: str | None = None
|
|
37
|
+
|
|
38
|
+
@abstractmethod
|
|
39
|
+
def run(self, *args, **kwargs) -> Any:
|
|
40
|
+
"""Abstract run method."""
|
|
41
|
+
|
|
42
|
+
def __str__(self) -> str:
|
|
43
|
+
"""String representation."""
|
|
44
|
+
return (
|
|
45
|
+
f"{self.__class__.__name__}(n_jobs={self.n_jobs}, backend='{self.backend}')"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class UnionRunner(AbstractRunner):
    """Dissolve geometries group by group with pandas.groupby.

    Subclasses that override 'run' must accept the arguments 'df'
    (GeoDataFrame or GeoSeries), 'by' (optional column(s) to group by),
    'grid_size' (forwarded to the union helper) and **kwargs, which are
    passed on to pandas' groupby.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
    """

    n_jobs: int
    backend: str | None = None

    def run(
        self,
        df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
        by: str | list[str] | None = None,
        grid_size: float | int | None = None,
        **kwargs,
    ) -> GeoSeries | GeoDataFrame:
        """Union the geometries of each group, in parallel if n_jobs > 1.

        Args:
            df: Object holding the geometries to union.
            by: Group key(s). If omitted and no 'level' keyword is given,
                all rows are placed in one single group.
            grid_size: Forwarded to the union helper.
            **kwargs: Passed to 'groupby'. 'as_index' is handled here
                instead: if False, the result's index is reset.

        Returns:
            A GeoSeries with one unioned geometry per group, or the
            result of 'reset_index()' on it when as_index=False.
        """
        # Work out the geometry column name. Plain pandas objects have no
        # 'geometry' accessor, so fall back to the Series name or 'geometry'.
        try:
            geom_col = df.geometry.name
        except AttributeError:
            try:
                geom_col = df.name
            except AttributeError:
                geom_col = "geometry"
            else:
                if geom_col is None:
                    geom_col = "geometry"

        crs = getattr(df, "crs", None)

        union_func = functools.partial(_unary_union_for_notna, grid_size=grid_size)

        as_index = kwargs.pop("as_index", True)
        if by is None and "level" not in kwargs:
            # constant key: every row ends up in the same group
            by = np.zeros(len(df), dtype="int64")

        try:
            # (Geo)DataFrame: select the geometry column from the groups
            grouped = df.groupby(by, **kwargs)[geom_col]
        except KeyError:
            # (Geo)Series: the groups already consist of geometries only
            grouped = df.groupby(by, **kwargs)

        if self.n_jobs is None or self.n_jobs == 1:
            results = grouped.agg(union_func)
            index = results.index
        else:
            with joblib.Parallel(
                n_jobs=self.n_jobs, backend=self.backend or "loky"
            ) as parallel:
                results = parallel(
                    joblib.delayed(union_func)(group_geoms)
                    for _, group_geoms in grouped
                )
            index = grouped.size().index

        unioned = GeoSeries(results, index=index, name=geom_col, crs=crs)
        return unioned if as_index else unioned.reset_index()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _strtree_query(
    arr1: np.ndarray,
    arr2: np.ndarray,
    method: str,
    indices1: np.ndarray | None = None,
    indices2: np.ndarray | None = None,
    **kwargs,
):
    """Build an STRtree from arr2 and query it with arr1.

    Args:
        arr1: Geometries passed to the tree's query method.
        arr2: Geometries the tree is built from.
        method: Name of the STRtree method to call (e.g. "query").
        indices1: Optional array used to translate the positions in the
            first result array (e.g. the original positions of a chunk of arr1).
        indices2: Optional array used to translate the positions in the
            second result array (e.g. the original positions of a chunk of arr2).
        **kwargs: Keyword arguments passed to the query method.

    Returns:
        A tuple of two index arrays (left, right), as unpacked from the
        result of the query method and optionally translated.
    """
    tree = STRtree(arr2)
    left, right = getattr(tree, method)(arr1, **kwargs)
    # Translate chunk-local positions with integer array indexing. Unlike a
    # dict lookup inside a list comprehension, this keeps an integer dtype when
    # there are no hits, so concatenating chunk results never produces floats.
    if indices1 is not None:
        left = np.asarray(indices1)[left]
    if indices2 is not None:
        right = np.asarray(indices2)[right]
    return left, right
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class RTreeQueryRunner(AbstractRunner):
    """Run shapely.STRtree queries, chunkwise in parallel for large inputs.

    Subclasses that override 'run' must take two numpy.ndarrays of geometries
    as 0th and 1st argument and **kwargs passed to the query method,
    chiefly 'predicate' and 'distance'. 'run' should return a tuple
    of two arrays representing the index pairs of the left and right input arrays.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
    """

    n_jobs: int
    backend: str = "loky"

    def run(
        self, arr1: np.ndarray, arr2: np.ndarray, method: str = "query", **kwargs
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run a spatial rtree query and return indices of hits from arr1 and arr2.

        The query is split into 'n_jobs' chunks when n_jobs > 1 and one of the
        arrays has more than 10 000 rows per worker; arr1 is checked first.
        Otherwise a single query is run.

        Args:
            arr1: Geometries to query with.
            arr2: Geometries the tree is built from.
            method: Name of the STRtree method to call.
            **kwargs: Keyword arguments passed to the query method.

        Returns:
            A tuple of two index arrays (left, right).
        """
        n_jobs = self.n_jobs or 1
        # An empty array means there is nothing to split and nothing to hit.
        # Checking this explicitly replaces the former length-ratio expression,
        # which raised ZeroDivisionError when the other array was empty.
        if n_jobs > 1 and len(arr1) and len(arr2):
            if len(arr1) / n_jobs > 10_000:
                return self._run_chunked(arr1, arr2, method, True, kwargs)
            if len(arr2) / n_jobs > 10_000:
                return self._run_chunked(arr1, arr2, method, False, kwargs)

        return _strtree_query(arr1, arr2, method=method, **kwargs)

    def _run_chunked(
        self,
        arr1: np.ndarray,
        arr2: np.ndarray,
        method: str,
        split_left: bool,
        kwargs: dict,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Query in parallel over chunks of arr1 (split_left) or of arr2."""
        n_rows = len(arr1) if split_left else len(arr2)
        chunks = np.array_split(np.arange(n_rows), self.n_jobs)
        query = joblib.delayed(_strtree_query)
        if split_left:
            tasks = (
                query(arr1[chunk], arr2, method=method, indices1=chunk, **kwargs)
                for chunk in chunks
            )
        else:
            tasks = (
                query(arr1, arr2[chunk], method=method, indices2=chunk, **kwargs)
                for chunk in chunks
            )
        with joblib.Parallel(self.n_jobs, backend=self.backend) as parallel:
            results = parallel(tasks)
        left = np.concatenate([pair[0] for pair in results])
        right = np.concatenate([pair[1] for pair in results])
        return left, right
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@dataclass
class OverlayRunner(AbstractRunner):
    """Run a vectorized shapely overlay operation on two equal-length numpy arrays.

    Subclasses that override 'run' must take an overlay function
    (shapely.intersection, shapely.difference etc.) as 0th argument and two
    numpy.ndarrays of same length as 1st and 2nd argument. 'run' should also
    take the argument 'grid_size', which is passed to the overlay function,
    and the argument 'geom_type', which is used to keep only relevant
    geometries (polygon, line or point) in cases of GEOSExceptions.

    This runner is sequential (no n_jobs), because the vectorized shapely
    functions are usually faster than any attempt to parallelize.
    """

    n_jobs: None = None
    backend: None = None

    def run(
        self,
        func: Callable,
        arr1: np.ndarray,
        arr2: np.ndarray,
        grid_size: int | float | None,
        geom_type: str | None,
    ) -> np.ndarray:
        """Run the overlay operation (func) with fallback.

        First tries to run func. If a GEOSException is raised, both arrays
        are passed through 'make_valid_and_keep_geom_type', only the rows
        still present in both results are kept, and func is run once more.

        Args:
            func: Overlay function taking two arrays and 'grid_size'.
            arr1: Left geometries.
            arr2: Right geometries.
            grid_size: Passed to func.
            geom_type: Passed to 'make_valid_and_keep_geom_type' in the fallback.

        Returns:
            The result of func.
        """
        try:
            return func(arr1, arr2, grid_size=grid_size)
        except GEOSException:
            # NOTE(review): the helper is assumed to return an index-bearing
            # pandas object, since '.index' and '.loc' are used on its result.
            fixed1 = make_valid_and_keep_geom_type(arr1, geom_type=geom_type)
            fixed2 = make_valid_and_keep_geom_type(arr2, geom_type=geom_type)
            # Compute both masks before converting to numpy. Converting the left
            # side first made the right-hand filter fail with AttributeError,
            # because a numpy array has no 'index'.
            in_both1 = fixed1.index.isin(fixed2.index)
            in_both2 = fixed2.index.isin(fixed1.index)
            return func(
                fixed1.loc[in_both1].to_numpy(),
                fixed2.loc[in_both2].to_numpy(),
                grid_size=grid_size,
            )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@dataclass
class GridSizeOverlayRunner(OverlayRunner):
    """Run a shapely overlay operation rowwise for different grid_sizes until success.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers. None means "threading".
        grid_sizes: Grid sizes to try, in order, when the overlay keeps failing.
    """

    n_jobs: int
    backend: str | None
    grid_sizes: list[float | int] | None = None

    def __post_init__(self) -> None:
        """Check that grid_sizes is passed."""
        if self.grid_sizes is None:
            raise ValueError(
                f"must set 'grid_sizes' in the {self.__class__.__name__} initialiser."
            )

    def run(
        self,
        func: Callable,
        arr1: np.ndarray,
        arr2: np.ndarray,
        grid_size: int | float | None = None,
        geom_type: str | None = None,
    ) -> np.ndarray:
        """Run the overlay operation rowwise with fallback.

        The overlay operation (func) is called once per pair of rows in arr1
        and arr2, via '_run_overlay_rowwise', which receives 'grid_size', the
        lower-cased 'geom_type' and the instance's 'grid_sizes'.

        Args:
            func: Overlay function taking two geometries and 'grid_size'.
            arr1: Left geometries.
            arr2: Right geometries, same length as arr1.
            grid_size: Grid size for the first attempt.
            geom_type: Geometry type to keep when geometries have to be fixed.

        Returns:
            Object array with one result per row pair.

        Raises:
            ValueError: If arr1 and arr2 differ in length (strict zip).
        """
        rowwise_kwargs = dict(
            grid_size=grid_size,
            geom_type=geom_type.lower() if geom_type is not None else None,
            grid_sizes=self.grid_sizes,
        )
        # Honour the configured backend instead of silently ignoring it;
        # "threading" remains the default when no backend is set.
        backend = self.backend or "threading"
        with joblib.Parallel(self.n_jobs, backend=backend) as parallel:
            results = parallel(
                joblib.delayed(_run_overlay_rowwise)(func, g1, g2, **rowwise_kwargs)
                for g1, g2 in zip(arr1, arr2, strict=True)
            )
        # joblib hands back a list. Return an object array, as annotated and as
        # the parent runner does. Fill it element by element so that no geometry
        # is ever interpreted as a nested sequence.
        out = np.empty(len(results), dtype=object)
        for i, geom in enumerate(results):
            out[i] = geom
        return out
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _fix_gemetry_fast(geom: Geometry, geom_type: str | None) -> Geometry:
    """Make a geometry valid and keep only the parts of the given type.

    Args:
        geom: Geometry to fix.
        geom_type: Geometry type to keep, e.g. "polygon", "line" or "point",
            matched case-insensitively as a substring of shapely's geom_type.
            None means no filtering.

    Returns:
        The valid geometry if it already is of the wanted type (or no type
        is given), otherwise the union of its parts of the wanted type.
    """
    geom = make_valid(geom)
    if geom_type is None:
        return geom
    # Callers pass a lower-cased type ("polygon"), while shapely reports
    # "Polygon"/"MultiPolygon". Compare case-insensitively, otherwise every
    # part is filtered out and the result is always an empty geometry.
    wanted = geom_type.lower()
    if wanted in geom.geom_type.lower():
        return geom
    return union_all(
        [part for part in get_parts(geom) if wanted in part.geom_type.lower()]
    )
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _run_overlay_rowwise(
    func: Callable,
    geom1: Geometry,
    geom2: Geometry,
    grid_size: float | int | None,
    geom_type: str | None,
    grid_sizes: list[float | int],
) -> Geometry:
    """Run an overlay function on one pair of geometries with several fallbacks.

    The attempts are, in order:
    1. func with the given 'grid_size'.
    2. func without grid_size, after both geometries are fixed with
       '_fix_gemetry_fast'.
    3. func on the fixed geometries with each grid size in 'grid_sizes'.

    Args:
        func: Overlay function taking two geometries and optionally 'grid_size'.
        geom1: Left geometry.
        geom2: Right geometry.
        grid_size: Grid size for the first attempt.
        geom_type: Geometry type passed to '_fix_gemetry_fast'.
        grid_sizes: Fallback grid sizes, tried in order.

    Returns:
        The result of the first attempt that succeeds.

    Raises:
        GEOSException: If every attempt fails; the last error is raised.
    """
    try:
        return func(geom1, geom2, grid_size=grid_size)
    except GEOSException:
        pass

    geom1 = _fix_gemetry_fast(geom1, geom_type)
    geom2 = _fix_gemetry_fast(geom2, geom_type)
    try:
        return func(geom1, geom2)
    except GEOSException as error:
        last_error = error

    for fallback_grid_size in grid_sizes:
        try:
            return func(geom1, geom2, grid_size=fallback_grid_size)
        except GEOSException as error:
            last_error = error

    # Every attempt failed. Raise instead of silently returning None, which
    # is what happened when 'grid_sizes' was empty.
    raise last_error
|