ssb-sgis 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +10 -3
- sgis/debug_config.py +24 -0
- sgis/geopandas_tools/bounds.py +16 -21
- sgis/geopandas_tools/buffer_dissolve_explode.py +112 -30
- sgis/geopandas_tools/centerlines.py +4 -91
- sgis/geopandas_tools/cleaning.py +1576 -583
- sgis/geopandas_tools/conversion.py +24 -14
- sgis/geopandas_tools/duplicates.py +27 -6
- sgis/geopandas_tools/general.py +259 -100
- sgis/geopandas_tools/geometry_types.py +1 -1
- sgis/geopandas_tools/neighbors.py +16 -12
- sgis/geopandas_tools/overlay.py +7 -3
- sgis/geopandas_tools/point_operations.py +3 -3
- sgis/geopandas_tools/polygon_operations.py +505 -100
- sgis/geopandas_tools/polygons_as_rings.py +40 -8
- sgis/geopandas_tools/sfilter.py +26 -9
- sgis/io/dapla_functions.py +238 -19
- sgis/maps/examine.py +11 -10
- sgis/maps/explore.py +227 -155
- sgis/maps/legend.py +13 -4
- sgis/maps/map.py +22 -13
- sgis/maps/maps.py +100 -29
- sgis/maps/thematicmap.py +25 -18
- sgis/networkanalysis/_service_area.py +6 -1
- sgis/networkanalysis/cutting_lines.py +12 -5
- sgis/networkanalysis/finding_isolated_networks.py +13 -6
- sgis/networkanalysis/networkanalysis.py +10 -12
- sgis/parallel/parallel.py +27 -10
- sgis/raster/base.py +208 -0
- sgis/raster/cube.py +3 -3
- sgis/raster/image_collection.py +1421 -724
- sgis/raster/indices.py +10 -7
- sgis/raster/raster.py +7 -7
- sgis/raster/sentinel_config.py +33 -17
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/METADATA +6 -7
- ssb_sgis-1.0.5.dist-info/RECORD +62 -0
- ssb_sgis-1.0.3.dist-info/RECORD +0 -61
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/WHEEL +0 -0
|
@@ -91,7 +91,7 @@ def to_shapely(obj: Any) -> Geometry:
|
|
|
91
91
|
if not hasattr(obj, "__iter__"):
|
|
92
92
|
raise TypeError(type(obj))
|
|
93
93
|
try:
|
|
94
|
-
return obj.
|
|
94
|
+
return shapely.union_all(obj.geometry.values)
|
|
95
95
|
except AttributeError:
|
|
96
96
|
pass
|
|
97
97
|
try:
|
|
@@ -110,7 +110,7 @@ def to_shapely(obj: Any) -> Geometry:
|
|
|
110
110
|
return shapely.wkb.loads(obj)
|
|
111
111
|
except TypeError:
|
|
112
112
|
pass
|
|
113
|
-
raise TypeError(type(obj))
|
|
113
|
+
raise TypeError(type(obj), obj)
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
def to_bbox(
|
|
@@ -136,20 +136,30 @@ def to_bbox(
|
|
|
136
136
|
pass
|
|
137
137
|
|
|
138
138
|
try:
|
|
139
|
-
minx =
|
|
140
|
-
miny =
|
|
141
|
-
maxx =
|
|
142
|
-
maxy =
|
|
139
|
+
minx = float(np.min(obj["minx"])) # type: ignore [index]
|
|
140
|
+
miny = float(np.min(obj["miny"])) # type: ignore [index]
|
|
141
|
+
maxx = float(np.max(obj["maxx"])) # type: ignore [index]
|
|
142
|
+
maxy = float(np.max(obj["maxy"])) # type: ignore [index]
|
|
143
143
|
return minx, miny, maxx, maxy
|
|
144
144
|
except Exception:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
145
|
+
pass
|
|
146
|
+
try:
|
|
147
|
+
minx = float(np.min(obj.minx)) # type: ignore [union-attr]
|
|
148
|
+
miny = float(np.min(obj.miny)) # type: ignore [union-attr]
|
|
149
|
+
maxx = float(np.max(obj.maxx)) # type: ignore [union-attr]
|
|
150
|
+
maxy = float(np.max(obj.maxy)) # type: ignore [union-attr]
|
|
151
|
+
return minx, miny, maxx, maxy
|
|
152
|
+
except Exception:
|
|
153
|
+
pass
|
|
154
|
+
|
|
155
|
+
try:
|
|
156
|
+
minx = float(np.min(obj["west_longitude"])) # type: ignore [index]
|
|
157
|
+
miny = float(np.min(obj["south_latitude"])) # type: ignore [index]
|
|
158
|
+
maxx = float(np.max(obj["east_longitude"])) # type: ignore [index]
|
|
159
|
+
maxy = float(np.max(obj["north_latitude"])) # type: ignore [index]
|
|
160
|
+
return minx, miny, maxx, maxy
|
|
161
|
+
except Exception:
|
|
162
|
+
pass
|
|
153
163
|
|
|
154
164
|
if hasattr(obj, "geometry"):
|
|
155
165
|
try:
|
|
@@ -8,10 +8,10 @@ from shapely import STRtree
|
|
|
8
8
|
from shapely import difference
|
|
9
9
|
from shapely import make_valid
|
|
10
10
|
from shapely import simplify
|
|
11
|
-
from shapely import unary_union
|
|
12
11
|
from shapely.errors import GEOSException
|
|
13
12
|
|
|
14
13
|
from .general import _determine_geom_type_args
|
|
14
|
+
from .general import _grouped_unary_union
|
|
15
15
|
from .general import _parallel_unary_union_geoseries
|
|
16
16
|
from .general import _push_geom_col
|
|
17
17
|
from .general import clean_geoms
|
|
@@ -125,10 +125,8 @@ def update_geometries(
|
|
|
125
125
|
else:
|
|
126
126
|
only_one = erasers.groupby(level=0).transform("size") == 1
|
|
127
127
|
one_hit = erasers[only_one]
|
|
128
|
-
many_hits = (
|
|
129
|
-
erasers[~only_one]
|
|
130
|
-
.groupby(level=0)
|
|
131
|
-
.agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
|
|
128
|
+
many_hits = _grouped_unary_union(
|
|
129
|
+
erasers[~only_one], level=0, grid_size=grid_size
|
|
132
130
|
)
|
|
133
131
|
erasers = pd.concat([one_hit, many_hits]).sort_index()
|
|
134
132
|
|
|
@@ -357,10 +355,33 @@ def _get_intersecting_geometries(
|
|
|
357
355
|
|
|
358
356
|
duplicated_points = points_joined.loc[points_joined.index.duplicated(keep=False)]
|
|
359
357
|
|
|
360
|
-
|
|
358
|
+
out = intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
|
|
361
359
|
columns=["idx_left", "idx_right"]
|
|
362
360
|
)
|
|
363
361
|
|
|
362
|
+
# some polygons within polygons are not counted in the
|
|
363
|
+
within = (
|
|
364
|
+
gdf.assign(_range_idx_inters_left=lambda x: range(len(x)))
|
|
365
|
+
.sjoin(
|
|
366
|
+
GeoDataFrame(
|
|
367
|
+
{
|
|
368
|
+
"geometry": gdf.buffer(1e-6).values,
|
|
369
|
+
"_range_idx_inters_right": range(len(gdf)),
|
|
370
|
+
},
|
|
371
|
+
crs=gdf.crs,
|
|
372
|
+
),
|
|
373
|
+
how="inner",
|
|
374
|
+
predicate="within",
|
|
375
|
+
)
|
|
376
|
+
.loc[lambda x: x["_range_idx_inters_left"] != x["_range_idx_inters_right"]]
|
|
377
|
+
.drop(
|
|
378
|
+
columns=["index_right", "_range_idx_inters_left", "_range_idx_inters_right"]
|
|
379
|
+
)
|
|
380
|
+
.pipe(sfilter_inverse, out.buffer(-PRECISION))
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
return pd.concat([out, within])
|
|
384
|
+
|
|
364
385
|
|
|
365
386
|
def _drop_duplicate_geometries(gdf: GeoDataFrame, **kwargs) -> GeoDataFrame:
|
|
366
387
|
"""Drop geometries that are considered equal.
|
sgis/geopandas_tools/general.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import itertools
|
|
1
3
|
import numbers
|
|
2
4
|
import warnings
|
|
3
5
|
from collections.abc import Hashable
|
|
@@ -8,12 +10,14 @@ import joblib
|
|
|
8
10
|
import numpy as np
|
|
9
11
|
import pandas as pd
|
|
10
12
|
import pyproj
|
|
13
|
+
import shapely
|
|
11
14
|
from geopandas import GeoDataFrame
|
|
12
15
|
from geopandas import GeoSeries
|
|
13
16
|
from geopandas.array import GeometryArray
|
|
14
17
|
from geopandas.array import GeometryDtype
|
|
15
18
|
from numpy.typing import NDArray
|
|
16
19
|
from shapely import Geometry
|
|
20
|
+
from shapely import extract_unique_points
|
|
17
21
|
from shapely import get_coordinates
|
|
18
22
|
from shapely import get_exterior_ring
|
|
19
23
|
from shapely import get_interior_ring
|
|
@@ -22,15 +26,16 @@ from shapely import get_parts
|
|
|
22
26
|
from shapely import linestrings
|
|
23
27
|
from shapely import make_valid
|
|
24
28
|
from shapely import points as shapely_points
|
|
25
|
-
from shapely import
|
|
29
|
+
from shapely import union_all
|
|
26
30
|
from shapely.geometry import LineString
|
|
31
|
+
from shapely.geometry import MultiPoint
|
|
27
32
|
from shapely.geometry import Point
|
|
33
|
+
from shapely.geometry import Polygon
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
35
|
+
from .conversion import coordinate_array
|
|
36
|
+
from .conversion import to_bbox
|
|
37
|
+
from .conversion import to_gdf
|
|
38
|
+
from .conversion import to_geoseries
|
|
34
39
|
from .geometry_types import get_geom_type
|
|
35
40
|
from .geometry_types import make_all_singlepart
|
|
36
41
|
from .geometry_types import to_single_geom_type
|
|
@@ -38,7 +43,7 @@ from .geometry_types import to_single_geom_type
|
|
|
38
43
|
|
|
39
44
|
def split_geom_types(
    gdf: GeoDataFrame | GeoSeries,
) -> tuple[GeoDataFrame | GeoSeries, ...]:
    """Split a GeoDataFrame or GeoSeries into one object per geometry type.

    Args:
        gdf: GeoDataFrame or GeoSeries, possibly with mixed geometry types.

    Returns:
        A tuple with one subset of 'gdf' for each unique value
        in 'gdf.geom_type'.
    """
    # Look up the geometry types once instead of once per unique type.
    geom_types = gdf.geom_type
    return tuple(gdf[geom_types == geom_type] for geom_type in geom_types.unique())
|
|
43
48
|
|
|
44
49
|
|
|
@@ -385,35 +390,27 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
385
390
|
|
|
386
391
|
|
|
387
392
|
def make_lines_between_points(
    *arrs: NDArray[Point] | GeometryArray | GeoSeries,
) -> NDArray[LineString]:
    """Creates an array of linestrings from two or more arrays of points.

    The lines are created rowwise, meaning from arr0[0] to arr1[0], from arr0[1] to arr1[1]...
    If more than two arrays are passed, e.g. three arrays,
    the lines will go from arr0[0] via arr1[0] to arr2[0].

    Args:
        arrs: 1 dimensional arrays of point geometries.
            All arrays must have the same shape.
            Must be at least two arrays.

    Returns:
        A numpy array of linestrings.

    Raises:
        ValueError: If fewer than two arrays are passed,
            or if the coordinate arrays have unequal shape.
    """
    if len(arrs) < 2:
        raise ValueError(
            f"Need at least two arrays of points to make lines. Got {len(arrs)}."
        )

    coords = [get_coordinates(arr, return_index=False) for arr in arrs]

    # Stack to shape (n_lines, n_arrays, n_dims): each row holds the
    # vertices of one line. np.stack raises ValueError on unequal shapes.
    return linestrings(np.stack(coords, axis=1))
|
|
417
414
|
|
|
418
415
|
|
|
419
416
|
def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
|
|
@@ -563,7 +560,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
|
563
560
|
raise TypeError("gdf must be GeoDataFrame or GeoSeries")
|
|
564
561
|
|
|
565
562
|
if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
|
|
566
|
-
raise ValueError(
|
|
563
|
+
raise ValueError(
|
|
564
|
+
f"Cannot convert points to lines. {[gdf.geom_type.value_counts() for gdf in gdfs]}"
|
|
565
|
+
)
|
|
567
566
|
|
|
568
567
|
def _shapely_geometry_to_lines(geom):
|
|
569
568
|
"""Get all lines from the exterior and interiors of a Polygon."""
|
|
@@ -587,7 +586,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
|
587
586
|
|
|
588
587
|
lines += interior_rings
|
|
589
588
|
|
|
590
|
-
return
|
|
589
|
+
return union_all(lines)
|
|
591
590
|
|
|
592
591
|
lines = []
|
|
593
592
|
for gdf in gdfs:
|
|
@@ -677,6 +676,162 @@ def clean_clip(
|
|
|
677
676
|
return gdf
|
|
678
677
|
|
|
679
678
|
|
|
679
|
+
def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
    """Make lines from the points in arr1, through arr2 and 'distance' further.

    For each row, a new end point is placed 'distance' units beyond the
    arr2 point, in the direction going from the arr1 point to the arr2 point.
    The resulting line goes from the arr1 point to the new end point.

    Args:
        arr1: Array of start points. Must support boolean mask indexing.
        arr2: Array of points to extend the lines past.
            Must have the same length as arr1.
        distance: How far beyond the arr2 points the lines should reach.

    Returns:
        An object array with one LineString per row. Rows where the new end
        point is NaN (e.g. when the two points are identical, so the
        direction is undefined) are None. If arr1 is empty, arr1 is returned.

    Raises:
        ValueError: If arr1 and arr2 have different lengths.
    """
    if len(arr1) != len(arr2):
        raise ValueError(
            f"arr1 and arr2 must have equal length. Got {len(arr1)} and {len(arr2)}."
        )
    if not len(arr1):
        return arr1

    # assumes coordinate_array returns an (n, 2) array of x, y -- TODO confirm
    start_coords = coordinate_array(arr1)
    end_coords = coordinate_array(arr2)

    dx = start_coords[:, 0] - end_coords[:, 0]
    dy = start_coords[:, 1] - end_coords[:, 1]
    len_xy = np.sqrt((dx**2.0) + (dy**2.0))

    # Move the end point 'distance' units further along the start -> end direction.
    x = end_coords[:, 0] + (end_coords[:, 0] - start_coords[:, 0]) / len_xy * distance
    y = end_coords[:, 1] + (end_coords[:, 1] - start_coords[:, 1]) / len_xy * distance

    # A zero-length direction vector gives 0 / 0 = NaN; those rows stay None.
    has_end_point = ~np.isnan(x)

    extended = np.full(len(arr1), None, dtype=object)
    extended[has_end_point] = make_lines_between_points(
        arr1[has_end_point],
        shapely.points(x[has_end_point], y[has_end_point]),
    )
    return extended
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def multipoints_to_line_segments_numpy(
    points: GeoSeries | NDArray[MultiPoint] | MultiPoint,
    cycle: bool = False,
) -> list[LineString]:
    """Make two-point line segments between consecutive points.

    Args:
        points: Geometries whose parts are the points to connect. Objects
            with a 'geometry' attribute are read through 'geometry.values'.
        cycle: If True, a closing segment from the last point back to the
            first point is appended.

    Returns:
        A list of LineStrings, one per consecutive pair of points,
        plus the closing segment if 'cycle' is True and there are any points.
    """
    try:
        arr = get_parts(points.geometry.values)
    except AttributeError:
        arr = get_parts(points)

    segments = [LineString([p0, p1]) for p0, p1 in itertools.pairwise(arr)]

    # Without the length check, arr[-1] raises IndexError on empty input.
    if cycle and len(arr):
        segments.append(LineString([arr[-1], arr[0]]))

    return segments
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def multipoints_to_line_segments(
    multipoints: GeoSeries | GeoDataFrame, cycle: bool = True
) -> GeoSeries | GeoDataFrame:
    """Explode multipoints into line segments between consecutive points.

    Args:
        multipoints: GeoDataFrame or GeoSeries of multipoint geometries.
        cycle: If True, each geometry also gets a segment from
            its last point back to its first point.

    Returns:
        The segments, one row per segment, with the index of the multipoint
        each segment came from. If a GeoDataFrame was passed, the other
        columns are joined back on. Empty input is returned unchanged.
    """
    if not len(multipoints):
        return multipoints

    was_gdf = isinstance(multipoints, GeoDataFrame)
    if was_gdf:
        df = multipoints.drop(columns=multipoints.geometry.name)
        multipoints = multipoints.geometry

    # One conversion is enough for both branches.
    multipoints = to_geoseries(multipoints)

    segs = pd.Series(
        [
            multipoints_to_line_segments_numpy(geoms, cycle=cycle)
            for geoms in multipoints
        ],
        index=multipoints.index,
    ).explode()

    segs = GeoSeries(segs, crs=multipoints.crs, name=multipoints.name)

    if was_gdf:
        return GeoDataFrame(df.join(segs), geometry=segs.name, crs=segs.crs)
    return segs
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def get_line_segments(
    lines: GeoDataFrame | GeoSeries, extract_unique: bool = False, cycle=False
) -> GeoDataFrame | GeoSeries:
    """Split geometries into two-point line segments between their vertices.

    Args:
        lines: GeoDataFrame or GeoSeries. The index must be unique.
        extract_unique: If True, segments are made between the unique points
            of each geometry (shapely.extract_unique_points). If False,
            segments are made between all coordinates of each geometry.
        cycle: If True, each geometry also gets a segment from
            its last point back to its first point.

    Returns:
        The segments, one row per segment, indexed by the index of the
        geometry each segment came from. If a GeoDataFrame was passed,
        the other columns are joined back on.
    """
    try:
        assert lines.index.is_unique, "The index of 'lines' must be unique."
    except AttributeError:
        # Input without an index gets one when converted to GeoSeries below.
        pass

    was_gdf = isinstance(lines, GeoDataFrame)
    if was_gdf:
        df = lines.drop(columns=lines.geometry.name)
        lines = lines.geometry
    else:
        lines = to_geoseries(lines)

    partial_segs_func = functools.partial(
        multipoints_to_line_segments_numpy, cycle=cycle
    )
    if extract_unique:
        points = extract_unique_points(lines.geometry.values)
        segs = pd.Series(
            [partial_segs_func(geoms) for geoms in points],
            index=lines.index,
        ).explode()
    else:
        coords, indices = shapely.get_coordinates(lines, return_index=True)
        # 'indices' holds the position in 'lines' of the geometry each
        # coordinate belongs to, so positional lookup gives the right label.
        # This also works when some geometries have no coordinates, where
        # pairing the unique positions with the full index would fail.
        points = GeoSeries(shapely.points(coords), index=lines.index[indices])

        segs = points.groupby(level=0).agg(partial_segs_func).explode()
    segs = GeoSeries(segs, crs=lines.crs, name=lines.name)

    if was_gdf:
        return GeoDataFrame(df.join(segs), geometry=segs.name, crs=lines.crs)
    return segs
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def get_index_right_columns(gdf: pd.DataFrame | pd.Series) -> list[str]:
|
|
799
|
+
"""Get a list of what will be the resulting columns in an sjoin."""
|
|
800
|
+
if gdf.index.name is None and all(name is None for name in gdf.index.names):
|
|
801
|
+
if gdf.index.nlevels == 1:
|
|
802
|
+
return ["index_right"]
|
|
803
|
+
else:
|
|
804
|
+
return [f"index_right{i}" for i in range(gdf.index.nlevels)]
|
|
805
|
+
else:
|
|
806
|
+
return gdf.index.names
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def points_in_bounds(
    gdf: GeoDataFrame | GeoSeries, gridsize: int | float
) -> GeoDataFrame:
    """Get a GeoDataFrame of points within the bounds of the GeoDataFrame.

    The points form a regular grid spanning the bounding box of 'gdf',
    with int(extent / gridsize) evenly spaced values along each axis.

    NOTE(review): since both endpoints are included, the actual spacing is
    extent / (num - 1), which is larger than 'gridsize' -- confirm that
    this is intended.

    Args:
        gdf: Object to take the bounding box (and crs, if it has one) from.
        gridsize: Approximate spacing between the points.

    Returns:
        A GeoDataFrame of grid points.
    """
    minx, miny, maxx, maxy = to_bbox(gdf)

    # Objects without a crs attribute give points without crs.
    crs = getattr(gdf, "crs", None)

    n_x = int((maxx - minx) / gridsize)
    n_y = int((maxy - miny) / gridsize)
    x_grid, y_grid = np.meshgrid(
        np.linspace(minx, maxx, num=n_x),
        np.linspace(miny, maxy, num=n_y),
        indexing="ij",
    )

    # Flatten the two grids into an (n_points, 2) array of x, y pairs.
    xy = np.column_stack((x_grid.ravel(), y_grid.ravel()))
    return to_gdf(xy, crs=crs)
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def points_in_polygons(
    gdf: GeoDataFrame | GeoSeries, gridsize: int | float
) -> GeoDataFrame:
    """Get grid points from the bounds of 'gdf' that spatially join with 'gdf'.

    Args:
        gdf: GeoDataFrame or GeoSeries to get points for.
        gridsize: Passed on to points_in_bounds.

    Returns:
        A GeoDataFrame of points, indexed by the index of the 'gdf' row
        each point was joined with, and sorted by that index.
    """
    # Names of the column(s) sjoin will store the index of 'gdf' in.
    index_columns = get_index_right_columns(gdf)

    grid_points = points_in_bounds(gdf, gridsize)
    joined = grid_points.sjoin(gdf).set_index(index_columns)
    joined.index.name = gdf.index.name

    return joined.sort_index()
|
|
833
|
+
|
|
834
|
+
|
|
680
835
|
def _determine_geom_type_args(
|
|
681
836
|
gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
|
|
682
837
|
) -> tuple[GeoDataFrame, str, bool]:
|
|
@@ -696,65 +851,93 @@ def _determine_geom_type_args(
|
|
|
696
851
|
return gdf, geom_type, keep_geom_type
|
|
697
852
|
|
|
698
853
|
|
|
699
|
-
def
|
|
700
|
-
|
|
854
|
+
def _unary_union_for_notna(geoms, **kwargs):
    """Union the geometries to one valid geometry, ignoring missing values.

    Args:
        geoms: Geometries to union. Must have a 'dropna' method if a
            TypeError is raised in the first attempt.
        **kwargs: Keyword arguments passed to shapely.union_all.

    Returns:
        A single geometry, passed through make_valid.
    """
    try:
        return make_valid(union_all(geoms, **kwargs))
    except TypeError:
        # Retry without missing values. The result is made valid
        # here as well, so that both paths return the same thing.
        return make_valid(union_all(geoms.dropna().values, **kwargs))
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _grouped_unary_union(
    df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
    by: str | list[str] | None = None,
    level: int | None = None,
    as_index: bool = True,
    grid_size: float | int | None = None,
    dropna: bool = False,
    **kwargs,
) -> GeoSeries | GeoDataFrame:
    """Vectorized unary_union for groups.

    Experimental. Messy code.

    The geometries are pivoted to a 2d array with one row per group,
    so that all groups are unioned in a single call to shapely.union_all.

    Args:
        df: (Geo)DataFrame or (Geo)Series with the geometries to union.
        by: Column name(s) or index level name(s) to group by.
        level: Index level to group by. Only used if 'by' is None.
        as_index: If True (default), the group keys are the index of the
            returned GeoSeries. If False, the index is reset.
        grid_size: Precision grid size passed to shapely.union_all.
        dropna: Passed to pandas groupby. Whether to drop rows with
            missing group keys.
        **kwargs: Additional keyword arguments passed to shapely.union_all.

    Returns:
        A GeoSeries with one unioned, valid geometry per group,
        or the result of resetting its index if 'as_index' is False.
        Empty input gives an empty GeoSeries.

    Raises:
        TypeError: If neither 'by' nor 'level' is specified.
    """
    # The input is only read, so the reference is enough for error messages.
    df_orig = df
    df = df.copy()

    try:
        geom_col = df._geometry_column_name
    except AttributeError:
        try:
            geom_col = df.name
            if geom_col is None:
                geom_col = "geometry"
        except AttributeError:
            geom_col = "geometry"

    if not len(df):
        return GeoSeries(name=geom_col)

    if isinstance(df, pd.Series):
        # To frame with the index levels as columns, but with the index kept.
        df.name = geom_col
        original_index = df.index
        df = df.reset_index()
        df.index = original_index

    if isinstance(by, str):
        by = [by]
    elif by is None and level is None:
        raise TypeError("You have to supply one of 'by' and 'level'")
    elif by is None:
        by = df.index.get_level_values(level)

    # Running number within each group. Becomes the columns in the wide format.
    cumcount = df.groupby(by, dropna=dropna).cumcount().values

    def get_col_or_index(df, col: str) -> pd.Series | pd.Index:
        """Get the values of a column or, if no such column, an index level."""
        try:
            return df[col]
        except KeyError:
            for i, name in enumerate(df.index.names):
                if name == col:
                    return df.index.get_level_values(i)
        raise KeyError(col)

    try:
        df.index = pd.MultiIndex.from_arrays(
            [cumcount, *[get_col_or_index(df, col) for col in by]]
        )
    except KeyError:
        # 'by' holds the group keys themselves, not column/level names.
        df.index = pd.MultiIndex.from_arrays([cumcount, by])

    # to wide format: each row will be one group to be merged to one geometry
    try:
        geoms_wide: pd.DataFrame = df[geom_col].unstack(level=0)
    except Exception as e:
        bb = [*by, geom_col]
        raise e.__class__(e, f"by={by}", df_orig[bb], df[geom_col]) from e
    geometries_2d: NDArray[Polygon | None] = geoms_wide.values
    try:
        geometries_2d = make_valid(geometries_2d)
    except TypeError:
        # make_valid doesn't like nan, so converting to None
        # np.isnan doesn't accept geometry type, so using isinstance
        np_isinstance = np.vectorize(isinstance)
        geometries_2d[~np_isinstance(geometries_2d, Geometry)] = None

    # grid_size was previously accepted, but never passed on to union_all.
    unioned = make_valid(
        union_all(geometries_2d, grid_size=grid_size, axis=1, **kwargs)
    )

    geoms = GeoSeries(unioned, name=geom_col, index=geoms_wide.index)

    return geoms if as_index else geoms.reset_index()
|
|
758
941
|
|
|
759
942
|
|
|
760
943
|
def _parallel_unary_union(
|
|
@@ -769,36 +952,12 @@ def _parallel_unary_union(
|
|
|
769
952
|
delayed_operations = []
|
|
770
953
|
for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
|
|
771
954
|
delayed_operations.append(
|
|
772
|
-
joblib.delayed(
|
|
955
|
+
joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
|
|
773
956
|
)
|
|
774
957
|
|
|
775
958
|
return parallel(delayed_operations)
|
|
776
959
|
|
|
777
960
|
|
|
778
|
-
def _parallel_unary_union_geoseries(
|
|
779
|
-
ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
|
|
780
|
-
) -> list[Geometry]:
|
|
781
|
-
|
|
782
|
-
is_one_hit = ser.groupby(**kwargs).transform("size") == 1
|
|
783
|
-
|
|
784
|
-
one_hit = ser.loc[is_one_hit]
|
|
785
|
-
many_hits = ser.loc[~is_one_hit]
|
|
786
|
-
|
|
787
|
-
with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
|
|
788
|
-
delayed_operations = []
|
|
789
|
-
for _, geoms in many_hits.groupby(**kwargs):
|
|
790
|
-
delayed_operations.append(
|
|
791
|
-
joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
|
|
792
|
-
)
|
|
793
|
-
|
|
794
|
-
dissolved = pd.Series(
|
|
795
|
-
parallel(delayed_operations),
|
|
796
|
-
index=is_one_hit[lambda x: x is False].index.unique(),
|
|
797
|
-
)
|
|
798
|
-
|
|
799
|
-
return pd.concat([dissolved, one_hit]).sort_index().values
|
|
800
|
-
|
|
801
|
-
|
|
802
961
|
def _parallel_unary_union_geoseries(
|
|
803
962
|
ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
|
|
804
963
|
) -> list[Geometry]:
|
|
@@ -807,7 +966,7 @@ def _parallel_unary_union_geoseries(
|
|
|
807
966
|
delayed_operations = []
|
|
808
967
|
for _, geoms in ser.groupby(**kwargs):
|
|
809
968
|
delayed_operations.append(
|
|
810
|
-
joblib.delayed(
|
|
969
|
+
joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
|
|
811
970
|
)
|
|
812
971
|
|
|
813
972
|
return parallel(delayed_operations)
|
|
@@ -138,7 +138,7 @@ def to_single_geom_type(
|
|
|
138
138
|
|
|
139
139
|
def _shapely_to_single_geom_type(geom: Geometry, geom_type: str) -> Geometry:
    """Union the parts of 'geom' that are of the given geometry type.

    A part is kept if 'geom_type' is a case-insensitive substring of the
    part's geometry type, so e.g. "polygon" matches both "Polygon"
    and "MultiPolygon".

    Args:
        geom: Geometry to filter the parts of.
        geom_type: Geometry type to keep.

    Returns:
        The union of the parts that matched.
    """
    matching_parts = []
    for part in shapely.get_parts(geom):
        if geom_type.lower() in part.geom_type.lower():
            matching_parts.append(part)
    return shapely.union_all(matching_parts)
|
|
144
144
|
|
|
@@ -13,6 +13,7 @@ import shapely
|
|
|
13
13
|
from geopandas import GeoDataFrame
|
|
14
14
|
from geopandas import GeoSeries
|
|
15
15
|
from pandas import DataFrame
|
|
16
|
+
from pandas import MultiIndex
|
|
16
17
|
from pandas import Series
|
|
17
18
|
from sklearn.neighbors import NearestNeighbors
|
|
18
19
|
|
|
@@ -97,29 +98,32 @@ def get_neighbor_indices(
|
|
|
97
98
|
['a' 'a' 'b' 'b']
|
|
98
99
|
|
|
99
100
|
"""
|
|
101
|
+
if isinstance(gdf.index, MultiIndex) or isinstance(neighbors.index, MultiIndex):
|
|
102
|
+
raise ValueError("get_neighbor_indices not implemented for pandas.MultiIndex")
|
|
100
103
|
if gdf.crs != neighbors.crs:
|
|
101
104
|
raise ValueError(f"'crs' mismatch. Got {gdf.crs} and {neighbors.crs}")
|
|
102
105
|
|
|
103
106
|
if isinstance(neighbors, GeoSeries):
|
|
104
107
|
neighbors = neighbors.to_frame()
|
|
108
|
+
else:
|
|
109
|
+
neighbors = neighbors[[neighbors._geometry_column_name]]
|
|
105
110
|
|
|
106
111
|
# buffer and keep only geometry column
|
|
107
112
|
if max_distance and predicate != "nearest":
|
|
108
|
-
gdf = gdf.buffer(max_distance).to_frame()
|
|
113
|
+
gdf = gdf.buffer(max_distance).to_frame("geometry")
|
|
109
114
|
else:
|
|
110
|
-
gdf = gdf.geometry.to_frame()
|
|
115
|
+
gdf = gdf.geometry.to_frame("geometry")
|
|
116
|
+
|
|
117
|
+
neighbors.index.name = None
|
|
118
|
+
gdf.index.name = None
|
|
111
119
|
|
|
112
120
|
if predicate == "nearest":
|
|
113
121
|
max_distance = None if max_distance == 0 else max_distance
|
|
114
|
-
joined = gdf.sjoin_nearest(
|
|
115
|
-
neighbors, how="inner", max_distance=max_distance
|
|
116
|
-
).rename(columns={"index_right": "neighbor_index"}, errors="raise")
|
|
122
|
+
joined = gdf.sjoin_nearest(neighbors, how="inner", max_distance=max_distance)
|
|
117
123
|
else:
|
|
118
|
-
joined = gdf.sjoin(neighbors, how="inner", predicate=predicate)
|
|
119
|
-
columns={"index_right": "neighbor_index"}, errors="raise"
|
|
120
|
-
)
|
|
124
|
+
joined = gdf.sjoin(neighbors, how="inner", predicate=predicate)
|
|
121
125
|
|
|
122
|
-
return joined["neighbor_index"]
|
|
126
|
+
return joined.rename(columns={"index_right": "neighbor_index"})["neighbor_index"]
|
|
123
127
|
|
|
124
128
|
|
|
125
129
|
def get_neighbor_dfs(
|
|
@@ -469,6 +473,6 @@ def _get_edges(
|
|
|
469
473
|
Returns:
|
|
470
474
|
A 2d numpy array of edges (from-to indices).
|
|
471
475
|
"""
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
)
|
|
476
|
+
row_indices = np.arange(len(indices)).reshape(-1, 1)
|
|
477
|
+
|
|
478
|
+
return np.stack((np.broadcast_to(row_indices, indices.shape), indices), axis=-1)
|
sgis/geopandas_tools/overlay.py
CHANGED
|
@@ -11,7 +11,6 @@ version of the solution from GH 2792.
|
|
|
11
11
|
import functools
|
|
12
12
|
from collections.abc import Callable
|
|
13
13
|
|
|
14
|
-
import dask.array as da
|
|
15
14
|
import geopandas as gpd
|
|
16
15
|
import joblib
|
|
17
16
|
import numpy as np
|
|
@@ -28,6 +27,11 @@ from shapely import make_valid
|
|
|
28
27
|
from shapely import unary_union
|
|
29
28
|
from shapely.errors import GEOSException
|
|
30
29
|
|
|
30
|
+
try:
|
|
31
|
+
import dask.array as da
|
|
32
|
+
except ImportError:
|
|
33
|
+
pass
|
|
34
|
+
|
|
31
35
|
from .general import _determine_geom_type_args
|
|
32
36
|
from .general import clean_geoms
|
|
33
37
|
from .geometry_types import get_geom_type
|
|
@@ -238,8 +242,8 @@ def _shapely_pd_overlay(
|
|
|
238
242
|
left, right = tree.query(df1.geometry.values, predicate=predicate)
|
|
239
243
|
|
|
240
244
|
pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
|
|
241
|
-
assert pairs.geometry.notna().all()
|
|
242
|
-
assert pairs.geom_right.notna().all()
|
|
245
|
+
assert pairs.geometry.notna().all(), pairs.geometry
|
|
246
|
+
assert pairs.geom_right.notna().all(), pairs.geom_right
|
|
243
247
|
|
|
244
248
|
if how == "intersection":
|
|
245
249
|
overlayed = [
|