ssb-sgis 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +20 -9
- sgis/debug_config.py +24 -0
- sgis/exceptions.py +2 -2
- sgis/geopandas_tools/bounds.py +33 -36
- sgis/geopandas_tools/buffer_dissolve_explode.py +136 -35
- sgis/geopandas_tools/centerlines.py +4 -91
- sgis/geopandas_tools/cleaning.py +1576 -583
- sgis/geopandas_tools/conversion.py +38 -19
- sgis/geopandas_tools/duplicates.py +29 -8
- sgis/geopandas_tools/general.py +263 -100
- sgis/geopandas_tools/geometry_types.py +4 -4
- sgis/geopandas_tools/neighbors.py +19 -15
- sgis/geopandas_tools/overlay.py +2 -2
- sgis/geopandas_tools/point_operations.py +5 -5
- sgis/geopandas_tools/polygon_operations.py +510 -105
- sgis/geopandas_tools/polygons_as_rings.py +40 -8
- sgis/geopandas_tools/sfilter.py +29 -12
- sgis/helpers.py +3 -3
- sgis/io/dapla_functions.py +238 -19
- sgis/io/read_parquet.py +1 -1
- sgis/maps/examine.py +27 -12
- sgis/maps/explore.py +450 -65
- sgis/maps/legend.py +177 -76
- sgis/maps/map.py +206 -103
- sgis/maps/maps.py +178 -105
- sgis/maps/thematicmap.py +243 -83
- sgis/networkanalysis/_service_area.py +6 -1
- sgis/networkanalysis/closing_network_holes.py +2 -2
- sgis/networkanalysis/cutting_lines.py +15 -8
- sgis/networkanalysis/directednetwork.py +1 -1
- sgis/networkanalysis/finding_isolated_networks.py +15 -8
- sgis/networkanalysis/networkanalysis.py +17 -19
- sgis/networkanalysis/networkanalysisrules.py +1 -1
- sgis/networkanalysis/traveling_salesman.py +1 -1
- sgis/parallel/parallel.py +64 -27
- sgis/raster/__init__.py +0 -6
- sgis/raster/base.py +208 -0
- sgis/raster/cube.py +54 -8
- sgis/raster/image_collection.py +3257 -0
- sgis/raster/indices.py +17 -5
- sgis/raster/raster.py +138 -243
- sgis/raster/sentinel_config.py +120 -0
- sgis/raster/zonal.py +0 -1
- {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/METADATA +6 -7
- ssb_sgis-1.0.4.dist-info/RECORD +62 -0
- sgis/raster/methods_as_functions.py +0 -0
- sgis/raster/torchgeo.py +0 -171
- ssb_sgis-1.0.2.dist-info/RECORD +0 -61
- {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/conversion.py
CHANGED

```diff
@@ -90,8 +90,10 @@ def to_shapely(obj: Any) -> Geometry:
         return obj
     if not hasattr(obj, "__iter__"):
         raise TypeError(type(obj))
-
-    return obj.unary_union
+    try:
+        return shapely.union_all(obj.geometry.values)
+    except AttributeError:
+        pass
     try:
         return Point(*obj)
     except TypeError:
@@ -108,6 +110,7 @@ def to_shapely(obj: Any) -> Geometry:
         return shapely.wkb.loads(obj)
     except TypeError:
         pass
+    raise TypeError(type(obj), obj)


 def to_bbox(
```
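For frame-like input, `to_shapely` now merges the geometry column with `shapely.union_all` (the vectorized shapely 2 replacement for the deprecated `unary_union`), and input matching none of the fallbacks now fails loudly with `TypeError(type(obj), obj)` instead of falling off the end of the function. A minimal sketch of the new frame-handling branch, on toy data:

```python
import geopandas as gpd
import shapely
from shapely.geometry import Point

# A GeoDataFrame enters the new try-branch via its .geometry attribute.
gdf = gpd.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
merged = shapely.union_all(gdf.geometry.values)
print(merged)  # MULTIPOINT (0 0, 1 1)
```

Objects without a `.geometry` attribute raise `AttributeError` and fall through to the remaining conversions (coordinate pairs, WKB) shown in the next hunk.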
```diff
@@ -122,25 +125,41 @@ def to_bbox(
         "xmin", "ymin", "xmax", "ymax".
     """
     if isinstance(obj, (GeoDataFrame, GeoSeries)):
-
-
-        return
+        bounds = tuple(obj.total_bounds)
+        assert isinstance(bounds, tuple)
+        return bounds
+    try:
+        bounds = tuple(obj.bounds)
+        assert isinstance(bounds, tuple)
+        return bounds
+    except Exception:
+        pass

     try:
-        minx =
-        miny =
-        maxx =
-        maxy =
+        minx = float(np.min(obj["minx"]))  # type: ignore [index]
+        miny = float(np.min(obj["miny"]))  # type: ignore [index]
+        maxx = float(np.max(obj["maxx"]))  # type: ignore [index]
+        maxy = float(np.max(obj["maxy"]))  # type: ignore [index]
         return minx, miny, maxx, maxy
     except Exception:
-
-
-
-
-
-
-
-
+        pass
+    try:
+        minx = float(np.min(obj.minx))  # type: ignore [union-attr]
+        miny = float(np.min(obj.miny))  # type: ignore [union-attr]
+        maxx = float(np.max(obj.maxx))  # type: ignore [union-attr]
+        maxy = float(np.max(obj.maxy))  # type: ignore [union-attr]
+        return minx, miny, maxx, maxy
+    except Exception:
+        pass
+
+    try:
+        minx = float(np.min(obj["west_longitude"]))  # type: ignore [index]
+        miny = float(np.min(obj["south_latitude"]))  # type: ignore [index]
+        maxx = float(np.max(obj["east_longitude"]))  # type: ignore [index]
+        maxy = float(np.max(obj["north_latitude"]))  # type: ignore [index]
+        return minx, miny, maxx, maxy
+    except Exception:
+        pass

     if hasattr(obj, "geometry"):
         try:
```
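The rewritten `to_bbox` is a duck-typing cascade: GeoPandas objects short-circuit through `total_bounds`, then a `.bounds` attribute is tried, then `"minx"`-style keys, `minx`-style attributes, and finally `"west_longitude"`-style keys, each attempt wrapped in try/except so failures fall through to the next. A hedged toy illustration of the key-based steps; `bbox_from_mapping` is an illustrative name, not the library's code:

```python
import numpy as np

def bbox_from_mapping(obj) -> tuple[float, float, float, float]:
    # Try each key scheme in turn; any failure falls through to the next,
    # mirroring the try/except chain in the diff above.
    for keys in (
        ("minx", "miny", "maxx", "maxy"),
        ("west_longitude", "south_latitude", "east_longitude", "north_latitude"),
    ):
        try:
            xmin, ymin = (float(np.min(obj[k])) for k in keys[:2])
            xmax, ymax = (float(np.max(obj[k])) for k in keys[2:])
            return xmin, ymin, xmax, ymax
        except Exception:
            continue
    raise TypeError(type(obj))

print(bbox_from_mapping({"minx": [0, 1], "miny": [2], "maxx": [5], "maxy": [7]}))
# (0.0, 2.0, 5.0, 7.0)
```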
```diff
@@ -195,7 +214,7 @@ def coordinate_array(
         np.ndarray of np.ndarrays of coordinates.

     Examples:
-
+    ---------
     >>> import sgis as sg
     >>> points = sg.to_gdf(
     ... [
@@ -279,7 +298,7 @@ def to_gdf(
         A GeoDataFrame with one column, the geometry column.

     Examples:
-
+    ---------
     >>> import sgis as sg
     >>> coords = (10, 60)
     >>> sg.to_gdf(coords, crs=4326)
```
sgis/geopandas_tools/duplicates.py
CHANGED

```diff
@@ -8,10 +8,10 @@ from shapely import STRtree
 from shapely import difference
 from shapely import make_valid
 from shapely import simplify
-from shapely import unary_union
 from shapely.errors import GEOSException

 from .general import _determine_geom_type_args
+from .general import _grouped_unary_union
 from .general import _parallel_unary_union_geoseries
 from .general import _push_geom_col
 from .general import clean_geoms
@@ -54,7 +54,7 @@ def update_geometries(
         predicate: Spatial predicate for the spatial tree.

     Example:
-
+    --------
     Create two circles and get the overlap.

     >>> import sgis as sg
@@ -125,10 +125,8 @@ def update_geometries(
     else:
         only_one = erasers.groupby(level=0).transform("size") == 1
         one_hit = erasers[only_one]
-        many_hits = (
-            erasers[~only_one]
-            .groupby(level=0)
-            .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
+        many_hits = _grouped_unary_union(
+            erasers[~only_one], level=0, grid_size=grid_size
         )
         erasers = pd.concat([one_hit, many_hits]).sort_index()

@@ -213,7 +211,7 @@ def get_intersections(
         A GeoDataFrame of the overlapping polygons.

     Examples:
-
+    ---------
     Create three partially overlapping polygons.

     >>> import sgis as sg
@@ -357,10 +355,33 @@ def _get_intersecting_geometries(

     duplicated_points = points_joined.loc[points_joined.index.duplicated(keep=False)]

-    return intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
+    out = intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
         columns=["idx_left", "idx_right"]
     )

+    # some polygons within polygons are not counted in the
+    within = (
+        gdf.assign(_range_idx_inters_left=lambda x: range(len(x)))
+        .sjoin(
+            GeoDataFrame(
+                {
+                    "geometry": gdf.buffer(1e-6).values,
+                    "_range_idx_inters_right": range(len(gdf)),
+                },
+                crs=gdf.crs,
+            ),
+            how="inner",
+            predicate="within",
+        )
+        .loc[lambda x: x["_range_idx_inters_left"] != x["_range_idx_inters_right"]]
+        .drop(
+            columns=["index_right", "_range_idx_inters_left", "_range_idx_inters_right"]
+        )
+        .pipe(sfilter_inverse, out.buffer(-PRECISION))
+    )
+
+    return pd.concat([out, within])
+

 def _drop_duplicate_geometries(gdf: GeoDataFrame, **kwargs) -> GeoDataFrame:
     """Drop geometries that are considered equal.
```
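The block added to `_get_intersecting_geometries` catches polygons that lie entirely inside other polygons, which the point-based duplicate check above it can miss: an inner `sjoin` with `predicate="within"` against marginally buffered copies (buffered slightly, presumably to make the predicate tolerant of shared boundaries), self-matches removed by comparing positional indices, and already-collected overlaps filtered away with `sfilter_inverse`. A toy sketch of the within-join core, with illustrative column names:

```python
import geopandas as gpd
from shapely.geometry import box

gdf = gpd.GeoDataFrame({"geometry": [box(0, 0, 10, 10), box(2, 2, 4, 4)]})
left = gdf.assign(_i=range(len(gdf)))
right = gpd.GeoDataFrame(
    {"geometry": gdf.buffer(1e-6).values, "_j": range(len(gdf))}
)
within = left.sjoin(right, how="inner", predicate="within")
within = within[within["_i"] != within["_j"]]  # drop self-matches
print(within[["_i", "_j"]])  # row 1 (small square) is within row 0
```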
sgis/geopandas_tools/general.py
CHANGED

```diff
@@ -1,20 +1,23 @@
+import functools
+import itertools
 import numbers
 import warnings
 from collections.abc import Hashable
 from collections.abc import Iterable
 from typing import Any

-import dask_geopandas
 import joblib
 import numpy as np
 import pandas as pd
 import pyproj
+import shapely
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from geopandas.array import GeometryArray
 from geopandas.array import GeometryDtype
 from numpy.typing import NDArray
 from shapely import Geometry
+from shapely import extract_unique_points
 from shapely import get_coordinates
 from shapely import get_exterior_ring
 from shapely import get_interior_ring
@@ -23,10 +26,16 @@ from shapely import get_parts
 from shapely import linestrings
 from shapely import make_valid
 from shapely import points as shapely_points
-from shapely import unary_union
+from shapely import union_all
 from shapely.geometry import LineString
+from shapely.geometry import MultiPoint
 from shapely.geometry import Point
+from shapely.geometry import Polygon

+from .conversion import coordinate_array
+from .conversion import to_bbox
+from .conversion import to_gdf
+from .conversion import to_geoseries
 from .geometry_types import get_geom_type
 from .geometry_types import make_all_singlepart
 from .geometry_types import to_single_geom_type
@@ -34,7 +43,7 @@ from .geometry_types import to_single_geom_type

 def split_geom_types(gdf: GeoDataFrame | GeoSeries) -> tuple[GeoDataFrame | GeoSeries]:
     return tuple(
-        gdf
+        gdf[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
     )

@@ -164,7 +173,7 @@ def clean_geoms(
         non-empty and not-NaN/-None geometries.

     Examples:
-
+    ---------
     >>> import sgis as sg
     >>> import pandas as pd
     >>> from shapely import wkt
@@ -281,7 +290,7 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
         A GeoDataFrame or GeoSeries sorted from large to small in area.

     Examples:
-
+    ---------
     Create GeoDataFrame with NaN values.

     >>> import sgis as sg
@@ -381,35 +390,27 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:


 def make_lines_between_points(
-    arr1: NDArray[Point] | GeometryArray | GeoSeries,
-    arr2: NDArray[Point] | GeometryArray | GeoSeries,
+    *arrs: NDArray[Point] | GeometryArray | GeoSeries,
 ) -> NDArray[LineString]:
-    """Creates an array of linestrings from two arrays of points.
+    """Creates an array of linestrings from two or more arrays of points.

-    The
+    The lines are created rowwise, meaning from arr0[0] to arr1[0], from arr0[1] to arr1[1]...
+    If more than two arrays are passed, e.g. three arrays,
+    the lines will go from arr0[0] via arr1[0] to arr2[0].

     Args:
-
-
+        arrs: 1 dimensional arrays of point geometries.
+            All arrays must have the same shape.
+            Must be at least two arrays.

     Returns:
         A numpy array of linestrings.

-    Raises:
-        ValueError: If the arrays have unequal shape.
-
     """
-
-
-
-    coords = pd.concat(
-        [
-            pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
-            pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
-        ]
-    ).sort_index()
-
-    return linestrings(coords.values, indices=coords.index)
+    coords = [get_coordinates(arr, return_index=False) for arr in arrs]
+    return linestrings(
+        np.concatenate([coords_arr[:, None, :] for coords_arr in coords], axis=1)
+    )


 def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
```
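The rewrite drops the pandas concat/sort_index round-trip of the old implementation: per-array coordinates are stacked along a new middle axis into shape `(n_lines, n_points_per_line, 2)`, and `shapely.linestrings` builds one LineString per row in a single vectorized call. A small demo of that construction with three point arrays:

```python
import numpy as np
from shapely import get_coordinates, linestrings, points

arr0 = points([(0, 0), (1, 0)])
arr1 = points([(0, 1), (1, 1)])
arr2 = points([(0, 2), (1, 2)])

# Stack per-array coordinates along axis 1: shape (2 lines, 3 vertices, 2).
coords = [get_coordinates(arr) for arr in (arr0, arr1, arr2)]
lines = linestrings(np.concatenate([c[:, None, :] for c in coords], axis=1))
print(lines[0])  # LINESTRING (0 0, 0 1, 0 2)
```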
```diff
@@ -423,7 +424,7 @@ def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
         A GeoDataFrame of points with n rows.

     Examples:
-
+    ---------
     >>> import sgis as sg
     >>> points = sg.random_points(10_000)
     >>> points
@@ -523,7 +524,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
         always ignores the index.

     Examples:
-
+    ---------
     Convert single polygon to linestring.

     >>> import sgis as sg
@@ -559,7 +560,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
         raise TypeError("gdf must be GeoDataFrame or GeoSeries")

     if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
-        raise ValueError(
+        raise ValueError(
+            f"Cannot convert points to lines. {[gdf.geom_type.value_counts() for gdf in gdfs]}"
+        )

     def _shapely_geometry_to_lines(geom):
         """Get all lines from the exterior and interiors of a Polygon."""
@@ -583,7 +586,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:

             lines += interior_rings

-        return unary_union(lines)
+        return union_all(lines)

     lines = []
     for gdf in gdfs:
```
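For context (the helper's body is mostly outside this hunk, so hedged): `_shapely_geometry_to_lines` collects a polygon's exterior ring and interior rings into `lines`, and the only change is that the collected rings are now merged with `union_all` rather than `unary_union`. A standalone sketch of that ring extraction, assuming a polygon with one hole:

```python
from shapely import union_all
from shapely.geometry import LineString, Polygon

poly = Polygon(
    [(0, 0), (10, 0), (10, 10), (0, 10)],
    holes=[[(4, 4), (6, 4), (6, 6), (4, 6)]],
)
# Exterior plus interior rings as LineStrings, merged into one geometry.
lines = [LineString(poly.exterior.coords)] + [
    LineString(ring.coords) for ring in poly.interiors
]
print(union_all(lines).geom_type)  # MultiLineString
```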
```diff
@@ -673,6 +676,162 @@ def clean_clip(
     return gdf


+def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
+    if len(arr1) != len(arr2):
+        raise ValueError
+    if not len(arr1):
+        return arr1
+
+    arr1, arr2 = arr2, arr1  # TODO fix
+
+    coords1 = coordinate_array(arr1)
+    coords2 = coordinate_array(arr2)
+
+    dx = coords2[:, 0] - coords1[:, 0]
+    dy = coords2[:, 1] - coords1[:, 1]
+    len_xy = np.sqrt((dx**2.0) + (dy**2.0))
+    x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
+    y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
+
+    new_points = np.array([None for _ in range(len(arr1))])
+    new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
+
+    new_points[~np.isnan(x)] = make_lines_between_points(
+        arr2[~np.isnan(x)], new_points[~np.isnan(x)]
+    )
+    return new_points
+
+
+def multipoints_to_line_segments_numpy(
+    points: GeoSeries | NDArray[MultiPoint] | MultiPoint,
+    cycle: bool = False,
+) -> list[LineString]:
+    try:
+        arr = get_parts(points.geometry.values)
+    except AttributeError:
+        arr = get_parts(points)
+
+    line_between_last_and_first = [LineString([arr[-1], arr[0]])] if cycle else []
+    return [
+        LineString([p0, p1]) for p0, p1 in itertools.pairwise(arr)
+    ] + line_between_last_and_first
+
+
+def multipoints_to_line_segments(
+    multipoints: GeoSeries | GeoDataFrame, cycle: bool = True  # to_next: bool = True,
+) -> GeoSeries | GeoDataFrame:
+
+    if not len(multipoints):
+        return multipoints
+
+    if isinstance(multipoints, GeoDataFrame):
+        df = multipoints.drop(columns=multipoints.geometry.name)
+        multipoints = multipoints.geometry
+        was_gdf = True
+    else:
+        multipoints = to_geoseries(multipoints)
+        was_gdf = False
+
+    multipoints = to_geoseries(multipoints)
+
+    segs = pd.Series(
+        [
+            multipoints_to_line_segments_numpy(geoms, cycle=cycle)
+            for geoms in multipoints
+        ],
+        index=multipoints.index,
+    ).explode()
+
+    segs = GeoSeries(segs, crs=multipoints.crs, name=multipoints.name)
+
+    if was_gdf:
+        return GeoDataFrame(df.join(segs), geometry=segs.name, crs=segs.crs)
+    else:
+        return segs
+
+
+def get_line_segments(
+    lines: GeoDataFrame | GeoSeries, extract_unique: bool = False, cycle=False
+) -> GeoDataFrame:
+    try:
+        assert lines.index.is_unique
+    except AttributeError:
+        pass
+
+    if isinstance(lines, GeoDataFrame):
+        df = lines.drop(columns=lines.geometry.name)
+        lines = lines.geometry
+        was_gdf = True
+    else:
+        lines = to_geoseries(lines)
+        was_gdf = False
+
+    partial_segs_func = functools.partial(
+        multipoints_to_line_segments_numpy, cycle=cycle
+    )
+    if extract_unique:
+        points = extract_unique_points(lines.geometry.values)
+        segs = pd.Series(
+            [partial_segs_func(geoms) for geoms in points],
+            index=lines.index,
+        ).explode()
+    else:
+        coords, indices = shapely.get_coordinates(lines, return_index=True)
+        points = GeoSeries(shapely.points(coords), index=indices)
+        index_mapper = {
+            i: idx
+            for i, idx in zip(
+                np.unique(indices), lines.index.drop_duplicates(), strict=True
+            )
+        }
+        points.index = points.index.map(index_mapper)
+
+        segs = points.groupby(level=0).agg(partial_segs_func).explode()
+    segs = GeoSeries(segs, crs=lines.crs, name=lines.name)
+
+    if was_gdf:
+        return GeoDataFrame(df.join(segs), geometry=segs.name, crs=lines.crs)
+    else:
+        return segs
+
+
+def get_index_right_columns(gdf: pd.DataFrame | pd.Series) -> list[str]:
+    """Get a list of what will be the resulting columns in an sjoin."""
+    if gdf.index.name is None and all(name is None for name in gdf.index.names):
+        if gdf.index.nlevels == 1:
+            return ["index_right"]
+        else:
+            return [f"index_right{i}" for i in range(gdf.index.nlevels)]
+    else:
+        return gdf.index.names
+
+
+def points_in_bounds(
+    gdf: GeoDataFrame | GeoSeries, gridsize: int | float
+) -> GeoDataFrame:
+    """Get a GeoDataFrame of points within the bounds of the GeoDataFrame."""
+    minx, miny, maxx, maxy = to_bbox(gdf)
+    try:
+        crs = gdf.crs
+    except AttributeError:
+        crs = None
+
+    xs = np.linspace(minx, maxx, num=int((maxx - minx) / gridsize))
+    ys = np.linspace(miny, maxy, num=int((maxy - miny) / gridsize))
+    x_coords, y_coords = np.meshgrid(xs, ys, indexing="ij")
+    coords = np.concatenate((x_coords.reshape(-1, 1), y_coords.reshape(-1, 1)), axis=1)
+    return to_gdf(coords, crs=crs)
+
+
+def points_in_polygons(
+    gdf: GeoDataFrame | GeoSeries, gridsize: int | float
+) -> GeoDataFrame:
+    index_right_col = get_index_right_columns(gdf)
+    out = points_in_bounds(gdf, gridsize).sjoin(gdf).set_index(index_right_col)
+    out.index.name = gdf.index.name
+    return out.sort_index()
+
+
 def _determine_geom_type_args(
     gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
 ) -> tuple[GeoDataFrame, str, bool]:
```
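The new segment helpers all reduce to one core trick: a geometry's vertices are split into consecutive two-point LineStrings with `itertools.pairwise`, optionally adding a closing segment from the last vertex back to the first when `cycle=True`. A minimal demo of that core:

```python
import itertools
from shapely import extract_unique_points, get_parts
from shapely.geometry import LineString

line = LineString([(0, 0), (1, 0), (1, 1), (2, 1)])
# MultiPoint of unique vertices -> array of Points -> pairwise segments.
vertices = get_parts(extract_unique_points(line))
segments = [LineString([p0, p1]) for p0, p1 in itertools.pairwise(vertices)]
print(len(segments))  # 3 two-point segments
```

`get_line_segments` wraps this per row of a GeoDataFrame/GeoSeries, and `points_in_polygons` combines the regular grid from `points_in_bounds` with an `sjoin` against the input polygons.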
```diff
@@ -692,65 +851,93 @@ def _determine_geom_type_args(
     return gdf, geom_type, keep_geom_type


-def
-
+def _unary_union_for_notna(geoms, **kwargs):
+    try:
+        return make_valid(union_all(geoms, **kwargs))
+    except TypeError:
+        return union_all([geom for geom in geoms.dropna().values], **kwargs)
+
+
+def _grouped_unary_union(
+    df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
+    by: str | list[str] | None = None,
+    level: int | None = None,
+    as_index: bool = True,
+    grid_size: float | int | None = None,
+    dropna: bool = False,
+    **kwargs,
+) -> GeoSeries | GeoDataFrame:
+    """Vectorized unary_union for groups.

+    Experimental. Messy code.
+    """
+    df = df.copy()
+    df_orig = df.copy()

-def _parallel_unary_union(
-    gdf: GeoDataFrame, n_jobs: int = 1, by=None, grid_size=None, **kwargs
-) -> list[Geometry]:
     try:
-        geom_col =
+        geom_col = df._geometry_column_name
     except AttributeError:
-        geom_col = "geometry"
-
-    if by is not None and not isinstance(by, str):
-        gdf = gdf.copy()
         try:
-
-
-
-
+            geom_col = df.name
+            if geom_col is None:
+                geom_col = "geometry"
+        except AttributeError:
+            geom_col = "geometry"

-    if
-
-        _was_none = True
-    else:
-        _was_none = False
+    if not len(df):
+        return GeoSeries(name=geom_col)

-    if isinstance(
-
+    if isinstance(df, pd.Series):
+        df.name = geom_col
+        original_index = df.index
+        df = df.reset_index()
+        df.index = original_index

-
-
-
-
-
+    if isinstance(by, str):
+        by = [by]
+    elif by is None and level is None:
+        raise TypeError("You have to supply one of 'by' and 'level'")
+    elif by is None:
+        by = df.index.get_level_values(level)

-
+    cumcount = df.groupby(by, dropna=dropna).cumcount().values

+    def get_col_or_index(df, col: str) -> pd.Series | pd.Index:
+        try:
+            return df[col]
+        except KeyError:
+            for i, name in enumerate(df.index.names):
+                if name == col:
+                    return df.index.get_level_values(i)
+            raise KeyError(col)

-
-
-    )
-
-
-
-
-
+    try:
+        df.index = pd.MultiIndex.from_arrays(
+            [cumcount, *[get_col_or_index(df, col) for col in by]]
+        )
+    except KeyError:
+        df.index = pd.MultiIndex.from_arrays([cumcount, by])
+
+    # to wide format: each row will be one group to be merged to one geometry
+    try:
+        geoms_wide: pd.DataFrame = df[geom_col].unstack(level=0)
+    except Exception as e:
+        bb = [*by, geom_col]
+        raise e.__class__(e, f"by={by}", df_orig[bb], df[geom_col]) from e
+    geometries_2d: NDArray[Polygon | None] = geoms_wide.values
+    try:
+        geometries_2d = make_valid(geometries_2d)
+    except TypeError:
+        # make_valid doesn't like nan, so converting to None
+        # np.isnan doesn't accept geometry type, so using isinstance
+        np_isinstance = np.vectorize(isinstance)
+        geometries_2d[np_isinstance(geometries_2d, Geometry) == False] = None

-
-    ser = ser.reset_index(drop=True)
+    unioned = make_valid(union_all(geometries_2d, axis=1, **kwargs))

-
-        dask_geopandas.from_geopandas(ser.to_frame("geometry"), npartitions=n_jobs)
-        .dissolve(**kwargs)
-        .compute()
-    )
-    if _was_none:
-        dissolved.crs = None
+    geoms = GeoSeries(unioned, name=geom_col, index=geoms_wide.index)

-    return
+    return geoms if as_index else geoms.reset_index()


 def _parallel_unary_union(
```
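`_grouped_unary_union` replaces per-group Python-level unions (the old dask-geopandas dissolve path) with one vectorized call: a cumcount-keyed MultiIndex pivots each group's members into a single wide row via `unstack`, and `union_all(..., axis=1)` then merges every row at once. The same idea shown standalone on a toy Series; note that `unstack` pads ragged groups with NaN, which is converted to `None` here because the union reduction skips `None` but not float NaN (the diff's `except TypeError` branch serves the same purpose):

```python
import pandas as pd
from shapely import union_all
from shapely.geometry import Point

ser = pd.Series(
    [Point(0, 0), Point(1, 1), Point(2, 2)],
    index=pd.Index(["a", "a", "b"], name="group"),
)
cumcount = ser.groupby(level=0).cumcount().values
wide = ser.set_axis(
    pd.MultiIndex.from_arrays([cumcount, ser.index])
).unstack(level=0)

values = wide.values
values[pd.isna(values)] = None  # NaN padding -> None
print(union_all(values, axis=1))  # [<MULTIPOINT (0 0, 1 1)> <POINT (2 2)>]
```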
```diff
@@ -765,36 +952,12 @@ def _parallel_unary_union(
         delayed_operations = []
         for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
             delayed_operations.append(
-                joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
+                joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
             )

     return parallel(delayed_operations)


-def _parallel_unary_union_geoseries(
-    ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
-) -> list[Geometry]:
-
-    is_one_hit = ser.groupby(**kwargs).transform("size") == 1
-
-    one_hit = ser.loc[is_one_hit]
-    many_hits = ser.loc[~is_one_hit]
-
-    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
-        delayed_operations = []
-        for _, geoms in many_hits.groupby(**kwargs):
-            delayed_operations.append(
-                joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
-            )
-
-        dissolved = pd.Series(
-            parallel(delayed_operations),
-            index=is_one_hit[lambda x: x is False].index.unique(),
-        )
-
-    return pd.concat([dissolved, one_hit]).sort_index().values
-
-
 def _parallel_unary_union_geoseries(
     ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
 ) -> list[Geometry]:
@@ -803,7 +966,7 @@ def _parallel_unary_union_geoseries(
         delayed_operations = []
         for _, geoms in ser.groupby(**kwargs):
             delayed_operations.append(
-                joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
+                joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
             )

     return parallel(delayed_operations)
```
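Both parallel helpers now delegate each group's union to the NaN-tolerant `_unary_union_for_notna` through joblib, and the duplicated `_parallel_unary_union_geoseries` definition is dropped. A self-contained sketch of the pattern (the helper body is copied from the hunk above; data and names are illustrative):

```python
import geopandas as gpd
import joblib
from shapely import make_valid, union_all
from shapely.geometry import Point

def union_notna(geoms, **kwargs):
    # Same shape as _unary_union_for_notna: drop NaN entries on TypeError.
    try:
        return make_valid(union_all(geoms, **kwargs))
    except TypeError:
        return union_all([g for g in geoms.dropna().values], **kwargs)

ser = gpd.GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)], index=[0, 0, 1])
with joblib.Parallel(n_jobs=2, backend="threading") as parallel:
    results = parallel(
        joblib.delayed(union_notna)(geoms, grid_size=None)
        for _, geoms in ser.groupby(level=0)
    )
print(results)  # [<MULTIPOINT (0 0, 1 1)>, <POINT (2 2)>]
```

A threading backend is a reasonable fit here, since shapely 2 releases the GIL in its vectorized set operations.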