ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- sgis/__init__.py +6 -4
- sgis/geopandas_tools/bounds.py +2 -6
- sgis/geopandas_tools/buffer_dissolve_explode.py +149 -45
- sgis/geopandas_tools/cleaning.py +66 -594
- sgis/geopandas_tools/conversion.py +92 -12
- sgis/geopandas_tools/duplicates.py +53 -23
- sgis/geopandas_tools/general.py +35 -0
- sgis/geopandas_tools/neighbors.py +31 -1
- sgis/geopandas_tools/overlay.py +143 -63
- sgis/geopandas_tools/polygons_as_rings.py +1 -1
- sgis/io/dapla_functions.py +7 -14
- sgis/maps/explore.py +29 -3
- sgis/maps/map.py +16 -4
- sgis/maps/maps.py +95 -49
- sgis/parallel/parallel.py +73 -35
- sgis/raster/torchgeo.py +30 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/METADATA +6 -6
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/RECORD +20 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/conversion.py CHANGED

@@ -7,14 +7,27 @@ import geopandas as gpd
 import numpy as np
 import pandas as pd
 import pyproj
+import rasterio
 import shapely
+from affine import Affine
 from geopandas import GeoDataFrame, GeoSeries
 from pandas.api.types import is_array_like, is_dict_like, is_list_like
+from pyproj import CRS
+from rasterio import features
 from shapely import Geometry, box, wkb, wkt
-from shapely.
+from shapely.errors import GEOSException
+from shapely.geometry import Point, shape
 from shapely.ops import unary_union


+try:
+    from torchgeo.datasets.geo import RasterDataset
+except ImportError:
+
+    class RasterDataset:
+        """Placeholder"""
+
+
 @staticmethod
 def crs_to_string(crs):
     if crs is None:
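For context, a minimal sketch of the optional-import pattern introduced in this hunk, assuming torchgeo is not installed: the placeholder class keeps `isinstance(obj, RasterDataset)` legal, it simply never matches anything.

```python
# Sketch of the optional-dependency pattern, assuming torchgeo is absent.
try:
    from torchgeo.datasets.geo import RasterDataset
except ImportError:

    class RasterDataset:
        """Placeholder so isinstance checks still work without torchgeo."""


def describe(obj) -> str:
    # False for every object when only the placeholder class exists
    return "raster" if isinstance(obj, RasterDataset) else "vector"
```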
@@ -189,16 +202,18 @@ def coordinate_array(


 def to_gdf(
-    obj: Geometry
-    | str
-    | bytes
-    | list
-    | tuple
-    | dict
-    | GeoSeries
-    | pd.Series
-    | pd.DataFrame
-    | Iterator,
+    obj: (
+        Geometry
+        | str
+        | bytes
+        | list
+        | tuple
+        | dict
+        | GeoSeries
+        | pd.Series
+        | pd.DataFrame
+        | Iterator
+    ),
     crs: str | tuple[str] | None = None,
     geometry: str | tuple[str] | int | None = None,
     **kwargs,
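A hypothetical usage sketch of the widened signature (assuming the function is exposed as `sgis.to_gdf`, and using only input types named in the union above):

```python
# Hypothetical to_gdf calls with a WKT string and coordinate tuples.
import sgis as sg

from_tuples = sg.to_gdf([(10.0, 59.0), (10.5, 59.5)], crs=4326)
from_wkt = sg.to_gdf("POINT (10 59)", crs=4326)
```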
@@ -316,6 +331,37 @@ def to_gdf(
         geom_col = geometry or "geometry"
         return _geoseries_to_gdf(obj, geom_col, crs, **kwargs)

+    if crs is None:
+        try:
+            crs = obj.crs
+        except AttributeError:
+            try:
+                matches = re.search(r"SRID=(\d+);", obj)
+            except TypeError:
+                try:
+                    matches = re.search(r"SRID=(\d+);", obj[0])
+                except Exception:
+                    pass
+            try:
+                crs = CRS(int("".join(x for x in matches.group(0) if x.isnumeric())))
+            except Exception:
+                pass
+
+    if isinstance(obj, RasterDataset):
+        # read the entire dataset
+        obj = obj[obj.bounds]
+        crs = obj["crs"]
+        array = np.array(obj["image"])
+        transform = get_transform_from_bounds(obj["bbox"], shape=array.shape)
+        return gpd.GeoDataFrame(
+            pd.DataFrame(
+                _array_to_geojson(array, transform),
+                columns=["value", "geometry"],
+            ),
+            geometry="geometry",
+            crs=crs,
+        )
+
     if is_array_like(geometry) and len(geometry) == len(obj):
         geometry = GeoSeries(
             _make_one_shapely_geom(g) for g in geometry if g is not None
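A worked example of the SRID sniffing added above, on a synthetic EWKT string: the regex captures "SRID=25833;", and the digit filter feeds pyproj.CRS.

```python
import re

from pyproj import CRS

ewkt = "SRID=25833;POINT (261622 6649910)"
matches = re.search(r"SRID=(\d+);", ewkt)
crs = CRS(int("".join(x for x in matches.group(0) if x.isnumeric())))
print(crs.to_epsg())  # 25833
```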
@@ -425,6 +471,33 @@ def to_gdf(
     return GeoDataFrame(geometry=geoseries, crs=crs, **kwargs)


+def _array_to_geojson(array: np.ndarray, transform: Affine):
+    try:
+        return [
+            (value, shape(geom))
+            for geom, value in features.shapes(array, transform=transform)
+        ]
+    except ValueError:
+        array = array.astype(np.float32)
+        return [
+            (value, shape(geom))
+            for geom, value in features.shapes(array, transform=transform)
+        ]
+
+
+def get_transform_from_bounds(
+    obj: GeoDataFrame | GeoSeries | Geometry | tuple, shape: tuple[float, ...]
+) -> Affine:
+    minx, miny, maxx, maxy = to_bbox(obj)
+    if len(shape) == 2:
+        width, height = shape
+    elif len(shape) == 3:
+        _, width, height = shape
+    else:
+        raise ValueError
+    return rasterio.transform.from_bounds(minx, miny, maxx, maxy, width, height)
+
+
 def make_shapely_geoms(obj):
     if _is_one_geometry(obj):
         return _make_one_shapely_geom(obj)
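A small worked example of the affine logic in get_transform_from_bounds above: rasterio maps bounds plus pixel counts to a transform whose `a` and `e` terms are the pixel width and (negative) pixel height.

```python
import rasterio.transform

minx, miny, maxx, maxy = 0.0, 0.0, 100.0, 50.0
transform = rasterio.transform.from_bounds(minx, miny, maxx, maxy, 10, 5)
print(transform.a, transform.e)  # 10.0 -10.0
```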
@@ -583,7 +656,14 @@ def _make_one_shapely_geom(obj):
     Works recursively if the object is a nested iterable.
     """
     if isinstance(obj, str):
-        return wkt.loads(obj)
+        try:
+            return wkt.loads(obj)
+        except GEOSException:
+            if obj.startswith("geography"):
+                matches = re.search(r"SRID=(\d+);", obj)
+                srid = matches.group(0)
+                _, _wkt = obj.split(srid)
+                return wkt.loads(_wkt)

     if isinstance(obj, bytes):
         return wkb.loads(obj)
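A simplified variant of the fallback above, on synthetic input (the real code also gates on a "geography" prefix): plain wkt.loads rejects EWKT, so the "SRID=...;" prefix is stripped before parsing.

```python
import re

from shapely import wkt
from shapely.errors import GEOSException

obj = "SRID=4326;POINT (10 60)"
try:
    geom = wkt.loads(obj)
except GEOSException:
    srid = re.search(r"SRID=(\d+);", obj).group(0)
    _, _wkt = obj.split(srid)
    geom = wkt.loads(_wkt)
print(geom)  # POINT (10 60)
```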
sgis/geopandas_tools/duplicates.py CHANGED

@@ -6,10 +6,18 @@ from geopandas import GeoDataFrame, GeoSeries
 from shapely import STRtree, difference, make_valid, simplify, unary_union
 from shapely.errors import GEOSException

-from .
-
+from .general import (
+    _determine_geom_type_args,
+    _push_geom_col,
+    clean_geoms,
+    parallel_unary_union_geoseries,
+)
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 from .overlay import _run_overlay_dask, clean_overlay, make_valid_and_keep_geom_type
+from .sfilter import sfilter_inverse, sfilter_split
+
+
+PRECISION = 1e-3


 def update_geometries(
@@ -18,6 +26,7 @@ def update_geometries(
     keep_geom_type: bool | None = None,
     grid_size: int | None = None,
     n_jobs: int = 1,
+    predicate: str | None = "intersects",
 ) -> GeoDataFrame:
     """Puts geometries on top of each other rowwise.

@@ -81,40 +90,43 @@ def update_geometries(
     if len(gdf) <= 1:
         return gdf

-
+    copied = make_all_singlepart(clean_geoms(gdf))

-
-
+    copied, geom_type, keep_geom_type = _determine_geom_type_args(
+        copied, geom_type, keep_geom_type
     )

-    geom_col =
-    index_mapper = {i: idx for i, idx in enumerate(
-
+    geom_col = copied._geometry_column_name
+    index_mapper = {i: idx for i, idx in enumerate(copied.index)}
+    copied = copied.reset_index(drop=True)

-    tree = STRtree(
-    left, right = tree.query(
+    tree = STRtree(copied.geometry.values)
+    left, right = tree.query(copied.geometry.values, predicate=predicate)
     indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]

     # select geometries from 'right', index from 'left', dissolve by 'left'
+    erasers = pd.Series(copied.geometry.loc[indices.values].values, index=indices.index)
     if n_jobs > 1:
         erasers = parallel_unary_union_geoseries(
-
+            erasers,
             level=0,
             n_jobs=n_jobs,
             grid_size=grid_size,
-            # index=indices.index.unique(),
         )
         erasers = pd.Series(erasers, index=indices.index.unique())
     else:
-
-
+        only_one = erasers.groupby(level=0).transform("size") == 1
+        one_hit = erasers[only_one]
+        many_hits = (
+            erasers[~only_one]
             .groupby(level=0)
             .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
         )
+        erasers = pd.concat([one_hit, many_hits]).sort_index()

     # match up the aggregated erasers by index
     if n_jobs > 1:
-        arr1 =
+        arr1 = copied.geometry.loc[erasers.index].to_numpy()
         arr2 = erasers.to_numpy()
         try:
             erased = _run_overlay_dask(
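A sketch of the pairing trick in this hunk, on synthetic boxes: STRtree.query with a predicate returns matched (left, right) index arrays, and keeping only index > value drops self-pairs and mirrored duplicates, so each intersecting pair appears once.

```python
import pandas as pd

from shapely import STRtree, box

geoms = [box(0, 0, 2, 2), box(1, 1, 3, 3), box(10, 10, 11, 11)]
tree = STRtree(geoms)
left, right = tree.query(geoms, predicate="intersects")
indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
print(indices)  # a single pair: geometry 1 intersects geometry 0
```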
@@ -134,28 +146,39 @@ def update_geometries(
     else:
         erased = make_valid(
             difference(
-
+                copied.geometry.loc[erasers.index],
                 erasers,
                 grid_size=grid_size,
             )
         )

-
+    copied.loc[erased.index, geom_col] = erased
+
+    copied = copied.loc[~copied.is_empty]

-
+    copied.index = copied.index.map(index_mapper)

-
+    # TODO check why polygons dissappear in rare cases. For now, just add back the missing
+    dissapeared = sfilter_inverse(gdf, copied.buffer(-PRECISION))
+    copied = pd.concat([copied, dissapeared])
+
+    # TODO fix dupliates again with dissolve?
+    # dups = get_intersections(copied, geom_type="polygon")
+    # dups["_cluster"] = get_cluster_mapper(dups.geometry.values)
+    # no_dups = dissexp(dups, by="_cluster").drop(columns="_cluster")
+    # copied = clean_overlay(copied, no_dups, how="update", geom_type="polygon")

     if keep_geom_type:
-
+        copied = to_single_geom_type(copied, geom_type)

-    return
+    return copied


 def get_intersections(
     gdf: GeoDataFrame,
     geom_type: str | None = None,
     keep_geom_type: bool | None = None,
+    predicate: str | None = "intersects",
     n_jobs: int = 1,
 ) -> GeoDataFrame:
     """Find geometries that intersect in a GeoDataFrame.
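A sketch of the vectorized erase used in the hunk above: shapely.difference works elementwise on equal-length sequences, erasing each geometry with its matched eraser (synthetic data; grid_size=None means full precision).

```python
import shapely
from shapely import box

targets = [box(0, 0, 2, 2), box(0, 0, 1, 1)]
erasers = [box(1, 0, 3, 2), box(10, 10, 11, 11)]
erased = shapely.difference(targets, erasers, grid_size=None)
print([g.area for g in erased])  # [2.0, 1.0]
```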
@@ -248,6 +271,7 @@ def get_intersections(
         geom_type,
         keep_geom_type,
         n_jobs=n_jobs,
+        predicate=predicate,
     ).pipe(clean_geoms)

     duplicated_geoms.index = duplicated_geoms["orig_idx"].values
@@ -260,7 +284,7 @@


 def _get_intersecting_geometries(
-    gdf: GeoDataFrame, geom_type, keep_geom_type, n_jobs
+    gdf: GeoDataFrame, geom_type, keep_geom_type, n_jobs, predicate
 ) -> GeoDataFrame:
     right = gdf[[gdf._geometry_column_name]]
     right["idx_right"] = right.index
@@ -280,6 +304,7 @@ def _get_intersecting_geometries(
         left,
         right,
         how="intersection",
+        predicate=predicate,
         geom_type=geom_type,
         keep_geom_type=keep_geom_type,
         n_jobs=n_jobs,
@@ -296,7 +321,12 @@ def _get_intersecting_geometries(
             continue
         intersected += [
             clean_overlay(
-                left,
+                left,
+                right,
+                how="intersection",
+                predicate=predicate,
+                geom_type=geom_type,
+                n_jobs=n_jobs,
             )
         ]
     intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
sgis/geopandas_tools/general.py CHANGED

@@ -299,6 +299,16 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     return gdf.iloc[list(sorted_areas)]


+def sort_df(
+    df: pd.DataFrame | GeoDataFrame, sort_col: pd.Series
+) -> pd.DataFrame | GeoDataFrame:
+    value_mapper: dict[int, Any] = dict(enumerate(sort_col.values))
+    sorted_indices = dict(
+        reversed(sorted(value_mapper.items(), key=lambda item: item[1]))
+    )
+    return df.iloc[list(sorted_indices)]
+
+
 def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     """Sort GeoDataFrame by length in decending order.

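A worked illustration of the new sort_df above: row positions are ranked by an external Series, largest value first, then passed to .iloc.

```python
import pandas as pd

df = pd.DataFrame({"name": ["a", "b", "c"]})
sort_col = pd.Series([2, 30, 5])
value_mapper = dict(enumerate(sort_col.values))  # {0: 2, 1: 30, 2: 5}
order = dict(reversed(sorted(value_mapper.items(), key=lambda item: item[1])))
print(df.iloc[list(order)]["name"].tolist())  # ['b', 'c', 'a']
```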
@@ -736,6 +746,31 @@ def parallel_unary_union(
 def parallel_unary_union_geoseries(
     ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
 ) -> list[Geometry]:
+
+    is_one_hit = ser.groupby(**kwargs).transform("size") == 1
+
+    one_hit = ser.loc[is_one_hit]
+    many_hits = ser.loc[~is_one_hit]
+
+    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
+        delayed_operations = []
+        for _, geoms in many_hits.groupby(**kwargs):
+            delayed_operations.append(
+                joblib.delayed(merge_geometries)(geoms, grid_size=grid_size)
+            )
+
+        dissolved = pd.Series(
+            parallel(delayed_operations),
+            index=is_one_hit[lambda x: x == False].index.unique(),
+        )
+
+    return pd.concat([dissolved, one_hit]).sort_index().values
+
+
+def parallel_unary_union_geoseries(
+    ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
+) -> list[Geometry]:
+
     with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
         delayed_operations = []
         for _, geoms in ser.groupby(**kwargs):
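Note that this hunk adds two definitions of parallel_unary_union_geoseries; the second shadows the first at import time. For context, a minimal sketch of the joblib pattern used here (threading backend, which suits shapely calls that release the GIL): each geometry group is unioned in its own delayed task. Synthetic data.

```python
import joblib

from shapely import unary_union
from shapely.geometry import Point

groups = [
    [Point(0, 0).buffer(1), Point(1, 0).buffer(1)],
    [Point(5, 5).buffer(1)],
]
with joblib.Parallel(n_jobs=2, backend="threading") as parallel:
    merged = parallel(joblib.delayed(unary_union)(geoms) for geoms in groups)
print(len(merged))  # 2
```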
sgis/geopandas_tools/neighbors.py CHANGED

@@ -7,9 +7,12 @@ GeoDataFrames.
 The results of all functions will be identical with GeoDataFrame and GeoSeries as input
 types.
 """
+
 import numpy as np
+import shapely
 from geopandas import GeoDataFrame, GeoSeries
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, concat
+from shapely import STRtree
 from sklearn.neighbors import NearestNeighbors

 from .conversion import coordinate_array
@@ -237,6 +240,33 @@ def get_all_distances(
     )


+def sjoin_within_distance(
+    gdf: GeoDataFrame | GeoSeries,
+    neighbors: GeoDataFrame | GeoSeries,
+    distance: int | float,
+    distance_col: str = "distance",
+    **kwargs,
+) -> GeoDataFrame:
+    """Sjoin with a buffer on the right GeoDataFrame and adds a distance column."""
+
+    new_neighbor_cols = {"__left_range_idx": range(len(neighbors))}
+    if distance:
+        new_neighbor_cols[neighbors._geometry_column_name] = lambda x: x.buffer(
+            distance
+        )
+
+    # using assign to get a copy
+    neighbors = neighbors.assign(**new_neighbor_cols)
+
+    out = gdf.sjoin(neighbors, **kwargs)
+
+    out[distance_col] = shapely.distance(
+        out.geometry.values, neighbors.geometry.iloc[out["__left_range_idx"]].values
+    )
+
+    return out.drop(columns="__left_range_idx")
+
+
 def get_k_nearest_neighbors(
     gdf: GeoDataFrame | GeoSeries,
     neighbors: GeoDataFrame | GeoSeries,
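A hypothetical usage of the new sjoin_within_distance, assuming it is exported as `sgis.sjoin_within_distance`: join points to stops lying within 100 units; a "distance" column is added to the joined result.

```python
import geopandas as gpd
import sgis as sg
from shapely.geometry import Point

points = gpd.GeoDataFrame({"id": [1]}, geometry=[Point(0, 0)], crs=25833)
stops = gpd.GeoDataFrame({"stop": ["x"]}, geometry=[Point(30, 40)], crs=25833)
joined = sg.sjoin_within_distance(points, stops, distance=100)
print(list(joined.columns))  # includes "stop" and "distance"
```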
|