ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +6 -4
- sgis/geopandas_tools/bounds.py +2 -6
- sgis/geopandas_tools/buffer_dissolve_explode.py +149 -45
- sgis/geopandas_tools/cleaning.py +66 -594
- sgis/geopandas_tools/conversion.py +92 -12
- sgis/geopandas_tools/duplicates.py +53 -23
- sgis/geopandas_tools/general.py +35 -0
- sgis/geopandas_tools/neighbors.py +31 -1
- sgis/geopandas_tools/overlay.py +143 -63
- sgis/geopandas_tools/polygons_as_rings.py +1 -1
- sgis/io/dapla_functions.py +7 -14
- sgis/maps/explore.py +29 -3
- sgis/maps/map.py +16 -4
- sgis/maps/maps.py +95 -49
- sgis/parallel/parallel.py +73 -35
- sgis/raster/torchgeo.py +30 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/METADATA +6 -6
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/RECORD +20 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/WHEEL +0 -0
sgis/__init__.py
CHANGED
|
@@ -22,18 +22,18 @@ from .geopandas_tools.buffer_dissolve_explode import (
|
|
|
22
22
|
buffdiss,
|
|
23
23
|
buffdissexp,
|
|
24
24
|
buffdissexp_by_cluster,
|
|
25
|
+
diss,
|
|
26
|
+
diss_by_cluster,
|
|
25
27
|
dissexp,
|
|
26
28
|
dissexp_by_cluster,
|
|
27
29
|
)
|
|
28
30
|
from .geopandas_tools.centerlines import get_rough_centerlines
|
|
29
31
|
from .geopandas_tools.cleaning import (
|
|
30
32
|
coverage_clean,
|
|
33
|
+
remove_interior_slivers,
|
|
31
34
|
remove_spikes,
|
|
32
|
-
snap_polygons,
|
|
33
|
-
snap_to_mask,
|
|
34
35
|
split_and_eliminate_by_longest,
|
|
35
36
|
split_by_neighbors,
|
|
36
|
-
split_spiky_polygons,
|
|
37
37
|
)
|
|
38
38
|
from .geopandas_tools.conversion import (
|
|
39
39
|
coordinate_array,
|
|
@@ -76,6 +76,7 @@ from .geopandas_tools.neighbors import (
|
|
|
76
76
|
get_neighbor_dfs,
|
|
77
77
|
get_neighbor_indices,
|
|
78
78
|
k_nearest_neighbors,
|
|
79
|
+
sjoin_within_distance,
|
|
79
80
|
)
|
|
80
81
|
from .geopandas_tools.overlay import clean_overlay
|
|
81
82
|
from .geopandas_tools.point_operations import snap_all, snap_within_distance
|
|
@@ -87,6 +88,7 @@ from .geopandas_tools.polygon_operations import (
|
|
|
87
88
|
eliminate_by_largest,
|
|
88
89
|
eliminate_by_longest,
|
|
89
90
|
eliminate_by_smallest,
|
|
91
|
+
get_cluster_mapper,
|
|
90
92
|
get_gaps,
|
|
91
93
|
get_holes,
|
|
92
94
|
get_polygon_clusters,
|
|
@@ -130,7 +132,7 @@ from .networkanalysis.nodes import (
|
|
|
130
132
|
make_node_ids,
|
|
131
133
|
)
|
|
132
134
|
from .networkanalysis.traveling_salesman import traveling_salesman_problem
|
|
133
|
-
from .parallel.parallel import Parallel
|
|
135
|
+
from .parallel.parallel import Parallel, parallel_overlay
|
|
134
136
|
from .raster.cube import DataCube
|
|
135
137
|
|
|
136
138
|
|
sgis/geopandas_tools/bounds.py
CHANGED
|
@@ -59,19 +59,15 @@ class Gridlooper:
|
|
|
59
59
|
|
|
60
60
|
Instantiate a gridlooper.
|
|
61
61
|
|
|
62
|
-
>>> looper = sg.Gridlooper(gridsize=200, mask=buffered, parallelizer=sg.Parallel(1, backend="multiprocessing"))
|
|
62
|
+
>>> looper = sg.Gridlooper(gridsize=200, mask=buffered, concat=True, parallelizer=sg.Parallel(1, backend="multiprocessing"))
|
|
63
63
|
|
|
64
64
|
Run the function clean_overlay in a gridloop.
|
|
65
65
|
|
|
66
|
-
>>>
|
|
66
|
+
>>> results = looper.run(
|
|
67
67
|
... sg.clean_overlay,
|
|
68
68
|
... points,
|
|
69
69
|
... buffered,
|
|
70
70
|
... )
|
|
71
|
-
>>> type(resultslist)
|
|
72
|
-
list
|
|
73
|
-
|
|
74
|
-
>>> results = pd.concat(resultslist, ignore_index=True)
|
|
75
71
|
>>> results
|
|
76
72
|
idx_1 idx_2 geometry
|
|
77
73
|
0 220 220 POINT (254575.200 6661631.500)
|
|
@@ -14,17 +14,13 @@ for the following:
|
|
|
14
14
|
- The buff function returns a GeoDataFrame, the geopandas method returns a GeoSeries.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import
|
|
17
|
+
from typing import Callable
|
|
18
|
+
|
|
18
19
|
import numpy as np
|
|
20
|
+
import pandas as pd
|
|
19
21
|
from geopandas import GeoDataFrame, GeoSeries
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
from .general import (
|
|
23
|
-
_push_geom_col,
|
|
24
|
-
merge_geometries,
|
|
25
|
-
parallel_unary_union,
|
|
26
|
-
parallel_unary_union_geoseries,
|
|
27
|
-
)
|
|
22
|
+
|
|
23
|
+
from .general import merge_geometries, parallel_unary_union
|
|
28
24
|
from .geometry_types import make_all_singlepart
|
|
29
25
|
from .polygon_operations import get_cluster_mapper, get_grouped_centroids
|
|
30
26
|
|
|
@@ -172,17 +168,16 @@ def buffdiss(
|
|
|
172
168
|
|
|
173
169
|
|
|
174
170
|
def _dissolve(gdf, aggfunc="first", grid_size=None, n_jobs=1, **dissolve_kwargs):
|
|
175
|
-
geom_col = gdf._geometry_column_name
|
|
176
|
-
# if grid_size is None:
|
|
177
|
-
# dissolved = gdf.dissolve(aggfunc=aggfunc, **dissolve_kwargs)
|
|
178
171
|
|
|
179
|
-
|
|
180
|
-
|
|
172
|
+
if not len(gdf):
|
|
173
|
+
return gdf
|
|
181
174
|
|
|
182
175
|
geom_col = gdf._geometry_column_name
|
|
183
176
|
|
|
184
177
|
by = dissolve_kwargs.pop("by", None)
|
|
185
178
|
|
|
179
|
+
by_was_none = not bool(by)
|
|
180
|
+
|
|
186
181
|
if by is None and dissolve_kwargs.get("level") is None:
|
|
187
182
|
by = np.zeros(len(gdf), dtype="int64")
|
|
188
183
|
other_cols = list(gdf.columns.difference({geom_col}))
|
|
@@ -191,32 +186,50 @@ def _dissolve(gdf, aggfunc="first", grid_size=None, n_jobs=1, **dissolve_kwargs)
|
|
|
191
186
|
by = [by]
|
|
192
187
|
other_cols = list(gdf.columns.difference({geom_col} | set(by or {})))
|
|
193
188
|
|
|
194
|
-
|
|
189
|
+
try:
|
|
190
|
+
is_one_hit = gdf.groupby(by, **dissolve_kwargs).transform("size") == 1
|
|
191
|
+
except IndexError:
|
|
192
|
+
# if no rows when dropna=True
|
|
193
|
+
original_by = [x for x in by]
|
|
194
|
+
query = gdf[by.pop(0)].notna()
|
|
195
|
+
for col in gdf[by]:
|
|
196
|
+
query &= gdf[col].notna()
|
|
197
|
+
gdf = gdf.loc[query]
|
|
198
|
+
assert not len(gdf), gdf
|
|
199
|
+
if not by_was_none and dissolve_kwargs.get("as_index", True):
|
|
200
|
+
try:
|
|
201
|
+
gdf = gdf.set_index(original_by)
|
|
202
|
+
except Exception as e:
|
|
203
|
+
print(gdf)
|
|
204
|
+
print(original_by)
|
|
205
|
+
raise e
|
|
206
|
+
return gdf
|
|
207
|
+
|
|
208
|
+
if not by_was_none and dissolve_kwargs.get("as_index", True):
|
|
209
|
+
one_hit = gdf[is_one_hit].set_index(by)
|
|
210
|
+
else:
|
|
211
|
+
one_hit = gdf[is_one_hit]
|
|
212
|
+
many_hits = gdf[~is_one_hit]
|
|
213
|
+
|
|
214
|
+
if not len(many_hits):
|
|
215
|
+
return GeoDataFrame(one_hit, geometry=geom_col, crs=gdf.crs)
|
|
216
|
+
|
|
217
|
+
dissolved = many_hits.groupby(by, **dissolve_kwargs)[other_cols].agg(aggfunc)
|
|
218
|
+
|
|
219
|
+
# dissolved = gdf.groupby(by, **dissolve_kwargs)[other_cols].agg(aggfunc)
|
|
195
220
|
|
|
196
221
|
if n_jobs > 1:
|
|
197
|
-
dissolved[geom_col] = parallel_unary_union(
|
|
198
|
-
gdf, n_jobs=n_jobs, by=by, grid_size=grid_size, **dissolve_kwargs
|
|
199
|
-
)
|
|
200
222
|
try:
|
|
223
|
+
agged = parallel_unary_union(
|
|
224
|
+
many_hits, n_jobs=n_jobs, by=by, grid_size=grid_size, **dissolve_kwargs
|
|
225
|
+
)
|
|
226
|
+
dissolved[geom_col] = agged
|
|
201
227
|
return GeoDataFrame(dissolved, geometry=geom_col, crs=gdf.crs)
|
|
202
228
|
except Exception as e:
|
|
203
|
-
print(e, dissolved
|
|
229
|
+
print(e, dissolved, agged, many_hits)
|
|
204
230
|
raise e
|
|
205
|
-
# import dask_geopandas
|
|
206
|
-
|
|
207
|
-
# if not isinstance(by, str):
|
|
208
|
-
# gdf["_by"] = 1
|
|
209
|
-
# ddf = dask_geopandas.from_geopandas(gdf, npartitions=n_jobs, by=by)
|
|
210
|
-
|
|
211
|
-
with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
|
|
212
|
-
delayed_operations = []
|
|
213
|
-
for _, geoms in gdf.groupby(by, **dissolve_kwargs)[geom_col]:
|
|
214
|
-
delayed_operations.append(joblib.delayed(merge_geometries)(geoms))
|
|
215
231
|
|
|
216
|
-
|
|
217
|
-
return GeoDataFrame(dissolved, geometry=geom_col, crs=gdf.crs)
|
|
218
|
-
|
|
219
|
-
geoms_agged = gdf.groupby(by, **dissolve_kwargs)[geom_col].agg(
|
|
232
|
+
geoms_agged = many_hits.groupby(by, **dissolve_kwargs)[geom_col].agg(
|
|
220
233
|
lambda x: merge_geometries(x, grid_size=grid_size)
|
|
221
234
|
)
|
|
222
235
|
|
|
@@ -228,22 +241,23 @@ def _dissolve(gdf, aggfunc="first", grid_size=None, n_jobs=1, **dissolve_kwargs)
|
|
|
228
241
|
|
|
229
242
|
dissolved[geom_col] = geoms_agged
|
|
230
243
|
|
|
231
|
-
return GeoDataFrame(
|
|
244
|
+
return GeoDataFrame(
|
|
245
|
+
pd.concat([dissolved, one_hit]).sort_index(), geometry=geom_col, crs=gdf.crs
|
|
246
|
+
)
|
|
232
247
|
|
|
233
248
|
|
|
234
|
-
def
|
|
249
|
+
def diss(
|
|
235
250
|
gdf: GeoDataFrame,
|
|
236
251
|
by=None,
|
|
237
252
|
aggfunc="first",
|
|
238
253
|
as_index: bool = True,
|
|
239
|
-
index_parts: bool = False,
|
|
240
254
|
grid_size: float | int | None = None,
|
|
241
255
|
n_jobs: int = 1,
|
|
242
256
|
**dissolve_kwargs,
|
|
243
257
|
):
|
|
244
|
-
"""Dissolves
|
|
258
|
+
"""Dissolves geometries.
|
|
245
259
|
|
|
246
|
-
It takes a GeoDataFrame and dissolves
|
|
260
|
+
It takes a GeoDataFrame and dissolves and fixes geometries.
|
|
247
261
|
|
|
248
262
|
Args:
|
|
249
263
|
gdf: the GeoDataFrame that will be dissolved and exploded.
|
|
@@ -251,12 +265,10 @@ def dissexp(
|
|
|
251
265
|
aggfunc: How to aggregate the non-geometry colums not in "by".
|
|
252
266
|
as_index: Whether the 'by' columns should be returned as index. Defaults to
|
|
253
267
|
True to be consistent with geopandas.
|
|
254
|
-
index_parts: If False (default), the index after dissolve is respected. If
|
|
255
|
-
True, an integer index level is added during explode.
|
|
256
268
|
**dissolve_kwargs: additional keyword arguments passed to geopandas' dissolve.
|
|
257
269
|
|
|
258
270
|
Returns:
|
|
259
|
-
A GeoDataFrame
|
|
271
|
+
A GeoDataFrame with dissolved geometries.
|
|
260
272
|
"""
|
|
261
273
|
if not len(gdf):
|
|
262
274
|
if as_index:
|
|
@@ -267,6 +279,44 @@ def dissexp(
|
|
|
267
279
|
else:
|
|
268
280
|
return gdf
|
|
269
281
|
|
|
282
|
+
return _dissolve(
|
|
283
|
+
gdf,
|
|
284
|
+
by=by,
|
|
285
|
+
aggfunc=aggfunc,
|
|
286
|
+
grid_size=grid_size,
|
|
287
|
+
n_jobs=n_jobs,
|
|
288
|
+
as_index=as_index,
|
|
289
|
+
**dissolve_kwargs,
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def dissexp(
|
|
294
|
+
gdf: GeoDataFrame,
|
|
295
|
+
by=None,
|
|
296
|
+
aggfunc="first",
|
|
297
|
+
as_index: bool = True,
|
|
298
|
+
index_parts: bool = False,
|
|
299
|
+
grid_size: float | int | None = None,
|
|
300
|
+
n_jobs: int = 1,
|
|
301
|
+
**dissolve_kwargs,
|
|
302
|
+
):
|
|
303
|
+
"""Dissolves overlapping geometries.
|
|
304
|
+
|
|
305
|
+
It takes a GeoDataFrame and dissolves, fixes and explodes geometries.
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
gdf: the GeoDataFrame that will be dissolved and exploded.
|
|
309
|
+
by: Columns to dissolve by.
|
|
310
|
+
aggfunc: How to aggregate the non-geometry colums not in "by".
|
|
311
|
+
as_index: Whether the 'by' columns should be returned as index. Defaults to
|
|
312
|
+
True to be consistent with geopandas.
|
|
313
|
+
index_parts: If False (default), the index after dissolve is respected. If
|
|
314
|
+
True, an integer index level is added during explode.
|
|
315
|
+
**dissolve_kwargs: additional keyword arguments passed to geopandas' dissolve.
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
A GeoDataFrame where overlapping geometries are dissolved.
|
|
319
|
+
"""
|
|
270
320
|
dissolve_kwargs = dissolve_kwargs | {
|
|
271
321
|
"by": by,
|
|
272
322
|
"as_index": as_index,
|
|
@@ -274,7 +324,7 @@ def dissexp(
|
|
|
274
324
|
|
|
275
325
|
dissolve_kwargs, ignore_index = _decide_ignore_index(dissolve_kwargs)
|
|
276
326
|
|
|
277
|
-
dissolved =
|
|
327
|
+
dissolved = diss(
|
|
278
328
|
gdf, aggfunc=aggfunc, grid_size=grid_size, n_jobs=n_jobs, **dissolve_kwargs
|
|
279
329
|
)
|
|
280
330
|
|
|
@@ -296,6 +346,60 @@ def dissexp_by_cluster(
|
|
|
296
346
|
This might be many times faster than a regular dissexp, if there are many
|
|
297
347
|
non-overlapping geometries.
|
|
298
348
|
|
|
349
|
+
Args:
|
|
350
|
+
gdf: the GeoDataFrame that will be dissolved and exploded.
|
|
351
|
+
**dissolve_kwargs: Keyword arguments passed to geopandas' dissolve.
|
|
352
|
+
|
|
353
|
+
Returns:
|
|
354
|
+
A GeoDataFrame where overlapping geometries are dissolved.
|
|
355
|
+
"""
|
|
356
|
+
return _run_func_by_cluster(
|
|
357
|
+
dissexp, gdf, predicate=predicate, n_jobs=n_jobs, **dissolve_kwargs
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def diss_by_cluster(
|
|
362
|
+
gdf: GeoDataFrame, predicate=None, n_jobs: int = 1, **dissolve_kwargs
|
|
363
|
+
) -> GeoDataFrame:
|
|
364
|
+
"""Dissolves overlapping geometries through clustering with sjoin and networkx.
|
|
365
|
+
|
|
366
|
+
Works exactly like dissexp, but, before dissolving, the geometries are divided
|
|
367
|
+
into clusters based on overlap (uses the function sgis.get_polygon_clusters).
|
|
368
|
+
The geometries are then dissolved based on this column (and optionally other
|
|
369
|
+
columns).
|
|
370
|
+
|
|
371
|
+
This might be many times faster than a regular dissexp, if there are many
|
|
372
|
+
non-overlapping geometries.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
gdf: the GeoDataFrame that will be dissolved and exploded.
|
|
376
|
+
**dissolve_kwargs: Keyword arguments passed to geopandas' dissolve.
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
A GeoDataFrame where overlapping geometries are dissolved.
|
|
380
|
+
"""
|
|
381
|
+
return _run_func_by_cluster(
|
|
382
|
+
diss, gdf, predicate=predicate, n_jobs=n_jobs, **dissolve_kwargs
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _run_func_by_cluster(
|
|
387
|
+
func: Callable,
|
|
388
|
+
gdf: GeoDataFrame,
|
|
389
|
+
predicate=None,
|
|
390
|
+
n_jobs: int = 1,
|
|
391
|
+
**dissolve_kwargs,
|
|
392
|
+
) -> GeoDataFrame:
|
|
393
|
+
"""Dissolves overlapping geometries through clustering with sjoin and networkx.
|
|
394
|
+
|
|
395
|
+
Works exactly like dissexp, but, before dissolving, the geometries are divided
|
|
396
|
+
into clusters based on overlap (uses the function sgis.get_polygon_clusters).
|
|
397
|
+
The geometries are then dissolved based on this column (and optionally other
|
|
398
|
+
columns).
|
|
399
|
+
|
|
400
|
+
This might be many times faster than a regular dissexp, if there are many
|
|
401
|
+
non-overlapping geometries.
|
|
402
|
+
|
|
299
403
|
Args:
|
|
300
404
|
gdf: the GeoDataFrame that will be dissolved and exploded.
|
|
301
405
|
**dissolve_kwargs: Keyword arguments passed to geopandas' dissolve.
|
|
@@ -312,7 +416,7 @@ def dissexp_by_cluster(
|
|
|
312
416
|
by = list(by)
|
|
313
417
|
|
|
314
418
|
if not len(gdf):
|
|
315
|
-
return
|
|
419
|
+
return func(gdf, by=by, **dissolve_kwargs)
|
|
316
420
|
|
|
317
421
|
def get_group_clusters(group: GeoDataFrame):
|
|
318
422
|
"""Adds cluster column. Applied to each group because much faster."""
|
|
@@ -328,11 +432,11 @@ def dissexp_by_cluster(
|
|
|
328
432
|
make_all_singlepart(gdf)
|
|
329
433
|
.groupby(by, group_keys=True, dropna=False, as_index=False)
|
|
330
434
|
.apply(get_group_clusters)
|
|
331
|
-
.pipe(
|
|
435
|
+
.pipe(func, by=["_cluster"] + by, n_jobs=n_jobs, **dissolve_kwargs)
|
|
332
436
|
)
|
|
333
437
|
else:
|
|
334
438
|
dissolved = get_group_clusters(make_all_singlepart(gdf)).pipe(
|
|
335
|
-
|
|
439
|
+
func, by="_cluster", n_jobs=n_jobs, **dissolve_kwargs
|
|
336
440
|
)
|
|
337
441
|
|
|
338
442
|
if not by:
|