ssb-sgis 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +13 -4
- sgis/geopandas_tools/bounds.py +236 -37
- sgis/geopandas_tools/buffer_dissolve_explode.py +41 -9
- sgis/geopandas_tools/cleaning.py +521 -169
- sgis/geopandas_tools/conversion.py +2 -2
- sgis/geopandas_tools/duplicates.py +22 -18
- sgis/geopandas_tools/general.py +87 -9
- sgis/geopandas_tools/overlay.py +12 -4
- sgis/geopandas_tools/polygon_operations.py +83 -8
- sgis/geopandas_tools/sfilter.py +53 -53
- sgis/helpers.py +8 -0
- sgis/io/dapla_functions.py +9 -6
- sgis/maps/explore.py +76 -1
- sgis/maps/maps.py +11 -8
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/METADATA +1 -4
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/RECORD +18 -18
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/WHEEL +0 -0
|
@@ -61,7 +61,7 @@ def to_shapely(obj) -> Geometry:
|
|
|
61
61
|
raise TypeError(obj) from e
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
def
|
|
64
|
+
def from_4326(lon: float, lat: float, crs=25833):
|
|
65
65
|
"""Get utm 33 N coordinates from lonlat (4326)."""
|
|
66
66
|
transformer = pyproj.Transformer.from_crs(
|
|
67
67
|
"EPSG:4326", f"EPSG:{crs}", always_xy=True
|
|
@@ -69,7 +69,7 @@ def get_utm33(lon: float, lat: float, crs=25833):
|
|
|
69
69
|
return transformer.transform(lon, lat)
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
def
|
|
72
|
+
def to_4326(lon: float, lat: float, crs=25833):
|
|
73
73
|
"""Get lonlat (4326) degree coordinates from coordinates in 'crs' (utm 33 N by default)."""
|
|
74
74
|
transformer = pyproj.Transformer.from_crs(
|
|
75
75
|
f"EPSG:{crs}", "EPSG:4326", always_xy=True
|
|
@@ -3,11 +3,9 @@ from collections.abc import Iterable
|
|
|
3
3
|
import networkx as nx
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from geopandas import GeoDataFrame, GeoSeries
|
|
6
|
-
from shapely import STRtree, difference,
|
|
7
|
-
from shapely.errors import GEOSException
|
|
8
|
-
from shapely.geometry import Polygon
|
|
6
|
+
from shapely import STRtree, difference, make_valid, unary_union
|
|
9
7
|
|
|
10
|
-
from .general import _push_geom_col, clean_geoms
|
|
8
|
+
from .general import _determine_geom_type_args, _push_geom_col, clean_geoms
|
|
11
9
|
from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
|
|
12
10
|
from .overlay import clean_overlay
|
|
13
11
|
|
|
@@ -15,7 +13,7 @@ from .overlay import clean_overlay
|
|
|
15
13
|
def update_geometries(
|
|
16
14
|
gdf: GeoDataFrame,
|
|
17
15
|
geom_type: str | None = None,
|
|
18
|
-
keep_geom_type: bool =
|
|
16
|
+
keep_geom_type: bool | None = None,
|
|
19
17
|
grid_size: int | None = None,
|
|
20
18
|
) -> GeoDataFrame:
|
|
21
19
|
"""Puts geometries on top of each other rowwise.
|
|
@@ -80,13 +78,11 @@ def update_geometries(
|
|
|
80
78
|
if len(gdf) <= 1:
|
|
81
79
|
return gdf
|
|
82
80
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
if geom_type == "mixed":
|
|
89
|
-
raise ValueError("Cannot have mixed geometries when keep_geom_type is True")
|
|
81
|
+
gdf = make_all_singlepart(clean_geoms(gdf))
|
|
82
|
+
|
|
83
|
+
gdf, geom_type, keep_geom_type = _determine_geom_type_args(
|
|
84
|
+
gdf, geom_type, keep_geom_type
|
|
85
|
+
)
|
|
90
86
|
|
|
91
87
|
geom_col = gdf._geometry_column_name
|
|
92
88
|
index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
|
|
@@ -100,14 +96,16 @@ def update_geometries(
|
|
|
100
96
|
erasers = (
|
|
101
97
|
pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
|
|
102
98
|
.groupby(level=0)
|
|
103
|
-
.agg(unary_union)
|
|
99
|
+
.agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
|
|
104
100
|
)
|
|
105
101
|
|
|
106
102
|
# match up the aggregated erasers by index
|
|
107
|
-
erased =
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
103
|
+
erased = make_valid(
|
|
104
|
+
difference(
|
|
105
|
+
gdf.geometry.loc[erasers.index],
|
|
106
|
+
erasers,
|
|
107
|
+
grid_size=grid_size,
|
|
108
|
+
)
|
|
111
109
|
)
|
|
112
110
|
|
|
113
111
|
gdf.loc[erased.index, geom_col] = erased
|
|
@@ -123,7 +121,7 @@ def update_geometries(
|
|
|
123
121
|
|
|
124
122
|
|
|
125
123
|
def get_intersections(
|
|
126
|
-
gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool =
|
|
124
|
+
gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool | None = None
|
|
127
125
|
) -> GeoDataFrame:
|
|
128
126
|
"""Find geometries that intersect in a GeoDataFrame.
|
|
129
127
|
|
|
@@ -203,6 +201,10 @@ def get_intersections(
|
|
|
203
201
|
else:
|
|
204
202
|
was_geoseries = False
|
|
205
203
|
|
|
204
|
+
gdf, geom_type, keep_geom_type = _determine_geom_type_args(
|
|
205
|
+
gdf, geom_type, keep_geom_type
|
|
206
|
+
)
|
|
207
|
+
|
|
206
208
|
idx_name = gdf.index.name
|
|
207
209
|
gdf = gdf.assign(orig_idx=gdf.index).reset_index(drop=True)
|
|
208
210
|
|
|
@@ -212,8 +214,10 @@ def get_intersections(
|
|
|
212
214
|
|
|
213
215
|
duplicated_geoms.index = duplicated_geoms["orig_idx"].values
|
|
214
216
|
duplicated_geoms.index.name = idx_name
|
|
217
|
+
|
|
215
218
|
if was_geoseries:
|
|
216
219
|
return duplicated_geoms.geometry
|
|
220
|
+
|
|
217
221
|
return duplicated_geoms.drop(columns="orig_idx")
|
|
218
222
|
|
|
219
223
|
|
sgis/geopandas_tools/general.py
CHANGED
|
@@ -19,6 +19,7 @@ from shapely import (
|
|
|
19
19
|
linestrings,
|
|
20
20
|
make_valid,
|
|
21
21
|
)
|
|
22
|
+
from shapely import points as shapely_points
|
|
22
23
|
from shapely.geometry import LineString, Point
|
|
23
24
|
from shapely.ops import unary_union
|
|
24
25
|
|
|
@@ -304,7 +305,7 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
304
305
|
gdf: A GeoDataFrame or GeoSeries.
|
|
305
306
|
|
|
306
307
|
Returns:
|
|
307
|
-
A GeoDataFrame or GeoSeries sorted from
|
|
308
|
+
A GeoDataFrame or GeoSeries sorted from long to short in length.
|
|
308
309
|
"""
|
|
309
310
|
# using enumerate, then iloc on the sorted dict keys.
|
|
310
311
|
# to avoid creating a temporary length column (which doesn't work for GeoSeries).
|
|
@@ -315,6 +316,39 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
|
315
316
|
return gdf.iloc[list(sorted_lengths)]
|
|
316
317
|
|
|
317
318
|
|
|
319
|
+
def sort_short_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
320
|
+
"""Sort GeoDataFrame by length in ascending order.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
gdf: A GeoDataFrame or GeoSeries.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
A GeoDataFrame or GeoSeries sorted from short to long in length.
|
|
327
|
+
"""
|
|
328
|
+
# using enumerate, then iloc on the sorted dict keys.
|
|
329
|
+
# to avoid creating a temporary length column (which doesn't work for GeoSeries).
|
|
330
|
+
length_mapper = dict(enumerate(gdf.length.values))
|
|
331
|
+
sorted_lengths = dict(sorted(length_mapper.items(), key=lambda item: item[1]))
|
|
332
|
+
return gdf.iloc[list(sorted_lengths)]
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
|
|
336
|
+
"""Sort GeoDataFrame by area in ascending order.
|
|
337
|
+
|
|
338
|
+
Args:
|
|
339
|
+
gdf: A GeoDataFrame or GeoSeries.
|
|
340
|
+
|
|
341
|
+
Returns:
|
|
342
|
+
A GeoDataFrame or GeoSeries sorted from small to large in area.
|
|
343
|
+
|
|
344
|
+
"""
|
|
345
|
+
# using enumerate, then iloc on the sorted dict keys.
|
|
346
|
+
# to avoid creating a temporary area column (which doesn't work for GeoSeries).
|
|
347
|
+
area_mapper = dict(enumerate(gdf.area.values))
|
|
348
|
+
sorted_areas = dict(sorted(area_mapper.items(), key=lambda item: item[1]))
|
|
349
|
+
return gdf.iloc[list(sorted_areas)]
|
|
350
|
+
|
|
351
|
+
|
|
318
352
|
def make_lines_between_points(
|
|
319
353
|
arr1: NDArray[Point] | GeometryArray | GeoSeries,
|
|
320
354
|
arr2: NDArray[Point] | GeometryArray | GeoSeries,
|
|
@@ -405,6 +439,28 @@ def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
|
|
|
405
439
|
)
|
|
406
440
|
|
|
407
441
|
|
|
442
|
+
def random_points_in_polygons(gdf: GeoDataFrame, n: int, seed=None) -> GeoDataFrame:
|
|
443
|
+
all_points = []
|
|
444
|
+
|
|
445
|
+
rng = np.random.default_rng(seed)
|
|
446
|
+
|
|
447
|
+
for i, geom in enumerate(gdf.geometry):
|
|
448
|
+
minx, miny, maxx, maxy = geom.bounds
|
|
449
|
+
|
|
450
|
+
xs = rng.uniform(minx, maxx, size=n * 500)
|
|
451
|
+
ys = rng.uniform(miny, maxy, size=n * 500)
|
|
452
|
+
|
|
453
|
+
points = GeoSeries(shapely_points(xs, y=ys), index=[i] * len(xs))
|
|
454
|
+
all_points.append(points)
|
|
455
|
+
|
|
456
|
+
return (
|
|
457
|
+
pd.concat(all_points)
|
|
458
|
+
.loc[lambda x: x.intersects(gdf.geometry)]
|
|
459
|
+
.groupby(level=0)
|
|
460
|
+
.head(n)
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
|
|
408
464
|
def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
409
465
|
"""Makes lines out of one or more GeoDataFrames and splits them at intersections.
|
|
410
466
|
|
|
@@ -527,7 +583,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
|
527
583
|
def clean_clip(
|
|
528
584
|
gdf: GeoDataFrame | GeoSeries,
|
|
529
585
|
mask: GeoDataFrame | GeoSeries | Geometry,
|
|
530
|
-
keep_geom_type: bool =
|
|
586
|
+
keep_geom_type: bool | None = None,
|
|
531
587
|
geom_type: str | None = None,
|
|
532
588
|
**kwargs,
|
|
533
589
|
) -> GeoDataFrame | GeoSeries:
|
|
@@ -540,6 +596,12 @@ def clean_clip(
|
|
|
540
596
|
Args:
|
|
541
597
|
gdf: GeoDataFrame or GeoSeries to be clipped
|
|
542
598
|
mask: the geometry to clip gdf
|
|
599
|
+
geom_type: Optionally specify what geometry type to keep
|
|
600
|
+
if there are mixed geometry types. Must be either "polygon",
|
|
601
|
+
"line" or "point".
|
|
602
|
+
keep_geom_type: Defaults to None, meaning True if 'geom_type' is given
|
|
603
|
+
and True if the geometries are single-typed and False if the geometries
|
|
604
|
+
are mixed.
|
|
543
605
|
**kwargs: Keyword arguments passed to geopandas.GeoDataFrame.clip
|
|
544
606
|
|
|
545
607
|
Returns:
|
|
@@ -551,12 +613,9 @@ def clean_clip(
|
|
|
551
613
|
if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
|
|
552
614
|
raise TypeError(f"'gdf' should be GeoDataFrame or GeoSeries, got {type(gdf)}")
|
|
553
615
|
|
|
554
|
-
|
|
555
|
-
geom_type
|
|
556
|
-
|
|
557
|
-
raise ValueError(
|
|
558
|
-
"Mixed geometry types is not allowed when keep_geom_type is True."
|
|
559
|
-
)
|
|
616
|
+
gdf, geom_type, keep_geom_type = _determine_geom_type_args(
|
|
617
|
+
gdf, geom_type, keep_geom_type
|
|
618
|
+
)
|
|
560
619
|
|
|
561
620
|
try:
|
|
562
621
|
gdf = gdf.clip(mask, **kwargs).pipe(clean_geoms)
|
|
@@ -569,7 +628,26 @@ def clean_clip(
|
|
|
569
628
|
|
|
570
629
|
return gdf.clip(mask, **kwargs).pipe(clean_geoms)
|
|
571
630
|
|
|
572
|
-
if
|
|
631
|
+
if keep_geom_type:
|
|
573
632
|
gdf = to_single_geom_type(gdf, geom_type)
|
|
574
633
|
|
|
575
634
|
return gdf
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _determine_geom_type_args(
|
|
638
|
+
gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
|
|
639
|
+
) -> tuple[GeoDataFrame, str, bool]:
|
|
640
|
+
if geom_type:
|
|
641
|
+
gdf = to_single_geom_type(gdf, geom_type)
|
|
642
|
+
keep_geom_type = True
|
|
643
|
+
elif keep_geom_type is None:
|
|
644
|
+
geom_type = get_geom_type(gdf)
|
|
645
|
+
if geom_type == "mixed":
|
|
646
|
+
keep_geom_type = False
|
|
647
|
+
else:
|
|
648
|
+
keep_geom_type = True
|
|
649
|
+
elif keep_geom_type:
|
|
650
|
+
geom_type = get_geom_type(gdf)
|
|
651
|
+
if geom_type == "mixed":
|
|
652
|
+
raise ValueError("Cannot set keep_geom_type=True with mixed geometries")
|
|
653
|
+
return gdf, geom_type, keep_geom_type
|
sgis/geopandas_tools/overlay.py
CHANGED
|
@@ -7,6 +7,8 @@ version of the solution from GH 2792.
|
|
|
7
7
|
'clean_overlay' also includes the overlay type "update", which can be specified in the
|
|
8
8
|
"how" parameter, in addition to the five native geopandas how-s.
|
|
9
9
|
"""
|
|
10
|
+
import functools
|
|
11
|
+
|
|
10
12
|
import geopandas as gpd
|
|
11
13
|
import numpy as np
|
|
12
14
|
import pandas as pd
|
|
@@ -409,9 +411,11 @@ def _shapely_diffclip_left(pairs, df1, grid_size):
|
|
|
409
411
|
"""Aggregate areas in right by unique values of left, then use those to clip
|
|
410
412
|
areas out of left"""
|
|
411
413
|
|
|
414
|
+
agg_geoms_partial = functools.partial(agg_geoms, grid_size=grid_size)
|
|
415
|
+
|
|
412
416
|
clip_left = pairs.groupby(level=0).agg(
|
|
413
417
|
{
|
|
414
|
-
"geom_right":
|
|
418
|
+
"geom_right": agg_geoms_partial,
|
|
415
419
|
**{
|
|
416
420
|
c: "first"
|
|
417
421
|
for c in df1.columns
|
|
@@ -433,12 +437,14 @@ def _shapely_diffclip_left(pairs, df1, grid_size):
|
|
|
433
437
|
|
|
434
438
|
|
|
435
439
|
def _shapely_diffclip_right(pairs, df1, df2, grid_size, rsuffix):
|
|
440
|
+
agg_geoms_partial = functools.partial(agg_geoms, grid_size=grid_size)
|
|
441
|
+
|
|
436
442
|
clip_right = (
|
|
437
443
|
pairs.rename(columns={"geometry": "geom_left", "geom_right": "geometry"})
|
|
438
444
|
.groupby(by="_overlay_index_right")
|
|
439
445
|
.agg(
|
|
440
446
|
{
|
|
441
|
-
"geom_left":
|
|
447
|
+
"geom_left": agg_geoms_partial,
|
|
442
448
|
"geometry": "first",
|
|
443
449
|
}
|
|
444
450
|
)
|
|
@@ -479,5 +485,7 @@ def _try_difference(left, right, grid_size):
|
|
|
479
485
|
)
|
|
480
486
|
|
|
481
487
|
|
|
482
|
-
def agg_geoms(g):
|
|
483
|
-
return
|
|
488
|
+
def agg_geoms(g, grid_size=None):
|
|
489
|
+
return (
|
|
490
|
+
make_valid(unary_union(g, grid_size=grid_size)) if len(g) > 1 else make_valid(g)
|
|
491
|
+
)
|
|
@@ -203,6 +203,7 @@ def eliminate_by_longest(
|
|
|
203
203
|
fix_double: bool = True,
|
|
204
204
|
ignore_index: bool = False,
|
|
205
205
|
aggfunc: str | dict | list | None = None,
|
|
206
|
+
grid_size=None,
|
|
206
207
|
**kwargs,
|
|
207
208
|
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
208
209
|
"""Dissolves selected polygons with the longest bordering neighbor polygon.
|
|
@@ -236,6 +237,33 @@ def eliminate_by_longest(
|
|
|
236
237
|
Returns:
|
|
237
238
|
The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
|
|
238
239
|
If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
|
|
240
|
+
|
|
241
|
+
Examples
|
|
242
|
+
--------
|
|
243
|
+
|
|
244
|
+
Create two polygons with a sliver in between:
|
|
245
|
+
|
|
246
|
+
>>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
|
|
247
|
+
>>> small_poly = sg.to_gdf(
|
|
248
|
+
... Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
|
|
249
|
+
... )
|
|
250
|
+
>>> large_poly = sg.to_gdf(
|
|
251
|
+
... Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
|
|
252
|
+
... )
|
|
253
|
+
|
|
254
|
+
Using multiple GeoDataFrame as input, the sliver is eliminated into the small
|
|
255
|
+
polygon (because it has the longest border with sliver).
|
|
256
|
+
|
|
257
|
+
>>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_longest(
|
|
258
|
+
... [small_poly, large_poly], sliver
|
|
259
|
+
... )
|
|
260
|
+
|
|
261
|
+
With only one input GeoDataFrame:
|
|
262
|
+
|
|
263
|
+
>>> polys = pd.concat([small_poly, large_poly])
|
|
264
|
+
>>> eliminated = sg.eliminate_by_longest(polys, sliver)
|
|
265
|
+
|
|
266
|
+
|
|
239
267
|
"""
|
|
240
268
|
if isinstance(gdf, (list, tuple)):
|
|
241
269
|
# concat, then break up the dataframes in the end
|
|
@@ -297,6 +325,7 @@ def eliminate_by_longest(
|
|
|
297
325
|
aggfunc,
|
|
298
326
|
crs,
|
|
299
327
|
fix_double,
|
|
328
|
+
grid_size=grid_size,
|
|
300
329
|
**kwargs,
|
|
301
330
|
)
|
|
302
331
|
|
|
@@ -341,6 +370,7 @@ def eliminate_by_largest(
|
|
|
341
370
|
ignore_index: bool = False,
|
|
342
371
|
aggfunc: str | dict | list | None = None,
|
|
343
372
|
predicate: str = "intersects",
|
|
373
|
+
grid_size=None,
|
|
344
374
|
**kwargs,
|
|
345
375
|
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
346
376
|
"""Dissolves selected polygons with the largest neighbor polygon.
|
|
@@ -374,6 +404,31 @@ def eliminate_by_largest(
|
|
|
374
404
|
The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
|
|
375
405
|
If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
|
|
376
406
|
|
|
407
|
+
Examples
|
|
408
|
+
--------
|
|
409
|
+
|
|
410
|
+
Create two polygons with a sliver in between:
|
|
411
|
+
|
|
412
|
+
>>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
|
|
413
|
+
>>> small_poly = sg.to_gdf(
|
|
414
|
+
... Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
|
|
415
|
+
... )
|
|
416
|
+
>>> large_poly = sg.to_gdf(
|
|
417
|
+
... Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
|
|
418
|
+
... )
|
|
419
|
+
|
|
420
|
+
Using multiple GeoDataFrame as input, the sliver is eliminated into
|
|
421
|
+
the large polygon.
|
|
422
|
+
|
|
423
|
+
>>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_largest(
|
|
424
|
+
... [small_poly, large_poly], sliver
|
|
425
|
+
... )
|
|
426
|
+
|
|
427
|
+
With only one input GeoDataFrame:
|
|
428
|
+
|
|
429
|
+
>>> polys = pd.concat([small_poly, large_poly])
|
|
430
|
+
>>> eliminated = sg.eliminate_by_largest(polys, sliver)
|
|
431
|
+
|
|
377
432
|
"""
|
|
378
433
|
return _eliminate_by_area(
|
|
379
434
|
gdf,
|
|
@@ -385,6 +440,7 @@ def eliminate_by_largest(
|
|
|
385
440
|
aggfunc=aggfunc,
|
|
386
441
|
predicate=predicate,
|
|
387
442
|
fix_double=fix_double,
|
|
443
|
+
grid_size=grid_size,
|
|
388
444
|
**kwargs,
|
|
389
445
|
)
|
|
390
446
|
|
|
@@ -399,6 +455,7 @@ def eliminate_by_smallest(
|
|
|
399
455
|
aggfunc: str | dict | list | None = None,
|
|
400
456
|
predicate: str = "intersects",
|
|
401
457
|
fix_double: bool = False,
|
|
458
|
+
grid_size=None,
|
|
402
459
|
**kwargs,
|
|
403
460
|
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
404
461
|
return _eliminate_by_area(
|
|
@@ -411,6 +468,7 @@ def eliminate_by_smallest(
|
|
|
411
468
|
aggfunc=aggfunc,
|
|
412
469
|
predicate=predicate,
|
|
413
470
|
fix_double=fix_double,
|
|
471
|
+
grid_size=grid_size,
|
|
414
472
|
**kwargs,
|
|
415
473
|
)
|
|
416
474
|
|
|
@@ -425,6 +483,7 @@ def _eliminate_by_area(
|
|
|
425
483
|
aggfunc: str | dict | list | None = None,
|
|
426
484
|
predicate="intersects",
|
|
427
485
|
fix_double: bool = False,
|
|
486
|
+
grid_size=None,
|
|
428
487
|
**kwargs,
|
|
429
488
|
) -> GeoDataFrame:
|
|
430
489
|
if isinstance(gdf, (list, tuple)):
|
|
@@ -468,7 +527,9 @@ def _eliminate_by_area(
|
|
|
468
527
|
|
|
469
528
|
notna = joined.loc[lambda x: x["_dissolve_idx"].notna()]
|
|
470
529
|
|
|
471
|
-
eliminated = _eliminate(
|
|
530
|
+
eliminated = _eliminate(
|
|
531
|
+
gdf, notna, aggfunc, crs, fix_double=fix_double, grid_size=grid_size, **kwargs
|
|
532
|
+
)
|
|
472
533
|
|
|
473
534
|
if not ignore_index:
|
|
474
535
|
eliminated.index = eliminated.index.map(idx_mapper)
|
|
@@ -503,7 +564,7 @@ def _eliminate_by_area(
|
|
|
503
564
|
return gdfs
|
|
504
565
|
|
|
505
566
|
|
|
506
|
-
def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
|
|
567
|
+
def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, grid_size, **kwargs):
|
|
507
568
|
if not len(to_eliminate):
|
|
508
569
|
return gdf
|
|
509
570
|
|
|
@@ -660,8 +721,12 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
|
|
|
660
721
|
|
|
661
722
|
# align and aggregate by dissolve index to avoid duplicates in difference
|
|
662
723
|
intersecting.index = soon_erased.index
|
|
663
|
-
soon_erased = soon_erased.geometry.groupby(level=0).agg(
|
|
664
|
-
|
|
724
|
+
soon_erased = soon_erased.geometry.groupby(level=0).agg(
|
|
725
|
+
lambda x: unary_union(x, grid_size=grid_size)
|
|
726
|
+
)
|
|
727
|
+
intersecting = intersecting.groupby(level=0).agg(
|
|
728
|
+
lambda x: unary_union(x, grid_size=grid_size)
|
|
729
|
+
)
|
|
665
730
|
|
|
666
731
|
# from ..maps.maps import explore_locals
|
|
667
732
|
# explore_locals()
|
|
@@ -674,12 +739,16 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
|
|
|
674
739
|
eliminated["geometry"] = (
|
|
675
740
|
pd.concat([eliminators, soon_erased, missing])
|
|
676
741
|
.groupby(level=0)
|
|
677
|
-
.agg(
|
|
742
|
+
.agg(
|
|
743
|
+
lambda x: make_valid(
|
|
744
|
+
unary_union(x.dropna().values, grid_size=grid_size)
|
|
745
|
+
)
|
|
746
|
+
)
|
|
678
747
|
)
|
|
679
748
|
|
|
680
749
|
else:
|
|
681
750
|
eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
|
|
682
|
-
lambda x: make_valid(unary_union(x.values))
|
|
751
|
+
lambda x: make_valid(unary_union(x.values, grid_size=grid_size))
|
|
683
752
|
)
|
|
684
753
|
|
|
685
754
|
# setting crs on the GeometryArrays to avoid warning in concat
|
|
@@ -973,7 +1042,11 @@ def _close_all_holes_no_islands(poly, all_geoms):
|
|
|
973
1042
|
return make_valid(unary_union(holes_closed))
|
|
974
1043
|
|
|
975
1044
|
|
|
976
|
-
def get_gaps(
|
|
1045
|
+
def get_gaps(
|
|
1046
|
+
gdf: GeoDataFrame,
|
|
1047
|
+
include_interiors: bool = False,
|
|
1048
|
+
grid_size: float | int | None = None,
|
|
1049
|
+
) -> GeoDataFrame:
|
|
977
1050
|
"""Get the gaps between polygons.
|
|
978
1051
|
|
|
979
1052
|
Args:
|
|
@@ -998,7 +1071,9 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
|
|
|
998
1071
|
)
|
|
999
1072
|
|
|
1000
1073
|
bbox_diff = make_all_singlepart(
|
|
1001
|
-
clean_overlay(
|
|
1074
|
+
clean_overlay(
|
|
1075
|
+
bbox, gdf, how="difference", geom_type="polygon", grid_size=grid_size
|
|
1076
|
+
)
|
|
1002
1077
|
)
|
|
1003
1078
|
|
|
1004
1079
|
# remove the outer "gap", i.e. the surrounding area
|
sgis/geopandas_tools/sfilter.py
CHANGED
|
@@ -11,59 +11,6 @@ from .conversion import to_gdf
|
|
|
11
11
|
gdf_type_error_message = "'gdf' should be of type GeoDataFrame or GeoSeries."
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def _get_sfilter_indices(
|
|
15
|
-
left: GeoDataFrame | GeoSeries,
|
|
16
|
-
right: GeoDataFrame | GeoSeries | Geometry,
|
|
17
|
-
predicate: str,
|
|
18
|
-
) -> np.ndarray:
|
|
19
|
-
"""Compute geometric comparisons and get matching indices.
|
|
20
|
-
|
|
21
|
-
Taken from:
|
|
22
|
-
geopandas.tools.sjoin._geom_predicate_query
|
|
23
|
-
|
|
24
|
-
Parameters
|
|
25
|
-
----------
|
|
26
|
-
left : GeoDataFrame
|
|
27
|
-
right : GeoDataFrame
|
|
28
|
-
predicate : string
|
|
29
|
-
Binary predicate to query.
|
|
30
|
-
|
|
31
|
-
Returns
|
|
32
|
-
-------
|
|
33
|
-
DataFrame
|
|
34
|
-
DataFrame with matching indices in
|
|
35
|
-
columns named `_key_left` and `_key_right`.
|
|
36
|
-
"""
|
|
37
|
-
original_predicate = predicate
|
|
38
|
-
|
|
39
|
-
with warnings.catch_warnings():
|
|
40
|
-
# We don't need to show our own warning here
|
|
41
|
-
# TODO remove this once the deprecation has been enforced
|
|
42
|
-
warnings.filterwarnings(
|
|
43
|
-
"ignore", "Generated spatial index is empty", FutureWarning
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
if predicate == "within":
|
|
47
|
-
# within is implemented as the inverse of contains
|
|
48
|
-
# contains is a faster predicate
|
|
49
|
-
# see discussion at https://github.com/geopandas/geopandas/pull/1421
|
|
50
|
-
predicate = "contains"
|
|
51
|
-
sindex = left.sindex
|
|
52
|
-
input_geoms = right.geometry if isinstance(right, GeoDataFrame) else right
|
|
53
|
-
else:
|
|
54
|
-
# all other predicates are symmetric
|
|
55
|
-
# keep them the same
|
|
56
|
-
sindex = right.sindex
|
|
57
|
-
input_geoms = left.geometry if isinstance(left, GeoDataFrame) else left
|
|
58
|
-
|
|
59
|
-
l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
|
|
60
|
-
|
|
61
|
-
if original_predicate == "within":
|
|
62
|
-
return np.unique(r_idx)
|
|
63
|
-
|
|
64
|
-
return np.unique(l_idx)
|
|
65
|
-
|
|
66
|
-
|
|
67
14
|
def sfilter(
|
|
68
15
|
gdf: GeoDataFrame | GeoSeries,
|
|
69
16
|
other: GeoDataFrame | GeoSeries | Geometry,
|
|
@@ -290,3 +237,56 @@ def _sfilter_checks(other, crs):
|
|
|
290
237
|
raise ValueError("crs mismatch", crs, other.crs) from e
|
|
291
238
|
|
|
292
239
|
return other
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _get_sfilter_indices(
|
|
243
|
+
left: GeoDataFrame | GeoSeries,
|
|
244
|
+
right: GeoDataFrame | GeoSeries | Geometry,
|
|
245
|
+
predicate: str,
|
|
246
|
+
) -> np.ndarray:
|
|
247
|
+
"""Compute geometric comparisons and get matching indices.
|
|
248
|
+
|
|
249
|
+
Taken from:
|
|
250
|
+
geopandas.tools.sjoin._geom_predicate_query
|
|
251
|
+
|
|
252
|
+
Parameters
|
|
253
|
+
----------
|
|
254
|
+
left : GeoDataFrame
|
|
255
|
+
right : GeoDataFrame
|
|
256
|
+
predicate : string
|
|
257
|
+
Binary predicate to query.
|
|
258
|
+
|
|
259
|
+
Returns
|
|
260
|
+
-------
|
|
261
|
+
np.ndarray
|
|
262
|
+
Unique integer positions of the geometries in
|
|
263
|
+
`left` that match `predicate` against `right`.
|
|
264
|
+
"""
|
|
265
|
+
original_predicate = predicate
|
|
266
|
+
|
|
267
|
+
with warnings.catch_warnings():
|
|
268
|
+
# We don't need to show our own warning here
|
|
269
|
+
# TODO remove this once the deprecation has been enforced
|
|
270
|
+
warnings.filterwarnings(
|
|
271
|
+
"ignore", "Generated spatial index is empty", FutureWarning
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
if predicate == "within":
|
|
275
|
+
# within is implemented as the inverse of contains
|
|
276
|
+
# contains is a faster predicate
|
|
277
|
+
# see discussion at https://github.com/geopandas/geopandas/pull/1421
|
|
278
|
+
predicate = "contains"
|
|
279
|
+
sindex = left.sindex
|
|
280
|
+
input_geoms = right.geometry if isinstance(right, GeoDataFrame) else right
|
|
281
|
+
else:
|
|
282
|
+
# all other predicates are symmetric
|
|
283
|
+
# keep them the same
|
|
284
|
+
sindex = right.sindex
|
|
285
|
+
input_geoms = left.geometry if isinstance(left, GeoDataFrame) else left
|
|
286
|
+
|
|
287
|
+
l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
|
|
288
|
+
|
|
289
|
+
if original_predicate == "within":
|
|
290
|
+
return np.unique(r_idx)
|
|
291
|
+
|
|
292
|
+
return np.unique(l_idx)
|
sgis/helpers.py
CHANGED
|
@@ -219,6 +219,14 @@ def sort_nans_last(df, ignore_index: bool = False):
|
|
|
219
219
|
return df.reset_index(drop=True) if ignore_index else df
|
|
220
220
|
|
|
221
221
|
|
|
222
|
+
def is_number(text) -> bool:
|
|
223
|
+
try:
|
|
224
|
+
float(text)
|
|
225
|
+
return True
|
|
226
|
+
except ValueError:
|
|
227
|
+
return False
|
|
228
|
+
|
|
229
|
+
|
|
222
230
|
class LocalFunctionError(ValueError):
|
|
223
231
|
def __init__(self, func: str):
|
|
224
232
|
self.func = func.__name__
|
sgis/io/dapla_functions.py
CHANGED
|
@@ -52,6 +52,8 @@ def read_geopandas(
|
|
|
52
52
|
try:
|
|
53
53
|
return gpd.read_parquet(file, **kwargs)
|
|
54
54
|
except ValueError as e:
|
|
55
|
+
if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
|
|
56
|
+
raise e
|
|
55
57
|
df = dp.read_pandas(gcs_path, **kwargs)
|
|
56
58
|
|
|
57
59
|
if pandas_fallback or not len(df):
|
|
@@ -63,6 +65,8 @@ def read_geopandas(
|
|
|
63
65
|
try:
|
|
64
66
|
return gpd.read_file(file, **kwargs)
|
|
65
67
|
except ValueError as e:
|
|
68
|
+
if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
|
|
69
|
+
raise e
|
|
66
70
|
df = dp.read_pandas(gcs_path, **kwargs)
|
|
67
71
|
|
|
68
72
|
if pandas_fallback or not len(df):
|
|
@@ -75,6 +79,7 @@ def write_geopandas(
|
|
|
75
79
|
df: gpd.GeoDataFrame,
|
|
76
80
|
gcs_path: str | Path,
|
|
77
81
|
overwrite: bool = True,
|
|
82
|
+
pandas_fallback: bool = False,
|
|
78
83
|
fs: Optional[dp.gcs.GCSFileSystem] = None,
|
|
79
84
|
**kwargs,
|
|
80
85
|
) -> None:
|
|
@@ -106,12 +111,10 @@ def write_geopandas(
|
|
|
106
111
|
pd.io.parquet.BaseImpl.validate_dataframe(df)
|
|
107
112
|
|
|
108
113
|
if not len(df):
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
df.drop(df._geometry_column_name, axis=1), gcs_path, **kwargs
|
|
114
|
-
)
|
|
114
|
+
if pandas_fallback:
|
|
115
|
+
df.geometry = df.geometry.astype(str)
|
|
116
|
+
df = pd.DataFrame(df)
|
|
117
|
+
dp.write_pandas(df, gcs_path, **kwargs)
|
|
115
118
|
return
|
|
116
119
|
|
|
117
120
|
fs = dp.FileClient.get_gcs_file_system()
|