ssb-sgis 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +13 -4
- sgis/geopandas_tools/bounds.py +236 -37
- sgis/geopandas_tools/buffer_dissolve_explode.py +41 -9
- sgis/geopandas_tools/cleaning.py +521 -169
- sgis/geopandas_tools/conversion.py +2 -2
- sgis/geopandas_tools/duplicates.py +22 -18
- sgis/geopandas_tools/general.py +87 -9
- sgis/geopandas_tools/overlay.py +12 -4
- sgis/geopandas_tools/polygon_operations.py +83 -8
- sgis/geopandas_tools/sfilter.py +53 -53
- sgis/helpers.py +8 -0
- sgis/io/dapla_functions.py +9 -6
- sgis/maps/explore.py +76 -1
- sgis/maps/maps.py +11 -8
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/METADATA +1 -4
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/RECORD +18 -18
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.9.dist-info → ssb_sgis-0.3.11.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/cleaning.py
CHANGED
@@ -1,26 +1,45 @@
+import re
 import warnings
 
+import numpy as np
 import pandas as pd
+import shapely
 from geopandas import GeoDataFrame, GeoSeries
+from geopandas.array import GeometryArray
 from numpy.typing import NDArray
 from shapely import (
     extract_unique_points,
+    force_2d,
     get_coordinates,
     get_exterior_ring,
+    get_parts,
     linearrings,
+    linestrings,
     make_valid,
     polygons,
 )
-from shapely.
+from shapely.errors import GEOSException
+from shapely.geometry import LinearRing, LineString, Point
 
 from ..networkanalysis.closing_network_holes import get_angle
 from .buffer_dissolve_explode import buff, dissexp
 from .conversion import coordinate_array, to_geoseries
 from .duplicates import get_intersections, update_geometries
-from .general import
-
+from .general import (
+    clean_geoms,
+    sort_large_first,
+    sort_long_first,
+    sort_small_first,
+    to_lines,
+)
+from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 from .overlay import clean_overlay
-from .polygon_operations import
+from .polygon_operations import (
+    close_all_holes,
+    eliminate_by_longest,
+    get_cluster_mapper,
+    get_gaps,
+)
 from .polygons_as_rings import PolygonsAsRings
 from .sfilter import sfilter, sfilter_inverse
 
@@ -33,120 +52,17 @@ PRECISION = 1e-4
 BUFFER_RES = 50
 
 
-def get_angle_between_indexed_points(point_df: GeoDataFrame):
-    """Get angle difference between the two lines."""
-    point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
-
-    notna = point_df["next"].notna()
-
-    this = coordinate_array(point_df.loc[notna, "geometry"].values)
-    next_ = coordinate_array(point_df.loc[notna, "next"].values)
-
-    point_df.loc[notna, "angle"] = get_angle(this, next_)
-    point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
-
-    point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
-
-    return point_df
-
-
-def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
-    """Remove thin spikes in polygons.
-
-    Note that this function might be slow. Should only be used if necessary.
-
-    Args:
-        gdf: GeoDataFrame of polygons.
-        tolerance: distance (usually meters) used as the minimum thickness
-            for polygons to be eliminated. Any spike thinner than the tolerance
-            will be removed.
-
-    Returns:
-        A GeoDataFrame of polygons without spikes thinner than the tolerance.
-    """
-
-    def _remove_spikes(geoms: NDArray[LinearRing]) -> NDArray[LinearRing]:
-        if not len(geoms):
-            return geoms
-        geoms = to_geoseries(geoms).reset_index(drop=True)
-
-        points = (
-            extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
-        )
-
-        points = get_angle_between_indexed_points(points)
-
-        indices_with_spikes = points[
-            lambda x: (x["angle_diff"] >= 180) & (x["angle_diff"] < 180.01)
-        ].index.unique()
-
-        rings_with_spikes = geoms[geoms.index.isin(indices_with_spikes)]
-        rings_without_spikes = geoms[~geoms.index.isin(indices_with_spikes)]
-
-        def to_buffered_rings_without_spikes(x):
-            polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
-
-            return (
-                polys.buffer(-tolerance, resolution=BUFFER_RES)
-                .explode(index_parts=False)
-                .pipe(close_all_holes)
-                .pipe(get_exterior_ring)
-                .buffer(tolerance * 10)
-            )
-
-        buffered = to_buffered_rings_without_spikes(
-            rings_with_spikes.buffer(tolerance / 2, resolution=BUFFER_RES)
-        )
-
-        points_without_spikes = (
-            extract_unique_points(rings_with_spikes)
-            .explode(index_parts=False)
-            .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
-        )
-
-        # linearrings require at least 4 coordinate pairs, or three unique
-        points_without_spikes = points_without_spikes.loc[
-            lambda x: x.groupby(level=0).size() >= 3
-        ]
-
-        # need an index from 0 to n-1 in 'linearrings'
-        to_int_index = {
-            ring_idx: i
-            for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
-        }
-        int_indices = points_without_spikes.index.map(to_int_index)
-
-        as_lines = pd.Series(
-            linearrings(
-                get_coordinates(points_without_spikes.geometry.values),
-                indices=int_indices,
-            ),
-            index=points_without_spikes.index.unique(),
-        )
-        as_lines = pd.concat([as_lines, rings_without_spikes])
-
-        # the missing polygons are thin and/or spiky. Let's remove them
-        missing = geoms.loc[~geoms.index.isin(as_lines.index)]
-
-        missing = pd.Series(
-            [None] * len(missing),
-            index=missing.index.values,
-        )
-
-        return pd.concat([as_lines, missing]).sort_index()
-
-    gdf.geometry = (
-        PolygonsAsRings(gdf.geometry).apply_numpy_func(_remove_spikes).to_numpy()
-    )
-    return gdf
-
-
 def coverage_clean(
     gdf: GeoDataFrame,
     tolerance: int | float,
     duplicate_action: str = "fix",
-
+    # spike_action: str = "ignore",
+    grid_sizes: tuple[None | int] = (
+        None,
+        # 1e-6,
+        # 1e-5,
+        # 1e-4,
+    ),
 ) -> GeoDataFrame:
     """Fix thin gaps, holes, slivers and double surfaces.
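The new `grid_sizes` parameter drives a retry pattern used throughout the rewritten function: each GEOS-backed step is attempted once per grid size, falling back to a coarser precision grid whenever shapely raises a GEOSException. A minimal standalone sketch of the pattern (the helper name is illustrative, not part of sgis):

    from shapely.errors import GEOSException

    def retry_with_grid_sizes(operation, grid_sizes=(None, 1e-6, 1e-5, 1e-4), **kwargs):
        # Try the operation with each precision grid in turn; snapping
        # coordinates to a coarser grid often resolves GEOS topology errors.
        for i, grid_size in enumerate(grid_sizes):
            try:
                return operation(grid_size=grid_size, **kwargs)
            except GEOSException:
                if i == len(grid_sizes) - 1:
                    raise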
@@ -174,6 +90,7 @@ def coverage_clean(
             and then dissolved into the neighbor polygon with the longest shared border.
             If "error", an Exception is raised if there are any double surfaces thicker
             than the tolerance. If "ignore", double surfaces are kept as is.
+        spike_action: Either "fix", "ignore" or "try".
 
     Returns:
         A GeoDataFrame with cleaned polygons.
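A hypothetical call, assuming a polygon GeoDataFrame `gdf` in a metric CRS and that `coverage_clean` is re-exported at package level, as sgis functions usually are:

    import sgis as sg

    # Fix gaps, slivers and double surfaces thinner than 1 meter; thicker
    # double surfaces are dissolved into the longest-bordering neighbor.
    cleaned = sg.coverage_clean(gdf, tolerance=1, duplicate_action="fix")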
@@ -185,11 +102,38 @@
     if not gdf.index.is_unique:
         gdf = gdf.reset_index(drop=True)
 
-    gdf =
+    gdf = make_all_singlepart(gdf).loc[
+        lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
+    ]
 
-
-
-
+    gdf = clean_geoms(gdf)
+
+    gdf.geometry = shapely.simplify(gdf.geometry, PRECISION)
+
+    gdf = (
+        clean_geoms(gdf)
+        .pipe(make_all_singlepart)
+        .loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
+    )
+
+    try:
+        gaps = get_gaps(gdf, include_interiors=True)
+    except GEOSException:
+        for i, grid_size in enumerate(grid_sizes):
+            try:
+                gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
+                break
+            except GEOSException as e:
+                if i == len(grid_sizes) - 1:
+                    explore_geosexception(e, gdf)
+                    raise e
+
+    if duplicate_action == "ignore":
+        double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
+        double["_double_idx"] = None
+    else:
+        double = get_intersections(gdf)
+        double["_double_idx"] = range(len(double))
 
     gdf, slivers = split_out_slivers(gdf, tolerance)
 
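The two overlays built here define what gets cleaned: `get_gaps` returns holes in the coverage (interior rings included) and `get_intersections` returns areas covered by more than one polygon. Both are later filtered with the negative-buffer thinness test that appears further down. Schematically, with the names used in this diff:

    gaps = get_gaps(gdf, include_interiors=True)  # holes between and inside polygons
    double = get_intersections(gdf)               # areas covered more than once
    # a geometry counts as "thin" if eroding it by tolerance / 2 leaves nothing:
    thin = pd.concat([gaps, double]).loc[lambda x: x.buffer(-tolerance / 2).is_empty]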
@@ -200,21 +144,41 @@
     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
 
     if not all_are_thin and duplicate_action == "fix":
-        gdf, thin_gaps_and_double = _properly_fix_duplicates(
+        gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
             gdf, double, slivers, thin_gaps_and_double, tolerance
         )
 
-        # gaps = pd.concat([gaps, more_gaps], ignore_index=True)
-        # double = pd.concat([double, more_double], ignore_index=True)
     elif not all_are_thin and duplicate_action == "error":
         raise ValueError("Large double surfaces.")
 
-    to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
-
-
+    to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
+    to_eliminate.geometry = shapely.simplify(to_eliminate.geometry, PRECISION)
+
+    # eliminate super-thin slivers causing weird geometries
+    is_thin = to_eliminate.buffer(-PRECISION).is_empty
+    thick, thin = to_eliminate[~is_thin], to_eliminate[is_thin]
+    for i, grid_size in enumerate(grid_sizes):
+        try:
+            to_eliminate = eliminate_by_longest(
+                thick,
+                thin,
+                remove_isolated=False,
+                ignore_index=True,
+                grid_size=grid_size,
+            )
+            break
+        except GEOSException as e:
+            if i == len(grid_sizes) - 1:
+                explore_geosexception(e, gdf, thick, thin)
+                raise e
+
+    to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+
 
     to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
     gdf["_poly_idx"] = range(len(gdf))
 
+    to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))
+
     gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
 
     joined = to_eliminate.sjoin(gdf_geoms_idx, how="left")
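The `is_thin` split relies on negative buffering: eroding a polygon by PRECISION empties it only if the polygon is thinner than about 2 * PRECISION everywhere. A self-contained illustration with plain shapely:

    import shapely

    PRECISION = 1e-4
    sliver = shapely.box(0, 0, 10, 0.00005)  # 10 m long, 0.05 mm wide
    square = shapely.box(0, 0, 10, 1)

    print(sliver.buffer(-PRECISION).is_empty)  # True: eroded away entirely
    print(square.buffer(-PRECISION).is_empty)  # False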
@@ -227,35 +191,86 @@
             buff(gdf_geoms_idx, tolerance, resolution=BUFFER_RES),
             geom_type="polygon",
         )
-        .pipe(
+        .pipe(sort_large_first)
         .drop_duplicates("_eliminate_idx")
         .set_index("_eliminate_idx")["_poly_idx"]
     )
     intersecting["_poly_idx"] = intersecting["_eliminate_idx"].map(poly_idx_mapper)
-    without_double = update_geometries(intersecting).drop(
-        columns=["_eliminate_idx", "_double_idx", "index_right"]
-    )
 
-
-
-
-
+    for i, grid_size in enumerate(grid_sizes):
+        try:
+            without_double = update_geometries(
+                intersecting, geom_type="polygon", grid_size=grid_size
+            ).drop(columns=["_eliminate_idx", "_double_idx", "index_right"])
+            break
+        except GEOSException as e:
+            intersecting.geometry = shapely.simplify(
+                intersecting.geometry, PRECISION * (10 * i + 1)
+            )
+            if i == len(grid_sizes) - 1:
+                explore_geosexception(e, gdf, intersecting, isolated)
+                raise e
+
+    not_really_isolated = isolated.drop(
+        columns=[
+            "_double_idx",
+            "index_right",
+        ]
+    ).merge(without_double, on="_cluster", how="inner")
+
+    really_isolated = isolated.loc[
+        lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
+    ]
+
+    really_isolated["_poly_idx"] = (
+        really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for i, grid_size in enumerate(grid_sizes):
+        try:
+            cleaned = (
+                dissexp(
+                    pd.concat([gdf, without_double, isolated, really_isolated]).drop(
+                        columns=[
+                            "_cluster",
+                            "_eliminate_idx",
+                            "index_right",
+                            "_double_idx",
+                        ],
+                        errors="ignore",
+                    ),
+                    by="_poly_idx",
+                    aggfunc="first",
+                    dropna=True,
+                    grid_size=grid_size,
+                )
+                .sort_index()
+                .reset_index(drop=True)
+                # .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+            )
+            break
+        except GEOSException as e:
+            if i == len(grid_sizes) - 1:
+                explore_geosexception(e, gdf, without_double, isolated, really_isolated)
+                raise e
+
+    cleaned.geometry = shapely.make_valid(shapely.simplify(cleaned.geometry, PRECISION))
+
+    for i, grid_size in enumerate(grid_sizes):
+        try:
+            cleaned = update_geometries(
+                cleaned, geom_type="polygon", grid_size=grid_size
+            )
+            break
+        except GEOSException as e:
+            cleaned.geometry = shapely.simplify(
+                cleaned.geometry, PRECISION * (10 * i + 1)
+            )
+            if i == len(grid_sizes) - 1:
+                explore_geosexception(
+                    e, gdf, cleaned, without_double, isolated, really_isolated
+                )
+                raise e
 
     missing_indices: pd.Index = sfilter_inverse(
         gdf.representative_point(), cleaned
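Each simplify step above is paired with make_valid because snapping vertices to a tolerance can fold a ring over itself. A small illustration of why the pairing matters:

    import shapely

    bowtie = shapely.Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])  # ring crosses itself
    print(bowtie.is_valid)                      # False
    print(shapely.make_valid(bowtie).is_valid)  # True: repaired into two triangles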
@@ -268,30 +283,228 @@ def coverage_clean(
         geom_type="polygon",
     )
 
-
+    cleaned = pd.concat([cleaned, missing], ignore_index=True)
+    cleaned.geometry = shapely.make_valid(shapely.simplify(cleaned.geometry, PRECISION))
 
+    return cleaned
 
-
-
-
-
-
-
-
-
-
-
+
+def split_spiky_polygons(
+    gdf: GeoDataFrame, tolerance: int | float, grid_sizes: tuple[None | int] = (None,)
+) -> GeoDataFrame:
+    if not len(gdf):
+        return gdf
+
+    gdf = to_single_geom_type(make_all_singlepart(gdf), "polygon")
+
+    if not gdf.index.is_unique:
+        gdf = gdf.reset_index(drop=True)
+
+    polygons_without_spikes = gdf.buffer(tolerance / 2, join_style=2).buffer(
+        -tolerance / 2, join_style=2
+    )
+
+    donuts_around_polygons = to_lines(
+        polygons_without_spikes.to_frame("geometry")
+    ).pipe(buff, 1e-3, copy=False)
+
+    def remove_spikes(df):
+        df = df.to_frame("geometry")
+        df["_ring_idx"] = range(len(df))
+        df = df.reset_index(drop=True)
+
+        points = df.copy()
+        points.geometry = extract_unique_points(points.geometry)
+        points = points.explode(index_parts=False)
+        points["_idx"] = range(len(points))
+
+        not_spikes = points.sjoin(donuts_around_polygons).loc[
+            lambda x: x["_ring_idx"] == x["index_right"]
+        ]
+        can_be_polygons = not_spikes.iloc[
+            (not_spikes.groupby("_ring_idx").transform("size") >= 3).values
         ]
-
-
+
+        without_spikes = (
+            can_be_polygons.sort_values("_idx")
+            .groupby("_ring_idx")["geometry"]
+            .agg(LinearRing)
         )
-    if all_are_thin:
-        return gdf, thin_gaps_and_double
 
-
-
+        missing = df[~df["_ring_idx"].isin(without_spikes.index)].geometry
+
+        return pd.concat([without_spikes, missing]).sort_index()
+
+    without_spikes = GeoDataFrame(
+        {
+            "geometry": PolygonsAsRings(gdf.geometry)
+            .apply_geoseries_func(remove_spikes)
+            .to_numpy()
+        },
+        crs=gdf.crs,
+    ).pipe(to_single_geom_type, "polygon")
+
+    is_thin = without_spikes.buffer(-tolerance / 2).is_empty
+    without_spikes = pd.concat(
+        [
+            split_by_neighbors(
+                without_spikes[is_thin], without_spikes, tolerance=tolerance
+            ),
+            without_spikes[~is_thin],
+        ]
+    )
+
+    for _ in range(2):
+        for i, grid_size in enumerate(grid_sizes):
+            try:
+                without_spikes = update_geometries(
+                    sort_small_first(without_spikes), geom_type="polygon"
+                )
+                break
+            except GEOSException as e:
+                if i == len(grid_sizes) - 1:
+                    raise e
+
+    for i, grid_size in enumerate(grid_sizes):
+        try:
+            return clean_overlay(
+                gdf, without_spikes, how="identity", grid_size=grid_size
+            )
+        except GEOSException as e:
+            if i == len(grid_sizes) - 1:
+                raise e
+
+
+def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+    """Remove thin spikes in polygons.
+
+    Note that this function might be slow. Should only be used if necessary.
+
+    Args:
+        gdf: GeoDataFrame of polygons.
+        tolerance: distance (usually meters) used as the minimum thickness
+            for polygons to be eliminated. Any spike thinner than the tolerance
+            will be removed.
+
+    Returns:
+        A GeoDataFrame of polygons without spikes thinner than the tolerance.
+    """
+
+    gdf.geometry = (
+        PolygonsAsRings(gdf.geometry)
+        .apply_numpy_func(_remove_spikes, args=(tolerance,))
+        .to_numpy()
+    )
+    return gdf
+
+
+def _remove_spikes(
+    geoms: NDArray[LinearRing], tolerance: int | float
+) -> NDArray[LinearRing]:
+    if not len(geoms):
+        return geoms
+    geoms = to_geoseries(geoms).reset_index(drop=True)
+
+    points = (
+        extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
+    )
+
+    points = get_angle_between_indexed_points(points)
+
+    def to_buffered_rings_without_spikes(x):
+        polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
+
+        return (
+            polys.buffer(-tolerance, resolution=BUFFER_RES)
+            .explode(index_parts=False)
+            .pipe(close_all_holes)
+            .pipe(get_exterior_ring)
+            .buffer(tolerance * 10)
+        )
+
+    buffered = to_buffered_rings_without_spikes(
+        geoms.buffer(tolerance / 2, resolution=BUFFER_RES)
+    )
+
+    points_without_spikes = (
+        extract_unique_points(geoms)
+        .explode(index_parts=False)
+        .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
+    )
+
+    # linearrings require at least 4 coordinate pairs, or three unique
+    points_without_spikes = points_without_spikes.loc[
+        lambda x: x.groupby(level=0).size() >= 3
     ]
-
+
+    # need an index from 0 to n-1 in 'linearrings'
+    to_int_index = {
+        ring_idx: i
+        for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
+    }
+    int_indices = points_without_spikes.index.map(to_int_index)
+
+    as_lines = pd.Series(
+        linearrings(
+            get_coordinates(points_without_spikes.geometry.values),
+            indices=int_indices,
+        ),
+        index=points_without_spikes.index.unique(),
+    )
+
+    # the missing polygons are thin and/or spiky. Let's remove them
+    missing = geoms.loc[~geoms.index.isin(as_lines.index)]
+
+    missing = pd.Series(
+        [None] * len(missing),
+        index=missing.index.values,
+    )
+
+    return pd.concat([as_lines, missing]).sort_index()
+
+
+def get_angle_between_indexed_points(point_df: GeoDataFrame):
+    """Get angle difference between the two lines."""
+    point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
+
+    notna = point_df["next"].notna()
+
+    this = coordinate_array(point_df.loc[notna, "geometry"].values)
+    next_ = coordinate_array(point_df.loc[notna, "next"].values)
+
+    point_df.loc[notna, "angle"] = get_angle(this, next_)
+    point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
+
+    point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
+
+    return point_df
+
+
+def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
+    # gdf = update_geometries(gdf)
+    # gdf, more_slivers = split_out_slivers(gdf, tolerance)
+    # slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+    # gaps = get_gaps(gdf, include_interiors=True)
+    # double = get_intersections(gdf).pipe(update_geometries, geom_type="polygon")
+    # double["_double_idx"] = range(len(double))
+    # thin_gaps_and_double = pd.concat([gaps, double]).loc[
+    #     lambda x: x.buffer(-tolerance / 2).is_empty
+    # ]
+    # return gdf, thin_gaps_and_double, slivers
+
+    gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
+    gdf, more_slivers = split_out_slivers(gdf, tolerance)
+    slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+    gaps = get_gaps(gdf, include_interiors=True)
+    assert "_double_idx" not in gaps
+    double = get_intersections(gdf)
+    double["_double_idx"] = range(len(double))
+    thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
+        lambda x: x.buffer(-tolerance / 2).is_empty
+    ]
+
+    return gdf, thin_gaps_and_double, slivers
 
 
 def _dissolve_thick_double_and_update(gdf, double, thin_double):
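Internally, spike candidates are vertices where the incoming and outgoing segment directions differ by roughly 180 degrees (see `get_angle_between_indexed_points`), and the surviving points are reassembled into linear rings, which need at least three unique points each. A hypothetical call on a polygon GeoDataFrame in meters:

    # Strip spikes thinner than 0.1 m. Per the docstring, this can be slow,
    # so it is meant for occasional use rather than routine pipelines.
    despiked = remove_spikes(polygons_gdf, tolerance=0.1)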
@@ -299,20 +512,18 @@ def _dissolve_thick_double_and_update(gdf, double, thin_double):
         double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])]
         .drop(columns="_double_idx")
         .pipe(sort_large_first)
-        .pipe(update_geometries)
+        .pipe(update_geometries, geom_type="polygon")
     )
     return (
         clean_overlay(gdf, large, how="update")
         .pipe(sort_large_first)
-        .pipe(update_geometries)
+        .pipe(update_geometries, geom_type="polygon")
     )
 
 
-def _cleaning_checks(gdf, tolerance, duplicate_action):
+def _cleaning_checks(gdf, tolerance, duplicate_action):  # , spike_action):
     if not len(gdf) or not tolerance:
         return gdf
-    if get_geom_type(gdf) != "polygon":
-        raise ValueError("Must be polygons.")
     if tolerance < PRECISION:
         raise ValueError(
             f"'tolerance' must be larger than {PRECISION} to avoid "
@@ -329,3 +540,144 @@ def split_out_slivers(
     slivers = gdf.loc[is_sliver]
     gdf = gdf.loc[~is_sliver]
     return gdf, slivers
+
+
+def split_by_neighbors(df, split_by, tolerance):
+    if not len(df):
+        return df
+
+    split_by = split_by.copy()
+    split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
+
+    intersecting_lines = (
+        clean_overlay(to_lines(split_by), buff(df, tolerance), how="identity")
+        .pipe(get_line_segments)
+        .reset_index(drop=True)
+    )
+
+    endpoints = intersecting_lines.boundary.explode(index_parts=False)
+
+    extended_lines = GeoDataFrame(
+        {
+            "geometry": extend_lines(
+                endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
+                endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
+                distance=tolerance * 3,
+            )
+        },
+        crs=df.crs,
+    )
+
+    buffered = buff(extended_lines, tolerance, single_sided=True)
+
+    return clean_overlay(df, buffered, how="identity")
+
+
+def extend_lines(arr1, arr2, distance):
+    if len(arr1) != len(arr2):
+        raise ValueError
+    if not len(arr1):
+        return arr1
+
+    arr1, arr2 = arr2, arr1  # TODO fix
+
+    coords1 = coordinate_array(arr1)
+    coords2 = coordinate_array(arr2)
+
+    dx = coords2[:, 0] - coords1[:, 0]
+    dy = coords2[:, 1] - coords1[:, 1]
+    len_xy = np.sqrt((dx**2.0) + (dy**2.0))
+    x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
+    y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
+
+    new_points = np.array([None for _ in range(len(arr1))])
+    new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
+
+    new_points[~np.isnan(x)] = make_lines_between_points(
+        arr2[~np.isnan(x)], new_points[~np.isnan(x)]
+    )
+    return new_points
+
+
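`extend_lines` moves each segment's endpoint a fixed distance along the ray away from the opposite endpoint, using the unit vector (coords1 - coords2) / len_xy. A quick numeric check of the formula:

    import numpy as np

    p1 = np.array([3.0, 4.0])  # endpoint to extend past (coords1)
    p2 = np.array([0.0, 0.0])  # opposite endpoint (coords2)

    length = np.sqrt(((p1 - p2) ** 2).sum())   # 5.0
    new_point = p1 + (p1 - p2) / length * 5.0  # array([6., 8.]), on the same ray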
+def make_lines_between_points(
+    arr1: NDArray[Point], arr2: NDArray[Point]
+) -> NDArray[LineString]:
+    if arr1.shape != arr2.shape:
+        raise ValueError(
+            f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
+        )
+    coords: pd.DataFrame = pd.concat(
+        [
+            pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
+            pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
+        ]
+    ).sort_index()
+
+    return linestrings(coords.values, indices=coords.index)
+
+
+def get_line_segments(lines) -> GeoDataFrame:
+    assert lines.index.is_unique
+    if isinstance(lines, GeoDataFrame):
+        multipoints = lines.assign(
+            **{
+                lines._geometry_column_name: force_2d(
+                    extract_unique_points(lines.geometry.values)
+                )
+            }
+        )
+        return multipoints_to_line_segments(multipoints.geometry)
+
+    multipoints = GeoSeries(extract_unique_points(lines.values), index=lines.index)
+
+    return multipoints_to_line_segments(multipoints)
+
+
+def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
+    if not len(multipoints):
+        return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)
+
+    try:
+        crs = multipoints.crs
+    except AttributeError:
+        crs = None
+
+    try:
+        point_df = multipoints.explode(index_parts=False)
+        if isinstance(point_df, GeoSeries):
+            point_df = point_df.to_frame("geometry")
+    except AttributeError:
+        points, indices = get_parts(multipoints, return_index=True)
+        if isinstance(multipoints.index, pd.MultiIndex):
+            indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)
+
+        point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)
+
+    point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
+
+    first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
+    is_last_point = point_df["next"].isna()
+
+    point_df.loc[is_last_point, "next"] = first_points
+    assert point_df["next"].notna().all()
+
+    point_df["geometry"] = [
+        LineString([x1, x2]) for x1, x2 in zip(point_df["geometry"], point_df["next"])
+    ]
+    return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
+
+
+def explore_geosexception(e: GEOSException, *gdfs):
+    from ..maps.maps import explore
+    from .conversion import to_gdf
+
+    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
+
+    matches = re.findall(pattern, str(e))
+    coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
+    exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
+    if len(exception_point):
+        exception_point["wkt"] = exception_point.to_wkt()
+        explore(exception_point, *gdfs, mask=exception_point.buffer(100))
+    else:
+        explore(*gdfs)