ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +5 -2
- sgis/geopandas_tools/buffer_dissolve_explode.py +13 -9
- sgis/geopandas_tools/centerlines.py +110 -47
- sgis/geopandas_tools/cleaning.py +331 -0
- sgis/geopandas_tools/conversion.py +9 -3
- sgis/geopandas_tools/duplicates.py +67 -49
- sgis/geopandas_tools/general.py +15 -1
- sgis/geopandas_tools/neighbors.py +12 -0
- sgis/geopandas_tools/overlay.py +26 -17
- sgis/geopandas_tools/polygon_operations.py +281 -100
- sgis/geopandas_tools/polygons_as_rings.py +72 -10
- sgis/geopandas_tools/sfilter.py +8 -8
- sgis/helpers.py +20 -3
- sgis/io/dapla_functions.py +28 -6
- sgis/io/write_municipality_data.py +11 -5
- sgis/maps/examine.py +10 -7
- sgis/maps/explore.py +102 -25
- sgis/maps/map.py +32 -6
- sgis/maps/maps.py +40 -58
- sgis/maps/tilesources.py +61 -0
- sgis/networkanalysis/closing_network_holes.py +89 -62
- sgis/networkanalysis/cutting_lines.py +1 -1
- sgis/networkanalysis/nodes.py +1 -1
- sgis/networkanalysis/traveling_salesman.py +8 -4
- sgis/parallel/parallel.py +63 -10
- sgis/raster/raster.py +29 -27
- {ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/METADATA +4 -1
- ssb_sgis-0.3.9.dist-info/RECORD +59 -0
- {ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/WHEEL +1 -1
- sgis/geopandas_tools/snap_polygons.py +0 -0
- ssb_sgis-0.3.8.dist-info/RECORD +0 -58
- {ssb_sgis-0.3.8.dist-info → ssb_sgis-0.3.9.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from geopandas import GeoDataFrame, GeoSeries
|
|
5
|
+
from numpy.typing import NDArray
|
|
6
|
+
from shapely import (
|
|
7
|
+
extract_unique_points,
|
|
8
|
+
get_coordinates,
|
|
9
|
+
get_exterior_ring,
|
|
10
|
+
linearrings,
|
|
11
|
+
make_valid,
|
|
12
|
+
polygons,
|
|
13
|
+
)
|
|
14
|
+
from shapely.geometry import LinearRing
|
|
15
|
+
|
|
16
|
+
from ..networkanalysis.closing_network_holes import get_angle
|
|
17
|
+
from .buffer_dissolve_explode import buff, dissexp
|
|
18
|
+
from .conversion import coordinate_array, to_geoseries
|
|
19
|
+
from .duplicates import get_intersections, update_geometries
|
|
20
|
+
from .general import sort_large_first, sort_long_first
|
|
21
|
+
from .geometry_types import get_geom_type
|
|
22
|
+
from .overlay import clean_overlay
|
|
23
|
+
from .polygon_operations import close_all_holes, close_thin_holes, get_gaps
|
|
24
|
+
from .polygons_as_rings import PolygonsAsRings
|
|
25
|
+
from .sfilter import sfilter, sfilter_inverse
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
warnings.simplefilter(action="ignore", category=UserWarning)
|
|
29
|
+
warnings.simplefilter(action="ignore", category=RuntimeWarning)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
PRECISION = 1e-4
|
|
33
|
+
BUFFER_RES = 50
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_angle_between_indexed_points(point_df: GeoDataFrame):
    """Add angle columns for consecutive points that share an index label.

    Rows are grouped on the index (level 0). Within each group every point
    is paired with the following point, 'get_angle' is called on the
    coordinate arrays of those pairs, and the difference to the previous
    row's angle is stored.

    Args:
        point_df: Frame with a 'geometry' column of points, where points
            belonging together share the same index label.

    Returns:
        The same object that was passed in, with the columns 'next',
        'angle', 'prev_angle' and 'angle_diff' added in place.
    """
    geometry_by_group = point_df.groupby(level=0)["geometry"]
    point_df["next"] = geometry_by_group.shift(-1)

    # the last point of each group has no successor, so no angle is set for it
    has_next = point_df["next"].notna()

    current_coords = coordinate_array(point_df.loc[has_next, "geometry"].values)
    following_coords = coordinate_array(point_df.loc[has_next, "next"].values)

    point_df.loc[has_next, "angle"] = get_angle(current_coords, following_coords)

    previous_angle = point_df.groupby(level=0)["angle"].shift(1)
    point_df["prev_angle"] = previous_angle
    point_df["angle_diff"] = point_df["angle"] - previous_angle

    return point_df
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
    """Remove thin spikes in polygons.

    Note that this function might be slow. Should only be used if necessary.

    Args:
        gdf: GeoDataFrame of polygons
        tolerance: distance (usually meters) used as the minimum thickness
            for polygons to be eliminated. Any spike thinner than the tolerance
            will be removed.

    Returns:
        A GeoDataFrame of polygons without spikes thinner than the tolerance.
        The geometry column of the passed 'gdf' is overwritten and the
        same object is returned.
    """

    def _remove_spikes(geoms: NDArray[LinearRing]) -> NDArray[LinearRing]:
        # Ring-level worker handed to PolygonsAsRings.apply_numpy_func below.
        if not len(geoms):
            return geoms
        # positional 0..n-1 index so each ring can be traced through the steps
        geoms = to_geoseries(geoms).reset_index(drop=True)

        # one row per unique vertex, indexed by the ring it came from
        points = (
            extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
        )

        points = get_angle_between_indexed_points(points)

        # rings are treated as spiky when some vertex has an angle
        # difference in the half-open interval [180, 180.01)
        indices_with_spikes = points[
            lambda x: (x["angle_diff"] >= 180) & (x["angle_diff"] < 180.01)
        ].index.unique()

        rings_with_spikes = geoms[geoms.index.isin(indices_with_spikes)]
        rings_without_spikes = geoms[~geoms.index.isin(indices_with_spikes)]

        def to_buffered_rings_without_spikes(x):
            # Shrink by 'tolerance', close holes, take the exterior rings
            # and buffer those rings by ten times the tolerance.
            polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))

            return (
                polys.buffer(-tolerance, resolution=BUFFER_RES)
                .explode(index_parts=False)
                .pipe(close_all_holes)
                .pipe(get_exterior_ring)
                .buffer(tolerance * 10)
            )

        buffered = to_buffered_rings_without_spikes(
            rings_with_spikes.buffer(tolerance / 2, resolution=BUFFER_RES)
        )

        # keep only the vertices of the spiky rings that 'sfilter' matches
        # against the buffered, de-spiked rings
        points_without_spikes = (
            extract_unique_points(rings_with_spikes)
            .explode(index_parts=False)
            .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
        )

        # linearrings require at least 4 coordinate pairs, or three unique
        # points, so rings left with fewer than three points are dropped here
        points_without_spikes = points_without_spikes.loc[
            lambda x: x.groupby(level=0).size() >= 3
        ]

        # need an index from 0 to n-1 in 'linearrings'
        to_int_index = {
            ring_idx: i
            for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
        }
        int_indices = points_without_spikes.index.map(to_int_index)

        # rebuild the rings from the surviving vertices, keeping the ring labels
        as_lines = pd.Series(
            linearrings(
                get_coordinates(points_without_spikes.geometry.values),
                indices=int_indices,
            ),
            index=points_without_spikes.index.unique(),
        )
        as_lines = pd.concat([as_lines, rings_without_spikes])

        # the missing polygons are thin and/or spiky. Let's remove them
        # by putting None at their positions so the output length is unchanged
        missing = geoms.loc[~geoms.index.isin(as_lines.index)]

        missing = pd.Series(
            [None] * len(missing),
            index=missing.index.values,
        )

        # restore the original ring order
        return pd.concat([as_lines, missing]).sort_index()

    # NOTE(review): this assigns to the caller's GeoDataFrame rather than a
    # copy -- confirm that mutating the input is intended.
    gdf.geometry = (
        PolygonsAsRings(gdf.geometry).apply_numpy_func(_remove_spikes).to_numpy()
    )
    return gdf
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def coverage_clean(
    gdf: GeoDataFrame,
    tolerance: int | float,
    duplicate_action: str = "fix",
    remove_isolated: bool = False,
) -> GeoDataFrame:
    """Fix thin gaps, holes, slivers and double surfaces.

    Rules:
    - Holes (interiors) thinner than the tolerance are closed.
    - Gaps between polygons are filled if thinner than the tolerance.
    - Sliver polygons thinner than the tolerance are eliminated
    into the neighbor polygon with the longest shared border.
    - Double surfaces thinner than the tolerance are eliminated.
    If duplicate_action is "fix", thicker double surfaces will
    be updated from top to bottom of the GeoDataFrame's rows.
    - Line and point geometries are removed.
    - MultiPolygons are exploded to Polygons.
    - Index is reset.

    Args:
        gdf: GeoDataFrame to be cleaned.
        tolerance: distance (usually meters) used as the minimum thickness
            for polygons to be eliminated. Any gap, hole, sliver or double
            surface that are empty after a negative buffer of tolerance / 2
            are eliminated into the neighbor with the longest shared border.
        duplicate_action: Either "fix", "error" or "ignore".
            If "fix" (default), double surfaces thicker than the
            tolerance will be updated from top to bottom (function update_geometries)
            and then dissolved into the neighbor polygon with the longest shared border.
            If "error", an Exception is raised if there are any double surfaces thicker
            than the tolerance. If "ignore", double surfaces are kept as is.
        remove_isolated: If False (default), thin geometries that did not
            spatially join to any remaining polygon are appended to the
            result. If True, they are left out.

    Returns:
        A GeoDataFrame with cleaned polygons.

    Raises:
        ValueError: If duplicate_action is "error" and there are double
            surfaces that are not thin. Errors raised by the input checks
            and by the duplicate fixing helper also propagate.
    """

    # NOTE(review): the return value is discarded, so the early exit that
    # _cleaning_checks takes for empty input or a falsy tolerance does not
    # short-circuit this function -- confirm whether that is intended.
    _cleaning_checks(gdf, tolerance, duplicate_action)

    if not gdf.index.is_unique:
        gdf = gdf.reset_index(drop=True)

    gdf = close_thin_holes(gdf, tolerance)

    gaps = get_gaps(gdf, include_interiors=True)
    double = get_intersections(gdf)
    # running id so the thin double surfaces can be recognised after the concat
    double["_double_idx"] = range(len(double))

    gdf, slivers = split_out_slivers(gdf, tolerance)

    # "thin" means the geometry vanishes after shrinking by half the tolerance
    thin_gaps_and_double = pd.concat([gaps, double]).loc[
        lambda x: x.buffer(-tolerance / 2).is_empty
    ]

    all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()

    if not all_are_thin and duplicate_action == "fix":
        gdf, thin_gaps_and_double = _properly_fix_duplicates(
            gdf, double, slivers, thin_gaps_and_double, tolerance
        )

    elif not all_are_thin and duplicate_action == "error":
        raise ValueError("Large double surfaces.")

    # drop candidates that vanish after a tiny negative buffer
    to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True).loc[
        lambda x: ~x.buffer(-PRECISION / 10).is_empty
    ]
    to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
    gdf["_poly_idx"] = range(len(gdf))

    gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]

    # candidates without a joined polygon are "isolated"; the rest get a target
    joined = to_eliminate.sjoin(gdf_geoms_idx, how="left")
    isolated = joined[lambda x: x["_poly_idx"].isna()]
    intersecting = joined[lambda x: x["_poly_idx"].notna()]

    # one target polygon per candidate: overlay with the buffered polygons,
    # order with sort_long_first and keep the first match per candidate
    poly_idx_mapper: pd.Series = (
        clean_overlay(
            intersecting[["_eliminate_idx", "geometry"]],
            buff(gdf_geoms_idx, tolerance, resolution=BUFFER_RES),
            geom_type="polygon",
        )
        .pipe(sort_long_first)
        .drop_duplicates("_eliminate_idx")
        .set_index("_eliminate_idx")["_poly_idx"]
    )
    intersecting["_poly_idx"] = intersecting["_eliminate_idx"].map(poly_idx_mapper)
    without_double = update_geometries(intersecting).drop(
        columns=["_eliminate_idx", "_double_idx", "index_right"]
    )

    # merge each candidate into its target polygon, then drop residue that
    # vanishes after a tiny negative buffer
    cleaned = (
        dissexp(pd.concat([gdf, without_double]), by="_poly_idx", aggfunc="first")
        .reset_index(drop=True)
        .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
    )

    if not remove_isolated:
        cleaned = pd.concat(
            [
                cleaned,
                isolated.drop(
                    columns=[
                        "_double_idx",
                        "_eliminate_idx",
                        "_poly_idx",
                        "index_right",
                    ]
                ),
            ]
        )

    # polygons whose representative point is not matched by the cleaned
    # result are added back, minus whatever the cleaned result already covers
    missing_indices: pd.Index = sfilter_inverse(
        gdf.representative_point(), cleaned
    ).index

    missing = clean_overlay(
        gdf.loc[missing_indices].drop(columns="_poly_idx"),
        cleaned,
        how="difference",
        geom_type="polygon",
    )

    return pd.concat([cleaned, missing], ignore_index=True)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
    """Resolve thick double surfaces, retrying until only thin ones remain.

    Each attempt dissolves the thick double surfaces into 'gdf', splits out
    new slivers, and recomputes gaps and double surfaces. The loop stops as
    soon as every remaining double surface is thin, meaning it is empty
    after a negative buffer of tolerance / 2.

    Args:
        gdf: GeoDataFrame of polygons being cleaned.
        double: Double surfaces of 'gdf' with a '_double_idx' column.
        slivers: Slivers already split out of 'gdf'.
        thin_gaps_and_double: The thin gaps and thin double surfaces, with
            a '_double_idx' column.
        tolerance: Thickness threshold passed on to the sliver split and
            used for the thinness test.

    Returns:
        A tuple of the updated 'gdf' and the recomputed thin gaps and
        double surfaces.

    Raises:
        ValueError: If thick double surfaces remain after four attempts.
            The second exception argument holds their geometries.
    """
    for _ in range(4):
        gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
        gdf, more_slivers = split_out_slivers(gdf, tolerance)
        # NOTE(review): the accumulated slivers are never returned, so the
        # caller does not see slivers split out here -- confirm intent.
        slivers = pd.concat([slivers, more_slivers], ignore_index=True)
        gaps = get_gaps(gdf, include_interiors=True)
        double = get_intersections(gdf)
        double["_double_idx"] = range(len(double))
        thin_gaps_and_double = pd.concat([gaps, double]).loc[
            lambda x: x.buffer(-tolerance / 2).is_empty
        ]
        is_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"])
        if is_thin.all():
            return gdf, thin_gaps_and_double

    # Select with the element-wise mask. Reducing it with .all() first gives
    # a scalar, and indexing the frame with that raised KeyError instead of
    # the ValueError below. The mask is positional, so a non-unique index on
    # 'double' cannot interfere.
    not_thin = double.loc[~is_thin.to_numpy()]
    raise ValueError("Failed to properly fix thick double surfaces", not_thin.geometry)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _dissolve_thick_double_and_update(gdf, double, thin_double):
    """Overlay the thick double surfaces onto 'gdf' and remove overlaps.

    Args:
        gdf: GeoDataFrame of polygons.
        double: Double surfaces with a '_double_idx' column.
        thin_double: Rows whose '_double_idx' values mark the thin double
            surfaces. Everything in 'double' not listed here is thick.

    Returns:
        'gdf' updated with the thick double surfaces (overlay with
        how="update"), sorted with 'sort_large_first' and passed through
        'update_geometries'.
    """
    is_thick = ~double["_double_idx"].isin(thin_double["_double_idx"])
    thick = double.loc[is_thick].drop(columns="_double_idx")
    thick = update_geometries(sort_large_first(thick))

    updated = clean_overlay(gdf, thick, how="update")
    return update_geometries(sort_large_first(updated))
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _cleaning_checks(gdf, tolerance, duplicate_action):
|
|
312
|
+
if not len(gdf) or not tolerance:
|
|
313
|
+
return gdf
|
|
314
|
+
if get_geom_type(gdf) != "polygon":
|
|
315
|
+
raise ValueError("Must be polygons.")
|
|
316
|
+
if tolerance < PRECISION:
|
|
317
|
+
raise ValueError(
|
|
318
|
+
f"'tolerance' must be larger than {PRECISION} to avoid "
|
|
319
|
+
"problems with floating point precision."
|
|
320
|
+
)
|
|
321
|
+
if duplicate_action not in ["fix", "error", "ignore"]:
|
|
322
|
+
raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def split_out_slivers(
    gdf: GeoDataFrame | GeoSeries, tolerance: float | int
) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
    """Separate sliver geometries from the rest.

    A geometry counts as a sliver when it is empty after a negative buffer
    of half the tolerance.

    Args:
        gdf: GeoDataFrame or GeoSeries to split.
        tolerance: Thickness threshold. The buffer distance is
            -tolerance / 2.

    Returns:
        A tuple of (rows that are not slivers, rows that are slivers).
    """
    shrunk = gdf.buffer(-tolerance / 2)
    sliver_mask = shrunk.is_empty
    return gdf.loc[~sliver_mask], gdf.loc[sliver_mask]
|
|
@@ -22,7 +22,12 @@ def to_geoseries(obj: Any, crs: Any | None = None) -> GeoSeries:
|
|
|
22
22
|
pass
|
|
23
23
|
|
|
24
24
|
try:
|
|
25
|
-
|
|
25
|
+
if hasattr(obj.index, "values"):
|
|
26
|
+
# pandas objects
|
|
27
|
+
index = obj.index
|
|
28
|
+
else:
|
|
29
|
+
# list
|
|
30
|
+
index = None
|
|
26
31
|
except AttributeError:
|
|
27
32
|
index = None
|
|
28
33
|
|
|
@@ -296,9 +301,10 @@ def to_gdf(
|
|
|
296
301
|
if geom_col in obj.keys():
|
|
297
302
|
if isinstance(obj, pd.DataFrame):
|
|
298
303
|
notna = obj[geom_col].notna()
|
|
299
|
-
obj.loc[notna, geom_col] =
|
|
300
|
-
make_shapely_geoms(obj.loc[notna, geom_col])
|
|
304
|
+
obj.loc[notna, geom_col] = list(
|
|
305
|
+
make_shapely_geoms(obj.loc[notna, geom_col])
|
|
301
306
|
)
|
|
307
|
+
obj[geom_col] = GeoSeries(obj[geom_col])
|
|
302
308
|
return GeoDataFrame(obj, geometry=geom_col, crs=crs, **kwargs)
|
|
303
309
|
if isinstance(obj[geom_col], Geometry):
|
|
304
310
|
return GeoDataFrame(
|
|
@@ -14,9 +14,9 @@ from .overlay import clean_overlay
|
|
|
14
14
|
|
|
15
15
|
def update_geometries(
|
|
16
16
|
gdf: GeoDataFrame,
|
|
17
|
+
geom_type: str | None = None,
|
|
17
18
|
keep_geom_type: bool = True,
|
|
18
19
|
grid_size: int | None = None,
|
|
19
|
-
copy: bool = True,
|
|
20
20
|
) -> GeoDataFrame:
|
|
21
21
|
"""Puts geometries on top of each other rowwise.
|
|
22
22
|
|
|
@@ -29,9 +29,11 @@ def update_geometries(
|
|
|
29
29
|
of intersection resulting in multiple geometry types or
|
|
30
30
|
GeometryCollections. If False, return all resulting geometries
|
|
31
31
|
(potentially mixed types).
|
|
32
|
+
geom_type: Optionally specify what geometry type to keep.,
|
|
33
|
+
if there are mixed geometry types. Must be either "polygon",
|
|
34
|
+
"line" or "point".
|
|
32
35
|
grid_size: Precision grid size to round the geometries. Will use the highest
|
|
33
36
|
precision of the inputs by default.
|
|
34
|
-
copy: Defaults to True.
|
|
35
37
|
|
|
36
38
|
Example
|
|
37
39
|
------
|
|
@@ -78,56 +80,51 @@ def update_geometries(
|
|
|
78
80
|
if len(gdf) <= 1:
|
|
79
81
|
return gdf
|
|
80
82
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if keep_geom_type:
|
|
83
|
+
if geom_type:
|
|
84
|
+
gdf = to_single_geom_type(gdf, geom_type)
|
|
85
|
+
keep_geom_type = True
|
|
86
|
+
elif keep_geom_type:
|
|
87
87
|
geom_type = get_geom_type(gdf)
|
|
88
88
|
if geom_type == "mixed":
|
|
89
89
|
raise ValueError("Cannot have mixed geometries when keep_geom_type is True")
|
|
90
90
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if any(geom.equals(geom2) for geom2 in geometries):
|
|
95
|
-
continue
|
|
96
|
-
|
|
97
|
-
try:
|
|
98
|
-
new = difference(geom, unioned, grid_size=grid_size)
|
|
99
|
-
except GEOSException:
|
|
100
|
-
try:
|
|
101
|
-
geom = make_valid(geom)
|
|
102
|
-
new = difference(geom, unioned, grid_size=grid_size)
|
|
103
|
-
except GEOSException:
|
|
104
|
-
unioned = to_single_geom_type(unioned, geom_type=geom_type)
|
|
105
|
-
new = difference(geom, unioned, grid_size=grid_size)
|
|
91
|
+
geom_col = gdf._geometry_column_name
|
|
92
|
+
index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
|
|
93
|
+
gdf = gdf.reset_index(drop=True)
|
|
106
94
|
|
|
107
|
-
|
|
108
|
-
|
|
95
|
+
tree = STRtree(gdf.geometry.values)
|
|
96
|
+
left, right = tree.query(gdf.geometry.values, predicate="intersects")
|
|
97
|
+
indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
|
|
98
|
+
|
|
99
|
+
# select geometries from 'right', index from 'left', dissolve by 'left'
|
|
100
|
+
erasers = (
|
|
101
|
+
pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
|
|
102
|
+
.groupby(level=0)
|
|
103
|
+
.agg(unary_union)
|
|
104
|
+
)
|
|
109
105
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
106
|
+
# match up the aggregated erasers by index
|
|
107
|
+
erased = difference(
|
|
108
|
+
gdf.geometry.loc[erasers.index],
|
|
109
|
+
erasers,
|
|
110
|
+
grid_size=grid_size,
|
|
111
|
+
)
|
|
115
112
|
|
|
116
|
-
|
|
113
|
+
gdf.loc[erased.index, geom_col] = erased
|
|
117
114
|
|
|
118
|
-
|
|
119
|
-
geometries.append(new)
|
|
120
|
-
indices.append(i)
|
|
115
|
+
gdf = gdf.loc[~gdf.is_empty]
|
|
121
116
|
|
|
122
|
-
|
|
117
|
+
gdf.index = gdf.index.map(index_mapper)
|
|
123
118
|
|
|
124
119
|
if keep_geom_type:
|
|
125
|
-
|
|
120
|
+
gdf = to_single_geom_type(gdf, geom_type)
|
|
126
121
|
|
|
127
|
-
return
|
|
122
|
+
return gdf
|
|
128
123
|
|
|
129
124
|
|
|
130
|
-
def get_intersections(
|
|
125
|
+
def get_intersections(
|
|
126
|
+
gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool = True
|
|
127
|
+
) -> GeoDataFrame:
|
|
131
128
|
"""Find geometries that intersect in a GeoDataFrame.
|
|
132
129
|
|
|
133
130
|
Does an intersection with itself and keeps only the geometries that appear
|
|
@@ -140,6 +137,11 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDat
|
|
|
140
137
|
|
|
141
138
|
Args:
|
|
142
139
|
gdf: GeoDataFrame of polygons.
|
|
140
|
+
geom_type: Optionally specify which geometry type to keep.
|
|
141
|
+
Either "polygon", "line" or "point".
|
|
142
|
+
keep_geom_type: Whether to keep the original geometry type.
|
|
143
|
+
If mixed geometry types and keep_geom_type=True,
|
|
144
|
+
an exception is raised.
|
|
143
145
|
|
|
144
146
|
Returns:
|
|
145
147
|
A GeoDataFrame of the overlapping polygons.
|
|
@@ -197,20 +199,27 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDat
|
|
|
197
199
|
"""
|
|
198
200
|
if isinstance(gdf, GeoSeries):
|
|
199
201
|
gdf = GeoDataFrame({"geometry": gdf}, crs=gdf.crs)
|
|
202
|
+
was_geoseries = True
|
|
203
|
+
else:
|
|
204
|
+
was_geoseries = False
|
|
200
205
|
|
|
201
206
|
idx_name = gdf.index.name
|
|
202
207
|
gdf = gdf.assign(orig_idx=gdf.index).reset_index(drop=True)
|
|
203
208
|
|
|
204
|
-
duplicated_geoms = _get_intersecting_geometries(
|
|
205
|
-
|
|
206
|
-
)
|
|
209
|
+
duplicated_geoms = _get_intersecting_geometries(
|
|
210
|
+
gdf, geom_type, keep_geom_type
|
|
211
|
+
).pipe(clean_geoms)
|
|
207
212
|
|
|
208
213
|
duplicated_geoms.index = duplicated_geoms["orig_idx"].values
|
|
209
214
|
duplicated_geoms.index.name = idx_name
|
|
215
|
+
if was_geoseries:
|
|
216
|
+
return duplicated_geoms.geometry
|
|
210
217
|
return duplicated_geoms.drop(columns="orig_idx")
|
|
211
218
|
|
|
212
219
|
|
|
213
|
-
def _get_intersecting_geometries(
|
|
220
|
+
def _get_intersecting_geometries(
|
|
221
|
+
gdf: GeoDataFrame, geom_type, keep_geom_type
|
|
222
|
+
) -> GeoDataFrame:
|
|
214
223
|
right = gdf[[gdf._geometry_column_name]]
|
|
215
224
|
right["idx_right"] = right.index
|
|
216
225
|
|
|
@@ -221,9 +230,22 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
|
|
|
221
230
|
)
|
|
222
231
|
left["idx_left"] = left.index
|
|
223
232
|
|
|
224
|
-
|
|
233
|
+
def are_not_identical(df):
|
|
234
|
+
return df["idx_left"] != df["idx_right"]
|
|
225
235
|
|
|
226
|
-
if geom_type
|
|
236
|
+
if geom_type or get_geom_type(gdf) != "mixed":
|
|
237
|
+
intersected = clean_overlay(
|
|
238
|
+
left,
|
|
239
|
+
right,
|
|
240
|
+
how="intersection",
|
|
241
|
+
geom_type=geom_type,
|
|
242
|
+
keep_geom_type=keep_geom_type,
|
|
243
|
+
).loc[are_not_identical]
|
|
244
|
+
else:
|
|
245
|
+
if keep_geom_type:
|
|
246
|
+
raise ValueError(
|
|
247
|
+
"Cannot set keep_geom_type=True when the geom_type is mixed."
|
|
248
|
+
)
|
|
227
249
|
gdf = make_all_singlepart(gdf)
|
|
228
250
|
intersected = []
|
|
229
251
|
for geom_type in ["polygon", "line", "point"]:
|
|
@@ -232,11 +254,7 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
|
|
|
232
254
|
intersected += [
|
|
233
255
|
clean_overlay(left, right, how="intersection", geom_type=geom_type)
|
|
234
256
|
]
|
|
235
|
-
intersected = pd.concat(intersected, ignore_index=True).loc[
|
|
236
|
-
else:
|
|
237
|
-
intersected = clean_overlay(
|
|
238
|
-
left, right, how="intersection", geom_type=geom_type
|
|
239
|
-
).loc[not_identical]
|
|
257
|
+
intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
|
|
240
258
|
|
|
241
259
|
# make sure it's correct by sjoining a point inside the polygons
|
|
242
260
|
points_joined = intersected.representative_point().to_frame().sjoin(intersected)
|
sgis/geopandas_tools/general.py
CHANGED
|
@@ -25,6 +25,12 @@ from shapely.ops import unary_union
|
|
|
25
25
|
from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
|
|
26
26
|
|
|
27
27
|
|
|
28
|
+
def split_geom_types(
    gdf: GeoDataFrame | GeoSeries,
) -> tuple[GeoDataFrame | GeoSeries, ...]:
    """Split the input into one object per distinct geometry type.

    Args:
        gdf: GeoDataFrame or GeoSeries, possibly with mixed geometry types.

    Returns:
        A tuple with one subset of 'gdf' per unique value of
        'gdf.geom_type', in order of first appearance. The tuple is empty
        if 'gdf' has no rows.
    """
    # compute the type labels once instead of once per unique type
    geom_types = gdf.geom_type
    return tuple(
        gdf.loc[geom_types == geom_type] for geom_type in geom_types.unique()
    )
|
|
32
|
+
|
|
33
|
+
|
|
28
34
|
def get_common_crs(
|
|
29
35
|
iterable: Iterable[Hashable], strict: bool = False
|
|
30
36
|
) -> pyproj.CRS | None:
|
|
@@ -453,6 +459,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
|
453
459
|
>>> sg.qtm(lines, "l")
|
|
454
460
|
"""
|
|
455
461
|
|
|
462
|
+
if not all(isinstance(gdf, (GeoSeries, GeoDataFrame)) for gdf in gdfs):
|
|
463
|
+
raise TypeError("gdf must be GeoDataFrame or GeoSeries")
|
|
464
|
+
|
|
456
465
|
if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
|
|
457
466
|
raise ValueError("Cannot convert points to lines.")
|
|
458
467
|
|
|
@@ -486,7 +495,12 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
|
|
|
486
495
|
if copy:
|
|
487
496
|
gdf = gdf.copy()
|
|
488
497
|
|
|
489
|
-
|
|
498
|
+
mapped = gdf.geometry.map(_shapely_geometry_to_lines)
|
|
499
|
+
try:
|
|
500
|
+
gdf.geometry = mapped
|
|
501
|
+
except AttributeError:
|
|
502
|
+
# geoseries
|
|
503
|
+
gdf.loc[:] = mapped
|
|
490
504
|
|
|
491
505
|
gdf = to_single_geom_type(gdf, "line")
|
|
492
506
|
|
|
@@ -97,6 +97,9 @@ def get_neighbor_indices(
|
|
|
97
97
|
if gdf.crs != neighbors.crs:
|
|
98
98
|
raise ValueError(f"'crs' mismatch. Got {gdf.crs} and {neighbors.crs}")
|
|
99
99
|
|
|
100
|
+
if isinstance(neighbors, GeoSeries):
|
|
101
|
+
neighbors = neighbors.to_frame()
|
|
102
|
+
|
|
100
103
|
# buffer and keep only geometry column
|
|
101
104
|
if max_distance and predicate != "nearest":
|
|
102
105
|
gdf = gdf.buffer(max_distance).to_frame()
|
|
@@ -116,6 +119,15 @@ def get_neighbor_indices(
|
|
|
116
119
|
return joined["neighbor_index"]
|
|
117
120
|
|
|
118
121
|
|
|
122
|
+
def get_neighbor_dfs(
    df: GeoDataFrame | DataFrame,
    neighbor_mapper: Series,
) -> list[GeoDataFrame | DataFrame]:
    """Get one subset of 'df' per unique index label in 'neighbor_mapper'.

    Args:
        df: The frame to select neighbor rows from.
        neighbor_mapper: Series whose values are index labels of 'df'.
            Rows sharing an index label belong to the same group
            (presumably the output of get_neighbor_indices -- verify
            against the caller).

    Returns:
        A list with one frame per unique index label of 'neighbor_mapper',
        in order of first appearance, each holding the rows of 'df' whose
        index is among that label's values.
    """
    # .loc[[label]] always gives a Series, also when the label occurs only
    # once. Plain neighbor_mapper[label] gives a scalar in that case, which
    # Index.isin rejects with a TypeError.
    return [
        df[df.index.isin(neighbor_mapper.loc[[label]])]
        for label in neighbor_mapper.index.unique()
    ]
|
|
129
|
+
|
|
130
|
+
|
|
119
131
|
def get_all_distances(
|
|
120
132
|
gdf: GeoDataFrame | GeoSeries, neighbors: GeoDataFrame | GeoSeries
|
|
121
133
|
) -> DataFrame:
|
sgis/geopandas_tools/overlay.py
CHANGED
|
@@ -28,6 +28,11 @@ from .general import clean_geoms
|
|
|
28
28
|
from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
DEFAULT_GRID_SIZE = None
|
|
32
|
+
DEFAULT_LSUFFIX = "_1"
|
|
33
|
+
DEFAULT_RSUFFIX = "_2"
|
|
34
|
+
|
|
35
|
+
|
|
31
36
|
def clean_overlay(
|
|
32
37
|
df1: GeoDataFrame,
|
|
33
38
|
df2: GeoDataFrame,
|
|
@@ -35,8 +40,8 @@ def clean_overlay(
|
|
|
35
40
|
keep_geom_type: bool = True,
|
|
36
41
|
geom_type: str | None = None,
|
|
37
42
|
grid_size: float | None = None,
|
|
38
|
-
lsuffix: str =
|
|
39
|
-
rsuffix: str =
|
|
43
|
+
lsuffix: str = DEFAULT_LSUFFIX,
|
|
44
|
+
rsuffix: str = DEFAULT_RSUFFIX,
|
|
40
45
|
) -> GeoDataFrame:
|
|
41
46
|
"""Fixes and explodes geometries before doing a shapely overlay, then cleans up.
|
|
42
47
|
|
|
@@ -132,18 +137,22 @@ def clean_overlay(
|
|
|
132
137
|
df1 = DataFrame(df1).reset_index(drop=True)
|
|
133
138
|
df2 = DataFrame(df2).reset_index(drop=True)
|
|
134
139
|
|
|
135
|
-
overlayed =
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
140
|
+
overlayed = (
|
|
141
|
+
gpd.GeoDataFrame(
|
|
142
|
+
_shapely_pd_overlay(
|
|
143
|
+
df1,
|
|
144
|
+
df2,
|
|
145
|
+
how=how,
|
|
146
|
+
grid_size=grid_size,
|
|
147
|
+
lsuffix=lsuffix,
|
|
148
|
+
rsuffix=rsuffix,
|
|
149
|
+
),
|
|
150
|
+
geometry="geometry",
|
|
151
|
+
crs=crs,
|
|
152
|
+
)
|
|
153
|
+
.pipe(clean_geoms)
|
|
154
|
+
.pipe(make_all_singlepart, ignore_index=True)
|
|
155
|
+
)
|
|
147
156
|
|
|
148
157
|
if keep_geom_type:
|
|
149
158
|
overlayed = to_single_geom_type(overlayed, geom_type)
|
|
@@ -200,9 +209,9 @@ def _shapely_pd_overlay(
|
|
|
200
209
|
df1: DataFrame,
|
|
201
210
|
df2: DataFrame,
|
|
202
211
|
how: str,
|
|
203
|
-
grid_size: float,
|
|
204
|
-
lsuffix,
|
|
205
|
-
rsuffix,
|
|
212
|
+
grid_size: float = DEFAULT_GRID_SIZE,
|
|
213
|
+
lsuffix=DEFAULT_LSUFFIX,
|
|
214
|
+
rsuffix=DEFAULT_RSUFFIX,
|
|
206
215
|
) -> DataFrame:
|
|
207
216
|
if not grid_size and not len(df1) or not len(df2):
|
|
208
217
|
return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
|