ssb-sgis 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +1 -0
- sgis/conf.py +62 -6
- sgis/geopandas_tools/cleaning.py +583 -1577
- sgis/geopandas_tools/duplicates.py +17 -3
- sgis/helpers.py +22 -0
- sgis/io/__init__.py +6 -0
- sgis/io/dapla_functions.py +273 -101
- sgis/maps/explore.py +23 -5
- {ssb_sgis-1.1.1.dist-info → ssb_sgis-1.1.2.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.1.dist-info → ssb_sgis-1.1.2.dist-info}/RECORD +12 -11
- {ssb_sgis-1.1.1.dist-info → ssb_sgis-1.1.2.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.1.dist-info → ssb_sgis-1.1.2.dist-info}/WHEEL +0 -0
sgis/geopandas_tools/cleaning.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import re
|
|
2
2
|
import warnings
|
|
3
|
-
from collections.abc import Callable
|
|
4
3
|
from typing import Any
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
@@ -8,90 +7,62 @@ import pandas as pd
|
|
|
8
7
|
import shapely
|
|
9
8
|
from geopandas import GeoDataFrame
|
|
10
9
|
from geopandas import GeoSeries
|
|
10
|
+
from geopandas.array import GeometryArray
|
|
11
11
|
from numpy.typing import NDArray
|
|
12
|
-
from shapely import Geometry
|
|
13
|
-
from shapely import STRtree
|
|
14
12
|
from shapely import extract_unique_points
|
|
13
|
+
from shapely import force_2d
|
|
15
14
|
from shapely import get_coordinates
|
|
15
|
+
from shapely import get_exterior_ring
|
|
16
|
+
from shapely import get_parts
|
|
16
17
|
from shapely import linearrings
|
|
18
|
+
from shapely import linestrings
|
|
19
|
+
from shapely import make_valid
|
|
17
20
|
from shapely import polygons
|
|
18
21
|
from shapely.errors import GEOSException
|
|
19
22
|
from shapely.geometry import LinearRing
|
|
20
23
|
from shapely.geometry import LineString
|
|
21
24
|
from shapely.geometry import Point
|
|
22
25
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"""Placeholder."""
|
|
29
|
-
|
|
30
|
-
@staticmethod
|
|
31
|
-
def njit(func) -> Callable:
|
|
32
|
-
"""Placeholder that does nothing."""
|
|
33
|
-
|
|
34
|
-
def wrapper(*args, **kwargs):
|
|
35
|
-
return func(*args, **kwargs)
|
|
36
|
-
|
|
37
|
-
return wrapper
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
from ..debug_config import _DEBUG_CONFIG
|
|
41
|
-
from ..maps.maps import explore
|
|
42
|
-
from .conversion import to_gdf
|
|
26
|
+
from ..networkanalysis.closing_network_holes import get_angle
|
|
27
|
+
from .buffer_dissolve_explode import buff
|
|
28
|
+
from .buffer_dissolve_explode import dissexp
|
|
29
|
+
from .buffer_dissolve_explode import dissexp_by_cluster
|
|
30
|
+
from .conversion import coordinate_array
|
|
43
31
|
from .conversion import to_geoseries
|
|
32
|
+
from .duplicates import get_intersections
|
|
44
33
|
from .duplicates import update_geometries
|
|
45
34
|
from .general import clean_geoms
|
|
35
|
+
from .general import sort_large_first
|
|
36
|
+
from .general import sort_small_first
|
|
37
|
+
from .general import to_lines
|
|
46
38
|
from .geometry_types import make_all_singlepart
|
|
47
|
-
from .geometry_types import to_single_geom_type
|
|
48
39
|
from .overlay import clean_overlay
|
|
40
|
+
from .polygon_operations import close_all_holes
|
|
49
41
|
from .polygon_operations import eliminate_by_longest
|
|
50
|
-
from .polygon_operations import
|
|
42
|
+
from .polygon_operations import get_cluster_mapper
|
|
43
|
+
from .polygon_operations import get_gaps
|
|
51
44
|
from .polygons_as_rings import PolygonsAsRings
|
|
52
45
|
from .sfilter import sfilter
|
|
53
|
-
from .sfilter import sfilter_inverse
|
|
54
46
|
|
|
55
47
|
warnings.simplefilter(action="ignore", category=UserWarning)
|
|
56
48
|
warnings.simplefilter(action="ignore", category=RuntimeWarning)
|
|
57
49
|
|
|
58
50
|
|
|
59
|
-
PRECISION = 1e-
|
|
51
|
+
PRECISION = 1e-4
|
|
60
52
|
BUFFER_RES = 50
|
|
61
53
|
|
|
62
54
|
|
|
63
|
-
# def explore(*args, **kwargs):
|
|
64
|
-
# pass
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# def explore_locals(*args, **kwargs):
|
|
68
|
-
# pass
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
# def no_njit(func):
|
|
72
|
-
# def wrapper(*args, **kwargs):
|
|
73
|
-
# result = func(*args, **kwargs)
|
|
74
|
-
# return result
|
|
75
|
-
|
|
76
|
-
# return wrapper
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# numba.njit = no_njit
|
|
80
|
-
|
|
81
|
-
|
|
82
55
|
def coverage_clean(
|
|
83
56
|
gdf: GeoDataFrame,
|
|
84
57
|
tolerance: int | float,
|
|
85
|
-
mask
|
|
86
|
-
|
|
87
|
-
|
|
58
|
+
mask=None,
|
|
59
|
+
*,
|
|
60
|
+
duplicate_action: str = "fix",
|
|
61
|
+
grid_sizes: tuple[None | int] = (None,),
|
|
62
|
+
logger=None,
|
|
88
63
|
) -> GeoDataFrame:
|
|
89
64
|
"""Fix thin gaps, holes, slivers and double surfaces.
|
|
90
65
|
|
|
91
|
-
The operations might raise GEOSExceptions, so it might be nessecary to set
|
|
92
|
-
the 'grid_sizes' argument, it might also be a good idea to run coverage_clean
|
|
93
|
-
twice to fill gaps resulting from these GEOSExceptions.
|
|
94
|
-
|
|
95
66
|
Rules:
|
|
96
67
|
- Holes (interiors) thinner than the tolerance are closed.
|
|
97
68
|
- Gaps between polygons are filled if thinner than the tolerance.
|
|
@@ -99,9 +70,9 @@ def coverage_clean(
|
|
|
99
70
|
into the neighbor polygon with the longest shared border.
|
|
100
71
|
- Double surfaces thinner than the tolerance are eliminated.
|
|
101
72
|
If duplicate_action is "fix", thicker double surfaces will
|
|
102
|
-
be updated.
|
|
103
|
-
- Line and point geometries are removed
|
|
104
|
-
- MultiPolygons
|
|
73
|
+
be updated from top to bottom of the GeoDataFrame's rows.
|
|
74
|
+
- Line and point geometries are removed.
|
|
75
|
+
- MultiPolygons are exploded to Polygons.
|
|
105
76
|
- Index is reset.
|
|
106
77
|
|
|
107
78
|
Args:
|
|
@@ -110,1625 +81,660 @@ def coverage_clean(
|
|
|
110
81
|
for polygons to be eliminated. Any gap, hole, sliver or double
|
|
111
82
|
surface that are empty after a negative buffer of tolerance / 2
|
|
112
83
|
are eliminated into the neighbor with the longest shared border.
|
|
113
|
-
mask:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
84
|
+
mask: Unused.
|
|
85
|
+
duplicate_action: Either "fix", "error" or "ignore".
|
|
86
|
+
If "fix" (default), double surfaces thicker than the
|
|
87
|
+
tolerance will be updated from top to bottom (function update_geometries)
|
|
88
|
+
and then dissolved into the neighbor polygon with the longest shared border.
|
|
89
|
+
If "error", an Exception is raised if there are any double surfaces thicker
|
|
90
|
+
than the tolerance. If "ignore", double surfaces are kept as is.
|
|
91
|
+
grid_sizes: One or more grid_sizes used in overlay and dissolve operations that
|
|
92
|
+
might raise a GEOSException. Defaults to (None,), meaning no grid_sizes.
|
|
93
|
+
logger: Optional.
|
|
117
94
|
|
|
118
95
|
Returns:
|
|
119
96
|
A GeoDataFrame with cleaned polygons.
|
|
97
|
+
|
|
120
98
|
"""
|
|
121
99
|
if not len(gdf):
|
|
122
100
|
return gdf
|
|
123
101
|
|
|
124
|
-
|
|
102
|
+
_cleaning_checks(gdf, tolerance, duplicate_action)
|
|
125
103
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
# more_than_one, gdf._geometry_column_name
|
|
129
|
-
# ].apply(_unary_union_for_notna)
|
|
130
|
-
|
|
131
|
-
if mask is not None:
|
|
132
|
-
try:
|
|
133
|
-
mask: GeoDataFrame = mask[["geometry"]].pipe(make_all_singlepart)
|
|
134
|
-
except Exception:
|
|
135
|
-
mask: GeoDataFrame = (
|
|
136
|
-
to_geoseries(mask).to_frame("geometry").pipe(make_all_singlepart)
|
|
137
|
-
)
|
|
104
|
+
if not gdf.index.is_unique:
|
|
105
|
+
gdf = gdf.reset_index(drop=True)
|
|
138
106
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
# ).pipe(make_all_singlepart)
|
|
142
|
-
# mask = GeoDataFrame(
|
|
143
|
-
# {
|
|
144
|
-
# "geometry": [
|
|
145
|
-
# mask.union_all()
|
|
146
|
-
# .buffer(
|
|
147
|
-
# PRECISION,
|
|
148
|
-
# resolution=1,
|
|
149
|
-
# join_style=2,
|
|
150
|
-
# )
|
|
151
|
-
# .buffer(
|
|
152
|
-
# -PRECISION,
|
|
153
|
-
# resolution=1,
|
|
154
|
-
# join_style=2,
|
|
155
|
-
# )
|
|
156
|
-
# ]
|
|
157
|
-
# },
|
|
158
|
-
# crs=gdf.crs,
|
|
159
|
-
# ).pipe(make_all_singlepart)
|
|
160
|
-
# # gaps = shapely.union_all(get_gaps(mask).geometry.values)
|
|
161
|
-
# # mask = shapely.get_parts(extract_unique_points(mask.geometry.values))
|
|
162
|
-
# # not_by_gaps = shapely.distance(mask, gaps) > PRECISION
|
|
163
|
-
# # mask = GeoDataFrame({"geometry": mask[not_by_gaps]})
|
|
164
|
-
|
|
165
|
-
gdf = snap_polygons(gdf, tolerance, mask=mask, snap_to_anchors=snap_to_anchors)
|
|
166
|
-
|
|
167
|
-
if mask is not None:
|
|
168
|
-
missing_from_mask = clean_overlay(
|
|
169
|
-
mask, gdf, how="difference", geom_type="polygon"
|
|
170
|
-
).loc[lambda x: x.buffer(-tolerance + PRECISION).is_empty]
|
|
171
|
-
gdf, _ = eliminate_by_longest(gdf, missing_from_mask)
|
|
172
|
-
|
|
173
|
-
missing_from_gdf = sfilter_inverse(gdf_original, gdf.buffer(-PRECISION)).loc[
|
|
174
|
-
lambda x: (~x.buffer(-PRECISION).is_empty)
|
|
107
|
+
gdf = make_all_singlepart(gdf).loc[
|
|
108
|
+
lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
|
|
175
109
|
]
|
|
176
|
-
|
|
177
|
-
|
|
110
|
+
|
|
111
|
+
gdf = safe_simplify(gdf, PRECISION)
|
|
112
|
+
|
|
113
|
+
gdf = (
|
|
114
|
+
clean_geoms(gdf)
|
|
115
|
+
.pipe(make_all_singlepart)
|
|
116
|
+
.loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
|
|
178
117
|
)
|
|
179
118
|
|
|
119
|
+
try:
|
|
120
|
+
gaps = get_gaps(gdf, include_interiors=True)
|
|
121
|
+
except GEOSException:
|
|
122
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
123
|
+
try:
|
|
124
|
+
gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
|
|
125
|
+
if grid_size:
|
|
126
|
+
# in order to not get more gaps
|
|
127
|
+
gaps.geometry = gaps.buffer(grid_size)
|
|
128
|
+
break
|
|
129
|
+
except GEOSException as e:
|
|
130
|
+
if i == len(grid_sizes) - 1:
|
|
131
|
+
explore_geosexception(e, gdf, logger=logger)
|
|
132
|
+
raise e
|
|
180
133
|
|
|
181
|
-
|
|
182
|
-
gdf: GeoDataFrame,
|
|
183
|
-
tolerance: int | float,
|
|
184
|
-
mask: GeoDataFrame | GeoSeries | Geometry | None = None,
|
|
185
|
-
snap_to_anchors: bool = True,
|
|
186
|
-
) -> GeoDataFrame:
|
|
187
|
-
if not len(gdf):
|
|
188
|
-
return gdf.copy()
|
|
134
|
+
gaps["_was_gap"] = 1
|
|
189
135
|
|
|
190
|
-
|
|
136
|
+
if duplicate_action == "ignore":
|
|
137
|
+
double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
|
|
138
|
+
double["_double_idx"] = None
|
|
139
|
+
else:
|
|
140
|
+
double = get_intersections(gdf)
|
|
141
|
+
double["_double_idx"] = range(len(double))
|
|
191
142
|
|
|
192
|
-
|
|
143
|
+
gdf, slivers = split_out_slivers(gdf, tolerance)
|
|
193
144
|
|
|
194
|
-
gdf = (
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
.
|
|
145
|
+
gdf["_poly_idx"] = range(len(gdf))
|
|
146
|
+
|
|
147
|
+
thin_gaps_and_double = pd.concat([gaps, double]).loc[
|
|
148
|
+
lambda x: x.buffer(-tolerance / 2).is_empty
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
|
|
152
|
+
|
|
153
|
+
if not all_are_thin and duplicate_action == "fix":
|
|
154
|
+
gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
|
|
155
|
+
gdf, double, slivers, thin_gaps_and_double, tolerance
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
elif not all_are_thin and duplicate_action == "error":
|
|
159
|
+
raise ValueError("Large double surfaces.")
|
|
160
|
+
|
|
161
|
+
to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
|
|
162
|
+
to_eliminate = safe_simplify(to_eliminate, PRECISION)
|
|
163
|
+
|
|
164
|
+
to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
|
|
165
|
+
|
|
166
|
+
to_eliminate = try_for_grid_size(
|
|
167
|
+
split_by_neighbors,
|
|
168
|
+
grid_sizes=grid_sizes,
|
|
169
|
+
args=(to_eliminate, gdf),
|
|
170
|
+
kwargs=dict(tolerance=tolerance),
|
|
198
171
|
)
|
|
199
172
|
|
|
200
|
-
|
|
173
|
+
to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
|
|
201
174
|
|
|
202
|
-
|
|
203
|
-
# gdf = gdf[lambda x: ~x.buffer(-tolerance / 3).is_empty]
|
|
175
|
+
to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))
|
|
204
176
|
|
|
205
|
-
|
|
206
|
-
# gdf.geometry.buffer(tolerance / 2, resolution=1, join_style=2)
|
|
207
|
-
# .buffer(-tolerance, resolution=1, join_style=2)
|
|
208
|
-
# .buffer(tolerance / 2, resolution=1, join_style=2)
|
|
209
|
-
# .pipe(to_lines)
|
|
210
|
-
# .buffer(tolerance)
|
|
211
|
-
# )
|
|
177
|
+
gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
|
|
212
178
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
mask=mask,
|
|
220
|
-
snap_to_anchors=snap_to_anchors,
|
|
179
|
+
poly_idx_mapper: pd.Series = (
|
|
180
|
+
clean_overlay(
|
|
181
|
+
buff(
|
|
182
|
+
to_eliminate[["_eliminate_idx", "geometry"]],
|
|
183
|
+
tolerance,
|
|
184
|
+
resolution=BUFFER_RES,
|
|
221
185
|
),
|
|
186
|
+
gdf_geoms_idx,
|
|
187
|
+
geom_type="polygon",
|
|
222
188
|
)
|
|
223
|
-
.
|
|
189
|
+
.pipe(sort_large_first)
|
|
190
|
+
.drop_duplicates("_eliminate_idx")
|
|
191
|
+
.set_index("_eliminate_idx")["_poly_idx"]
|
|
224
192
|
)
|
|
225
193
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
194
|
+
to_eliminate["_poly_idx"] = to_eliminate["_eliminate_idx"].map(poly_idx_mapper)
|
|
195
|
+
isolated = to_eliminate[lambda x: x["_poly_idx"].isna()]
|
|
196
|
+
intersecting = to_eliminate[lambda x: x["_poly_idx"].notna()]
|
|
197
|
+
|
|
198
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
199
|
+
try:
|
|
200
|
+
without_double = update_geometries(
|
|
201
|
+
intersecting, geom_type="polygon", grid_size=grid_size
|
|
202
|
+
).drop(columns=["_eliminate_idx", "_double_idx"])
|
|
203
|
+
break
|
|
204
|
+
except GEOSException as e:
|
|
205
|
+
if i == len(grid_sizes) - 1:
|
|
206
|
+
explore_geosexception(e, gdf, intersecting, isolated, logger=logger)
|
|
207
|
+
raise e
|
|
208
|
+
|
|
209
|
+
not_really_isolated = isolated[["geometry", "_eliminate_idx", "_cluster"]].merge(
|
|
210
|
+
without_double.drop(columns=["geometry"]),
|
|
211
|
+
on="_cluster",
|
|
212
|
+
how="inner",
|
|
230
213
|
)
|
|
231
214
|
|
|
232
|
-
|
|
233
|
-
lambda x: ~x.
|
|
215
|
+
really_isolated = isolated.loc[
|
|
216
|
+
lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
|
|
234
217
|
]
|
|
235
218
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
gdf,
|
|
240
|
-
# gdf_orig,
|
|
241
|
-
# thin,
|
|
242
|
-
mask,
|
|
243
|
-
missing,
|
|
244
|
-
mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
|
|
245
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
246
|
-
),
|
|
247
|
-
gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
|
|
248
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
249
|
-
),
|
|
250
|
-
center=(5.36765872, 59.01199837, 1),
|
|
219
|
+
is_gap = really_isolated["_was_gap"] == 1
|
|
220
|
+
isolated_gaps = really_isolated.loc[is_gap, ["geometry"]].sjoin_nearest(
|
|
221
|
+
gdf, max_distance=PRECISION
|
|
251
222
|
)
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
mask,
|
|
257
|
-
missing,
|
|
258
|
-
mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
|
|
259
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
260
|
-
),
|
|
261
|
-
gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
|
|
262
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
263
|
-
),
|
|
264
|
-
center=(5.36820681, 59.01182298, 2),
|
|
223
|
+
really_isolated = really_isolated[~is_gap]
|
|
224
|
+
|
|
225
|
+
really_isolated["_poly_idx"] = (
|
|
226
|
+
really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
|
|
265
227
|
)
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
228
|
+
|
|
229
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
230
|
+
try:
|
|
231
|
+
cleaned = (
|
|
232
|
+
dissexp(
|
|
233
|
+
pd.concat(
|
|
234
|
+
[
|
|
235
|
+
gdf,
|
|
236
|
+
without_double,
|
|
237
|
+
not_really_isolated,
|
|
238
|
+
really_isolated,
|
|
239
|
+
isolated_gaps,
|
|
240
|
+
]
|
|
241
|
+
).drop(
|
|
242
|
+
columns=[
|
|
243
|
+
"_cluster",
|
|
244
|
+
"_was_gap",
|
|
245
|
+
"_eliminate_idx",
|
|
246
|
+
"index_right",
|
|
247
|
+
"_double_idx",
|
|
248
|
+
],
|
|
249
|
+
errors="ignore",
|
|
250
|
+
),
|
|
251
|
+
by="_poly_idx",
|
|
252
|
+
aggfunc="first",
|
|
253
|
+
dropna=True,
|
|
254
|
+
grid_size=grid_size,
|
|
255
|
+
)
|
|
256
|
+
.sort_index()
|
|
257
|
+
.reset_index(drop=True)
|
|
258
|
+
)
|
|
259
|
+
break
|
|
260
|
+
except GEOSException as e:
|
|
261
|
+
if i == len(grid_sizes) - 1:
|
|
262
|
+
explore_geosexception(
|
|
263
|
+
e, gdf, without_double, isolated, really_isolated, logger=logger
|
|
264
|
+
)
|
|
265
|
+
raise e
|
|
266
|
+
|
|
267
|
+
cleaned_area_sum = cleaned.area.sum()
|
|
268
|
+
|
|
269
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
270
|
+
try:
|
|
271
|
+
cleaned = clean_overlay(
|
|
272
|
+
gdf.drop(columns="_poly_idx"),
|
|
273
|
+
cleaned,
|
|
274
|
+
how="update",
|
|
275
|
+
geom_type="polygon",
|
|
276
|
+
)
|
|
277
|
+
break
|
|
278
|
+
except GEOSException as e:
|
|
279
|
+
if i == len(grid_sizes) - 1:
|
|
280
|
+
explore_geosexception(
|
|
281
|
+
e,
|
|
282
|
+
gdf,
|
|
283
|
+
cleaned,
|
|
284
|
+
without_double,
|
|
285
|
+
isolated,
|
|
286
|
+
really_isolated,
|
|
287
|
+
logger=logger,
|
|
288
|
+
)
|
|
289
|
+
raise e
|
|
290
|
+
try:
|
|
291
|
+
cleaned = update_geometries(
|
|
292
|
+
cleaned, geom_type="polygon", grid_size=grid_size
|
|
293
|
+
)
|
|
294
|
+
except GEOSException:
|
|
295
|
+
pass
|
|
296
|
+
|
|
297
|
+
# if logger and cleaned_area_sum > cleaned.area.sum() + 1:
|
|
298
|
+
# print("\ncleaned.area.sum() diff", cleaned_area_sum - cleaned.area.sum())
|
|
299
|
+
# logger.debug("cleaned.area.sum() diff", cleaned_area_sum - cleaned.area.sum())
|
|
300
|
+
|
|
301
|
+
cleaned = sort_large_first(cleaned)
|
|
302
|
+
|
|
303
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
304
|
+
try:
|
|
305
|
+
cleaned = update_geometries(
|
|
306
|
+
cleaned, geom_type="polygon", grid_size=grid_size
|
|
307
|
+
)
|
|
308
|
+
break
|
|
309
|
+
except GEOSException as e:
|
|
310
|
+
cleaned.geometry = shapely.simplify(
|
|
311
|
+
cleaned.geometry, PRECISION * (10 * i + 1)
|
|
312
|
+
)
|
|
313
|
+
if i == len(grid_sizes) - 1:
|
|
314
|
+
explore_geosexception(
|
|
315
|
+
e,
|
|
316
|
+
gdf,
|
|
317
|
+
cleaned,
|
|
318
|
+
without_double,
|
|
319
|
+
isolated,
|
|
320
|
+
really_isolated,
|
|
321
|
+
logger=logger,
|
|
322
|
+
)
|
|
323
|
+
raise e
|
|
324
|
+
|
|
325
|
+
cleaned = safe_simplify(cleaned, PRECISION)
|
|
326
|
+
cleaned.geometry = shapely.make_valid(cleaned.geometry)
|
|
327
|
+
|
|
328
|
+
return cleaned
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def safe_simplify(gdf, tolerance: float | int) -> GeoDataFrame:
|
|
332
|
+
"""Simplify only if the resulting area is no more than 1 percent larger.
|
|
333
|
+
|
|
334
|
+
Because simplifying can result in holes being filled.
|
|
335
|
+
"""
|
|
336
|
+
length_then = gdf.length
|
|
337
|
+
copied = gdf.copy()
|
|
338
|
+
copied.geometry = shapely.make_valid(
|
|
339
|
+
shapely.simplify(copied.geometry.values, tolerance=tolerance)
|
|
279
340
|
)
|
|
280
|
-
|
|
281
|
-
gdf,
|
|
282
|
-
gdf_orig,
|
|
283
|
-
# thin,
|
|
284
|
-
mask,
|
|
285
|
-
missing,
|
|
286
|
-
mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
|
|
287
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
288
|
-
),
|
|
289
|
-
gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
|
|
290
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
291
|
-
),
|
|
292
|
-
center=(5.36853688, 59.01169013, 5),
|
|
341
|
+
copied.loc[copied.area > length_then * 1.01, copied._geometry_column_name] = (
|
|
342
|
+
gdf.loc[copied.area > length_then * 1.01, copied._geometry_column_name]
|
|
293
343
|
)
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
344
|
+
|
|
345
|
+
return copied
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
|
|
349
|
+
"""Remove thin spikes in polygons.
|
|
350
|
+
|
|
351
|
+
Note that this function might be slow. Should only be used if nessecary.
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
gdf: GeoDataFrame of polygons
|
|
355
|
+
tolerance: distance (usually meters) used as the minimum thickness
|
|
356
|
+
for polygons to be eliminated. Any spike thinner than the tolerance
|
|
357
|
+
will be removed.
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
A GeoDataFrame of polygons without spikes thinner.
|
|
361
|
+
"""
|
|
362
|
+
gdf.geometry = (
|
|
363
|
+
PolygonsAsRings(gdf.geometry)
|
|
364
|
+
.apply_numpy_func(_remove_spikes, args=(tolerance,))
|
|
365
|
+
.to_numpy()
|
|
307
366
|
)
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
367
|
+
return gdf
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
|
|
371
|
+
# return clean_overlay(
|
|
372
|
+
# gdf, gdf[["geometry"]], how="intersection", grid_size=tolerance
|
|
373
|
+
# )
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _remove_spikes(
|
|
377
|
+
geoms: NDArray[LinearRing], tolerance: int | float
|
|
378
|
+
) -> NDArray[LinearRing]:
|
|
379
|
+
if not len(geoms):
|
|
380
|
+
return geoms
|
|
381
|
+
geoms = to_geoseries(geoms).reset_index(drop=True)
|
|
382
|
+
|
|
383
|
+
points = (
|
|
384
|
+
extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
|
|
321
385
|
)
|
|
322
386
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
387
|
+
points = get_angle_between_indexed_points(points)
|
|
388
|
+
|
|
389
|
+
def to_buffered_rings_without_spikes(x):
|
|
390
|
+
polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
|
|
391
|
+
|
|
392
|
+
return (
|
|
393
|
+
polys.buffer(-tolerance, resolution=BUFFER_RES)
|
|
394
|
+
.explode(index_parts=False)
|
|
395
|
+
.pipe(close_all_holes)
|
|
396
|
+
.pipe(get_exterior_ring)
|
|
397
|
+
.buffer(tolerance * 10)
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
buffered = to_buffered_rings_without_spikes(
|
|
401
|
+
geoms.buffer(tolerance / 2, resolution=BUFFER_RES)
|
|
336
402
|
)
|
|
337
403
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
mask,
|
|
343
|
-
mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
|
|
344
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
345
|
-
),
|
|
346
|
-
gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
|
|
347
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
348
|
-
),
|
|
349
|
-
center=(-52074.0241, 6580847.4464, 0.1),
|
|
350
|
-
max_zoom=40,
|
|
404
|
+
points_without_spikes = (
|
|
405
|
+
extract_unique_points(geoms)
|
|
406
|
+
.explode(index_parts=False)
|
|
407
|
+
.loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
|
|
351
408
|
)
|
|
352
409
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
410
|
+
# linearrings require at least 4 coordinate pairs, or three unique
|
|
411
|
+
points_without_spikes = points_without_spikes.loc[
|
|
412
|
+
lambda x: x.groupby(level=0).size() >= 3
|
|
413
|
+
]
|
|
414
|
+
|
|
415
|
+
# need an index from 0 to n-1 in 'linearrings'
|
|
416
|
+
to_int_index = {
|
|
417
|
+
ring_idx: i
|
|
418
|
+
for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
|
|
419
|
+
}
|
|
420
|
+
int_indices = points_without_spikes.index.map(to_int_index)
|
|
421
|
+
|
|
422
|
+
as_lines = pd.Series(
|
|
423
|
+
linearrings(
|
|
424
|
+
get_coordinates(points_without_spikes.geometry.values),
|
|
425
|
+
indices=int_indices,
|
|
363
426
|
),
|
|
364
|
-
|
|
365
|
-
max_zoom=40,
|
|
427
|
+
index=points_without_spikes.index.unique(),
|
|
366
428
|
)
|
|
367
429
|
|
|
368
|
-
#
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
# updated=update_geometries(gdf, geom_type="polygon"),
|
|
375
|
-
# # browser=False,
|
|
376
|
-
# )
|
|
377
|
-
|
|
378
|
-
# gdf = update_geometries(gdf, geom_type="polygon")
|
|
379
|
-
|
|
380
|
-
return gdf # .pipe(clean_clip, mask, geom_type="polygon")
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
# @numba.njit
|
|
384
|
-
def _snap_to_anchors(
|
|
385
|
-
geoms,
|
|
386
|
-
indices: NDArray[np.int32],
|
|
387
|
-
anchors,
|
|
388
|
-
anchor_indices,
|
|
389
|
-
mask,
|
|
390
|
-
mask_indices,
|
|
391
|
-
was_midpoint,
|
|
392
|
-
was_midpoint_mask,
|
|
393
|
-
tolerance: int | float,
|
|
394
|
-
) -> tuple[NDArray, NDArray, NDArray]:
|
|
395
|
-
|
|
396
|
-
coords, all_distances = _snap_to_anchors_inner(
|
|
397
|
-
geoms,
|
|
398
|
-
indices,
|
|
399
|
-
anchors,
|
|
400
|
-
anchor_indices,
|
|
401
|
-
mask,
|
|
402
|
-
mask_indices,
|
|
403
|
-
was_midpoint,
|
|
404
|
-
was_midpoint_mask,
|
|
405
|
-
tolerance,
|
|
430
|
+
# the missing polygons are thin and/or spiky. Let's remove them
|
|
431
|
+
missing = geoms.loc[~geoms.index.isin(as_lines.index)]
|
|
432
|
+
|
|
433
|
+
missing = pd.Series(
|
|
434
|
+
[None] * len(missing),
|
|
435
|
+
index=missing.index.values,
|
|
406
436
|
)
|
|
407
437
|
|
|
408
|
-
|
|
409
|
-
all_distances = all_distances[not_inf]
|
|
410
|
-
indices = indices[not_inf]
|
|
411
|
-
coords = coords[not_inf]
|
|
412
|
-
|
|
413
|
-
is_snapped = np.full(len(coords), False)
|
|
414
|
-
|
|
415
|
-
n_coords = len(coords)
|
|
416
|
-
|
|
417
|
-
range_indices = np.arange(len(coords))
|
|
418
|
-
|
|
419
|
-
range_index = -1
|
|
420
|
-
for index in np.unique(indices):
|
|
421
|
-
cond = indices == index
|
|
422
|
-
these_coords = coords[cond]
|
|
423
|
-
|
|
424
|
-
# explore(ll=to_gdf(LineString(shapely.points(these_coords)), 25833))
|
|
425
|
-
|
|
426
|
-
# assert np.array_equal(these_coords[0], these_coords[-1]), these_coords
|
|
427
|
-
|
|
428
|
-
these_range_indices = range_indices[cond]
|
|
429
|
-
these_distances = all_distances[cond]
|
|
430
|
-
for i in range(len(these_coords)):
|
|
431
|
-
range_index += 1
|
|
432
|
-
if is_snapped[range_index]:
|
|
433
|
-
print(i, "000")
|
|
434
|
-
continue
|
|
435
|
-
# distances = all_distances[range_index]
|
|
436
|
-
distances = these_distances[i]
|
|
437
|
-
# distances = these_distances[:, i]
|
|
438
|
-
min_dist = np.min(distances)
|
|
439
|
-
if min_dist > tolerance: # or min_dist == 0:
|
|
440
|
-
print(i, "111", min_dist)
|
|
441
|
-
continue
|
|
442
|
-
|
|
443
|
-
is_snapped_now = False
|
|
444
|
-
|
|
445
|
-
for j in np.argsort(distances):
|
|
446
|
-
if distances[j] > tolerance: # TODO or distances[j] == 0:
|
|
447
|
-
break
|
|
448
|
-
|
|
449
|
-
if was_midpoint_mask[j]:
|
|
450
|
-
continue
|
|
451
|
-
|
|
452
|
-
anchor = anchors[j]
|
|
453
|
-
ring = these_coords.copy()
|
|
454
|
-
ring[i] = anchor
|
|
455
|
-
|
|
456
|
-
# snap the nexts points to same anchor if neighboring points have same anchor
|
|
457
|
-
# in order to properly check if the ring will be simple after snapping
|
|
458
|
-
indices_with_same_anchor = [range_index]
|
|
459
|
-
# these_coords = coords[indices==index]
|
|
460
|
-
|
|
461
|
-
pos_counter = 0
|
|
462
|
-
# has_same_anchor_pos = True
|
|
463
|
-
# has_same_anchor_neg = True
|
|
464
|
-
while (
|
|
465
|
-
pos_counter + i < len(these_distances) - 1
|
|
466
|
-
): # has_same_anchor_pos or has_same_anchor_neg:
|
|
467
|
-
pos_counter += 1
|
|
468
|
-
|
|
469
|
-
# if indices[i + pos_counter] != index:
|
|
470
|
-
# break
|
|
471
|
-
# next_distances = all_distances[range_index + pos_counter]
|
|
472
|
-
next_distances = these_distances[i + pos_counter]
|
|
473
|
-
has_same_anchor_pos = False
|
|
474
|
-
for j2 in np.argsort(next_distances):
|
|
475
|
-
if was_midpoint_mask[j2]:
|
|
476
|
-
continue
|
|
477
|
-
if next_distances[j2] > tolerance:
|
|
478
|
-
break
|
|
479
|
-
|
|
480
|
-
has_same_anchor_pos = j2 == j
|
|
481
|
-
# print(
|
|
482
|
-
# "pos c",
|
|
483
|
-
# i,
|
|
484
|
-
# j,
|
|
485
|
-
# j2,
|
|
486
|
-
# pos_counter,
|
|
487
|
-
# has_same_anchor_pos,
|
|
488
|
-
# distances[j],
|
|
489
|
-
# next_distances[j2],
|
|
490
|
-
# )
|
|
491
|
-
break
|
|
492
|
-
if has_same_anchor_pos:
|
|
493
|
-
ring[i + pos_counter] = anchor
|
|
494
|
-
indices_with_same_anchor.append(range_index + pos_counter)
|
|
495
|
-
else:
|
|
496
|
-
break
|
|
497
|
-
|
|
498
|
-
# for j4 in np.arange(
|
|
499
|
-
# indices_with_same_anchor[0], indices_with_same_anchor[-1]
|
|
500
|
-
# ):
|
|
501
|
-
# ring[j4 - range_index + i] = anchor
|
|
502
|
-
# indices_with_same_anchor.append(j4)
|
|
503
|
-
|
|
504
|
-
if i == 0:
|
|
505
|
-
# snap points at the end of the line if same anchor
|
|
506
|
-
neg_counter = 0
|
|
507
|
-
# has_same_anchor_neg = True
|
|
508
|
-
while True: # has_same_anchor_pos or has_same_anchor_neg:
|
|
509
|
-
neg_counter -= 1
|
|
510
|
-
|
|
511
|
-
# if indices[i + pos_counter] != index:
|
|
512
|
-
# break
|
|
513
|
-
this_range_index = these_range_indices[neg_counter]
|
|
514
|
-
# next_distances = all_distances[this_range_index]
|
|
515
|
-
next_distances = these_distances[neg_counter]
|
|
516
|
-
has_same_anchor_neg = False
|
|
517
|
-
for j3 in np.argsort(next_distances):
|
|
518
|
-
if was_midpoint_mask[j3]:
|
|
519
|
-
continue
|
|
520
|
-
if next_distances[j3] > tolerance:
|
|
521
|
-
break
|
|
522
|
-
|
|
523
|
-
has_same_anchor_neg = j3 == j
|
|
524
|
-
# print(
|
|
525
|
-
# "neg c",
|
|
526
|
-
# i,
|
|
527
|
-
# j,
|
|
528
|
-
# j3,
|
|
529
|
-
# pos_counter,
|
|
530
|
-
# # has_same_anchor,
|
|
531
|
-
# distances[j],
|
|
532
|
-
# next_distances[j3],
|
|
533
|
-
# )
|
|
534
|
-
break
|
|
535
|
-
if has_same_anchor_neg:
|
|
536
|
-
ring[neg_counter] = anchor
|
|
537
|
-
indices_with_same_anchor.append(this_range_index)
|
|
538
|
-
else:
|
|
539
|
-
break
|
|
540
|
-
|
|
541
|
-
# for j5 in np.arange(0, indices_with_same_anchor[-1]):
|
|
542
|
-
# ring[j5 - range_index + i] = anchor
|
|
543
|
-
# indices_with_same_anchor.append(j5)
|
|
544
|
-
|
|
545
|
-
indices_with_same_anchor = np.unique(indices_with_same_anchor)
|
|
546
|
-
|
|
547
|
-
line_is_simple: bool = LineString(ring).is_simple
|
|
548
|
-
|
|
549
|
-
# if i in [67, 68, 69, 173, 174, 175, 176, 177]: # or
|
|
550
|
-
if Point(these_coords[i]).intersects(
|
|
551
|
-
to_gdf([12.08375303, 67.50052183], 4326)
|
|
552
|
-
.to_crs(25833)
|
|
553
|
-
.buffer(10)
|
|
554
|
-
.union_all()
|
|
555
|
-
):
|
|
556
|
-
# for xxx, yyy in locals().items():
|
|
557
|
-
# if len(str(yyy)) > 50:
|
|
558
|
-
# continue
|
|
559
|
-
# print(xxx)
|
|
560
|
-
# print(yyy)
|
|
561
|
-
|
|
562
|
-
# print("prev:", was_midpoint_mask[j - 1])
|
|
563
|
-
# print(distances[np.argsort(distances)])
|
|
564
|
-
# print(anchors[np.argsort(distances)])
|
|
565
|
-
# print(ring)
|
|
566
|
-
explore(
|
|
567
|
-
out_coords=to_gdf(
|
|
568
|
-
shapely.linestrings(coords, indices=indices), 25833
|
|
569
|
-
),
|
|
570
|
-
llll=to_gdf(LineString(ring), 25833),
|
|
571
|
-
# this=to_gdf(this),
|
|
572
|
-
# next_=to_gdf(next_),
|
|
573
|
-
# line=to_gdf(LineString(np.array([this, next_])), 25833),
|
|
574
|
-
geom=to_gdf(these_coords[i], 25833),
|
|
575
|
-
prev=to_gdf(these_coords[i - 1], 25833),
|
|
576
|
-
nxt=to_gdf(these_coords[i + 1], 25833),
|
|
577
|
-
nxt2=to_gdf(these_coords[i + 2], 25833),
|
|
578
|
-
anchor=to_gdf(anchor, 25833),
|
|
579
|
-
# browser=True,
|
|
580
|
-
)
|
|
581
|
-
|
|
582
|
-
# print(
|
|
583
|
-
# "line_is_simple", line_is_simple, range_index, i, index, j
|
|
584
|
-
# ) # , j2, j3, x)
|
|
585
|
-
|
|
586
|
-
if not line_is_simple:
|
|
587
|
-
# for j4 in range(len(ring)):
|
|
588
|
-
# this_p = ring[j4]
|
|
589
|
-
# for j5 in range(len(ring)):
|
|
590
|
-
# that_p = ring[j5]
|
|
591
|
-
# dist_ = np.sqrt(
|
|
592
|
-
# (this_p[0] - that_p[0]) ** 2
|
|
593
|
-
# + (this_p[1] - that_p[1]) ** 2
|
|
594
|
-
# )
|
|
595
|
-
# if dist_ > 0 and dist_ < 1e-5:
|
|
596
|
-
# print(this_p)
|
|
597
|
-
# print(that_p)
|
|
598
|
-
# ring[j5] = this_p
|
|
599
|
-
|
|
600
|
-
print(LineString(ring).wkt)
|
|
601
|
-
# explore(
|
|
602
|
-
# out_coords=to_gdf(
|
|
603
|
-
# shapely.linestrings(coords, indices=indices), 25833
|
|
604
|
-
# ),
|
|
605
|
-
# llll=to_gdf(LineString(ring), 25833),
|
|
606
|
-
# # this=to_gdf(this),
|
|
607
|
-
# # next_=to_gdf(next_),
|
|
608
|
-
# # line=to_gdf(LineString(np.array([this, next_])), 25833),
|
|
609
|
-
# geom=to_gdf(these_coords[i], 25833),
|
|
610
|
-
# prev=to_gdf(these_coords[i - 1], 25833),
|
|
611
|
-
# nxt=to_gdf(these_coords[i + 1], 25833),
|
|
612
|
-
# nxt2=to_gdf(these_coords[i + 2], 25833),
|
|
613
|
-
# anchor=to_gdf(anchor, 25833),
|
|
614
|
-
# # browser=True,
|
|
615
|
-
# )
|
|
616
|
-
|
|
617
|
-
line_is_simple: bool = LineString(ring).is_simple
|
|
618
|
-
|
|
619
|
-
if line_is_simple:
|
|
620
|
-
# coords[i] = anchors[j]
|
|
621
|
-
# is_snapped_to[j] = True
|
|
622
|
-
# is_snapped[i] = True
|
|
623
|
-
# explore(
|
|
624
|
-
# out_coords=to_gdf(
|
|
625
|
-
# shapely.linestrings(coords, indices=indices), 25833
|
|
626
|
-
# ),
|
|
627
|
-
# llll=to_gdf(LineString(ring), 25833),
|
|
628
|
-
# # this=to_gdf(this),
|
|
629
|
-
# # next_=to_gdf(next_),
|
|
630
|
-
# # line=to_gdf(LineString(np.array([this, next_])), 25833),
|
|
631
|
-
# anc=to_gdf(anchors[j]),
|
|
632
|
-
# geom=to_gdf(coords[i], 25833),
|
|
633
|
-
# these=to_gdf(coords[i : i + n_points_with_same_anchor ], 25833),
|
|
634
|
-
# prev=to_gdf(coords[i - 1], 25833),
|
|
635
|
-
# prev2=to_gdf(coords[i - 2], 25833),
|
|
636
|
-
# nxt=to_gdf(coords[i + n_points_with_same_anchor + 1], 25833),
|
|
637
|
-
# nxt2=to_gdf(coords[i + n_points_with_same_anchor + 2], 25833),
|
|
638
|
-
# nxt3=to_gdf(coords[i + n_points_with_same_anchor + 3], 25833),
|
|
639
|
-
# )
|
|
640
|
-
# print(coords[i : i + n_points_with_same_anchor + 1])
|
|
641
|
-
for (
|
|
642
|
-
x
|
|
643
|
-
) in indices_with_same_anchor: # range(n_points_with_same_anchor):
|
|
644
|
-
# print(range_index, i, index, j, j2, j3, x)
|
|
645
|
-
coords[x] = anchor # s[j]
|
|
646
|
-
is_snapped[x] = True
|
|
647
|
-
# coords[i + x] = anchors[j]
|
|
648
|
-
# is_snapped[i + x] = True
|
|
649
|
-
# print(coords[i : i + n_points_with_same_anchor + 1])
|
|
650
|
-
|
|
651
|
-
is_snapped_now = True
|
|
652
|
-
break
|
|
653
|
-
# else:
|
|
654
|
-
|
|
655
|
-
if not is_snapped_now:
|
|
656
|
-
coords[range_index] = anchors[np.argmin(distances)]
|
|
657
|
-
# is_snapped_to[np.argmin(distances)] = True
|
|
658
|
-
|
|
659
|
-
if 0 and index == 0: # i > 30 and i < 40:
|
|
660
|
-
print(i)
|
|
661
|
-
explore(
|
|
662
|
-
out_coords=to_gdf(
|
|
663
|
-
shapely.linestrings(coords, indices=indices), 25833
|
|
664
|
-
),
|
|
665
|
-
llll=to_gdf(LineString(ring), 25833),
|
|
666
|
-
pppp=to_gdf(shapely.points(ring), 25833).assign(
|
|
667
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
668
|
-
),
|
|
669
|
-
# this=to_gdf(this),
|
|
670
|
-
# next_=to_gdf(next_),
|
|
671
|
-
# line=to_gdf(LineString(np.array([this, next_])), 25833),
|
|
672
|
-
anc=to_gdf(anchors[j]).assign(
|
|
673
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
674
|
-
),
|
|
675
|
-
geom=to_gdf(these_coords[i], 25833).assign(
|
|
676
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
677
|
-
),
|
|
678
|
-
# these=to_gdf(
|
|
679
|
-
# these_coords[i : i + n_points_with_same_anchor], 25833
|
|
680
|
-
# ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
|
|
681
|
-
prev=to_gdf(these_coords[i - 1], 25833).assign(
|
|
682
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
683
|
-
),
|
|
684
|
-
prev2=to_gdf(these_coords[i - 2], 25833).assign(
|
|
685
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
686
|
-
),
|
|
687
|
-
nxt=to_gdf(these_coords[i + 1], 25833).assign(
|
|
688
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
689
|
-
),
|
|
690
|
-
nxt2=to_gdf(these_coords[i + 2], 25833).assign(
|
|
691
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
692
|
-
),
|
|
693
|
-
nxt3=to_gdf(these_coords[i + 3], 25833).assign(
|
|
694
|
-
wkt=lambda x: [g.wkt for g in x.geometry]
|
|
695
|
-
),
|
|
696
|
-
# browser=True,
|
|
697
|
-
# nxt_n=to_gdf(
|
|
698
|
-
# coords[i + n_points_with_same_anchor + 1], 25833
|
|
699
|
-
# ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
|
|
700
|
-
# nxt_n2=to_gdf(
|
|
701
|
-
# coords[i + n_points_with_same_anchor + 2], 25833
|
|
702
|
-
# ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
|
|
703
|
-
# nxt_n3=to_gdf(
|
|
704
|
-
# coords[i + n_points_with_same_anchor + 3], 25833
|
|
705
|
-
# ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
|
|
706
|
-
)
|
|
707
|
-
# if (
|
|
708
|
-
# indices[i] == 48
|
|
709
|
-
# ): # and int(out_coords[i][0]) == 375502 and int(out_coords[i][1]) == 7490104:
|
|
710
|
-
# print(geom, out_coords[i], out_coords[-3:])
|
|
711
|
-
# xxx += 1
|
|
712
|
-
# if xxx > 100 and i >= 2106:
|
|
713
|
-
# print(locals())
|
|
714
|
-
# explore(
|
|
715
|
-
# geom=to_gdf(geom, 25833),
|
|
716
|
-
# out=to_gdf(out_coords[i], 25833),
|
|
717
|
-
# anc=to_gdf(shapely.points(anchors), 25833),
|
|
718
|
-
# llll=to_gdf(
|
|
719
|
-
# shapely.geometry.LineString(
|
|
720
|
-
# np.array(out_coords)[indices[: len(out_coords)] == 48]
|
|
721
|
-
# ),
|
|
722
|
-
# 25833,
|
|
723
|
-
# ),
|
|
724
|
-
# )
|
|
725
|
-
|
|
726
|
-
return coords, indices
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
@numba.njit
|
|
730
|
-
def _snap_to_anchors_inner(
|
|
731
|
-
geoms,
|
|
732
|
-
indices: NDArray[np.int32],
|
|
733
|
-
anchors,
|
|
734
|
-
anchor_indices,
|
|
735
|
-
mask,
|
|
736
|
-
mask_indices,
|
|
737
|
-
was_midpoint,
|
|
738
|
-
was_midpoint_mask,
|
|
739
|
-
tolerance: int | float,
|
|
740
|
-
) -> tuple[NDArray, NDArray, NDArray]:
|
|
741
|
-
# def orientation(p, q, r):
|
|
742
|
-
# # Calculate orientation of the triplet (p, q, r).
|
|
743
|
-
# # 0 -> collinear, 1 -> clockwise, 2 -> counterclockwise
|
|
744
|
-
# val = (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
|
|
745
|
-
# if val == 0:
|
|
746
|
-
# return 0
|
|
747
|
-
# return 1 if val > 0 else 2
|
|
748
|
-
|
|
749
|
-
# def on_segment(p, q, r):
|
|
750
|
-
# # Check if point q lies on line segment pr
|
|
751
|
-
# if min(p[0], r[0]) <= q[0] <= max(p[0], r[0]) and min(p[1], r[1]) <= q[
|
|
752
|
-
# 1
|
|
753
|
-
# ] <= max(p[1], r[1]):
|
|
754
|
-
# return True
|
|
755
|
-
# return False
|
|
756
|
-
|
|
757
|
-
# def check_intersection(line1, line2):
|
|
758
|
-
# """
|
|
759
|
-
# Check if two line segments intersect.
|
|
760
|
-
|
|
761
|
-
# Parameters:
|
|
762
|
-
# line1 : np.array : 2x2 array with endpoints of the first line segment [[x1, y1], [x2, y2]]
|
|
763
|
-
# line2 : np.array : 2x2 array with endpoints of the second line segment [[x3, y3], [x4, y4]]
|
|
764
|
-
|
|
765
|
-
# Returns:
|
|
766
|
-
# bool : True if the lines intersect, False otherwise.
|
|
767
|
-
# """
|
|
768
|
-
|
|
769
|
-
# p1, q1 = line1
|
|
770
|
-
# p2, q2 = line2
|
|
771
|
-
|
|
772
|
-
# # Find the four orientations needed for the general and special cases
|
|
773
|
-
# o1 = orientation(p1, q1, p2)
|
|
774
|
-
# o2 = orientation(p1, q1, q2)
|
|
775
|
-
# o3 = orientation(p2, q2, p1)
|
|
776
|
-
# o4 = orientation(p2, q2, q1)
|
|
777
|
-
|
|
778
|
-
# # General case
|
|
779
|
-
# if o1 != o2 and o3 != o4:
|
|
780
|
-
# return True
|
|
781
|
-
|
|
782
|
-
# # Special cases
|
|
783
|
-
# # p1, q1, p2 are collinear and p2 lies on segment p1q1
|
|
784
|
-
# if o1 == 0 and on_segment(p1, p2, q1):
|
|
785
|
-
# return True
|
|
786
|
-
|
|
787
|
-
# # p1, q1, q2 are collinear and q2 lies on segment p1q1
|
|
788
|
-
# if o2 == 0 and on_segment(p1, q2, q1):
|
|
789
|
-
# return True
|
|
790
|
-
|
|
791
|
-
# # p2, q2, p1 are collinear and p1 lies on segment p2q2
|
|
792
|
-
# if o3 == 0 and on_segment(p2, p1, q2):
|
|
793
|
-
# return True
|
|
794
|
-
|
|
795
|
-
# # p2, q2, q1 are collinear and q1 lies on segment p2q2
|
|
796
|
-
# if o4 == 0 and on_segment(p2, q1, q2):
|
|
797
|
-
# return True
|
|
798
|
-
|
|
799
|
-
# return False
|
|
800
|
-
|
|
801
|
-
out_coords = geoms.copy()
|
|
802
|
-
# is_snapped = np.full(len(geoms), False)
|
|
803
|
-
|
|
804
|
-
n_anchors = len(anchors)
|
|
805
|
-
mask_n_minus_1 = len(mask) - 1
|
|
806
|
-
is_snapped_to = np.full(len(anchors), False)
|
|
807
|
-
out_distances = np.full((len(geoms), n_anchors), tolerance * 3)
|
|
808
|
-
|
|
809
|
-
for i in range(len(geoms)):
|
|
810
|
-
# if is_snapped[i]:
|
|
811
|
-
# continue
|
|
812
|
-
geom = geoms[i]
|
|
813
|
-
index = indices[i]
|
|
814
|
-
# if i == 0 or index != indices[i - 1]:
|
|
815
|
-
# i_for_this_index = 0
|
|
816
|
-
# else:
|
|
817
|
-
# i_for_this_index += 1
|
|
818
|
-
|
|
819
|
-
is_snapped = False
|
|
820
|
-
for j in range(len(mask)):
|
|
821
|
-
mask_index = mask_indices[j]
|
|
822
|
-
|
|
823
|
-
is_last = j == mask_n_minus_1 or mask_index != mask_indices[j + 1]
|
|
824
|
-
if is_last:
|
|
825
|
-
continue
|
|
826
|
-
|
|
827
|
-
mask_point0 = mask[j]
|
|
828
|
-
|
|
829
|
-
# if (
|
|
830
|
-
# not mask_is_snapped_to[j]
|
|
831
|
-
# and np.sqrt(
|
|
832
|
-
# (geom[0] - mask_point0[0]) ** 2 + (geom[1] - mask_point0[1]) ** 2
|
|
833
|
-
# )
|
|
834
|
-
# <= tolerance
|
|
835
|
-
# ):
|
|
836
|
-
# out_coords[i] = mask_point0
|
|
837
|
-
# mask_is_snapped_to[j] = True
|
|
838
|
-
# is_snapped = True
|
|
839
|
-
# break
|
|
840
|
-
|
|
841
|
-
mask_point1 = mask[j + 1]
|
|
842
|
-
|
|
843
|
-
segment_vector = mask_point1 - mask_point0
|
|
844
|
-
point_vector = geom - mask_point0
|
|
845
|
-
segment_length_squared = np.dot(segment_vector, segment_vector)
|
|
846
|
-
if segment_length_squared == 0:
|
|
847
|
-
closest_point = mask_point0
|
|
848
|
-
else:
|
|
849
|
-
factor = np.dot(point_vector, segment_vector) / segment_length_squared
|
|
850
|
-
factor = max(0, min(1, factor))
|
|
851
|
-
closest_point = mask_point0 + factor * segment_vector
|
|
852
|
-
|
|
853
|
-
if np.linalg.norm(geom - closest_point) == 0 and was_midpoint[i]:
|
|
854
|
-
out_coords[i] = np.array([np.inf, np.inf])
|
|
855
|
-
is_snapped = True
|
|
856
|
-
break
|
|
438
|
+
return pd.concat([as_lines, missing]).sort_index()
|
|
857
439
|
|
|
858
|
-
if is_snapped:
|
|
859
|
-
continue
|
|
860
440
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
441
|
+
def get_angle_between_indexed_points(point_df: GeoDataFrame) -> GeoDataFrame:
|
|
442
|
+
"""Get angle difference between the two lines."""
|
|
443
|
+
point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
|
|
864
444
|
|
|
865
|
-
|
|
866
|
-
# continue
|
|
445
|
+
notna = point_df["next"].notna()
|
|
867
446
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
out_distances[i, j2] = dist
|
|
871
|
-
if dist == 0 and not was_midpoint_mask[j2]:
|
|
872
|
-
break
|
|
447
|
+
this = coordinate_array(point_df.loc[notna, "geometry"].values)
|
|
448
|
+
next_ = coordinate_array(point_df.loc[notna, "next"].values)
|
|
873
449
|
|
|
874
|
-
|
|
450
|
+
point_df.loc[notna, "angle"] = get_angle(this, next_)
|
|
451
|
+
point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
|
|
875
452
|
|
|
453
|
+
point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
|
|
876
454
|
|
|
877
|
-
|
|
878
|
-
def _build_anchors(
|
|
879
|
-
geoms: NDArray[np.float64],
|
|
880
|
-
indices: NDArray[np.int32],
|
|
881
|
-
mask_coords: NDArray[np.float64],
|
|
882
|
-
mask_indices: NDArray[np.int32],
|
|
883
|
-
was_midpoint_mask: NDArray[bool],
|
|
884
|
-
tolerance: int | float,
|
|
885
|
-
):
|
|
886
|
-
anchors = list(mask_coords)
|
|
887
|
-
anchor_indices = list(mask_indices)
|
|
888
|
-
is_anchor_arr = np.full(len(geoms), False)
|
|
889
|
-
was_midpoint_mask = list(was_midpoint_mask)
|
|
890
|
-
for i in np.arange(len(geoms)):
|
|
891
|
-
geom = geoms[i]
|
|
892
|
-
index = indices[i]
|
|
893
|
-
# distances = []
|
|
894
|
-
# for j, anchor in zip(anchor_indices, anchors):
|
|
895
|
-
|
|
896
|
-
is_anchor = True
|
|
897
|
-
for j in range(len(anchors)):
|
|
898
|
-
# if indices[i] != indices[j]:
|
|
899
|
-
# if i != j and indices[i] != indices[j]:
|
|
900
|
-
anchor = anchors[j]
|
|
901
|
-
dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
|
|
902
|
-
if dist <= tolerance:
|
|
903
|
-
is_anchor = False
|
|
904
|
-
break
|
|
905
|
-
# distances.append(dist)
|
|
906
|
-
# distances = np.array(distances)
|
|
907
|
-
is_anchor_arr[i] = is_anchor
|
|
908
|
-
if is_anchor: # not len(distances) or np.min(distances) > tolerance:
|
|
909
|
-
anchors.append(geom)
|
|
910
|
-
anchor_indices.append(index)
|
|
911
|
-
was_midpoint_mask.append(True)
|
|
912
|
-
return anchors, anchor_indices, is_anchor_arr, was_midpoint_mask
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
@numba.njit
|
|
916
|
-
def _add_last_points_to_end(
|
|
917
|
-
coords: NDArray[np.float64],
|
|
918
|
-
indices: NDArray[np.int32],
|
|
919
|
-
) -> tuple[
|
|
920
|
-
NDArray[np.float64],
|
|
921
|
-
NDArray[np.int32],
|
|
922
|
-
]:
|
|
923
|
-
out_coords, out_indices = [coords[0]], [indices[0]]
|
|
924
|
-
last_coords = []
|
|
925
|
-
prev = coords[0]
|
|
926
|
-
first_coords = prev
|
|
927
|
-
n_minus_1 = len(coords) - 1
|
|
928
|
-
for i in np.arange(1, len(coords)):
|
|
929
|
-
idx = indices[i]
|
|
930
|
-
xy = coords[i]
|
|
931
|
-
distance_to_prev: float = np.sqrt(
|
|
932
|
-
(xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
|
|
933
|
-
)
|
|
934
|
-
if idx != indices[i - 1]:
|
|
935
|
-
first_coords = xy
|
|
936
|
-
out_coords.append(xy)
|
|
937
|
-
out_indices.append(idx)
|
|
938
|
-
elif not distance_to_prev:
|
|
939
|
-
if i == n_minus_1 or idx != indices[i + 1]:
|
|
940
|
-
last_coords.append(xy)
|
|
941
|
-
prev = xy
|
|
942
|
-
continue
|
|
943
|
-
elif i == n_minus_1 or idx != indices[i + 1]:
|
|
944
|
-
out_coords.append(xy)
|
|
945
|
-
out_coords.append(first_coords)
|
|
946
|
-
out_indices.append(idx)
|
|
947
|
-
out_indices.append(idx)
|
|
948
|
-
last_coords.append(xy)
|
|
949
|
-
else:
|
|
950
|
-
out_coords.append(xy)
|
|
951
|
-
out_indices.append(idx)
|
|
952
|
-
|
|
953
|
-
prev = xy
|
|
954
|
-
|
|
955
|
-
return (out_coords, out_indices)
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
@numba.njit
|
|
959
|
-
def _add_last_points_to_end_with_third_arr(
|
|
960
|
-
coords: NDArray[np.float64],
|
|
961
|
-
indices: NDArray[np.int32],
|
|
962
|
-
third_arr: NDArray[Any],
|
|
963
|
-
) -> tuple[
|
|
964
|
-
NDArray[np.float64],
|
|
965
|
-
NDArray[np.int32],
|
|
966
|
-
NDArray[Any],
|
|
967
|
-
]:
|
|
968
|
-
out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
|
|
969
|
-
last_coords = []
|
|
970
|
-
prev = coords[0]
|
|
971
|
-
first_coords = prev
|
|
972
|
-
n_minus_1 = len(coords) - 1
|
|
973
|
-
for i in np.arange(1, len(coords)):
|
|
974
|
-
idx = indices[i]
|
|
975
|
-
xy = coords[i]
|
|
976
|
-
distance_to_prev: float = np.sqrt(
|
|
977
|
-
(xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
|
|
978
|
-
)
|
|
979
|
-
if idx != indices[i - 1]:
|
|
980
|
-
first_coords = xy
|
|
981
|
-
out_coords.append(xy)
|
|
982
|
-
out_indices.append(idx)
|
|
983
|
-
out_third_arr.append(third_arr[i])
|
|
984
|
-
elif not distance_to_prev:
|
|
985
|
-
if i == n_minus_1 or idx != indices[i + 1]:
|
|
986
|
-
last_coords.append(xy)
|
|
987
|
-
prev = xy
|
|
988
|
-
continue
|
|
989
|
-
elif i == n_minus_1 or idx != indices[i + 1]:
|
|
990
|
-
out_coords.append(xy)
|
|
991
|
-
out_coords.append(first_coords)
|
|
992
|
-
out_indices.append(idx)
|
|
993
|
-
out_indices.append(idx)
|
|
994
|
-
last_coords.append(xy)
|
|
995
|
-
out_third_arr.append(third_arr[i])
|
|
996
|
-
out_third_arr.append(third_arr[i])
|
|
997
|
-
else:
|
|
998
|
-
out_coords.append(xy)
|
|
999
|
-
out_indices.append(idx)
|
|
1000
|
-
out_third_arr.append(third_arr[i])
|
|
1001
|
-
|
|
1002
|
-
prev = xy
|
|
1003
|
-
|
|
1004
|
-
return (out_coords, out_indices, out_third_arr)
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
@numba.njit
|
|
1008
|
-
def _remove_duplicate_points(
|
|
1009
|
-
coords: NDArray[np.float64],
|
|
1010
|
-
indices: NDArray[np.int32],
|
|
1011
|
-
third_arr: NDArray[Any],
|
|
1012
|
-
):
|
|
1013
|
-
out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
|
|
1014
|
-
prev = coords[0]
|
|
1015
|
-
for i in np.arange(1, len(coords)):
|
|
1016
|
-
idx = indices[i]
|
|
1017
|
-
xy = coords[i]
|
|
1018
|
-
distance_to_prev: float = np.sqrt(
|
|
1019
|
-
(xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
|
|
1020
|
-
)
|
|
1021
|
-
if not distance_to_prev and idx == indices[i - 1]:
|
|
1022
|
-
prev = xy
|
|
1023
|
-
continue
|
|
455
|
+
return point_df
|
|
1024
456
|
|
|
1025
|
-
if idx != indices[i - 1]:
|
|
1026
|
-
out_coords.append(xy)
|
|
1027
|
-
out_indices.append(idx)
|
|
1028
|
-
out_third_arr.append(third_arr[i])
|
|
1029
|
-
prev = xy
|
|
1030
|
-
continue
|
|
1031
457
|
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
458
|
+
def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
|
|
459
|
+
gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
|
|
460
|
+
gdf, more_slivers = split_out_slivers(gdf, tolerance)
|
|
461
|
+
slivers = pd.concat([slivers, more_slivers], ignore_index=True)
|
|
462
|
+
gaps = get_gaps(gdf, include_interiors=True)
|
|
463
|
+
gaps["_was_gap"] = 1
|
|
464
|
+
assert "_double_idx" not in gaps
|
|
465
|
+
double = get_intersections(gdf)
|
|
466
|
+
double["_double_idx"] = range(len(double))
|
|
467
|
+
thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
|
|
468
|
+
lambda x: x.buffer(-tolerance / 2).is_empty
|
|
469
|
+
]
|
|
1036
470
|
|
|
1037
|
-
return
|
|
471
|
+
return gdf, thin_gaps_and_double, slivers
|
|
1038
472
|
|
|
1039
473
|
|
|
1040
|
-
def
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
)
|
|
1046
|
-
|
|
1047
|
-
|
|
474
|
+
def _dissolve_thick_double_and_update(gdf, double, thin_double):
|
|
475
|
+
large = (
|
|
476
|
+
double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])]
|
|
477
|
+
.drop(columns="_double_idx")
|
|
478
|
+
# .pipe(sort_large_first)
|
|
479
|
+
.sort_values("_poly_idx")
|
|
480
|
+
.pipe(update_geometries, geom_type="polygon")
|
|
481
|
+
)
|
|
482
|
+
return (
|
|
483
|
+
clean_overlay(gdf, large, how="update")
|
|
484
|
+
# .pipe(sort_large_first)
|
|
485
|
+
.sort_values("_poly_idx").pipe(update_geometries, geom_type="polygon")
|
|
486
|
+
)
|
|
1048
487
|
|
|
1049
|
-
points = GeoDataFrame(
|
|
1050
|
-
{
|
|
1051
|
-
"geometry": extract_unique_points(geoms),
|
|
1052
|
-
"_geom_idx": np.arange(len(geoms)),
|
|
1053
|
-
}
|
|
1054
|
-
).explode(ignore_index=True)
|
|
1055
|
-
coords = get_coordinates(points.geometry.values)
|
|
1056
|
-
indices = points["_geom_idx"].values
|
|
1057
|
-
|
|
1058
|
-
if mask is not None:
|
|
1059
|
-
mask_coords, mask_indices = get_coordinates(
|
|
1060
|
-
mask.geometry.values, return_index=True
|
|
1061
|
-
)
|
|
1062
|
-
is_anchor = np.full(len(mask_coords), False)
|
|
1063
488
|
|
|
1064
|
-
|
|
1065
|
-
|
|
489
|
+
def _cleaning_checks(gdf, tolerance, duplicate_action): # , spike_action):
|
|
490
|
+
if not len(gdf) or not tolerance:
|
|
491
|
+
return gdf
|
|
492
|
+
if tolerance < PRECISION:
|
|
493
|
+
raise ValueError(
|
|
494
|
+
f"'tolerance' must be larger than {PRECISION} to avoid "
|
|
495
|
+
"problems with floating point precision."
|
|
1066
496
|
)
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
497
|
+
if duplicate_action not in ["fix", "error", "ignore"]:
|
|
498
|
+
raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def split_out_slivers(
|
|
502
|
+
gdf: GeoDataFrame | GeoSeries, tolerance: float | int
|
|
503
|
+
) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
|
|
504
|
+
is_sliver = gdf.buffer(-tolerance / 2).is_empty
|
|
505
|
+
slivers = gdf.loc[is_sliver]
|
|
506
|
+
gdf = gdf.loc[~is_sliver]
|
|
507
|
+
return gdf, slivers
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def try_for_grid_size(
|
|
511
|
+
func,
|
|
512
|
+
grid_sizes: tuple[None, float | int],
|
|
513
|
+
args: tuple | None = None,
|
|
514
|
+
kwargs: dict | None = None,
|
|
515
|
+
) -> Any:
|
|
516
|
+
if args is None:
|
|
517
|
+
args = ()
|
|
518
|
+
if kwargs is None:
|
|
519
|
+
kwargs = {}
|
|
520
|
+
for i, grid_size in enumerate(grid_sizes):
|
|
521
|
+
try:
|
|
522
|
+
return func(*args, grid_size=grid_size, **kwargs)
|
|
523
|
+
except GEOSException as e:
|
|
524
|
+
if i == len(grid_sizes) - 1:
|
|
525
|
+
raise e
|
|
1070
526
|
|
|
1071
|
-
is_anchor = np.full(len(mask_coords), False)
|
|
1072
|
-
mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
|
|
1073
|
-
mask_coords, mask_indices, is_anchor
|
|
1074
|
-
)
|
|
1075
|
-
mask_coords = np.array(mask_coords)
|
|
1076
|
-
mask_indices = np.array(mask_indices)
|
|
1077
527
|
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
points.geometry.drop_duplicates(),
|
|
1089
|
-
original_mask_buffered,
|
|
1090
|
-
)
|
|
1091
|
-
),
|
|
1092
|
-
tolerance * 1.1,
|
|
1093
|
-
)
|
|
1094
|
-
)
|
|
528
|
+
def split_and_eliminate_by_longest(
|
|
529
|
+
gdf: GeoDataFrame | list[GeoDataFrame],
|
|
530
|
+
to_eliminate: GeoDataFrame,
|
|
531
|
+
tolerance: int | float,
|
|
532
|
+
grid_sizes: tuple[None | float | int] = (None,),
|
|
533
|
+
logger=None,
|
|
534
|
+
**kwargs,
|
|
535
|
+
) -> GeoDataFrame | tuple[GeoDataFrame]:
|
|
536
|
+
if not len(to_eliminate):
|
|
537
|
+
return gdf
|
|
1095
538
|
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
# coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
|
|
1114
|
-
# coords,
|
|
1115
|
-
# indices,
|
|
1116
|
-
# mask_coords,
|
|
1117
|
-
# tolerance * 1.1, # + PRECISION * 100,
|
|
1118
|
-
# )
|
|
1119
|
-
|
|
1120
|
-
# was_midpoint = np.array(was_midpoint)
|
|
1121
|
-
|
|
1122
|
-
# coords, is_snapped_to = _snap_to_anchors(
|
|
1123
|
-
# coords,
|
|
1124
|
-
# indices,
|
|
1125
|
-
# mask_coords,
|
|
1126
|
-
# mask_indices,
|
|
1127
|
-
# mask_coords,
|
|
1128
|
-
# mask_indices,
|
|
1129
|
-
# was_midpoint,
|
|
1130
|
-
# was_midpoint_mask,
|
|
1131
|
-
# tolerance + PRECISION * 20,
|
|
1132
|
-
# )
|
|
1133
|
-
# indices = np.array(indices)
|
|
1134
|
-
# coords = np.array(coords)
|
|
1135
|
-
|
|
1136
|
-
# indices = indices[coords[:, 0] != np.inf]
|
|
1137
|
-
# coords = coords[coords[:, 0] != np.inf]
|
|
1138
|
-
|
|
1139
|
-
if snap_to_anchors:
|
|
1140
|
-
if mask is None:
|
|
1141
|
-
mask_coords = [coords[0]]
|
|
1142
|
-
mask_indices = [indices[0]]
|
|
1143
|
-
was_midpoint_mask = [False]
|
|
1144
|
-
anchors, anchor_indices, is_anchor, was_midpoint_anchors = _build_anchors(
|
|
1145
|
-
coords,
|
|
1146
|
-
indices,
|
|
1147
|
-
mask_coords,
|
|
1148
|
-
mask_indices,
|
|
1149
|
-
was_midpoint_mask,
|
|
1150
|
-
tolerance + PRECISION, # * 100
|
|
1151
|
-
)
|
|
1152
|
-
anchors = np.array(anchors)
|
|
1153
|
-
anchor_indices = np.array(anchor_indices)
|
|
539
|
+
if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
|
|
540
|
+
as_gdf = pd.concat(gdf, ignore_index=True)
|
|
541
|
+
else:
|
|
542
|
+
as_gdf = gdf
|
|
543
|
+
|
|
544
|
+
splitted = try_for_grid_size(
|
|
545
|
+
split_by_neighbors,
|
|
546
|
+
grid_sizes=grid_sizes,
|
|
547
|
+
args=(to_eliminate, as_gdf, tolerance),
|
|
548
|
+
).pipe(sort_small_first)
|
|
549
|
+
|
|
550
|
+
splitted = try_for_grid_size(
|
|
551
|
+
update_geometries,
|
|
552
|
+
grid_sizes=grid_sizes,
|
|
553
|
+
args=(splitted,),
|
|
554
|
+
kwargs=dict(geom_type="polygon"),
|
|
555
|
+
)
|
|
1154
556
|
|
|
1155
|
-
|
|
557
|
+
gdf = try_for_grid_size(
|
|
558
|
+
eliminate_by_longest,
|
|
559
|
+
grid_sizes=grid_sizes,
|
|
560
|
+
args=(
|
|
561
|
+
gdf,
|
|
562
|
+
splitted,
|
|
563
|
+
),
|
|
564
|
+
kwargs=kwargs,
|
|
565
|
+
)
|
|
1156
566
|
|
|
567
|
+
if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
|
|
568
|
+
as_gdf = pd.concat(gdf, ignore_index=True)
|
|
1157
569
|
else:
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
570
|
+
as_gdf = gdf
|
|
571
|
+
|
|
572
|
+
missing = try_for_grid_size(
|
|
573
|
+
clean_overlay,
|
|
574
|
+
grid_sizes=grid_sizes,
|
|
575
|
+
args=(
|
|
576
|
+
to_eliminate,
|
|
577
|
+
as_gdf,
|
|
578
|
+
),
|
|
579
|
+
kwargs=dict(
|
|
580
|
+
how="difference",
|
|
581
|
+
geom_type="polygon",
|
|
582
|
+
),
|
|
583
|
+
).pipe(dissexp_by_cluster)
|
|
1163
584
|
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
indices,
|
|
1167
|
-
anchors,
|
|
1168
|
-
tolerance * 1.1,
|
|
585
|
+
return try_for_grid_size(
|
|
586
|
+
eliminate_by_longest, grid_sizes=grid_sizes, args=(gdf, missing), kwargs=kwargs
|
|
1169
587
|
)
|
|
1170
588
|
|
|
1171
|
-
was_midpoint = np.array(was_midpoint)
|
|
1172
589
|
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
.values
|
|
1177
|
-
)
|
|
1178
|
-
coords_up_here000 = to_gdf(polygons(coords_up_here000), 25833)
|
|
590
|
+
def split_by_neighbors(df, split_by, tolerance, grid_size=None) -> GeoDataFrame:
|
|
591
|
+
if not len(df):
|
|
592
|
+
return df
|
|
1179
593
|
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
)
|
|
594
|
+
split_by = split_by.copy()
|
|
595
|
+
split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
|
|
1183
596
|
|
|
1184
|
-
|
|
1185
|
-
|
|
597
|
+
intersecting_lines = (
|
|
598
|
+
clean_overlay(
|
|
599
|
+
to_lines(split_by), buff(df, tolerance), how="identity", grid_size=grid_size
|
|
600
|
+
)
|
|
601
|
+
.pipe(get_line_segments)
|
|
602
|
+
.reset_index(drop=True)
|
|
1186
603
|
)
|
|
1187
604
|
|
|
1188
|
-
|
|
1189
|
-
indices = np.array(indices)
|
|
1190
|
-
was_midpoint = np.array(was_midpoint)
|
|
605
|
+
endpoints = intersecting_lines.boundary.explode(index_parts=False)
|
|
1191
606
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
607
|
+
extended_lines = GeoDataFrame(
|
|
608
|
+
{
|
|
609
|
+
"geometry": extend_lines(
|
|
610
|
+
endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
|
|
611
|
+
endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
|
|
612
|
+
distance=tolerance * 3,
|
|
613
|
+
)
|
|
614
|
+
},
|
|
615
|
+
crs=df.crs,
|
|
1196
616
|
)
|
|
1197
|
-
coords_up_here = to_gdf(polygons(coords_up_here), 25833)
|
|
1198
617
|
|
|
1199
|
-
|
|
1200
|
-
coords=to_gdf(shapely.points(coords), 25833).assign(
|
|
1201
|
-
idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1202
|
-
),
|
|
1203
|
-
anchors=to_gdf(shapely.points(anchors), 25833).assign(
|
|
1204
|
-
idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1205
|
-
), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
|
|
1206
|
-
coords_up_here000=coords_up_here000,
|
|
1207
|
-
coords_up_here=coords_up_here,
|
|
1208
|
-
geoms=to_gdf(polygons(geoms), 25833),
|
|
1209
|
-
msk=to_gdf(shapely.points(mask_coords), 25833).assign(
|
|
1210
|
-
was_midpoint_mask=was_midpoint_mask
|
|
1211
|
-
),
|
|
1212
|
-
# center=_DEBUG_CONFIG["center"],
|
|
1213
|
-
)
|
|
618
|
+
buffered = buff(extended_lines, tolerance, single_sided=True)
|
|
1214
619
|
|
|
1215
|
-
|
|
1216
|
-
coords,
|
|
1217
|
-
indices,
|
|
1218
|
-
anchors,
|
|
1219
|
-
anchor_indices,
|
|
1220
|
-
mask_coords,
|
|
1221
|
-
mask_indices,
|
|
1222
|
-
was_midpoint,
|
|
1223
|
-
was_midpoint_anchors,
|
|
1224
|
-
tolerance + PRECISION * 100,
|
|
1225
|
-
)
|
|
1226
|
-
indices = np.array(indices)
|
|
1227
|
-
coords = np.array(coords)
|
|
1228
|
-
indices = indices[coords[:, 0] != np.inf]
|
|
1229
|
-
coords = coords[coords[:, 0] != np.inf]
|
|
1230
|
-
|
|
1231
|
-
# coords_up_here111 = (
|
|
1232
|
-
# pd.Series(_coords_to_rings(coords, indices, geoms))
|
|
1233
|
-
# .loc[lambda x: x.notna()]
|
|
1234
|
-
# .values
|
|
1235
|
-
# )
|
|
1236
|
-
# coords_up_here111 = to_gdf(polygons(coords_up_here111), 25833)
|
|
1237
|
-
|
|
1238
|
-
# if 0:
|
|
1239
|
-
# # coords = get_coordinates(points.geometry.values)
|
|
1240
|
-
# # indices = points["_geom_idx"].values
|
|
1241
|
-
|
|
1242
|
-
# is_anchor = np.full(len(coords), False)
|
|
1243
|
-
# coords, indices, is_anchor = _remove_duplicate_points(
|
|
1244
|
-
# coords, indices, is_anchor
|
|
1245
|
-
# )
|
|
1246
|
-
# coords, indices = _add_last_points_to_end(coords, indices)
|
|
1247
|
-
# coords = np.array(coords)
|
|
1248
|
-
# indices = np.array(indices)
|
|
1249
|
-
# is_anchor = np.full(len(coords), False)
|
|
1250
|
-
# coords, indices, is_anchor = _remove_duplicate_points(
|
|
1251
|
-
# coords, indices, is_anchor
|
|
1252
|
-
# )
|
|
1253
|
-
# coords = np.array(coords)
|
|
1254
|
-
# indices = np.array(indices)
|
|
1255
|
-
|
|
1256
|
-
# display(pd.DataFrame(coords, index=indices, columns=[*"xy"]))
|
|
1257
|
-
|
|
1258
|
-
# if 0:
|
|
1259
|
-
# mask_coords, mask_indices, , dist_to_closest_geom = (
|
|
1260
|
-
# _add_midpoints_to_segments_numba(
|
|
1261
|
-
# mask_coords,
|
|
1262
|
-
# mask_indices,
|
|
1263
|
-
# # coords,
|
|
1264
|
-
# get_coordinates(
|
|
1265
|
-
# sfilter(
|
|
1266
|
-
# GeoSeries(shapely.points(coords)).drop_duplicates(),
|
|
1267
|
-
# original_mask_buffered,
|
|
1268
|
-
# )
|
|
1269
|
-
# ),
|
|
1270
|
-
# tolerance * 1.1,
|
|
1271
|
-
# )
|
|
1272
|
-
# )
|
|
1273
|
-
|
|
1274
|
-
# mask_coords = np.array(mask_coords)
|
|
1275
|
-
# mask_indices = np.array(mask_indices)
|
|
1276
|
-
|
|
1277
|
-
# anchors, anchor_indices, is_anchor = _build_anchors(
|
|
1278
|
-
# coords,
|
|
1279
|
-
# indices,
|
|
1280
|
-
# mask_coords,
|
|
1281
|
-
# mask_indices,
|
|
1282
|
-
# # is_anchor,
|
|
1283
|
-
# tolerance + PRECISION, # * 100
|
|
1284
|
-
# )
|
|
1285
|
-
# anchors = np.array(anchors)
|
|
1286
|
-
# anchor_indices = np.array(anchor_indices)
|
|
1287
|
-
|
|
1288
|
-
# coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
|
|
1289
|
-
# coords,
|
|
1290
|
-
# indices,
|
|
1291
|
-
# anchors,
|
|
1292
|
-
# tolerance * 1.1, # + PRECISION * 100,
|
|
1293
|
-
# # GeoDataFrame({"geometry": shapely.points(coords), "_geom_idx": indices}),
|
|
1294
|
-
# # GeoDataFrame({"geometry": shapely.points(anchors)}),
|
|
1295
|
-
# # tolerance, # + PRECISION * 100,
|
|
1296
|
-
# # None,
|
|
1297
|
-
# )
|
|
1298
|
-
# print(len(coords), len(anchors), len(was_midpoint))
|
|
1299
|
-
|
|
1300
|
-
# indices = np.array(indices)
|
|
1301
|
-
# coords = np.array(coords)
|
|
1302
|
-
|
|
1303
|
-
# was_midpoint = np.array(was_midpoint)
|
|
1304
|
-
|
|
1305
|
-
# coords, is_snapped_to = _snap_to_anchors(
|
|
1306
|
-
# coords,
|
|
1307
|
-
# indices,
|
|
1308
|
-
# anchors,
|
|
1309
|
-
# anchor_indices,
|
|
1310
|
-
# mask_coords,
|
|
1311
|
-
# mask_indices,
|
|
1312
|
-
# was_midpoint,
|
|
1313
|
-
# was_midpoint_anchors,
|
|
1314
|
-
# tolerance + PRECISION * 20,
|
|
1315
|
-
# )
|
|
1316
|
-
# indices = np.array(indices)
|
|
1317
|
-
# coords = np.array(coords)
|
|
1318
|
-
# indices = indices[coords[:, 0] != np.inf]
|
|
1319
|
-
# coords = coords[coords[:, 0] != np.inf]
|
|
1320
|
-
|
|
1321
|
-
# coords = np.array(coords)
|
|
1322
|
-
|
|
1323
|
-
# indices = np.array(indices)
|
|
1324
|
-
|
|
1325
|
-
coords_down_here = (
|
|
1326
|
-
pd.Series(_coords_to_rings(coords, indices, geoms))
|
|
1327
|
-
.loc[lambda x: x.notna()]
|
|
1328
|
-
.values
|
|
1329
|
-
)
|
|
1330
|
-
lines_down_here = to_gdf(shapely.buffer(coords_down_here, 0.1), 25833)
|
|
1331
|
-
coords_down_here = to_gdf(polygons(coords_down_here), 25833)
|
|
620
|
+
return clean_overlay(df, buffered, how="identity", grid_size=grid_size)
|
|
1332
621
|
|
|
1333
|
-
try:
|
|
1334
|
-
explore(
|
|
1335
|
-
coords=to_gdf(shapely.points(coords), 25833).assign(
|
|
1336
|
-
idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1337
|
-
),
|
|
1338
|
-
anchors=to_gdf(shapely.points(anchors), 25833).assign(
|
|
1339
|
-
idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1340
|
-
), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
|
|
1341
|
-
coords_up_here000=coords_up_here000,
|
|
1342
|
-
coords_up_here=coords_up_here,
|
|
1343
|
-
coords_down_here=coords_down_here,
|
|
1344
|
-
lines_down_here=lines_down_here,
|
|
1345
|
-
geoms=to_gdf(polygons(geoms), 25833),
|
|
1346
|
-
msk=to_gdf(shapely.points(mask_coords), 25833).assign(
|
|
1347
|
-
was_midpoint_mask=was_midpoint_mask
|
|
1348
|
-
),
|
|
1349
|
-
)
|
|
1350
622
|
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1357
|
-
), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
|
|
1358
|
-
coords_up_here000=coords_up_here000,
|
|
1359
|
-
coords_up_here=coords_up_here,
|
|
1360
|
-
coords_down_here=coords_down_here,
|
|
1361
|
-
lines_down_here=lines_down_here,
|
|
1362
|
-
geoms=to_gdf(polygons(geoms), 25833),
|
|
1363
|
-
msk=to_gdf(shapely.points(mask_coords), 25833).assign(
|
|
1364
|
-
was_midpoint_mask=was_midpoint_mask
|
|
1365
|
-
),
|
|
1366
|
-
center=(5.37707159, 59.01065276, 1),
|
|
1367
|
-
)
|
|
1368
|
-
explore(
|
|
1369
|
-
coords=to_gdf(shapely.points(coords), 25833).assign(
|
|
1370
|
-
idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1371
|
-
),
|
|
1372
|
-
anchors=to_gdf(shapely.points(anchors), 25833).assign(
|
|
1373
|
-
idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
|
|
1374
|
-
), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
|
|
1375
|
-
coords_up_here000=coords_up_here000,
|
|
1376
|
-
coords_up_here=coords_up_here,
|
|
1377
|
-
coords_down_here=coords_down_here,
|
|
1378
|
-
lines_down_here=lines_down_here,
|
|
1379
|
-
geoms=to_gdf(polygons(geoms), 25833),
|
|
1380
|
-
msk=to_gdf(shapely.points(mask_coords), 25833).assign(
|
|
1381
|
-
was_midpoint_mask=was_midpoint_mask
|
|
1382
|
-
),
|
|
1383
|
-
center=(5.37419946, 59.01138812, 15),
|
|
1384
|
-
)
|
|
623
|
+
def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
    """Extend the line between each point pair by 'distance' and return LineStrings.

    For each pair (arr1[i], arr2[i]) a new point is placed 'distance' units
    past one endpoint along the line through both points, then a LineString
    is built from the other endpoint to that new point via
    ``make_lines_between_points``.

    Args:
        arr1: Array of points.
        arr2: Array of points, same length as arr1.
        distance: How far to extend beyond the endpoint.

    Returns:
        Object-dtype array of LineStrings; entries whose direction is
        undefined (zero-length pair, NaN after division) stay None.

    Raises:
        ValueError: If the arrays have unequal length.
    """
    if len(arr1) != len(arr2):
        raise ValueError(
            f"Arrays must have equal length. Got {len(arr1)} and {len(arr2)}"
        )
    if not len(arr1):
        return arr1

    arr1, arr2 = arr2, arr1  # TODO fix

    coords1 = coordinate_array(arr1)
    coords2 = coordinate_array(arr2)

    dx = coords2[:, 0] - coords1[:, 0]
    dy = coords2[:, 1] - coords1[:, 1]
    len_xy = np.sqrt((dx**2.0) + (dy**2.0))
    # Step from coords1 in the direction away from coords2, scaled to 'distance'.
    # A zero-length pair gives 0/0 -> NaN here.
    x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
    y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance

    # Hoist the validity mask instead of recomputing ~np.isnan(x) per use.
    valid = ~np.isnan(x)
    new_points = np.full(len(arr1), None, dtype=object)
    new_points[valid] = shapely.points(x[valid], y[valid])

    new_points[valid] = make_lines_between_points(arr2[valid], new_points[valid])
    return new_points
|
|
1440
647
|
|
|
1441
|
-
missing = pd.Series(
|
|
1442
|
-
index=pd.Index(range(len(original_geoms))).difference(rings.index)
|
|
1443
|
-
)
|
|
1444
648
|
|
|
1445
|
-
|
|
649
|
+
def make_lines_between_points(
    arr1: NDArray[Point], arr2: NDArray[Point]
) -> NDArray[LineString]:
    """Build one two-point LineString per pair (arr1[i], arr2[i]).

    Args:
        arr1: Array of points.
        arr2: Array of points with the same shape as arr1.

    Returns:
        Array of LineStrings, one per index position.

    Raises:
        ValueError: If the two arrays differ in shape.
    """
    if arr1.shape != arr2.shape:
        raise ValueError(
            f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
        )
    # Both frames carry the same RangeIndex, so the stable sort interleaves
    # the rows into (start_i, end_i) pairs grouped under one index label each.
    starts = pd.DataFrame(get_coordinates(arr1), columns=["x", "y"])
    ends = pd.DataFrame(get_coordinates(arr2), columns=["x", "y"])
    paired: pd.DataFrame = pd.concat([starts, ends]).sort_index()

    return linestrings(paired.values, indices=paired.index)
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def get_line_segments(lines) -> GeoDataFrame:
    """Split each line into consecutive two-point segments, one row per segment.

    Accepts a GeoDataFrame or GeoSeries of lines; the line vertices are
    extracted as multipoints and handed to ``multipoints_to_line_segments``.
    The input index must be unique.
    """
    assert lines.index.is_unique

    if not isinstance(lines, GeoDataFrame):
        # GeoSeries-like input: extract the vertices directly.
        vertex_multipoints = GeoSeries(
            extract_unique_points(lines.values), index=lines.index
        )
        return multipoints_to_line_segments(vertex_multipoints)

    geom_col = lines._geometry_column_name
    vertices = force_2d(extract_unique_points(lines.geometry.values))
    as_multipoints = lines.assign(**{geom_col: vertices})
    return multipoints_to_line_segments(as_multipoints.geometry)
|
|
1656
681
|
|
|
1657
|
-
return (
|
|
1658
|
-
out_coords,
|
|
1659
|
-
out_indices,
|
|
1660
|
-
was_midpoint,
|
|
1661
|
-
out_distances,
|
|
1662
|
-
)
|
|
1663
682
|
|
|
683
|
+
def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
    """Turn each multipoint into two-point LineStrings between consecutive points.

    Each geometry's points are paired with the next point in the same group;
    the last point is paired back to the group's first point, closing the ring.
    Returns a GeoDataFrame with one row per segment, indexed like the input.
    """
    # Empty input: return an (empty) frame with the same index.
    if not len(multipoints):
        return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)

    # Plain shapely arrays have no .crs attribute; fall back to None.
    try:
        crs = multipoints.crs
    except AttributeError:
        crs = None

    # One row per point, keeping the parent geometry's index label.
    try:
        point_df = multipoints.explode(index_parts=False)
        if isinstance(point_df, GeoSeries):
            point_df = point_df.to_frame("geometry")
    except AttributeError:
        # Not a GeoSeries/GeoDataFrame: use shapely directly.
        points, indices = get_parts(multipoints, return_index=True)
        if isinstance(multipoints.index, pd.MultiIndex):
            indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)

        point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)

    # Pair each point with the next point of the same group.
    # NOTE(review): groups on the first index level only — for a MultiIndex
    # this assumes level 0 identifies the geometry; confirm against callers.
    point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)

    first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
    is_last_point = point_df["next"].isna()

    # Close the ring: the last point's "next" becomes the group's first point
    # (assignment aligns on the index labels).
    point_df.loc[is_last_point, "next"] = first_points
    assert point_df["next"].notna().all()

    point_df["geometry"] = [
        LineString([x1, x2])
        for x1, x2 in zip(point_df["geometry"], point_df["next"], strict=False)
    ]
    return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
|
|
1724
716
|
|
|
1725
|
-
if not was_multiple_gdfs:
|
|
1726
|
-
return gdf, isolated
|
|
1727
717
|
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
gdfs += (isolated,)
|
|
718
|
+
def explore_geosexception(e: GEOSException, *gdfs, logger=None) -> None:
    """Display the coordinates mentioned in a GEOSException for debugging.

    Parses "x y" float pairs out of the exception text, converts them to
    points, and shows them together with the given GeoDataFrames — via
    ``logger.error`` when a logger is supplied, otherwise interactively
    with ``explore``.
    """
    from ..maps.maps import Explore
    from ..maps.maps import explore
    from .conversion import to_gdf

    # GEOS error messages embed offending coordinates as e.g. "123.45 678.90".
    pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
    matches = re.findall(pattern, str(e))
    coords_in_error_message = [(float(x), float(y)) for x, y in matches]
    # Guard against being called with no gdfs (debug helper shouldn't crash).
    crs = gdfs[0].crs if gdfs else None
    exception_point = to_gdf(coords_in_error_message, crs=crs)

    if len(exception_point):
        exception_point["wkt"] = exception_point.to_wkt()
        if logger:
            logger.error(
                e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
            )
        else:
            explore(exception_point, *gdfs, mask=exception_point.buffer(100))
    elif logger:
        logger.error(e, Explore(*gdfs))
    else:
        explore(*gdfs)
|