ssb-sgis 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
- # %%
+ import re
  import warnings
- from collections.abc import Callable
  from typing import Any

  import numpy as np
@@ -8,90 +7,62 @@ import pandas as pd
  import shapely
  from geopandas import GeoDataFrame
  from geopandas import GeoSeries
+ from geopandas.array import GeometryArray
  from numpy.typing import NDArray
- from shapely import Geometry
- from shapely import STRtree
  from shapely import extract_unique_points
+ from shapely import force_2d
  from shapely import get_coordinates
+ from shapely import get_exterior_ring
+ from shapely import get_parts
  from shapely import linearrings
+ from shapely import linestrings
+ from shapely import make_valid
  from shapely import polygons
  from shapely.errors import GEOSException
  from shapely.geometry import LinearRing
  from shapely.geometry import LineString
  from shapely.geometry import Point

- try:
-     import numba
- except ImportError:
-
-     class numba:
-         """Placeholder."""
-
-         @staticmethod
-         def njit(func) -> Callable:
-             """Placeholder that does nothing."""
-
-             def wrapper(*args, **kwargs):
-                 return func(*args, **kwargs)
-
-             return wrapper
-
-
- from ..debug_config import _DEBUG_CONFIG
- from ..maps.maps import explore
- from .conversion import to_gdf
+ from ..networkanalysis.closing_network_holes import get_angle
+ from .buffer_dissolve_explode import buff
+ from .buffer_dissolve_explode import dissexp
+ from .buffer_dissolve_explode import dissexp_by_cluster
+ from .conversion import coordinate_array
  from .conversion import to_geoseries
+ from .duplicates import get_intersections
  from .duplicates import update_geometries
  from .general import clean_geoms
+ from .general import sort_large_first
+ from .general import sort_small_first
+ from .general import to_lines
  from .geometry_types import make_all_singlepart
- from .geometry_types import to_single_geom_type
  from .overlay import clean_overlay
+ from .polygon_operations import close_all_holes
  from .polygon_operations import eliminate_by_longest
- from .polygon_operations import split_by_neighbors
+ from .polygon_operations import get_cluster_mapper
+ from .polygon_operations import get_gaps
  from .polygons_as_rings import PolygonsAsRings
  from .sfilter import sfilter
- from .sfilter import sfilter_inverse

  warnings.simplefilter(action="ignore", category=UserWarning)
  warnings.simplefilter(action="ignore", category=RuntimeWarning)


- PRECISION = 1e-3
+ PRECISION = 1e-4
  BUFFER_RES = 50


- # def explore(*args, **kwargs):
- #     pass
-
-
- # def explore_locals(*args, **kwargs):
- #     pass
-
-
- # def no_njit(func):
- #     def wrapper(*args, **kwargs):
- #         result = func(*args, **kwargs)
- #         return result
-
- #     return wrapper
-
-
- # numba.njit = no_njit
-
-
  def coverage_clean(
      gdf: GeoDataFrame,
      tolerance: int | float,
-     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
-     snap_to_anchors: bool = True,
-     **kwargs,
+     mask=None,
+     *,
+     duplicate_action: str = "fix",
+     grid_sizes: tuple[None | int] = (None,),
+     logger=None,
  ) -> GeoDataFrame:
      """Fix thin gaps, holes, slivers and double surfaces.

-     The operations might raise GEOSExceptions, so it might be nessecary to set
-     the 'grid_sizes' argument, it might also be a good idea to run coverage_clean
-     twice to fill gaps resulting from these GEOSExceptions.
-
      Rules:
      - Holes (interiors) thinner than the tolerance are closed.
      - Gaps between polygons are filled if thinner than the tolerance.
@@ -99,9 +70,9 @@ def coverage_clean(
        into the neighbor polygon with the longest shared border.
      - Double surfaces thinner than the tolerance are eliminated.
        If duplicate_action is "fix", thicker double surfaces will
-       be updated.
-     - Line and point geometries are removed with no warning.
-     - MultiPolygons and GeometryCollections are exploded to Polygons.
+       be updated from top to bottom of the GeoDataFrame's rows.
+     - Line and point geometries are removed.
+     - MultiPolygons are exploded to Polygons.
      - Index is reset.

      Args:
@@ -110,1625 +81,660 @@ def coverage_clean(
            for polygons to be eliminated. Any gap, hole, sliver or double
            surface that is empty after a negative buffer of tolerance / 2
            is eliminated into the neighbor with the longest shared border.
-         mask: Mask to clip gdf to.
-         snap_to_anchors: If True (default), snaps to anchor nodes in gdf. If False,
-             only snaps to mask nodes (mask cannot be None in this case).
-         **kwargs: Temporary backwards compatibility to avoid TypeErrors.
+         mask: Unused.
+         duplicate_action: Either "fix", "error" or "ignore".
+             If "fix" (default), double surfaces thicker than the
+             tolerance will be updated from top to bottom (function update_geometries)
+             and then dissolved into the neighbor polygon with the longest shared border.
+             If "error", an Exception is raised if there are any double surfaces thicker
+             than the tolerance. If "ignore", double surfaces are kept as is.
+         grid_sizes: One or more grid_sizes used in overlay and dissolve operations that
+             might raise a GEOSException. Defaults to (None,), meaning no grid_sizes.
+         logger: Optional.

      Returns:
          A GeoDataFrame with cleaned polygons.
+
      """
      if not len(gdf):
          return gdf

-     gdf_original = gdf.copy()
+     _cleaning_checks(gdf, tolerance, duplicate_action)

-     # more_than_one = get_num_geometries(gdf.geometry.values) > 1
-     # gdf.loc[more_than_one, gdf._geometry_column_name] = gdf.loc[
-     #     more_than_one, gdf._geometry_column_name
-     # ].apply(_unary_union_for_notna)
-
-     if mask is not None:
-         try:
-             mask: GeoDataFrame = mask[["geometry"]].pipe(make_all_singlepart)
-         except Exception:
-             mask: GeoDataFrame = (
-                 to_geoseries(mask).to_frame("geometry").pipe(make_all_singlepart)
-             )
+     if not gdf.index.is_unique:
+         gdf = gdf.reset_index(drop=True)

-     # mask: GeoDataFrame = close_all_holes(
-     #     dissexp_by_cluster(gdf[["geometry"]])
-     # ).pipe(make_all_singlepart)
-     # mask = GeoDataFrame(
-     #     {
-     #         "geometry": [
-     #             mask.union_all()
-     #             .buffer(
-     #                 PRECISION,
-     #                 resolution=1,
-     #                 join_style=2,
-     #             )
-     #             .buffer(
-     #                 -PRECISION,
-     #                 resolution=1,
-     #                 join_style=2,
-     #             )
-     #         ]
-     #     },
-     #     crs=gdf.crs,
-     # ).pipe(make_all_singlepart)
-     # # gaps = shapely.union_all(get_gaps(mask).geometry.values)
-     # # mask = shapely.get_parts(extract_unique_points(mask.geometry.values))
-     # # not_by_gaps = shapely.distance(mask, gaps) > PRECISION
-     # # mask = GeoDataFrame({"geometry": mask[not_by_gaps]})
-
-     gdf = snap_polygons(gdf, tolerance, mask=mask, snap_to_anchors=snap_to_anchors)
-
-     if mask is not None:
-         missing_from_mask = clean_overlay(
-             mask, gdf, how="difference", geom_type="polygon"
-         ).loc[lambda x: x.buffer(-tolerance + PRECISION).is_empty]
-         gdf, _ = eliminate_by_longest(gdf, missing_from_mask)
-
-     missing_from_gdf = sfilter_inverse(gdf_original, gdf.buffer(-PRECISION)).loc[
-         lambda x: (~x.buffer(-PRECISION).is_empty)
+     gdf = make_all_singlepart(gdf).loc[
+         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
      ]
-     return pd.concat([gdf, missing_from_gdf], ignore_index=True).pipe(
-         update_geometries, geom_type="polygon"
+
+     gdf = safe_simplify(gdf, PRECISION)
+
+     gdf = (
+         clean_geoms(gdf)
+         .pipe(make_all_singlepart)
+         .loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
      )

+     try:
+         gaps = get_gaps(gdf, include_interiors=True)
+     except GEOSException:
+         for i, grid_size in enumerate(grid_sizes):
+             try:
+                 gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
+                 if grid_size:
+                     # in order to not get more gaps
+                     gaps.geometry = gaps.buffer(grid_size)
+                 break
+             except GEOSException as e:
+                 if i == len(grid_sizes) - 1:
+                     explore_geosexception(e, gdf, logger=logger)
+                     raise e

- def snap_polygons(
-     gdf: GeoDataFrame,
-     tolerance: int | float,
-     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
-     snap_to_anchors: bool = True,
- ) -> GeoDataFrame:
-     if not len(gdf):
-         return gdf.copy()
+     gaps["_was_gap"] = 1

-     gdf_orig = gdf.copy()
+     if duplicate_action == "ignore":
+         double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
+         double["_double_idx"] = None
+     else:
+         double = get_intersections(gdf)
+         double["_double_idx"] = range(len(double))

-     crs = gdf.crs
+     gdf, slivers = split_out_slivers(gdf, tolerance)

-     gdf = (
-         clean_geoms(gdf)
-         .pipe(make_all_singlepart, ignore_index=True)
-         .pipe(to_single_geom_type, "polygon")
+     gdf["_poly_idx"] = range(len(gdf))
+
+     thin_gaps_and_double = pd.concat([gaps, double]).loc[
+         lambda x: x.buffer(-tolerance / 2).is_empty
+     ]
+
+     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+
+     if not all_are_thin and duplicate_action == "fix":
+         gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
+             gdf, double, slivers, thin_gaps_and_double, tolerance
+         )
+
+     elif not all_are_thin and duplicate_action == "error":
+         raise ValueError("Large double surfaces.")
+
+     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
+     to_eliminate = safe_simplify(to_eliminate, PRECISION)
+
+     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+
+     to_eliminate = try_for_grid_size(
+         split_by_neighbors,
+         grid_sizes=grid_sizes,
+         args=(to_eliminate, gdf),
+         kwargs=dict(tolerance=tolerance),
      )

-     gdf.crs = None
+     to_eliminate["_eliminate_idx"] = range(len(to_eliminate))

-     gdf = gdf[lambda x: ~x.buffer(-tolerance / 2 - PRECISION).is_empty]
-     # gdf = gdf[lambda x: ~x.buffer(-tolerance / 3).is_empty]
+     to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))

-     # donuts_without_spikes = (
-     #     gdf.geometry.buffer(tolerance / 2, resolution=1, join_style=2)
-     #     .buffer(-tolerance, resolution=1, join_style=2)
-     #     .buffer(tolerance / 2, resolution=1, join_style=2)
-     #     .pipe(to_lines)
-     #     .buffer(tolerance)
-     # )
+     gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]

-     gdf.geometry = (
-         PolygonsAsRings(gdf.geometry.values)
-         .apply_numpy_func(
-             _snap_linearrings,
-             kwargs=dict(
-                 tolerance=tolerance,
-                 mask=mask,
-                 snap_to_anchors=snap_to_anchors,
+     poly_idx_mapper: pd.Series = (
+         clean_overlay(
+             buff(
+                 to_eliminate[["_eliminate_idx", "geometry"]],
+                 tolerance,
+                 resolution=BUFFER_RES,
              ),
+             gdf_geoms_idx,
+             geom_type="polygon",
          )
-         .to_numpy()
+         .pipe(sort_large_first)
+         .drop_duplicates("_eliminate_idx")
+         .set_index("_eliminate_idx")["_poly_idx"]
      )

-     gdf = (
-         to_single_geom_type(make_all_singlepart(clean_geoms(gdf)), "polygon")
-         .reset_index(drop=True)
-         .set_crs(crs)
+     to_eliminate["_poly_idx"] = to_eliminate["_eliminate_idx"].map(poly_idx_mapper)
+     isolated = to_eliminate[lambda x: x["_poly_idx"].isna()]
+     intersecting = to_eliminate[lambda x: x["_poly_idx"].notna()]
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             without_double = update_geometries(
+                 intersecting, geom_type="polygon", grid_size=grid_size
+             ).drop(columns=["_eliminate_idx", "_double_idx"])
+             break
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(e, gdf, intersecting, isolated, logger=logger)
+                 raise e
+
+     not_really_isolated = isolated[["geometry", "_eliminate_idx", "_cluster"]].merge(
+         without_double.drop(columns=["geometry"]),
+         on="_cluster",
+         how="inner",
      )

-     missing = clean_overlay(gdf_orig, gdf, how="difference").loc[
-         lambda x: ~x.buffer(-tolerance / 2).is_empty
+     really_isolated = isolated.loc[
+         lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
      ]

-     if mask is None:
-         mask = GeoDataFrame({"geometry": []})
-     explore(
-         gdf,
-         # gdf_orig,
-         # thin,
-         mask,
-         missing,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.36765872, 59.01199837, 1),
+     is_gap = really_isolated["_was_gap"] == 1
+     isolated_gaps = really_isolated.loc[is_gap, ["geometry"]].sjoin_nearest(
+         gdf, max_distance=PRECISION
      )
-     explore(
-         gdf,
-         gdf_orig,
-         # thin,
-         mask,
-         missing,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.36820681, 59.01182298, 2),
+     really_isolated = really_isolated[~is_gap]
+
+     really_isolated["_poly_idx"] = (
+         really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
      )
-     explore(
-         gdf,
-         gdf_orig,
-         # thin,
-         mask,
-         missing,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.37327042, 59.01099359, 5),
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             cleaned = (
+                 dissexp(
+                     pd.concat(
+                         [
+                             gdf,
+                             without_double,
+                             not_really_isolated,
+                             really_isolated,
+                             isolated_gaps,
+                         ]
+                     ).drop(
+                         columns=[
+                             "_cluster",
+                             "_was_gap",
+                             "_eliminate_idx",
+                             "index_right",
+                             "_double_idx",
+                         ],
+                         errors="ignore",
+                     ),
+                     by="_poly_idx",
+                     aggfunc="first",
+                     dropna=True,
+                     grid_size=grid_size,
+                 )
+                 .sort_index()
+                 .reset_index(drop=True)
+             )
+             break
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(
+                     e, gdf, without_double, isolated, really_isolated, logger=logger
+                 )
+                 raise e
+
+     cleaned_area_sum = cleaned.area.sum()
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             cleaned = clean_overlay(
+                 gdf.drop(columns="_poly_idx"),
+                 cleaned,
+                 how="update",
+                 geom_type="polygon",
+             )
+             break
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(
+                     e,
+                     gdf,
+                     cleaned,
+                     without_double,
+                     isolated,
+                     really_isolated,
+                     logger=logger,
+                 )
+                 raise e
+             try:
+                 cleaned = update_geometries(
+                     cleaned, geom_type="polygon", grid_size=grid_size
+                 )
+             except GEOSException:
+                 pass
+
+     # if logger and cleaned_area_sum > cleaned.area.sum() + 1:
+     #     print("\ncleaned.area.sum() diff", cleaned_area_sum - cleaned.area.sum())
+     #     logger.debug("cleaned.area.sum() diff", cleaned_area_sum - cleaned.area.sum())
+
+     cleaned = sort_large_first(cleaned)
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             cleaned = update_geometries(
+                 cleaned, geom_type="polygon", grid_size=grid_size
+             )
+             break
+         except GEOSException as e:
+             cleaned.geometry = shapely.simplify(
+                 cleaned.geometry, PRECISION * (10 * i + 1)
+             )
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(
+                     e,
+                     gdf,
+                     cleaned,
+                     without_double,
+                     isolated,
+                     really_isolated,
+                     logger=logger,
+                 )
+                 raise e
+
+     cleaned = safe_simplify(cleaned, PRECISION)
+     cleaned.geometry = shapely.make_valid(cleaned.geometry)
+
+     return cleaned
+
+
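The reworked `coverage_clean` above is keyword-only after `mask`. A minimal usage sketch (editorial, not part of the diff; assumes `sgis` is importable as below and a metric CRS):

```python
import sgis as sg
from shapely.geometry import Polygon

# Two squares separated by a sliver-thin gap (0.01 units, far below the tolerance).
left = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
right = Polygon([(1.01, 0), (2, 0), (2, 1), (1.01, 1)])
gdf = sg.to_gdf([left, right], crs=25833)

# The gap is dissolved into the neighbor with the longest shared border.
# grid_sizes supplies fallback snapping grids if a GEOSException is raised.
cleaned = sg.coverage_clean(gdf, tolerance=0.1, grid_sizes=(None, 1e-6))
# expect: still two polygons, now sharing a border instead of a gap
```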
+ def safe_simplify(gdf, tolerance: float | int) -> GeoDataFrame:
+     """Simplify only if the resulting area is no more than 1 percent larger.
+
+     Because simplifying can result in holes being filled.
+     """
+     length_then = gdf.length
+     copied = gdf.copy()
+     copied.geometry = shapely.make_valid(
+         shapely.simplify(copied.geometry.values, tolerance=tolerance)
      )
-     explore(
-         gdf,
-         gdf_orig,
-         # thin,
-         mask,
-         missing,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.36853688, 59.01169013, 5),
+     copied.loc[copied.area > length_then * 1.01, copied._geometry_column_name] = (
+         gdf.loc[copied.area > length_then * 1.01, copied._geometry_column_name]
      )
-     explore(
-         gdf,
-         # gdf_orig,
-         missing,
-         mask,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.37142966, 59.009799, 0.01),
-         max_zoom=40,
+
+     return copied
+
+
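`safe_simplify` reverts any row whose geometry grew during simplification (a filled-in hole inflates the area). Note that the guard in the released code measures the pre-simplify size with `gdf.length`; the standalone sketch below, assuming plain GeoPandas, uses area on both sides of the comparison:

```python
import geopandas as gpd
import shapely

def simplify_unless_grown(gdf: gpd.GeoDataFrame, tolerance: float) -> gpd.GeoDataFrame:
    """Sketch: simplify, but revert rows whose area grew more than 1 percent."""
    out = gdf.copy()
    out.geometry = shapely.make_valid(
        shapely.simplify(out.geometry.values, tolerance=tolerance)
    )
    grew = out.area > gdf.area * 1.01  # e.g. a hole that got filled in
    out.loc[grew, out.geometry.name] = gdf.loc[grew, gdf.geometry.name]
    return out
```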
+ def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+     """Remove thin spikes in polygons.
+
+     Note that this function might be slow. Should only be used if necessary.
+
+     Args:
+         gdf: GeoDataFrame of polygons
+         tolerance: distance (usually meters) used as the minimum thickness
+             for polygons to be eliminated. Any spike thinner than the tolerance
+             will be removed.
+
+     Returns:
+         A GeoDataFrame of polygons without spikes thinner than the tolerance.
+     """
+     gdf.geometry = (
+         PolygonsAsRings(gdf.geometry)
+         .apply_numpy_func(_remove_spikes, args=(tolerance,))
+         .to_numpy()
      )
-     explore(
-         gdf,
-         # gdf_orig,
-         missing,
-         mask,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.36866312, 59.00842846, 0.01),
-         max_zoom=40,
+     return gdf
+
+
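A usage sketch for the new `remove_spikes` (editorial; assumes the function is exported at package level like the rest of the module's public helpers):

```python
import sgis as sg
from shapely.geometry import Polygon

# A unit square with a near-zero-width spike sticking out of the top edge.
spiky = Polygon([(0, 0), (1, 0), (1, 1), (0.51, 1), (0.5, 3), (0.49, 1), (0, 1)])
gdf = sg.to_gdf(spiky, crs=25833)

# Spikes thinner than the tolerance are removed; the square body is kept.
despiked = sg.remove_spikes(gdf, tolerance=0.1)
```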
+ # def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+ #     return clean_overlay(
+ #         gdf, gdf[["geometry"]], how="intersection", grid_size=tolerance
+ #     )
+
+
+ def _remove_spikes(
+     geoms: NDArray[LinearRing], tolerance: int | float
+ ) -> NDArray[LinearRing]:
+     if not len(geoms):
+         return geoms
+     geoms = to_geoseries(geoms).reset_index(drop=True)
+
+     points = (
+         extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
      )

-     explore(
-         gdf,
-         # gdf_orig,
-         missing,
-         mask,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(5.37707146, 59.01065274, 0.4),
-         max_zoom=40,
+     points = get_angle_between_indexed_points(points)
+
+     def to_buffered_rings_without_spikes(x):
+         polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
+
+         return (
+             polys.buffer(-tolerance, resolution=BUFFER_RES)
+             .explode(index_parts=False)
+             .pipe(close_all_holes)
+             .pipe(get_exterior_ring)
+             .buffer(tolerance * 10)
+         )
+
+     buffered = to_buffered_rings_without_spikes(
+         geoms.buffer(tolerance / 2, resolution=BUFFER_RES)
      )

-     explore(
-         gdf,
-         # gdf_orig,
-         missing,
-         mask,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         center=(-52074.0241, 6580847.4464, 0.1),
-         max_zoom=40,
+     points_without_spikes = (
+         extract_unique_points(geoms)
+         .explode(index_parts=False)
+         .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
      )

-     explore(
-         gdf,
-         # gdf_orig,
-         missing,
-         mask,
-         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
-         ),
-         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
-             wkt=lambda x: [g.wkt for g in x.geometry]
+     # linearrings require at least 4 coordinate pairs, or three unique
+     points_without_spikes = points_without_spikes.loc[
+         lambda x: x.groupby(level=0).size() >= 3
+     ]
+
+     # need an index from 0 to n-1 in 'linearrings'
+     to_int_index = {
+         ring_idx: i
+         for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
+     }
+     int_indices = points_without_spikes.index.map(to_int_index)
+
+     as_lines = pd.Series(
+         linearrings(
+             get_coordinates(points_without_spikes.geometry.values),
+             indices=int_indices,
          ),
-         center=(5.38389153, 59.00548223, 1),
-         max_zoom=40,
+         index=points_without_spikes.index.unique(),
      )

-     # explore(
-     #     gdf_orig,
-     #     gdf,
-     #     dups=get_intersections(gdf, geom_type="polygon"),
-     #     msk=mask,
-     #     gaps=get_gaps(gdf),
-     #     updated=update_geometries(gdf, geom_type="polygon"),
-     #     # browser=False,
-     # )
-
-     # gdf = update_geometries(gdf, geom_type="polygon")
-
-     return gdf  # .pipe(clean_clip, mask, geom_type="polygon")
-
-
- # @numba.njit
- def _snap_to_anchors(
-     geoms,
-     indices: NDArray[np.int32],
-     anchors,
-     anchor_indices,
-     mask,
-     mask_indices,
-     was_midpoint,
-     was_midpoint_mask,
-     tolerance: int | float,
- ) -> tuple[NDArray, NDArray, NDArray]:
-
-     coords, all_distances = _snap_to_anchors_inner(
-         geoms,
-         indices,
-         anchors,
-         anchor_indices,
-         mask,
-         mask_indices,
-         was_midpoint,
-         was_midpoint_mask,
-         tolerance,
+     # the missing polygons are thin and/or spiky. Let's remove them
+     missing = geoms.loc[~geoms.index.isin(as_lines.index)]
+
+     missing = pd.Series(
+         [None] * len(missing),
+         index=missing.index.values,
      )

-     not_inf = coords[:, 0] != np.inf
-     all_distances = all_distances[not_inf]
-     indices = indices[not_inf]
-     coords = coords[not_inf]
-
-     is_snapped = np.full(len(coords), False)
-
-     n_coords = len(coords)
-
-     range_indices = np.arange(len(coords))
-
-     range_index = -1
-     for index in np.unique(indices):
-         cond = indices == index
-         these_coords = coords[cond]
-
-         # explore(ll=to_gdf(LineString(shapely.points(these_coords)), 25833))
-
-         # assert np.array_equal(these_coords[0], these_coords[-1]), these_coords
-
-         these_range_indices = range_indices[cond]
-         these_distances = all_distances[cond]
-         for i in range(len(these_coords)):
-             range_index += 1
-             if is_snapped[range_index]:
-                 print(i, "000")
-                 continue
-             # distances = all_distances[range_index]
-             distances = these_distances[i]
-             # distances = these_distances[:, i]
-             min_dist = np.min(distances)
-             if min_dist > tolerance:  # or min_dist == 0:
-                 print(i, "111", min_dist)
-                 continue
-
-             is_snapped_now = False
-
-             for j in np.argsort(distances):
-                 if distances[j] > tolerance:  # TODO or distances[j] == 0:
-                     break
-
-                 if was_midpoint_mask[j]:
-                     continue
-
-                 anchor = anchors[j]
-                 ring = these_coords.copy()
-                 ring[i] = anchor
-
-                 # snap the nexts points to same anchor if neighboring points have same anchor
-                 # in order to properly check if the ring will be simple after snapping
-                 indices_with_same_anchor = [range_index]
-                 # these_coords = coords[indices==index]
-
-                 pos_counter = 0
-                 # has_same_anchor_pos = True
-                 # has_same_anchor_neg = True
-                 while (
-                     pos_counter + i < len(these_distances) - 1
-                 ):  # has_same_anchor_pos or has_same_anchor_neg:
-                     pos_counter += 1
-
-                     # if indices[i + pos_counter] != index:
-                     #     break
-                     # next_distances = all_distances[range_index + pos_counter]
-                     next_distances = these_distances[i + pos_counter]
-                     has_same_anchor_pos = False
-                     for j2 in np.argsort(next_distances):
-                         if was_midpoint_mask[j2]:
-                             continue
-                         if next_distances[j2] > tolerance:
-                             break
-
-                         has_same_anchor_pos = j2 == j
-                         # print(
-                         #     "pos c",
-                         #     i,
-                         #     j,
-                         #     j2,
-                         #     pos_counter,
-                         #     has_same_anchor_pos,
-                         #     distances[j],
-                         #     next_distances[j2],
-                         # )
-                         break
-                     if has_same_anchor_pos:
-                         ring[i + pos_counter] = anchor
-                         indices_with_same_anchor.append(range_index + pos_counter)
-                     else:
-                         break
-
-                 # for j4 in np.arange(
-                 #     indices_with_same_anchor[0], indices_with_same_anchor[-1]
-                 # ):
-                 #     ring[j4 - range_index + i] = anchor
-                 #     indices_with_same_anchor.append(j4)
-
-                 if i == 0:
-                     # snap points at the end of the line if same anchor
-                     neg_counter = 0
-                     # has_same_anchor_neg = True
-                     while True:  # has_same_anchor_pos or has_same_anchor_neg:
-                         neg_counter -= 1
-
-                         # if indices[i + pos_counter] != index:
-                         #     break
-                         this_range_index = these_range_indices[neg_counter]
-                         # next_distances = all_distances[this_range_index]
-                         next_distances = these_distances[neg_counter]
-                         has_same_anchor_neg = False
-                         for j3 in np.argsort(next_distances):
-                             if was_midpoint_mask[j3]:
-                                 continue
-                             if next_distances[j3] > tolerance:
-                                 break
-
-                             has_same_anchor_neg = j3 == j
-                             # print(
-                             #     "neg c",
-                             #     i,
-                             #     j,
-                             #     j3,
-                             #     pos_counter,
-                             #     # has_same_anchor,
-                             #     distances[j],
-                             #     next_distances[j3],
-                             # )
-                             break
-                         if has_same_anchor_neg:
-                             ring[neg_counter] = anchor
-                             indices_with_same_anchor.append(this_range_index)
-                         else:
-                             break
-
-                     # for j5 in np.arange(0, indices_with_same_anchor[-1]):
-                     #     ring[j5 - range_index + i] = anchor
-                     #     indices_with_same_anchor.append(j5)
-
-                 indices_with_same_anchor = np.unique(indices_with_same_anchor)
-
-                 line_is_simple: bool = LineString(ring).is_simple
-
-                 # if i in [67, 68, 69, 173, 174, 175, 176, 177]:  # or
-                 if Point(these_coords[i]).intersects(
-                     to_gdf([12.08375303, 67.50052183], 4326)
-                     .to_crs(25833)
-                     .buffer(10)
-                     .union_all()
-                 ):
-                     # for xxx, yyy in locals().items():
-                     #     if len(str(yyy)) > 50:
-                     #         continue
-                     #     print(xxx)
-                     #     print(yyy)
-
-                     # print("prev:", was_midpoint_mask[j - 1])
-                     # print(distances[np.argsort(distances)])
-                     # print(anchors[np.argsort(distances)])
-                     # print(ring)
-                     explore(
-                         out_coords=to_gdf(
-                             shapely.linestrings(coords, indices=indices), 25833
-                         ),
-                         llll=to_gdf(LineString(ring), 25833),
-                         # this=to_gdf(this),
-                         # next_=to_gdf(next_),
-                         # line=to_gdf(LineString(np.array([this, next_])), 25833),
-                         geom=to_gdf(these_coords[i], 25833),
-                         prev=to_gdf(these_coords[i - 1], 25833),
-                         nxt=to_gdf(these_coords[i + 1], 25833),
-                         nxt2=to_gdf(these_coords[i + 2], 25833),
-                         anchor=to_gdf(anchor, 25833),
-                         # browser=True,
-                     )
-
-                 # print(
-                 #     "line_is_simple", line_is_simple, range_index, i, index, j
-                 # )  # , j2, j3, x)
-
-                 if not line_is_simple:
-                     # for j4 in range(len(ring)):
-                     #     this_p = ring[j4]
-                     #     for j5 in range(len(ring)):
-                     #         that_p = ring[j5]
-                     #         dist_ = np.sqrt(
-                     #             (this_p[0] - that_p[0]) ** 2
-                     #             + (this_p[1] - that_p[1]) ** 2
-                     #         )
-                     #         if dist_ > 0 and dist_ < 1e-5:
-                     #             print(this_p)
-                     #             print(that_p)
-                     #             ring[j5] = this_p
-
-                     print(LineString(ring).wkt)
-                     # explore(
-                     #     out_coords=to_gdf(
-                     #         shapely.linestrings(coords, indices=indices), 25833
-                     #     ),
-                     #     llll=to_gdf(LineString(ring), 25833),
-                     #     # this=to_gdf(this),
-                     #     # next_=to_gdf(next_),
-                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
-                     #     geom=to_gdf(these_coords[i], 25833),
-                     #     prev=to_gdf(these_coords[i - 1], 25833),
-                     #     nxt=to_gdf(these_coords[i + 1], 25833),
-                     #     nxt2=to_gdf(these_coords[i + 2], 25833),
-                     #     anchor=to_gdf(anchor, 25833),
-                     #     # browser=True,
-                     # )
-
-                     line_is_simple: bool = LineString(ring).is_simple
-
-                 if line_is_simple:
-                     # coords[i] = anchors[j]
-                     # is_snapped_to[j] = True
-                     # is_snapped[i] = True
-                     # explore(
-                     #     out_coords=to_gdf(
-                     #         shapely.linestrings(coords, indices=indices), 25833
-                     #     ),
-                     #     llll=to_gdf(LineString(ring), 25833),
-                     #     # this=to_gdf(this),
-                     #     # next_=to_gdf(next_),
-                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
-                     #     anc=to_gdf(anchors[j]),
-                     #     geom=to_gdf(coords[i], 25833),
-                     #     these=to_gdf(coords[i : i + n_points_with_same_anchor ], 25833),
-                     #     prev=to_gdf(coords[i - 1], 25833),
-                     #     prev2=to_gdf(coords[i - 2], 25833),
-                     #     nxt=to_gdf(coords[i + n_points_with_same_anchor + 1], 25833),
-                     #     nxt2=to_gdf(coords[i + n_points_with_same_anchor + 2], 25833),
-                     #     nxt3=to_gdf(coords[i + n_points_with_same_anchor + 3], 25833),
-                     # )
-                     # print(coords[i : i + n_points_with_same_anchor + 1])
-                     for (
-                         x
-                     ) in indices_with_same_anchor:  # range(n_points_with_same_anchor):
-                         # print(range_index, i, index, j, j2, j3, x)
-                         coords[x] = anchor  # s[j]
-                         is_snapped[x] = True
-                         # coords[i + x] = anchors[j]
-                         # is_snapped[i + x] = True
-                     # print(coords[i : i + n_points_with_same_anchor + 1])
-
-                     is_snapped_now = True
-                     break
-                 # else:
-
-             if not is_snapped_now:
-                 coords[range_index] = anchors[np.argmin(distances)]
-                 # is_snapped_to[np.argmin(distances)] = True
-
-             if 0 and index == 0:  # i > 30 and i < 40:
-                 print(i)
-                 explore(
-                     out_coords=to_gdf(
-                         shapely.linestrings(coords, indices=indices), 25833
-                     ),
-                     llll=to_gdf(LineString(ring), 25833),
-                     pppp=to_gdf(shapely.points(ring), 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     # this=to_gdf(this),
-                     # next_=to_gdf(next_),
-                     # line=to_gdf(LineString(np.array([this, next_])), 25833),
-                     anc=to_gdf(anchors[j]).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     geom=to_gdf(these_coords[i], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     # these=to_gdf(
-                     #     these_coords[i : i + n_points_with_same_anchor], 25833
-                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
-                     prev=to_gdf(these_coords[i - 1], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     prev2=to_gdf(these_coords[i - 2], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     nxt=to_gdf(these_coords[i + 1], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     nxt2=to_gdf(these_coords[i + 2], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     nxt3=to_gdf(these_coords[i + 3], 25833).assign(
-                         wkt=lambda x: [g.wkt for g in x.geometry]
-                     ),
-                     # browser=True,
-                     # nxt_n=to_gdf(
-                     #     coords[i + n_points_with_same_anchor + 1], 25833
-                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
-                     # nxt_n2=to_gdf(
-                     #     coords[i + n_points_with_same_anchor + 2], 25833
-                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
-                     # nxt_n3=to_gdf(
-                     #     coords[i + n_points_with_same_anchor + 3], 25833
-                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
-                 )
-                 # if (
-                 #     indices[i] == 48
-                 # ):  # and int(out_coords[i][0]) == 375502 and int(out_coords[i][1]) == 7490104:
-                 #     print(geom, out_coords[i], out_coords[-3:])
-                 # xxx += 1
-                 # if xxx > 100 and i >= 2106:
-                 #     print(locals())
-                 #     explore(
-                 #         geom=to_gdf(geom, 25833),
-                 #         out=to_gdf(out_coords[i], 25833),
-                 #         anc=to_gdf(shapely.points(anchors), 25833),
-                 #         llll=to_gdf(
-                 #             shapely.geometry.LineString(
-                 #                 np.array(out_coords)[indices[: len(out_coords)] == 48]
-                 #             ),
-                 #             25833,
-                 #         ),
-                 #     )
-
-     return coords, indices
-
-
- @numba.njit
- def _snap_to_anchors_inner(
-     geoms,
-     indices: NDArray[np.int32],
-     anchors,
-     anchor_indices,
-     mask,
-     mask_indices,
-     was_midpoint,
-     was_midpoint_mask,
-     tolerance: int | float,
- ) -> tuple[NDArray, NDArray, NDArray]:
-     # def orientation(p, q, r):
-     #     # Calculate orientation of the triplet (p, q, r).
-     #     # 0 -> collinear, 1 -> clockwise, 2 -> counterclockwise
-     #     val = (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
-     #     if val == 0:
-     #         return 0
-     #     return 1 if val > 0 else 2
-
-     # def on_segment(p, q, r):
-     #     # Check if point q lies on line segment pr
-     #     if min(p[0], r[0]) <= q[0] <= max(p[0], r[0]) and min(p[1], r[1]) <= q[
-     #         1
-     #     ] <= max(p[1], r[1]):
-     #         return True
-     #     return False
-
-     # def check_intersection(line1, line2):
-     #     """
-     #     Check if two line segments intersect.
-
-     #     Parameters:
-     #     line1 : np.array : 2x2 array with endpoints of the first line segment [[x1, y1], [x2, y2]]
-     #     line2 : np.array : 2x2 array with endpoints of the second line segment [[x3, y3], [x4, y4]]
-
-     #     Returns:
-     #     bool : True if the lines intersect, False otherwise.
-     #     """
-
-     #     p1, q1 = line1
-     #     p2, q2 = line2
-
-     #     # Find the four orientations needed for the general and special cases
-     #     o1 = orientation(p1, q1, p2)
-     #     o2 = orientation(p1, q1, q2)
-     #     o3 = orientation(p2, q2, p1)
-     #     o4 = orientation(p2, q2, q1)
-
-     #     # General case
-     #     if o1 != o2 and o3 != o4:
-     #         return True
-
-     #     # Special cases
-     #     # p1, q1, p2 are collinear and p2 lies on segment p1q1
-     #     if o1 == 0 and on_segment(p1, p2, q1):
-     #         return True
-
-     #     # p1, q1, q2 are collinear and q2 lies on segment p1q1
-     #     if o2 == 0 and on_segment(p1, q2, q1):
-     #         return True
-
-     #     # p2, q2, p1 are collinear and p1 lies on segment p2q2
-     #     if o3 == 0 and on_segment(p2, p1, q2):
-     #         return True
-
-     #     # p2, q2, q1 are collinear and q1 lies on segment p2q2
-     #     if o4 == 0 and on_segment(p2, q1, q2):
-     #         return True
-
-     #     return False
-
-     out_coords = geoms.copy()
-     # is_snapped = np.full(len(geoms), False)
-
-     n_anchors = len(anchors)
-     mask_n_minus_1 = len(mask) - 1
-     is_snapped_to = np.full(len(anchors), False)
-     out_distances = np.full((len(geoms), n_anchors), tolerance * 3)
-
-     for i in range(len(geoms)):
-         # if is_snapped[i]:
-         #     continue
-         geom = geoms[i]
-         index = indices[i]
-         # if i == 0 or index != indices[i - 1]:
-         #     i_for_this_index = 0
-         # else:
-         #     i_for_this_index += 1
-
-         is_snapped = False
-         for j in range(len(mask)):
-             mask_index = mask_indices[j]
-
-             is_last = j == mask_n_minus_1 or mask_index != mask_indices[j + 1]
-             if is_last:
-                 continue
-
-             mask_point0 = mask[j]
-
-             # if (
-             #     not mask_is_snapped_to[j]
-             #     and np.sqrt(
-             #         (geom[0] - mask_point0[0]) ** 2 + (geom[1] - mask_point0[1]) ** 2
-             #     )
-             #     <= tolerance
-             # ):
-             #     out_coords[i] = mask_point0
-             #     mask_is_snapped_to[j] = True
-             #     is_snapped = True
-             #     break
-
-             mask_point1 = mask[j + 1]
-
-             segment_vector = mask_point1 - mask_point0
-             point_vector = geom - mask_point0
-             segment_length_squared = np.dot(segment_vector, segment_vector)
-             if segment_length_squared == 0:
-                 closest_point = mask_point0
-             else:
-                 factor = np.dot(point_vector, segment_vector) / segment_length_squared
-                 factor = max(0, min(1, factor))
-                 closest_point = mask_point0 + factor * segment_vector
-
-             if np.linalg.norm(geom - closest_point) == 0 and was_midpoint[i]:
-                 out_coords[i] = np.array([np.inf, np.inf])
-                 is_snapped = True
-                 break
+     return pd.concat([as_lines, missing]).sort_index()

-         if is_snapped:
-             continue

-         distances = np.full(n_anchors, tolerance * 3)
-         for j2 in range(n_anchors):
-             anchor = anchors[j2]
+ def get_angle_between_indexed_points(point_df: GeoDataFrame) -> GeoDataFrame:
+     """Get angle difference between the two lines."""
+     point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)

-             # if anchor_indices[j] == index:
-             #     continue
+     notna = point_df["next"].notna()

-             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
-             distances[j2] = dist
-             out_distances[i, j2] = dist
-             if dist == 0 and not was_midpoint_mask[j2]:
-                 break
+     this = coordinate_array(point_df.loc[notna, "geometry"].values)
+     next_ = coordinate_array(point_df.loc[notna, "next"].values)

-     return out_coords, out_distances
+     point_df.loc[notna, "angle"] = get_angle(this, next_)
+     point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)

+     point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]

- @numba.njit
- def _build_anchors(
-     geoms: NDArray[np.float64],
-     indices: NDArray[np.int32],
-     mask_coords: NDArray[np.float64],
-     mask_indices: NDArray[np.int32],
-     was_midpoint_mask: NDArray[bool],
-     tolerance: int | float,
- ):
-     anchors = list(mask_coords)
-     anchor_indices = list(mask_indices)
-     is_anchor_arr = np.full(len(geoms), False)
-     was_midpoint_mask = list(was_midpoint_mask)
-     for i in np.arange(len(geoms)):
-         geom = geoms[i]
-         index = indices[i]
-         # distances = []
-         # for j, anchor in zip(anchor_indices, anchors):
-
-         is_anchor = True
-         for j in range(len(anchors)):
-             # if indices[i] != indices[j]:
-             # if i != j and indices[i] != indices[j]:
-             anchor = anchors[j]
-             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
-             if dist <= tolerance:
-                 is_anchor = False
-                 break
-             # distances.append(dist)
-         # distances = np.array(distances)
-         is_anchor_arr[i] = is_anchor
-         if is_anchor:  # not len(distances) or np.min(distances) > tolerance:
-             anchors.append(geom)
-             anchor_indices.append(index)
-             was_midpoint_mask.append(True)
-     return anchors, anchor_indices, is_anchor_arr, was_midpoint_mask
-
-
- @numba.njit
- def _add_last_points_to_end(
-     coords: NDArray[np.float64],
-     indices: NDArray[np.int32],
- ) -> tuple[
-     NDArray[np.float64],
-     NDArray[np.int32],
- ]:
-     out_coords, out_indices = [coords[0]], [indices[0]]
-     last_coords = []
-     prev = coords[0]
-     first_coords = prev
-     n_minus_1 = len(coords) - 1
-     for i in np.arange(1, len(coords)):
-         idx = indices[i]
-         xy = coords[i]
-         distance_to_prev: float = np.sqrt(
-             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
-         )
-         if idx != indices[i - 1]:
-             first_coords = xy
-             out_coords.append(xy)
-             out_indices.append(idx)
-         elif not distance_to_prev:
-             if i == n_minus_1 or idx != indices[i + 1]:
-                 last_coords.append(xy)
-             prev = xy
-             continue
-         elif i == n_minus_1 or idx != indices[i + 1]:
-             out_coords.append(xy)
-             out_coords.append(first_coords)
-             out_indices.append(idx)
-             out_indices.append(idx)
-             last_coords.append(xy)
-         else:
-             out_coords.append(xy)
-             out_indices.append(idx)
-
-         prev = xy
-
-     return (out_coords, out_indices)
-
-
- @numba.njit
- def _add_last_points_to_end_with_third_arr(
-     coords: NDArray[np.float64],
-     indices: NDArray[np.int32],
-     third_arr: NDArray[Any],
- ) -> tuple[
-     NDArray[np.float64],
-     NDArray[np.int32],
-     NDArray[Any],
- ]:
-     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
-     last_coords = []
-     prev = coords[0]
-     first_coords = prev
-     n_minus_1 = len(coords) - 1
-     for i in np.arange(1, len(coords)):
-         idx = indices[i]
-         xy = coords[i]
-         distance_to_prev: float = np.sqrt(
-             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
-         )
-         if idx != indices[i - 1]:
-             first_coords = xy
-             out_coords.append(xy)
-             out_indices.append(idx)
-             out_third_arr.append(third_arr[i])
-         elif not distance_to_prev:
-             if i == n_minus_1 or idx != indices[i + 1]:
-                 last_coords.append(xy)
-             prev = xy
-             continue
-         elif i == n_minus_1 or idx != indices[i + 1]:
-             out_coords.append(xy)
-             out_coords.append(first_coords)
-             out_indices.append(idx)
-             out_indices.append(idx)
-             last_coords.append(xy)
-             out_third_arr.append(third_arr[i])
-             out_third_arr.append(third_arr[i])
-         else:
-             out_coords.append(xy)
-             out_indices.append(idx)
-             out_third_arr.append(third_arr[i])
-
-         prev = xy
-
-     return (out_coords, out_indices, out_third_arr)
-
-
- @numba.njit
- def _remove_duplicate_points(
-     coords: NDArray[np.float64],
-     indices: NDArray[np.int32],
-     third_arr: NDArray[Any],
- ):
-     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
-     prev = coords[0]
-     for i in np.arange(1, len(coords)):
-         idx = indices[i]
-         xy = coords[i]
-         distance_to_prev: float = np.sqrt(
-             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
-         )
-         if not distance_to_prev and idx == indices[i - 1]:
-             prev = xy
-             continue
+     return point_df

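The new `get_angle_between_indexed_points` computes, per ring (the level-0 index), the heading of each segment and the difference to the previous segment's heading. A rough standalone illustration of the same idea with plain numpy (a hypothetical helper, not the sgis API):

```python
import numpy as np

def heading_degrees(coords: np.ndarray) -> np.ndarray:
    """Heading of each segment in a ring, given an (n, 2) coordinate array."""
    diffs = np.diff(coords, axis=0)
    return np.degrees(np.arctan2(diffs[:, 1], diffs[:, 0]))

# Angle change at each interior vertex; values near +-180 suggest a spike.
coords = np.array([(0, 0), (1, 0), (1, 1), (0.5, 3.0), (0, 1)])
angle_diff = np.diff(heading_degrees(coords))
```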
-         if idx != indices[i - 1]:
-             out_coords.append(xy)
-             out_indices.append(idx)
-             out_third_arr.append(third_arr[i])
-             prev = xy
-             continue

-         out_coords.append(xy)
-         out_indices.append(idx)
-         out_third_arr.append(third_arr[i])
-         prev = xy
+ def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
+     gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
+     gdf, more_slivers = split_out_slivers(gdf, tolerance)
+     slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+     gaps = get_gaps(gdf, include_interiors=True)
+     gaps["_was_gap"] = 1
+     assert "_double_idx" not in gaps
+     double = get_intersections(gdf)
+     double["_double_idx"] = range(len(double))
+     thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
+         lambda x: x.buffer(-tolerance / 2).is_empty
+     ]

-     return out_coords, out_indices, out_third_arr
+     return gdf, thin_gaps_and_double, slivers


- def _snap_linearrings(
-     geoms: NDArray[LinearRing],
-     tolerance: int | float,
-     mask: GeoDataFrame | None,
-     snap_to_anchors: bool = True,
- ):
-     if not len(geoms):
-         return geoms
+ def _dissolve_thick_double_and_update(gdf, double, thin_double):
+     large = (
+         double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])]
+         .drop(columns="_double_idx")
+         # .pipe(sort_large_first)
+         .sort_values("_poly_idx")
+         .pipe(update_geometries, geom_type="polygon")
+     )
+     return (
+         clean_overlay(gdf, large, how="update")
+         # .pipe(sort_large_first)
+         .sort_values("_poly_idx").pipe(update_geometries, geom_type="polygon")
+     )

-     points = GeoDataFrame(
-         {
-             "geometry": extract_unique_points(geoms),
-             "_geom_idx": np.arange(len(geoms)),
-         }
-     ).explode(ignore_index=True)
-     coords = get_coordinates(points.geometry.values)
-     indices = points["_geom_idx"].values
-
-     if mask is not None:
-         mask_coords, mask_indices = get_coordinates(
-             mask.geometry.values, return_index=True
-         )
-         is_anchor = np.full(len(mask_coords), False)

-         mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
-             mask_coords, mask_indices, is_anchor
+ def _cleaning_checks(gdf, tolerance, duplicate_action):  # , spike_action):
+     if not len(gdf) or not tolerance:
+         return gdf
+     if tolerance < PRECISION:
+         raise ValueError(
+             f"'tolerance' must be larger than {PRECISION} to avoid "
+             "problems with floating point precision."
          )
-         mask_coords, mask_indices = _add_last_points_to_end(mask_coords, mask_indices)
-         mask_coords = np.array(mask_coords)
-         mask_indices = np.array(mask_indices)
+     if duplicate_action not in ["fix", "error", "ignore"]:
+         raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
+
+
+ def split_out_slivers(
+     gdf: GeoDataFrame | GeoSeries, tolerance: float | int
+ ) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
+     is_sliver = gdf.buffer(-tolerance / 2).is_empty
+     slivers = gdf.loc[is_sliver]
+     gdf = gdf.loc[~is_sliver]
+     return gdf, slivers
+
+
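`split_out_slivers` classifies a polygon as a sliver when a negative buffer of half the tolerance leaves nothing, i.e. no point of the polygon is further than `tolerance / 2` from its boundary. A quick demonstration of that test, assuming plain GeoPandas:

```python
import geopandas as gpd
from shapely.geometry import Polygon

thin = Polygon([(0, 0), (10, 0), (10, 0.05), (0, 0.05)])  # 0.05-wide strip
thick = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])

gdf = gpd.GeoDataFrame(geometry=[thin, thick])
is_sliver = gdf.buffer(-0.1 / 2).is_empty  # tolerance = 0.1
print(is_sliver.tolist())  # [True, False]
```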
+ def try_for_grid_size(
+     func,
+     grid_sizes: tuple[None, float | int],
+     args: tuple | None = None,
+     kwargs: dict | None = None,
+ ) -> Any:
+     if args is None:
+         args = ()
+     if kwargs is None:
+         kwargs = {}
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             return func(*args, grid_size=grid_size, **kwargs)
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 raise e

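`try_for_grid_size` retries a geometry operation with progressively coarser `grid_size` values, re-raising only when the last one also fails. A usage sketch with the module's own helpers (argument values are illustrative, not from the diff):

```python
import sgis as sg

left = sg.to_gdf([(0, 0)], crs=25833).pipe(sg.buff, 1)
right = sg.to_gdf([(1, 0)], crs=25833).pipe(sg.buff, 1)

# Try without grid snapping first; fall back to coarser grids on GEOSException.
result = try_for_grid_size(
    sg.clean_overlay,
    grid_sizes=(None, 1e-6, 1e-4),
    args=(left, right),
    kwargs=dict(how="intersection", geom_type="polygon"),
)
```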
- is_anchor = np.full(len(mask_coords), False)
1072
- mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
1073
- mask_coords, mask_indices, is_anchor
1074
- )
1075
- mask_coords = np.array(mask_coords)
1076
- mask_indices = np.array(mask_indices)
1077
527
 
1078
- original_mask_buffered = shapely.buffer(
1079
- shapely.linearrings(mask_coords, indices=mask_indices),
1080
- tolerance * 1.1,
1081
- )
1082
- mask_coords, mask_indices, was_midpoint_mask, _ = (
1083
- _add_midpoints_to_segments_numba(
1084
- mask_coords,
1085
- mask_indices,
1086
- get_coordinates(
1087
- sfilter(
1088
- points.geometry.drop_duplicates(),
1089
- original_mask_buffered,
1090
- )
1091
- ),
1092
- tolerance * 1.1,
1093
- )
1094
- )
528
+ def split_and_eliminate_by_longest(
529
+ gdf: GeoDataFrame | list[GeoDataFrame],
530
+ to_eliminate: GeoDataFrame,
531
+ tolerance: int | float,
532
+ grid_sizes: tuple[None | float | int] = (None,),
533
+ logger=None,
534
+ **kwargs,
535
+ ) -> GeoDataFrame | tuple[GeoDataFrame]:
536
+ if not len(to_eliminate):
537
+ return gdf
1095
538
 
1096
- mask_coords = np.array(mask_coords)
1097
- mask_indices = np.array(mask_indices)
1098
- mask_indices = (mask_indices + 1) * -1
1099
-
1100
- is_anchor = np.full(len(coords), False)
1101
- coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
1102
-
1103
- coords, indices = _add_last_points_to_end(coords, indices)
1104
- coords = np.array(coords)
1105
- indices = np.array(indices)
1106
-
1107
- is_anchor = np.full(len(coords), False)
1108
- coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
1109
- coords = np.array(coords)
1110
- indices = np.array(indices)
1111
-
1112
- # if 0:
1113
- # coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
1114
- # coords,
1115
- # indices,
1116
- # mask_coords,
1117
- # tolerance * 1.1, # + PRECISION * 100,
1118
- # )
1119
-
1120
- # was_midpoint = np.array(was_midpoint)
1121
-
1122
- # coords, is_snapped_to = _snap_to_anchors(
1123
- # coords,
1124
- # indices,
1125
- # mask_coords,
1126
- # mask_indices,
1127
- # mask_coords,
1128
- # mask_indices,
1129
- # was_midpoint,
1130
- # was_midpoint_mask,
1131
- # tolerance + PRECISION * 20,
1132
- # )
1133
- # indices = np.array(indices)
1134
- # coords = np.array(coords)
1135
-
1136
- # indices = indices[coords[:, 0] != np.inf]
1137
- # coords = coords[coords[:, 0] != np.inf]
1138
-
1139
- if snap_to_anchors:
1140
- if mask is None:
1141
- mask_coords = [coords[0]]
1142
- mask_indices = [indices[0]]
1143
- was_midpoint_mask = [False]
1144
- anchors, anchor_indices, is_anchor, was_midpoint_anchors = _build_anchors(
1145
- coords,
1146
- indices,
1147
- mask_coords,
1148
- mask_indices,
1149
- was_midpoint_mask,
1150
- tolerance + PRECISION, # * 100
1151
- )
1152
- anchors = np.array(anchors)
1153
- anchor_indices = np.array(anchor_indices)
539
+ if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
540
+ as_gdf = pd.concat(gdf, ignore_index=True)
541
+ else:
542
+ as_gdf = gdf
543
+
544
+ splitted = try_for_grid_size(
545
+ split_by_neighbors,
546
+ grid_sizes=grid_sizes,
547
+ args=(to_eliminate, as_gdf, tolerance),
548
+ ).pipe(sort_small_first)
549
+
550
+ splitted = try_for_grid_size(
551
+ update_geometries,
552
+ grid_sizes=grid_sizes,
553
+ args=(splitted,),
554
+ kwargs=dict(geom_type="polygon"),
555
+ )
1154
556
 
1155
- # anchors = np.round(anchors, 3)
557
+ gdf = try_for_grid_size(
558
+ eliminate_by_longest,
559
+ grid_sizes=grid_sizes,
560
+ args=(
561
+ gdf,
562
+ splitted,
563
+ ),
564
+ kwargs=kwargs,
565
+ )
1156
566
 
567
+ if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
568
+ as_gdf = pd.concat(gdf, ignore_index=True)
1157
569
  else:
1158
- anchors, anchor_indices, was_midpoint_anchors = (
1159
- mask_coords,
1160
- mask_indices,
1161
- was_midpoint_mask,
1162
- )
570
+ as_gdf = gdf
571
+
572
+ missing = try_for_grid_size(
573
+ clean_overlay,
574
+ grid_sizes=grid_sizes,
575
+ args=(
576
+ to_eliminate,
577
+ as_gdf,
578
+ ),
579
+ kwargs=dict(
580
+ how="difference",
581
+ geom_type="polygon",
582
+ ),
583
+ ).pipe(dissexp_by_cluster)
1163
584
 
1164
- coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
1165
- coords,
1166
- indices,
1167
- anchors,
1168
- tolerance * 1.1,
585
+ return try_for_grid_size(
586
+ eliminate_by_longest, grid_sizes=grid_sizes, args=(gdf, missing), kwargs=kwargs
1169
587
  )
1170
588
 
1171
- was_midpoint = np.array(was_midpoint)
1172
589
 
1173
- coords_up_here000 = (
1174
- pd.Series(_coords_to_rings(np.array(coords), np.array(indices), geoms))
1175
- .loc[lambda x: x.notna()]
1176
- .values
1177
- )
1178
- coords_up_here000 = to_gdf(polygons(coords_up_here000), 25833)
590
+ def split_by_neighbors(df, split_by, tolerance, grid_size=None) -> GeoDataFrame:
591
+ if not len(df):
592
+ return df
1179
593
 
1180
- coords, indices, was_midpoint = _add_last_points_to_end_with_third_arr(
1181
- coords, indices, was_midpoint
1182
- )
594
+ split_by = split_by.copy()
595
+ split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
1183
596
 
1184
- coords, indices, was_midpoint = _remove_duplicate_points(
1185
- coords, indices, was_midpoint
597
+ intersecting_lines = (
598
+ clean_overlay(
599
+ to_lines(split_by), buff(df, tolerance), how="identity", grid_size=grid_size
600
+ )
601
+ .pipe(get_line_segments)
602
+ .reset_index(drop=True)
1186
603
  )
1187
604
 
1188
- coords = np.array(coords)
1189
- indices = np.array(indices)
1190
- was_midpoint = np.array(was_midpoint)
605
+ endpoints = intersecting_lines.boundary.explode(index_parts=False)
1191
606
 
1192
- coords_up_here = (
1193
- pd.Series(_coords_to_rings(coords, indices, geoms))
1194
- .loc[lambda x: x.notna()]
1195
- .values
607
+ extended_lines = GeoDataFrame(
608
+ {
609
+ "geometry": extend_lines(
610
+ endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
611
+ endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
612
+ distance=tolerance * 3,
613
+ )
614
+ },
615
+ crs=df.crs,
1196
616
  )
1197
- coords_up_here = to_gdf(polygons(coords_up_here), 25833)
1198
617
 
1199
- explore(
1200
- coords=to_gdf(shapely.points(coords), 25833).assign(
1201
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1202
- ),
1203
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1204
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1205
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1206
- coords_up_here000=coords_up_here000,
1207
- coords_up_here=coords_up_here,
1208
- geoms=to_gdf(polygons(geoms), 25833),
1209
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1210
- was_midpoint_mask=was_midpoint_mask
1211
- ),
1212
- # center=_DEBUG_CONFIG["center"],
1213
- )
618
+ buffered = buff(extended_lines, tolerance, single_sided=True)
1214
619
 
1215
- coords, indices = _snap_to_anchors(
1216
- coords,
1217
- indices,
1218
- anchors,
1219
- anchor_indices,
1220
- mask_coords,
1221
- mask_indices,
1222
- was_midpoint,
1223
- was_midpoint_anchors,
1224
- tolerance + PRECISION * 100,
1225
- )
1226
- indices = np.array(indices)
1227
- coords = np.array(coords)
1228
- indices = indices[coords[:, 0] != np.inf]
1229
- coords = coords[coords[:, 0] != np.inf]
1230
-
1231
- # coords_up_here111 = (
1232
- # pd.Series(_coords_to_rings(coords, indices, geoms))
1233
- # .loc[lambda x: x.notna()]
1234
- # .values
1235
- # )
1236
- # coords_up_here111 = to_gdf(polygons(coords_up_here111), 25833)
1237
-
1238
- # if 0:
1239
- # # coords = get_coordinates(points.geometry.values)
1240
- # # indices = points["_geom_idx"].values
1241
-
1242
- # is_anchor = np.full(len(coords), False)
1243
- # coords, indices, is_anchor = _remove_duplicate_points(
1244
- # coords, indices, is_anchor
1245
- # )
1246
- # coords, indices = _add_last_points_to_end(coords, indices)
1247
- # coords = np.array(coords)
1248
- # indices = np.array(indices)
1249
- # is_anchor = np.full(len(coords), False)
1250
- # coords, indices, is_anchor = _remove_duplicate_points(
1251
- # coords, indices, is_anchor
1252
- # )
1253
- # coords = np.array(coords)
1254
- # indices = np.array(indices)
1255
-
1256
- # display(pd.DataFrame(coords, index=indices, columns=[*"xy"]))
1257
-
1258
- # if 0:
1259
- # mask_coords, mask_indices, , dist_to_closest_geom = (
1260
- # _add_midpoints_to_segments_numba(
1261
- # mask_coords,
1262
- # mask_indices,
1263
- # # coords,
1264
- # get_coordinates(
1265
- # sfilter(
1266
- # GeoSeries(shapely.points(coords)).drop_duplicates(),
1267
- # original_mask_buffered,
1268
- # )
1269
- # ),
1270
- # tolerance * 1.1,
1271
- # )
1272
- # )
1273
-
1274
- # mask_coords = np.array(mask_coords)
1275
- # mask_indices = np.array(mask_indices)
1276
-
1277
- # anchors, anchor_indices, is_anchor = _build_anchors(
1278
- # coords,
1279
- # indices,
1280
- # mask_coords,
1281
- # mask_indices,
1282
- # # is_anchor,
1283
- # tolerance + PRECISION, # * 100
1284
- # )
1285
- # anchors = np.array(anchors)
1286
- # anchor_indices = np.array(anchor_indices)
1287
-
1288
- # coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
1289
- # coords,
1290
- # indices,
1291
- # anchors,
1292
- # tolerance * 1.1, # + PRECISION * 100,
1293
- # # GeoDataFrame({"geometry": shapely.points(coords), "_geom_idx": indices}),
1294
- # # GeoDataFrame({"geometry": shapely.points(anchors)}),
1295
- # # tolerance, # + PRECISION * 100,
1296
- # # None,
1297
- # )
1298
- # print(len(coords), len(anchors), len(was_midpoint))
1299
-
1300
- # indices = np.array(indices)
1301
- # coords = np.array(coords)
1302
-
1303
- # was_midpoint = np.array(was_midpoint)
1304
-
1305
- # coords, is_snapped_to = _snap_to_anchors(
1306
- # coords,
1307
- # indices,
1308
- # anchors,
1309
- # anchor_indices,
1310
- # mask_coords,
1311
- # mask_indices,
1312
- # was_midpoint,
1313
- # was_midpoint_anchors,
1314
- # tolerance + PRECISION * 20,
1315
- # )
1316
- # indices = np.array(indices)
1317
- # coords = np.array(coords)
1318
- # indices = indices[coords[:, 0] != np.inf]
1319
- # coords = coords[coords[:, 0] != np.inf]
1320
-
1321
- # coords = np.array(coords)
1322
-
1323
- # indices = np.array(indices)
1324
-
1325
- coords_down_here = (
1326
- pd.Series(_coords_to_rings(coords, indices, geoms))
1327
- .loc[lambda x: x.notna()]
1328
- .values
1329
- )
1330
- lines_down_here = to_gdf(shapely.buffer(coords_down_here, 0.1), 25833)
1331
- coords_down_here = to_gdf(polygons(coords_down_here), 25833)
620
+ return clean_overlay(df, buffered, how="identity", grid_size=grid_size)
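
The three steps above follow a split-by-overlay pattern: deduplicated endpoints are joined into lines extended well past the geometries, the lines are buffered to one side into thin strips, and an identity overlay cuts df along those strips. A toy version of the overlay step with plain geopandas (gpd.overlay standing in for this module's clean_overlay; all names illustrative, not the package API):

import geopandas as gpd
from shapely.geometry import LineString, Polygon

square = gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (4, 0), (4, 4), (0, 4)])]})
# A crossing line buffered to one side becomes a thin strip.
strip = gpd.GeoDataFrame(
    {"geometry": [LineString([(2, -1), (2, 5)]).buffer(0.1, single_sided=True)]}
)
# "identity" keeps all of square, split where the strip overlaps it.
parts = gpd.overlay(square, strip, how="identity").explode(index_parts=False)
print(len(parts))  # 3: the strip's footprint plus the two remainders
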
1332
621
 
1333
- try:
1334
- explore(
1335
- coords=to_gdf(shapely.points(coords), 25833).assign(
1336
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1337
- ),
1338
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1339
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1340
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1341
- coords_up_here000=coords_up_here000,
1342
- coords_up_here=coords_up_here,
1343
- coords_down_here=coords_down_here,
1344
- lines_down_here=lines_down_here,
1345
- geoms=to_gdf(polygons(geoms), 25833),
1346
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1347
- was_midpoint_mask=was_midpoint_mask
1348
- ),
1349
- )
1350
622
 
1351
- explore(
1352
- coords=to_gdf(shapely.points(coords), 25833).assign(
1353
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1354
- ),
1355
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1356
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1357
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1358
- coords_up_here000=coords_up_here000,
1359
- coords_up_here=coords_up_here,
1360
- coords_down_here=coords_down_here,
1361
- lines_down_here=lines_down_here,
1362
- geoms=to_gdf(polygons(geoms), 25833),
1363
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1364
- was_midpoint_mask=was_midpoint_mask
1365
- ),
1366
- center=(5.37707159, 59.01065276, 1),
1367
- )
1368
- explore(
1369
- coords=to_gdf(shapely.points(coords), 25833).assign(
1370
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1371
- ),
1372
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1373
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1374
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1375
- coords_up_here000=coords_up_here000,
1376
- coords_up_here=coords_up_here,
1377
- coords_down_here=coords_down_here,
1378
- lines_down_here=lines_down_here,
1379
- geoms=to_gdf(polygons(geoms), 25833),
1380
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1381
- was_midpoint_mask=was_midpoint_mask
1382
- ),
1383
- center=(5.37419946, 59.01138812, 15),
1384
- )
623
+ def extend_lines(arr1: NDArray[Point], arr2: NDArray[Point], distance: int | float) -> NDArray[LineString]:
624
+ if len(arr1) != len(arr2):
625
+ raise ValueError(f"Arrays must have equal length. Got {len(arr1)} and {len(arr2)}")
626
+ if not len(arr1):
627
+ return arr1
1385
628
 
1386
- explore(
1387
- coords=to_gdf(shapely.points(coords), 25833).assign(
1388
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1389
- ),
1390
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1391
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1392
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1393
- coords_up_here000=coords_up_here000,
1394
- coords_up_here=coords_up_here,
1395
- lines_down_here=lines_down_here,
1396
- coords_down_here=coords_down_here,
1397
- geoms=to_gdf(polygons(geoms), 25833),
1398
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1399
- was_midpoint_mask=was_midpoint_mask
1400
- ),
1401
- center=(5.38389153, 59.00548223, 1),
1402
- )
1403
- explore(
1404
- coords=to_gdf(shapely.points(coords), 25833).assign(
1405
- idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1406
- ),
1407
- anchors=to_gdf(shapely.points(anchors), 25833).assign(
1408
- idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1409
- ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1410
- coords_up_here000=coords_up_here000,
1411
- coords_up_here=coords_up_here,
1412
- coords_down_here=coords_down_here,
1413
- lines_down_here=lines_down_here,
1414
- geoms=to_gdf(polygons(geoms), 25833),
1415
- msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1416
- was_midpoint_mask=was_midpoint_mask
1417
- ),
1418
- center=_DEBUG_CONFIG["center"],
1419
- )
629
+ arr1, arr2 = arr2, arr1  # TODO fix: swapped so each line runs from the original arr1 point to a point 'distance' beyond the original arr2 point
1420
630
 
1421
- except GEOSException as e:
1422
- print(e)
631
+ coords1 = coordinate_array(arr1)
632
+ coords2 = coordinate_array(arr2)
1423
633
 
1424
- return _coords_to_rings(coords, indices, geoms)
634
+ dx = coords2[:, 0] - coords1[:, 0]
635
+ dy = coords2[:, 1] - coords1[:, 1]
636
+ len_xy = np.sqrt((dx**2.0) + (dy**2.0))
637
+ x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
638
+ y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
1425
639
 
640
+ new_points = np.full(len(arr1), None, dtype=object)
641
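+ # where len_xy was 0, x and y are nan (0 / 0); those entries stay None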
+ new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
1426
642
 
1427
- def _coords_to_rings(
1428
- coords: NDArray[np.float64],
1429
- indices: NDArray[np.int32],
1430
- original_geoms: NDArray[LinearRing],
1431
- ) -> NDArray[LinearRing]:
1432
- df = pd.DataFrame({"x": coords[:, 0], "y": coords[:, 1]}, index=indices).loc[
1433
- lambda x: x.groupby(level=0).size() > 2
1434
- ]
1435
- to_int_idx = {idx: i for i, idx in enumerate(df.index.unique())}
1436
- rings = pd.Series(
1437
- linearrings(df.values, indices=df.index.map(to_int_idx)),
1438
- index=df.index.unique(),
643
+ new_points[~np.isnan(x)] = make_lines_between_points(
644
+ arr2[~np.isnan(x)], new_points[~np.isnan(x)]
1439
645
  )
646
+ return new_points
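
For intuition, the vectorised math above reduces to this scalar sketch (an illustrative helper, not the package API): the new endpoint lies distance units beyond the end of the segment, and a zero-length segment has no direction, which is the nan case handled above.

import numpy as np
from shapely.geometry import LineString, Point

def extend_segment(start: Point, end: Point, distance: float) -> LineString | None:
    vec = np.array([end.x - start.x, end.y - start.y])
    length = np.linalg.norm(vec)
    if length == 0:
        return None  # degenerate segment: direction undefined (the nan case)
    new_end = np.array([end.x, end.y]) + vec / length * distance
    return LineString([(start.x, start.y), tuple(new_end)])

# Extending (0,0)->(1,0) by 2 ends at (3, 0).
assert extend_segment(Point(0, 0), Point(1, 0), 2.0).coords[-1] == (3.0, 0.0)
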
1440
647
 
1441
- missing = pd.Series(
1442
- index=pd.Index(range(len(original_geoms))).difference(rings.index)
1443
- )
1444
648
 
1445
- return pd.concat([rings, missing]).sort_index().values
649
+ def make_lines_between_points(
650
+ arr1: NDArray[Point], arr2: NDArray[Point]
651
+ ) -> NDArray[LineString]:
652
+ if arr1.shape != arr2.shape:
653
+ raise ValueError(
654
+ f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
655
+ )
656
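+ # stack starts and ends with matching indices; sorting by index pairs each start with its end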
+ coords: pd.DataFrame = pd.concat(
657
+ [
658
+ pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
659
+ pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
660
+ ]
661
+ ).sort_index()
662
+
663
+ return linestrings(coords.values, indices=coords.index)
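
The pairing mechanics above in isolation: the two coordinate frames share index values 0..n-1, so after concatenation and an index sort each index holds one start row followed by one end row, and shapely's linestrings groups rows by index. A standalone sketch:

import numpy as np
import pandas as pd
from shapely import linestrings

starts = pd.DataFrame(np.array([[0.0, 0.0], [10.0, 10.0]]), columns=["x", "y"])
ends = pd.DataFrame(np.array([[1.0, 0.0], [10.0, 12.0]]), columns=["x", "y"])

# Index becomes [0, 1, 0, 1]; a stable sort groups each start with its end.
coords = pd.concat([starts, ends]).sort_index(kind="stable")

lines = linestrings(coords.values, indices=coords.index)
print(lines)  # two lines: (0 0, 1 0) and (10 10, 10 12)
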
664
+
665
+
666
+ def get_line_segments(lines: GeoDataFrame | GeoSeries) -> GeoDataFrame:
667
+ assert lines.index.is_unique
668
+ if isinstance(lines, GeoDataFrame):
669
+ multipoints = lines.assign(
670
+ **{
671
+ lines._geometry_column_name: force_2d(
672
+ extract_unique_points(lines.geometry.values)
673
+ )
674
+ }
675
+ )
676
+ return multipoints_to_line_segments(multipoints.geometry)
1446
677
 
678
+ multipoints = GeoSeries(extract_unique_points(lines.values), index=lines.index)
1447
679
 
1448
- @numba.njit
1449
- def _add_midpoints_to_segments_numba(
1450
- geoms: NDArray[np.float64],
1451
- indices: NDArray[np.int32],
1452
- anchors: NDArray[np.float64],
1453
- tolerance: int | float,
1454
- ):
1455
- n_minus_1 = len(geoms) - 1
1456
- out_coords = []
1457
- out_indices = []
1458
- was_midpoint = []
1459
- out_distances = []
1460
- for i in range(len(geoms)):
1461
- index = indices[i]
1462
-
1463
- is_last = i == n_minus_1 or index != indices[i + 1]
1464
- if is_last:
1465
- continue
1466
-
1467
- geom0 = geoms[i]
1468
- geom1 = geoms[i + 1]
1469
-
1470
- closest_points = np.full((len(anchors) + 2, 2), np.inf)
1471
- these_out_distances = np.full(len(anchors) + 2, np.inf)
1472
- closest_points[-1] = geom1
1473
- closest_points[-2] = geom0
1474
- these_out_distances[-1] = 0
1475
- these_out_distances[-2] = 0
1476
-
1477
- segment_vector = geom1 - geom0
1478
- segment_length_squared = np.dot(segment_vector, segment_vector)
1479
- for j in range(len(anchors)):
1480
- anchor = anchors[j]
1481
-
1482
- if segment_length_squared == 0:
1483
- closest_point = geom0
1484
- else:
1485
- point_vector = anchor - geom0
1486
- factor = np.dot(point_vector, segment_vector) / segment_length_squared
1487
- factor = max(0, min(1, factor))
1488
- if factor < 1e-6:
1489
- closest_point = geom0
1490
- elif factor > 1 - 1e-6:
1491
- closest_point = geom1
1492
- else:
1493
- closest_point = geom0 + factor * segment_vector
1494
-
1495
- dist = np.linalg.norm(anchor - closest_point)
1496
- if dist <= tolerance and dist > PRECISION:
1497
- closest_points[j] = closest_point
1498
- these_out_distances[j] = dist
1499
-
1500
- # if (
1501
- # closest_point[0] == 905049.3317999999
1502
- # ): # and int(closest_point[1]) == 7877676:
1503
- # print()
1504
- # for xxx in closest_point:
1505
- # print(xxx)
1506
- # for xxx in geom0:
1507
- # print(xxx)
1508
- # for xxx in geom1:
1509
- # print(xxx)
1510
- # for xxx, yyy in locals().items():
1511
- # print(xxx, yyy)
1512
- # ssss
1513
-
1514
- not_inf = closest_points[:, 0] != np.inf
1515
- arr = closest_points[not_inf]
1516
- these_out_distances = these_out_distances[not_inf]
1517
-
1518
- # sort by first and second column
1519
- # could have used np.lexsort, but it's not numba compatible
1520
- arr = arr[np.argsort(arr[:, 0])]
1521
- any_unsorted = True
1522
- while any_unsorted:
1523
- any_unsorted = False
1524
- for i in range(len(arr) - 1):
1525
- if arr[i, 0] < arr[i + 1, 0]:
1526
- continue
1527
- if arr[i, 1] > arr[i + 1, 1]:
1528
- copied = arr[i].copy()
1529
- arr[i] = arr[i + 1]
1530
- arr[i + 1] = copied
1531
-
1532
- copied = these_out_distances[i]
1533
- these_out_distances[i] = these_out_distances[i + 1]
1534
- these_out_distances[i + 1] = copied
1535
-
1536
- any_unsorted = True
1537
-
1538
- with_midpoints = []
1539
- these_out_distances2 = []
1540
- first_is_added = False
1541
- last_is_added = False
1542
- is_reverse = False
1543
- for y in range(len(arr)):
1544
- point = arr[y]
1545
- if (
1546
- not first_is_added
1547
- and np.sqrt((geom0[0] - point[0]) ** 2 + (geom0[1] - point[1]) ** 2)
1548
- == 0
1549
- ):
1550
- first_is_added = True
1551
- with_midpoints.append(point)
1552
- these_out_distances2.append(these_out_distances[y])
1553
- if last_is_added:
1554
- is_reverse = True
1555
- break
1556
- else:
1557
- continue
1558
- elif (
1559
- not last_is_added
1560
- and np.sqrt((geom1[0] - point[0]) ** 2 + (geom1[1] - point[1]) ** 2)
1561
- == 0
1562
- ):
1563
- last_is_added = True
1564
- with_midpoints.append(point)
1565
- these_out_distances2.append(these_out_distances[y])
1566
- if not first_is_added:
1567
- is_reverse = True
1568
- continue
1569
- else:
1570
- with_midpoints.append(point)
1571
- break
1572
- if first_is_added or last_is_added:
1573
- with_midpoints.append(point)
1574
- these_out_distances2.append(these_out_distances[y])
1575
-
1576
- # these_out_distances2.append(these_out_distances[y])
1577
- # these_anchors2.append(these_anchors[y])
1578
-
1579
- # with_midpoints = np.array(with_midpoints)
1580
-
1581
- if is_reverse:
1582
- with_midpoints = with_midpoints[::-1]
1583
- these_out_distances2 = these_out_distances2[::-1]
1584
- # these_anchors2 = these_anchors2[::-1]
1585
-
1586
- # print(index, is_reverse, arr)
1587
- # print(with_midpoints)
1588
- # print(to_gdf(LineString([geom0, geom1]), 25833))
1589
- # print(to_gdf(shapely.points(closest_points)))
1590
- # explore(
1591
- # to_gdf(shapely.points(with_midpoints)).assign(
1592
- # idx=lambda x: range(len(x))
1593
- # ),
1594
- # "idx",
1595
- # )
1596
- # explore(
1597
- # l=to_gdf(LineString([geom0, geom1]), 25833),
1598
- # # anchors=to_gdf(shapely.points(anchors)),
1599
- # # anchors_in_dist=to_gdf(shapely.points(these_anchors)),
1600
- # # closest_points=to_gdf(shapely.points(closest_points)),
1601
- # with_midpoints=to_gdf(shapely.points(with_midpoints)),
1602
- # anchors=to_gdf(shapely.points(anchors)),
1603
- # arr=to_gdf(shapely.points(arr)),
1604
- # # center=(-0.07034028, 1.80337784, 0.4),
1605
- # )
1606
-
1607
- with_midpoints_no_dups = []
1608
- these_out_distances_no_dups = []
1609
-
1610
- for y2 in range(len(with_midpoints)):
1611
- point = with_midpoints[y2]
1612
- should_be_added = True
1613
- for z in range(len(with_midpoints_no_dups)):
1614
- out_point = with_midpoints_no_dups[z]
1615
- if (
1616
- np.sqrt(
1617
- (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1618
- )
1619
- == 0
1620
- ):
1621
- should_be_added = False
1622
- break
1623
- if should_be_added:
1624
- with_midpoints_no_dups.append(point)
1625
- these_out_distances_no_dups.append(these_out_distances2[y2])
1626
-
1627
- n_minus_1_midpoints = len(with_midpoints_no_dups) - 1
1628
- for y3 in range(len(with_midpoints_no_dups)):
1629
- point = with_midpoints_no_dups[y3]
1630
- should_be_added = True
1631
-
1632
- for z2 in np.arange(len(out_coords))[::-1]:
1633
- if out_indices[z2] != index:
1634
- continue
1635
- out_point = out_coords[z2]
1636
-
1637
- if (
1638
- np.sqrt(
1639
- (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1640
- )
1641
- == 0
1642
- ):
1643
- should_be_added = False
1644
- break
1645
-
1646
- if not should_be_added:
1647
- continue
1648
-
1649
- out_coords.append(point)
1650
- out_indices.append(index)
1651
- out_distances.append(these_out_distances_no_dups[y3])
1652
- if y3 == 0 or y3 == n_minus_1_midpoints:
1653
- was_midpoint.append(False)
1654
- else:
1655
- was_midpoint.append(True)
680
+ return multipoints_to_line_segments(multipoints)
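
A usage sketch for this helper (assuming get_line_segments above and multipoints_to_line_segments defined below are importable from this module):

from geopandas import GeoSeries
from shapely.geometry import LineString

lines = GeoSeries([LineString([(0, 0), (1, 0), (1, 1)])])
segments = get_line_segments(lines)
# One row per consecutive point pair. Note the ring-closing step in
# multipoints_to_line_segments: the last point joins back to the first,
# so this open 3-point line yields three segments, not two.
print(segments.geometry.tolist())
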
1656
681
 
1657
- return (
1658
- out_coords,
1659
- out_indices,
1660
- was_midpoint,
1661
- out_distances,
1662
- )
1663
682
 
683
+ def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
684
+ if not len(multipoints):
685
+ return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)
1664
686
 
1665
- def _separate_single_neighbored_from_multi_neighoured_geometries(
1666
- gdf: GeoDataFrame, neighbors: GeoDataFrame
1667
- ) -> tuple[GeoDataFrame, GeoDataFrame]:
1668
- """Split GeoDataFrame in two: those with 0 or 1 neighbors and those with 2 or more.
687
+ try:
688
+ crs = multipoints.crs
689
+ except AttributeError:
690
+ crs = None
1669
691
 
1670
- Because single-neighbored polygons does not need splitting.
1671
- """
1672
- tree = STRtree(neighbors.geometry.values)
1673
- left, right = tree.query(gdf.geometry.values, predicate="intersects")
1674
- pairs = pd.Series(right, index=left)
1675
- has_more_than_one_neighbor = (
1676
- pairs.groupby(level=0).size().loc[lambda x: x > 1].index
1677
- )
692
+ try:
693
+ point_df = multipoints.explode(index_parts=False)
694
+ if isinstance(point_df, GeoSeries):
695
+ point_df = point_df.to_frame("geometry")
696
+ except AttributeError:
697
+ points, indices = get_parts(multipoints, return_index=True)
698
+ if isinstance(multipoints.index, pd.MultiIndex):
699
+ indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)
1678
700
 
1679
- more_than_one_neighbor = gdf.iloc[has_more_than_one_neighbor]
1680
- one_or_zero_neighbors = gdf.iloc[
1681
- pd.Index(range(len(gdf))).difference(has_more_than_one_neighbor)
1682
- ]
701
+ point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)
1683
702
 
1684
- return one_or_zero_neighbors, more_than_one_neighbor
703
+ point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
1685
704
 
705
+ first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
706
+ is_last_point = point_df["next"].isna()
1686
707
 
1687
- def split_and_eliminate_by_longest(
1688
- gdf: GeoDataFrame | tuple[GeoDataFrame],
1689
- to_eliminate: GeoDataFrame,
1690
- tolerance: float | int,
1691
- ignore_index: bool = False,
1692
- **kwargs,
1693
- ) -> tuple[GeoDataFrame]:
1694
- if isinstance(gdf, (list, tuple)):
1695
- # concat, then break up the dataframes in the end
1696
- was_multiple_gdfs = True
1697
- original_cols = [df.columns for df in gdf]
1698
- gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
1699
- else:
1700
- was_multiple_gdfs = False
1701
-
1702
- if 0:
1703
- to_eliminate.geometry = to_eliminate.buffer(
1704
- -PRECISION,
1705
- resolution=1,
1706
- join_style=2,
1707
- ).buffer(
1708
- PRECISION,
1709
- resolution=1,
1710
- join_style=2,
1711
- )
1712
- to_eliminate = to_eliminate.loc[lambda x: ~x.is_empty]
708
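+ # close the ring: the last point of each geometry links back to its first point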
+ point_df.loc[is_last_point, "next"] = first_points
709
+ assert point_df["next"].notna().all()
1713
710
 
1714
- # now to split polygons to be eliminated to avoid weird shapes
1715
- # split only the polygons with multiple neighbors
1716
- single_neighbored, multi_neighbored = (
1717
- _separate_single_neighbored_from_multi_neighoured_geometries(to_eliminate, gdf)
1718
- )
1719
- multi_neighbored = split_by_neighbors(multi_neighbored, gdf, tolerance=tolerance)
1720
- to_eliminate = pd.concat([multi_neighbored, single_neighbored])
1721
- gdf, isolated = eliminate_by_longest(
1722
- gdf, to_eliminate, ignore_index=ignore_index, **kwargs
1723
- )
711
+ point_df["geometry"] = [
712
+ LineString([x1, x2])
713
+ for x1, x2 in zip(point_df["geometry"], point_df["next"], strict=False)
714
+ ]
715
+ return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
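
Stripped to its essentials, the function is a groupby/shift pairing over exploded points, with a ring-closing wrap from the last point back to the first. A minimal standalone version (illustrative index values):

import pandas as pd
from shapely.geometry import LineString, Point

point_df = pd.DataFrame(
    {"geometry": [Point(0, 0), Point(1, 0), Point(1, 1)]}, index=[7, 7, 7]
)
# Pair each point with the next point of the same geometry...
point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
# ...and wrap the last point around to the first.
first = point_df.loc[~point_df.index.duplicated(), "geometry"]
point_df.loc[point_df["next"].isna(), "next"] = first
segments = [
    LineString([p1, p2]) for p1, p2 in zip(point_df["geometry"], point_df["next"])
]
print(segments)  # (0 0, 1 0), (1 0, 1 1), (1 1, 0 0)
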
1724
716
 
1725
- if not was_multiple_gdfs:
1726
- return gdf, isolated
1727
717
 
1728
- gdfs = ()
1729
- for i, cols in enumerate(original_cols):
1730
- df = gdf.loc[gdf["_df_idx"] == i, cols]
1731
- gdfs += (df,)
1732
- gdfs += (isolated,)
718
+ def explore_geosexception(e: GEOSException, *gdfs, logger=None) -> None:
719
+ from ..maps.maps import Explore
720
+ from ..maps.maps import explore
721
+ from .conversion import to_gdf
1733
722
 
1734
- return gdfs
723
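+ # scrape "x y" decimal coordinate pairs from the GEOS error text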
+ pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
724
+
725
+ matches = re.findall(pattern, str(e))
726
+ coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
727
+ exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
728
+ if len(exception_point):
729
+ exception_point["wkt"] = exception_point.to_wkt()
730
+ if logger:
731
+ logger.error(
732
+ e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
733
+ )
734
+ else:
735
+ explore(exception_point, *gdfs, mask=exception_point.buffer(100))
736
+ else:
737
+ if logger:
738
+ logger.error(e, Explore(*gdfs))
739
+ else:
740
+ explore(*gdfs)
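
The coordinate scraping above, applied to a made-up message (GEOS wording varies between versions, and the pattern only matches coordinates with a decimal part):

import re

msg = "TopologyException: side location conflict at 905049.331 7877676.563"
pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
coords = [(float(x), float(y)) for x, y in re.findall(pattern, msg)]
print(coords)  # [(905049.331, 7877676.563)]
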