ssb-sgis 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +1,45 @@
+ import re
  import warnings

+ import numpy as np
  import pandas as pd
+ import shapely
  from geopandas import GeoDataFrame, GeoSeries
+ from geopandas.array import GeometryArray
  from numpy.typing import NDArray
  from shapely import (
      extract_unique_points,
+     force_2d,
      get_coordinates,
      get_exterior_ring,
+     get_parts,
      linearrings,
+     linestrings,
      make_valid,
      polygons,
  )
- from shapely.geometry import LinearRing
+ from shapely.errors import GEOSException
+ from shapely.geometry import LinearRing, LineString, Point

  from ..networkanalysis.closing_network_holes import get_angle
  from .buffer_dissolve_explode import buff, dissexp
  from .conversion import coordinate_array, to_geoseries
  from .duplicates import get_intersections, update_geometries
- from .general import sort_large_first, sort_long_first
- from .geometry_types import get_geom_type
+ from .general import (
+     clean_geoms,
+     sort_large_first,
+     sort_long_first,
+     sort_small_first,
+     to_lines,
+ )
+ from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
  from .overlay import clean_overlay
- from .polygon_operations import close_all_holes, close_thin_holes, get_gaps
+ from .polygon_operations import (
+     close_all_holes,
+     eliminate_by_longest,
+     get_cluster_mapper,
+     get_gaps,
+ )
  from .polygons_as_rings import PolygonsAsRings
  from .sfilter import sfilter, sfilter_inverse

@@ -33,120 +52,17 @@ PRECISION = 1e-4
  BUFFER_RES = 50


- def get_angle_between_indexed_points(point_df: GeoDataFrame):
-     """Get the angle difference between consecutive line segments."""
-
-     point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
-
-     notna = point_df["next"].notna()
-
-     this = coordinate_array(point_df.loc[notna, "geometry"].values)
-     next_ = coordinate_array(point_df.loc[notna, "next"].values)
-
-     point_df.loc[notna, "angle"] = get_angle(this, next_)
-     point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
-
-     point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
-
-     return point_df
-
-
- def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
-     """Remove thin spikes in polygons.
-
-     Note that this function might be slow. Use it only when necessary.
-
-     Args:
-         gdf: GeoDataFrame of polygons
-         tolerance: distance (usually meters) used as the minimum thickness
-             for polygons to be eliminated. Any spike thinner than the tolerance
-             will be removed.
-
-     Returns:
-         A GeoDataFrame of polygons with spikes thinner than the tolerance removed.
-     """
-
-     def _remove_spikes(geoms: NDArray[LinearRing]) -> NDArray[LinearRing]:
-         if not len(geoms):
-             return geoms
-         geoms = to_geoseries(geoms).reset_index(drop=True)
-
-         points = (
-             extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
-         )
-
-         points = get_angle_between_indexed_points(points)
-
-         indices_with_spikes = points[
-             lambda x: (x["angle_diff"] >= 180) & (x["angle_diff"] < 180.01)
-         ].index.unique()
-
-         rings_with_spikes = geoms[geoms.index.isin(indices_with_spikes)]
-         rings_without_spikes = geoms[~geoms.index.isin(indices_with_spikes)]
-
-         def to_buffered_rings_without_spikes(x):
-             polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
-
-             return (
-                 polys.buffer(-tolerance, resolution=BUFFER_RES)
-                 .explode(index_parts=False)
-                 .pipe(close_all_holes)
-                 .pipe(get_exterior_ring)
-                 .buffer(tolerance * 10)
-             )
-
-         buffered = to_buffered_rings_without_spikes(
-             rings_with_spikes.buffer(tolerance / 2, resolution=BUFFER_RES)
-         )
-
-         points_without_spikes = (
-             extract_unique_points(rings_with_spikes)
-             .explode(index_parts=False)
-             .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
-         )
-
-         # linearrings require at least 4 coordinate pairs, or three unique
-         points_without_spikes = points_without_spikes.loc[
-             lambda x: x.groupby(level=0).size() >= 3
-         ]
-
-         # need an index from 0 to n-1 in 'linearrings'
-         to_int_index = {
-             ring_idx: i
-             for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
-         }
-         int_indices = points_without_spikes.index.map(to_int_index)
-
-         as_lines = pd.Series(
-             linearrings(
-                 get_coordinates(points_without_spikes.geometry.values),
-                 indices=int_indices,
-             ),
-             index=points_without_spikes.index.unique(),
-         )
-         as_lines = pd.concat([as_lines, rings_without_spikes])
-
-         # the missing polygons are thin and/or spiky. Let's remove them
-         missing = geoms.loc[~geoms.index.isin(as_lines.index)]
-
-         missing = pd.Series(
-             [None] * len(missing),
-             index=missing.index.values,
-         )
-
-         return pd.concat([as_lines, missing]).sort_index()
-
-     gdf.geometry = (
-         PolygonsAsRings(gdf.geometry).apply_numpy_func(_remove_spikes).to_numpy()
-     )
-     return gdf
-
-
  def coverage_clean(
      gdf: GeoDataFrame,
      tolerance: int | float,
      duplicate_action: str = "fix",
-     remove_isolated: bool = False,
+     # spike_action: str = "ignore",
+     grid_sizes: tuple[None | int] = (
+         None,
+         # 1e-6,
+         # 1e-5,
+         # 1e-4,
+     ),
  ) -> GeoDataFrame:
      """Fix thin gaps, holes, slivers and double surfaces.

@@ -174,6 +90,7 @@ def coverage_clean(
              and then dissolved into the neighbor polygon with the longest shared border.
              If "error", an Exception is raised if there are any double surfaces thicker
              than the tolerance. If "ignore", double surfaces are kept as is.
+         spike_action: Either "fix", "ignore" or "try".

      Returns:
          A GeoDataFrame with cleaned polygons.
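
For orientation, a minimal usage sketch of the new signature. The two-square GeoDataFrame and the CRS are hypothetical, and `coverage_clean` is assumed to be re-exported at the `sgis` top level like the other cleaning functions:

    import geopandas as gpd
    import sgis as sg
    from shapely.geometry import Polygon

    # Two squares separated by a sliver-thin gap (hypothetical data).
    left = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    right = Polygon([(1.001, 0), (2, 0), (2, 1), (1.001, 1)])
    gdf = gpd.GeoDataFrame({"geometry": [left, right]}, crs=25833)

    # Gaps, slivers and double surfaces thinner than 0.1 are dissolved
    # into a neighbouring polygon; grid_sizes keeps the default (None,).
    cleaned = sg.coverage_clean(gdf, tolerance=0.1)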
@@ -185,11 +102,38 @@
      if not gdf.index.is_unique:
          gdf = gdf.reset_index(drop=True)

-     gdf = close_thin_holes(gdf, tolerance)
+     gdf = make_all_singlepart(gdf).loc[
+         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
+     ]

-     gaps = get_gaps(gdf, include_interiors=True)
-     double = get_intersections(gdf)
-     double["_double_idx"] = range(len(double))
+     gdf = clean_geoms(gdf)
+
+     gdf.geometry = shapely.simplify(gdf.geometry, PRECISION)
+
+     gdf = (
+         clean_geoms(gdf)
+         .pipe(make_all_singlepart)
+         .loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
+     )
+
+     try:
+         gaps = get_gaps(gdf, include_interiors=True)
+     except GEOSException:
+         for i, grid_size in enumerate(grid_sizes):
+             try:
+                 gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
+                 break
+             except GEOSException as e:
+                 if i == len(grid_sizes) - 1:
+                     explore_geosexception(e, gdf)
+                     raise e
+
+     if duplicate_action == "ignore":
+         double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
+         double["_double_idx"] = None
+     else:
+         double = get_intersections(gdf)
+         double["_double_idx"] = range(len(double))

      gdf, slivers = split_out_slivers(gdf, tolerance)

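
The try/except block above is the first of several occurrences of the same recovery idiom in this version: run a GEOS-backed operation once per candidate grid_size, break on success, and only when the last candidate fails visualize the failure and re-raise. Condensed into a generic sketch (the helper name and wrapper are hypothetical):

    from shapely.errors import GEOSException

    def with_grid_size_fallback(operation, grid_sizes=(None,)):
        # Try coarser and coarser snapping grids; give up (and let the
        # caller inspect the exception) only when every candidate fails.
        for i, grid_size in enumerate(grid_sizes):
            try:
                return operation(grid_size=grid_size)
            except GEOSException:
                if i == len(grid_sizes) - 1:
                    raise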
@@ -200,21 +144,41 @@
      all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()

      if not all_are_thin and duplicate_action == "fix":
-         gdf, thin_gaps_and_double = _properly_fix_duplicates(
+         gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
              gdf, double, slivers, thin_gaps_and_double, tolerance
          )

-         # gaps = pd.concat([gaps, more_gaps], ignore_index=True)
-         # double = pd.concat([double, more_double], ignore_index=True)
      elif not all_are_thin and duplicate_action == "error":
          raise ValueError("Large double surfaces.")

-     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True).loc[
-         lambda x: ~x.buffer(-PRECISION / 10).is_empty
-     ]
+     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
+     to_eliminate.geometry = shapely.simplify(to_eliminate.geometry, PRECISION)
+
+     # eliminate super-thin slivers causing weird geometries
+     is_thin = to_eliminate.buffer(-PRECISION).is_empty
+     thick, thin = to_eliminate[~is_thin], to_eliminate[is_thin]
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             to_eliminate = eliminate_by_longest(
+                 thick,
+                 thin,
+                 remove_isolated=False,
+                 ignore_index=True,
+                 grid_size=grid_size,
+             )
+             break
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(e, gdf, thick, thin)
+                 raise e
+
+     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+
      to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
      gdf["_poly_idx"] = range(len(gdf))

+     to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))
+
      gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]

      joined = to_eliminate.sjoin(gdf_geoms_idx, how="left")
@@ -227,35 +191,86 @@
              buff(gdf_geoms_idx, tolerance, resolution=BUFFER_RES),
              geom_type="polygon",
          )
-         .pipe(sort_long_first)
+         .pipe(sort_large_first)
          .drop_duplicates("_eliminate_idx")
          .set_index("_eliminate_idx")["_poly_idx"]
      )
      intersecting["_poly_idx"] = intersecting["_eliminate_idx"].map(poly_idx_mapper)
-     without_double = update_geometries(intersecting).drop(
-         columns=["_eliminate_idx", "_double_idx", "index_right"]
-     )

-     cleaned = (
-         dissexp(pd.concat([gdf, without_double]), by="_poly_idx", aggfunc="first")
-         .reset_index(drop=True)
-         .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             without_double = update_geometries(
+                 intersecting, geom_type="polygon", grid_size=grid_size
+             ).drop(columns=["_eliminate_idx", "_double_idx", "index_right"])
+             break
+         except GEOSException as e:
+             intersecting.geometry = shapely.simplify(
+                 intersecting.geometry, PRECISION * (10 * i + 1)
+             )
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(e, gdf, intersecting, isolated)
+                 raise e
+
+     not_really_isolated = isolated.drop(
+         columns=[
+             "_double_idx",
+             "index_right",
+         ]
+     ).merge(without_double, on="_cluster", how="inner")
+
+     really_isolated = isolated.loc[
+         lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
+     ]
+
+     really_isolated["_poly_idx"] = (
+         really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
      )

-     if not remove_isolated:
-         cleaned = pd.concat(
-             [
-                 cleaned,
-                 isolated.drop(
-                     columns=[
-                         "_double_idx",
-                         "_eliminate_idx",
-                         "_poly_idx",
-                         "index_right",
-                     ]
-                 ),
-             ]
-         )
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             cleaned = (
+                 dissexp(
+                     pd.concat([gdf, without_double, isolated, really_isolated]).drop(
+                         columns=[
+                             "_cluster",
+                             "_eliminate_idx",
+                             "index_right",
+                             "_double_idx",
+                         ],
+                         errors="ignore",
+                     ),
+                     by="_poly_idx",
+                     aggfunc="first",
+                     dropna=True,
+                     grid_size=grid_size,
+                 )
+                 .sort_index()
+                 .reset_index(drop=True)
+                 # .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+             )
+             break
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(e, gdf, without_double, isolated, really_isolated)
+                 raise e
+
+     cleaned.geometry = shapely.make_valid(shapely.simplify(cleaned.geometry, PRECISION))
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             cleaned = update_geometries(
+                 cleaned, geom_type="polygon", grid_size=grid_size
+             )
+             break
+         except GEOSException as e:
+             cleaned.geometry = shapely.simplify(
+                 cleaned.geometry, PRECISION * (10 * i + 1)
+             )
+             if i == len(grid_sizes) - 1:
+                 explore_geosexception(
+                     e, gdf, cleaned, without_double, isolated, really_isolated
+                 )
+                 raise e

      missing_indices: pd.Index = sfilter_inverse(
          gdf.representative_point(), cleaned
@@ -268,30 +283,228 @@ def coverage_clean(
          geom_type="polygon",
      )

-     return pd.concat([cleaned, missing], ignore_index=True)
+     cleaned = pd.concat([cleaned, missing], ignore_index=True)
+     cleaned.geometry = shapely.make_valid(shapely.simplify(cleaned.geometry, PRECISION))

+     return cleaned

- def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
-     for _ in range(4):
-         gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
-         gdf, more_slivers = split_out_slivers(gdf, tolerance)
-         slivers = pd.concat([slivers, more_slivers], ignore_index=True)
-         gaps = get_gaps(gdf, include_interiors=True)
-         double = get_intersections(gdf)
-         double["_double_idx"] = range(len(double))
-         thin_gaps_and_double = pd.concat([gaps, double]).loc[
-             lambda x: x.buffer(-tolerance / 2).is_empty
+
+ def split_spiky_polygons(
+     gdf: GeoDataFrame, tolerance: int | float, grid_sizes: tuple[None | int] = (None,)
+ ) -> GeoDataFrame:
+     if not len(gdf):
+         return gdf
+
+     gdf = to_single_geom_type(make_all_singlepart(gdf), "polygon")
+
+     if not gdf.index.is_unique:
+         gdf = gdf.reset_index(drop=True)
+
+     polygons_without_spikes = gdf.buffer(tolerance / 2, join_style=2).buffer(
+         -tolerance / 2, join_style=2
+     )
+
+     donuts_around_polygons = to_lines(
+         polygons_without_spikes.to_frame("geometry")
+     ).pipe(buff, 1e-3, copy=False)
+
+     def remove_spikes(df):
+         df = df.to_frame("geometry")
+         df["_ring_idx"] = range(len(df))
+         df = df.reset_index(drop=True)
+
+         points = df.copy()
+         points.geometry = extract_unique_points(points.geometry)
+         points = points.explode(index_parts=False)
+         points["_idx"] = range(len(points))
+
+         not_spikes = points.sjoin(donuts_around_polygons).loc[
+             lambda x: x["_ring_idx"] == x["index_right"]
+         ]
+         can_be_polygons = not_spikes.iloc[
+             (not_spikes.groupby("_ring_idx").transform("size") >= 3).values
          ]
-         all_are_thin = (
-             double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+
+         without_spikes = (
+             can_be_polygons.sort_values("_idx")
+             .groupby("_ring_idx")["geometry"]
+             .agg(LinearRing)
          )
-         if all_are_thin:
-             return gdf, thin_gaps_and_double

-     not_thin = double[
-         lambda x: ~x["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+         missing = df[~df["_ring_idx"].isin(without_spikes.index)].geometry
+
+         return pd.concat([without_spikes, missing]).sort_index()
+
+     without_spikes = GeoDataFrame(
+         {
+             "geometry": PolygonsAsRings(gdf.geometry)
+             .apply_geoseries_func(remove_spikes)
+             .to_numpy()
+         },
+         crs=gdf.crs,
+     ).pipe(to_single_geom_type, "polygon")
+
+     is_thin = without_spikes.buffer(-tolerance / 2).is_empty
+     without_spikes = pd.concat(
+         [
+             split_by_neighbors(
+                 without_spikes[is_thin], without_spikes, tolerance=tolerance
+             ),
+             without_spikes[~is_thin],
+         ]
+     )
+
+     for _ in range(2):
+         for i, grid_size in enumerate(grid_sizes):
+             try:
+                 without_spikes = update_geometries(
+                     sort_small_first(without_spikes), geom_type="polygon"
+                 )
+                 break
+             except GEOSException as e:
+                 if i == len(grid_sizes) - 1:
+                     raise e
+
+     for i, grid_size in enumerate(grid_sizes):
+         try:
+             return clean_overlay(
+                 gdf, without_spikes, how="identity", grid_size=grid_size
+             )
+         except GEOSException as e:
+             if i == len(grid_sizes) - 1:
+                 raise e
+
+
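
split_spiky_polygons is new in this version: it mitre-buffers each polygon out and back in (join_style=2) to get an outline with spikes smoothed away, keeps only the original ring vertices that fall within a thin buffer ("donut") around that outline, and finally runs an identity overlay of the input against the rebuilt rings, so that spikes can end up as separate thin pieces. A hedged usage sketch (the data and the import path are hypothetical; the function may also be re-exported from the sgis top level):

    from sgis.geopandas_tools.cleaning import split_spiky_polygons

    # `gdf`: polygons in a metric CRS with needle-thin spikes.
    # Spikes thinner than the tolerance are split off as their own
    # slivers, which a later coverage_clean pass can then eliminate.
    split = split_spiky_polygons(gdf, tolerance=1)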
+ def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+     """Remove thin spikes in polygons.
+
+     Note that this function might be slow. Use it only when necessary.
+
+     Args:
+         gdf: GeoDataFrame of polygons
+         tolerance: distance (usually meters) used as the minimum thickness
+             for polygons to be eliminated. Any spike thinner than the tolerance
+             will be removed.
+
+     Returns:
+         A GeoDataFrame of polygons with spikes thinner than the tolerance removed.
+     """
+
+     gdf.geometry = (
+         PolygonsAsRings(gdf.geometry)
+         .apply_numpy_func(_remove_spikes, args=(tolerance,))
+         .to_numpy()
+     )
+     return gdf
+
+
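
A short usage sketch of remove_spikes (hypothetical data; note that the function assigns to gdf.geometry on the frame it is given, so pass a copy if the original must stay untouched):

    # `gdf`: polygons in a metric CRS. Spikes thinner than 0.1 are cut
    # away; rings reduced to fewer than three surviving points come back
    # as None from _remove_spikes below, removing all-spike polygons.
    despiked = remove_spikes(gdf.copy(), tolerance=0.1)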
+ def _remove_spikes(
+     geoms: NDArray[LinearRing], tolerance: int | float
+ ) -> NDArray[LinearRing]:
+     if not len(geoms):
+         return geoms
+     geoms = to_geoseries(geoms).reset_index(drop=True)
+
+     points = (
+         extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
+     )
+
+     points = get_angle_between_indexed_points(points)
+
+     def to_buffered_rings_without_spikes(x):
+         polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
+
+         return (
+             polys.buffer(-tolerance, resolution=BUFFER_RES)
+             .explode(index_parts=False)
+             .pipe(close_all_holes)
+             .pipe(get_exterior_ring)
+             .buffer(tolerance * 10)
+         )
+
+     buffered = to_buffered_rings_without_spikes(
+         geoms.buffer(tolerance / 2, resolution=BUFFER_RES)
+     )
+
+     points_without_spikes = (
+         extract_unique_points(geoms)
+         .explode(index_parts=False)
+         .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
+     )
+
+     # linearrings require at least 4 coordinate pairs, or three unique
+     points_without_spikes = points_without_spikes.loc[
+         lambda x: x.groupby(level=0).size() >= 3
      ]
-     raise ValueError("Failed to properly fix thick double surfaces", not_thin.geometry)
+
+     # need an index from 0 to n-1 in 'linearrings'
+     to_int_index = {
+         ring_idx: i
+         for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
+     }
+     int_indices = points_without_spikes.index.map(to_int_index)
+
+     as_lines = pd.Series(
+         linearrings(
+             get_coordinates(points_without_spikes.geometry.values),
+             indices=int_indices,
+         ),
+         index=points_without_spikes.index.unique(),
+     )
+
+     # the missing polygons are thin and/or spiky. Let's remove them
+     missing = geoms.loc[~geoms.index.isin(as_lines.index)]
+
+     missing = pd.Series(
+         [None] * len(missing),
+         index=missing.index.values,
+     )
+
+     return pd.concat([as_lines, missing]).sort_index()
+
+
+ def get_angle_between_indexed_points(point_df: GeoDataFrame):
+     """Get the angle difference between consecutive line segments."""
+
+     point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
+
+     notna = point_df["next"].notna()
+
+     this = coordinate_array(point_df.loc[notna, "geometry"].values)
+     next_ = coordinate_array(point_df.loc[notna, "next"].values)
+
+     point_df.loc[notna, "angle"] = get_angle(this, next_)
+     point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
+
+     point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
+
+     return point_df
+
+
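
To make the angle bookkeeping concrete: per ring, the helper computes the heading from each point to the next (via get_angle) and then the change of heading at each vertex. A vertex where the path doubles straight back on itself turns by exactly 180 degrees, which is why the old spike filter kept angle_diff in [180, 180.01). A worked toy case, assuming get_angle returns headings in degrees:

    # Ring fragment (0, 0) -> (1, 0) -> (0, 0):
    #   heading of segment 1: h
    #   heading of segment 2: h + 180  (straight back the same way)
    #   angle_diff at (1, 0): 180  -> the vertex is a spike tip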
+ def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
+     # gdf = update_geometries(gdf)
+     # gdf, more_slivers = split_out_slivers(gdf, tolerance)
+     # slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+     # gaps = get_gaps(gdf, include_interiors=True)
+     # double = get_intersections(gdf).pipe(update_geometries, geom_type="polygon")
+     # double["_double_idx"] = range(len(double))
+     # thin_gaps_and_double = pd.concat([gaps, double]).loc[
+     #     lambda x: x.buffer(-tolerance / 2).is_empty
+     # ]
+     # return gdf, thin_gaps_and_double, slivers
+
+     gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
+     gdf, more_slivers = split_out_slivers(gdf, tolerance)
+     slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+     gaps = get_gaps(gdf, include_interiors=True)
+     assert "_double_idx" not in gaps
+     double = get_intersections(gdf)
+     double["_double_idx"] = range(len(double))
+     thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
+         lambda x: x.buffer(-tolerance / 2).is_empty
+     ]
+
+     return gdf, thin_gaps_and_double, slivers


  def _dissolve_thick_double_and_update(gdf, double, thin_double):
@@ -299,20 +512,18 @@ def _dissolve_thick_double_and_update(gdf, double, thin_double):
          double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])]
          .drop(columns="_double_idx")
          .pipe(sort_large_first)
-         .pipe(update_geometries)
+         .pipe(update_geometries, geom_type="polygon")
      )
      return (
          clean_overlay(gdf, large, how="update")
          .pipe(sort_large_first)
-         .pipe(update_geometries)
+         .pipe(update_geometries, geom_type="polygon")
      )


- def _cleaning_checks(gdf, tolerance, duplicate_action):
+ def _cleaning_checks(gdf, tolerance, duplicate_action):  # , spike_action):
      if not len(gdf) or not tolerance:
          return gdf
-     if get_geom_type(gdf) != "polygon":
-         raise ValueError("Must be polygons.")
      if tolerance < PRECISION:
          raise ValueError(
              f"'tolerance' must be larger than {PRECISION} to avoid "
@@ -329,3 +540,144 @@ def split_out_slivers(
      slivers = gdf.loc[is_sliver]
      gdf = gdf.loc[~is_sliver]
      return gdf, slivers
+
+
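
Only the tail of split_out_slivers appears in this hunk. The sliver predicate itself is not shown, but by analogy with the thin-gap test used throughout this file it is presumably the half-tolerance negative buffer (a hedged reconstruction, not the verbatim source):

    # A polygon counts as a sliver if shrinking it by tolerance / 2
    # empties it, i.e. it is nowhere thicker than the tolerance.
    is_sliver = gdf.buffer(-tolerance / 2).is_empty
    slivers = gdf.loc[is_sliver]
    gdf = gdf.loc[~is_sliver]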
+ def split_by_neighbors(df, split_by, tolerance):
+     if not len(df):
+         return df
+
+     split_by = split_by.copy()
+     split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
+
+     intersecting_lines = (
+         clean_overlay(to_lines(split_by), buff(df, tolerance), how="identity")
+         .pipe(get_line_segments)
+         .reset_index(drop=True)
+     )
+
+     endpoints = intersecting_lines.boundary.explode(index_parts=False)
+
+     extended_lines = GeoDataFrame(
+         {
+             "geometry": extend_lines(
+                 endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
+                 endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
+                 distance=tolerance * 3,
+             )
+         },
+         crs=df.crs,
+     )
+
+     buffered = buff(extended_lines, tolerance, single_sided=True)
+
+     return clean_overlay(df, buffered, how="identity")
+
+
+ def extend_lines(arr1, arr2, distance):
+     if len(arr1) != len(arr2):
+         raise ValueError
+     if not len(arr1):
+         return arr1
+
+     arr1, arr2 = arr2, arr1  # TODO fix
+
+     coords1 = coordinate_array(arr1)
+     coords2 = coordinate_array(arr2)
+
+     dx = coords2[:, 0] - coords1[:, 0]
+     dy = coords2[:, 1] - coords1[:, 1]
+     len_xy = np.sqrt((dx**2.0) + (dy**2.0))
+     x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
+     y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
+
+     new_points = np.array([None for _ in range(len(arr1))])
+     new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
+
+     new_points[~np.isnan(x)] = make_lines_between_points(
+         arr2[~np.isnan(x)], new_points[~np.isnan(x)]
+     )
+     return new_points
+
+
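
The geometric core of extend_lines (after the arr1/arr2 swap flagged with a TODO) is: extend beyond p1 along the direction from p2 to p1, i.e. new = p1 + (p1 - p2) / |p1 - p2| * distance. A worked example:

    import numpy as np

    p1, p2, distance = np.array([2.0, 0.0]), np.array([1.0, 0.0]), 3.0
    unit = (p1 - p2) / np.linalg.norm(p1 - p2)  # (1, 0)
    p1 + unit * distance                        # -> array([5., 0.])
    # make_lines_between_points then draws the line from the other
    # endpoint to this new point, giving the extended segment.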
+ def make_lines_between_points(
+     arr1: NDArray[Point], arr2: NDArray[Point]
+ ) -> NDArray[LineString]:
+     if arr1.shape != arr2.shape:
+         raise ValueError(
+             f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
+         )
+     coords: pd.DataFrame = pd.concat(
+         [
+             pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
+             pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
+         ]
+     ).sort_index()
+
+     return linestrings(coords.values, indices=coords.index)
+
+
+ def get_line_segments(lines) -> GeoDataFrame:
+     assert lines.index.is_unique
+     if isinstance(lines, GeoDataFrame):
+         multipoints = lines.assign(
+             **{
+                 lines._geometry_column_name: force_2d(
+                     extract_unique_points(lines.geometry.values)
+                 )
+             }
+         )
+         return multipoints_to_line_segments(multipoints.geometry)
+
+     multipoints = GeoSeries(extract_unique_points(lines.values), index=lines.index)
+
+     return multipoints_to_line_segments(multipoints)
+
+
+ def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
+     if not len(multipoints):
+         return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)
+
+     try:
+         crs = multipoints.crs
+     except AttributeError:
+         crs = None
+
+     try:
+         point_df = multipoints.explode(index_parts=False)
+         if isinstance(point_df, GeoSeries):
+             point_df = point_df.to_frame("geometry")
+     except AttributeError:
+         points, indices = get_parts(multipoints, return_index=True)
+         if isinstance(multipoints.index, pd.MultiIndex):
+             indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)
+
+         point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)
+
+     point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
+
+     first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
+     is_last_point = point_df["next"].isna()
+
+     point_df.loc[is_last_point, "next"] = first_points
+     assert point_df["next"].notna().all()
+
+     point_df["geometry"] = [
+         LineString([x1, x2]) for x1, x2 in zip(point_df["geometry"], point_df["next"])
+     ]
+     return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
+
+
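
A small round-trip of the two helpers above (hypothetical data): the vertices of a square ring become four consecutive segments, with each vertex joined to the next and the last joined back to the first:

    import geopandas as gpd
    from shapely.geometry import Polygon

    ring = gpd.GeoSeries([Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]).exterior])
    segments = get_line_segments(ring)
    len(segments)  # 4 LineStrings, the last closing back to (0, 0)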
+ def explore_geosexception(e: GEOSException, *gdfs):
+     from ..maps.maps import explore
+     from .conversion import to_gdf
+
+     pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
+
+     matches = re.findall(pattern, str(e))
+     coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
+     exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
+     if len(exception_point):
+         exception_point["wkt"] = exception_point.to_wkt()
+         explore(exception_point, *gdfs, mask=exception_point.buffer(100))
+     else:
+         explore(*gdfs)
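
explore_geosexception leans on the fact that GEOS error messages usually embed the offending coordinate pair. The pattern pulls out adjacent decimal numbers (the example message is paraphrased, not from a real run):

    import re

    msg = "TopologyException: side location conflict at 261577.55 6648175.25"
    re.findall(r"(\d+\.\d+)\s+(\d+\.\d+)", msg)
    # -> [('261577.55', '6648175.25')]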