ssb-sgis 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sgis/__init__.py +20 -9
  2. sgis/debug_config.py +24 -0
  3. sgis/exceptions.py +2 -2
  4. sgis/geopandas_tools/bounds.py +33 -36
  5. sgis/geopandas_tools/buffer_dissolve_explode.py +136 -35
  6. sgis/geopandas_tools/centerlines.py +4 -91
  7. sgis/geopandas_tools/cleaning.py +1576 -583
  8. sgis/geopandas_tools/conversion.py +38 -19
  9. sgis/geopandas_tools/duplicates.py +29 -8
  10. sgis/geopandas_tools/general.py +263 -100
  11. sgis/geopandas_tools/geometry_types.py +4 -4
  12. sgis/geopandas_tools/neighbors.py +19 -15
  13. sgis/geopandas_tools/overlay.py +2 -2
  14. sgis/geopandas_tools/point_operations.py +5 -5
  15. sgis/geopandas_tools/polygon_operations.py +510 -105
  16. sgis/geopandas_tools/polygons_as_rings.py +40 -8
  17. sgis/geopandas_tools/sfilter.py +29 -12
  18. sgis/helpers.py +3 -3
  19. sgis/io/dapla_functions.py +238 -19
  20. sgis/io/read_parquet.py +1 -1
  21. sgis/maps/examine.py +27 -12
  22. sgis/maps/explore.py +450 -65
  23. sgis/maps/legend.py +177 -76
  24. sgis/maps/map.py +206 -103
  25. sgis/maps/maps.py +178 -105
  26. sgis/maps/thematicmap.py +243 -83
  27. sgis/networkanalysis/_service_area.py +6 -1
  28. sgis/networkanalysis/closing_network_holes.py +2 -2
  29. sgis/networkanalysis/cutting_lines.py +15 -8
  30. sgis/networkanalysis/directednetwork.py +1 -1
  31. sgis/networkanalysis/finding_isolated_networks.py +15 -8
  32. sgis/networkanalysis/networkanalysis.py +17 -19
  33. sgis/networkanalysis/networkanalysisrules.py +1 -1
  34. sgis/networkanalysis/traveling_salesman.py +1 -1
  35. sgis/parallel/parallel.py +64 -27
  36. sgis/raster/__init__.py +0 -6
  37. sgis/raster/base.py +208 -0
  38. sgis/raster/cube.py +54 -8
  39. sgis/raster/image_collection.py +3257 -0
  40. sgis/raster/indices.py +17 -5
  41. sgis/raster/raster.py +138 -243
  42. sgis/raster/sentinel_config.py +120 -0
  43. sgis/raster/zonal.py +0 -1
  44. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/METADATA +6 -7
  45. ssb_sgis-1.0.4.dist-info/RECORD +62 -0
  46. sgis/raster/methods_as_functions.py +0 -0
  47. sgis/raster/torchgeo.py +0 -171
  48. ssb_sgis-1.0.2.dist-info/RECORD +0 -61
  49. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/LICENSE +0 -0
  50. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/WHEEL +0 -0
@@ -1,4 +1,4 @@
- import re
+ # %%
  import warnings
  from collections.abc import Callable
  from typing import Any
@@ -8,36 +8,49 @@ import pandas as pd
  import shapely
  from geopandas import GeoDataFrame
  from geopandas import GeoSeries
- from geopandas.array import GeometryArray
  from numpy.typing import NDArray
+ from shapely import Geometry
+ from shapely import STRtree
  from shapely import extract_unique_points
  from shapely import get_coordinates
- from shapely import get_parts
- from shapely import linestrings
+ from shapely import linearrings
+ from shapely import polygons
  from shapely.errors import GEOSException
+ from shapely.geometry import LinearRing
  from shapely.geometry import LineString
  from shapely.geometry import Point

- from .buffer_dissolve_explode import buff
- from .buffer_dissolve_explode import dissexp
- from .conversion import coordinate_array
+ try:
+     import numba
+ except ImportError:
+
+     class numba:
+         """Placeholder."""
+
+         @staticmethod
+         def njit(func) -> Callable:
+             """Placeholder that does nothing."""
+
+             def wrapper(*args, **kwargs):
+                 return func(*args, **kwargs)
+
+             return wrapper
+
+
+ from ..debug_config import _DEBUG_CONFIG
+ from ..maps.maps import explore
  from .conversion import to_gdf
- from .duplicates import get_intersections
+ from .conversion import to_geoseries
  from .duplicates import update_geometries
-
- # from .general import sort_large_first as _sort_large_first
  from .general import clean_geoms
- from .general import sort_large_first
- from .general import sort_small_first
- from .general import to_lines
  from .geometry_types import make_all_singlepart
  from .geometry_types import to_single_geom_type
  from .overlay import clean_overlay
  from .polygon_operations import eliminate_by_longest
- from .polygon_operations import get_cluster_mapper
- from .polygon_operations import get_gaps
+ from .polygon_operations import split_by_neighbors
+ from .polygons_as_rings import PolygonsAsRings
+ from .sfilter import sfilter
  from .sfilter import sfilter_inverse
- from .sfilter import sfilter_split

  warnings.simplefilter(action="ignore", category=UserWarning)
  warnings.simplefilter(action="ignore", category=RuntimeWarning)
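
The try/except added in this hunk makes numba an optional dependency: if the import fails, `numba.njit` degrades to a decorator that returns the function unchanged, so the `@numba.njit`-decorated helpers further down run as plain Python. A minimal sketch of the effect (the decorated function is hypothetical, not part of the package):

    import numpy as np

    @numba.njit  # compiled if numba is installed, plain Python otherwise
    def ring_length(coords: np.ndarray) -> float:
        # Loop-heavy code that numba would JIT-compile when available.
        total = 0.0
        for i in range(len(coords) - 1):
            dx = coords[i + 1, 0] - coords[i, 0]
            dy = coords[i + 1, 1] - coords[i, 1]
            total += (dx**2 + dy**2) ** 0.5
        return total

    ring_length(np.array([[0.0, 0.0], [3.0, 4.0]]))  # -> 5.0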
@@ -47,12 +60,31 @@ PRECISION = 1e-3
  BUFFER_RES = 50


+ # def explore(*args, **kwargs):
+ #     pass
+
+
+ # def explore_locals(*args, **kwargs):
+ #     pass
+
+
+ # def no_njit(func):
+ #     def wrapper(*args, **kwargs):
+ #         result = func(*args, **kwargs)
+ #         return result
+
+ #     return wrapper
+
+
+ # numba.njit = no_njit
+
+
  def coverage_clean(
      gdf: GeoDataFrame,
      tolerance: int | float,
-     duplicate_action: str = "fix",
-     grid_sizes: tuple[None | int] = (None,),
-     n_jobs: int = 1,
+     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
+     snap_to_anchors: bool = True,
+     **kwargs,
  ) -> GeoDataFrame:
      """Fix thin gaps, holes, slivers and double surfaces.

@@ -78,15 +110,10 @@ def coverage_clean(
              for polygons to be eliminated. Any gap, hole, sliver or double
              surface that are empty after a negative buffer of tolerance / 2
              are eliminated into the neighbor with the longest shared border.
-         duplicate_action: Either "fix", "error" or "ignore".
-             If "fix" (default), double surfaces thicker than the
-             tolerance will be updated from top to bottom (function update_geometries)
-             and then dissolved into the neighbor polygon with the longest shared border.
-             If "error", an Exception is raised if there are any double surfaces thicker
-             than the tolerance. If "ignore", double surfaces are kept as is.
-         grid_sizes: One or more grid_sizes used in overlay and dissolve operations that
-             might raise a GEOSException. Defaults to (None,), meaning no grid_sizes.
-         n_jobs: Number of threads.
+         mask: Mask to clip gdf to.
+         snap_to_anchors: If True (default), snaps to anchor nodes in gdf. If False,
+             only snaps to mask nodes (mask cannot be None in this case).
+         **kwargs: Temporary backwards compatibility to avoid TypeErrors.

      Returns:
          A GeoDataFrame with cleaned polygons.
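
A hedged usage sketch of the new 1.0.4 signature (the geometries, CRS and tolerance below are illustrative, not from the package):

    from geopandas import GeoDataFrame
    from shapely.geometry import Polygon

    from sgis.geopandas_tools.cleaning import coverage_clean

    gdf = GeoDataFrame(
        {
            "geometry": [
                Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]),
                # neighbor polygon leaving a 1 mm sliver gap along the shared edge
                Polygon([(1.001, 0), (1.001, 1), (2, 1), (2, 0)]),
            ]
        },
        crs=25833,
    )
    cleaned = coverage_clean(gdf, tolerance=0.1)  # gaps thinner than 0.1 m are closed
    # With a boundary mask, vertices can be snapped to the mask nodes instead:
    # cleaned = coverage_clean(gdf, 0.1, mask=some_boundary, snap_to_anchors=False)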
@@ -94,648 +121,1614 @@
      if not len(gdf):
          return gdf

-     _cleaning_checks(gdf, tolerance, duplicate_action)
+     gdf_original = gdf.copy()

-     if not gdf.index.is_unique:
-         gdf = gdf.reset_index(drop=True)
+     # more_than_one = get_num_geometries(gdf.geometry.values) > 1
+     # gdf.loc[more_than_one, gdf._geometry_column_name] = gdf.loc[
+     #     more_than_one, gdf._geometry_column_name
+     # ].apply(_unary_union_for_notna)

-     gdf = make_all_singlepart(gdf).loc[
-         lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])
-     ]
-
-     try:
-         gdf = _safe_simplify(gdf, PRECISION)
-     except GEOSException:
-         pass
+     if mask is not None:
+         try:
+             mask: GeoDataFrame = mask[["geometry"]].pipe(make_all_singlepart)
+         except Exception:
+             mask: GeoDataFrame = (
+                 to_geoseries(mask).to_frame("geometry").pipe(make_all_singlepart)
+             )

-     gdf = (
-         clean_geoms(gdf)
-         .pipe(make_all_singlepart)
-         .loc[lambda x: x.geom_type.isin(["Polygon", "MultiPolygon"])]
+     # mask: GeoDataFrame = close_all_holes(
+     #     dissexp_by_cluster(gdf[["geometry"]])
+     # ).pipe(make_all_singlepart)
+     # mask = GeoDataFrame(
+     #     {
+     #         "geometry": [
+     #             mask.union_all()
+     #             .buffer(
+     #                 PRECISION,
+     #                 resolution=1,
+     #                 join_style=2,
+     #             )
+     #             .buffer(
+     #                 -PRECISION,
+     #                 resolution=1,
+     #                 join_style=2,
+     #             )
+     #         ]
+     #     },
+     #     crs=gdf.crs,
+     # ).pipe(make_all_singlepart)
+     # # gaps = shapely.union_all(get_gaps(mask).geometry.values)
+     # # mask = shapely.get_parts(extract_unique_points(mask.geometry.values))
+     # # not_by_gaps = shapely.distance(mask, gaps) > PRECISION
+     # # mask = GeoDataFrame({"geometry": mask[not_by_gaps]})
+
+     gdf = snap_polygons(gdf, tolerance, mask=mask, snap_to_anchors=snap_to_anchors)
+
+     if mask is not None:
+         missing_from_mask = clean_overlay(
+             mask, gdf, how="difference", geom_type="polygon"
+         ).loc[lambda x: x.buffer(-tolerance + PRECISION).is_empty]
+         gdf, _ = eliminate_by_longest(gdf, missing_from_mask)
+
+     missing_from_gdf = sfilter_inverse(gdf_original, gdf.buffer(-PRECISION)).loc[
+         lambda x: (~x.buffer(-PRECISION).is_empty)
+     ]
+     return pd.concat([gdf, missing_from_gdf], ignore_index=True).pipe(
+         update_geometries, geom_type="polygon"
      )

-     try:
-         gaps = get_gaps(gdf, include_interiors=True)
-     except GEOSException:
-         for i, grid_size in enumerate(grid_sizes):
-             try:
-                 gaps = get_gaps(gdf, include_interiors=True, grid_size=grid_size)
-                 if grid_size:
-                     # in order to not get more gaps
-                     gaps.geometry = gaps.buffer(grid_size)
-                 break
-             except GEOSException as e:
-                 if i == len(grid_sizes) - 1:
-                     explore_geosexception(e, gdf)
-                     raise e
-
-     gaps["_was_gap"] = 1
-
-     if duplicate_action == "ignore":
-         double = GeoDataFrame({"geometry": []}, crs=gdf.crs)
-         double["_double_idx"] = None
-     else:
-         double = get_intersections(gdf, n_jobs=n_jobs)
-         double["_double_idx"] = range(len(double))

-     gdf, slivers = split_out_slivers(gdf, tolerance)
+ def snap_polygons(
+     gdf: GeoDataFrame,
+     tolerance: int | float,
+     mask: GeoDataFrame | GeoSeries | Geometry | None = None,
+     snap_to_anchors: bool = True,
+ ) -> GeoDataFrame:
+     if not len(gdf):
+         return gdf.copy()

-     gdf["_poly_idx"] = range(len(gdf))
+     gdf_orig = gdf.copy()

-     thin_gaps_and_double = pd.concat([gaps, double]).loc[
-         lambda x: (x.buffer(-tolerance / 2).is_empty)
-     ]
+     crs = gdf.crs

-     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+     gdf = (
+         clean_geoms(gdf)
+         .pipe(make_all_singlepart, ignore_index=True)
+         .pipe(to_single_geom_type, "polygon")
+     )

-     if not all_are_thin and duplicate_action == "fix":
-         gdf, thin_gaps_and_double, slivers = _properly_fix_duplicates(
-             gdf,
-             double,
-             slivers,
-             thin_gaps_and_double,
-             tolerance,
-             n_jobs=n_jobs,
+     gdf.crs = None
+
+     gdf = gdf[lambda x: ~x.buffer(-tolerance / 2 - PRECISION).is_empty]
+     # gdf = gdf[lambda x: ~x.buffer(-tolerance / 3).is_empty]
+
+     # donuts_without_spikes = (
+     #     gdf.geometry.buffer(tolerance / 2, resolution=1, join_style=2)
+     #     .buffer(-tolerance, resolution=1, join_style=2)
+     #     .buffer(tolerance / 2, resolution=1, join_style=2)
+     #     .pipe(to_lines)
+     #     .buffer(tolerance)
+     # )
+
+     gdf.geometry = (
+         PolygonsAsRings(gdf.geometry.values)
+         .apply_numpy_func(
+             _snap_linearrings,
+             kwargs=dict(
+                 tolerance=tolerance,
+                 mask=mask,
+                 snap_to_anchors=snap_to_anchors,
+             ),
          )
-
-     elif not all_are_thin and duplicate_action == "error":
-         raise ValueError("Large double surfaces.")
-
-     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True)
-
-     to_eliminate = to_eliminate.loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
-
-     to_eliminate = try_for_grid_size(
-         split_by_neighbors,
-         grid_sizes=grid_sizes,
-         args=(to_eliminate, gdf),
-         kwargs=dict(tolerance=tolerance),
+         .to_numpy()
      )

-     to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
-
-     to_eliminate["_cluster"] = get_cluster_mapper(to_eliminate.buffer(PRECISION))
+     gdf = (
+         to_single_geom_type(make_all_singlepart(clean_geoms(gdf)), "polygon")
+         .reset_index(drop=True)
+         .set_crs(crs)
+     )

-     gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
+     missing = clean_overlay(gdf_orig, gdf, how="difference").loc[
+         lambda x: ~x.buffer(-tolerance / 2).is_empty
+     ]

-     poly_idx_mapper = clean_overlay(
-         buff(
-             to_eliminate[["_eliminate_idx", "geometry"]],
-             tolerance,
-             resolution=BUFFER_RES,
+     if mask is None:
+         mask = GeoDataFrame({"geometry": []})
+     explore(
+         gdf,
+         # gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
          ),
-         gdf_geoms_idx,
-         geom_type="polygon",
-         n_jobs=n_jobs,
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36765872, 59.01199837, 1),
      )
-     poly_idx_mapper["_area_per_poly"] = poly_idx_mapper.area
-     poly_idx_mapper["_area_per_poly"] = poly_idx_mapper.groupby("_poly_idx")[
-         "_area_per_poly"
-     ].transform("sum")
-
-     poly_idx_mapper: pd.Series = (
-         poly_idx_mapper.sort_values("_area_per_poly", ascending=False)
-         .drop_duplicates("_eliminate_idx")
-         .set_index("_eliminate_idx")["_poly_idx"]
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36820681, 59.01182298, 2),
      )
-     to_eliminate["_poly_idx"] = to_eliminate["_eliminate_idx"].map(poly_idx_mapper)
-     isolated = to_eliminate[lambda x: x["_poly_idx"].isna()]
-     intersecting = to_eliminate[lambda x: x["_poly_idx"].notna()]
-
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             without_double = update_geometries(
-                 intersecting,
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             ).drop(columns=["_eliminate_idx", "_double_idx"])
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(e, gdf, intersecting, isolated)
-                 raise e
-
-     not_really_isolated = isolated[["geometry", "_eliminate_idx", "_cluster"]].merge(
-         without_double.drop(columns=["geometry"]),
-         on="_cluster",
-         how="inner",
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37327042, 59.01099359, 5),
      )
-
-     really_isolated = isolated.loc[
-         lambda x: ~x["_eliminate_idx"].isin(not_really_isolated["_eliminate_idx"])
-     ]
-
-     is_gap = really_isolated["_was_gap"] == 1
-     isolated_gaps = really_isolated.loc[is_gap, ["geometry"]].sjoin_nearest(
-         gdf, max_distance=PRECISION
+     explore(
+         gdf,
+         gdf_orig,
+         # thin,
+         mask,
+         missing,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36853688, 59.01169013, 5),
      )
-     really_isolated = really_isolated[~is_gap]
-
-     really_isolated["_poly_idx"] = (
-         really_isolated["_cluster"] + gdf["_poly_idx"].max() + 1
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37142966, 59.009799, 0.01),
+         max_zoom=40,
      )
-
-     cleaned = pd.concat(
-         [
-             gdf,
-             without_double,
-             not_really_isolated,
-             really_isolated,
-             isolated_gaps,
-         ],
-     ).drop(
-         columns=[
-             "_cluster",
-             "_was_gap",
-             "_eliminate_idx",
-             "index_right",
-             "_double_idx",
-             "_area_per_poly",
-         ],
-         errors="ignore",
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.36866312, 59.00842846, 0.01),
+         max_zoom=40,
      )

-     try:
-         only_one = cleaned.groupby("_poly_idx").transform("size") == 1
-         one_hit = cleaned[only_one].drop(columns="_poly_idx")
-         many_hits = cleaned[~only_one]
-     except IndexError:
-         assert not cleaned["_poly_idx"].notna().any(), cleaned
-         one_hit = cleaned[lambda x: x.index == min(x.index) - 1].drop(
-             columns="_poly_idx", errors="ignore"
-         )
-         many_hits = cleaned
-
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             many_hits = (
-                 dissexp(
-                     many_hits,
-                     by="_poly_idx",
-                     aggfunc="first",
-                     dropna=True,
-                     grid_size=grid_size,
-                     n_jobs=n_jobs,
-                 )
-                 .sort_index()
-                 .reset_index(drop=True)
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(e, gdf, without_double, isolated, really_isolated)
-                 raise e
-
-     cleaned = pd.concat([many_hits, one_hit], ignore_index=True)
-
-     gdf = gdf.drop(columns="_poly_idx")
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.37707146, 59.01065274, 0.4),
+         max_zoom=40,
+     )

-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             cleaned = clean_overlay(
-                 gdf,
-                 cleaned,
-                 how="update",
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(
-                     e,
-                     gdf,
-                     cleaned,
-                     without_double,
-                     isolated,
-                     really_isolated,
-                 )
-                 raise e
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(-52074.0241, 6580847.4464, 0.1),
+         max_zoom=40,
+     )

-     cleaned = sort_large_first(cleaned)
+     explore(
+         gdf,
+         # gdf_orig,
+         missing,
+         mask,
+         mask_p=to_gdf(mask.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         gdf_p=to_gdf(gdf.extract_unique_points().explode()).assign(
+             wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         center=(5.38389153, 59.00548223, 1),
+         max_zoom=40,
+     )

-     # slivers on bottom
-     cleaned = pd.concat(split_out_slivers(cleaned, tolerance))
+     # explore(
+     #     gdf_orig,
+     #     gdf,
+     #     dups=get_intersections(gdf, geom_type="polygon"),
+     #     msk=mask,
+     #     gaps=get_gaps(gdf),
+     #     updated=update_geometries(gdf, geom_type="polygon"),
+     #     # browser=False,
+     # )
+
+     # gdf = update_geometries(gdf, geom_type="polygon")
+
+     return gdf  # .pipe(clean_clip, mask, geom_type="polygon")
+
+
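The snapping helpers that follow do not operate on GeoDataFrames but on a flat representation: an (n, 2) float array of ring vertices plus a parallel integer array mapping each vertex to its ring. A small round-trip sketch of that representation (illustrative only, built from shapely alone):

    import shapely

    ring = shapely.linearrings([(0, 0), (1, 0), (1, 1), (0, 0)])
    coords, indices = shapely.get_coordinates([ring], return_index=True)
    # coords has shape (4, 2); indices is [0, 0, 0, 0]
    rebuilt = shapely.linearrings(coords, indices=indices)[0]
    assert rebuilt.equals(ring)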
+ # @numba.njit
+ def _snap_to_anchors(
+     geoms,
+     indices: NDArray[np.int32],
+     anchors,
+     anchor_indices,
+     mask,
+     mask_indices,
+     was_midpoint,
+     was_midpoint_mask,
+     tolerance: int | float,
+ ) -> tuple[NDArray, NDArray, NDArray]:
+
+     coords, all_distances = _snap_to_anchors_inner(
+         geoms,
+         indices,
+         anchors,
+         anchor_indices,
+         mask,
+         mask_indices,
+         was_midpoint,
+         was_midpoint_mask,
+         tolerance,
+     )

-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             cleaned = update_geometries(
-                 cleaned,
-                 geom_type="polygon",
-                 grid_size=grid_size,
-                 n_jobs=n_jobs,
-             )
-             break
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 explore_geosexception(
-                     e,
-                     gdf,
-                     cleaned,
-                     without_double,
-                     isolated,
-                     really_isolated,
+     not_inf = coords[:, 0] != np.inf
+     all_distances = all_distances[not_inf]
+     indices = indices[not_inf]
+     coords = coords[not_inf]
+
+     is_snapped = np.full(len(coords), False)
+
+     n_coords = len(coords)
+
+     range_indices = np.arange(len(coords))
+
+     range_index = -1
+     for index in np.unique(indices):
+         cond = indices == index
+         these_coords = coords[cond]
+
+         # explore(ll=to_gdf(LineString(shapely.points(these_coords)), 25833))
+
+         # assert np.array_equal(these_coords[0], these_coords[-1]), these_coords
+
+         these_range_indices = range_indices[cond]
+         these_distances = all_distances[cond]
+         for i in range(len(these_coords)):
+             range_index += 1
+             if is_snapped[range_index]:
+                 print(i, "000")
+                 continue
+             # distances = all_distances[range_index]
+             distances = these_distances[i]
+             # distances = these_distances[:, i]
+             min_dist = np.min(distances)
+             if min_dist > tolerance:  # or min_dist == 0:
+                 print(i, "111", min_dist)
+                 continue
+
+             is_snapped_now = False
+
+             for j in np.argsort(distances):
+                 if distances[j] > tolerance:  # TODO or distances[j] == 0:
+                     break
+
+                 if was_midpoint_mask[j]:
+                     continue
+
+                 anchor = anchors[j]
+                 ring = these_coords.copy()
+                 ring[i] = anchor
+
+                 # snap the nexts points to same anchor if neighboring points have same anchor
+                 # in order to properly check if the ring will be simple after snapping
+                 indices_with_same_anchor = [range_index]
+                 # these_coords = coords[indices==index]
+
+                 pos_counter = 0
+                 # has_same_anchor_pos = True
+                 # has_same_anchor_neg = True
+                 while (
+                     pos_counter + i < len(these_distances) - 1
+                 ):  # has_same_anchor_pos or has_same_anchor_neg:
+                     pos_counter += 1
+
+                     # if indices[i + pos_counter] != index:
+                     #     break
+                     # next_distances = all_distances[range_index + pos_counter]
+                     next_distances = these_distances[i + pos_counter]
+                     has_same_anchor_pos = False
+                     for j2 in np.argsort(next_distances):
+                         if was_midpoint_mask[j2]:
+                             continue
+                         if next_distances[j2] > tolerance:
+                             break
+
+                         has_same_anchor_pos = j2 == j
+                         # print(
+                         #     "pos c",
+                         #     i,
+                         #     j,
+                         #     j2,
+                         #     pos_counter,
+                         #     has_same_anchor_pos,
+                         #     distances[j],
+                         #     next_distances[j2],
+                         # )
+                         break
+                     if has_same_anchor_pos:
+                         ring[i + pos_counter] = anchor
+                         indices_with_same_anchor.append(range_index + pos_counter)
+                     else:
+                         break
+
+                 # for j4 in np.arange(
+                 #     indices_with_same_anchor[0], indices_with_same_anchor[-1]
+                 # ):
+                 #     ring[j4 - range_index + i] = anchor
+                 #     indices_with_same_anchor.append(j4)
+
+                 if i == 0:
+                     # snap points at the end of the line if same anchor
+                     neg_counter = 0
+                     # has_same_anchor_neg = True
+                     while True:  # has_same_anchor_pos or has_same_anchor_neg:
+                         neg_counter -= 1
+
+                         # if indices[i + pos_counter] != index:
+                         #     break
+                         this_range_index = these_range_indices[neg_counter]
+                         # next_distances = all_distances[this_range_index]
+                         next_distances = these_distances[neg_counter]
+                         has_same_anchor_neg = False
+                         for j3 in np.argsort(next_distances):
+                             if was_midpoint_mask[j3]:
+                                 continue
+                             if next_distances[j3] > tolerance:
+                                 break
+
+                             has_same_anchor_neg = j3 == j
+                             # print(
+                             #     "neg c",
+                             #     i,
+                             #     j,
+                             #     j3,
+                             #     pos_counter,
+                             #     # has_same_anchor,
+                             #     distances[j],
+                             #     next_distances[j3],
+                             # )
+                             break
+                         if has_same_anchor_neg:
+                             ring[neg_counter] = anchor
+                             indices_with_same_anchor.append(this_range_index)
+                         else:
+                             break
+
+                 # for j5 in np.arange(0, indices_with_same_anchor[-1]):
+                 #     ring[j5 - range_index + i] = anchor
+                 #     indices_with_same_anchor.append(j5)
+
+                 indices_with_same_anchor = np.unique(indices_with_same_anchor)
+
+                 line_is_simple: bool = LineString(ring).is_simple
+
+                 # if i in [67, 68, 69, 173, 174, 175, 176, 177]:  # or
+                 if Point(these_coords[i]).intersects(
+                     to_gdf([12.08375303, 67.50052183], 4326)
+                     .to_crs(25833)
+                     .buffer(10)
+                     .union_all()
+                 ):
+                     # for xxx, yyy in locals().items():
+                     #     if len(str(yyy)) > 50:
+                     #         continue
+                     #     print(xxx)
+                     #     print(yyy)
+
+                     # print("prev:", was_midpoint_mask[j - 1])
+                     # print(distances[np.argsort(distances)])
+                     # print(anchors[np.argsort(distances)])
+                     # print(ring)
+                     explore(
+                         out_coords=to_gdf(
+                             shapely.linestrings(coords, indices=indices), 25833
+                         ),
+                         llll=to_gdf(LineString(ring), 25833),
+                         # this=to_gdf(this),
+                         # next_=to_gdf(next_),
+                         # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                         geom=to_gdf(these_coords[i], 25833),
+                         prev=to_gdf(these_coords[i - 1], 25833),
+                         nxt=to_gdf(these_coords[i + 1], 25833),
+                         nxt2=to_gdf(these_coords[i + 2], 25833),
+                         anchor=to_gdf(anchor, 25833),
+                         # browser=True,
+                     )
+
+                     print(
+                         "line_is_simple", line_is_simple, range_index, i, index, j
+                     )  # , j2, j3, x)
+
+                 if not line_is_simple:
+                     # for j4 in range(len(ring)):
+                     #     this_p = ring[j4]
+                     #     for j5 in range(len(ring)):
+                     #         that_p = ring[j5]
+                     #         dist_ = np.sqrt(
+                     #             (this_p[0] - that_p[0]) ** 2
+                     #             + (this_p[1] - that_p[1]) ** 2
+                     #         )
+                     #         if dist_ > 0 and dist_ < 1e-5:
+                     #             print(this_p)
+                     #             print(that_p)
+                     #             ring[j5] = this_p
+
+                     print(LineString(ring).wkt)
+                     # explore(
+                     #     out_coords=to_gdf(
+                     #         shapely.linestrings(coords, indices=indices), 25833
+                     #     ),
+                     #     llll=to_gdf(LineString(ring), 25833),
+                     #     # this=to_gdf(this),
+                     #     # next_=to_gdf(next_),
+                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     #     geom=to_gdf(these_coords[i], 25833),
+                     #     prev=to_gdf(these_coords[i - 1], 25833),
+                     #     nxt=to_gdf(these_coords[i + 1], 25833),
+                     #     nxt2=to_gdf(these_coords[i + 2], 25833),
+                     #     anchor=to_gdf(anchor, 25833),
+                     #     # browser=True,
+                     # )
+
+                     line_is_simple: bool = LineString(ring).is_simple
+
+                 if line_is_simple:
+                     # coords[i] = anchors[j]
+                     # is_snapped_to[j] = True
+                     # is_snapped[i] = True
+                     # explore(
+                     #     out_coords=to_gdf(
+                     #         shapely.linestrings(coords, indices=indices), 25833
+                     #     ),
+                     #     llll=to_gdf(LineString(ring), 25833),
+                     #     # this=to_gdf(this),
+                     #     # next_=to_gdf(next_),
+                     #     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     #     anc=to_gdf(anchors[j]),
+                     #     geom=to_gdf(coords[i], 25833),
+                     #     these=to_gdf(coords[i : i + n_points_with_same_anchor ], 25833),
+                     #     prev=to_gdf(coords[i - 1], 25833),
+                     #     prev2=to_gdf(coords[i - 2], 25833),
+                     #     nxt=to_gdf(coords[i + n_points_with_same_anchor + 1], 25833),
+                     #     nxt2=to_gdf(coords[i + n_points_with_same_anchor + 2], 25833),
+                     #     nxt3=to_gdf(coords[i + n_points_with_same_anchor + 3], 25833),
+                     # )
+                     # print(coords[i : i + n_points_with_same_anchor + 1])
+                     for (
+                         x
+                     ) in indices_with_same_anchor:  # range(n_points_with_same_anchor):
+                         # print(range_index, i, index, j, j2, j3, x)
+                         coords[x] = anchor  # s[j]
+                         is_snapped[x] = True
+                         # coords[i + x] = anchors[j]
+                         # is_snapped[i + x] = True
+                     # print(coords[i : i + n_points_with_same_anchor + 1])
+
+                     is_snapped_now = True
+                     break
+                 # else:
+
+             if not is_snapped_now:
+                 coords[range_index] = anchors[np.argmin(distances)]
+                 # is_snapped_to[np.argmin(distances)] = True
+
+             if 0 and index == 0:  # i > 30 and i < 40:
+                 print(i)
+                 explore(
+                     out_coords=to_gdf(
+                         shapely.linestrings(coords, indices=indices), 25833
+                     ),
+                     llll=to_gdf(LineString(ring), 25833),
+                     pppp=to_gdf(shapely.points(ring), 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # this=to_gdf(this),
+                     # next_=to_gdf(next_),
+                     # line=to_gdf(LineString(np.array([this, next_])), 25833),
+                     anc=to_gdf(anchors[j]).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     geom=to_gdf(these_coords[i], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # these=to_gdf(
+                     #     these_coords[i : i + n_points_with_same_anchor], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     prev=to_gdf(these_coords[i - 1], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     prev2=to_gdf(these_coords[i - 2], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt=to_gdf(these_coords[i + 1], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt2=to_gdf(these_coords[i + 2], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     nxt3=to_gdf(these_coords[i + 3], 25833).assign(
+                         wkt=lambda x: [g.wkt for g in x.geometry]
+                     ),
+                     # browser=True,
+                     # nxt_n=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 1], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     # nxt_n2=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 2], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
+                     # nxt_n3=to_gdf(
+                     #     coords[i + n_points_with_same_anchor + 3], 25833
+                     # ).assign(wkt=lambda x: [g.wkt for g in x.geometry]),
                  )
-                 raise e
-
-     # cleaned = _safe_simplify(cleaned, PRECISION)
-     # cleaned.geometry = shapely.make_valid(cleaned.geometry)
-
-     # TODO check why polygons dissappear in rare cases. For now, just add back the missing
-     dissapeared_polygons = sfilter_inverse(gdf, cleaned.buffer(-PRECISION))
-     cleaned = pd.concat([cleaned, dissapeared_polygons])
+             # if (
+             #     indices[i] == 48
+             # ):  # and int(out_coords[i][0]) == 375502 and int(out_coords[i][1]) == 7490104:
+             #     print(geom, out_coords[i], out_coords[-3:])
+             # xxx += 1
+             # if xxx > 100 and i >= 2106:
+             #     print(locals())
+             #     explore(
+             #         geom=to_gdf(geom, 25833),
+             #         out=to_gdf(out_coords[i], 25833),
+             #         anc=to_gdf(shapely.points(anchors), 25833),
+             #         llll=to_gdf(
+             #             shapely.geometry.LineString(
+             #                 np.array(out_coords)[indices[: len(out_coords)] == 48]
+             #             ),
+             #             25833,
+             #         ),
+             #     )
+
+     return coords, indices
+
+
+ @numba.njit
+ def _snap_to_anchors_inner(
+     geoms,
+     indices: NDArray[np.int32],
+     anchors,
+     anchor_indices,
+     mask,
+     mask_indices,
+     was_midpoint,
+     was_midpoint_mask,
+     tolerance: int | float,
+ ) -> tuple[NDArray, NDArray, NDArray]:
+     # def orientation(p, q, r):
+     #     # Calculate orientation of the triplet (p, q, r).
+     #     # 0 -> collinear, 1 -> clockwise, 2 -> counterclockwise
+     #     val = (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
+     #     if val == 0:
+     #         return 0
+     #     return 1 if val > 0 else 2
+
+     # def on_segment(p, q, r):
+     #     # Check if point q lies on line segment pr
+     #     if min(p[0], r[0]) <= q[0] <= max(p[0], r[0]) and min(p[1], r[1]) <= q[
+     #         1
+     #     ] <= max(p[1], r[1]):
+     #         return True
+     #     return False
+
+     # def check_intersection(line1, line2):
+     #     """
+     #     Check if two line segments intersect.
+
+     #     Parameters:
+     #     line1 : np.array : 2x2 array with endpoints of the first line segment [[x1, y1], [x2, y2]]
+     #     line2 : np.array : 2x2 array with endpoints of the second line segment [[x3, y3], [x4, y4]]
+
+     #     Returns:
+     #     bool : True if the lines intersect, False otherwise.
+     #     """
+
+     #     p1, q1 = line1
+     #     p2, q2 = line2
+
+     #     # Find the four orientations needed for the general and special cases
+     #     o1 = orientation(p1, q1, p2)
+     #     o2 = orientation(p1, q1, q2)
+     #     o3 = orientation(p2, q2, p1)
+     #     o4 = orientation(p2, q2, q1)
+
+     #     # General case
+     #     if o1 != o2 and o3 != o4:
+     #         return True
+
+     #     # Special cases
+     #     # p1, q1, p2 are collinear and p2 lies on segment p1q1
+     #     if o1 == 0 and on_segment(p1, p2, q1):
+     #         return True
+
+     #     # p1, q1, q2 are collinear and q2 lies on segment p1q1
+     #     if o2 == 0 and on_segment(p1, q2, q1):
+     #         return True
+
+     #     # p2, q2, p1 are collinear and p1 lies on segment p2q2
+     #     if o3 == 0 and on_segment(p2, p1, q2):
+     #         return True
+
+     #     # p2, q2, q1 are collinear and q1 lies on segment p2q2
+     #     if o4 == 0 and on_segment(p2, q1, q2):
+     #         return True
+
+     #     return False
+
+     out_coords = geoms.copy()
+     # is_snapped = np.full(len(geoms), False)
+
+     n_anchors = len(anchors)
+     mask_n_minus_1 = len(mask) - 1
+     is_snapped_to = np.full(len(anchors), False)
+     out_distances = np.full((len(geoms), n_anchors), tolerance * 3)
+
+     for i in range(len(geoms)):
+         # if is_snapped[i]:
+         #     continue
+         geom = geoms[i]
+         index = indices[i]
+         # if i == 0 or index != indices[i - 1]:
+         #     i_for_this_index = 0
+         # else:
+         #     i_for_this_index += 1
+
+         is_snapped = False
+         for j in range(len(mask)):
+             mask_index = mask_indices[j]
+
+             is_last = j == mask_n_minus_1 or mask_index != mask_indices[j + 1]
+             if is_last:
+                 continue
+
+             mask_point0 = mask[j]
+
+             # if (
+             #     not mask_is_snapped_to[j]
+             #     and np.sqrt(
+             #         (geom[0] - mask_point0[0]) ** 2 + (geom[1] - mask_point0[1]) ** 2
+             #     )
+             #     <= tolerance
+             # ):
+             #     out_coords[i] = mask_point0
+             #     mask_is_snapped_to[j] = True
+             #     is_snapped = True
+             #     break
+
+             mask_point1 = mask[j + 1]
+
+             segment_vector = mask_point1 - mask_point0
+             point_vector = geom - mask_point0
+             segment_length_squared = np.dot(segment_vector, segment_vector)
+             if segment_length_squared == 0:
+                 closest_point = mask_point0
+             else:
+                 factor = np.dot(point_vector, segment_vector) / segment_length_squared
+                 factor = max(0, min(1, factor))
+                 closest_point = mask_point0 + factor * segment_vector
+
+             if np.linalg.norm(geom - closest_point) == 0 and was_midpoint[i]:
+                 out_coords[i] = np.array([np.inf, np.inf])
+                 is_snapped = True
+                 break

-     return to_single_geom_type(cleaned, "polygon")
+         if is_snapped:
+             continue

+         distances = np.full(n_anchors, tolerance * 3)
+         for j2 in range(n_anchors):
+             anchor = anchors[j2]

- def _safe_simplify(gdf: GeoDataFrame, tolerance: float | int, **kwargs) -> GeoDataFrame:
-     """Simplify only if the resulting area is no more than 1 percent larger.
+             # if anchor_indices[j] == index:
+             #     continue

-     Because simplifying can result in holes being filled.
-     """
-     length_then = gdf.length
-     copied = gdf.copy()
-     copied.geometry = shapely.make_valid(
-         shapely.simplify(copied.geometry.values, tolerance=tolerance)
-     )
-     filt = (copied.area > length_then * 1.01) | (copied.geometry.is_empty)
-     copied.loc[filt, copied._geometry_column_name] = gdf.loc[
-         filt, copied._geometry_column_name
-     ]
+             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
+             distances[j2] = dist
+             out_distances[i, j2] = dist
+             if dist == 0 and not was_midpoint_mask[j2]:
+                 break

-     return copied
+     return out_coords, out_distances
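The segment test in `_snap_to_anchors_inner` is the standard clamped dot-product projection: the closest point on segment AB to a point P is A + clamp(dot(AP, AB) / |AB|², 0, 1) · AB. A standalone sketch of that step (mirrors the logic above, not imported from the module):

    import numpy as np

    def closest_point_on_segment(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        ab = b - a
        length_sq = np.dot(ab, ab)
        if length_sq == 0:  # degenerate segment: both endpoints coincide
            return a
        factor = np.dot(p - a, ab) / length_sq
        return a + min(1.0, max(0.0, factor)) * ab  # clamp keeps the point on the segment

    p = np.array([2.0, 1.0])
    a, b = np.array([0.0, 0.0]), np.array([4.0, 0.0])
    print(closest_point_on_segment(p, a, b))  # [2. 0.]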
 

- def _remove_interior_slivers(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
-     gdf, slivers = split_out_slivers(gdf, tolerance)
-     slivers["_idx"] = range(len(slivers))
-     without_thick = clean_overlay(
-         to_lines(slivers), buff(gdf, PRECISION), how="difference"
-     )
-     return pd.concat(
-         [
-             gdf,
-             slivers[lambda x: x["_idx"].isin(without_thick["_idx"])].drop(
-                 columns="_idx"
-             ),
-         ]
-     )
-
+ @numba.njit
+ def _build_anchors(
+     geoms: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     mask_coords: NDArray[np.float64],
+     mask_indices: NDArray[np.int32],
+     was_midpoint_mask: NDArray[bool],
+     tolerance: int | float,
+ ):
+     anchors = list(mask_coords)
+     anchor_indices = list(mask_indices)
+     is_anchor_arr = np.full(len(geoms), False)
+     was_midpoint_mask = list(was_midpoint_mask)
+     for i in np.arange(len(geoms)):
+         geom = geoms[i]
+         index = indices[i]
+         # distances = []
+         # for j, anchor in zip(anchor_indices, anchors):
+
+         is_anchor = True
+         for j in range(len(anchors)):
+             # if indices[i] != indices[j]:
+             # if i != j and indices[i] != indices[j]:
+             anchor = anchors[j]
+             dist = np.sqrt((geom[0] - anchor[0]) ** 2 + (geom[1] - anchor[1]) ** 2)
+             if dist <= tolerance:
+                 is_anchor = False
+                 break
+             # distances.append(dist)
+         # distances = np.array(distances)
+         is_anchor_arr[i] = is_anchor
+         if is_anchor:  # not len(distances) or np.min(distances) > tolerance:
+             anchors.append(geom)
+             anchor_indices.append(index)
+             was_midpoint_mask.append(True)
+     return anchors, anchor_indices, is_anchor_arr, was_midpoint_mask
+
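`_build_anchors` thins the vertex cloud greedily: a vertex becomes a new anchor only when it lies farther than the tolerance from every anchor collected so far, so the resulting anchors are mutually at least a tolerance apart. A pure-Python sketch of the same rule (illustrative, without the mask and index bookkeeping):

    import numpy as np

    def build_anchors(points: np.ndarray, tolerance: float) -> list[np.ndarray]:
        anchors: list[np.ndarray] = []
        for pt in points:
            # keep pt only if no previously chosen anchor is within tolerance
            if all(np.linalg.norm(pt - a) > tolerance for a in anchors):
                anchors.append(pt)
        return anchors

    pts = np.array([[0.0, 0.0], [0.5, 0.0], [2.0, 0.0], [2.4, 0.0]])
    print(build_anchors(pts, tolerance=1.0))  # keeps (0, 0) and (2, 0)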
+
+ @numba.njit
+ def _add_last_points_to_end(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+ ) -> tuple[
+     NDArray[np.float64],
+     NDArray[np.int32],
+ ]:
+     out_coords, out_indices = [coords[0]], [indices[0]]
+     last_coords = []
+     prev = coords[0]
+     first_coords = prev
+     n_minus_1 = len(coords) - 1
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if idx != indices[i - 1]:
+             first_coords = xy
+             out_coords.append(xy)
+             out_indices.append(idx)
+         elif not distance_to_prev:
+             if i == n_minus_1 or idx != indices[i + 1]:
+                 last_coords.append(xy)
+             prev = xy
+             continue
+         elif i == n_minus_1 or idx != indices[i + 1]:
+             out_coords.append(xy)
+             out_coords.append(first_coords)
+             out_indices.append(idx)
+             out_indices.append(idx)
+             last_coords.append(xy)
+         else:
+             out_coords.append(xy)
+             out_indices.append(idx)
+
+         prev = xy
+
+     return (out_coords, out_indices)
+
+
+ @numba.njit
+ def _add_last_points_to_end_with_third_arr(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     third_arr: NDArray[Any],
+ ) -> tuple[
+     NDArray[np.float64],
+     NDArray[np.int32],
+     NDArray[Any],
+ ]:
+     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
+     last_coords = []
+     prev = coords[0]
+     first_coords = prev
+     n_minus_1 = len(coords) - 1
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if idx != indices[i - 1]:
+             first_coords = xy
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+         elif not distance_to_prev:
+             if i == n_minus_1 or idx != indices[i + 1]:
+                 last_coords.append(xy)
+             prev = xy
+             continue
+         elif i == n_minus_1 or idx != indices[i + 1]:
+             out_coords.append(xy)
+             out_coords.append(first_coords)
+             out_indices.append(idx)
+             out_indices.append(idx)
+             last_coords.append(xy)
+             out_third_arr.append(third_arr[i])
+             out_third_arr.append(third_arr[i])
+         else:
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+
+         prev = xy
+
+     return (out_coords, out_indices, out_third_arr)
+
+
+ @numba.njit
+ def _remove_duplicate_points(
+     coords: NDArray[np.float64],
+     indices: NDArray[np.int32],
+     third_arr: NDArray[Any],
+ ):
+     out_coords, out_indices, out_third_arr = [coords[0]], [indices[0]], [third_arr[0]]
+     prev = coords[0]
+     for i in np.arange(1, len(coords)):
+         idx = indices[i]
+         xy = coords[i]
+         distance_to_prev: float = np.sqrt(
+             (xy[0] - prev[0]) ** 2 + (xy[1] - prev[1]) ** 2
+         )
+         if not distance_to_prev and idx == indices[i - 1]:
+             prev = xy
+             continue

- def remove_spikes(
-     gdf: GeoDataFrame, tolerance: int | float, n_jobs: int = 1
- ) -> GeoDataFrame:
-     """Remove thin spikes from polygons.
+         if idx != indices[i - 1]:
+             out_coords.append(xy)
+             out_indices.append(idx)
+             out_third_arr.append(third_arr[i])
+             prev = xy
+             continue

-     Args:
-         gdf: A GeoDataFrame.
-         tolerance: Spike tolerance.
-         n_jobs: Number of threads.
+         out_coords.append(xy)
+         out_indices.append(idx)
+         out_third_arr.append(third_arr[i])
+         prev = xy

-     Returns:
-         A GeoDataFrame.
-     """
-     return clean_overlay(
-         gdf, gdf[["geometry"]], how="intersection", grid_size=tolerance, n_jobs=n_jobs
-     )
+     return out_coords, out_indices, out_third_arr

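`_remove_duplicate_points` drops a vertex only when it has zero distance to the immediately preceding vertex and belongs to the same ring index; the first vertex of a new ring is always kept, and the parallel third array stays aligned. A minimal illustration of that rule (assumed semantics, pure Python):

    import numpy as np

    coords = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    indices = np.array([0, 0, 0, 1])  # last point starts a new ring
    out = [0]
    prev = coords[0]
    for i in range(1, len(coords)):
        dist = np.linalg.norm(coords[i] - prev)
        if dist == 0 and indices[i] == indices[i - 1]:
            prev = coords[i]
            continue  # consecutive duplicate within the same ring: skip
        out.append(i)
        prev = coords[i]
    print(coords[out])  # duplicate (0, 0) removed; (1, 0) kept at the new ring index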
 
- def _properly_fix_duplicates(
-     gdf: GeoDataFrame,
-     double: GeoDataFrame,
-     slivers: GeoDataFrame,
-     thin_gaps_and_double: GeoDataFrame,
+ def _snap_linearrings(
+     geoms: NDArray[LinearRing],
      tolerance: int | float,
-     n_jobs: int,
- ) -> GeoDataFrame:
-     gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double, n_jobs)
-     gdf, more_slivers = split_out_slivers(gdf, tolerance)
-     slivers = pd.concat([slivers, more_slivers], ignore_index=True)
-     gaps = get_gaps(gdf, include_interiors=True)
-     gaps["_was_gap"] = 1
-     assert "_double_idx" not in gaps
-     double = get_intersections(gdf)
-     double["_double_idx"] = range(len(double))
-     thin_gaps_and_double = pd.concat([gaps, double], ignore_index=True).loc[
-         lambda x: x.buffer(-tolerance / 2).is_empty
-     ]
-
-     return gdf, thin_gaps_and_double, slivers
+     mask: GeoDataFrame | None,
+     snap_to_anchors: bool = True,
+ ):
+     if not len(geoms):
+         return geoms

+     points = GeoDataFrame(
+         {
+             "geometry": extract_unique_points(geoms),
+             "_geom_idx": np.arange(len(geoms)),
+         }
+     ).explode(ignore_index=True)
+     coords = get_coordinates(points.geometry.values)
+     indices = points["_geom_idx"].values
+
+     if mask is not None:
+         mask_coords, mask_indices = get_coordinates(
+             mask.geometry.values, return_index=True
+         )
+         is_anchor = np.full(len(mask_coords), False)

- def _dissolve_thick_double_and_update(
-     gdf: GeoDataFrame, double: GeoDataFrame, thin_double: GeoDataFrame, n_jobs: int
- ) -> GeoDataFrame:
-     large = (
-         double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])].drop(
-             columns="_double_idx"
+         mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
+             mask_coords, mask_indices, is_anchor
          )
-         # .pipe(sort_large_first)
-         # .sort_values("_poly_idx")
-         .pipe(update_geometries, geom_type="polygon", n_jobs=n_jobs)
-     )
-     return (
-         clean_overlay(gdf, large, how="update", geom_type="polygon", n_jobs=n_jobs)
-         # .pipe(sort_large_first)
-         # .sort_values("_poly_idx")
-         .pipe(update_geometries, geom_type="polygon", n_jobs=n_jobs)
-     )
+         mask_coords, mask_indices = _add_last_points_to_end(mask_coords, mask_indices)
+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)

+         is_anchor = np.full(len(mask_coords), False)
+         mask_coords, mask_indices, is_anchor = _remove_duplicate_points(
+             mask_coords, mask_indices, is_anchor
+         )
+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)

- def _cleaning_checks(
-     gdf: GeoDataFrame, tolerance: int | float, duplicate_action: bool
- ) -> GeoDataFrame:  # , spike_action):
-     if not len(gdf) or not tolerance:
-         return gdf
-     if tolerance < PRECISION:
-         raise ValueError(
-             f"'tolerance' must be larger than {PRECISION} to avoid "
-             "problems with floating point precision."
+         original_mask_buffered = shapely.buffer(
+             shapely.linearrings(mask_coords, indices=mask_indices),
+             tolerance * 1.1,
+         )
+         mask_coords, mask_indices, was_midpoint_mask, _ = (
+             _add_midpoints_to_segments_numba(
+                 mask_coords,
+                 mask_indices,
+                 get_coordinates(
+                     sfilter(
+                         points.geometry.drop_duplicates(),
+                         original_mask_buffered,
+                     )
+                 ),
+                 tolerance * 1.1,
+             )
          )
-     if duplicate_action not in ["fix", "error", "ignore"]:
-         raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
-
-
- def split_out_slivers(
-     gdf: GeoDataFrame | GeoSeries, tolerance: float | int
- ) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
-     is_sliver = gdf.buffer(-tolerance / 2).is_empty
-     slivers = gdf.loc[is_sliver]
-     gdf = gdf.loc[~is_sliver]
-     slivers, isolated = sfilter_split(slivers, gdf.buffer(PRECISION))
-     gdf = pd.concat([gdf, isolated])
-     return gdf, slivers
-
-
- def try_for_grid_size(
-     func: Callable,
-     grid_sizes: tuple[None, float | int],
-     args: tuple | None = None,
-     kwargs: dict | None = None,
- ) -> Any:
-     args = args or ()
-     kwargs = kwargs or {}
-     for i, grid_size in enumerate(grid_sizes):
-         try:
-             return func(*args, grid_size=grid_size, **kwargs)
-         except GEOSException as e:
-             if i == len(grid_sizes) - 1:
-                 raise e

+         mask_coords = np.array(mask_coords)
+         mask_indices = np.array(mask_indices)
+         mask_indices = (mask_indices + 1) * -1
+
+     is_anchor = np.full(len(coords), False)
+     coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
+
+     coords, indices = _add_last_points_to_end(coords, indices)
+     coords = np.array(coords)
+     indices = np.array(indices)
+
+     is_anchor = np.full(len(coords), False)
+     coords, indices, is_anchor = _remove_duplicate_points(coords, indices, is_anchor)
+     coords = np.array(coords)
+     indices = np.array(indices)
+
+     # if 0:
+     #     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         tolerance * 1.1,  # + PRECISION * 100,
+     #     )
+
+     #     was_midpoint = np.array(was_midpoint)
+
+     #     coords, is_snapped_to = _snap_to_anchors(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         was_midpoint,
+     #         was_midpoint_mask,
+     #         tolerance + PRECISION * 20,
+     #     )
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+
+     #     indices = indices[coords[:, 0] != np.inf]
+     #     coords = coords[coords[:, 0] != np.inf]
+
+     if snap_to_anchors:
+         if mask is None:
+             mask_coords = [coords[0]]
+             mask_indices = [indices[0]]
+             was_midpoint_mask = [False]
+         anchors, anchor_indices, is_anchor, was_midpoint_anchors = _build_anchors(
+             coords,
+             indices,
+             mask_coords,
+             mask_indices,
+             was_midpoint_mask,
+             tolerance + PRECISION,  # * 100
+         )
+         anchors = np.array(anchors)
+         anchor_indices = np.array(anchor_indices)

- def split_and_eliminate_by_longest(
-     gdf: GeoDataFrame | list[GeoDataFrame],
-     to_eliminate: GeoDataFrame,
-     tolerance: int | float,
-     grid_sizes: tuple[None | float | int] = (None,),
-     n_jobs: int = 1,
-     **kwargs,
- ) -> GeoDataFrame | tuple[GeoDataFrame]:
-     if not len(to_eliminate):
-         return gdf
+         # anchors = np.round(anchors, 3)

-     if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
-         as_gdf = pd.concat(gdf, ignore_index=True)
      else:
-         as_gdf = gdf
-
-     splitted = try_for_grid_size(
-         split_by_neighbors,
-         grid_sizes=grid_sizes,
-         args=(to_eliminate, as_gdf, tolerance),
-     ).pipe(sort_small_first)
-
-     splitted = try_for_grid_size(
-         update_geometries,
-         grid_sizes=grid_sizes,
-         args=(splitted,),
-         kwargs=dict(geom_type="polygon", n_jobs=n_jobs),
-     )
+         anchors, anchor_indices, was_midpoint_anchors = (
+             mask_coords,
+             mask_indices,
+             was_midpoint_mask,
+         )

-     gdf = try_for_grid_size(
-         eliminate_by_longest,
-         grid_sizes=grid_sizes,
-         args=(
-             gdf,
-             splitted,
-         ),
-         kwargs=kwargs | {"n_jobs": n_jobs},
+     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+         coords,
+         indices,
+         anchors,
+         tolerance * 1.1,
      )

-     if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
-         as_gdf = pd.concat(gdf, ignore_index=True)
-     else:
-         as_gdf = gdf
-
-     missing = try_for_grid_size(
-         clean_overlay,
-         grid_sizes=grid_sizes,
-         args=(
-             to_eliminate,
-             as_gdf,
-         ),
-         kwargs=dict(
-             how="difference",
-             geom_type="polygon",
-             n_jobs=n_jobs,
-         ),
-     ).pipe(lambda x: dissexp(x, n_jobs=n_jobs))
+     was_midpoint = np.array(was_midpoint)

-     return try_for_grid_size(
-         eliminate_by_longest,
-         grid_sizes=grid_sizes,
-         args=(gdf, missing),
-         kwargs=kwargs | {"n_jobs": n_jobs},
+     coords_up_here000 = (
+         pd.Series(_coords_to_rings(np.array(coords), np.array(indices), geoms))
+         .loc[lambda x: x.notna()]
+         .values
      )
+     coords_up_here000 = to_gdf(polygons(coords_up_here000), 25833)

-
- def split_by_neighbors(
-     df: GeoDataFrame,
-     split_by: GeoDataFrame,
-     tolerance: int | float,
-     grid_size: float | int | None = None,
- ) -> GeoDataFrame:
-     if not len(df):
-         return df
-
-     split_by = split_by.copy()
-     split_by.geometry = shapely.simplify(split_by.geometry, tolerance)
-
-     intersecting_lines = (
-         clean_overlay(
-             to_lines(split_by),
-             buff(df, tolerance),
-             how="intersection",
-             grid_size=grid_size,
-         )
-         .pipe(get_line_segments)
-         .reset_index(drop=True)
+     coords, indices, was_midpoint = _add_last_points_to_end_with_third_arr(
+         coords, indices, was_midpoint
      )

-     endpoints = intersecting_lines.boundary.explode(index_parts=False)
-
-     extended_lines = GeoDataFrame(
-         {
-             "geometry": extend_lines(
-                 endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
-                 endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
-                 distance=tolerance * 3,
-             )
-         },
-         crs=df.crs,
+     coords, indices, was_midpoint = _remove_duplicate_points(
+         coords, indices, was_midpoint
      )

-     buffered = buff(extended_lines, tolerance, single_sided=True)
-
-     return clean_overlay(df, buffered, how="identity", grid_size=grid_size)
-
+     coords = np.array(coords)
+     indices = np.array(indices)
+     was_midpoint = np.array(was_midpoint)

- def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
-     if len(arr1) != len(arr2):
-         raise ValueError
-     if not len(arr1):
-         return arr1
-
-     arr1, arr2 = arr2, arr1  # TODO fix
-
-     coords1 = coordinate_array(arr1)
-     coords2 = coordinate_array(arr2)
-
-     dx = coords2[:, 0] - coords1[:, 0]
-     dy = coords2[:, 1] - coords1[:, 1]
-     len_xy = np.sqrt((dx**2.0) + (dy**2.0))
-     x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
-     y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
+     coords_up_here = (
+         pd.Series(_coords_to_rings(coords, indices, geoms))
+         .loc[lambda x: x.notna()]
+         .values
+     )
+     coords_up_here = to_gdf(polygons(coords_up_here), 25833)

-     new_points = np.array([None for _ in range(len(arr1))])
-     new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
+     explore(
+         coords=to_gdf(shapely.points(coords), 25833).assign(
+             idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+         ),
+         anchors=to_gdf(shapely.points(anchors), 25833).assign(
+             idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+         ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+         coords_up_here000=coords_up_here000,
+         coords_up_here=coords_up_here,
+         geoms=to_gdf(polygons(geoms), 25833),
+         msk=to_gdf(shapely.points(mask_coords), 25833).assign(
+             was_midpoint_mask=was_midpoint_mask
+         ),
+         # center=_DEBUG_CONFIG["center"],
+     )

-     new_points[~np.isnan(x)] = make_lines_between_points(
-         arr2[~np.isnan(x)], new_points[~np.isnan(x)]
+     coords, indices = _snap_to_anchors(
+         coords,
+         indices,
+         anchors,
+         anchor_indices,
+         mask_coords,
+         mask_indices,
+         was_midpoint,
+         was_midpoint_anchors,
+         tolerance + PRECISION * 100,
      )
-     return new_points
+     indices = np.array(indices)
+     coords = np.array(coords)
+     indices = indices[coords[:, 0] != np.inf]
+     coords = coords[coords[:, 0] != np.inf]
+
+     # coords_up_here111 = (
+     #     pd.Series(_coords_to_rings(coords, indices, geoms))
+     #     .loc[lambda x: x.notna()]
+     #     .values
+     # )
+     # coords_up_here111 = to_gdf(polygons(coords_up_here111), 25833)
+
+     # if 0:
+     #     # coords = get_coordinates(points.geometry.values)
+     #     # indices = points["_geom_idx"].values
+
+     #     is_anchor = np.full(len(coords), False)
+     #     coords, indices, is_anchor = _remove_duplicate_points(
+     #         coords, indices, is_anchor
+     #     )
+     #     coords, indices = _add_last_points_to_end(coords, indices)
+     #     coords = np.array(coords)
+     #     indices = np.array(indices)
+     #     is_anchor = np.full(len(coords), False)
+     #     coords, indices, is_anchor = _remove_duplicate_points(
+     #         coords, indices, is_anchor
+     #     )
+     #     coords = np.array(coords)
+     #     indices = np.array(indices)
+
+     # display(pd.DataFrame(coords, index=indices, columns=[*"xy"]))
+
+     # if 0:
+     #     mask_coords, mask_indices, , dist_to_closest_geom = (
+     #         _add_midpoints_to_segments_numba(
+     #             mask_coords,
+     #             mask_indices,
+     #             # coords,
+     #             get_coordinates(
+     #                 sfilter(
+     #                     GeoSeries(shapely.points(coords)).drop_duplicates(),
+     #                     original_mask_buffered,
+     #                 )
+     #             ),
+     #             tolerance * 1.1,
+     #         )
+     #     )
+
+     #     mask_coords = np.array(mask_coords)
+     #     mask_indices = np.array(mask_indices)
+
+     #     anchors, anchor_indices, is_anchor = _build_anchors(
+     #         coords,
+     #         indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         # is_anchor,
+     #         tolerance + PRECISION,  # * 100
+     #     )
+     #     anchors = np.array(anchors)
+     #     anchor_indices = np.array(anchor_indices)
+
+     #     coords, indices, was_midpoint, _ = _add_midpoints_to_segments_numba(
+     #         coords,
+     #         indices,
+     #         anchors,
+     #         tolerance * 1.1,  # + PRECISION * 100,
+     #         # GeoDataFrame({"geometry": shapely.points(coords), "_geom_idx": indices}),
+     #         # GeoDataFrame({"geometry": shapely.points(anchors)}),
+     #         # tolerance,  # + PRECISION * 100,
+     #         # None,
+     #     )
+     #     print(len(coords), len(anchors), len(was_midpoint))
+
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+
+     #     was_midpoint = np.array(was_midpoint)
+
+     #     coords, is_snapped_to = _snap_to_anchors(
+     #         coords,
+     #         indices,
+     #         anchors,
+     #         anchor_indices,
+     #         mask_coords,
+     #         mask_indices,
+     #         was_midpoint,
+     #         was_midpoint_anchors,
+     #         tolerance + PRECISION * 20,
+     #     )
+     #     indices = np.array(indices)
+     #     coords = np.array(coords)
+     #     indices = indices[coords[:, 0] != np.inf]
+     #     coords = coords[coords[:, 0] != np.inf]
+
+     # coords = np.array(coords)
+
+     # indices = np.array(indices)
+
+     coords_down_here = (
+         pd.Series(_coords_to_rings(coords, indices, geoms))
+         .loc[lambda x: x.notna()]
+         .values
+     )
+     lines_down_here = to_gdf(shapely.buffer(coords_down_here, 0.1), 25833)
+     coords_down_here = to_gdf(polygons(coords_down_here), 25833)

+     try:
+         explore(
+             coords=to_gdf(shapely.points(coords), 25833).assign(
+                 idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),
+             anchors=to_gdf(shapely.points(anchors), 25833).assign(
+                 idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+             coords_up_here000=coords_up_here000,
+             coords_up_here=coords_up_here,
+             coords_down_here=coords_down_here,
+             lines_down_here=lines_down_here,
+             geoms=to_gdf(polygons(geoms), 25833),
+             msk=to_gdf(shapely.points(mask_coords), 25833).assign(
+                 was_midpoint_mask=was_midpoint_mask
+             ),
+         )

- def make_lines_between_points(
-     arr1: NDArray[Point], arr2: NDArray[Point]
- ) -> NDArray[LineString]:
-     if arr1.shape != arr2.shape:
-         raise ValueError(
-             f"Arrays must have equal shape. Got {arr1.shape} and {arr2.shape}"
+         explore(
+             coords=to_gdf(shapely.points(coords), 25833).assign(
+                 idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),
+             anchors=to_gdf(shapely.points(anchors), 25833).assign(
+                 idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
+             ),  # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
+             coords_up_here000=coords_up_here000,
+             coords_up_here=coords_up_here,
1360
+ coords_down_here=coords_down_here,
1361
+ lines_down_here=lines_down_here,
1362
+ geoms=to_gdf(polygons(geoms), 25833),
1363
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1364
+ was_midpoint_mask=was_midpoint_mask
1365
+ ),
1366
+ center=(5.37707159, 59.01065276, 1),
627
1367
  )
628
- coords: pd.DataFrame = pd.concat(
629
- [
630
- pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
631
- pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
632
- ]
633
- ).sort_index()
634
-
635
- return linestrings(coords.values, indices=coords.index)
636
-
637
-
638
- def get_line_segments(lines: GeoDataFrame | GeoSeries) -> GeoDataFrame:
639
- assert lines.index.is_unique
640
- if isinstance(lines, GeoDataFrame):
641
- geom_col = lines._geometry_column_name
642
- multipoints = lines.assign(
643
- **{geom_col: extract_unique_points(lines.geometry.values)}
1368
+ explore(
1369
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1370
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1371
+ ),
1372
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1373
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1374
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1375
+ coords_up_here000=coords_up_here000,
1376
+ coords_up_here=coords_up_here,
1377
+ coords_down_here=coords_down_here,
1378
+ lines_down_here=lines_down_here,
1379
+ geoms=to_gdf(polygons(geoms), 25833),
1380
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1381
+ was_midpoint_mask=was_midpoint_mask
1382
+ ),
1383
+ center=(5.37419946, 59.01138812, 15),
644
1384
  )
645
- segments = multipoints_to_line_segments(multipoints.geometry)
646
- return segments.join(lines.drop(columns=geom_col))
647
1385
 
648
- multipoints = GeoSeries(extract_unique_points(lines.values), index=lines.index)
1386
+ explore(
1387
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1388
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1389
+ ),
1390
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1391
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1392
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1393
+ coords_up_here000=coords_up_here000,
1394
+ coords_up_here=coords_up_here,
1395
+ lines_down_here=lines_down_here,
1396
+ coords_down_here=coords_down_here,
1397
+ geoms=to_gdf(polygons(geoms), 25833),
1398
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1399
+ was_midpoint_mask=was_midpoint_mask
1400
+ ),
1401
+ center=(5.38389153, 59.00548223, 1),
1402
+ )
1403
+ explore(
1404
+ coords=to_gdf(shapely.points(coords), 25833).assign(
1405
+ idx=indices, wkt=lambda x: [g.wkt for g in x.geometry]
1406
+ ),
1407
+ anchors=to_gdf(shapely.points(anchors), 25833).assign(
1408
+ idx=anchor_indices, wkt=lambda x: [g.wkt for g in x.geometry]
1409
+ ), # , straight_distances=straight_distances, distances_to_lines=distances_to_lines),
1410
+ coords_up_here000=coords_up_here000,
1411
+ coords_up_here=coords_up_here,
1412
+ coords_down_here=coords_down_here,
1413
+ lines_down_here=lines_down_here,
1414
+ geoms=to_gdf(polygons(geoms), 25833),
1415
+ msk=to_gdf(shapely.points(mask_coords), 25833).assign(
1416
+ was_midpoint_mask=was_midpoint_mask
1417
+ ),
1418
+ center=_DEBUG_CONFIG["center"],
1419
+ )
649
1420
 
650
- return multipoints_to_line_segments(multipoints)
1421
+ except GEOSException as e:
1422
+ print(e)
651
1423
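The whole visualization block above is wrapped in `try`/`except GEOSException` so that a rendering failure is printed rather than allowed to abort the cleaning; the function still falls through to its return value. The guard, reduced to its shape (the plotting call is a hypothetical stand-in):

```python
# Best-effort debug plotting: a GEOSException is printed, not raised.
from shapely.errors import GEOSException

try:
    render_debug_maps()  # hypothetical stand-in for the explore(...) calls
except GEOSException as e:
    print(e)
```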
 
1424
+ return _coords_to_rings(coords, indices, geoms)
652
1425
 
653
- def multipoints_to_line_segments(multipoints: GeoSeries) -> GeoDataFrame:
654
- if not len(multipoints):
655
- return GeoDataFrame({"geometry": multipoints}, index=multipoints.index)
656
1426
 
657
- try:
658
- crs = multipoints.crs
659
- except AttributeError:
660
- crs = None
1427
+ def _coords_to_rings(
1428
+ coords: NDArray[np.float64],
1429
+ indices: NDArray[np.int32],
1430
+ original_geoms: NDArray[LinearRing],
1431
+ ) -> NDArray[LinearRing]:
1432
+ df = pd.DataFrame({"x": coords[:, 0], "y": coords[:, 1]}, index=indices).loc[
1433
+ lambda x: x.groupby(level=0).size() > 2
1434
+ ]
1435
+ to_int_idx = {idx: i for i, idx in enumerate(df.index.unique())}
1436
+ rings = pd.Series(
1437
+ linearrings(df.values, indices=df.index.map(to_int_idx)),
1438
+ index=df.index.unique(),
1439
+ )
661
1440
 
662
- try:
663
- point_df = multipoints.explode(index_parts=False)
664
- except AttributeError:
665
- points, indices = get_parts(multipoints, return_index=True)
666
- if isinstance(multipoints.index, pd.MultiIndex):
667
- indices = pd.MultiIndex.from_arrays(indices, names=multipoints.index.names)
1441
+ missing = pd.Series(
1442
+ index=pd.Index(range(len(original_geoms))).difference(rings.index)
1443
+ )
668
1444
 
669
- point_df = pd.DataFrame({"geometry": GeometryArray(points)}, index=indices)
1445
+ return pd.concat([rings, missing]).sort_index().values
670
1446
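`_coords_to_rings` rebuilds one `LinearRing` per index group, drops groups that have collapsed to two or fewer points, and pads the result with NaN placeholders so the output array stays positionally aligned with `original_geoms`. A usage sketch (note that `shapely.linearrings` closes an open coordinate sequence automatically):

```python
# Usage sketch for _coords_to_rings: ring 0 survives, ring 1 has only
# two points left and comes back as NaN so positions stay aligned.
import numpy as np

coords = np.array(
    [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0],  # ring 0: 4 points
     [5.0, 5.0], [6.0, 5.0]]                           # ring 1: collapsed
)
indices = np.array([0, 0, 0, 0, 1, 1])
rings = _coords_to_rings(coords, indices, original_geoms=np.empty(2, dtype=object))
# rings[0] is a LinearRing; rings[1] is NaN
```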
 
671
- try:
672
- point_df = point_df.to_frame("geometry")
673
- except AttributeError:
674
- pass
675
1447
 
676
- point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
1448
+ @numba.njit
1449
+ def _add_midpoints_to_segments_numba(
1450
+ geoms: NDArray[np.float64],
1451
+ indices: NDArray[np.int32],
1452
+ anchors: NDArray[np.float64],
1453
+ tolerance: int | float,
1454
+ ):
1455
+ n_minus_1 = len(geoms) - 1
1456
+ out_coords = []
1457
+ out_indices = []
1458
+ was_midpoint = []
1459
+ out_distances = []
1460
+ for i in range(len(geoms)):
1461
+ index = indices[i]
1462
+
1463
+ is_last = i == n_minus_1 or index != indices[i + 1]
1464
+ if is_last:
1465
+ continue
1466
+
1467
+ geom0 = geoms[i]
1468
+ geom1 = geoms[i + 1]
1469
+
1470
+ closest_points = np.full((len(anchors) + 2, 2), np.inf)
1471
+ these_out_distances = np.full(len(anchors) + 2, np.inf)
1472
+ closest_points[-1] = geom1
1473
+ closest_points[-2] = geom0
1474
+ these_out_distances[-1] = 0
1475
+ these_out_distances[-2] = 0
1476
+
1477
+ segment_vector = geom1 - geom0
1478
+ segment_length_squared = np.dot(segment_vector, segment_vector)
1479
+ for j in range(len(anchors)):
1480
+ anchor = anchors[j]
1481
+
1482
+ if segment_length_squared == 0:
1483
+ closest_point = geom0
1484
+ else:
1485
+ point_vector = anchor - geom0
1486
+ factor = np.dot(point_vector, segment_vector) / segment_length_squared
1487
+ factor = max(0, min(1, factor))
1488
+ if factor < 1e-6:
1489
+ closest_point = geom0
1490
+ elif factor > 1 - 1e-6:
1491
+ closest_point = geom1
1492
+ else:
1493
+ closest_point = geom0 + factor * segment_vector
1494
+
1495
+ dist = np.linalg.norm(anchor - closest_point)
1496
+ if dist <= tolerance and dist > PRECISION:
1497
+ closest_points[j] = closest_point
1498
+ these_out_distances[j] = dist
1499
+
1500
+ # if (
1501
+ # closest_point[0] == 905049.3317999999
1502
+ # ): # and int(closest_point[1]) == 7877676:
1503
+ # print()
1504
+ # for xxx in closest_point:
1505
+ # print(xxx)
1506
+ # for xxx in geom0:
1507
+ # print(xxx)
1508
+ # for xxx in geom1:
1509
+ # print(xxx)
1510
+ # for xxx, yyy in locals().items():
1511
+ # print(xxx, yyy)
1512
+ # ssss
1513
+
1514
+ not_inf = closest_points[:, 0] != np.inf
1515
+ arr = closest_points[not_inf]
1516
+ these_out_distances = these_out_distances[not_inf]
1517
+
1518
+ # sort by first and second column
1519
+ # could have used np.lexsort, but it's not numba compatible
1520
+ arr = arr[np.argsort(arr[:, 0])]
1521
+ any_unsorted = True
1522
+ while any_unsorted:
1523
+ any_unsorted = False
1524
+ for i in range(len(arr) - 1):
1525
+ if arr[i, 0] < arr[i + 1, 0]:
1526
+ continue
1527
+ if arr[i, 1] > arr[i + 1, 1]:
1528
+ copied = arr[i].copy()
1529
+ arr[i] = arr[i + 1]
1530
+ arr[i + 1] = copied
1531
+
1532
+ copied = these_out_distances[i]
1533
+ these_out_distances[i] = these_out_distances[i + 1]
1534
+ these_out_distances[i + 1] = copied
1535
+
1536
+ any_unsorted = True
1537
+
1538
+ with_midpoints = []
1539
+ these_out_distances2 = []
1540
+ first_is_added = False
1541
+ last_is_added = False
1542
+ is_reverse = False
1543
+ for y in range(len(arr)):
1544
+ point = arr[y]
1545
+ if (
1546
+ not first_is_added
1547
+ and np.sqrt((geom0[0] - point[0]) ** 2 + (geom0[1] - point[1]) ** 2)
1548
+ == 0
1549
+ ):
1550
+ first_is_added = True
1551
+ with_midpoints.append(point)
1552
+ these_out_distances2.append(these_out_distances[y])
1553
+ if last_is_added:
1554
+ is_reverse = True
1555
+ break
1556
+ else:
1557
+ continue
1558
+ elif (
1559
+ not last_is_added
1560
+ and np.sqrt((geom1[0] - point[0]) ** 2 + (geom1[1] - point[1]) ** 2)
1561
+ == 0
1562
+ ):
1563
+ last_is_added = True
1564
+ with_midpoints.append(point)
1565
+ these_out_distances2.append(these_out_distances[y])
1566
+ if not first_is_added:
1567
+ is_reverse = True
1568
+ continue
1569
+ else:
1570
+ with_midpoints.append(point)
1571
+ break
1572
+ if first_is_added or last_is_added:
1573
+ with_midpoints.append(point)
1574
+ these_out_distances2.append(these_out_distances[y])
1575
+
1576
+ # these_out_distances2.append(these_out_distances[y])
1577
+ # these_anchors2.append(these_anchors[y])
1578
+
1579
+ # with_midpoints = np.array(with_midpoints)
1580
+
1581
+ if is_reverse:
1582
+ with_midpoints = with_midpoints[::-1]
1583
+ these_out_distances2 = these_out_distances2[::-1]
1584
+ # these_anchors2 = these_anchors2[::-1]
1585
+
1586
+ # print(index, is_reverse, arr)
1587
+ # print(with_midpoints)
1588
+ # print(to_gdf(LineString([geom0, geom1]), 25833))
1589
+ # print(to_gdf(shapely.points(closest_points)))
1590
+ # explore(
1591
+ # to_gdf(shapely.points(with_midpoints)).assign(
1592
+ # idx=lambda x: range(len(x))
1593
+ # ),
1594
+ # "idx",
1595
+ # )
1596
+ # explore(
1597
+ # l=to_gdf(LineString([geom0, geom1]), 25833),
1598
+ # # anchors=to_gdf(shapely.points(anchors)),
1599
+ # # anchors_in_dist=to_gdf(shapely.points(these_anchors)),
1600
+ # # closest_points=to_gdf(shapely.points(closest_points)),
1601
+ # with_midpoints=to_gdf(shapely.points(with_midpoints)),
1602
+ # anchors=to_gdf(shapely.points(anchors)),
1603
+ # arr=to_gdf(shapely.points(arr)),
1604
+ # # center=(-0.07034028, 1.80337784, 0.4),
1605
+ # )
1606
+
1607
+ with_midpoints_no_dups = []
1608
+ these_out_distances_no_dups = []
1609
+
1610
+ for y2 in range(len(with_midpoints)):
1611
+ point = with_midpoints[y2]
1612
+ should_be_added = True
1613
+ for z in range(len(with_midpoints_no_dups)):
1614
+ out_point = with_midpoints_no_dups[z]
1615
+ if (
1616
+ np.sqrt(
1617
+ (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1618
+ )
1619
+ == 0
1620
+ ):
1621
+ should_be_added = False
1622
+ break
1623
+ if should_be_added:
1624
+ with_midpoints_no_dups.append(point)
1625
+ these_out_distances_no_dups.append(these_out_distances2[y2])
1626
+
1627
+ n_minus_1_midpoints = len(with_midpoints_no_dups) - 1
1628
+ for y3 in range(len(with_midpoints_no_dups)):
1629
+ point = with_midpoints_no_dups[y3]
1630
+ should_be_added = True
1631
+
1632
+ for z2 in np.arange(len(out_coords))[::-1]:
1633
+ if out_indices[z2] != index:
1634
+ continue
1635
+ out_point = out_coords[z2]
1636
+
1637
+ if (
1638
+ np.sqrt(
1639
+ (point[0] - out_point[0]) ** 2 + (out_point[1] - point[1]) ** 2
1640
+ )
1641
+ == 0
1642
+ ):
1643
+ should_be_added = False
1644
+ break
1645
+
1646
+ if not should_be_added:
1647
+ continue
1648
+
1649
+ out_coords.append(point)
1650
+ out_indices.append(index)
1651
+ out_distances.append(these_out_distances_no_dups[y3])
1652
+ if y3 == 0 or y3 == n_minus_1_midpoints:
1653
+ was_midpoint.append(False)
1654
+ else:
1655
+ was_midpoint.append(True)
677
1656
 
678
- first_points = point_df.loc[lambda x: ~x.index.duplicated(), "geometry"]
679
- is_last_point = point_df["next"].isna()
1657
+ return (
1658
+ out_coords,
1659
+ out_indices,
1660
+ was_midpoint,
1661
+ out_distances,
1662
+ )
680
1663
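At its core, the numba loop performs a standard point-to-segment projection: the anchor is projected onto each segment via dot products, the parameter is clamped to [0, 1], and near-endpoint hits snap back to the endpoints. The same geometry in plain NumPy:

```python
# Plain-NumPy version of the projection used inside the numba loop above.
import numpy as np

def closest_point_on_segment(anchor, start, end, eps=1e-6):
    v = end - start
    length_sq = np.dot(v, v)
    if length_sq == 0:          # degenerate zero-length segment
        return start
    t = np.dot(anchor - start, v) / length_sq
    t = max(0.0, min(1.0, t))   # clamp onto the segment
    if t < eps:                 # mirror the near-endpoint snapping above
        return start
    if t > 1 - eps:
        return end
    return start + t * v

p = closest_point_on_segment(np.array([0.5, 1.0]), np.array([0.0, 0.0]), np.array([1.0, 0.0]))
assert np.allclose(p, [0.5, 0.0])
```

The hand-rolled swap loop after the projection exists because `np.lexsort` is not supported inside `njit` functions, as the inline comment notes.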
 
681
- point_df.loc[is_last_point, "next"] = first_points
682
- assert point_df["next"].notna().all()
683
1664
 
684
- point_df["geometry"] = [
685
- LineString([x1, x2])
686
- for x1, x2 in zip(point_df["geometry"], point_df["next"], strict=False)
687
- ]
688
- return GeoDataFrame(point_df.drop(columns=["next"]), geometry="geometry", crs=crs)
1665
+ def _separate_single_neighbored_from_multi_neighoured_geometries(
1666
+ gdf: GeoDataFrame, neighbors: GeoDataFrame
1667
+ ) -> tuple[GeoDataFrame, GeoDataFrame]:
1668
+ """Split GeoDataFrame in two: those with 0 or 1 neighbors and those with 2 or more.
689
1669
 
1670
+ Because single-neighbored polygons do not need splitting.
1671
+ """
1672
+ tree = STRtree(neighbors.geometry.values)
1673
+ left, right = tree.query(gdf.geometry.values, predicate="intersects")
1674
+ pairs = pd.Series(right, index=left)
1675
+ has_more_than_one_neighbor = (
1676
+ pairs.groupby(level=0).size().loc[lambda x: x > 1].index
1677
+ )
690
1678
 
691
- def points_to_line_segments(points: GeoDataFrame) -> GeoDataFrame:
692
- points = points.copy()
693
- points["next"] = points.groupby(level=0)["geometry"].shift(-1)
1679
+ more_than_one_neighbor = gdf.iloc[has_more_than_one_neighbor]
1680
+ one_or_zero_neighbors = gdf.iloc[
1681
+ pd.Index(range(len(gdf))).difference(has_more_than_one_neighbor)
1682
+ ]
694
1683
 
695
- first_points = points.loc[lambda x: ~x.index.duplicated(), "geometry"]
696
- is_last_point = points["next"].isna()
1684
+ return one_or_zero_neighbors, more_than_one_neighbor
697
1685
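The split relies on one bulk `STRtree.query` with the `intersects` predicate, then counts hits per left-hand geometry; positional indices with two or more hits go to the `more_than_one_neighbor` frame. Self-contained:

```python
# The neighbor-count pattern above, self-contained.
import pandas as pd
from shapely import STRtree
from shapely.geometry import box

geoms = [box(0, 0, 1, 1), box(10, 10, 11, 11)]
neighbors = [box(0.5, 0.5, 1.5, 1.5), box(0.9, -0.5, 2, 0.5), box(10.5, 10.5, 12, 12)]

tree = STRtree(neighbors)
left, right = tree.query(geoms, predicate="intersects")
pairs = pd.Series(right, index=left)
multi = pairs.groupby(level=0).size().loc[lambda x: x > 1].index
print(list(multi))  # [0]: only the first geometry has 2+ neighbors
```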
 
698
- points.loc[is_last_point, "next"] = first_points
699
- assert points["next"].notna().all()
700
1686
 
701
- points["geometry"] = [
702
- LineString([x1, x2])
703
- for x1, x2 in zip(points["geometry"], points["next"], strict=False)
704
- ]
705
- return GeoDataFrame(
706
- points.drop(columns=["next"]), geometry="geometry", crs=points.crs
707
- )
1687
+ def split_and_eliminate_by_longest(
1688
+ gdf: GeoDataFrame | tuple[GeoDataFrame],
1689
+ to_eliminate: GeoDataFrame,
1690
+ tolerance: float | int,
1691
+ ignore_index: bool = False,
1692
+ **kwargs,
1693
+ ) -> tuple[GeoDataFrame]:
1694
+ if isinstance(gdf, (list, tuple)):
1695
+ # concat, then break up the dataframes in the end
1696
+ was_multiple_gdfs = True
1697
+ original_cols = [df.columns for df in gdf]
1698
+ gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
1699
+ else:
1700
+ was_multiple_gdfs = False
1701
+
1702
+ if 0:
1703
+ to_eliminate.geometry = to_eliminate.buffer(
1704
+ -PRECISION,
1705
+ resolution=1,
1706
+ join_style=2,
1707
+ ).buffer(
1708
+ PRECISION,
1709
+ resolution=1,
1710
+ join_style=2,
1711
+ )
1712
+ to_eliminate = to_eliminate.loc[lambda x: ~x.is_empty]
708
1713
 
1714
+ # now to split polygons to be eliminated to avoid weird shapes
1715
+ # split only the polygons with multiple neighbors
1716
+ single_neighbored, multi_neighbored = (
1717
+ _separate_single_neighbored_from_multi_neighoured_geometries(to_eliminate, gdf)
1718
+ )
1719
+ multi_neighbored = split_by_neighbors(multi_neighbored, gdf, tolerance=tolerance)
1720
+ to_eliminate = pd.concat([multi_neighbored, single_neighbored])
1721
+ gdf, isolated = eliminate_by_longest(
1722
+ gdf, to_eliminate, ignore_index=ignore_index, **kwargs
1723
+ )
709
1724
 
710
- def explore_geosexception(
711
- e: GEOSException, *gdfs: GeoDataFrame, logger: Any | None = None
712
- ) -> None:
713
- """Extract the coordinates of a GEOSException and show in map.
1725
+ if not was_multiple_gdfs:
1726
+ return gdf, isolated
714
1727
 
715
- Args:
716
- e: The exception thrown by a GEOS operation, which potentially contains coordinates information.
717
- *gdfs: One or more GeoDataFrames to display for context in the map.
718
- logger: An optional logger to log the error with visualization. If None, uses standard output.
1728
+ gdfs = ()
1729
+ for i, cols in enumerate(original_cols):
1730
+ df = gdf.loc[gdf["_df_idx"] == i, cols]
1731
+ gdfs += (df,)
1732
+ gdfs += (isolated,)
719
1733
 
720
- """
721
- from ..maps.maps import Explore
722
- from ..maps.maps import explore
723
-
724
- pattern = r"(\d+\.\d+)\s+(\d+\.\d+)"
725
-
726
- matches = re.findall(pattern, str(e))
727
- coords_in_error_message = [(float(match[0]), float(match[1])) for match in matches]
728
- exception_point = to_gdf(coords_in_error_message, crs=gdfs[0].crs)
729
- if len(exception_point):
730
- exception_point["wkt"] = exception_point.to_wkt()
731
- if logger:
732
- logger.error(
733
- e, Explore(exception_point, *gdfs, mask=exception_point.buffer(100))
734
- )
735
- else:
736
- explore(exception_point, *gdfs, mask=exception_point.buffer(100))
737
- else:
738
- if logger:
739
- logger.error(e, Explore(*gdfs))
740
- else:
741
- explore(*gdfs)
1734
+ return gdfs
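
When a tuple of GeoDataFrames is passed, each frame is tagged with a temporary `_df_idx` column, everything is processed as one frame, and the result is split back using the remembered column sets, with the eliminated-but-isolated geometries appended last. The bookkeeping in isolation:

```python
# The tag/concat/split-back bookkeeping used above, in isolation.
import pandas as pd

def process_together(frames, func):
    original_cols = [df.columns for df in frames]
    combined = pd.concat(df.assign(_df_idx=i) for i, df in enumerate(frames))
    combined = func(combined)  # must preserve the _df_idx column
    return tuple(
        combined.loc[combined["_df_idx"] == i, cols]
        for i, cols in enumerate(original_cols)
    )
```

`eliminate_by_longest` itself (from this package's polygon tools) merges each polygon to be eliminated into the neighbor it shares the longest border with; splitting the multi-neighbored slivers first keeps those merges from producing oddly shaped unions.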