ssb-sgis 1.1.16__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +4 -0
- sgis/conf.py +56 -4
- sgis/geopandas_tools/buffer_dissolve_explode.py +24 -47
- sgis/geopandas_tools/conversion.py +18 -25
- sgis/geopandas_tools/duplicates.py +47 -60
- sgis/geopandas_tools/general.py +8 -84
- sgis/geopandas_tools/overlay.py +190 -260
- sgis/geopandas_tools/polygon_operations.py +67 -88
- sgis/geopandas_tools/runners.py +277 -0
- sgis/geopandas_tools/sfilter.py +40 -24
- sgis/geopandas_tools/utils.py +37 -0
- sgis/helpers.py +1 -1
- sgis/io/dapla_functions.py +5 -7
- sgis/maps/map.py +3 -1
- sgis/parallel/parallel.py +32 -24
- sgis/raster/image_collection.py +184 -162
- sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.16.dist-info → ssb_sgis-1.2.0.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.16.dist-info → ssb_sgis-1.2.0.dist-info}/RECORD +21 -19
- {ssb_sgis-1.1.16.dist-info → ssb_sgis-1.2.0.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.16.dist-info → ssb_sgis-1.2.0.dist-info}/WHEEL +0 -0
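The headline change in 1.2.0 is the new sgis/geopandas_tools/runners.py module together with the reworked overlay.py shown below: the ad-hoc dask/joblib threading helpers are removed, and clean_overlay instead delegates to injectable runner objects (RTreeQueryRunner, UnionRunner, OverlayRunner), falling back to instances resolved from the package config when none are passed. A minimal usage sketch of the new keyword arguments, assuming clean_overlay is still exported at the package top level as in 1.1.x; the two input layers are made up for illustration:

import geopandas as gpd
from shapely import box

import sgis as sg

# Two small, partially overlapping square layers in the same CRS.
df1 = gpd.GeoDataFrame({"a": [1]}, geometry=[box(0, 0, 2, 2)], crs=25833)
df2 = gpd.GeoDataFrame({"b": [2]}, geometry=[box(1, 1, 3, 3)], crs=25833)

# With no runners passed, clean_overlay resolves them from the config,
# e.g. config.get_instance("rtree_runner", n_jobs), as added in the diff below.
result = sg.clean_overlay(df1, df2, how="intersection", n_jobs=2)
print(result)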
sgis/geopandas_tools/overlay.py
CHANGED
@@ -9,34 +9,29 @@ version of the solution from GH 2792.
 """
 
 import functools
-from collections.abc import Callable
 
 import geopandas as gpd
-import joblib
 import numpy as np
 import pandas as pd
 from geopandas import GeoDataFrame
-from geopandas import GeoSeries
 from pandas import DataFrame
 from shapely import Geometry
-from shapely import STRtree
 from shapely import box
 from shapely import difference
 from shapely import intersection
+from shapely import is_empty
 from shapely import make_valid
-from shapely import unary_union
-from shapely.errors import GEOSException
-
-try:
-    import dask.array as da
-except ImportError:
-    pass
+from shapely import union_all
 
+from ..conf import config
 from .general import _determine_geom_type_args
 from .general import clean_geoms
 from .geometry_types import get_geom_type
 from .geometry_types import make_all_singlepart
 from .geometry_types import to_single_geom_type
+from .runners import OverlayRunner
+from .runners import RTreeQueryRunner
+from .runners import UnionRunner
 
 DEFAULT_GRID_SIZE = None
 DEFAULT_LSUFFIX = "_1"
@@ -51,9 +46,12 @@ def clean_overlay(
     geom_type: str | None = None,
     predicate: str | None = "intersects",
     grid_size: float | None = None,
-    n_jobs: int = 1,
     lsuffix: str = DEFAULT_LSUFFIX,
     rsuffix: str = DEFAULT_RSUFFIX,
+    n_jobs: int = 1,
+    rtree_runner: RTreeQueryRunner | None = None,
+    union_runner: UnionRunner | None = None,
+    overlay_runner: OverlayRunner | None = None,
 ) -> GeoDataFrame:
     """Fixes and explodes geometries before doing a shapely overlay, then cleans up.
 
@@ -74,10 +72,16 @@ def clean_overlay(
             "point".
         grid_size: Precision grid size to round the geometries. Will use the highest
             precision of the inputs by default.
-        n_jobs: number of threads.
         predicate: Spatial predicate in the spatial tree.
         lsuffix: Suffix of columns in df1 that are also in df2.
        rsuffix: Suffix of columns in df2 that are also in df1.
+        n_jobs: number of jobs. Defaults to 1.
+        union_runner: Optionally debug/manipulate the spatial union operations.
+            See the 'runners' module for example implementations.
+        rtree_runner: Optionally debug/manipulate the spatial indexing operations.
+            See the 'runners' module for example implementations.
+        overlay_runner: Optionally debug/manipulate the spatial overlay operations.
+            See the 'runners' module for example implementations.
 
     Returns:
         GeoDataFrame with overlayed and fixed geometries and columns from both
@@ -104,6 +108,13 @@ def clean_overlay(
     if df1.crs != df2.crs:
         raise ValueError(f"'crs' mismatch. Got {df1.crs} and {df2.crs}")
 
+    if rtree_runner is None:
+        rtree_runner = config.get_instance("rtree_runner", n_jobs)
+    if union_runner is None:
+        union_runner = config.get_instance("union_runner", n_jobs)
+    if overlay_runner is None:
+        overlay_runner = config.get_instance("overlay_runner", n_jobs)
+
     crs = df1.crs
 
     # original_geom_type = geom_type
@@ -119,6 +130,11 @@ def clean_overlay(
         df1.geometry.geom_type.value_counts(),
     )
 
+    if geom_type == "polygon" or get_geom_type(df1) == "polygon":
+        df1.geometry = df1.buffer(0)
+    if geom_type == "polygon" or get_geom_type(df2) == "polygon":
+        df2.geometry = df2.buffer(0)
+
     df1 = clean_geoms(df1)
     df2 = clean_geoms(df2)
 
@@ -131,21 +147,28 @@ def clean_overlay(
     if geom_type and get_geom_type(df1) == get_geom_type(df2):
         df2 = to_single_geom_type(df2, geom_type)
 
-    assert df1.is_valid.all(),
-
-
-    assert df2.
+    assert df1.is_valid.all(), [
+        geom.wkt for geom in df1[lambda x: x.is_valid == False].geometry
+    ]
+    assert df2.is_valid.all(), [
+        geom.wkt for geom in df2[lambda x: x.is_valid == False].geometry
+    ]
+    assert df1.geometry.notna().all(), df1[lambda x: x.isna()]
+    assert df2.geometry.notna().all(), df2[lambda x: x.isna()]
 
     box1 = box(*df1.total_bounds)
     box2 = box(*df2.total_bounds)
 
-    if not
+    if not grid_size and (
+        (not len(df1) or not len(df2))
+        or (not box1.intersects(box2) and how == "intersection")
+    ):
         return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
 
-    if df1.
+    if df1.geometry.name != "geometry":
         df1 = df1.rename_geometry("geometry")
 
-    if df2.
+    if df2.geometry.name != "geometry":
         df2 = df2.rename_geometry("geometry")
 
     # to pandas because GeoDataFrame constructor is expensive
@@ -162,8 +185,10 @@ def clean_overlay(
             lsuffix=lsuffix,
             rsuffix=rsuffix,
             geom_type=geom_type,
-            n_jobs=n_jobs,
             predicate=predicate,
+            rtree_runner=rtree_runner,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         ),
         geometry="geometry",
         crs=crs,
@@ -179,9 +204,9 @@ def clean_overlay(
 
 
 def _join_and_get_no_rows(df1, df2, lsuffix, rsuffix):
-    geom_col = df1.
+    geom_col = df1.geometry.name
     df1_cols = df1.columns.difference({geom_col})
-    df2_cols = df2.columns.difference({df2.
+    df2_cols = df2.columns.difference({df2.geometry.name})
     cols_with_suffix = [
         f"{col}{lsuffix}" if col in df2_cols else col for col in df1_cols
     ] + [f"{col}{rsuffix}" if col in df1_cols else col for col in df2_cols]
@@ -206,7 +231,7 @@ def _no_intersections_return(
     if how == "identity":
         # add suffixes and return df1
         df_template = _join_and_get_no_rows(df1, df2, lsuffix, rsuffix)
-        df2_cols = df2.columns.difference({df2.
+        df2_cols = df2.columns.difference({df2.geometry.name})
         df1.columns = [f"{col}{lsuffix}" if col in df2_cols else col for col in df1]
         return pd.concat([df_template, df1], ignore_index=True)
 
@@ -228,33 +253,41 @@ def _shapely_pd_overlay(
     df1: DataFrame,
     df2: DataFrame,
     how: str,
-    grid_size: float
-    predicate: str
-    lsuffix: str
-    rsuffix: str
-    geom_type: str | None
-
+    grid_size: float,
+    predicate: str,
+    lsuffix: str,
+    rsuffix: str,
+    geom_type: str | None,
+    rtree_runner: RTreeQueryRunner,
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> DataFrame:
-
-
-
-    tree = STRtree(df2.geometry.values)
-    left, right = tree.query(df1.geometry.values, predicate=predicate)
-
+    left, right = rtree_runner.run(
+        df1.geometry.values, df2.geometry.values, predicate=predicate
+    )
     pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
-    assert pairs
-    assert pairs
+    assert pairs["geometry"].notna().all(), pairs.geometry[lambda x: x.isna()]
+    assert pairs["geom_right"].notna().all(), pairs.geom_right[lambda x: x.isna()]
 
     if how == "intersection":
         overlayed = [
             _intersection(
-                pairs,
+                pairs,
+                grid_size=grid_size,
+                geom_type=geom_type,
+                overlay_runner=overlay_runner,
             )
         ]
 
     elif how == "difference":
         overlayed = _difference(
-            pairs,
+            pairs,
+            df1,
+            left,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "symmetric_difference":
@@ -267,12 +300,19 @@ def _shapely_pd_overlay(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "identity":
         overlayed = _identity(
-            pairs,
+            pairs,
+            df1,
+            left,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "union":
@@ -285,7 +325,8 @@ def _shapely_pd_overlay(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     elif how == "update":
@@ -295,8 +336,9 @@ def _shapely_pd_overlay(
             df2,
             left=left,
             grid_size=grid_size,
-            n_jobs=n_jobs,
             geom_type=geom_type,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
 
     assert isinstance(overlayed, list)
@@ -314,8 +356,9 @@ def _shapely_pd_overlay(
         overlayed = _add_suffix_left(overlayed, df1, df2, lsuffix)
 
     overlayed["geometry"] = make_valid(overlayed["geometry"])
-
-
+    overlayed = overlayed.loc[
+        lambda x: (x["geometry"].notna().values) & (~is_empty(x["geometry"].values))
+    ]
 
     return overlayed
 
@@ -327,115 +370,38 @@ def _update(
     left: np.ndarray,
     grid_size: float | None | int,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> GeoDataFrame:
     overlayed = _difference(
-        pairs,
+        pairs,
+        df1,
+        left,
+        grid_size=grid_size,
+        geom_type=geom_type,
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
 
     return overlayed + [df2]
 
 
-def _run_overlay_dask(
-    arr1: np.ndarray,
-    arr2: np.ndarray,
-    func: Callable,
-    n_jobs: int,
-    grid_size: float | int | None,
-) -> np.ndarray:
-    if len(arr1) // n_jobs <= 1:
-        try:
-            return func(arr1, arr2, grid_size=grid_size)
-        except TypeError as e:
-            raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
-    arr1 = da.from_array(arr1, chunks=len(arr1) // n_jobs)
-    arr2 = da.from_array(arr2, chunks=len(arr2) // n_jobs)
-    res = arr1.map_blocks(func, arr2, grid_size=grid_size, dtype=float)
-    return res.compute(scheduler="threads", optimize_graph=False, num_workers=n_jobs)
-
-
-def _run_overlay_joblib_threading(
-    arr1: np.ndarray,
-    arr2: np.ndarray,
-    func: Callable,
-    n_jobs: int,
-    grid_size: int | float | None,
-) -> list[Geometry]:
-    if len(arr1) // n_jobs <= 1:
-        try:
-            return func(arr1, arr2, grid_size=grid_size)
-        except TypeError as e:
-            raise TypeError(e, {type(x) for x in arr1}, {type(x) for x in arr2}) from e
-    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
-        return parallel(
-            joblib.delayed(func)(g1, g2, grid_size=grid_size)
-            for g1, g2 in zip(arr1, arr2, strict=True)
-        )
-
-
 def _intersection(
     pairs: pd.DataFrame,
     grid_size: None | float | int,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
 ) -> GeoDataFrame:
     if not len(pairs):
         return pairs.drop(columns="geom_right")
-
     intersections = pairs.copy()
-
-
-
-
-
-
-
-            arr1,
-            arr2,
-            func=intersection,
-            n_jobs=n_jobs,
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        arr1 = make_valid_and_keep_geom_type(
-            arr1, geom_type=geom_type, n_jobs=n_jobs
-        )
-        arr2 = make_valid_and_keep_geom_type(
-            arr2, geom_type=geom_type, n_jobs=n_jobs
-        )
-        arr1 = arr1.loc[lambda x: x.index.isin(arr2.index)]
-        arr2 = arr2.loc[lambda x: x.index.isin(arr1.index)]
-
-        res = _run_overlay_joblib_threading(
-            arr1.to_numpy(),
-            arr2.to_numpy(),
-            func=intersection,
-            n_jobs=n_jobs,
-            grid_size=grid_size,
-        )
-        intersections["geometry"] = res
-        return intersections.drop(columns="geom_right")
-
-    try:
-        intersections["geometry"] = intersection(
-            intersections["geometry"].to_numpy(),
-            intersections["geom_right"].to_numpy(),
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        left = make_valid_and_keep_geom_type(
-            intersections["geometry"].to_numpy(), geom_type, n_jobs=n_jobs
-        )
-        right = make_valid_and_keep_geom_type(
-            intersections["geom_right"].to_numpy(), geom_type, n_jobs=n_jobs
-        )
-        left = left.loc[lambda x: x.index.isin(right.index)]
-        right = right.loc[lambda x: x.index.isin(left.index)]
-
-        intersections["geometry"] = intersection(
-            left.to_numpy(), right.to_numpy(), grid_size=grid_size
-        )
-
+    intersections["geometry"] = overlay_runner.run(
+        intersection,
+        intersections["geometry"].to_numpy(),
+        intersections["geom_right"].to_numpy(),
+        grid_size=grid_size,
+        geom_type=geom_type,
+    )
     return intersections.drop(columns="geom_right")
 
 
@@ -448,12 +414,16 @@ def _union(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
         intersections = _intersection(
-            pairs,
+            pairs,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
         )
         merged.append(intersections)
         symmdiff = _symmetric_difference(
@@ -465,7 +435,8 @@ def _union(
             grid_size=grid_size,
             rsuffix=rsuffix,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
         merged += symmdiff
     return merged
@@ -477,15 +448,27 @@ def _identity(
     left: np.ndarray,
     grid_size: int | float | None,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
         intersections = _intersection(
-            pairs,
+            pairs,
+            grid_size=grid_size,
+            geom_type=geom_type,
+            overlay_runner=overlay_runner,
         )
         merged.append(intersections)
-        diff = _difference(
+        diff = _difference(
+            pairs,
+            df1,
+            left,
+            geom_type=geom_type,
+            grid_size=grid_size,
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
+        )
         merged += diff
     return merged
 
@@ -499,12 +482,19 @@ def _symmetric_difference(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
 
     difference_left = _difference(
-        pairs,
+        pairs,
+        df1,
+        left,
+        grid_size=grid_size,
+        geom_type=geom_type,
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
     merged += difference_left
 
@@ -516,7 +506,8 @@ def _symmetric_difference(
         grid_size=grid_size,
         rsuffix=rsuffix,
         geom_type=geom_type,
-
+        overlay_runner=overlay_runner,
+        union_runner=union_runner,
     )
     merged.append(clip_right)
 
@@ -530,9 +521,10 @@ def _difference(
     pairs: pd.DataFrame,
     df1: pd.DataFrame,
     left: np.ndarray,
-    grid_size: int | float | None
-    geom_type: str | None
-
+    grid_size: int | float | None,
+    geom_type: str | None,
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> list[GeoDataFrame]:
     merged = []
     if len(left):
@@ -541,7 +533,8 @@ def _difference(
             df1=df1,
             grid_size=grid_size,
             geom_type=geom_type,
-
+            overlay_runner=overlay_runner,
+            union_runner=union_runner,
         )
         merged.append(clip_left)
         diff_left = _add_indices_from_left(df1, left)
@@ -609,7 +602,8 @@ def _shapely_diffclip_left(
     df1: pd.DataFrame,
     grid_size: int | float | None,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> pd.DataFrame:
     """Aggregate areas in right by unique values from left, then erases those from left."""
     keep_cols = list(df1.columns.difference({"_overlay_index_right"})) + ["geom_right"]
@@ -666,12 +660,14 @@ def _shapely_diffclip_left(
         }
     )
 
-    agged = pd.Series(
-        {
-            i: agg_geoms_partial(geoms)
-            for i, geoms in agger.groupby(level=0)["geom_right"]
-        }
-    )
+    agged = union_runner.run(agger["geom_right"], level=0)
+    # agged = pd.Series(
+
+    #     {
+    #         i: agg_geoms_partial(geoms)
+    #         for i, geoms in agger.groupby(level=0)["geom_right"]
+    #     }
+    # )
     many_hits_agged["geom_right"] = inverse_index_mapper.map(agged)
     many_hits_agged = many_hits_agged.drop(columns=["_right_indices"])
 
@@ -679,15 +675,19 @@ def _shapely_diffclip_left(
     except IndexError:
         clip_left = pairs.loc[:, list(keep_cols)]
 
-    assert clip_left["geometry"].notna().all()
-
+    assert clip_left["geometry"].notna().all(), clip_left["geometry"][
+        lambda x: x.isna()
+    ]
+    assert clip_left["geom_right"].notna().all(), clip_left["geom_right"][
+        lambda x: x.isna()
+    ]
 
-    clip_left["geometry"] =
+    clip_left["geometry"] = overlay_runner.run(
+        difference,
         clip_left["geometry"].to_numpy(),
         clip_left["geom_right"].to_numpy(),
         grid_size=grid_size,
         geom_type=geom_type,
-        n_jobs=n_jobs,
     )
 
     return clip_left.drop(columns="geom_right")
@@ -700,7 +700,8 @@ def _shapely_diffclip_right(
     grid_size: int | float | None,
     rsuffix: str,
     geom_type: str | None,
-
+    overlay_runner: OverlayRunner,
+    union_runner: UnionRunner,
 ) -> pd.DataFrame:
     agg_geoms_partial = functools.partial(_agg_geoms, grid_size=grid_size)
 
@@ -711,16 +712,22 @@ def _shapely_diffclip_right(
     one_hit = pairs[only_one].set_index("_overlay_index_right")[
         ["geom_left", "geometry"]
     ]
-    many_hits = (
-        pairs[~only_one]
-        .groupby("_overlay_index_right")
-        .agg(
-            {
-                "geom_left": agg_geoms_partial,
-                "geometry": "first",
-            }
-        )
+    many_hits_ungrouped = pairs[~only_one].set_index("_overlay_index_right")
+    many_hits = pd.DataFrame(index=many_hits_ungrouped.index.unique())
+    many_hits["geometry"] = many_hits_ungrouped.groupby(level=0)["geometry"].first()
+    many_hits["geom_left"] = union_runner.run(
+        many_hits_ungrouped["geom_left"], level=0
     )
+    # many_hits = (
+    #     pairs[~only_one]
+    #     .groupby("_overlay_index_right")
+    #     .agg(
+    #         {
+    #             "geom_left": agg_geoms_partial,
+    #             "geometry": "first",
+    #         }
+    #     )
+    # )
     clip_right = (
         pd.concat([one_hit, many_hits])
         .join(df2.drop(columns=["geometry"]))
@@ -739,10 +746,15 @@ def _shapely_diffclip_right(
         }
     )
 
-    assert clip_right["geometry"].notna().all()
-
+    assert clip_right["geometry"].notna().all(), clip_right["geometry"][
+        lambda x: x.isna()
+    ]
+    assert clip_right["geom_left"].notna().all(), clip_right["geom_left"][
+        lambda x: x.isna()
+    ]
 
-    clip_right["geometry"] =
+    clip_right["geometry"] = overlay_runner.run(
+        difference,
         clip_right["geometry"].to_numpy(),
         clip_right["geom_left"].to_numpy(),
         grid_size=grid_size,
@@ -752,87 +764,5 @@ def _shapely_diffclip_right(
     return clip_right.drop(columns="geom_left")
 
 
-def _try_difference(
-    left: np.ndarray,
-    right: np.ndarray,
-    grid_size: int | float | None,
-    geom_type: str | None,
-    n_jobs: int = 1,
-) -> np.ndarray:
-    """Try difference overlay, then make_valid and retry."""
-    if n_jobs > 1 and len(left) / n_jobs > 10:
-        try:
-            return _run_overlay_joblib_threading(
-                left,
-                right,
-                func=difference,
-                n_jobs=n_jobs,
-                grid_size=grid_size,
-            )
-        except GEOSException:
-            left = make_valid_and_keep_geom_type(
-                left, geom_type=geom_type, n_jobs=n_jobs
-            )
-            right = make_valid_and_keep_geom_type(
-                right, geom_type=geom_type, n_jobs=n_jobs
-            )
-            left = left.loc[lambda x: x.index.isin(right.index)]
-            right = right.loc[lambda x: x.index.isin(left.index)]
-
-            return _run_overlay_joblib_threading(
-                left.to_numpy(),
-                right.to_numpy(),
-                func=difference,
-                n_jobs=n_jobs,
-                grid_size=grid_size,
-            )
-
-    try:
-        return difference(
-            left,
-            right,
-            grid_size=grid_size,
-        )
-    except GEOSException:
-        left = make_valid_and_keep_geom_type(left, geom_type, n_jobs=n_jobs)
-        right = make_valid_and_keep_geom_type(right, geom_type, n_jobs=n_jobs)
-        left = left.loc[lambda x: x.index.isin(right.index)]
-        right = right.loc[lambda x: x.index.isin(left.index)]
-        try:
-            return difference(
-                left.to_numpy(),
-                right.to_numpy(),
-                grid_size=grid_size,
-            )
-        except GEOSException as e:
-            raise e.__class__(e, f"{grid_size=}", f"{left=}", f"{right=}") from e
-
-
-def make_valid_and_keep_geom_type(
-    geoms: np.ndarray, geom_type: str, n_jobs: int
-) -> GeoSeries:
-    """Make GeometryCollections into (Multi)Polygons, (Multi)LineStrings or (Multi)Points.
-
-    Because GeometryCollections might appear after dissolving (unary_union).
-    And this makes shapely difference/intersection fail.
-
-    Args:
-        geoms: Array of geometries.
-        geom_type: geometry type to be kept.
-        n_jobs: Number of treads.
-    """
-    geoms = GeoSeries(geoms)
-    geoms.index = range(len(geoms))
-    geoms.loc[:] = make_valid(geoms.to_numpy())
-    geoms_with_correct_type = geoms.explode(index_parts=False).pipe(
-        to_single_geom_type, geom_type
-    )
-    only_one = geoms_with_correct_type.groupby(level=0).transform("size") == 1
-    one_hit = geoms_with_correct_type[only_one]
-    many_hits = geoms_with_correct_type[~only_one].groupby(level=0).agg(unary_union)
-    geoms_with_wrong_type = geoms.loc[~geoms.index.isin(geoms_with_correct_type.index)]
-    return pd.concat([one_hit, many_hits, geoms_with_wrong_type]).sort_index()
-
-
 def _agg_geoms(g: np.ndarray, grid_size: int | float | None = None) -> Geometry:
-    return make_valid(unary_union(g, grid_size=grid_size))
+    return make_valid(union_all(g, grid_size=grid_size))
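The call sites above imply a small contract for the injected runners: rtree_runner.run(left_geoms, right_geoms, predicate=...) performs the spatial-tree query and returns the pair indices, overlay_runner.run(func, arr1, arr2, grid_size=..., geom_type=...) applies a vectorized shapely set operation to two aligned geometry arrays, and union_runner.run(series, level=...) unions geometries per group of an index level. A single-threaded sketch of stand-ins satisfying that contract, inferred from these call sites only (class names and signatures here are illustrative, not the actual implementations in the new runners.py):

import pandas as pd
from shapely import STRtree, union_all


class NaiveRTreeQueryRunner:
    # Mirrors the STRtree query that the 1.1.x code did inline.
    def run(self, left_geoms, right_geoms, predicate="intersects"):
        tree = STRtree(right_geoms)
        return tree.query(left_geoms, predicate=predicate)


class NaiveOverlayRunner:
    # func is a vectorized shapely set operation, e.g. shapely.intersection or shapely.difference.
    def run(self, func, arr1, arr2, grid_size=None, geom_type=None):
        return func(arr1, arr2, grid_size=grid_size)


class NaiveUnionRunner:
    # Unions the geometries within each group of the given index level.
    def run(self, geoms: pd.Series, level=0):
        return geoms.groupby(level=level).agg(lambda g: union_all(g.to_numpy()))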