ssb-sgis 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

@@ -0,0 +1,331 @@
+ import warnings
+
+ import pandas as pd
+ from geopandas import GeoDataFrame, GeoSeries
+ from numpy.typing import NDArray
+ from shapely import (
+     extract_unique_points,
+     get_coordinates,
+     get_exterior_ring,
+     linearrings,
+     make_valid,
+     polygons,
+ )
+ from shapely.geometry import LinearRing
+
+ from ..networkanalysis.closing_network_holes import get_angle
+ from .buffer_dissolve_explode import buff, dissexp
+ from .conversion import coordinate_array, to_geoseries
+ from .duplicates import get_intersections, update_geometries
+ from .general import sort_large_first, sort_long_first
+ from .geometry_types import get_geom_type
+ from .overlay import clean_overlay
+ from .polygon_operations import close_all_holes, close_thin_holes, get_gaps
+ from .polygons_as_rings import PolygonsAsRings
+ from .sfilter import sfilter, sfilter_inverse
+
+
+ warnings.simplefilter(action="ignore", category=UserWarning)
+ warnings.simplefilter(action="ignore", category=RuntimeWarning)
+
+
+ PRECISION = 1e-4
+ BUFFER_RES = 50
+
+
+ def get_angle_between_indexed_points(point_df: GeoDataFrame):
+     """Get the angle difference between the lines to the previous and next point."""
+
+     point_df["next"] = point_df.groupby(level=0)["geometry"].shift(-1)
+
+     notna = point_df["next"].notna()
+
+     this = coordinate_array(point_df.loc[notna, "geometry"].values)
+     next_ = coordinate_array(point_df.loc[notna, "next"].values)
+
+     point_df.loc[notna, "angle"] = get_angle(this, next_)
+     point_df["prev_angle"] = point_df.groupby(level=0)["angle"].shift(1)
+
+     point_df["angle_diff"] = point_df["angle"] - point_df["prev_angle"]
+
+     return point_df
+
+
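
A minimal, hypothetical sketch (not part of the package) of how this helper is meant to be called: points extracted from the same ring share an index value, so the groupby/shift pairs each vertex with the next one along that ring.

    import geopandas as gpd
    from shapely.geometry import Point

    ring_points = gpd.GeoDataFrame(
        {"geometry": [Point(0, 0), Point(1, 0), Point(1, 1)]},
        index=[0, 0, 0],  # all vertices belong to ring 0
    )
    ring_points = get_angle_between_indexed_points(ring_points)
    # 'angle_diff' now holds the change of direction at each vertex;
    # remove_spikes below flags values in the [180, 180.01) band as spikes.
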
+ def remove_spikes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
+     """Remove thin spikes in polygons.
+
+     Note that this function might be slow. Should only be used if necessary.
+
+     Args:
+         gdf: GeoDataFrame of polygons.
+         tolerance: distance (usually meters) used as the minimum thickness
+             for polygons to be eliminated. Any spike thinner than the tolerance
+             will be removed.
+
+     Returns:
+         A GeoDataFrame of polygons without spikes thinner than the tolerance.
+     """
+
+     def _remove_spikes(geoms: NDArray[LinearRing]) -> NDArray[LinearRing]:
+         if not len(geoms):
+             return geoms
+         geoms = to_geoseries(geoms).reset_index(drop=True)
+
+         points = (
+             extract_unique_points(geoms).explode(index_parts=False).to_frame("geometry")
+         )
+
+         points = get_angle_between_indexed_points(points)
+
+         indices_with_spikes = points[
+             lambda x: (x["angle_diff"] >= 180) & (x["angle_diff"] < 180.01)
+         ].index.unique()
+
+         rings_with_spikes = geoms[geoms.index.isin(indices_with_spikes)]
+         rings_without_spikes = geoms[~geoms.index.isin(indices_with_spikes)]
+
+         def to_buffered_rings_without_spikes(x):
+             polys = GeoSeries(make_valid(polygons(get_exterior_ring(x))))
+
+             return (
+                 polys.buffer(-tolerance, resolution=BUFFER_RES)
+                 .explode(index_parts=False)
+                 .pipe(close_all_holes)
+                 .pipe(get_exterior_ring)
+                 .buffer(tolerance * 10)
+             )
+
+         buffered = to_buffered_rings_without_spikes(
+             rings_with_spikes.buffer(tolerance / 2, resolution=BUFFER_RES)
+         )
+
+         points_without_spikes = (
+             extract_unique_points(rings_with_spikes)
+             .explode(index_parts=False)
+             .loc[lambda x: x.index.isin(sfilter(x, buffered).index)]
+         )
+
+         # linearrings require at least 4 coordinate pairs, or three unique points
+         points_without_spikes = points_without_spikes.loc[
+             lambda x: x.groupby(level=0).size() >= 3
+         ]
+
+         # need an index from 0 to n-1 in 'linearrings'
+         to_int_index = {
+             ring_idx: i
+             for i, ring_idx in enumerate(sorted(set(points_without_spikes.index)))
+         }
+         int_indices = points_without_spikes.index.map(to_int_index)
+
+         as_lines = pd.Series(
+             linearrings(
+                 get_coordinates(points_without_spikes.geometry.values),
+                 indices=int_indices,
+             ),
+             index=points_without_spikes.index.unique(),
+         )
+         as_lines = pd.concat([as_lines, rings_without_spikes])
+
+         # the missing polygons are thin and/or spiky. Let's remove them
+         missing = geoms.loc[~geoms.index.isin(as_lines.index)]
+
+         missing = pd.Series(
+             [None] * len(missing),
+             index=missing.index.values,
+         )
+
+         return pd.concat([as_lines, missing]).sort_index()
+
+     gdf.geometry = (
+         PolygonsAsRings(gdf.geometry).apply_numpy_func(_remove_spikes).to_numpy()
+     )
+     return gdf
+
+
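
For context, a small hypothetical example of calling remove_spikes (the geometry is made up; the exact output depends on the tolerance):

    import geopandas as gpd
    from shapely.geometry import Polygon

    # A unit square with a roughly 0.02-wide spike reaching up to y=3.
    spiky = gpd.GeoDataFrame(
        geometry=[
            Polygon([(0, 0), (1, 0), (1, 1), (0.52, 1), (0.51, 3), (0.5, 1), (0, 1)])
        ]
    )
    cleaned = remove_spikes(spiky, tolerance=0.1)
    # The spike is thinner than 0.1 and is cut off; the square remains.
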
+ def coverage_clean(
+     gdf: GeoDataFrame,
+     tolerance: int | float,
+     duplicate_action: str = "fix",
+     remove_isolated: bool = False,
+ ) -> GeoDataFrame:
+     """Fix thin gaps, holes, slivers and double surfaces.
+
+     Rules:
+     - Holes (interiors) thinner than the tolerance are closed.
+     - Gaps between polygons are filled if thinner than the tolerance.
+     - Sliver polygons thinner than the tolerance are eliminated
+       into the neighbor polygon with the longest shared border.
+     - Double surfaces thinner than the tolerance are eliminated.
+       If duplicate_action is "fix", thicker double surfaces will
+       be updated from top to bottom of the GeoDataFrame's rows.
+     - Line and point geometries are removed.
+     - MultiPolygons are exploded to Polygons.
+     - Index is reset.
+
+     Args:
+         gdf: GeoDataFrame to be cleaned.
+         tolerance: distance (usually meters) used as the minimum thickness
+             for polygons to be eliminated. Any gap, hole, sliver or double
+             surface that is empty after a negative buffer of tolerance / 2
+             is eliminated into the neighbor with the longest shared border.
+         duplicate_action: Either "fix", "error" or "ignore".
+             If "fix" (default), double surfaces thicker than the
+             tolerance will be updated from top to bottom (function update_geometries)
+             and then dissolved into the neighbor polygon with the longest shared border.
+             If "error", an Exception is raised if there are any double surfaces thicker
+             than the tolerance. If "ignore", double surfaces are kept as is.
+
+     Returns:
+         A GeoDataFrame with cleaned polygons.
+     """
+
+     _cleaning_checks(gdf, tolerance, duplicate_action)
+
+     if not gdf.index.is_unique:
+         gdf = gdf.reset_index(drop=True)
+
+     gdf = close_thin_holes(gdf, tolerance)
+
+     gaps = get_gaps(gdf, include_interiors=True)
+     double = get_intersections(gdf)
+     double["_double_idx"] = range(len(double))
+
+     gdf, slivers = split_out_slivers(gdf, tolerance)
+
+     thin_gaps_and_double = pd.concat([gaps, double]).loc[
+         lambda x: x.buffer(-tolerance / 2).is_empty
+     ]
+
+     all_are_thin = double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+
+     if not all_are_thin and duplicate_action == "fix":
+         gdf, thin_gaps_and_double = _properly_fix_duplicates(
+             gdf, double, slivers, thin_gaps_and_double, tolerance
+         )
+
+         # gaps = pd.concat([gaps, more_gaps], ignore_index=True)
+         # double = pd.concat([double, more_double], ignore_index=True)
+     elif not all_are_thin and duplicate_action == "error":
+         raise ValueError("Large double surfaces.")
+
+     to_eliminate = pd.concat([thin_gaps_and_double, slivers], ignore_index=True).loc[
+         lambda x: ~x.buffer(-PRECISION / 10).is_empty
+     ]
+     to_eliminate["_eliminate_idx"] = range(len(to_eliminate))
+     gdf["_poly_idx"] = range(len(gdf))
+
+     gdf_geoms_idx = gdf[["_poly_idx", "geometry"]]
+
+     joined = to_eliminate.sjoin(gdf_geoms_idx, how="left")
+     isolated = joined[lambda x: x["_poly_idx"].isna()]
+     intersecting = joined[lambda x: x["_poly_idx"].notna()]
+
+     poly_idx_mapper: pd.Series = (
+         clean_overlay(
+             intersecting[["_eliminate_idx", "geometry"]],
+             buff(gdf_geoms_idx, tolerance, resolution=BUFFER_RES),
+             geom_type="polygon",
+         )
+         .pipe(sort_long_first)
+         .drop_duplicates("_eliminate_idx")
+         .set_index("_eliminate_idx")["_poly_idx"]
+     )
+     intersecting["_poly_idx"] = intersecting["_eliminate_idx"].map(poly_idx_mapper)
+     without_double = update_geometries(intersecting).drop(
+         columns=["_eliminate_idx", "_double_idx", "index_right"]
+     )
+
+     cleaned = (
+         dissexp(pd.concat([gdf, without_double]), by="_poly_idx", aggfunc="first")
+         .reset_index(drop=True)
+         .loc[lambda x: ~x.buffer(-PRECISION / 10).is_empty]
+     )
+
+     if not remove_isolated:
+         cleaned = pd.concat(
+             [
+                 cleaned,
+                 isolated.drop(
+                     columns=[
+                         "_double_idx",
+                         "_eliminate_idx",
+                         "_poly_idx",
+                         "index_right",
+                     ]
+                 ),
+             ]
+         )
+
+     missing_indices: pd.Index = sfilter_inverse(
+         gdf.representative_point(), cleaned
+     ).index
+
+     missing = clean_overlay(
+         gdf.loc[missing_indices].drop(columns="_poly_idx"),
+         cleaned,
+         how="difference",
+         geom_type="polygon",
+     )
+
+     return pd.concat([cleaned, missing], ignore_index=True)
+
+
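
To make the rules above concrete, a hypothetical call (geometries made up, not from the package docs):

    import geopandas as gpd
    from shapely.geometry import Polygon

    # Two squares overlapping in a thin 0.05-wide strip (a double surface).
    gdf = gpd.GeoDataFrame(
        geometry=[
            Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
            Polygon([(0.95, 0), (2, 0), (2, 1), (0.95, 1)]),
        ]
    )
    cleaned = coverage_clean(gdf, tolerance=0.5)
    # The 0.05-wide overlap collapses under buffer(-0.25), so it counts as a
    # thin double surface and is eliminated into one of the two neighbors.
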
+ def _properly_fix_duplicates(gdf, double, slivers, thin_gaps_and_double, tolerance):
+     for _ in range(4):
+         gdf = _dissolve_thick_double_and_update(gdf, double, thin_gaps_and_double)
+         gdf, more_slivers = split_out_slivers(gdf, tolerance)
+         slivers = pd.concat([slivers, more_slivers], ignore_index=True)
+         gaps = get_gaps(gdf, include_interiors=True)
+         double = get_intersections(gdf)
+         double["_double_idx"] = range(len(double))
+         thin_gaps_and_double = pd.concat([gaps, double]).loc[
+             lambda x: x.buffer(-tolerance / 2).is_empty
+         ]
+         all_are_thin = (
+             double["_double_idx"].isin(thin_gaps_and_double["_double_idx"]).all()
+         )
+         if all_are_thin:
+             return gdf, thin_gaps_and_double
+
+     not_thin = double[
+         lambda x: ~x["_double_idx"].isin(thin_gaps_and_double["_double_idx"])
+     ]
+     raise ValueError("Failed to properly fix thick double surfaces", not_thin.geometry)
+
+
+ def _dissolve_thick_double_and_update(gdf, double, thin_double):
+     large = (
+         double.loc[~double["_double_idx"].isin(thin_double["_double_idx"])]
+         .drop(columns="_double_idx")
+         .pipe(sort_large_first)
+         .pipe(update_geometries)
+     )
+     return (
+         clean_overlay(gdf, large, how="update")
+         .pipe(sort_large_first)
+         .pipe(update_geometries)
+     )
+
+
+ def _cleaning_checks(gdf, tolerance, duplicate_action):
+     if not len(gdf) or not tolerance:
+         return gdf
+     if get_geom_type(gdf) != "polygon":
+         raise ValueError("Must be polygons.")
+     if tolerance < PRECISION:
+         raise ValueError(
+             f"'tolerance' must be larger than {PRECISION} to avoid "
+             "problems with floating point precision."
+         )
+     if duplicate_action not in ["fix", "error", "ignore"]:
+         raise ValueError("duplicate_action must be 'fix', 'error' or 'ignore'")
+
+
+ def split_out_slivers(
+     gdf: GeoDataFrame | GeoSeries, tolerance: float | int
+ ) -> tuple[GeoDataFrame, GeoDataFrame] | tuple[GeoSeries, GeoSeries]:
+     is_sliver = gdf.buffer(-tolerance / 2).is_empty
+     slivers = gdf.loc[is_sliver]
+     gdf = gdf.loc[~is_sliver]
+     return gdf, slivers
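
The sliver test is simply whether a polygon survives a negative buffer of half the tolerance. A hypothetical illustration:

    import geopandas as gpd
    from shapely.geometry import Polygon

    gdf = gpd.GeoDataFrame(
        geometry=[
            Polygon([(0, 0), (10, 0), (10, 10), (0, 10)]),  # thick: survives
            Polygon([(0, 0), (10, 0), (10, 0.05), (0, 0.05)]),  # thin strip
        ]
    )
    thick, slivers = split_out_slivers(gdf, tolerance=0.5)
    # buffer(-0.25) empties the 0.05-wide strip, so it ends up in 'slivers'.
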
@@ -22,7 +22,12 @@ def to_geoseries(obj: Any, crs: Any | None = None) -> GeoSeries:
          pass
 
      try:
-         index = obj.index.values
+         if hasattr(obj.index, "values"):
+             # pandas objects
+             index = obj.index
+         else:
+             # list
+             index = None
      except AttributeError:
          index = None
 
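
The new branch keeps a pandas index when the input has one and otherwise leaves index as None, so list-like input gets a default RangeIndex. A hypothetical illustration (assuming the surrounding function builds the GeoSeries with this index):

    import pandas as pd
    from shapely.geometry import Point

    s = pd.Series([Point(0, 0), Point(1, 1)], index=["a", "b"])
    to_geoseries(s)              # keeps the "a"/"b" index
    to_geoseries([Point(0, 0)])  # list.index is a method, not an index -> RangeIndex
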
@@ -296,9 +301,10 @@ def to_gdf(
      if geom_col in obj.keys():
          if isinstance(obj, pd.DataFrame):
              notna = obj[geom_col].notna()
-             obj.loc[notna, geom_col] = GeoSeries(
-                 make_shapely_geoms(obj.loc[notna, geom_col]), index=index
+             obj.loc[notna, geom_col] = list(
+                 make_shapely_geoms(obj.loc[notna, geom_col])
              )
+             obj[geom_col] = GeoSeries(obj[geom_col])
              return GeoDataFrame(obj, geometry=geom_col, crs=crs, **kwargs)
          if isinstance(obj[geom_col], Geometry):
              return GeoDataFrame(
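
The rewritten assignment sidesteps index alignment: a plain list is written positionally into the notna rows, and the column is wrapped in a GeoSeries only afterwards. A hypothetical illustration of why alignment matters:

    import pandas as pd
    from shapely import wkt

    df = pd.DataFrame(
        {"geometry": ["POINT (0 0)", None, "POINT (1 1)"]},
        index=[10, 20, 30],  # non-default index
    )
    notna = df["geometry"].notna()
    # A Series built with a fresh RangeIndex would align on 10/20/30 and
    # fill NaN; a list is assigned by position instead.
    df.loc[notna, "geometry"] = [wkt.loads(v) for v in df.loc[notna, "geometry"]]
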
@@ -14,9 +14,9 @@ from .overlay import clean_overlay
 
  def update_geometries(
      gdf: GeoDataFrame,
+     geom_type: str | None = None,
      keep_geom_type: bool = True,
      grid_size: int | None = None,
-     copy: bool = True,
  ) -> GeoDataFrame:
      """Puts geometries on top of each other rowwise.
 
@@ -29,9 +29,11 @@
              of intersection resulting in multiple geometry types or
              GeometryCollections. If False, return all resulting geometries
              (potentially mixed types).
+         geom_type: Optionally specify which geometry type to keep,
+             if there are mixed geometry types. Must be either "polygon",
+             "line" or "point".
          grid_size: Precision grid size to round the geometries. Will use the highest
              precision of the inputs by default.
-         copy: Defaults to True.
 
      Example
      ------
@@ -78,56 +80,51 @@ def update_geometries(
      if len(gdf) <= 1:
          return gdf
 
-     df = pd.DataFrame(gdf, copy=copy)
-
-     unioned = Polygon()
-     out_rows, indices, geometries = [], [], []
-
-     if keep_geom_type:
+     if geom_type:
+         gdf = to_single_geom_type(gdf, geom_type)
+         keep_geom_type = True
+     elif keep_geom_type:
          geom_type = get_geom_type(gdf)
          if geom_type == "mixed":
              raise ValueError("Cannot have mixed geometries when keep_geom_type is True")
 
-     for i, row in df.iterrows():
-         geom = row.pop("geometry")
-
-         if any(geom.equals(geom2) for geom2 in geometries):
-             continue
-
-         try:
-             new = difference(geom, unioned, grid_size=grid_size)
-         except GEOSException:
-             try:
-                 geom = make_valid(geom)
-                 new = difference(geom, unioned, grid_size=grid_size)
-             except GEOSException:
-                 unioned = to_single_geom_type(unioned, geom_type=geom_type)
-                 new = difference(geom, unioned, grid_size=grid_size)
+     geom_col = gdf._geometry_column_name
+     index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
+     gdf = gdf.reset_index(drop=True)
 
-         if not new:
-             continue
+     tree = STRtree(gdf.geometry.values)
+     left, right = tree.query(gdf.geometry.values, predicate="intersects")
+     indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
+
+     # select geometries from 'right', index from 'left', dissolve by 'left'
+     erasers = (
+         pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
+         .groupby(level=0)
+         .agg(unary_union)
+     )
 
-         try:
-             unioned = unary_union([new, unioned], grid_size=grid_size)
-         except GEOSException:
-             new = make_valid(new)
-             unioned = unary_union([new, unioned], grid_size=grid_size)
+     # match up the aggregated erasers by index
+     erased = difference(
+         gdf.geometry.loc[erasers.index],
+         erasers,
+         grid_size=grid_size,
+     )
 
-         unioned = make_valid(unioned)
+     gdf.loc[erased.index, geom_col] = erased
 
-         out_rows.append(row)
-         geometries.append(new)
-         indices.append(i)
+     gdf = gdf.loc[~gdf.is_empty]
 
-     out = GeoDataFrame(out_rows, geometry=geometries, index=indices, crs=gdf.crs)
+     gdf.index = gdf.index.map(index_mapper)
 
      if keep_geom_type:
-         out = to_single_geom_type(out, geom_type)
+         gdf = to_single_geom_type(gdf, geom_type)
 
-     return out
+     return gdf
 
 
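
The rewrite above replaces the quadratic row-by-row loop with one vectorized pass: an STRtree query lists every intersecting pair, the pairs are filtered to index > value so each row is only erased by rows that come before it, and the per-row erasers are unioned once and subtracted in a single difference call. A minimal hypothetical sketch of that indexing trick (plain shapely/pandas, outside the library):

    import pandas as pd
    from shapely import STRtree, box, difference
    from shapely.ops import unary_union

    geoms = [box(0, 0, 2, 2), box(1, 0, 3, 2), box(2, 0, 4, 2)]
    tree = STRtree(geoms)
    left, right = tree.query(geoms, predicate="intersects")
    # keep each pair once, oriented so the erasers come earlier in the list
    pairs = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
    erasers = (
        pd.Series([geoms[j] for j in pairs.values], index=pairs.index)
        .groupby(level=0)
        .agg(unary_union)
    )
    erased = {i: difference(geoms[i], eraser) for i, eraser in erasers.items()}
    # geoms[1] loses its overlap with geoms[0]; geoms[2] is erased by the
    # union of geoms[0] and geoms[1], i.e. earlier rows win.
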
- def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDataFrame:
+ def get_intersections(
+     gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool = True
+ ) -> GeoDataFrame:
      """Find geometries that intersect in a GeoDataFrame.
 
      Does an intersection with itself and keeps only the geometries that appear
@@ -140,6 +137,11 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDataFrame:
 
      Args:
          gdf: GeoDataFrame of polygons.
+         geom_type: Optionally specify which geometry type to keep.
+             Either "polygon", "line" or "point".
+         keep_geom_type: Whether to keep the original geometry type.
+             If the geometry types are mixed and keep_geom_type=True,
+             an exception is raised.
 
      Returns:
          A GeoDataFrame of the overlapping polygons.
@@ -197,20 +199,27 @@ def get_intersections(gdf: GeoDataFrame, geom_type: str | None = None) -> GeoDataFrame:
      """
      if isinstance(gdf, GeoSeries):
          gdf = GeoDataFrame({"geometry": gdf}, crs=gdf.crs)
+         was_geoseries = True
+     else:
+         was_geoseries = False
 
      idx_name = gdf.index.name
      gdf = gdf.assign(orig_idx=gdf.index).reset_index(drop=True)
 
-     duplicated_geoms = _get_intersecting_geometries(gdf, geom_type=geom_type).pipe(
-         clean_geoms
-     )
+     duplicated_geoms = _get_intersecting_geometries(
+         gdf, geom_type, keep_geom_type
+     ).pipe(clean_geoms)
 
      duplicated_geoms.index = duplicated_geoms["orig_idx"].values
      duplicated_geoms.index.name = idx_name
+     if was_geoseries:
+         return duplicated_geoms.geometry
      return duplicated_geoms.drop(columns="orig_idx")
 
 
- def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
+ def _get_intersecting_geometries(
+     gdf: GeoDataFrame, geom_type, keep_geom_type
+ ) -> GeoDataFrame:
      right = gdf[[gdf._geometry_column_name]]
      right["idx_right"] = right.index
 
@@ -221,9 +230,22 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
      )
      left["idx_left"] = left.index
 
-     not_identical = lambda x: x["idx_left"] != x["idx_right"]
+     def are_not_identical(df):
+         return df["idx_left"] != df["idx_right"]
 
-     if geom_type is None and get_geom_type(gdf) == "mixed":
+     if geom_type or get_geom_type(gdf) != "mixed":
+         intersected = clean_overlay(
+             left,
+             right,
+             how="intersection",
+             geom_type=geom_type,
+             keep_geom_type=keep_geom_type,
+         ).loc[are_not_identical]
+     else:
+         if keep_geom_type:
+             raise ValueError(
+                 "Cannot set keep_geom_type=True when the geom_type is mixed."
+             )
          gdf = make_all_singlepart(gdf)
          intersected = []
          for geom_type in ["polygon", "line", "point"]:
@@ -232,11 +254,7 @@ def _get_intersecting_geometries(gdf: GeoDataFrame, geom_type) -> GeoDataFrame:
              intersected += [
                  clean_overlay(left, right, how="intersection", geom_type=geom_type)
              ]
-         intersected = pd.concat(intersected, ignore_index=True).loc[not_identical]
-     else:
-         intersected = clean_overlay(
-             left, right, how="intersection", geom_type=geom_type
-         ).loc[not_identical]
+         intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
 
      # make sure it's correct by sjoining a point inside the polygons
      points_joined = intersected.representative_point().to_frame().sjoin(intersected)
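
A hypothetical call showing what get_intersections returns for two overlapping squares:

    import geopandas as gpd
    from shapely.geometry import box

    gdf = gpd.GeoDataFrame(geometry=[box(0, 0, 2, 2), box(1, 1, 3, 3)])
    double = get_intersections(gdf)
    # 'double' holds the 1x1 overlap, indexed by the original rows that
    # produced it, so each double surface can be traced back to its source.
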
@@ -25,6 +25,12 @@ from shapely.ops import unary_union
  from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 
 
+ def split_geom_types(gdf: GeoDataFrame | GeoSeries) -> tuple[GeoDataFrame | GeoSeries]:
+     return tuple(
+         gdf.loc[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
+     )
+
+
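
A quick hypothetical illustration of the new helper, which partitions a frame by geometry type in order of first appearance:

    import geopandas as gpd
    from shapely.geometry import LineString, Point

    mixed = gpd.GeoDataFrame(
        geometry=[Point(0, 0), LineString([(0, 0), (1, 1)]), Point(1, 1)]
    )
    points, lines = split_geom_types(mixed)
    # 'points' has the two Point rows, 'lines' the single LineString row.
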
  def get_common_crs(
      iterable: Iterable[Hashable], strict: bool = False
  ) -> pyproj.CRS | None:
@@ -453,6 +459,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
      >>> sg.qtm(lines, "l")
      """
 
+     if not all(isinstance(gdf, (GeoSeries, GeoDataFrame)) for gdf in gdfs):
+         raise TypeError("gdf must be GeoDataFrame or GeoSeries")
+
      if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
          raise ValueError("Cannot convert points to lines.")
 
@@ -486,7 +495,12 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
          if copy:
              gdf = gdf.copy()
 
-         gdf.geometry = gdf.geometry.map(_shapely_geometry_to_lines)
+         mapped = gdf.geometry.map(_shapely_geometry_to_lines)
+         try:
+             gdf.geometry = mapped
+         except AttributeError:
+             # GeoSeries: no settable .geometry attribute
+             gdf.loc[:] = mapped
 
          gdf = to_single_geom_type(gdf, "line")
 
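
The try/except lets to_lines handle a bare GeoSeries, which has no settable .geometry attribute. A hypothetical illustration:

    import geopandas as gpd
    from shapely.geometry import Polygon

    square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    to_lines(gpd.GeoDataFrame(geometry=[square]))  # sets .geometry directly
    to_lines(gpd.GeoSeries([square]))  # falls back to .loc[:] assignment
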
@@ -97,6 +97,9 @@ def get_neighbor_indices(
      if gdf.crs != neighbors.crs:
          raise ValueError(f"'crs' mismatch. Got {gdf.crs} and {neighbors.crs}")
 
+     if isinstance(neighbors, GeoSeries):
+         neighbors = neighbors.to_frame()
+
      # buffer and keep only geometry column
      if max_distance and predicate != "nearest":
          gdf = gdf.buffer(max_distance).to_frame()
@@ -116,6 +119,15 @@ def get_neighbor_indices(
      return joined["neighbor_index"]
 
 
+ def get_neighbor_dfs(
+     df: GeoDataFrame | DataFrame,
+     neighbor_mapper: Series,
+ ) -> list[GeoDataFrame | DataFrame]:
+     return [
+         df[df.index.isin(neighbor_mapper[i])] for i in neighbor_mapper.index.unique()
+     ]
+
+
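
A hypothetical sketch of the new helper: the Series maps each source index to its neighbor indices (as returned by get_neighbor_indices), and one sub-frame is materialized per source row.

    import pandas as pd

    neighbor_mapper = pd.Series([0, 1, 1, 2], index=[0, 0, 1, 1])
    df = pd.DataFrame({"value": [10, 20, 30]}, index=[0, 1, 2])
    dfs = get_neighbor_dfs(df, neighbor_mapper)
    # dfs[0] holds rows 0 and 1 (neighbors of source 0);
    # dfs[1] holds rows 1 and 2 (neighbors of source 1).
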
  def get_all_distances(
      gdf: GeoDataFrame | GeoSeries, neighbors: GeoDataFrame | GeoSeries
  ) -> DataFrame:
@@ -28,6 +28,11 @@ from .general import clean_geoms
  from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 
 
+ DEFAULT_GRID_SIZE = None
+ DEFAULT_LSUFFIX = "_1"
+ DEFAULT_RSUFFIX = "_2"
+
+
  def clean_overlay(
      df1: GeoDataFrame,
      df2: GeoDataFrame,
@@ -35,8 +40,8 @@ def clean_overlay(
      keep_geom_type: bool = True,
      geom_type: str | None = None,
      grid_size: float | None = None,
-     lsuffix: str = "_1",
-     rsuffix: str = "_2",
+     lsuffix: str = DEFAULT_LSUFFIX,
+     rsuffix: str = DEFAULT_RSUFFIX,
  ) -> GeoDataFrame:
      """Fixes and explodes geometries before doing a shapely overlay, then cleans up.
 
@@ -132,18 +137,22 @@ def clean_overlay(
      df1 = DataFrame(df1).reset_index(drop=True)
      df2 = DataFrame(df2).reset_index(drop=True)
 
-     overlayed = gpd.GeoDataFrame(
-         _shapely_pd_overlay(
-             df1,
-             df2,
-             how=how,
-             grid_size=grid_size,
-             lsuffix=lsuffix,
-             rsuffix=rsuffix,
-         ),
-         geometry="geometry",
-         crs=crs,
-     ).pipe(clean_geoms)
+     overlayed = (
+         gpd.GeoDataFrame(
+             _shapely_pd_overlay(
+                 df1,
+                 df2,
+                 how=how,
+                 grid_size=grid_size,
+                 lsuffix=lsuffix,
+                 rsuffix=rsuffix,
+             ),
+             geometry="geometry",
+             crs=crs,
+         )
+         .pipe(clean_geoms)
+         .pipe(make_all_singlepart, ignore_index=True)
+     )
 
      if keep_geom_type:
          overlayed = to_single_geom_type(overlayed, geom_type)
@@ -200,9 +209,9 @@ def _shapely_pd_overlay(
      df1: DataFrame,
      df2: DataFrame,
      how: str,
-     grid_size: float,
-     lsuffix,
-     rsuffix,
+     grid_size: float = DEFAULT_GRID_SIZE,
+     lsuffix=DEFAULT_LSUFFIX,
+     rsuffix=DEFAULT_RSUFFIX,
  ) -> DataFrame:
      if not grid_size and not len(df1) or not len(df2):
          return _no_intersections_return(df1, df2, how, lsuffix, rsuffix)
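
For orientation, a hypothetical call through the public clean_overlay wrapper with the new module-level defaults:

    import geopandas as gpd
    from shapely.geometry import box

    df1 = gpd.GeoDataFrame({"a": [1]}, geometry=[box(0, 0, 2, 2)])
    df2 = gpd.GeoDataFrame({"a": [2]}, geometry=[box(1, 1, 3, 3)])
    out = clean_overlay(df1, df2, how="intersection")
    # Clashing column names get DEFAULT_LSUFFIX/DEFAULT_RSUFFIX, i.e.
    # 'a_1' and 'a_2', and multipart results are exploded to singleparts.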