ssb-sgis 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,7 +61,7 @@ def to_shapely(obj) -> Geometry:
         raise TypeError(obj) from e


-def get_utm33(lon: float, lat: float, crs=25833):
+def from_4326(lon: float, lat: float, crs=25833):
     """Get utm 33 N coordinates from lonlat (4326)."""
     transformer = pyproj.Transformer.from_crs(
         "EPSG:4326", f"EPSG:{crs}", always_xy=True
@@ -69,7 +69,7 @@ def get_utm33(lon: float, lat: float, crs=25833):
     return transformer.transform(lon, lat)


-def get_lonlat(lon: float, lat: float, crs=25833):
+def to_4326(lon: float, lat: float, crs=25833):
     """Get degree coordinates 33 N coordinates from lonlat (4326)."""
     transformer = pyproj.Transformer.from_crs(
         f"EPSG:{crs}", "EPSG:4326", always_xy=True
@@ -3,11 +3,9 @@ from collections.abc import Iterable
 import networkx as nx
 import pandas as pd
 from geopandas import GeoDataFrame, GeoSeries
-from shapely import STRtree, difference, intersection, make_valid, unary_union, union
-from shapely.errors import GEOSException
-from shapely.geometry import Polygon
+from shapely import STRtree, difference, make_valid, unary_union

-from .general import _push_geom_col, clean_geoms
+from .general import _determine_geom_type_args, _push_geom_col, clean_geoms
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 from .overlay import clean_overlay

@@ -15,7 +13,7 @@ from .overlay import clean_overlay
 def update_geometries(
     gdf: GeoDataFrame,
     geom_type: str | None = None,
-    keep_geom_type: bool = True,
+    keep_geom_type: bool | None = None,
     grid_size: int | None = None,
 ) -> GeoDataFrame:
     """Puts geometries on top of each other rowwise.
@@ -80,13 +78,11 @@ def update_geometries(
     if len(gdf) <= 1:
         return gdf

-    if geom_type:
-        gdf = to_single_geom_type(gdf, geom_type)
-        keep_geom_type = True
-    elif keep_geom_type:
-        geom_type = get_geom_type(gdf)
-        if geom_type == "mixed":
-            raise ValueError("Cannot have mixed geometries when keep_geom_type is True")
+    gdf = make_all_singlepart(clean_geoms(gdf))
+
+    gdf, geom_type, keep_geom_type = _determine_geom_type_args(
+        gdf, geom_type, keep_geom_type
+    )

     geom_col = gdf._geometry_column_name
     index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
@@ -100,14 +96,16 @@ def update_geometries(
     erasers = (
         pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
         .groupby(level=0)
-        .agg(unary_union)
+        .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
     )

     # match up the aggregated erasers by index
-    erased = difference(
-        gdf.geometry.loc[erasers.index],
-        erasers,
-        grid_size=grid_size,
+    erased = make_valid(
+        difference(
+            gdf.geometry.loc[erasers.index],
+            erasers,
+            grid_size=grid_size,
+        )
     )

     gdf.loc[erased.index, geom_col] = erased
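Both the aggregated erasers and the difference result are now wrapped in make_valid, and grid_size is forwarded to shapely. A minimal sketch of the shapely 2.x pattern the new code relies on (toy geometries, not the package's own data):

>>> from shapely import box, difference, make_valid, unary_union
>>> geom = box(0, 0, 2, 2)
>>> eraser = make_valid(unary_union([box(1, 1, 3, 3)], grid_size=0.001))
>>> erased = make_valid(difference(geom, eraser, grid_size=0.001))
>>> erased.area
3.0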
@@ -123,7 +121,7 @@ def update_geometries(


 def get_intersections(
-    gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool = True
+    gdf: GeoDataFrame, geom_type: str | None = None, keep_geom_type: bool | None = None
 ) -> GeoDataFrame:
     """Find geometries that intersect in a GeoDataFrame.

@@ -203,6 +201,10 @@ def get_intersections(
     else:
         was_geoseries = False

+    gdf, geom_type, keep_geom_type = _determine_geom_type_args(
+        gdf, geom_type, keep_geom_type
+    )
+
     idx_name = gdf.index.name
     gdf = gdf.assign(orig_idx=gdf.index).reset_index(drop=True)

@@ -212,8 +214,10 @@ def get_intersections(

     duplicated_geoms.index = duplicated_geoms["orig_idx"].values
     duplicated_geoms.index.name = idx_name
+
     if was_geoseries:
         return duplicated_geoms.geometry
+
     return duplicated_geoms.drop(columns="orig_idx")
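get_intersections now resolves keep_geom_type through the same helper, so mixed-type input no longer raises unless keep_geom_type=True is passed explicitly. A hedged usage sketch, assuming the top-level `import sgis as sg` used in the package's own docstring examples:

>>> import pandas as pd
>>> import sgis as sg
>>> from shapely.geometry import Polygon
>>> poly1 = sg.to_gdf(Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]))
>>> poly2 = sg.to_gdf(Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]))
>>> overlapping = sg.get_intersections(pd.concat([poly1, poly2], ignore_index=True))  # the overlap between the two squares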
@@ -19,6 +19,7 @@ from shapely import (
     linestrings,
     make_valid,
 )
+from shapely import points as shapely_points
 from shapely.geometry import LineString, Point
 from shapely.ops import unary_union

@@ -304,7 +305,7 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
         gdf: A GeoDataFrame or GeoSeries.

     Returns:
-        A GeoDataFrame or GeoSeries sorted from large to small in length.
+        A GeoDataFrame or GeoSeries sorted from long to short in length.
     """
     # using enumerate, then iloc on the sorted dict keys.
     # to avoid creating a temporary area column (which doesn't work for GeoSeries).
@@ -315,6 +316,39 @@ def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     return gdf.iloc[list(sorted_lengths)]


+def sort_short_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
+    """Sort GeoDataFrame by length in ascending order.
+
+    Args:
+        gdf: A GeoDataFrame or GeoSeries.
+
+    Returns:
+        A GeoDataFrame or GeoSeries sorted from short to long in length.
+    """
+    # using enumerate, then iloc on the sorted dict keys.
+    # to avoid creating a temporary area column (which doesn't work for GeoSeries).
+    length_mapper = dict(enumerate(gdf.length.values))
+    sorted_lengths = dict(sorted(length_mapper.items(), key=lambda item: item[1]))
+    return gdf.iloc[list(sorted_lengths)]
+
+
+def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
+    """Sort GeoDataFrame by area in ascending order.
+
+    Args:
+        gdf: A GeoDataFrame or GeoSeries.
+
+    Returns:
+        A GeoDataFrame or GeoSeries sorted from small to large in area.
+
+    """
+    # using enumerate, then iloc on the sorted dict keys.
+    # to avoid creating a temporary area column (which doesn't work for GeoSeries).
+    area_mapper = dict(enumerate(gdf.area.values))
+    sorted_areas = dict(sorted(area_mapper.items(), key=lambda item: item[1]))
+    return gdf.iloc[list(sorted_areas)]
+
+
 def make_lines_between_points(
     arr1: NDArray[Point] | GeometryArray | GeoSeries,
     arr2: NDArray[Point] | GeometryArray | GeoSeries,
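sort_short_first and sort_small_first mirror the existing sort_long_first, ordering ascending by length and area respectively without creating a temporary column. A hedged sketch of the expected behaviour, assuming the new functions are exposed at the package top level like the existing sorters:

>>> import sgis as sg
>>> from geopandas import GeoSeries
>>> from shapely.geometry import LineString
>>> lines = GeoSeries([LineString([(0, 0), (3, 0)]), LineString([(0, 0), (1, 0)])])
>>> sg.sort_short_first(lines).length.tolist()
[1.0, 3.0]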
@@ -405,6 +439,28 @@ def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
     )


+def random_points_in_polygons(gdf: GeoDataFrame, n: int, seed=None) -> GeoDataFrame:
+    all_points = []
+
+    rng = np.random.default_rng(seed)
+
+    for i, geom in enumerate(gdf.geometry):
+        minx, miny, maxx, maxy = geom.bounds
+
+        xs = rng.uniform(minx, maxx, size=n * 500)
+        ys = rng.uniform(miny, maxy, size=n * 500)
+
+        points = GeoSeries(shapely_points(xs, y=ys), index=[i] * len(xs))
+        all_points.append(points)
+
+    return (
+        pd.concat(all_points)
+        .loc[lambda x: x.intersects(gdf.geometry)]
+        .groupby(level=0)
+        .head(n)
+    )
+
+
 def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
     """Makes lines out of one or more GeoDataFrames and splits them at intersections.
@@ -527,7 +583,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
 def clean_clip(
     gdf: GeoDataFrame | GeoSeries,
     mask: GeoDataFrame | GeoSeries | Geometry,
-    keep_geom_type: bool = True,
+    keep_geom_type: bool | None = None,
     geom_type: str | None = None,
     **kwargs,
 ) -> GeoDataFrame | GeoSeries:
@@ -540,6 +596,12 @@ def clean_clip(
     Args:
         gdf: GeoDataFrame or GeoSeries to be clipped
         mask: the geometry to clip gdf
+        geom_type: Optionally specify what geometry type to keep.,
+            if there are mixed geometry types. Must be either "polygon",
+            "line" or "point".
+        keep_geom_type: Defaults to None, meaning True if 'geom_type' is given
+            and True if the geometries are single-typed and False if the geometries
+            are mixed.
         **kwargs: Keyword arguments passed to geopandas.GeoDataFrame.clip

     Returns:
@@ -551,12 +613,9 @@ def clean_clip(
     if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
         raise TypeError(f"'gdf' should be GeoDataFrame or GeoSeries, got {type(gdf)}")

-    if geom_type is None and keep_geom_type:
-        geom_type = get_geom_type(gdf)
-        if geom_type == "mixed":
-            raise ValueError(
-                "Mixed geometry types is not allowed when keep_geom_type is True."
-            )
+    gdf, geom_type, keep_geom_type = _determine_geom_type_args(
+        gdf, geom_type, keep_geom_type
+    )

     try:
         gdf = gdf.clip(mask, **kwargs).pipe(clean_geoms)
@@ -569,7 +628,26 @@ def clean_clip(

         return gdf.clip(mask, **kwargs).pipe(clean_geoms)

-    if geom_type is not None or keep_geom_type:
+    if keep_geom_type:
         gdf = to_single_geom_type(gdf, geom_type)

     return gdf
+
+
+def _determine_geom_type_args(
+    gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
+) -> tuple[GeoDataFrame, str, bool]:
+    if geom_type:
+        gdf = to_single_geom_type(gdf, geom_type)
+        keep_geom_type = True
+    elif keep_geom_type is None:
+        geom_type = get_geom_type(gdf)
+        if geom_type == "mixed":
+            keep_geom_type = False
+        else:
+            keep_geom_type = True
+    elif keep_geom_type:
+        geom_type = get_geom_type(gdf)
+        if geom_type == "mixed":
+            raise ValueError("Cannot set keep_geom_type=True with mixed geometries")
+    return gdf, geom_type, keep_geom_type
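The new private helper centralises the argument resolution that clean_clip, update_geometries and get_intersections previously each did on their own: an explicit geom_type casts the frame to that type and forces keep_geom_type=True; keep_geom_type=None becomes True for single-typed input and False for mixed input; an explicit keep_geom_type=True still raises on mixed input. A hedged sketch of the geometry-type check it builds on, assuming get_geom_type is available at the package top level:

>>> import pandas as pd
>>> import sgis as sg
>>> from shapely.geometry import Point, Polygon
>>> polys = sg.to_gdf(Polygon([(0, 0), (1, 0), (1, 1)]))
>>> mixed = pd.concat([polys, sg.to_gdf(Point(0.5, 0.25))], ignore_index=True)
>>> sg.get_geom_type(polys)
'polygon'
>>> sg.get_geom_type(mixed)   # with keep_geom_type=None this now means "keep everything"
'mixed'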
@@ -7,6 +7,8 @@ version of the solution from GH 2792.
 'clean_overlay' also includes the overlay type "update", which can be specified in the
 "how" parameter, in addition to the five native geopandas how-s.
 """
+import functools
+
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@@ -409,9 +411,11 @@ def _shapely_diffclip_left(pairs, df1, grid_size):
     """Aggregate areas in right by unique values of left, then use those to clip
     areas out of left"""

+    agg_geoms_partial = functools.partial(agg_geoms, grid_size=grid_size)
+
     clip_left = pairs.groupby(level=0).agg(
         {
-            "geom_right": agg_geoms,
+            "geom_right": agg_geoms_partial,
             **{
                 c: "first"
                 for c in df1.columns
@@ -433,12 +437,14 @@ def _shapely_diffclip_left(pairs, df1, grid_size):


 def _shapely_diffclip_right(pairs, df1, df2, grid_size, rsuffix):
+    agg_geoms_partial = functools.partial(agg_geoms, grid_size=grid_size)
+
     clip_right = (
         pairs.rename(columns={"geometry": "geom_left", "geom_right": "geometry"})
         .groupby(by="_overlay_index_right")
         .agg(
             {
-                "geom_left": agg_geoms,
+                "geom_left": agg_geoms_partial,
                 "geometry": "first",
             }
         )
@@ -479,5 +485,7 @@ def _try_difference(left, right, grid_size):
     )


-def agg_geoms(g):
-    return make_valid(unary_union(g)) if len(g) > 1 else make_valid(g)
+def agg_geoms(g, grid_size=None):
+    return (
+        make_valid(unary_union(g, grid_size=grid_size)) if len(g) > 1 else make_valid(g)
+    )
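pandas passes only the group's values to an .agg callable, so grid_size has to be bound beforehand; functools.partial does that without changing the agg_geoms signature for existing callers. The same pattern in isolation (toy data, independent of the overlay internals):

>>> import functools
>>> import pandas as pd
>>> from shapely import box, unary_union
>>> df = pd.DataFrame(
...     {"key": [0, 0, 1], "geometry": [box(0, 0, 1, 1), box(1, 0, 2, 1), box(5, 5, 6, 6)]}
... )
>>> union_with_grid = functools.partial(unary_union, grid_size=0.001)
>>> merged = df.groupby("key")["geometry"].agg(union_with_grid)
>>> merged.loc[0].area   # the two touching boxes dissolve into one rectangle
2.0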
@@ -203,6 +203,7 @@ def eliminate_by_longest(
     fix_double: bool = True,
     ignore_index: bool = False,
     aggfunc: str | dict | list | None = None,
+    grid_size=None,
     **kwargs,
 ) -> GeoDataFrame | tuple[GeoDataFrame]:
     """Dissolves selected polygons with the longest bordering neighbor polygon.
@@ -236,6 +237,33 @@ def eliminate_by_longest(
     Returns:
         The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
         If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.
+
+    Examples
+    --------
+
+    Create two polygons with a sliver in between:
+
+    >>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
+    >>> small_poly = sg.to_gdf(
+    ...     Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
+    ... )
+    >>> large_poly = sg.to_gdf(
+    ...     Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
+    ... )
+
+    Using multiple GeoDataFrame as input, the sliver is eliminated into the small
+    polygon (because it has the longest border with sliver).
+
+    >>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_longest(
+    ...     [small_poly, large_poly], sliver
+    ... )
+
+    With only one input GeoDataFrame:
+
+    >>> polys = pd.concat([small_poly, large_poly])
+    >>> eliminated = sg.eliminate_by_longest(polys, sliver)
+
+
     """
     if isinstance(gdf, (list, tuple)):
         # concat, then break up the dataframes in the end
@@ -297,6 +325,7 @@ def eliminate_by_longest(
         aggfunc,
         crs,
         fix_double,
+        grid_size=grid_size,
         **kwargs,
     )

@@ -341,6 +370,7 @@ def eliminate_by_largest(
     ignore_index: bool = False,
     aggfunc: str | dict | list | None = None,
     predicate: str = "intersects",
+    grid_size=None,
     **kwargs,
 ) -> GeoDataFrame | tuple[GeoDataFrame]:
     """Dissolves selected polygons with the largest neighbor polygon.
@@ -374,6 +404,31 @@ def eliminate_by_largest(
         The GeoDataFrame (gdf) with the geometries of 'to_eliminate' dissolved in.
         If multiple GeoDataFrame are passed as 'gdf', they are returned as a tuple.

+    Examples
+    --------
+
+    Create two polygons with a sliver in between:
+
+    >>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
+    >>> small_poly = sg.to_gdf(
+    ...     Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
+    ... )
+    >>> large_poly = sg.to_gdf(
+    ...     Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
+    ... )
+
+    Using multiple GeoDataFrame as input, the sliver is eliminated into
+    the large polygon.
+
+    >>> small_poly_eliminated, large_poly_eliminated = sg.eliminate_by_largest(
+    ...     [small_poly, large_poly], sliver
+    ... )
+
+    With only one input GeoDataFrame:
+
+    >>> polys = pd.concat([small_poly, large_poly])
+    >>> eliminated = sg.eliminate_by_largest(polys, sliver)
+
     """
     return _eliminate_by_area(
         gdf,
@@ -385,6 +440,7 @@ def eliminate_by_largest(
         aggfunc=aggfunc,
         predicate=predicate,
         fix_double=fix_double,
+        grid_size=grid_size,
         **kwargs,
     )

@@ -399,6 +455,7 @@ def eliminate_by_smallest(
     aggfunc: str | dict | list | None = None,
     predicate: str = "intersects",
     fix_double: bool = False,
+    grid_size=None,
     **kwargs,
 ) -> GeoDataFrame | tuple[GeoDataFrame]:
     return _eliminate_by_area(
@@ -411,6 +468,7 @@ def eliminate_by_smallest(
         aggfunc=aggfunc,
         predicate=predicate,
         fix_double=fix_double,
+        grid_size=grid_size,
         **kwargs,
     )

@@ -425,6 +483,7 @@ def _eliminate_by_area(
     aggfunc: str | dict | list | None = None,
     predicate="intersects",
     fix_double: bool = False,
+    grid_size=None,
     **kwargs,
 ) -> GeoDataFrame:
     if isinstance(gdf, (list, tuple)):
@@ -468,7 +527,9 @@ def _eliminate_by_area(

     notna = joined.loc[lambda x: x["_dissolve_idx"].notna()]

-    eliminated = _eliminate(gdf, notna, aggfunc, crs, fix_double=fix_double, **kwargs)
+    eliminated = _eliminate(
+        gdf, notna, aggfunc, crs, fix_double=fix_double, grid_size=grid_size, **kwargs
+    )

     if not ignore_index:
         eliminated.index = eliminated.index.map(idx_mapper)
@@ -503,7 +564,7 @@ def _eliminate_by_area(
     return gdfs


-def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
+def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, grid_size, **kwargs):
     if not len(to_eliminate):
         return gdf

@@ -660,8 +721,12 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):

         # allign and aggregate by dissolve index to not get duplicates in difference
         intersecting.index = soon_erased.index
-        soon_erased = soon_erased.geometry.groupby(level=0).agg(unary_union)
-        intersecting = intersecting.groupby(level=0).agg(unary_union)
+        soon_erased = soon_erased.geometry.groupby(level=0).agg(
+            lambda x: unary_union(x, grid_size=grid_size)
+        )
+        intersecting = intersecting.groupby(level=0).agg(
+            lambda x: unary_union(x, grid_size=grid_size)
+        )

         # from ..maps.maps import explore_locals
         # explore_locals()
@@ -674,12 +739,16 @@ def _eliminate(gdf, to_eliminate, aggfunc, crs, fix_double, **kwargs):
         eliminated["geometry"] = (
             pd.concat([eliminators, soon_erased, missing])
             .groupby(level=0)
-            .agg(lambda x: make_valid(unary_union(x.dropna().values)))
+            .agg(
+                lambda x: make_valid(
+                    unary_union(x.dropna().values, grid_size=grid_size)
+                )
+            )
         )

     else:
         eliminated["geometry"] = many_hits.groupby("_dissolve_idx")["geometry"].agg(
-            lambda x: make_valid(unary_union(x.values))
+            lambda x: make_valid(unary_union(x.values, grid_size=grid_size))
         )

     # setting crs on the GeometryArrays to avoid warning in concat
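grid_size is shapely 2.x's precision-grid argument: coordinates are snapped to multiples of the given value during the union, which is how hairline gaps and slivers from floating-point noise get welded shut in _eliminate. A rough sketch of the effect on its own (values are illustrative):

>>> from shapely import box, unary_union
>>> a = box(0, 0, 1, 1)
>>> b = box(1.0000001, 0, 2, 1)   # a 1e-7 gap left by floating-point noise
>>> unary_union([a, b]).geom_type
'MultiPolygon'
>>> unary_union([a, b], grid_size=0.001).geom_type
'Polygon'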
@@ -973,7 +1042,11 @@ def _close_all_holes_no_islands(poly, all_geoms):
     return make_valid(unary_union(holes_closed))


-def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame:
+def get_gaps(
+    gdf: GeoDataFrame,
+    include_interiors: bool = False,
+    grid_size: float | int | None = None,
+) -> GeoDataFrame:
     """Get the gaps between polygons.

     Args:
@@ -998,7 +1071,9 @@ def get_gaps(gdf: GeoDataFrame, include_interiors: bool = False) -> GeoDataFrame
     )

     bbox_diff = make_all_singlepart(
-        clean_overlay(bbox, gdf, how="difference", geom_type="polygon")
+        clean_overlay(
+            bbox, gdf, how="difference", geom_type="polygon", grid_size=grid_size
+        )
     )

     # remove the outer "gap", i.e. the surrounding area
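get_gaps now forwards grid_size to the clean_overlay difference it is built on. A hedged usage sketch with a fully enclosed hole (the frame below is four rectangles around a 1x1 gap; top-level export is assumed):

>>> import pandas as pd
>>> import sgis as sg
>>> from shapely import box
>>> frame = pd.concat(
...     [sg.to_gdf(box(0, 0, 3, 1)), sg.to_gdf(box(0, 2, 3, 3)),
...      sg.to_gdf(box(0, 1, 1, 2)), sg.to_gdf(box(2, 1, 3, 2))],
...     ignore_index=True,
... )
>>> gaps = sg.get_gaps(frame, grid_size=1e-6)   # the enclosed 1x1 hole between the rectangles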
@@ -11,59 +11,6 @@ from .conversion import to_gdf
 gdf_type_error_message = "'gdf' should be of type GeoDataFrame or GeoSeries."


-def _get_sfilter_indices(
-    left: GeoDataFrame | GeoSeries,
-    right: GeoDataFrame | GeoSeries | Geometry,
-    predicate: str,
-) -> np.ndarray:
-    """Compute geometric comparisons and get matching indices.
-
-    Taken from:
-    geopandas.tools.sjoin._geom_predicate_query
-
-    Parameters
-    ----------
-    left : GeoDataFrame
-    right : GeoDataFrame
-    predicate : string
-        Binary predicate to query.
-
-    Returns
-    -------
-    DataFrame
-        DataFrame with matching indices in
-        columns named `_key_left` and `_key_right`.
-    """
-    original_predicate = predicate
-
-    with warnings.catch_warnings():
-        # We don't need to show our own warning here
-        # TODO remove this once the deprecation has been enforced
-        warnings.filterwarnings(
-            "ignore", "Generated spatial index is empty", FutureWarning
-        )
-
-        if predicate == "within":
-            # within is implemented as the inverse of contains
-            # contains is a faster predicate
-            # see discussion at https://github.com/geopandas/geopandas/pull/1421
-            predicate = "contains"
-            sindex = left.sindex
-            input_geoms = right.geometry if isinstance(right, GeoDataFrame) else right
-        else:
-            # all other predicates are symmetric
-            # keep them the same
-            sindex = right.sindex
-            input_geoms = left.geometry if isinstance(left, GeoDataFrame) else left
-
-    l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
-
-    if original_predicate == "within":
-        return np.unique(r_idx)
-
-    return np.unique(l_idx)
-
-
 def sfilter(
     gdf: GeoDataFrame | GeoSeries,
     other: GeoDataFrame | GeoSeries | Geometry,
@@ -290,3 +237,56 @@ def _sfilter_checks(other, crs):
         raise ValueError("crs mismatch", crs, other.crs) from e

     return other
+
+
+def _get_sfilter_indices(
+    left: GeoDataFrame | GeoSeries,
+    right: GeoDataFrame | GeoSeries | Geometry,
+    predicate: str,
+) -> np.ndarray:
+    """Compute geometric comparisons and get matching indices.
+
+    Taken from:
+    geopandas.tools.sjoin._geom_predicate_query
+
+    Parameters
+    ----------
+    left : GeoDataFrame
+    right : GeoDataFrame
+    predicate : string
+        Binary predicate to query.
+
+    Returns
+    -------
+    DataFrame
+        DataFrame with matching indices in
+        columns named `_key_left` and `_key_right`.
+    """
+    original_predicate = predicate
+
+    with warnings.catch_warnings():
+        # We don't need to show our own warning here
+        # TODO remove this once the deprecation has been enforced
+        warnings.filterwarnings(
+            "ignore", "Generated spatial index is empty", FutureWarning
+        )
+
+        if predicate == "within":
+            # within is implemented as the inverse of contains
+            # contains is a faster predicate
+            # see discussion at https://github.com/geopandas/geopandas/pull/1421
+            predicate = "contains"
+            sindex = left.sindex
+            input_geoms = right.geometry if isinstance(right, GeoDataFrame) else right
+        else:
+            # all other predicates are symmetric
+            # keep them the same
+            sindex = right.sindex
+            input_geoms = left.geometry if isinstance(left, GeoDataFrame) else left
+
+    l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
+
+    if original_predicate == "within":
+        return np.unique(r_idx)
+
+    return np.unique(l_idx)
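_get_sfilter_indices is only moved below the public functions; its logic is unchanged. Its core is the geopandas spatial-index query, which returns positional indices for both sides of the predicate. A minimal sketch of that call on its own:

>>> import numpy as np
>>> from geopandas import GeoSeries
>>> from shapely import box
>>> left = GeoSeries([box(0, 0, 1, 1), box(5, 5, 6, 6)])
>>> right = GeoSeries([box(0.5, 0.5, 2, 2)])
>>> l_idx, r_idx = right.sindex.query(left.geometry, predicate="intersects", sort=False)
>>> np.unique(l_idx)   # positions in 'left' that intersect 'right'
array([0])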
sgis/helpers.py CHANGED
@@ -219,6 +219,14 @@ def sort_nans_last(df, ignore_index: bool = False):
     return df.reset_index(drop=True) if ignore_index else df


+def is_number(text) -> bool:
+    try:
+        float(text)
+        return True
+    except ValueError:
+        return False
+
+
 class LocalFunctionError(ValueError):
     def __init__(self, func: str):
         self.func = func.__name__
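The new is_number helper is a plain float() try/except, so numeric strings, ints and floats pass while other strings do not (a None or list argument would still raise TypeError, since only ValueError is caught):

>>> from sgis.helpers import is_number
>>> is_number("4.2")
True
>>> is_number("4,2")
False
>>> is_number(3)
True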
@@ -52,6 +52,8 @@ def read_geopandas(
        try:
            return gpd.read_parquet(file, **kwargs)
        except ValueError as e:
+            if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
+                raise e
            df = dp.read_pandas(gcs_path, **kwargs)

            if pandas_fallback or not len(df):
@@ -63,6 +65,8 @@ def read_geopandas(
        try:
            return gpd.read_file(file, **kwargs)
        except ValueError as e:
+            if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
+                raise e
            df = dp.read_pandas(gcs_path, **kwargs)

            if pandas_fallback or not len(df):
@@ -75,6 +79,7 @@ def write_geopandas(
     df: gpd.GeoDataFrame,
     gcs_path: str | Path,
     overwrite: bool = True,
+    pandas_fallback: bool = False,
     fs: Optional[dp.gcs.GCSFileSystem] = None,
     **kwargs,
 ) -> None:
@@ -106,12 +111,10 @@ def write_geopandas(
     pd.io.parquet.BaseImpl.validate_dataframe(df)

     if not len(df):
-        try:
-            dp.write_pandas(df, gcs_path, **kwargs)
-        except Exception:
-            dp.write_pandas(
-                df.drop(df._geometry_column_name, axis=1), gcs_path, **kwargs
-            )
+        if pandas_fallback:
+            df.geometry = df.geometry.astype(str)
+            df = pd.DataFrame(df)
+        dp.write_pandas(df, gcs_path, **kwargs)
         return

     fs = dp.FileClient.get_gcs_file_system()
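Instead of silently dropping the geometry column when writing an empty frame fails, write_geopandas now only falls back to a plain pandas write when the caller asks for it: with pandas_fallback=True the geometry column is cast to string and the frame demoted to a pandas DataFrame before dp.write_pandas is called. A hedged sketch of the intended call; the bucket path is illustrative, top-level export is assumed, and the function requires the Dapla/GCS setup the module targets:

>>> import geopandas as gpd
>>> import sgis as sg   # assuming write_geopandas is exposed at the top level
>>> empty = gpd.GeoDataFrame(geometry=[])
>>> sg.write_geopandas(empty, "gs://some-bucket/empty.parquet", pandas_fallback=True)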