ssb-sgis 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. sgis/__init__.py +10 -3
  2. sgis/debug_config.py +24 -0
  3. sgis/geopandas_tools/bounds.py +16 -21
  4. sgis/geopandas_tools/buffer_dissolve_explode.py +112 -30
  5. sgis/geopandas_tools/centerlines.py +4 -91
  6. sgis/geopandas_tools/cleaning.py +1576 -583
  7. sgis/geopandas_tools/conversion.py +24 -14
  8. sgis/geopandas_tools/duplicates.py +27 -6
  9. sgis/geopandas_tools/general.py +259 -100
  10. sgis/geopandas_tools/geometry_types.py +1 -1
  11. sgis/geopandas_tools/neighbors.py +16 -12
  12. sgis/geopandas_tools/overlay.py +7 -3
  13. sgis/geopandas_tools/point_operations.py +3 -3
  14. sgis/geopandas_tools/polygon_operations.py +505 -100
  15. sgis/geopandas_tools/polygons_as_rings.py +40 -8
  16. sgis/geopandas_tools/sfilter.py +26 -9
  17. sgis/io/dapla_functions.py +238 -19
  18. sgis/maps/examine.py +11 -10
  19. sgis/maps/explore.py +227 -155
  20. sgis/maps/legend.py +13 -4
  21. sgis/maps/map.py +22 -13
  22. sgis/maps/maps.py +100 -29
  23. sgis/maps/thematicmap.py +25 -18
  24. sgis/networkanalysis/_service_area.py +6 -1
  25. sgis/networkanalysis/cutting_lines.py +12 -5
  26. sgis/networkanalysis/finding_isolated_networks.py +13 -6
  27. sgis/networkanalysis/networkanalysis.py +10 -12
  28. sgis/parallel/parallel.py +27 -10
  29. sgis/raster/base.py +208 -0
  30. sgis/raster/cube.py +3 -3
  31. sgis/raster/image_collection.py +1421 -724
  32. sgis/raster/indices.py +10 -7
  33. sgis/raster/raster.py +7 -7
  34. sgis/raster/sentinel_config.py +33 -17
  35. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/METADATA +6 -7
  36. ssb_sgis-1.0.5.dist-info/RECORD +62 -0
  37. ssb_sgis-1.0.3.dist-info/RECORD +0 -61
  38. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/LICENSE +0 -0
  39. {ssb_sgis-1.0.3.dist-info → ssb_sgis-1.0.5.dist-info}/WHEEL +0 -0
@@ -91,7 +91,7 @@ def to_shapely(obj: Any) -> Geometry:
91
91
  if not hasattr(obj, "__iter__"):
92
92
  raise TypeError(type(obj))
93
93
  try:
94
- return obj.unary_union
94
+ return shapely.union_all(obj.geometry.values)
95
95
  except AttributeError:
96
96
  pass
97
97
  try:
@@ -110,7 +110,7 @@ def to_shapely(obj: Any) -> Geometry:
110
110
  return shapely.wkb.loads(obj)
111
111
  except TypeError:
112
112
  pass
113
- raise TypeError(type(obj))
113
+ raise TypeError(type(obj), obj)
114
114
 
115
115
 
116
116
  def to_bbox(
@@ -136,20 +136,30 @@ def to_bbox(
136
136
  pass
137
137
 
138
138
  try:
139
- minx = int(np.min(obj["minx"])) # type: ignore [index]
140
- miny = int(np.min(obj["miny"])) # type: ignore [index]
141
- maxx = int(np.max(obj["maxx"])) # type: ignore [index]
142
- maxy = int(np.max(obj["maxy"])) # type: ignore [index]
139
+ minx = float(np.min(obj["minx"])) # type: ignore [index]
140
+ miny = float(np.min(obj["miny"])) # type: ignore [index]
141
+ maxx = float(np.max(obj["maxx"])) # type: ignore [index]
142
+ maxy = float(np.max(obj["maxy"])) # type: ignore [index]
143
143
  return minx, miny, maxx, maxy
144
144
  except Exception:
145
- try:
146
- minx = int(np.min(obj.minx)) # type: ignore [union-attr]
147
- miny = int(np.min(obj.miny)) # type: ignore [union-attr]
148
- maxx = int(np.max(obj.maxx)) # type: ignore [union-attr]
149
- maxy = int(np.max(obj.maxy)) # type: ignore [union-attr]
150
- return minx, miny, maxx, maxy
151
- except Exception:
152
- pass
145
+ pass
146
+ try:
147
+ minx = float(np.min(obj.minx)) # type: ignore [union-attr]
148
+ miny = float(np.min(obj.miny)) # type: ignore [union-attr]
149
+ maxx = float(np.max(obj.maxx)) # type: ignore [union-attr]
150
+ maxy = float(np.max(obj.maxy)) # type: ignore [union-attr]
151
+ return minx, miny, maxx, maxy
152
+ except Exception:
153
+ pass
154
+
155
+ try:
156
+ minx = float(np.min(obj["west_longitude"])) # type: ignore [index]
157
+ miny = float(np.min(obj["south_latitude"])) # type: ignore [index]
158
+ maxx = float(np.max(obj["east_longitude"])) # type: ignore [index]
159
+ maxy = float(np.max(obj["north_latitude"])) # type: ignore [index]
160
+ return minx, miny, maxx, maxy
161
+ except Exception:
162
+ pass
153
163
 
154
164
  if hasattr(obj, "geometry"):
155
165
  try:
@@ -8,10 +8,10 @@ from shapely import STRtree
8
8
  from shapely import difference
9
9
  from shapely import make_valid
10
10
  from shapely import simplify
11
- from shapely import unary_union
12
11
  from shapely.errors import GEOSException
13
12
 
14
13
  from .general import _determine_geom_type_args
14
+ from .general import _grouped_unary_union
15
15
  from .general import _parallel_unary_union_geoseries
16
16
  from .general import _push_geom_col
17
17
  from .general import clean_geoms
@@ -125,10 +125,8 @@ def update_geometries(
125
125
  else:
126
126
  only_one = erasers.groupby(level=0).transform("size") == 1
127
127
  one_hit = erasers[only_one]
128
- many_hits = (
129
- erasers[~only_one]
130
- .groupby(level=0)
131
- .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
128
+ many_hits = _grouped_unary_union(
129
+ erasers[~only_one], level=0, grid_size=grid_size
132
130
  )
133
131
  erasers = pd.concat([one_hit, many_hits]).sort_index()
134
132
 
@@ -357,10 +355,33 @@ def _get_intersecting_geometries(
357
355
 
358
356
  duplicated_points = points_joined.loc[points_joined.index.duplicated(keep=False)]
359
357
 
360
- return intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
358
+ out = intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
361
359
  columns=["idx_left", "idx_right"]
362
360
  )
363
361
 
362
+ # some polygons within polygons are not counted in the
363
+ within = (
364
+ gdf.assign(_range_idx_inters_left=lambda x: range(len(x)))
365
+ .sjoin(
366
+ GeoDataFrame(
367
+ {
368
+ "geometry": gdf.buffer(1e-6).values,
369
+ "_range_idx_inters_right": range(len(gdf)),
370
+ },
371
+ crs=gdf.crs,
372
+ ),
373
+ how="inner",
374
+ predicate="within",
375
+ )
376
+ .loc[lambda x: x["_range_idx_inters_left"] != x["_range_idx_inters_right"]]
377
+ .drop(
378
+ columns=["index_right", "_range_idx_inters_left", "_range_idx_inters_right"]
379
+ )
380
+ .pipe(sfilter_inverse, out.buffer(-PRECISION))
381
+ )
382
+
383
+ return pd.concat([out, within])
384
+
364
385
 
365
386
  def _drop_duplicate_geometries(gdf: GeoDataFrame, **kwargs) -> GeoDataFrame:
366
387
  """Drop geometries that are considered equal.
@@ -1,3 +1,5 @@
1
+ import functools
2
+ import itertools
1
3
  import numbers
2
4
  import warnings
3
5
  from collections.abc import Hashable
@@ -8,12 +10,14 @@ import joblib
8
10
  import numpy as np
9
11
  import pandas as pd
10
12
  import pyproj
13
+ import shapely
11
14
  from geopandas import GeoDataFrame
12
15
  from geopandas import GeoSeries
13
16
  from geopandas.array import GeometryArray
14
17
  from geopandas.array import GeometryDtype
15
18
  from numpy.typing import NDArray
16
19
  from shapely import Geometry
20
+ from shapely import extract_unique_points
17
21
  from shapely import get_coordinates
18
22
  from shapely import get_exterior_ring
19
23
  from shapely import get_interior_ring
@@ -22,15 +26,16 @@ from shapely import get_parts
22
26
  from shapely import linestrings
23
27
  from shapely import make_valid
24
28
  from shapely import points as shapely_points
25
- from shapely import unary_union
29
+ from shapely import union_all
26
30
  from shapely.geometry import LineString
31
+ from shapely.geometry import MultiPoint
27
32
  from shapely.geometry import Point
33
+ from shapely.geometry import Polygon
28
34
 
29
- try:
30
- import dask_geopandas
31
- except ImportError:
32
- pass
33
-
35
+ from .conversion import coordinate_array
36
+ from .conversion import to_bbox
37
+ from .conversion import to_gdf
38
+ from .conversion import to_geoseries
34
39
  from .geometry_types import get_geom_type
35
40
  from .geometry_types import make_all_singlepart
36
41
  from .geometry_types import to_single_geom_type
@@ -38,7 +43,7 @@ from .geometry_types import to_single_geom_type
38
43
 
39
44
  def split_geom_types(gdf: GeoDataFrame | GeoSeries) -> tuple[GeoDataFrame | GeoSeries]:
40
45
  return tuple(
41
- gdf.loc[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
46
+ gdf[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
42
47
  )
43
48
 
44
49
 
@@ -385,35 +390,27 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
385
390
 
386
391
 
387
392
  def make_lines_between_points(
388
- arr1: NDArray[Point] | GeometryArray | GeoSeries,
389
- arr2: NDArray[Point] | GeometryArray | GeoSeries,
393
+ *arrs: NDArray[Point] | GeometryArray | GeoSeries,
390
394
  ) -> NDArray[LineString]:
391
- """Creates an array of linestrings from two arrays of points.
395
+ """Creates an array of linestrings from two or more arrays of points.
392
396
 
393
- The operation is done rowwise.
397
+ The lines are created rowwise, meaning from arr0[0] to arr1[0], from arr0[1] to arr1[1]...
398
+ If more than two arrays are passed, e.g. three arrays,
399
+ the lines will go from arr0[0] via arr1[0] to arr2[0].
394
400
 
395
401
  Args:
396
- arr1: GeometryArray og GeoSeries of points.
397
- arr2: GeometryArray og GeoSeries of points of same length as arr1.
402
+ arrs: 1 dimensional arrays of point geometries.
403
+ All arrays must have the same shape.
404
+ Must be at least two arrays.
398
405
 
399
406
  Returns:
400
407
  A numpy array of linestrings.
401
408
 
402
- Raises:
403
- ValueError: If the arrays have unequal shape.
404
-
405
409
  """
406
- if arr1.shape != arr2.shape:
407
- raise ValueError("Arrays must have equal shape.")
408
-
409
- coords: pd.DataFrame = pd.concat(
410
- [
411
- pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
412
- pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
413
- ]
414
- ).sort_index()
415
-
416
- return linestrings(coords.values, indices=coords.index)
410
+ coords = [get_coordinates(arr, return_index=False) for arr in arrs]
411
+ return linestrings(
412
+ np.concatenate([coords_arr[:, None, :] for coords_arr in coords], axis=1)
413
+ )
417
414
 
418
415
 
419
416
  def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
@@ -563,7 +560,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
563
560
  raise TypeError("gdf must be GeoDataFrame or GeoSeries")
564
561
 
565
562
  if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
566
- raise ValueError("Cannot convert points to lines.")
563
+ raise ValueError(
564
+ f"Cannot convert points to lines. {[gdf.geom_type.value_counts() for gdf in gdfs]}"
565
+ )
567
566
 
568
567
  def _shapely_geometry_to_lines(geom):
569
568
  """Get all lines from the exterior and interiors of a Polygon."""
@@ -587,7 +586,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
587
586
 
588
587
  lines += interior_rings
589
588
 
590
- return unary_union(lines)
589
+ return union_all(lines)
591
590
 
592
591
  lines = []
593
592
  for gdf in gdfs:
@@ -677,6 +676,162 @@ def clean_clip(
677
676
  return gdf
678
677
 
679
678
 
679
+ def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
680
+ if len(arr1) != len(arr2):
681
+ raise ValueError
682
+ if not len(arr1):
683
+ return arr1
684
+
685
+ arr1, arr2 = arr2, arr1 # TODO fix
686
+
687
+ coords1 = coordinate_array(arr1)
688
+ coords2 = coordinate_array(arr2)
689
+
690
+ dx = coords2[:, 0] - coords1[:, 0]
691
+ dy = coords2[:, 1] - coords1[:, 1]
692
+ len_xy = np.sqrt((dx**2.0) + (dy**2.0))
693
+ x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
694
+ y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
695
+
696
+ new_points = np.array([None for _ in range(len(arr1))])
697
+ new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
698
+
699
+ new_points[~np.isnan(x)] = make_lines_between_points(
700
+ arr2[~np.isnan(x)], new_points[~np.isnan(x)]
701
+ )
702
+ return new_points
703
+
704
+
705
+ def multipoints_to_line_segments_numpy(
706
+ points: GeoSeries | NDArray[MultiPoint] | MultiPoint,
707
+ cycle: bool = False,
708
+ ) -> list[LineString]:
709
+ try:
710
+ arr = get_parts(points.geometry.values)
711
+ except AttributeError:
712
+ arr = get_parts(points)
713
+
714
+ line_between_last_and_first = [LineString([arr[-1], arr[0]])] if cycle else []
715
+ return [
716
+ LineString([p0, p1]) for p0, p1 in itertools.pairwise(arr)
717
+ ] + line_between_last_and_first
718
+
719
+
720
+ def multipoints_to_line_segments(
721
+ multipoints: GeoSeries | GeoDataFrame, cycle: bool = True # to_next: bool = True,
722
+ ) -> GeoSeries | GeoDataFrame:
723
+
724
+ if not len(multipoints):
725
+ return multipoints
726
+
727
+ if isinstance(multipoints, GeoDataFrame):
728
+ df = multipoints.drop(columns=multipoints.geometry.name)
729
+ multipoints = multipoints.geometry
730
+ was_gdf = True
731
+ else:
732
+ multipoints = to_geoseries(multipoints)
733
+ was_gdf = False
734
+
735
+ multipoints = to_geoseries(multipoints)
736
+
737
+ segs = pd.Series(
738
+ [
739
+ multipoints_to_line_segments_numpy(geoms, cycle=cycle)
740
+ for geoms in multipoints
741
+ ],
742
+ index=multipoints.index,
743
+ ).explode()
744
+
745
+ segs = GeoSeries(segs, crs=multipoints.crs, name=multipoints.name)
746
+
747
+ if was_gdf:
748
+ return GeoDataFrame(df.join(segs), geometry=segs.name, crs=segs.crs)
749
+ else:
750
+ return segs
751
+
752
+
753
+ def get_line_segments(
754
+ lines: GeoDataFrame | GeoSeries, extract_unique: bool = False, cycle=False
755
+ ) -> GeoDataFrame:
756
+ try:
757
+ assert lines.index.is_unique
758
+ except AttributeError:
759
+ pass
760
+
761
+ if isinstance(lines, GeoDataFrame):
762
+ df = lines.drop(columns=lines.geometry.name)
763
+ lines = lines.geometry
764
+ was_gdf = True
765
+ else:
766
+ lines = to_geoseries(lines)
767
+ was_gdf = False
768
+
769
+ partial_segs_func = functools.partial(
770
+ multipoints_to_line_segments_numpy, cycle=cycle
771
+ )
772
+ if extract_unique:
773
+ points = extract_unique_points(lines.geometry.values)
774
+ segs = pd.Series(
775
+ [partial_segs_func(geoms) for geoms in points],
776
+ index=lines.index,
777
+ ).explode()
778
+ else:
779
+ coords, indices = shapely.get_coordinates(lines, return_index=True)
780
+ points = GeoSeries(shapely.points(coords), index=indices)
781
+ index_mapper = {
782
+ i: idx
783
+ for i, idx in zip(
784
+ np.unique(indices), lines.index.drop_duplicates(), strict=True
785
+ )
786
+ }
787
+ points.index = points.index.map(index_mapper)
788
+
789
+ segs = points.groupby(level=0).agg(partial_segs_func).explode()
790
+ segs = GeoSeries(segs, crs=lines.crs, name=lines.name)
791
+
792
+ if was_gdf:
793
+ return GeoDataFrame(df.join(segs), geometry=segs.name, crs=lines.crs)
794
+ else:
795
+ return segs
796
+
797
+
798
+ def get_index_right_columns(gdf: pd.DataFrame | pd.Series) -> list[str]:
799
+ """Get a list of what will be the resulting columns in an sjoin."""
800
+ if gdf.index.name is None and all(name is None for name in gdf.index.names):
801
+ if gdf.index.nlevels == 1:
802
+ return ["index_right"]
803
+ else:
804
+ return [f"index_right{i}" for i in range(gdf.index.nlevels)]
805
+ else:
806
+ return gdf.index.names
807
+
808
+
809
+ def points_in_bounds(
810
+ gdf: GeoDataFrame | GeoSeries, gridsize: int | float
811
+ ) -> GeoDataFrame:
812
+ """Get a GeoDataFrame of points within the bounds of the GeoDataFrame."""
813
+ minx, miny, maxx, maxy = to_bbox(gdf)
814
+ try:
815
+ crs = gdf.crs
816
+ except AttributeError:
817
+ crs = None
818
+
819
+ xs = np.linspace(minx, maxx, num=int((maxx - minx) / gridsize))
820
+ ys = np.linspace(miny, maxy, num=int((maxy - miny) / gridsize))
821
+ x_coords, y_coords = np.meshgrid(xs, ys, indexing="ij")
822
+ coords = np.concatenate((x_coords.reshape(-1, 1), y_coords.reshape(-1, 1)), axis=1)
823
+ return to_gdf(coords, crs=crs)
824
+
825
+
826
+ def points_in_polygons(
827
+ gdf: GeoDataFrame | GeoSeries, gridsize: int | float
828
+ ) -> GeoDataFrame:
829
+ index_right_col = get_index_right_columns(gdf)
830
+ out = points_in_bounds(gdf, gridsize).sjoin(gdf).set_index(index_right_col)
831
+ out.index.name = gdf.index.name
832
+ return out.sort_index()
833
+
834
+
680
835
  def _determine_geom_type_args(
681
836
  gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
682
837
  ) -> tuple[GeoDataFrame, str, bool]:
@@ -696,65 +851,93 @@ def _determine_geom_type_args(
696
851
  return gdf, geom_type, keep_geom_type
697
852
 
698
853
 
699
- def _merge_geometries(geoms: GeoSeries, grid_size=None) -> Geometry:
700
- return make_valid(unary_union(geoms, grid_size=grid_size))
854
+ def _unary_union_for_notna(geoms, **kwargs):
855
+ try:
856
+ return make_valid(union_all(geoms, **kwargs))
857
+ except TypeError:
858
+ return union_all([geom for geom in geoms.dropna().values], **kwargs)
859
+
860
+
861
+ def _grouped_unary_union(
862
+ df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
863
+ by: str | list[str] | None = None,
864
+ level: int | None = None,
865
+ as_index: bool = True,
866
+ grid_size: float | int | None = None,
867
+ dropna: bool = False,
868
+ **kwargs,
869
+ ) -> GeoSeries | GeoDataFrame:
870
+ """Vectorized unary_union for groups.
701
871
 
872
+ Experimental. Messy code.
873
+ """
874
+ df = df.copy()
875
+ df_orig = df.copy()
702
876
 
703
- def _parallel_unary_union(
704
- gdf: GeoDataFrame, n_jobs: int = 1, by=None, grid_size=None, **kwargs
705
- ) -> list[Geometry]:
706
877
  try:
707
- geom_col = gdf._geometry_column_name
878
+ geom_col = df._geometry_column_name
708
879
  except AttributeError:
709
- geom_col = "geometry"
710
-
711
- if by is not None and not isinstance(by, str):
712
- gdf = gdf.copy()
713
880
  try:
714
- gdf["_by"] = gdf[by].astype(str).agg("-".join, axis=1)
715
- except KeyError:
716
- gdf["_by"] = by
717
- by = "_by"
881
+ geom_col = df.name
882
+ if geom_col is None:
883
+ geom_col = "geometry"
884
+ except AttributeError:
885
+ geom_col = "geometry"
718
886
 
719
- if gdf.crs is None:
720
- gdf.crs = 25833
721
- _was_none = True
722
- else:
723
- _was_none = False
887
+ if not len(df):
888
+ return GeoSeries(name=geom_col)
724
889
 
725
- if isinstance(gdf.index, pd.MultiIndex):
726
- gdf = gdf.reset_index(drop=True)
890
+ if isinstance(df, pd.Series):
891
+ df.name = geom_col
892
+ original_index = df.index
893
+ df = df.reset_index()
894
+ df.index = original_index
727
895
 
728
- dissolved = (
729
- dask_geopandas.from_geopandas(gdf, npartitions=n_jobs).dissolve(by).compute()
730
- )
731
- if _was_none:
732
- dissolved.crs = None
896
+ if isinstance(by, str):
897
+ by = [by]
898
+ elif by is None and level is None:
899
+ raise TypeError("You have to supply one of 'by' and 'level'")
900
+ elif by is None:
901
+ by = df.index.get_level_values(level)
733
902
 
734
- return dissolved.geometry
903
+ cumcount = df.groupby(by, dropna=dropna).cumcount().values
735
904
 
905
+ def get_col_or_index(df, col: str) -> pd.Series | pd.Index:
906
+ try:
907
+ return df[col]
908
+ except KeyError:
909
+ for i, name in enumerate(df.index.names):
910
+ if name == col:
911
+ return df.index.get_level_values(i)
912
+ raise KeyError(col)
736
913
 
737
- def _parallel_unary_union_geoseries(
738
- ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
739
- ) -> list[Geometry]:
740
- if ser.crs is None:
741
- ser.crs = 25833
742
- _was_none = True
743
- else:
744
- _was_none = False
914
+ try:
915
+ df.index = pd.MultiIndex.from_arrays(
916
+ [cumcount, *[get_col_or_index(df, col) for col in by]]
917
+ )
918
+ except KeyError:
919
+ df.index = pd.MultiIndex.from_arrays([cumcount, by])
920
+
921
+ # to wide format: each row will be one group to be merged to one geometry
922
+ try:
923
+ geoms_wide: pd.DataFrame = df[geom_col].unstack(level=0)
924
+ except Exception as e:
925
+ bb = [*by, geom_col]
926
+ raise e.__class__(e, f"by={by}", df_orig[bb], df[geom_col]) from e
927
+ geometries_2d: NDArray[Polygon | None] = geoms_wide.values
928
+ try:
929
+ geometries_2d = make_valid(geometries_2d)
930
+ except TypeError:
931
+ # make_valid doesn't like nan, so converting to None
932
+ # np.isnan doesn't accept geometry type, so using isinstance
933
+ np_isinstance = np.vectorize(isinstance)
934
+ geometries_2d[np_isinstance(geometries_2d, Geometry) == False] = None
745
935
 
746
- if isinstance(ser.index, pd.MultiIndex):
747
- ser = ser.reset_index(drop=True)
936
+ unioned = make_valid(union_all(geometries_2d, axis=1, **kwargs))
748
937
 
749
- dissolved = (
750
- dask_geopandas.from_geopandas(ser.to_frame("geometry"), npartitions=n_jobs)
751
- .dissolve(**kwargs)
752
- .compute()
753
- )
754
- if _was_none:
755
- dissolved.crs = None
938
+ geoms = GeoSeries(unioned, name=geom_col, index=geoms_wide.index)
756
939
 
757
- return dissolved.geometry
940
+ return geoms if as_index else geoms.reset_index()
758
941
 
759
942
 
760
943
  def _parallel_unary_union(
@@ -769,36 +952,12 @@ def _parallel_unary_union(
769
952
  delayed_operations = []
770
953
  for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
771
954
  delayed_operations.append(
772
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
955
+ joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
773
956
  )
774
957
 
775
958
  return parallel(delayed_operations)
776
959
 
777
960
 
778
- def _parallel_unary_union_geoseries(
779
- ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
780
- ) -> list[Geometry]:
781
-
782
- is_one_hit = ser.groupby(**kwargs).transform("size") == 1
783
-
784
- one_hit = ser.loc[is_one_hit]
785
- many_hits = ser.loc[~is_one_hit]
786
-
787
- with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
788
- delayed_operations = []
789
- for _, geoms in many_hits.groupby(**kwargs):
790
- delayed_operations.append(
791
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
792
- )
793
-
794
- dissolved = pd.Series(
795
- parallel(delayed_operations),
796
- index=is_one_hit[lambda x: x is False].index.unique(),
797
- )
798
-
799
- return pd.concat([dissolved, one_hit]).sort_index().values
800
-
801
-
802
961
  def _parallel_unary_union_geoseries(
803
962
  ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
804
963
  ) -> list[Geometry]:
@@ -807,7 +966,7 @@ def _parallel_unary_union_geoseries(
807
966
  delayed_operations = []
808
967
  for _, geoms in ser.groupby(**kwargs):
809
968
  delayed_operations.append(
810
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
969
+ joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
811
970
  )
812
971
 
813
972
  return parallel(delayed_operations)
@@ -138,7 +138,7 @@ def to_single_geom_type(
138
138
 
139
139
  def _shapely_to_single_geom_type(geom: Geometry, geom_type: str) -> Geometry:
140
140
  parts = shapely.get_parts(geom)
141
- return shapely.unary_union(
141
+ return shapely.union_all(
142
142
  [part for part in parts if geom_type.lower() in part.geom_type.lower()]
143
143
  )
144
144
 
@@ -13,6 +13,7 @@ import shapely
13
13
  from geopandas import GeoDataFrame
14
14
  from geopandas import GeoSeries
15
15
  from pandas import DataFrame
16
+ from pandas import MultiIndex
16
17
  from pandas import Series
17
18
  from sklearn.neighbors import NearestNeighbors
18
19
 
@@ -97,29 +98,32 @@ def get_neighbor_indices(
97
98
  ['a' 'a' 'b' 'b']
98
99
 
99
100
  """
101
+ if isinstance(gdf.index, MultiIndex) or isinstance(neighbors.index, MultiIndex):
102
+ raise ValueError("get_neighbor_indices not implemented for pandas.MultiIndex")
100
103
  if gdf.crs != neighbors.crs:
101
104
  raise ValueError(f"'crs' mismatch. Got {gdf.crs} and {neighbors.crs}")
102
105
 
103
106
  if isinstance(neighbors, GeoSeries):
104
107
  neighbors = neighbors.to_frame()
108
+ else:
109
+ neighbors = neighbors[[neighbors._geometry_column_name]]
105
110
 
106
111
  # buffer and keep only geometry column
107
112
  if max_distance and predicate != "nearest":
108
- gdf = gdf.buffer(max_distance).to_frame()
113
+ gdf = gdf.buffer(max_distance).to_frame("geometry")
109
114
  else:
110
- gdf = gdf.geometry.to_frame()
115
+ gdf = gdf.geometry.to_frame("geometry")
116
+
117
+ neighbors.index.name = None
118
+ gdf.index.name = None
111
119
 
112
120
  if predicate == "nearest":
113
121
  max_distance = None if max_distance == 0 else max_distance
114
- joined = gdf.sjoin_nearest(
115
- neighbors, how="inner", max_distance=max_distance
116
- ).rename(columns={"index_right": "neighbor_index"}, errors="raise")
122
+ joined = gdf.sjoin_nearest(neighbors, how="inner", max_distance=max_distance)
117
123
  else:
118
- joined = gdf.sjoin(neighbors, how="inner", predicate=predicate).rename(
119
- columns={"index_right": "neighbor_index"}, errors="raise"
120
- )
124
+ joined = gdf.sjoin(neighbors, how="inner", predicate=predicate)
121
125
 
122
- return joined["neighbor_index"]
126
+ return joined.rename(columns={"index_right": "neighbor_index"})["neighbor_index"]
123
127
 
124
128
 
125
129
  def get_neighbor_dfs(
@@ -469,6 +473,6 @@ def _get_edges(
469
473
  Returns:
470
474
  A 2d numpy array of edges (from-to indices).
471
475
  """
472
- return np.array(
473
- [[(i, neighbor) for neighbor in indices[i]] for i in range(len(gdf))]
474
- )
476
+ row_indices = np.arange(len(indices)).reshape(-1, 1)
477
+
478
+ return np.stack((np.broadcast_to(row_indices, indices.shape), indices), axis=-1)
@@ -11,7 +11,6 @@ version of the solution from GH 2792.
11
11
  import functools
12
12
  from collections.abc import Callable
13
13
 
14
- import dask.array as da
15
14
  import geopandas as gpd
16
15
  import joblib
17
16
  import numpy as np
@@ -28,6 +27,11 @@ from shapely import make_valid
28
27
  from shapely import unary_union
29
28
  from shapely.errors import GEOSException
30
29
 
30
+ try:
31
+ import dask.array as da
32
+ except ImportError:
33
+ pass
34
+
31
35
  from .general import _determine_geom_type_args
32
36
  from .general import clean_geoms
33
37
  from .geometry_types import get_geom_type
@@ -238,8 +242,8 @@ def _shapely_pd_overlay(
238
242
  left, right = tree.query(df1.geometry.values, predicate=predicate)
239
243
 
240
244
  pairs = _get_intersects_pairs(df1, df2, left, right, rsuffix)
241
- assert pairs.geometry.notna().all()
242
- assert pairs.geom_right.notna().all()
245
+ assert pairs.geometry.notna().all(), pairs.geometry
246
+ assert pairs.geom_right.notna().all(), pairs.geom_right
243
247
 
244
248
  if how == "intersection":
245
249
  overlayed = [