ssb-sgis 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sgis/__init__.py +20 -9
  2. sgis/debug_config.py +24 -0
  3. sgis/exceptions.py +2 -2
  4. sgis/geopandas_tools/bounds.py +33 -36
  5. sgis/geopandas_tools/buffer_dissolve_explode.py +136 -35
  6. sgis/geopandas_tools/centerlines.py +4 -91
  7. sgis/geopandas_tools/cleaning.py +1576 -583
  8. sgis/geopandas_tools/conversion.py +38 -19
  9. sgis/geopandas_tools/duplicates.py +29 -8
  10. sgis/geopandas_tools/general.py +263 -100
  11. sgis/geopandas_tools/geometry_types.py +4 -4
  12. sgis/geopandas_tools/neighbors.py +19 -15
  13. sgis/geopandas_tools/overlay.py +2 -2
  14. sgis/geopandas_tools/point_operations.py +5 -5
  15. sgis/geopandas_tools/polygon_operations.py +510 -105
  16. sgis/geopandas_tools/polygons_as_rings.py +40 -8
  17. sgis/geopandas_tools/sfilter.py +29 -12
  18. sgis/helpers.py +3 -3
  19. sgis/io/dapla_functions.py +238 -19
  20. sgis/io/read_parquet.py +1 -1
  21. sgis/maps/examine.py +27 -12
  22. sgis/maps/explore.py +450 -65
  23. sgis/maps/legend.py +177 -76
  24. sgis/maps/map.py +206 -103
  25. sgis/maps/maps.py +178 -105
  26. sgis/maps/thematicmap.py +243 -83
  27. sgis/networkanalysis/_service_area.py +6 -1
  28. sgis/networkanalysis/closing_network_holes.py +2 -2
  29. sgis/networkanalysis/cutting_lines.py +15 -8
  30. sgis/networkanalysis/directednetwork.py +1 -1
  31. sgis/networkanalysis/finding_isolated_networks.py +15 -8
  32. sgis/networkanalysis/networkanalysis.py +17 -19
  33. sgis/networkanalysis/networkanalysisrules.py +1 -1
  34. sgis/networkanalysis/traveling_salesman.py +1 -1
  35. sgis/parallel/parallel.py +64 -27
  36. sgis/raster/__init__.py +0 -6
  37. sgis/raster/base.py +208 -0
  38. sgis/raster/cube.py +54 -8
  39. sgis/raster/image_collection.py +3257 -0
  40. sgis/raster/indices.py +17 -5
  41. sgis/raster/raster.py +138 -243
  42. sgis/raster/sentinel_config.py +120 -0
  43. sgis/raster/zonal.py +0 -1
  44. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/METADATA +6 -7
  45. ssb_sgis-1.0.4.dist-info/RECORD +62 -0
  46. sgis/raster/methods_as_functions.py +0 -0
  47. sgis/raster/torchgeo.py +0 -171
  48. ssb_sgis-1.0.2.dist-info/RECORD +0 -61
  49. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/LICENSE +0 -0
  50. {ssb_sgis-1.0.2.dist-info → ssb_sgis-1.0.4.dist-info}/WHEEL +0 -0
@@ -90,8 +90,10 @@ def to_shapely(obj: Any) -> Geometry:
90
90
  return obj
91
91
  if not hasattr(obj, "__iter__"):
92
92
  raise TypeError(type(obj))
93
- if hasattr(obj, "unary_union"):
94
- return obj.unary_union
93
+ try:
94
+ return shapely.union_all(obj.geometry.values)
95
+ except AttributeError:
96
+ pass
95
97
  try:
96
98
  return Point(*obj)
97
99
  except TypeError:
@@ -108,6 +110,7 @@ def to_shapely(obj: Any) -> Geometry:
108
110
  return shapely.wkb.loads(obj)
109
111
  except TypeError:
110
112
  pass
113
+ raise TypeError(type(obj), obj)
111
114
 
112
115
 
113
116
  def to_bbox(
@@ -122,25 +125,41 @@ def to_bbox(
122
125
  "xmin", "ymin", "xmax", "ymax".
123
126
  """
124
127
  if isinstance(obj, (GeoDataFrame, GeoSeries)):
125
- return tuple(obj.total_bounds)
126
- if isinstance(obj, Geometry):
127
- return tuple(obj.bounds)
128
+ bounds = tuple(obj.total_bounds)
129
+ assert isinstance(bounds, tuple)
130
+ return bounds
131
+ try:
132
+ bounds = tuple(obj.bounds)
133
+ assert isinstance(bounds, tuple)
134
+ return bounds
135
+ except Exception:
136
+ pass
128
137
 
129
138
  try:
130
- minx = int(np.min(obj["minx"])) # type: ignore [index]
131
- miny = int(np.min(obj["miny"])) # type: ignore [index]
132
- maxx = int(np.max(obj["maxx"])) # type: ignore [index]
133
- maxy = int(np.max(obj["maxy"])) # type: ignore [index]
139
+ minx = float(np.min(obj["minx"])) # type: ignore [index]
140
+ miny = float(np.min(obj["miny"])) # type: ignore [index]
141
+ maxx = float(np.max(obj["maxx"])) # type: ignore [index]
142
+ maxy = float(np.max(obj["maxy"])) # type: ignore [index]
134
143
  return minx, miny, maxx, maxy
135
144
  except Exception:
136
- try:
137
- minx = int(np.min(obj.minx)) # type: ignore [union-attr]
138
- miny = int(np.min(obj.miny)) # type: ignore [union-attr]
139
- maxx = int(np.max(obj.maxx)) # type: ignore [union-attr]
140
- maxy = int(np.max(obj.maxy)) # type: ignore [union-attr]
141
- return minx, miny, maxx, maxy
142
- except Exception:
143
- pass
145
+ pass
146
+ try:
147
+ minx = float(np.min(obj.minx)) # type: ignore [union-attr]
148
+ miny = float(np.min(obj.miny)) # type: ignore [union-attr]
149
+ maxx = float(np.max(obj.maxx)) # type: ignore [union-attr]
150
+ maxy = float(np.max(obj.maxy)) # type: ignore [union-attr]
151
+ return minx, miny, maxx, maxy
152
+ except Exception:
153
+ pass
154
+
155
+ try:
156
+ minx = float(np.min(obj["west_longitude"])) # type: ignore [index]
157
+ miny = float(np.min(obj["south_latitude"])) # type: ignore [index]
158
+ maxx = float(np.max(obj["east_longitude"])) # type: ignore [index]
159
+ maxy = float(np.max(obj["north_latitude"])) # type: ignore [index]
160
+ return minx, miny, maxx, maxy
161
+ except Exception:
162
+ pass
144
163
 
145
164
  if hasattr(obj, "geometry"):
146
165
  try:
@@ -195,7 +214,7 @@ def coordinate_array(
195
214
  np.ndarray of np.ndarrays of coordinates.
196
215
 
197
216
  Examples:
198
- --------
217
+ ---------
199
218
  >>> import sgis as sg
200
219
  >>> points = sg.to_gdf(
201
220
  ... [
@@ -279,7 +298,7 @@ def to_gdf(
279
298
  A GeoDataFrame with one column, the geometry column.
280
299
 
281
300
  Examples:
282
- --------
301
+ ---------
283
302
  >>> import sgis as sg
284
303
  >>> coords = (10, 60)
285
304
  >>> sg.to_gdf(coords, crs=4326)
@@ -8,10 +8,10 @@ from shapely import STRtree
8
8
  from shapely import difference
9
9
  from shapely import make_valid
10
10
  from shapely import simplify
11
- from shapely import unary_union
12
11
  from shapely.errors import GEOSException
13
12
 
14
13
  from .general import _determine_geom_type_args
14
+ from .general import _grouped_unary_union
15
15
  from .general import _parallel_unary_union_geoseries
16
16
  from .general import _push_geom_col
17
17
  from .general import clean_geoms
@@ -54,7 +54,7 @@ def update_geometries(
54
54
  predicate: Spatial predicate for the spatial tree.
55
55
 
56
56
  Example:
57
- ------
57
+ --------
58
58
  Create two circles and get the overlap.
59
59
 
60
60
  >>> import sgis as sg
@@ -125,10 +125,8 @@ def update_geometries(
125
125
  else:
126
126
  only_one = erasers.groupby(level=0).transform("size") == 1
127
127
  one_hit = erasers[only_one]
128
- many_hits = (
129
- erasers[~only_one]
130
- .groupby(level=0)
131
- .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
128
+ many_hits = _grouped_unary_union(
129
+ erasers[~only_one], level=0, grid_size=grid_size
132
130
  )
133
131
  erasers = pd.concat([one_hit, many_hits]).sort_index()
134
132
 
@@ -213,7 +211,7 @@ def get_intersections(
213
211
  A GeoDataFrame of the overlapping polygons.
214
212
 
215
213
  Examples:
216
- --------
214
+ ---------
217
215
  Create three partially overlapping polygons.
218
216
 
219
217
  >>> import sgis as sg
@@ -357,10 +355,33 @@ def _get_intersecting_geometries(
357
355
 
358
356
  duplicated_points = points_joined.loc[points_joined.index.duplicated(keep=False)]
359
357
 
360
- return intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
358
+ out = intersected.loc[intersected.index.isin(duplicated_points.index)].drop(
361
359
  columns=["idx_left", "idx_right"]
362
360
  )
363
361
 
362
+ # some polygons within polygons are not counted in the
363
+ within = (
364
+ gdf.assign(_range_idx_inters_left=lambda x: range(len(x)))
365
+ .sjoin(
366
+ GeoDataFrame(
367
+ {
368
+ "geometry": gdf.buffer(1e-6).values,
369
+ "_range_idx_inters_right": range(len(gdf)),
370
+ },
371
+ crs=gdf.crs,
372
+ ),
373
+ how="inner",
374
+ predicate="within",
375
+ )
376
+ .loc[lambda x: x["_range_idx_inters_left"] != x["_range_idx_inters_right"]]
377
+ .drop(
378
+ columns=["index_right", "_range_idx_inters_left", "_range_idx_inters_right"]
379
+ )
380
+ .pipe(sfilter_inverse, out.buffer(-PRECISION))
381
+ )
382
+
383
+ return pd.concat([out, within])
384
+
364
385
 
365
386
  def _drop_duplicate_geometries(gdf: GeoDataFrame, **kwargs) -> GeoDataFrame:
366
387
  """Drop geometries that are considered equal.
@@ -1,20 +1,23 @@
1
+ import functools
2
+ import itertools
1
3
  import numbers
2
4
  import warnings
3
5
  from collections.abc import Hashable
4
6
  from collections.abc import Iterable
5
7
  from typing import Any
6
8
 
7
- import dask_geopandas
8
9
  import joblib
9
10
  import numpy as np
10
11
  import pandas as pd
11
12
  import pyproj
13
+ import shapely
12
14
  from geopandas import GeoDataFrame
13
15
  from geopandas import GeoSeries
14
16
  from geopandas.array import GeometryArray
15
17
  from geopandas.array import GeometryDtype
16
18
  from numpy.typing import NDArray
17
19
  from shapely import Geometry
20
+ from shapely import extract_unique_points
18
21
  from shapely import get_coordinates
19
22
  from shapely import get_exterior_ring
20
23
  from shapely import get_interior_ring
@@ -23,10 +26,16 @@ from shapely import get_parts
23
26
  from shapely import linestrings
24
27
  from shapely import make_valid
25
28
  from shapely import points as shapely_points
26
- from shapely import unary_union
29
+ from shapely import union_all
27
30
  from shapely.geometry import LineString
31
+ from shapely.geometry import MultiPoint
28
32
  from shapely.geometry import Point
33
+ from shapely.geometry import Polygon
29
34
 
35
+ from .conversion import coordinate_array
36
+ from .conversion import to_bbox
37
+ from .conversion import to_gdf
38
+ from .conversion import to_geoseries
30
39
  from .geometry_types import get_geom_type
31
40
  from .geometry_types import make_all_singlepart
32
41
  from .geometry_types import to_single_geom_type
@@ -34,7 +43,7 @@ from .geometry_types import to_single_geom_type
34
43
 
35
44
  def split_geom_types(gdf: GeoDataFrame | GeoSeries) -> tuple[GeoDataFrame | GeoSeries]:
36
45
  return tuple(
37
- gdf.loc[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
46
+ gdf[gdf.geom_type == geom_type] for geom_type in gdf.geom_type.unique()
38
47
  )
39
48
 
40
49
 
@@ -164,7 +173,7 @@ def clean_geoms(
164
173
  non-empty and not-NaN/-None geometries.
165
174
 
166
175
  Examples:
167
- --------
176
+ ---------
168
177
  >>> import sgis as sg
169
178
  >>> import pandas as pd
170
179
  >>> from shapely import wkt
@@ -281,7 +290,7 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
281
290
  A GeoDataFrame or GeoSeries sorted from large to small in area.
282
291
 
283
292
  Examples:
284
- --------
293
+ ---------
285
294
  Create GeoDataFrame with NaN values.
286
295
 
287
296
  >>> import sgis as sg
@@ -381,35 +390,27 @@ def sort_small_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
381
390
 
382
391
 
383
392
  def make_lines_between_points(
384
- arr1: NDArray[Point] | GeometryArray | GeoSeries,
385
- arr2: NDArray[Point] | GeometryArray | GeoSeries,
393
+ *arrs: NDArray[Point] | GeometryArray | GeoSeries,
386
394
  ) -> NDArray[LineString]:
387
- """Creates an array of linestrings from two arrays of points.
395
+ """Creates an array of linestrings from two or more arrays of points.
388
396
 
389
- The operation is done rowwise.
397
+ The lines are created rowwise, meaning from arr0[0] to arr1[0], from arr0[1] to arr1[1]...
398
+ If more than two arrays are passed, e.g. three arrays,
399
+ the lines will go from arr0[0] via arr1[0] to arr2[0].
390
400
 
391
401
  Args:
392
- arr1: GeometryArray og GeoSeries of points.
393
- arr2: GeometryArray og GeoSeries of points of same length as arr1.
402
+ arrs: 1 dimensional arrays of point geometries.
403
+ All arrays must have the same shape.
404
+ Must be at least two arrays.
394
405
 
395
406
  Returns:
396
407
  A numpy array of linestrings.
397
408
 
398
- Raises:
399
- ValueError: If the arrays have unequal shape.
400
-
401
409
  """
402
- if arr1.shape != arr2.shape:
403
- raise ValueError("Arrays must have equal shape.")
404
-
405
- coords: pd.DataFrame = pd.concat(
406
- [
407
- pd.DataFrame(get_coordinates(arr1), columns=["x", "y"]),
408
- pd.DataFrame(get_coordinates(arr2), columns=["x", "y"]),
409
- ]
410
- ).sort_index()
411
-
412
- return linestrings(coords.values, indices=coords.index)
410
+ coords = [get_coordinates(arr, return_index=False) for arr in arrs]
411
+ return linestrings(
412
+ np.concatenate([coords_arr[:, None, :] for coords_arr in coords], axis=1)
413
+ )
413
414
 
414
415
 
415
416
  def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
@@ -423,7 +424,7 @@ def random_points(n: int, loc: float | int = 0.5) -> GeoDataFrame:
423
424
  A GeoDataFrame of points with n rows.
424
425
 
425
426
  Examples:
426
- --------
427
+ ---------
427
428
  >>> import sgis as sg
428
429
  >>> points = sg.random_points(10_000)
429
430
  >>> points
@@ -523,7 +524,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
523
524
  always ignores the index.
524
525
 
525
526
  Examples:
526
- --------
527
+ ---------
527
528
  Convert single polygon to linestring.
528
529
 
529
530
  >>> import sgis as sg
@@ -559,7 +560,9 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
559
560
  raise TypeError("gdf must be GeoDataFrame or GeoSeries")
560
561
 
561
562
  if any(gdf.geom_type.isin(["Point", "MultiPoint"]).any() for gdf in gdfs):
562
- raise ValueError("Cannot convert points to lines.")
563
+ raise ValueError(
564
+ f"Cannot convert points to lines. {[gdf.geom_type.value_counts() for gdf in gdfs]}"
565
+ )
563
566
 
564
567
  def _shapely_geometry_to_lines(geom):
565
568
  """Get all lines from the exterior and interiors of a Polygon."""
@@ -583,7 +586,7 @@ def to_lines(*gdfs: GeoDataFrame, copy: bool = True) -> GeoDataFrame:
583
586
 
584
587
  lines += interior_rings
585
588
 
586
- return unary_union(lines)
589
+ return union_all(lines)
587
590
 
588
591
  lines = []
589
592
  for gdf in gdfs:
@@ -673,6 +676,162 @@ def clean_clip(
673
676
  return gdf
674
677
 
675
678
 
679
+ def extend_lines(arr1, arr2, distance) -> NDArray[LineString]:
680
+ if len(arr1) != len(arr2):
681
+ raise ValueError
682
+ if not len(arr1):
683
+ return arr1
684
+
685
+ arr1, arr2 = arr2, arr1 # TODO fix
686
+
687
+ coords1 = coordinate_array(arr1)
688
+ coords2 = coordinate_array(arr2)
689
+
690
+ dx = coords2[:, 0] - coords1[:, 0]
691
+ dy = coords2[:, 1] - coords1[:, 1]
692
+ len_xy = np.sqrt((dx**2.0) + (dy**2.0))
693
+ x = coords1[:, 0] + (coords1[:, 0] - coords2[:, 0]) / len_xy * distance
694
+ y = coords1[:, 1] + (coords1[:, 1] - coords2[:, 1]) / len_xy * distance
695
+
696
+ new_points = np.array([None for _ in range(len(arr1))])
697
+ new_points[~np.isnan(x)] = shapely.points(x[~np.isnan(x)], y[~np.isnan(x)])
698
+
699
+ new_points[~np.isnan(x)] = make_lines_between_points(
700
+ arr2[~np.isnan(x)], new_points[~np.isnan(x)]
701
+ )
702
+ return new_points
703
+
704
+
705
+ def multipoints_to_line_segments_numpy(
706
+ points: GeoSeries | NDArray[MultiPoint] | MultiPoint,
707
+ cycle: bool = False,
708
+ ) -> list[LineString]:
709
+ try:
710
+ arr = get_parts(points.geometry.values)
711
+ except AttributeError:
712
+ arr = get_parts(points)
713
+
714
+ line_between_last_and_first = [LineString([arr[-1], arr[0]])] if cycle else []
715
+ return [
716
+ LineString([p0, p1]) for p0, p1 in itertools.pairwise(arr)
717
+ ] + line_between_last_and_first
718
+
719
+
720
+ def multipoints_to_line_segments(
721
+ multipoints: GeoSeries | GeoDataFrame, cycle: bool = True # to_next: bool = True,
722
+ ) -> GeoSeries | GeoDataFrame:
723
+
724
+ if not len(multipoints):
725
+ return multipoints
726
+
727
+ if isinstance(multipoints, GeoDataFrame):
728
+ df = multipoints.drop(columns=multipoints.geometry.name)
729
+ multipoints = multipoints.geometry
730
+ was_gdf = True
731
+ else:
732
+ multipoints = to_geoseries(multipoints)
733
+ was_gdf = False
734
+
735
+ multipoints = to_geoseries(multipoints)
736
+
737
+ segs = pd.Series(
738
+ [
739
+ multipoints_to_line_segments_numpy(geoms, cycle=cycle)
740
+ for geoms in multipoints
741
+ ],
742
+ index=multipoints.index,
743
+ ).explode()
744
+
745
+ segs = GeoSeries(segs, crs=multipoints.crs, name=multipoints.name)
746
+
747
+ if was_gdf:
748
+ return GeoDataFrame(df.join(segs), geometry=segs.name, crs=segs.crs)
749
+ else:
750
+ return segs
751
+
752
+
753
+ def get_line_segments(
754
+ lines: GeoDataFrame | GeoSeries, extract_unique: bool = False, cycle=False
755
+ ) -> GeoDataFrame:
756
+ try:
757
+ assert lines.index.is_unique
758
+ except AttributeError:
759
+ pass
760
+
761
+ if isinstance(lines, GeoDataFrame):
762
+ df = lines.drop(columns=lines.geometry.name)
763
+ lines = lines.geometry
764
+ was_gdf = True
765
+ else:
766
+ lines = to_geoseries(lines)
767
+ was_gdf = False
768
+
769
+ partial_segs_func = functools.partial(
770
+ multipoints_to_line_segments_numpy, cycle=cycle
771
+ )
772
+ if extract_unique:
773
+ points = extract_unique_points(lines.geometry.values)
774
+ segs = pd.Series(
775
+ [partial_segs_func(geoms) for geoms in points],
776
+ index=lines.index,
777
+ ).explode()
778
+ else:
779
+ coords, indices = shapely.get_coordinates(lines, return_index=True)
780
+ points = GeoSeries(shapely.points(coords), index=indices)
781
+ index_mapper = {
782
+ i: idx
783
+ for i, idx in zip(
784
+ np.unique(indices), lines.index.drop_duplicates(), strict=True
785
+ )
786
+ }
787
+ points.index = points.index.map(index_mapper)
788
+
789
+ segs = points.groupby(level=0).agg(partial_segs_func).explode()
790
+ segs = GeoSeries(segs, crs=lines.crs, name=lines.name)
791
+
792
+ if was_gdf:
793
+ return GeoDataFrame(df.join(segs), geometry=segs.name, crs=lines.crs)
794
+ else:
795
+ return segs
796
+
797
+
798
+ def get_index_right_columns(gdf: pd.DataFrame | pd.Series) -> list[str]:
799
+ """Get a list of what will be the resulting columns in an sjoin."""
800
+ if gdf.index.name is None and all(name is None for name in gdf.index.names):
801
+ if gdf.index.nlevels == 1:
802
+ return ["index_right"]
803
+ else:
804
+ return [f"index_right{i}" for i in range(gdf.index.nlevels)]
805
+ else:
806
+ return gdf.index.names
807
+
808
+
809
+ def points_in_bounds(
810
+ gdf: GeoDataFrame | GeoSeries, gridsize: int | float
811
+ ) -> GeoDataFrame:
812
+ """Get a GeoDataFrame of points within the bounds of the GeoDataFrame."""
813
+ minx, miny, maxx, maxy = to_bbox(gdf)
814
+ try:
815
+ crs = gdf.crs
816
+ except AttributeError:
817
+ crs = None
818
+
819
+ xs = np.linspace(minx, maxx, num=int((maxx - minx) / gridsize))
820
+ ys = np.linspace(miny, maxy, num=int((maxy - miny) / gridsize))
821
+ x_coords, y_coords = np.meshgrid(xs, ys, indexing="ij")
822
+ coords = np.concatenate((x_coords.reshape(-1, 1), y_coords.reshape(-1, 1)), axis=1)
823
+ return to_gdf(coords, crs=crs)
824
+
825
+
826
+ def points_in_polygons(
827
+ gdf: GeoDataFrame | GeoSeries, gridsize: int | float
828
+ ) -> GeoDataFrame:
829
+ index_right_col = get_index_right_columns(gdf)
830
+ out = points_in_bounds(gdf, gridsize).sjoin(gdf).set_index(index_right_col)
831
+ out.index.name = gdf.index.name
832
+ return out.sort_index()
833
+
834
+
676
835
  def _determine_geom_type_args(
677
836
  gdf: GeoDataFrame, geom_type: str | None, keep_geom_type: bool | None
678
837
  ) -> tuple[GeoDataFrame, str, bool]:
@@ -692,65 +851,93 @@ def _determine_geom_type_args(
692
851
  return gdf, geom_type, keep_geom_type
693
852
 
694
853
 
695
- def _merge_geometries(geoms: GeoSeries, grid_size=None) -> Geometry:
696
- return make_valid(unary_union(geoms, grid_size=grid_size))
854
+ def _unary_union_for_notna(geoms, **kwargs):
855
+ try:
856
+ return make_valid(union_all(geoms, **kwargs))
857
+ except TypeError:
858
+ return union_all([geom for geom in geoms.dropna().values], **kwargs)
859
+
860
+
861
+ def _grouped_unary_union(
862
+ df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
863
+ by: str | list[str] | None = None,
864
+ level: int | None = None,
865
+ as_index: bool = True,
866
+ grid_size: float | int | None = None,
867
+ dropna: bool = False,
868
+ **kwargs,
869
+ ) -> GeoSeries | GeoDataFrame:
870
+ """Vectorized unary_union for groups.
697
871
 
872
+ Experimental. Messy code.
873
+ """
874
+ df = df.copy()
875
+ df_orig = df.copy()
698
876
 
699
- def _parallel_unary_union(
700
- gdf: GeoDataFrame, n_jobs: int = 1, by=None, grid_size=None, **kwargs
701
- ) -> list[Geometry]:
702
877
  try:
703
- geom_col = gdf._geometry_column_name
878
+ geom_col = df._geometry_column_name
704
879
  except AttributeError:
705
- geom_col = "geometry"
706
-
707
- if by is not None and not isinstance(by, str):
708
- gdf = gdf.copy()
709
880
  try:
710
- gdf["_by"] = gdf[by].astype(str).agg("-".join, axis=1)
711
- except KeyError:
712
- gdf["_by"] = by
713
- by = "_by"
881
+ geom_col = df.name
882
+ if geom_col is None:
883
+ geom_col = "geometry"
884
+ except AttributeError:
885
+ geom_col = "geometry"
714
886
 
715
- if gdf.crs is None:
716
- gdf.crs = 25833
717
- _was_none = True
718
- else:
719
- _was_none = False
887
+ if not len(df):
888
+ return GeoSeries(name=geom_col)
720
889
 
721
- if isinstance(gdf.index, pd.MultiIndex):
722
- gdf = gdf.reset_index(drop=True)
890
+ if isinstance(df, pd.Series):
891
+ df.name = geom_col
892
+ original_index = df.index
893
+ df = df.reset_index()
894
+ df.index = original_index
723
895
 
724
- dissolved = (
725
- dask_geopandas.from_geopandas(gdf, npartitions=n_jobs).dissolve(by).compute()
726
- )
727
- if _was_none:
728
- dissolved.crs = None
896
+ if isinstance(by, str):
897
+ by = [by]
898
+ elif by is None and level is None:
899
+ raise TypeError("You have to supply one of 'by' and 'level'")
900
+ elif by is None:
901
+ by = df.index.get_level_values(level)
729
902
 
730
- return dissolved.geometry
903
+ cumcount = df.groupby(by, dropna=dropna).cumcount().values
731
904
 
905
+ def get_col_or_index(df, col: str) -> pd.Series | pd.Index:
906
+ try:
907
+ return df[col]
908
+ except KeyError:
909
+ for i, name in enumerate(df.index.names):
910
+ if name == col:
911
+ return df.index.get_level_values(i)
912
+ raise KeyError(col)
732
913
 
733
- def _parallel_unary_union_geoseries(
734
- ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
735
- ) -> list[Geometry]:
736
- if ser.crs is None:
737
- ser.crs = 25833
738
- _was_none = True
739
- else:
740
- _was_none = False
914
+ try:
915
+ df.index = pd.MultiIndex.from_arrays(
916
+ [cumcount, *[get_col_or_index(df, col) for col in by]]
917
+ )
918
+ except KeyError:
919
+ df.index = pd.MultiIndex.from_arrays([cumcount, by])
920
+
921
+ # to wide format: each row will be one group to be merged to one geometry
922
+ try:
923
+ geoms_wide: pd.DataFrame = df[geom_col].unstack(level=0)
924
+ except Exception as e:
925
+ bb = [*by, geom_col]
926
+ raise e.__class__(e, f"by={by}", df_orig[bb], df[geom_col]) from e
927
+ geometries_2d: NDArray[Polygon | None] = geoms_wide.values
928
+ try:
929
+ geometries_2d = make_valid(geometries_2d)
930
+ except TypeError:
931
+ # make_valid doesn't like nan, so converting to None
932
+ # np.isnan doesn't accept geometry type, so using isinstance
933
+ np_isinstance = np.vectorize(isinstance)
934
+ geometries_2d[np_isinstance(geometries_2d, Geometry) == False] = None
741
935
 
742
- if isinstance(ser.index, pd.MultiIndex):
743
- ser = ser.reset_index(drop=True)
936
+ unioned = make_valid(union_all(geometries_2d, axis=1, **kwargs))
744
937
 
745
- dissolved = (
746
- dask_geopandas.from_geopandas(ser.to_frame("geometry"), npartitions=n_jobs)
747
- .dissolve(**kwargs)
748
- .compute()
749
- )
750
- if _was_none:
751
- dissolved.crs = None
938
+ geoms = GeoSeries(unioned, name=geom_col, index=geoms_wide.index)
752
939
 
753
- return dissolved.geometry
940
+ return geoms if as_index else geoms.reset_index()
754
941
 
755
942
 
756
943
  def _parallel_unary_union(
@@ -765,36 +952,12 @@ def _parallel_unary_union(
765
952
  delayed_operations = []
766
953
  for _, geoms in gdf.groupby(by, **kwargs)[geom_col]:
767
954
  delayed_operations.append(
768
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
955
+ joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
769
956
  )
770
957
 
771
958
  return parallel(delayed_operations)
772
959
 
773
960
 
774
- def _parallel_unary_union_geoseries(
775
- ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
776
- ) -> list[Geometry]:
777
-
778
- is_one_hit = ser.groupby(**kwargs).transform("size") == 1
779
-
780
- one_hit = ser.loc[is_one_hit]
781
- many_hits = ser.loc[~is_one_hit]
782
-
783
- with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
784
- delayed_operations = []
785
- for _, geoms in many_hits.groupby(**kwargs):
786
- delayed_operations.append(
787
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
788
- )
789
-
790
- dissolved = pd.Series(
791
- parallel(delayed_operations),
792
- index=is_one_hit[lambda x: x is False].index.unique(),
793
- )
794
-
795
- return pd.concat([dissolved, one_hit]).sort_index().values
796
-
797
-
798
961
  def _parallel_unary_union_geoseries(
799
962
  ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
800
963
  ) -> list[Geometry]:
@@ -803,7 +966,7 @@ def _parallel_unary_union_geoseries(
803
966
  delayed_operations = []
804
967
  for _, geoms in ser.groupby(**kwargs):
805
968
  delayed_operations.append(
806
- joblib.delayed(_merge_geometries)(geoms, grid_size=grid_size)
969
+ joblib.delayed(_unary_union_for_notna)(geoms, grid_size=grid_size)
807
970
  )
808
971
 
809
972
  return parallel(delayed_operations)