ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,14 +7,27 @@ import geopandas as gpd
 import numpy as np
 import pandas as pd
 import pyproj
+import rasterio
 import shapely
+from affine import Affine
 from geopandas import GeoDataFrame, GeoSeries
 from pandas.api.types import is_array_like, is_dict_like, is_list_like
+from pyproj import CRS
+from rasterio import features
 from shapely import Geometry, box, wkb, wkt
-from shapely.geometry import Point
+from shapely.errors import GEOSException
+from shapely.geometry import Point, shape
 from shapely.ops import unary_union
 
 
+try:
+    from torchgeo.datasets.geo import RasterDataset
+except ImportError:
+
+    class RasterDataset:
+        """Placeholder"""
+
+
 @staticmethod
 def crs_to_string(crs):
     if crs is None:
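The torchgeo import is wrapped so the package still works without torchgeo installed: on ImportError a placeholder `RasterDataset` class is defined, which keeps later `isinstance(obj, RasterDataset)` checks valid (they simply never match). A minimal sketch of this optional-dependency pattern; the `handles_rasters` helper is hypothetical, for illustration only:

```python
try:
    from torchgeo.datasets.geo import RasterDataset
except ImportError:

    class RasterDataset:
        """Placeholder so isinstance checks stay valid without torchgeo."""


def handles_rasters(obj) -> bool:
    # Hypothetical helper: always False when torchgeo is missing,
    # since nothing ever instantiates the placeholder class.
    return isinstance(obj, RasterDataset)
```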
@@ -189,16 +202,18 @@ def coordinate_array(
 
 
 def to_gdf(
-    obj: Geometry
-    | str
-    | bytes
-    | list
-    | tuple
-    | dict
-    | GeoSeries
-    | pd.Series
-    | pd.DataFrame
-    | Iterator,
+    obj: (
+        Geometry
+        | str
+        | bytes
+        | list
+        | tuple
+        | dict
+        | GeoSeries
+        | pd.Series
+        | pd.DataFrame
+        | Iterator
+    ),
     crs: str | tuple[str] | None = None,
     geometry: str | tuple[str] | int | None = None,
     **kwargs,
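The signature change here is purely cosmetic: the union type is wrapped in parentheses, matching newer black formatting, and the accepted inputs are unchanged. Illustrative calls, assuming `to_gdf` is imported from the package:

```python
from shapely.geometry import Point

to_gdf(Point(10, 60), crs="EPSG:4326")         # a single shapely Geometry
to_gdf("POINT (10 60)", crs="EPSG:4326")       # a WKT string
to_gdf([(10, 60), (11, 59)], crs="EPSG:4326")  # an iterable of coordinate pairs
```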
@@ -316,6 +331,37 @@ def to_gdf(
         geom_col = geometry or "geometry"
         return _geoseries_to_gdf(obj, geom_col, crs, **kwargs)
 
+    if crs is None:
+        try:
+            crs = obj.crs
+        except AttributeError:
+            try:
+                matches = re.search(r"SRID=(\d+);", obj)
+            except TypeError:
+                try:
+                    matches = re.search(r"SRID=(\d+);", obj[0])
+                except Exception:
+                    pass
+            try:
+                crs = CRS(int("".join(x for x in matches.group(0) if x.isnumeric())))
+            except Exception:
+                pass
+
+    if isinstance(obj, RasterDataset):
+        # read the entire dataset
+        obj = obj[obj.bounds]
+        crs = obj["crs"]
+        array = np.array(obj["image"])
+        transform = get_transform_from_bounds(obj["bbox"], shape=array.shape)
+        return gpd.GeoDataFrame(
+            pd.DataFrame(
+                _array_to_geojson(array, transform),
+                columns=["value", "geometry"],
+            ),
+            geometry="geometry",
+            crs=crs,
+        )
+
     if is_array_like(geometry) and len(geometry) == len(obj):
         geometry = GeoSeries(
             _make_one_shapely_geom(g) for g in geometry if g is not None
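The new branch vectorizes a torchgeo `RasterDataset`: it reads the dataset's full extent in one sample, polygonizes the pixel values, and returns a GeoDataFrame with `value` and `geometry` columns. A hedged usage sketch; the path is hypothetical, the constructor argument name varies across torchgeo versions, and reading the full extent can be expensive for large rasters:

```python
from torchgeo.datasets import RasterDataset

dataset = RasterDataset(paths="/path/to/rasters")  # hypothetical path
gdf = to_gdf(dataset)
# one row per contiguous run of equal pixel values:
print(gdf.columns.tolist())  # ['value', 'geometry']
```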
@@ -425,6 +471,33 @@ def to_gdf(
     return GeoDataFrame(geometry=geoseries, crs=crs, **kwargs)
 
 
+def _array_to_geojson(array: np.ndarray, transform: Affine):
+    try:
+        return [
+            (value, shape(geom))
+            for geom, value in features.shapes(array, transform=transform)
+        ]
+    except ValueError:
+        array = array.astype(np.float32)
+        return [
+            (value, shape(geom))
+            for geom, value in features.shapes(array, transform=transform)
+        ]
+
+
+def get_transform_from_bounds(
+    obj: GeoDataFrame | GeoSeries | Geometry | tuple, shape: tuple[float, ...]
+) -> Affine:
+    minx, miny, maxx, maxy = to_bbox(obj)
+    if len(shape) == 2:
+        width, height = shape
+    elif len(shape) == 3:
+        _, width, height = shape
+    else:
+        raise ValueError
+    return rasterio.transform.from_bounds(minx, miny, maxx, maxy, width, height)
+
+
 def make_shapely_geoms(obj):
     if _is_one_geometry(obj):
         return _make_one_shapely_geom(obj)
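`get_transform_from_bounds` delegates to `rasterio.transform.from_bounds`, which maps a bounding box plus a pixel grid size to an affine transform. Note that a 2D numpy array's shape is `(rows, cols)`, i.e. `(height, width)`, so the two-tuple unpacking above reads the axes in the reverse order; for square grids the difference is invisible. A worked example of the underlying rasterio call, with made-up values:

```python
import rasterio

# A 2x2 grid over the unit square: pixel size is 0.5 on both axes,
# with a negative y step because rows run north to south.
transform = rasterio.transform.from_bounds(0.0, 0.0, 1.0, 1.0, 2, 2)
print(transform.a, transform.e)  # 0.5 -0.5
```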
@@ -583,7 +656,14 @@ def _make_one_shapely_geom(obj):
     Works recursively if the object is a nested iterable.
     """
     if isinstance(obj, str):
-        return wkt.loads(obj)
+        try:
+            return wkt.loads(obj)
+        except GEOSException:
+            if obj.startswith("geography"):
+                matches = re.search(r"SRID=(\d+);", obj)
+                srid = matches.group(0)
+                _, _wkt = obj.split(srid)
+                return wkt.loads(_wkt)
 
     if isinstance(obj, bytes):
         return wkb.loads(obj)
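The fallback handles PostGIS-style extended WKT, where an `SRID=...;` prefix makes plain `wkt.loads` raise a `GEOSException`; the prefix is located with a regex and split off before parsing. A simplified sketch of the stripping step (it skips the `startswith("geography")` guard used above):

```python
import re
from shapely import wkt

ewkt = "SRID=4326;POINT (10 60)"
srid = re.search(r"SRID=(\d+);", ewkt).group(0)  # "SRID=4326;"
_, plain = ewkt.split(srid)
print(wkt.loads(plain))  # POINT (10 60)
```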
@@ -6,10 +6,18 @@ from geopandas import GeoDataFrame, GeoSeries
 from shapely import STRtree, difference, make_valid, simplify, unary_union
 from shapely.errors import GEOSException
 
-from .buffer_dissolve_explode import parallel_unary_union_geoseries
-from .general import _determine_geom_type_args, _push_geom_col, clean_geoms
+from .general import (
+    _determine_geom_type_args,
+    _push_geom_col,
+    clean_geoms,
+    parallel_unary_union_geoseries,
+)
 from .geometry_types import get_geom_type, make_all_singlepart, to_single_geom_type
 from .overlay import _run_overlay_dask, clean_overlay, make_valid_and_keep_geom_type
+from .sfilter import sfilter_inverse, sfilter_split
+
+
+PRECISION = 1e-3
 
 
 def update_geometries(
@@ -18,6 +26,7 @@ def update_geometries(
     keep_geom_type: bool | None = None,
     grid_size: int | None = None,
     n_jobs: int = 1,
+    predicate: str | None = "intersects",
 ) -> GeoDataFrame:
     """Puts geometries on top of each other rowwise.
 
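The new `predicate` keyword is forwarded to `shapely.STRtree.query` (see the next hunk), so callers can swap the default `"intersects"` for stricter predicates such as `"within"` or `"contains"`. An illustrative query with made-up geometries:

```python
from shapely import STRtree, box

tree = STRtree([box(0, 0, 2, 2), box(1, 1, 3, 3)])
left, right = tree.query(
    [box(0, 0, 2, 2), box(1, 1, 3, 3)], predicate="intersects"
)
print(list(zip(left, right)))  # every intersecting pair, self-hits included
```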
@@ -81,40 +90,43 @@ def update_geometries(
     if len(gdf) <= 1:
         return gdf
 
-    gdf = make_all_singlepart(clean_geoms(gdf))
+    copied = make_all_singlepart(clean_geoms(gdf))
 
-    gdf, geom_type, keep_geom_type = _determine_geom_type_args(
-        gdf, geom_type, keep_geom_type
+    copied, geom_type, keep_geom_type = _determine_geom_type_args(
+        copied, geom_type, keep_geom_type
     )
 
-    geom_col = gdf._geometry_column_name
-    index_mapper = {i: idx for i, idx in enumerate(gdf.index)}
-    gdf = gdf.reset_index(drop=True)
+    geom_col = copied._geometry_column_name
+    index_mapper = {i: idx for i, idx in enumerate(copied.index)}
+    copied = copied.reset_index(drop=True)
 
-    tree = STRtree(gdf.geometry.values)
-    left, right = tree.query(gdf.geometry.values, predicate="intersects")
+    tree = STRtree(copied.geometry.values)
+    left, right = tree.query(copied.geometry.values, predicate=predicate)
     indices = pd.Series(right, index=left).loc[lambda x: x.index > x.values]
 
     # select geometries from 'right', index from 'left', dissolve by 'left'
+    erasers = pd.Series(copied.geometry.loc[indices.values].values, index=indices.index)
     if n_jobs > 1:
         erasers = parallel_unary_union_geoseries(
-            pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index),
+            erasers,
             level=0,
             n_jobs=n_jobs,
             grid_size=grid_size,
-            # index=indices.index.unique(),
         )
         erasers = pd.Series(erasers, index=indices.index.unique())
     else:
-        erasers = (
-            pd.Series(gdf.geometry.loc[indices.values].values, index=indices.index)
+        only_one = erasers.groupby(level=0).transform("size") == 1
+        one_hit = erasers[only_one]
+        many_hits = (
+            erasers[~only_one]
             .groupby(level=0)
             .agg(lambda x: make_valid(unary_union(x, grid_size=grid_size)))
         )
+        erasers = pd.concat([one_hit, many_hits]).sort_index()
 
     # match up the aggregated erasers by index
     if n_jobs > 1:
-        arr1 = gdf.geometry.loc[erasers.index].to_numpy()
+        arr1 = copied.geometry.loc[erasers.index].to_numpy()
         arr2 = erasers.to_numpy()
         try:
             erased = _run_overlay_dask(
@@ -134,28 +146,39 @@ def update_geometries(
     else:
         erased = make_valid(
             difference(
-                gdf.geometry.loc[erasers.index],
+                copied.geometry.loc[erasers.index],
                 erasers,
                 grid_size=grid_size,
             )
         )
 
-    gdf.loc[erased.index, geom_col] = erased
+    copied.loc[erased.index, geom_col] = erased
+
+    copied = copied.loc[~copied.is_empty]
 
-    gdf = gdf.loc[~gdf.is_empty]
+    copied.index = copied.index.map(index_mapper)
 
-    gdf.index = gdf.index.map(index_mapper)
+    # TODO: check why polygons disappear in rare cases. For now, just add back the missing ones.
+    disappeared = sfilter_inverse(gdf, copied.buffer(-PRECISION))
+    copied = pd.concat([copied, disappeared])
+
+    # TODO: fix duplicates again with dissolve?
+    # dups = get_intersections(copied, geom_type="polygon")
+    # dups["_cluster"] = get_cluster_mapper(dups.geometry.values)
+    # no_dups = dissexp(dups, by="_cluster").drop(columns="_cluster")
+    # copied = clean_overlay(copied, no_dups, how="update", geom_type="polygon")
 
     if keep_geom_type:
-        gdf = to_single_geom_type(gdf, geom_type)
+        copied = to_single_geom_type(copied, geom_type)
 
-    return gdf
+    return copied
 
 
 def get_intersections(
     gdf: GeoDataFrame,
     geom_type: str | None = None,
     keep_geom_type: bool | None = None,
+    predicate: str | None = "intersects",
     n_jobs: int = 1,
 ) -> GeoDataFrame:
     """Find geometries that intersect in a GeoDataFrame.
@@ -248,6 +271,7 @@ def get_intersections(
         geom_type,
         keep_geom_type,
         n_jobs=n_jobs,
+        predicate=predicate,
     ).pipe(clean_geoms)
 
     duplicated_geoms.index = duplicated_geoms["orig_idx"].values
@@ -260,7 +284,7 @@
 
 
 def _get_intersecting_geometries(
-    gdf: GeoDataFrame, geom_type, keep_geom_type, n_jobs
+    gdf: GeoDataFrame, geom_type, keep_geom_type, n_jobs, predicate
 ) -> GeoDataFrame:
     right = gdf[[gdf._geometry_column_name]]
     right["idx_right"] = right.index
@@ -280,6 +304,7 @@ def _get_intersecting_geometries(
         left,
         right,
         how="intersection",
+        predicate=predicate,
         geom_type=geom_type,
         keep_geom_type=keep_geom_type,
         n_jobs=n_jobs,
@@ -296,7 +321,12 @@ def _get_intersecting_geometries(
             continue
         intersected += [
             clean_overlay(
-                left, right, how="intersection", geom_type=geom_type, n_jobs=n_jobs
+                left,
+                right,
+                how="intersection",
+                predicate=predicate,
+                geom_type=geom_type,
+                n_jobs=n_jobs,
             )
         ]
     intersected = pd.concat(intersected, ignore_index=True).loc[are_not_identical]
@@ -299,6 +299,16 @@ def sort_large_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     return gdf.iloc[list(sorted_areas)]
 
 
+def sort_df(
+    df: pd.DataFrame | GeoDataFrame, sort_col: pd.Series
+) -> pd.DataFrame | GeoDataFrame:
+    value_mapper: dict[int, Any] = dict(enumerate(sort_col.values))
+    sorted_indices = dict(
+        reversed(sorted(value_mapper.items(), key=lambda item: item[1]))
+    )
+    return df.iloc[list(sorted_indices)]
+
+
 def sort_long_first(gdf: GeoDataFrame | GeoSeries) -> GeoDataFrame | GeoSeries:
     """Sort GeoDataFrame by length in descending order.
 
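The new `sort_df` helper orders rows by an external Series, largest values first: it enumerates the positional indices against the sort values, sorts by value, and reverses. A worked example with made-up values:

```python
import pandas as pd

df = pd.DataFrame({"name": ["a", "b", "c"]})
sizes = pd.Series([2, 9, 5])
print(sort_df(df, sort_col=sizes)["name"].tolist())  # ['b', 'c', 'a']
```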
@@ -736,6 +746,31 @@ def parallel_unary_union(
 def parallel_unary_union_geoseries(
     ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
 ) -> list[Geometry]:
+
+    is_one_hit = ser.groupby(**kwargs).transform("size") == 1
+
+    one_hit = ser.loc[is_one_hit]
+    many_hits = ser.loc[~is_one_hit]
+
+    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
+        delayed_operations = []
+        for _, geoms in many_hits.groupby(**kwargs):
+            delayed_operations.append(
+                joblib.delayed(merge_geometries)(geoms, grid_size=grid_size)
+            )
+
+        dissolved = pd.Series(
+            parallel(delayed_operations),
+            index=is_one_hit[lambda x: x == False].index.unique(),
+        )
+
+    return pd.concat([dissolved, one_hit]).sort_index().values
+
+
+def parallel_unary_union_geoseries(
+    ser: GeoSeries, n_jobs: int = 1, grid_size=None, **kwargs
+) -> list[Geometry]:
+
     with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
         delayed_operations = []
         for _, geoms in ser.groupby(**kwargs):
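Note that this hunk inserts a second `def parallel_unary_union_geoseries` directly below the new one, so the later (original) body shadows the optimized version at import time. The threading backend is viable here because shapely 2 releases the GIL during GEOS calls. A self-contained sketch of the core idea, one union per group on threads; the `union_per_group` name is hypothetical:

```python
import joblib
import pandas as pd
from shapely import unary_union


def union_per_group(ser: pd.Series, n_jobs: int = 2) -> list:
    # One unary_union per index-level-0 group, dispatched to threads.
    with joblib.Parallel(n_jobs=n_jobs, backend="threading") as parallel:
        return parallel(
            joblib.delayed(unary_union)(geoms.values)
            for _, geoms in ser.groupby(level=0)
        )
```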
@@ -7,9 +7,12 @@ GeoDataFrames.
 The results of all functions will be identical with GeoDataFrame and GeoSeries as input
 types.
 """
+
 import numpy as np
+import shapely
 from geopandas import GeoDataFrame, GeoSeries
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, concat
+from shapely import STRtree
 from sklearn.neighbors import NearestNeighbors
 
 from .conversion import coordinate_array
@@ -237,6 +240,33 @@ def get_all_distances(
     )
 
 
+def sjoin_within_distance(
+    gdf: GeoDataFrame | GeoSeries,
+    neighbors: GeoDataFrame | GeoSeries,
+    distance: int | float,
+    distance_col: str = "distance",
+    **kwargs,
+) -> GeoDataFrame:
+    """Sjoin with a buffer on the right GeoDataFrame, adding a distance column."""
+
+    new_neighbor_cols = {"__left_range_idx": range(len(neighbors))}
+    if distance:
+        new_neighbor_cols[neighbors._geometry_column_name] = lambda x: x.buffer(
+            distance
+        )
+
+    # using assign to get a copy
+    neighbors = neighbors.assign(**new_neighbor_cols)
+
+    out = gdf.sjoin(neighbors, **kwargs)
+
+    out[distance_col] = shapely.distance(
+        out.geometry.values, neighbors.geometry.iloc[out["__left_range_idx"]].values
+    )
+
+    return out.drop(columns="__left_range_idx")
+
+
 def get_k_nearest_neighbors(
     gdf: GeoDataFrame | GeoSeries,
     neighbors: GeoDataFrame | GeoSeries,
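A hedged usage sketch of `sjoin_within_distance`. Note that the right-hand geometry column is replaced by its buffer before the join, and the distance column is then computed against those buffered geometries, so pairs that fall inside the buffer get distance 0:

```python
import geopandas as gpd
from shapely.geometry import Point

points = gpd.GeoDataFrame(geometry=[Point(0, 0)], crs=25833)
stops = gpd.GeoDataFrame({"stop_id": [1]}, geometry=[Point(300, 0)], crs=25833)

joined = sjoin_within_distance(points, stops, distance=500)
print(joined["stop_id"].tolist())  # [1] -- the stop's 500 m buffer reaches the point
```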