ssb-sgis 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. sgis/__init__.py +97 -115
  2. sgis/exceptions.py +3 -1
  3. sgis/geopandas_tools/__init__.py +1 -0
  4. sgis/geopandas_tools/bounds.py +75 -38
  5. sgis/geopandas_tools/buffer_dissolve_explode.py +38 -34
  6. sgis/geopandas_tools/centerlines.py +53 -44
  7. sgis/geopandas_tools/cleaning.py +87 -104
  8. sgis/geopandas_tools/conversion.py +149 -101
  9. sgis/geopandas_tools/duplicates.py +31 -17
  10. sgis/geopandas_tools/general.py +76 -48
  11. sgis/geopandas_tools/geometry_types.py +21 -7
  12. sgis/geopandas_tools/neighbors.py +20 -8
  13. sgis/geopandas_tools/overlay.py +136 -53
  14. sgis/geopandas_tools/point_operations.py +9 -8
  15. sgis/geopandas_tools/polygon_operations.py +48 -56
  16. sgis/geopandas_tools/polygons_as_rings.py +121 -78
  17. sgis/geopandas_tools/sfilter.py +14 -14
  18. sgis/helpers.py +114 -56
  19. sgis/io/dapla_functions.py +32 -23
  20. sgis/io/opener.py +13 -6
  21. sgis/io/read_parquet.py +1 -1
  22. sgis/maps/examine.py +39 -26
  23. sgis/maps/explore.py +112 -66
  24. sgis/maps/httpserver.py +12 -12
  25. sgis/maps/legend.py +124 -65
  26. sgis/maps/map.py +66 -41
  27. sgis/maps/maps.py +31 -29
  28. sgis/maps/thematicmap.py +46 -33
  29. sgis/maps/tilesources.py +3 -8
  30. sgis/networkanalysis/_get_route.py +5 -4
  31. sgis/networkanalysis/_od_cost_matrix.py +44 -1
  32. sgis/networkanalysis/_points.py +10 -4
  33. sgis/networkanalysis/_service_area.py +5 -2
  34. sgis/networkanalysis/closing_network_holes.py +20 -62
  35. sgis/networkanalysis/cutting_lines.py +55 -43
  36. sgis/networkanalysis/directednetwork.py +15 -7
  37. sgis/networkanalysis/finding_isolated_networks.py +4 -3
  38. sgis/networkanalysis/network.py +15 -13
  39. sgis/networkanalysis/networkanalysis.py +72 -54
  40. sgis/networkanalysis/networkanalysisrules.py +20 -16
  41. sgis/networkanalysis/nodes.py +2 -3
  42. sgis/networkanalysis/traveling_salesman.py +5 -2
  43. sgis/parallel/parallel.py +337 -127
  44. sgis/raster/__init__.py +6 -0
  45. sgis/raster/base.py +9 -3
  46. sgis/raster/cube.py +280 -208
  47. sgis/raster/cubebase.py +15 -29
  48. sgis/raster/indices.py +3 -7
  49. sgis/raster/methods_as_functions.py +0 -124
  50. sgis/raster/raster.py +313 -127
  51. sgis/raster/torchgeo.py +58 -37
  52. sgis/raster/zonal.py +38 -13
  53. {ssb_sgis-1.0.0.dist-info → ssb_sgis-1.0.2.dist-info}/LICENSE +1 -1
  54. {ssb_sgis-1.0.0.dist-info → ssb_sgis-1.0.2.dist-info}/METADATA +89 -18
  55. ssb_sgis-1.0.2.dist-info/RECORD +61 -0
  56. {ssb_sgis-1.0.0.dist-info → ssb_sgis-1.0.2.dist-info}/WHEEL +1 -1
  57. sgis/raster/bands.py +0 -48
  58. sgis/raster/gradient.py +0 -78
  59. ssb_sgis-1.0.0.dist-info/RECORD +0 -63
sgis/raster/cube.py CHANGED
@@ -1,11 +1,15 @@
1
1
  import functools
2
2
  import itertools
3
3
  import multiprocessing
4
- import os
5
4
  import re
6
- from copy import copy, deepcopy
5
+ from collections.abc import Callable
6
+ from collections.abc import Iterable
7
+ from collections.abc import Iterator
8
+ from copy import copy
9
+ from copy import deepcopy
7
10
  from pathlib import Path
8
- from typing import Any, Callable, Iterable, Optional, Sequence
11
+ from typing import Any
12
+ from typing import ClassVar
9
13
 
10
14
  import geopandas as gpd
11
15
  import numpy as np
@@ -13,42 +17,41 @@ import pandas as pd
13
17
  import pyproj
14
18
  import rasterio
15
19
  import shapely
16
- from geopandas import GeoDataFrame, GeoSeries
17
- from pandas import DataFrame, Series
18
- from pandas.api.types import is_dict_like, is_list_like
20
+ from geopandas import GeoDataFrame
21
+ from geopandas import GeoSeries
22
+ from pandas import DataFrame
23
+ from pandas import Series
24
+ from pandas.api.types import is_dict_like
25
+ from pandas.api.types import is_list_like
19
26
  from rasterio import merge as rasterio_merge
20
27
 
21
-
22
28
  try:
23
29
  import xarray as xr
24
- from rioxarray.merge import merge_arrays
25
30
  from xarray import Dataset
26
31
  except ImportError:
27
32
 
28
33
  class Dataset:
29
- pass
34
+ """Placeholder."""
30
35
 
31
36
 
32
- from rtree.index import Index, Property
37
+ from rtree.index import Index
38
+ from rtree.index import Property
33
39
  from shapely import Geometry
34
40
  from typing_extensions import Self # TODO: imperter fra typing når python 3.11
35
41
 
36
- from ..geopandas_tools.bounds import get_total_bounds, make_grid
37
- from ..geopandas_tools.conversion import (
38
- crs_to_string,
39
- is_bbox_like,
40
- to_bbox,
41
- to_shapely,
42
- )
42
+ from ..geopandas_tools.bounds import make_grid
43
+ from ..geopandas_tools.conversion import is_bbox_like
44
+ from ..geopandas_tools.conversion import to_bbox
45
+ from ..geopandas_tools.conversion import to_shapely
43
46
  from ..geopandas_tools.general import get_common_crs
44
47
  from ..geopandas_tools.overlay import clean_overlay
45
- from ..helpers import dict_zip_intersection, get_all_files, get_numpy_func
48
+ from ..helpers import get_all_files
49
+ from ..helpers import get_numpy_func
46
50
  from ..io._is_dapla import is_dapla
47
51
  from ..io.opener import opener
48
52
  from ..parallel.parallel import Parallel
49
53
  from .raster import Raster
50
54
 
51
-
52
55
  try:
53
56
  from torchgeo.datasets.geo import RasterDataset
54
57
  from torchgeo.datasets.utils import BoundingBox
@@ -57,13 +60,15 @@ except ImportError:
57
60
  class BoundingBox:
58
61
  """Placeholder."""
59
62
 
60
- def __init__(self, *args, **kwargs):
63
+ def __init__(self, *args, **kwargs) -> None:
64
+ """Placeholder."""
61
65
  raise ImportError("missing optional dependency 'torchgeo'")
62
66
 
63
67
  class RasterDataset:
64
68
  """Placeholder."""
65
69
 
66
- def __init__(self, *args, **kwargs):
70
+ def __init__(self, *args, **kwargs) -> None:
71
+ """Placeholder."""
67
72
  raise ImportError("missing optional dependency 'torchgeo'")
68
73
 
69
74
 
@@ -76,7 +81,7 @@ except ImportError:
76
81
  """Placeholder."""
77
82
 
78
83
  class Tensor:
79
- pass
84
+ """Placeholder to reference torch.Tensor."""
80
85
 
81
86
 
82
87
  try:
@@ -85,37 +90,36 @@ except ImportError:
85
90
  pass
86
91
 
87
92
  try:
88
- from dapla import FileClient, write_pandas
93
+ from dapla import FileClient
94
+ from dapla import write_pandas
89
95
  except ImportError:
90
96
  pass
91
97
 
92
- from .bands import Sentinel2
93
- from .base import ALLOWED_KEYS, NESSECARY_META, get_index_mapper
94
- from .cubebase import _from_gdf_func, _method_as_func, _raster_from_path, _write_func
95
- from .indices import get_raster_pairs, index_calc_pair
96
- from .zonal import make_geometry_iterrows, prepare_zonal, zonal_func, zonal_post
98
+ from .base import ALLOWED_KEYS
99
+ from .base import NESSECARY_META
100
+ from .base import get_index_mapper
101
+ from .cubebase import _from_gdf_func
102
+ from .cubebase import _method_as_func
103
+ from .cubebase import _raster_from_path
104
+ from .cubebase import _write_func
105
+ from .indices import get_raster_pairs
106
+ from .indices import index_calc_pair
107
+ from .zonal import _make_geometry_iterrows
108
+ from .zonal import _prepare_zonal
109
+ from .zonal import _zonal_func
110
+ from .zonal import _zonal_post
97
111
 
112
+ TORCHGEO_RETURN_TYPE = dict[str, torch.Tensor | pyproj.CRS | BoundingBox]
98
113
 
99
- class DataCube:
100
- """Experimental.
101
-
102
- Examples
103
- --------
104
114
 
105
- >>> cube = sg.DataCube.from_root(...)
106
- >>> clipped = cube.clip(mask).merge(by="date")
107
- >>>
108
- """
109
-
110
- CUBE_DF_NAME = "cube_df.parquet"
115
+ class DataCube:
116
+ """Experimental."""
111
117
 
112
- CANON_RASTER_TYPES = {
113
- "Raster": Raster,
114
- "Sentinel2": Sentinel2,
115
- }
118
+ CUBE_DF_NAME: ClassVar[str] = "cube_df.parquet"
116
119
 
117
- separate_files = True
118
- is_image = True
120
+ separate_files: ClassVar[bool] = True
121
+ is_image: ClassVar[bool] = True
122
+ date_format: ClassVar[str | None] = None
119
123
 
120
124
  def __init__(
121
125
  self,
@@ -124,8 +128,18 @@ class DataCube:
124
128
  res: int | None = None,
125
129
  nodata: int | None = None,
126
130
  copy: bool = False,
127
- parallelizer: Optional[Parallel] = None,
131
+ parallelizer: Parallel | None = None,
128
132
  ) -> None:
133
+ """Initialize a DataCube instance with optional Raster data.
134
+
135
+ Args:
136
+ data: Iterable of Raster objects or a single DataCube to copy data from.
137
+ crs: Coordinate reference system to be applied to the images.
138
+ res: Spatial resolution of the images, applied uniformly to all Rasters.
139
+ nodata: Nodata value to unify across all Rasters within the cube.
140
+ copy: If True, makes deep copies of Rasters provided.
141
+ parallelizer: sgis.Parallel instance to handle concurrent operations.
142
+ """
129
143
  self._arrays = None
130
144
  self._res = res
131
145
  self.parallelizer = parallelizer
@@ -190,19 +204,36 @@ class DataCube:
190
204
  root: str | Path,
191
205
  *,
192
206
  res: int | None = None,
193
- raster_type: Raster = Raster,
194
207
  check_for_df: bool = True,
195
208
  contains: str | None = None,
196
209
  endswith: str = ".tif",
197
- regex: str | None = None,
198
- parallelizer: Optional[Parallel] = None,
210
+ filename_regex: str | None = None,
211
+ parallelizer: Parallel | None = None,
199
212
  file_system=None,
200
213
  **kwargs,
201
- ):
202
- kwargs = {
203
- "raster_type": raster_type,
204
- "res": res,
205
- } | kwargs
214
+ ) -> "DataCube":
215
+ """Construct a DataCube by searching for files starting from a root directory.
216
+
217
+ Args:
218
+ root: Root directory path to search for raster image files.
219
+ res: Resolution to unify the data within the cube.
220
+ check_for_df: Check for a parquet file in the root directory
221
+ that holds metadata for the files in the directory.
222
+ contains: Filter files containing specific substrings.
223
+ endswith: Filter files that end with specific substrings.
224
+ filename_regex: Regular expression to match file names
225
+ and attributes (date, band, tile, resolution).
226
+ parallelizer: sgis.Parallel instance for concurrent file processing.
227
+ file_system: File system to use for file operations, used in GCS environment.
228
+ **kwargs: Additional keyword arguments to pass to 'from_path' method.
229
+
230
+ Returns:
231
+ An instance of DataCube containing the raster data from specified paths.
232
+ """
233
+ kwargs["res"] = res
234
+ kwargs["filename_regex"] = filename_regex
235
+ kwargs["contains"] = contains
236
+ kwargs["endswith"] = endswith
206
237
 
207
238
  if is_dapla():
208
239
  if file_system is None:
@@ -217,35 +248,16 @@ class DataCube:
217
248
 
218
249
  dfs = [path for path in paths if path.endswith(cls.CUBE_DF_NAME)]
219
250
 
220
- if contains:
221
- paths = [path for path in paths if contains in path]
222
- if endswith:
223
- paths = [path for path in paths if path.endswith(endswith)]
224
- if regex:
225
- regex = re.compile(regex)
226
- paths = [path for path in paths if re.search(regex, path)]
227
- if raster_type.filename_regex is not None:
228
- # regex = raster_type.filename_regex
229
- # paths = [path for path in paths if re.search(regex, Path(path).name)]
230
- regex = re.compile(raster_type.filename_regex, re.VERBOSE)
231
- paths = [
232
- path
233
- for path in paths
234
- if re.match(regex, os.path.basename(path))
235
- or re.search(regex, os.path.basename(path))
236
- ]
237
-
238
251
  if not check_for_df or not len(dfs):
239
252
  return cls.from_paths(
240
253
  paths,
241
- # indexes=indexes,
242
254
  parallelizer=parallelizer,
243
255
  **kwargs,
244
256
  )
245
257
 
246
258
  folders_with_df: set[Path] = {Path(path).parent for path in dfs if path}
247
259
 
248
- cubes: list[DataCube] = [cls.from_cube_df(df, **kwargs) for df in dfs]
260
+ cubes: list[DataCube] = [cls.from_cube_df(df, res=res) for df in dfs]
249
261
 
250
262
  paths_in_folders_without_df = [
251
263
  path for path in paths if Path(path).parent not in folders_with_df
@@ -268,20 +280,46 @@ class DataCube:
268
280
  paths: Iterable[str | Path],
269
281
  *,
270
282
  res: int | None = None,
271
- raster_type: Raster = Raster,
272
- parallelizer: Optional[Parallel] = None,
283
+ parallelizer: Parallel | None = None,
273
284
  file_system=None,
285
+ contains: str | None = None,
286
+ endswith: str = ".tif",
287
+ filename_regex: str | None = None,
274
288
  **kwargs,
275
- ):
289
+ ) -> "DataCube":
290
+ """Create a DataCube from a list of file paths.
291
+
292
+ Args:
293
+ paths: Iterable of file paths to raster files.
294
+ res: Resolution to unify the data within the cube.
295
+ parallelizer: Joblib Parallel instance for concurrent file processing.
296
+ file_system: File system to use for file operations, used in Dapla environment.
297
+ contains: Filter files containing specific substrings.
298
+ endswith: Filter files that end with specific substrings.
299
+ filename_regex: Regular expression to match file names.
300
+ **kwargs: Additional keyword arguments to pass to the raster loading function.
301
+
302
+ Returns:
303
+ An instance of DataCube containing the raster data from specified paths.
304
+ """
276
305
  crs = kwargs.pop("crs", None)
277
306
 
307
+ if contains:
308
+ paths = [path for path in paths if contains in path]
309
+ if endswith:
310
+ paths = [path for path in paths if path.endswith(endswith)]
311
+ if filename_regex:
312
+ compiled = re.compile(filename_regex, re.VERBOSE)
313
+ paths = [path for path in paths if re.search(compiled, Path(path).name)]
314
+
278
315
  if not paths:
279
316
  return cls(crs=crs, parallelizer=parallelizer, res=res)
280
317
 
281
- kwargs = dict(raster_type=raster_type, res=res) | kwargs
318
+ kwargs["res"] = res
319
+ kwargs["filename_regex"] = filename_regex
282
320
 
283
321
  if file_system is None and is_dapla():
284
- kwargs |= {"file_system": FileClient.get_gcs_file_system()}
322
+ kwargs["file_system"] = FileClient.get_gcs_file_system()
285
323
 
286
324
  if parallelizer is None:
287
325
  rasters: list[Raster] = [
@@ -302,17 +340,24 @@ class DataCube:
302
340
  gdf: GeoDataFrame | Iterable[GeoDataFrame],
303
341
  columns: str | Iterable[str],
304
342
  res: int,
305
- parallelizer: Optional[Parallel] = None,
343
+ parallelizer: Parallel | None = None,
306
344
  tile_size: int | None = None,
307
345
  grid: GeoSeries | None = None,
308
- raster_type: Raster = Raster,
309
346
  **kwargs,
310
- ):
311
- """
347
+ ) -> "DataCube":
348
+ """Create a DataCube from a GeoDataFrame or a set of them, tiling the spatial data as specified.
312
349
 
313
350
  Args:
314
- grid: A grid.
315
- **kwargs: Keyword arguments passed to Raster.from_gdf.
351
+ gdf: GeoDataFrame or an iterable of GeoDataFrames to rasterize.
352
+ columns: The column(s) in the GeoDataFrame that will be used as values for the rasterization.
353
+ res: Spatial resolution of the output rasters.
354
+ parallelizer: Joblib Parallel instance for concurrent processing.
355
+ tile_size: Size of each tile/grid cell in the output raster.
356
+ grid: Predefined grid to align the rasterization.
357
+ **kwargs: Additional keyword arguments passed to Raster.from_gdf.
358
+
359
+ Returns:
360
+ An instance of DataCube containing rasterized data from the GeoDataFrame(s).
316
361
  """
317
362
  if grid is None and tile_size is None:
318
363
  raise ValueError("Must specify either 'tile_size' or 'grid'.")
@@ -335,7 +380,6 @@ class DataCube:
335
380
  _from_gdf_func,
336
381
  columns=columns,
337
382
  res=res,
338
- raster_type=raster_type,
339
383
  **kwargs,
340
384
  )
341
385
 
@@ -344,16 +388,16 @@ class DataCube:
344
388
 
345
389
  rasters = []
346
390
 
347
- if processes > 1:
391
+ if parallelizer.processes > 1:
348
392
  rasters = parallelizer.map(
349
393
  clean_overlay, gdf, args=(grid,), kwargs=dict(keep_geom_type=True)
350
394
  )
351
- with multiprocessing.get_context("spawn").Pool(processes) as p:
395
+ with multiprocessing.get_context("spawn").Pool(parallelizer.processes) as p:
352
396
  for frame in gdf:
353
397
  frame = frame.overlay(grid, keep_geom_type=True)
354
398
  gdfs = to_gdf_list(frame)
355
399
  rasters += p.map(partial_func, gdfs)
356
- elif processes < 1:
400
+ elif parallelizer.processes < 1:
357
401
  raise ValueError("processes must be an integer 1 or greater.")
358
402
  else:
359
403
  for frame in gdf:
@@ -364,7 +408,18 @@ class DataCube:
364
408
  return cls(rasters, res=res)
365
409
 
366
410
  @classmethod
367
- def from_cube_df(cls, df: DataFrame | str | Path, res: int | None = None):
411
+ def from_cube_df(
412
+ cls, df: DataFrame | str | Path, res: int | None = None
413
+ ) -> "DataCube":
414
+ """Construct a DataCube from a DataFrame or path containing metadata or paths of rasters.
415
+
416
+ Args:
417
+ df: DataFrame, path to a DataFrame, or string path pointing to cube data.
418
+ res: Optional resolution to standardize all rasters to this resolution.
419
+
420
+ Returns:
421
+ A DataCube instance containing the raster data described by the DataFrame.
422
+ """
368
423
  if isinstance(df, (str, Path)):
369
424
  df = read_geopandas(df) if is_dapla() else gpd.read_parquet(df)
370
425
 
@@ -381,28 +436,15 @@ class DataCube:
381
436
  elif not isinstance(df, DataFrame):
382
437
  raise TypeError("df must be DataFrame or file path to a parquet file.")
383
438
 
384
- try:
385
- raster_types = [cls.CANON_RASTER_TYPES[x] for x in df["type"]]
386
- except KeyError:
387
- for x in df["type"]:
388
- try:
389
- cls.CANON_RASTER_TYPES[x]
390
- except KeyError:
391
- raise ValueError(
392
- f"Cannot convert raster type '{x}' to a Raster instance."
393
- )
394
-
395
439
  rasters: list[Raster] = [
396
- raster_type.from_dict(meta)
397
- for raster_type, (_, meta) in zip(
398
- raster_types, df[NESSECARY_META].iterrows()
399
- )
440
+ Raster.from_dict(meta) for _, meta in (df[NESSECARY_META].iterrows())
400
441
  ]
401
442
  return cls(rasters)
402
443
 
403
444
  def to_gdf(
404
445
  self, column: str | None = None, ignore_index: bool = False, concat: bool = True
405
446
  ) -> GeoDataFrame:
447
+ """Convert DataCube to GeoDataFrame."""
406
448
  gdfs = self.run_raster_method("to_gdf", column=column, return_self=False)
407
449
 
408
450
  if concat:
@@ -410,6 +452,7 @@ class DataCube:
410
452
  return gdfs
411
453
 
412
454
  def to_xarray(self) -> Dataset:
455
+ """Convert DataCube to an xarray.Dataset."""
413
456
  return xr.Dataset({i: r.to_xarray() for i, r in enumerate(self.data)})
414
457
 
415
458
  def zonal(
@@ -420,16 +463,17 @@ class DataCube:
420
463
  by_date: bool | None = None,
421
464
  dropna: bool = True,
422
465
  ) -> GeoDataFrame:
466
+ """Calculate zonal statistics within polygons."""
423
467
  idx_mapper, idx_name = get_index_mapper(polygons)
424
- polygons, aggfunc, func_names = prepare_zonal(polygons, aggfunc)
425
- poly_iter = make_geometry_iterrows(polygons)
468
+ polygons, aggfunc, func_names = _prepare_zonal(polygons, aggfunc)
469
+ poly_iter = _make_geometry_iterrows(polygons)
426
470
 
427
471
  if by_date is None:
428
472
  by_date: bool = all(r.date is not None for r in self)
429
473
 
430
474
  if not self.parallelizer:
431
475
  aggregated: list[DataFrame] = [
432
- zonal_func(
476
+ _zonal_func(
433
477
  poly,
434
478
  cube=self,
435
479
  array_func=array_func,
@@ -441,7 +485,7 @@ class DataCube:
441
485
  ]
442
486
  else:
443
487
  aggregated: list[DataFrame] = self.parallelizer.map(
444
- zonal_func,
488
+ _zonal_func,
445
489
  poly_iter,
446
490
  kwargs=dict(
447
491
  cube=self,
@@ -452,7 +496,7 @@ class DataCube:
452
496
  ),
453
497
  )
454
498
 
455
- return zonal_post(
499
+ return _zonal_post(
456
500
  aggregated,
457
501
  polygons=polygons,
458
502
  idx_mapper=idx_mapper,
@@ -461,6 +505,7 @@ class DataCube:
461
505
  )
462
506
 
463
507
  def gradient(self, degrees: bool = False) -> Self:
508
+ """Get gradients in each image."""
464
509
  self.data = self.run_raster_method("gradient", degrees=degrees)
465
510
  return self
466
511
 
@@ -500,6 +545,7 @@ class DataCube:
500
545
  return self
501
546
 
502
547
  def load(self, copy: bool = True, **kwargs) -> Self:
548
+ """Load all images as arrays into a DataCube copy."""
503
549
  if self.crs is None:
504
550
  self._crs = get_common_crs(self.data)
505
551
 
@@ -509,13 +555,17 @@ class DataCube:
509
555
 
510
556
  return cube
511
557
 
512
- def intersects(self, other, copy: bool = True) -> Self:
558
+ def intersection(self, other: Any, copy: bool = True) -> Self:
559
+ """Select the images that intersect 'other'."""
513
560
  other = to_shapely(other)
514
561
  cube = self.copy() if copy else self
515
562
  cube = cube[cube.boxes.intersects(other)]
516
563
  return cube
517
564
 
518
- def sfilter(self, other, copy: bool = True) -> Self:
565
+ def sfilter(
566
+ self, other: GeoDataFrame | GeoSeries | Geometry | tuple, copy: bool = True
567
+ ) -> Self:
568
+ """Spatially filter images by bounding box or geometry object."""
519
569
  other = to_shapely(other)
520
570
  cube = self.copy() if copy else self
521
571
  cube.data = [raster for raster in self if raster.unary_union.intersects(other)]
@@ -524,6 +574,7 @@ class DataCube:
524
574
  def clip(
525
575
  self, mask: GeoDataFrame | GeoSeries | Geometry, copy: bool = True, **kwargs
526
576
  ) -> Self:
577
+ """Clip the images by bounding box or geometry object."""
527
578
  if self.crs is None:
528
579
  self._crs = get_common_crs(self.data)
529
580
 
@@ -541,16 +592,24 @@ class DataCube:
541
592
  cube.data = cube.run_raster_method("clip", mask=mask, **kwargs)
542
593
  return cube
543
594
 
544
- def clipmerge(self, mask, **kwargs) -> Self:
545
- return clipmerge(self, mask, **kwargs)
595
+ def clipmerge(self, mask: GeoDataFrame | GeoSeries | Geometry, **kwargs) -> Self:
596
+ """Clip the images and merge to one image."""
597
+ return _clipmerge(self, mask, **kwargs)
546
598
 
547
599
  def merge_by_bounds(self, by: str | list[str] | None = None, **kwargs) -> Self:
548
- return merge_by_bounds(self, by=by, **kwargs)
600
+ """Merge images with the same bounding box."""
601
+ return _merge_by_bounds(self, by=by, **kwargs)
549
602
 
550
603
  def merge(self, by: str | list[str] | None = None, **kwargs) -> Self:
551
- return merge(self, by=by, **kwargs)
604
+ """Merge all images to one."""
605
+ return _merge(self, by=by, **kwargs)
552
606
 
553
607
  def explode(self) -> Self:
608
+ """Convert from 3D to 2D arrays.
609
+
610
+ Make multi-banded arrays (3d) into multiple single-banded arrays (2d).
611
+ """
612
+
554
613
  def explode_one_raster(raster: Raster) -> list[Raster]:
555
614
  property_values = {key: getattr(raster, key) for key in raster.properties}
556
615
 
@@ -581,7 +640,8 @@ class DataCube:
581
640
  )
582
641
  return self
583
642
 
584
- def dissolve_bands(self, aggfunc, copy: bool = True) -> Self:
643
+ def dissolve_bands(self, aggfunc: Callable | str, copy: bool = True) -> Self:
644
+ """Aggregate values in 3D arrays to a single value in a 2D array."""
585
645
  self._check_for_array()
586
646
  if not callable(aggfunc) and not isinstance(aggfunc, str):
587
647
  raise TypeError("Can only supply a single aggfunc")
@@ -605,6 +665,9 @@ class DataCube:
605
665
  its array values have been recalculated.
606
666
 
607
667
  Args:
668
+ root: Directory path where the images will be written to.
669
+ file_format: File extension.
670
+ **kwargs: Keyword arguments passed to rasterio.open.
608
671
 
609
672
  """
610
673
  self._check_for_array()
@@ -617,11 +680,17 @@ class DataCube:
617
680
  ]
618
681
 
619
682
  if self.parallelizer:
620
- self.parallelizer.starmap(_write_func, zip(self, paths), kwargs=kwargs)
683
+ self.parallelizer.starmap(
684
+ _write_func, zip(self, paths, strict=False), kwargs=kwargs
685
+ )
621
686
  else:
622
- [_write_func(raster, path, **kwargs) for raster, path in zip(self, paths)]
687
+ [
688
+ _write_func(raster, path, **kwargs)
689
+ for raster, path in zip(self, paths, strict=False)
690
+ ]
623
691
 
624
692
  def write_df(self, folder: str) -> None:
693
+ """Write metadata DataFrame."""
625
694
  df = pd.DataFrame(self.meta)
626
695
 
627
696
  folder = Path(folder)
@@ -638,9 +707,10 @@ class DataCube:
638
707
  index_func: Callable,
639
708
  band_name1: str,
640
709
  band_name2: str,
641
- copy=True,
710
+ copy: bool = True,
642
711
  **kwargs,
643
712
  ) -> Self:
713
+ """Calculate an index based on a function."""
644
714
  cube = self.copy() if copy else self
645
715
 
646
716
  raster_pairs: list[tuple[Raster, Raster]] = get_raster_pairs(
@@ -658,28 +728,34 @@ class DataCube:
658
728
 
659
729
  return cube.__class__(rasters)
660
730
 
661
- def reproject_match(self) -> Self:
662
- pass
731
+ # def reproject_match(self) -> Self:
732
+ # pass
663
733
 
664
- def to_crs(self, crs, copy: bool = True) -> Self:
734
+ def to_crs(self, crs: Any, copy: bool = True) -> Self:
735
+ """Reproject the coordinates of each image."""
665
736
  cube = self.copy() if copy else self
666
737
  cube.data = [r.to_crs(crs) for r in cube]
667
738
  cube._warped_crs = crs
668
739
  return cube
669
740
 
670
- def set_crs(self, crs, allow_override: bool = False, copy: bool = True) -> Self:
741
+ def set_crs(
742
+ self, crs: Any, allow_override: bool = False, copy: bool = True
743
+ ) -> Self:
744
+ """Set the CRS of each image."""
671
745
  cube = self.copy() if copy else self
672
746
  cube.data = [r.set_crs(crs, allow_override=allow_override) for r in cube]
673
747
  cube._warped_crs = crs
674
748
  return cube
675
749
 
676
750
  def min(self) -> Series:
751
+ """Get minimum array values for each image."""
677
752
  return Series(
678
753
  self.run_raster_method("min"),
679
754
  name="min",
680
755
  )
681
756
 
682
757
  def max(self) -> Series:
758
+ """Get maximum array values for each image."""
683
759
  return Series(
684
760
  self.run_raster_method("max"),
685
761
  name="max",
@@ -693,14 +769,14 @@ class DataCube:
693
769
  return Series(data, name=attribute)
694
770
 
695
771
  def run_raster_method(
696
- self, method: str, *args, copy: bool = True, return_self=False, **kwargs
772
+ self, method: str, *args, copy: bool = True, return_self: bool = False, **kwargs
697
773
  ) -> Self:
698
774
  """Run a Raster method for each raster in the cube."""
699
775
  if not all(hasattr(r, method) for r in self):
700
776
  raise AttributeError(f"Raster has no method {method!r}.")
701
777
 
702
778
  method_as_func = functools.partial(
703
- _method_as_func, method=method, *args, **kwargs
779
+ _method_as_func, *args, method=method, **kwargs
704
780
  )
705
781
 
706
782
  cube = self.copy() if copy else self
@@ -709,6 +785,7 @@ class DataCube:
709
785
 
710
786
  @property
711
787
  def meta(self) -> list[dict]:
788
+ """Get metadata property of each raster."""
712
789
  return [raster.meta for raster in self]
713
790
 
714
791
  # @property
@@ -723,6 +800,7 @@ class DataCube:
723
800
 
724
801
  @property
725
802
  def data(self) -> list[Raster]:
803
+ """The Rasters as a list."""
726
804
  return self._data
727
805
 
728
806
  @data.setter
@@ -738,10 +816,10 @@ class DataCube:
738
816
  self._data = list(data)
739
817
 
740
818
  for i, raster in enumerate(self._data):
741
- if raster.date and raster.date_format:
819
+ if raster.date:
742
820
  try:
743
- mint, maxt = disambiguate_timestamp(raster.date, raster.date_format)
744
- except NameError:
821
+ mint, maxt = disambiguate_timestamp(raster.date, self.date_format)
822
+ except (NameError, TypeError):
745
823
  mint, maxt = 0, 1
746
824
  else:
747
825
  mint, maxt = 0, 1
@@ -751,6 +829,7 @@ class DataCube:
751
829
 
752
830
  @property
753
831
  def arrays(self) -> list[np.ndarray]:
832
+ """The arrays of the images as a list."""
754
833
  return [raster.array for raster in self]
755
834
 
756
835
  @arrays.setter
@@ -762,17 +841,14 @@ class DataCube:
762
841
  if not all(isinstance(arr, np.ndarray) for arr in new_arrays):
763
842
  raise ValueError("Must be list of numpy ndarrays")
764
843
 
765
- self.data = [raster.update(array=arr) for raster, arr in zip(self, new_arrays)]
766
-
767
- @property
768
- def raster_type(self) -> Series:
769
- return Series(
770
- [r.__class__ for r in self],
771
- name="raster_type",
772
- )
844
+ self.data = [
845
+ raster.update(array=arr)
846
+ for raster, arr in zip(self, new_arrays, strict=False)
847
+ ]
773
848
 
774
849
  @property
775
850
  def band(self) -> Series:
851
+ """Get the 'band' attribute of the rasters."""
776
852
  return Series(
777
853
  [r.band for r in self],
778
854
  name="band",
@@ -780,6 +856,7 @@ class DataCube:
780
856
 
781
857
  @property
782
858
  def dtype(self) -> Series:
859
+ """Get the 'dtype' attribute of the rasters."""
783
860
  return Series(
784
861
  [r.dtype for r in self],
785
862
  name="dtype",
@@ -787,22 +864,27 @@ class DataCube:
787
864
 
788
865
  @property
789
866
  def nodata(self) -> int | None:
867
+ """No data value."""
790
868
  return self._nodata
791
869
 
792
870
  @property
793
871
  def path(self) -> Series:
872
+ """Get the 'path' attribute of the rasters."""
794
873
  return self.raster_attribute("path")
795
874
 
796
875
  @property
797
876
  def name(self) -> Series:
877
+ """Get the 'name' attribute of the rasters."""
798
878
  return self.raster_attribute("name")
799
879
 
800
880
  @property
801
881
  def date(self) -> Series:
882
+ """Get the 'date' attribute of the rasters."""
802
883
  return self.raster_attribute("date")
803
884
 
804
885
  @property
805
886
  def indexes(self) -> Series:
887
+ """Get the 'indexes' attribute of the rasters."""
806
888
  return self.raster_attribute("indexes")
807
889
 
808
890
  # @property
@@ -811,38 +893,46 @@ class DataCube:
811
893
 
812
894
  @property
813
895
  def area(self) -> Series:
896
+ """Get the 'area' attribute of the rasters."""
814
897
  return self.raster_attribute("area")
815
898
 
816
899
  @property
817
900
  def length(self) -> Series:
901
+ """Get the 'length' attribute of the rasters."""
818
902
  return self.raster_attribute("length")
819
903
 
820
904
  @property
821
905
  def height(self) -> Series:
906
+ """Get the 'height' attribute of the rasters."""
822
907
  return self.raster_attribute("height")
823
908
 
824
909
  @property
825
910
  def width(self) -> Series:
911
+ """Get the 'width' attribute of the rasters."""
826
912
  return self.raster_attribute("width")
827
913
 
828
914
  @property
829
915
  def shape(self) -> Series:
916
+ """Get the 'shape' attribute of the rasters."""
830
917
  return self.raster_attribute("shape")
831
918
 
832
919
  @property
833
920
  def count(self) -> Series:
921
+ """Get the 'count' attribute of the rasters."""
834
922
  return self.raster_attribute("count")
835
923
 
836
924
  @property
837
925
  def res(self) -> int:
926
+ """Spatial resolution of the images."""
838
927
  return self._res
839
928
 
840
929
  @res.setter
841
- def res(self, value):
930
+ def res(self, value) -> None:
842
931
  self._res = value
843
932
 
844
933
  @property
845
934
  def crs(self) -> pyproj.CRS:
935
+ """Coordinate reference system of the images."""
846
936
  crs = self._warped_crs if hasattr(self, "_warped_crs") else self._crs
847
937
  if crs is not None:
848
938
  return crs
@@ -853,10 +943,12 @@ class DataCube:
853
943
 
854
944
  @property
855
945
  def unary_union(self) -> Geometry:
946
+ """Box polygon of the combined bounds of each image."""
856
947
  return shapely.unary_union([shapely.box(*r.bounds) for r in self])
857
948
 
858
949
  @property
859
950
  def centroid(self) -> GeoSeries:
951
+ """Get the 'centroid' attribute of the rasters."""
860
952
  return GeoSeries(
861
953
  [r.centroid for r in self],
862
954
  name="centroid",
@@ -865,11 +957,12 @@ class DataCube:
865
957
 
866
958
    @property
    def tile(self) -> Series:
        """Get the 'tile' attribute of the rasters as a pandas Series."""
        return self.raster_attribute("tile")
869
962
 
870
963
  @property
871
964
  def boxes(self) -> GeoSeries:
872
- """GeoSeries of each raster's bounds as polygon."""
965
+ """Get the 'bounds' attribute of the rasters."""
873
966
  return GeoSeries(
874
967
  [shapely.box(*r.bounds) if r.bounds is not None else None for r in self],
875
968
  name="boxes",
@@ -878,6 +971,7 @@ class DataCube:
878
971
 
879
972
  @property
880
973
  def total_bounds(self) -> tuple[float, float, float, float]:
974
+ """Combined minimum and maximum longitude and latitude."""
881
975
  return tuple(x for x in self.boxes.total_bounds)
882
976
 
883
977
  @property
@@ -889,7 +983,7 @@ class DataCube:
889
983
  """
890
984
  return BoundingBox(*self.index.bounds)
891
985
 
892
- def copy(self, deep=True) -> Self:
986
+ def copy(self, deep: bool = True) -> Self:
893
987
  """Returns a (deep) copy of the class instance and its rasters.
894
988
 
895
989
  Args:
@@ -899,19 +993,22 @@ class DataCube:
899
993
  copied.data = [raster.copy() for raster in copied]
900
994
  return copied
901
995
 
902
- def _check_for_array(self, text="") -> None:
996
+ def _check_for_array(self, text: str = "") -> None:
903
997
  mess = "Arrays are not loaded. " + text
904
998
  if all(raster.array is None for raster in self):
905
999
  raise ValueError(mess)
906
1000
 
907
1001
  def __getitem__(
908
- self, item: slice | int | Series | Sequence | Callable | Geometry | BoundingBox
909
- ) -> Self | Raster:
910
- """
1002
+ self,
1003
+ item: slice | int | Series | list | tuple | Callable | Geometry | BoundingBox,
1004
+ ) -> Self | Raster | TORCHGEO_RETURN_TYPE:
1005
+ """Select one or more of the Rasters based on indexing or spatial or boolean predicates.
911
1006
 
912
- Examples
913
- --------
914
- >>> cube = sg.DataCube.from_root(testdata, endswith=".tif", crs=25833).load()
1007
+ Examples:
1008
+ ------------
1009
+ >>> import sgis as sg
1010
+ >>> root = 'https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/raster'
1011
+ >>> cube = sg.DataCube.from_root(root, filename_regex=sg.raster.SENTINEL2_FILENAME_REGEX, crs=25833).load()
915
1012
 
916
1013
  List slicing:
917
1014
 
@@ -937,7 +1034,7 @@ class DataCube:
937
1034
  elif callable(item):
938
1035
  item = item(copy)
939
1036
  elif isinstance(item, BoundingBox):
940
- return cube_to_torch(self, item)
1037
+ return cube_to_torchgeo(self, item)
941
1038
 
942
1039
  elif isinstance(item, (GeoDataFrame, GeoSeries, Geometry)) or is_bbox_like(
943
1040
  item
@@ -956,7 +1053,8 @@ class DataCube:
956
1053
 
957
1054
  return copy
958
1055
 
959
- def __setattr__(self, attr, value):
1056
+ def __setattr__(self, attr: str, value: Any) -> None:
1057
+ """Set an attribute of the cube."""
960
1058
  if (
961
1059
  attr in ["data", "_data"]
962
1060
  or not is_list_like(value)
@@ -970,13 +1068,16 @@ class DataCube:
970
1068
  )
971
1069
  return super().__setattr__(attr, value)
972
1070
 
973
    def __iter__(self) -> Iterator[Raster]:
        """Iterate over the cube's Rasters (the underlying 'data' list)."""
        return iter(self.data)
975
1074
 
976
1075
  def __len__(self) -> int:
1076
+ """Number of Rasters."""
977
1077
  return len(self.data)
978
1078
 
979
1079
  def __repr__(self) -> str:
1080
+ """String representation."""
980
1081
  return f"{self.__class__.__name__}({len(self)})"
981
1082
 
982
1083
  # def __mul__(self, scalar) -> Self:
@@ -998,23 +1099,32 @@ class DataCube:
998
1099
  # return self.map(_pow, scalar=scalar)
999
1100
 
1000
1101
 
1001
def concat_cubes(cubes: list[DataCube], res: int | None = None) -> DataCube:
    """Concatenate cubes to one.

    Args:
        cubes: A sequence of DataCubes.
        res: Spatial resolution.

    Returns:
        The cubes combined to one.

    Raises:
        TypeError: If any element of 'cubes' is not a DataCube.
    """
    for cube in cubes:
        if not isinstance(cube, DataCube):
            raise TypeError("cubes must be of type DataCube.")

    # Flatten every cube's raster list into a single new cube.
    flat_data = [raster for cube in cubes for raster in cube.data]
    return DataCube(flat_data, res=res)
1008
1118
 
1009
1119
 
1010
def _clipmerge(cube: DataCube, mask: Any, **kwargs) -> DataCube:
    """Merge the cube's rasters clipped to 'mask'.

    Thin wrapper: forwards to _merge with 'mask' as the 'bounds' argument.
    """
    return _merge(cube, bounds=mask, **kwargs)
1012
1122
 
1013
1123
 
1014
- def merge(
1124
+ def _merge(
1015
1125
  cube: DataCube,
1016
- by=None,
1017
- bounds=None,
1126
+ by: str | list[str] | None = None,
1127
+ bounds: Any | None = None,
1018
1128
  **kwargs,
1019
1129
  ) -> DataCube:
1020
1130
  if not all(r.array is None for r in cube):
@@ -1053,10 +1163,10 @@ def merge(
1053
1163
  )
1054
1164
 
1055
1165
 
1056
- def merge_by_bounds(
1166
+ def _merge_by_bounds(
1057
1167
  cube: DataCube,
1058
- by=None,
1059
- bounds=None,
1168
+ by: str | list[str] | None = None,
1169
+ bounds: Any | None = None,
1060
1170
  **kwargs,
1061
1171
  ) -> DataCube:
1062
1172
  if isinstance(by, str):
@@ -1064,9 +1174,9 @@ def merge_by_bounds(
1064
1174
  elif by is None:
1065
1175
  by = ["tile"]
1066
1176
  else:
1067
- by = by + ["tile"]
1177
+ by = list(by) + ["tile"]
1068
1178
 
1069
- return merge(
1179
+ return _merge(
1070
1180
  cube,
1071
1181
  by=by,
1072
1182
  bounds=bounds,
@@ -1074,65 +1184,26 @@ def merge_by_bounds(
1074
1184
  )
1075
1185
 
1076
1186
 
1077
- def _merge(cube, **kwargs) -> DataCube:
1187
+ def _merge(cube: DataCube, **kwargs) -> DataCube:
1078
1188
  if cube.crs is None:
1079
1189
  cube._crs = get_common_crs(cube.data)
1080
1190
 
1081
1191
  indexes = cube[0].indexes_as_tuple()
1082
1192
 
1083
- datasets = [load_raster(raster.path) for raster in cube]
1193
+ datasets = [_load_raster(raster.path) for raster in cube]
1084
1194
  array, transform = rasterio_merge.merge(datasets, indexes=indexes, **kwargs)
1085
1195
  cube.data = [Raster.from_array(array, crs=cube.crs, transform=transform)]
1086
1196
 
1087
1197
  return cube
1088
1198
 
1089
- if all(arr is None for arr in cube.arrays):
1090
- datasets = [load_raster(raster.path) for raster in cube]
1091
- array, transform = rasterio_merge.merge(datasets, indexes=indexes, **kwargs)
1092
- cube.data = [Raster.from_array(array, crs=cube.crs, transform=transform)]
1093
- return cube
1094
-
1095
- bounds = kwargs.pop("bounds", None)
1096
-
1097
- if bounds:
1098
- xarrays = [
1099
- r.to_xarray().transpose("y", "x")
1100
- for r in cube.explode()
1101
- if r.intersects(bounds)
1102
- ]
1103
- else:
1104
- xarrays = [r.to_xarray().transpose("y", "x") for r in cube.explode()]
1105
-
1106
- if len(xarrays) > 1:
1107
- merged = merge_arrays(
1108
- xarrays,
1109
- bounds=bounds,
1110
- res=cube.res,
1111
- nodata=cube.nodata,
1112
- **kwargs,
1113
- )
1114
- else:
1115
- try:
1116
- merged = xarrays[0]
1117
- except IndexError:
1118
- cube.data = []
1119
- return cube
1120
-
1121
- array = merged.to_numpy()
1122
-
1123
- raster = cube[0].__class__
1124
- out_bounds = bounds or cube.total_bounds
1125
- cube.data = [raster.from_array(array, bounds=out_bounds, crs=cube.crs)]
1126
-
1127
- return cube
1128
-
1129
1199
 
1130
- def load_raster(path):
1200
+ def _load_raster(path: str | Path) -> rasterio.io.DatasetReader:
1131
1201
  with opener(path) as file:
1132
1202
  return rasterio.open(file)
1133
1203
 
1134
1204
 
1135
1205
  def numpy_to_torch(array: np.ndarray) -> torch.Tensor:
1206
+ """Convert numpy array to a pytorch tensor."""
1136
1207
  # fix numpy dtypes which are not supported by pytorch tensors
1137
1208
  if array.dtype == np.uint16:
1138
1209
  array = array.astype(np.int32)
@@ -1142,7 +1213,8 @@ def numpy_to_torch(array: np.ndarray) -> torch.Tensor:
1142
1213
  return torch.tensor(array)
1143
1214
 
1144
1215
 
1145
- def cube_to_torch(cube: DataCube, query: BoundingBox):
1216
+ def cube_to_torchgeo(cube: DataCube, query: BoundingBox) -> TORCHGEO_RETURN_TYPE:
1217
+ """Convert a DayaCube to the type of dict returned from torchgeo datasets __getitem__."""
1146
1218
  bbox = shapely.box(*to_bbox(query))
1147
1219
  if cube.separate_files:
1148
1220
  cube = cube.sfilter(bbox).explode().load()