ssb-sgis 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@ import datetime
2
2
  import functools
3
3
  import glob
4
4
  import itertools
5
- import math
6
5
  import os
7
6
  import random
8
7
  import re
@@ -11,6 +10,7 @@ from collections.abc import Callable
11
10
  from collections.abc import Iterable
12
11
  from collections.abc import Iterator
13
12
  from collections.abc import Sequence
13
+ from concurrent.futures import ThreadPoolExecutor
14
14
  from copy import deepcopy
15
15
  from dataclasses import dataclass
16
16
  from pathlib import Path
@@ -27,6 +27,7 @@ from affine import Affine
27
27
  from geopandas import GeoDataFrame
28
28
  from geopandas import GeoSeries
29
29
  from matplotlib.colors import LinearSegmentedColormap
30
+ from pandas.api.types import is_dict_like
30
31
  from rasterio.enums import MergeAlg
31
32
  from scipy import stats
32
33
  from scipy.ndimage import binary_dilation
@@ -88,10 +89,14 @@ except ImportError:
88
89
  from ..geopandas_tools.bounds import get_total_bounds
89
90
  from ..geopandas_tools.conversion import to_bbox
90
91
  from ..geopandas_tools.conversion import to_gdf
92
+ from ..geopandas_tools.conversion import to_geoseries
91
93
  from ..geopandas_tools.conversion import to_shapely
92
94
  from ..geopandas_tools.general import get_common_crs
95
+ from ..helpers import _fix_path
93
96
  from ..helpers import get_all_files
94
97
  from ..helpers import get_numpy_func
98
+ from ..helpers import is_method
99
+ from ..helpers import is_property
95
100
  from ..io._is_dapla import is_dapla
96
101
  from ..io.opener import opener
97
102
  from . import sentinel_config as config
@@ -101,7 +106,6 @@ from .base import _get_shape_from_bounds
101
106
  from .base import _get_transform_from_bounds
102
107
  from .base import get_index_mapper
103
108
  from .indices import ndvi
104
- from .regex import _any_regex_matches
105
109
  from .regex import _extract_regex_match_from_string
106
110
  from .regex import _get_first_group_match
107
111
  from .regex import _get_non_optional_groups
@@ -157,14 +161,22 @@ ALLOWED_INIT_KWARGS = [
157
161
  "band_class",
158
162
  "image_regexes",
159
163
  "filename_regexes",
160
- "bounds_regexes",
161
164
  "all_bands",
162
165
  "crs",
166
+ "backend",
163
167
  "masking",
164
168
  "_merged",
165
- "_add_metadata_attributes",
169
+ "date",
166
170
  ]
167
171
 
172
+ _load_counter: int = 0
173
+
174
+
175
+ def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
176
+ with ThreadPoolExecutor() as executor:
177
+ all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
178
+ return set(itertools.chain.from_iterable(all_paths))
179
+
168
180
 
169
181
  class ImageCollectionGroupBy:
170
182
  """Iterator and merger class returned from groupby.
@@ -216,7 +228,6 @@ class ImageCollectionGroupBy:
216
228
 
217
229
  collection = ImageCollection(
218
230
  images,
219
- # TODO band_class?
220
231
  level=self.collection.level,
221
232
  **self.collection._common_init_kwargs,
222
233
  )
@@ -254,7 +265,6 @@ class ImageCollectionGroupBy:
254
265
 
255
266
  image = Image(
256
267
  bands,
257
- # TODO band_class?
258
268
  **self.collection._common_init_kwargs,
259
269
  )
260
270
  image._merged = True
@@ -284,49 +294,20 @@ class ImageCollectionGroupBy:
284
294
  return f"{self.__class__.__name__}({len(self)})"
285
295
 
286
296
 
287
- def standardize_band_id(x: str) -> str:
288
- return x.replace("B", "").replace("A", "").zfill(2)
289
-
290
-
291
- class BandIdDict(dict):
292
- """Dict that tells the band initialiser to get the dict value of the band_id."""
293
-
294
- def __init__(self, data: dict | None = None, **kwargs) -> None:
295
- """Add dicts or kwargs."""
296
- self._standardized_keys = {}
297
- for key, value in ((data or {}) | kwargs).items():
298
- setattr(self, key, value)
299
- self._standardized_keys[standardize_band_id(key)] = value
300
-
301
- def __len__(self) -> int:
302
- """Number of items."""
303
- return len({key for key in self.__dict__ if key != "_standardized_keys"})
304
-
305
- def __getitem__(self, item: str) -> Any:
306
- """Get dict value from key."""
307
- try:
308
- return getattr(self, item)
309
- except AttributeError as e:
310
- try:
311
- return self._standardized_keys[standardize_band_id(item)]
312
- except KeyError:
313
- raise KeyError(item, self.__dict__) from e
314
-
315
-
316
297
  @dataclass(frozen=True)
317
298
  class BandMasking:
318
299
  """Frozen dict with forced keys."""
319
300
 
320
301
  band_id: str
321
- values: tuple[int]
302
+ values: Sequence[int] | dict[int, Any]
322
303
 
323
304
  def __getitem__(self, item: str) -> Any:
324
305
  """Index into attributes to mimick dict."""
325
306
  return getattr(self, item)
326
307
 
327
308
 
328
- class NoLevel:
329
- """Equivelant to None."""
309
+ class None_:
310
+ """Default value for keyword arguments that should not have a default."""
330
311
 
331
312
 
332
313
  class _ImageBase:
@@ -335,53 +316,86 @@ class _ImageBase:
335
316
  metadata_attributes: ClassVar[dict | None] = None
336
317
  masking: ClassVar[BandMasking | None] = None
337
318
 
338
- def __init__(self, *, bbox=None, **kwargs) -> None:
319
+ def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
339
320
 
340
321
  self._mask = None
341
322
  self._bounds = None
342
323
  self._merged = False
343
324
  self._from_array = False
344
- self._from_gdf = False
325
+ self._from_geopandas = False
345
326
  self.metadata_attributes = self.metadata_attributes or {}
346
327
  self._path = None
328
+ self._metadata_from_xml = False
347
329
 
348
330
  self._bbox = to_bbox(bbox) if bbox is not None else None
349
331
 
350
- if self.filename_regexes:
351
- if isinstance(self.filename_regexes, str):
352
- self.filename_regexes = (self.filename_regexes,)
353
- self.filename_patterns = [
354
- re.compile(regexes, flags=re.VERBOSE)
355
- for regexes in self.filename_regexes
356
- ]
357
- else:
358
- self.filename_patterns = ()
332
+ self.metadata = self._metadata_to_nested_dict(metadata)
359
333
 
360
- if self.image_regexes:
361
- if isinstance(self.image_regexes, str):
362
- self.image_regexes = (self.image_regexes,)
363
- self.image_patterns = [
364
- re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
365
- ]
366
- else:
367
- self.image_patterns = ()
334
+ self.image_patterns = self._compile_regexes("image_regexes")
335
+ self.filename_patterns = self._compile_regexes("filename_regexes")
368
336
 
369
337
  for key, value in kwargs.items():
338
+ error_obj = ValueError(
339
+ f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
340
+ )
370
341
  if key in ALLOWED_INIT_KWARGS and key in dir(self):
371
- setattr(self, key, value)
342
+ if is_property(self, key):
343
+ setattr(self, f"_{key}", value)
344
+ elif is_method(self, key):
345
+ raise error_obj
346
+ else:
347
+ setattr(self, key, value)
372
348
  else:
373
- raise ValueError(
374
- f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
375
- )
349
+ raise error_obj
350
+
351
+ def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
352
+ regexes = getattr(self, regex_attr)
353
+ if regexes:
354
+ if isinstance(regexes, str):
355
+ regexes = (regexes,)
356
+ return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
357
+ return ()
358
+
359
+ @staticmethod
360
+ def _metadata_to_nested_dict(
361
+ metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
362
+ ) -> dict[str, dict[str, Any]] | None:
363
+ if metadata is None:
364
+ return {}
365
+ if isinstance(metadata, (str | Path | os.PathLike)):
366
+ metadata = _read_parquet_func(metadata)
367
+
368
+ if isinstance(metadata, pd.DataFrame):
369
+
370
+ def is_scalar(x) -> bool:
371
+ """Check if scalar because 'truth value of Series is ambigous'."""
372
+ return not hasattr(x, "__len__") or len(x) <= 1
373
+
374
+ def na_to_none(x) -> None:
375
+ """Convert to None rowwise because pandas doesn't always."""
376
+ return x if not (is_scalar(x) and pd.isna(x)) else None
377
+
378
+ # to nested dict because pandas indexing gives rare KeyError with long strings
379
+ metadata = {
380
+ _fix_path(path): {
381
+ attr: na_to_none(value) for attr, value in row.items()
382
+ }
383
+ for path, row in metadata.iterrows()
384
+ }
385
+ elif is_dict_like(metadata):
386
+ metadata = {_fix_path(path): value for path, value in metadata.items()}
387
+
388
+ return metadata
376
389
 
377
390
  @property
378
391
  def _common_init_kwargs(self) -> dict:
379
392
  return {
380
- "file_system": self.file_system,
381
393
  "processes": self.processes,
382
394
  "res": self.res,
383
395
  "bbox": self._bbox,
384
396
  "nodata": self.nodata,
397
+ "backend": self.backend,
398
+ "metadata": self.metadata,
385
399
  }
386
400
 
387
401
  @property
@@ -401,6 +415,14 @@ class _ImageBase:
401
415
  """Centerpoint of the object."""
402
416
  return self.union_all().centroid
403
417
 
418
+ def assign(self, **kwargs) -> "_ImageBase":
419
+ for key, value in kwargs.items():
420
+ try:
421
+ setattr(self, key, value)
422
+ except AttributeError:
423
+ setattr(self, f"_{key}", value)
424
+ return self
425
+
404
426
  def _name_regex_searcher(
405
427
  self, group: str, patterns: tuple[re.Pattern]
406
428
  ) -> str | None:
@@ -411,18 +433,28 @@ class _ImageBase:
411
433
  return _get_first_group_match(pat, self.name)[group]
412
434
  except (TypeError, KeyError):
413
435
  pass
436
+ if isinstance(self, Band):
437
+ for pat in patterns:
438
+ try:
439
+ return _get_first_group_match(
440
+ pat, str(Path(self.path).parent.name)
441
+ )[group]
442
+ except (TypeError, KeyError):
443
+ pass
414
444
  if not any(group in _get_non_optional_groups(pat) for pat in patterns):
415
445
  return None
446
+ band_text = (
447
+ f" or {Path(self.path).parent.name!s}" if isinstance(self, Band) else ""
448
+ )
416
449
  raise ValueError(
417
- f"Couldn't find group '{group}' in name {self.name} with regex patterns {patterns}"
450
+ f"Couldn't find group '{group}' in name {self.name}{band_text} with regex patterns {patterns}"
418
451
  )
419
452
 
420
- def _create_metadata_df(self, file_paths: list[str]) -> pd.DataFrame:
453
+ def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
421
454
  """Create a dataframe with file paths and image paths that match regexes."""
422
- df = pd.DataFrame({"file_path": file_paths})
455
+ df = pd.DataFrame({"file_path": list(file_paths)})
423
456
 
424
- df["file_path"] = df["file_path"].apply(_fix_path)
425
- df["filename"] = df["file_path"].apply(lambda x: Path(x).name)
457
+ df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
426
458
 
427
459
  df["image_path"] = df["file_path"].apply(
428
460
  lambda x: _fix_path(str(Path(x).parent))
@@ -434,20 +466,20 @@ class _ImageBase:
434
466
  df = df[~df["file_path"].isin(df["image_path"])]
435
467
 
436
468
  if self.filename_patterns:
437
- df = _get_regexes_matches_for_df(df, "filename", self.filename_patterns)
469
+ df = _get_regexes_matches_for_df(df, "file_name", self.filename_patterns)
438
470
 
439
471
  if not len(df):
440
472
  return df
441
473
 
442
474
  grouped = df.drop_duplicates("image_path").set_index("image_path")
443
- for col in ["file_path", "filename"]:
475
+ for col in ["file_path", "file_name"]:
444
476
  if col in df:
445
477
  grouped[col] = df.groupby("image_path")[col].apply(tuple)
446
478
 
447
479
  grouped = grouped.reset_index()
448
480
  else:
449
481
  df["file_path"] = df.groupby("image_path")["file_path"].apply(tuple)
450
- df["filename"] = df.groupby("image_path")["filename"].apply(tuple)
482
+ df["file_name"] = df.groupby("image_path")["file_name"].apply(tuple)
451
483
  grouped = df.drop_duplicates("image_path")
452
484
 
453
485
  grouped["imagename"] = grouped["image_path"].apply(
@@ -521,7 +553,7 @@ class _ImageBandBase(_ImageBase):
521
553
  return self._name
522
554
  try:
523
555
  return Path(self.path).name
524
- except (ValueError, AttributeError):
556
+ except (ValueError, AttributeError, TypeError):
525
557
  return None
526
558
 
527
559
  @name.setter
@@ -532,22 +564,31 @@ class _ImageBandBase(_ImageBase):
532
564
  def stem(self) -> str | None:
533
565
  try:
534
566
  return Path(self.path).stem
535
- except (AttributeError, ValueError):
567
+ except (AttributeError, ValueError, TypeError):
536
568
  return None
537
569
 
538
570
  @property
539
571
  def level(self) -> str:
540
572
  return self._name_regex_searcher("level", self.image_patterns)
541
573
 
542
- def _add_metadata_attributes(self):
574
+ def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
543
575
 
544
- missing_attributes = {}
545
- for key, value in self.metadata_attributes.items():
546
- if getattr(self, key) is None:
547
- missing_attributes[key] = value
576
+ self._metadata_from_xml = True
548
577
 
549
- if not missing_attributes:
550
- return
578
+ missing_metadata_attributes = {
579
+ key: value
580
+ for key, value in metadata_attributes.items()
581
+ if not hasattr(self, key) or getattr(self, key) is None
582
+ }
583
+
584
+ nonmissing_metadata_attributes = {
585
+ key: getattr(self, key)
586
+ for key in metadata_attributes
587
+ if key not in missing_metadata_attributes
588
+ }
589
+
590
+ if not missing_metadata_attributes:
591
+ return nonmissing_metadata_attributes
551
592
 
552
593
  file_contents: list[str] = []
553
594
  for path in self._all_file_paths:
@@ -556,7 +597,7 @@ class _ImageBandBase(_ImageBase):
556
597
  with _open_func(path, "rb") as file:
557
598
  file_contents.append(file.read().decode("utf-8"))
558
599
 
559
- for key, value in missing_attributes.items():
600
+ for key, value in missing_metadata_attributes.items():
560
601
  results = None
561
602
  for i, filetext in enumerate(file_contents):
562
603
  if isinstance(value, str) and value in dir(self):
@@ -586,19 +627,54 @@ class _ImageBandBase(_ImageBase):
586
627
  if i == len(self._all_file_paths) - 1:
587
628
  raise e
588
629
 
589
- if isinstance(results, BandIdDict) and isinstance(self, Band):
590
- results = results[self.band_id]
630
+ missing_metadata_attributes[key] = results
591
631
 
592
- setattr(self, key, results)
632
+ return missing_metadata_attributes | nonmissing_metadata_attributes
633
+
634
+ def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
635
+ """Convert the raster to an xarray.DataArray."""
636
+ attrs = {"crs": self.crs}
637
+ for attr in set(self.metadata_attributes).union({"date"}):
638
+ try:
639
+ attrs[attr] = getattr(self, attr)
640
+ except Exception:
641
+ pass
642
+
643
+ if len(array.shape) == 2:
644
+ height, width = array.shape
645
+ dims = ["y", "x"]
646
+ elif len(array.shape) == 3:
647
+ height, width = array.shape[1:]
648
+ dims = ["band", "y", "x"]
649
+ elif not any(dim for dim in array.shape):
650
+ DataArray(
651
+ name=self.name or self.__class__.__name__,
652
+ attrs=attrs,
653
+ )
654
+ else:
655
+ raise ValueError(
656
+ f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
657
+ )
658
+
659
+ coords = _generate_spatial_coords(transform, width, height)
660
+
661
+ return DataArray(
662
+ array,
663
+ coords=coords,
664
+ dims=dims,
665
+ name=self.name or self.__class__.__name__,
666
+ attrs=attrs,
667
+ )
593
668
 
594
669
 
595
670
  class Band(_ImageBandBase):
596
671
  """Band holding a single 2 dimensional array representing an image band."""
597
672
 
598
673
  cmap: ClassVar[str | None] = None
674
+ backend: str = "numpy"
599
675
 
600
676
  @classmethod
601
- def from_gdf(
677
+ def from_geopandas(
602
678
  cls,
603
679
  gdf: GeoDataFrame | GeoSeries,
604
680
  res: int,
@@ -622,18 +698,17 @@ class Band(_ImageBandBase):
622
698
  )
623
699
 
624
700
  obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
625
- obj._from_gdf = True
701
+ obj._from_geopandas = True
626
702
  return obj
627
703
 
628
704
  def __init__(
629
705
  self,
630
- data: str | np.ndarray,
631
- res: int | None,
706
+ data: str | np.ndarray | None = None,
707
+ res: int | None_ = None_,
632
708
  crs: Any | None = None,
633
709
  bounds: tuple[float, float, float, float] | None = None,
634
710
  nodata: int | None = None,
635
711
  mask: "Band | None" = None,
636
- file_system: GCSFileSystem | None = None,
637
712
  processes: int = 1,
638
713
  name: str | None = None,
639
714
  band_id: str | None = None,
@@ -642,6 +717,16 @@ class Band(_ImageBandBase):
642
717
  **kwargs,
643
718
  ) -> None:
644
719
  """Band initialiser."""
720
+ if callable(res) and isinstance(res(), None_):
721
+ raise TypeError("Must specify 'res'")
722
+
723
+ if data is None:
724
+ # allowing 'path' to replace 'data' as argument
725
+ # to make the print repr. valid as initialiser
726
+ if "path" not in kwargs:
727
+ raise TypeError("Must specify either 'data' or 'path'.")
728
+ data = kwargs.pop("path")
729
+
645
730
  super().__init__(**kwargs)
646
731
 
647
732
  if isinstance(data, (str | Path | os.PathLike)) and any(
@@ -657,20 +742,13 @@ class Band(_ImageBandBase):
657
742
  self._bounds = bounds
658
743
  self._all_file_paths = all_file_paths
659
744
 
660
- self._image = None
661
-
662
- for key in self.metadata_attributes:
663
- setattr(self, key, None)
664
-
665
745
  if isinstance(data, np.ndarray):
666
- self.values = data
667
746
  if self._bounds is None:
668
747
  raise ValueError("Must specify bounds when data is an array.")
669
748
  self._crs = crs
670
- self.transform = _get_transform_from_bounds(
671
- self._bounds, shape=self.values.shape
672
- )
749
+ self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
673
750
  self._from_array = True
751
+ self.values = data
674
752
 
675
753
  elif not isinstance(data, (str | Path | os.PathLike)):
676
754
  raise TypeError(
@@ -678,44 +756,48 @@ class Band(_ImageBandBase):
678
756
  f"Got {type(data)}"
679
757
  )
680
758
  else:
681
- self._path = str(data)
759
+ self._path = _fix_path(str(data))
682
760
 
683
761
  self._res = res
684
762
  if cmap is not None:
685
763
  self.cmap = cmap
686
- self.file_system = file_system
687
764
  self._name = name
688
765
  self._band_id = band_id
689
766
  self.processes = processes
690
767
 
691
- if (
692
- kwargs.get("_add_metadata_attributes", True)
693
- and self.metadata_attributes
694
- and self.path is not None
695
- ):
768
+ if self._all_file_paths:
769
+ self._all_file_paths = {_fix_path(path) for path in self._all_file_paths}
770
+ parent = _fix_path(Path(self.path).parent)
771
+ self._all_file_paths = {
772
+ path for path in self._all_file_paths if parent in path
773
+ }
774
+
775
+ if self.metadata:
776
+ if self.path is not None:
777
+ self.metadata = {
778
+ key: value
779
+ for key, value in self.metadata.items()
780
+ if key == self.path
781
+ }
782
+ this_metadata = self.metadata[self.path]
783
+ for key, value in this_metadata.items():
784
+ if key in dir(self):
785
+ setattr(self, f"_{key}", value)
786
+ else:
787
+ setattr(self, key, value)
788
+
789
+ elif self.metadata_attributes and self.path is not None and not self.is_mask:
696
790
  if self._all_file_paths is None:
697
791
  self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
698
- self._add_metadata_attributes()
792
+ for key, value in self._get_metadata_attributes(
793
+ self.metadata_attributes
794
+ ).items():
795
+ setattr(self, key, value)
699
796
 
700
797
  def __lt__(self, other: "Band") -> bool:
701
798
  """Makes Bands sortable by band_id."""
702
799
  return self.band_id < other.band_id
703
800
 
704
- # def __getattribute__(self, attr: str) -> Any:
705
- # # try:
706
- # # value =
707
- # # except AttributeError:
708
- # # value = None
709
-
710
- # if (
711
- # attr in (super().__getattribute__("metadata_attributes") or {})
712
- # and super().__getattribute__(attr) is None
713
- # ):
714
- # if self._all_file_paths is None:
715
- # self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
716
- # self._add_metadata_attributes()
717
- # return super().__getattribute__(attr)
718
-
719
801
  @property
720
802
  def values(self) -> np.ndarray:
721
803
  """The numpy array, if loaded."""
@@ -725,23 +807,35 @@ class Band(_ImageBandBase):
725
807
 
726
808
  @values.setter
727
809
  def values(self, new_val):
728
- if not isinstance(new_val, np.ndarray):
729
- raise TypeError(
730
- f"{self.__class__.__name__} 'values' must be np.ndarray. Got {type(new_val)}"
731
- )
732
- self._values = new_val
810
+ if self.backend == "numpy" and isinstance(new_val, np.ndarray):
811
+ self._values = new_val
812
+ return
813
+ elif self.backend == "xarray" and isinstance(new_val, DataArray):
814
+ # attrs can dissappear, so doing a union
815
+ attrs = self._values.attrs | new_val.attrs
816
+ self._values = new_val
817
+ self._values.attrs = attrs
818
+ return
819
+
820
+ if self.backend == "numpy":
821
+ self._values = self._to_numpy(new_val)
822
+ if self.backend == "xarray":
823
+ if not isinstance(self._values, DataArray):
824
+ self._values = self._to_xarray(
825
+ new_val,
826
+ transform=self.transform,
827
+ )
828
+
829
+ elif isinstance(new_val, np.ndarray):
830
+ self._values.values = new_val
831
+ else:
832
+ self._values = new_val
733
833
 
734
834
  @property
735
835
  def mask(self) -> "Band":
736
836
  """Mask Band."""
737
837
  return self._mask
738
838
 
739
- @mask.setter
740
- def mask(self, values: "Band") -> None:
741
- if values is not None and not isinstance(values, Band):
742
- raise TypeError(f"'mask' should be of type Band. Got {type(values)}")
743
- self._mask = values
744
-
745
839
  @property
746
840
  def band_id(self) -> str:
747
841
  """Band id."""
@@ -752,12 +846,18 @@ class Band(_ImageBandBase):
752
846
  @property
753
847
  def height(self) -> int:
754
848
  """Pixel heigth of the image band."""
755
- return self.values.shape[-2]
849
+ try:
850
+ return self.values.shape[-2]
851
+ except IndexError:
852
+ return 0
756
853
 
757
854
  @property
758
855
  def width(self) -> int:
759
856
  """Pixel width of the image band."""
760
- return self.values.shape[-1]
857
+ try:
858
+ return self.values.shape[-1]
859
+ except IndexError:
860
+ return 0
761
861
 
762
862
  @property
763
863
  def tile(self) -> str:
@@ -779,11 +879,11 @@ class Band(_ImageBandBase):
779
879
  )
780
880
 
781
881
  @property
782
- def crs(self) -> str | None:
882
+ def crs(self) -> pyproj.CRS | None:
783
883
  """Coordinate reference system."""
784
884
  if self._crs is None:
785
885
  self._add_crs_and_bounds()
786
- return self._crs
886
+ return pyproj.CRS(self._crs)
787
887
 
788
888
  @property
789
889
  def bounds(self) -> tuple[int, int, int, int] | None:
@@ -793,7 +893,7 @@ class Band(_ImageBandBase):
793
893
  return self._bounds
794
894
 
795
895
  def _add_crs_and_bounds(self) -> None:
796
- with opener(self.path, file_system=self.file_system) as file:
896
+ with opener(self.path) as file:
797
897
  with rasterio.open(file) as src:
798
898
  self._bounds = to_bbox(src.bounds)
799
899
  self._crs = src.crs
@@ -805,7 +905,7 @@ class Band(_ImageBandBase):
805
905
  copied = self.copy()
806
906
  value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
807
907
  copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
808
- df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
908
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
809
909
  df[column] = f"largest_{n}"
810
910
  return df
811
911
 
@@ -816,48 +916,71 @@ class Band(_ImageBandBase):
816
916
  copied = self.copy()
817
917
  value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
818
918
  copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
819
- df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
919
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
820
920
  df[column] = f"smallest_{n}"
821
921
  return df
822
922
 
923
+ def clip(
924
+ self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
925
+ ) -> "Band":
926
+ """Clip band values to geometry mask."""
927
+ if not self.height or not self.width:
928
+ return self
929
+
930
+ values = _clip_xarray(
931
+ self.to_xarray(),
932
+ mask,
933
+ crs=self.crs,
934
+ **kwargs,
935
+ )
936
+ self._bounds = to_bbox(mask)
937
+ self.transform = _get_transform_from_bounds(self._bounds, values.shape)
938
+ self.values = values
939
+ return self
940
+
823
941
  def load(
824
942
  self,
825
943
  bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
826
944
  indexes: int | tuple[int] | None = None,
827
945
  masked: bool | None = None,
946
+ file_system=None,
828
947
  **kwargs,
829
948
  ) -> "Band":
830
949
  """Load and potentially clip the array.
831
950
 
832
951
  The array is stored in the 'values' property.
833
952
  """
953
+ global _load_counter
954
+ _load_counter += 1
955
+
834
956
  if masked is None:
835
957
  masked = True if self.mask is None else False
836
958
 
837
959
  bounds_was_none = bounds is None
838
960
 
839
- bounds = _get_bounds(bounds, self._bbox)
961
+ bounds = _get_bounds(bounds, self._bbox, self.union_all())
840
962
 
841
963
  should_return_empty: bool = bounds is not None and bounds.area == 0
842
964
  if should_return_empty:
843
965
  self._values = np.array([])
844
966
  if self.mask is not None and not self.is_mask:
845
- self._mask = self._mask.load()
967
+ self._mask = self._mask.load(
968
+ bounds=bounds, indexes=indexes, file_system=file_system
969
+ )
846
970
  self._bounds = None
847
971
  self.transform = None
848
- try:
849
- self._image._mask = self._mask
850
- except AttributeError:
851
- pass
972
+ self.values = self._values
973
+
852
974
  return self
853
975
 
854
976
  if self.has_array and bounds_was_none:
855
977
  return self
856
978
 
857
- # round down/up to integer to avoid precision trouble
858
979
  if bounds is not None:
859
980
  minx, miny, maxx, maxy = to_bbox(bounds)
860
- bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
981
+ ## round down/up to integer to avoid precision trouble
982
+ # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
983
+ bounds = minx, miny, maxx, maxy
861
984
 
862
985
  if indexes is None:
863
986
  indexes = 1
@@ -868,126 +991,131 @@ class Band(_ImageBandBase):
868
991
  # allow setting a fixed out_shape for the array, in order to make mask same shape as values
869
992
  out_shape = kwargs.pop("out_shape", None)
870
993
 
871
- if self.has_array:
872
- self.values = _clip_loaded_array(
873
- self.values, bounds, self.transform, self.crs, out_shape, **kwargs
994
+ if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
995
+ print(self)
996
+ print(self.mask)
997
+ print(self.values.shape)
998
+ print([int(x) for x in bounds], [int(x) for x in self.bounds])
999
+ raise ValueError(
1000
+ "Cannot re-load array with different bounds. "
1001
+ "Use .copy() to read with different bounds. "
1002
+ "Or .clip(mask) to clip."
874
1003
  )
875
- self._bounds = bounds
876
- self.transform = _get_transform_from_bounds(self._bounds, self.values.shape)
877
-
878
- else:
879
- with opener(self.path, file_system=self.file_system) as f:
880
- with rasterio.open(f, nodata=self.nodata) as src:
881
- self._res = int(src.res[0]) if not self.res else self.res
1004
+ # with opener(self.path, file_system=self.file_system) as f:
1005
+ with opener(self.path, file_system=file_system) as f:
1006
+ with rasterio.open(f, nodata=self.nodata) as src:
1007
+ self._res = int(src.res[0]) if not self.res else self.res
882
1008
 
883
- if self.nodata is None or np.isnan(self.nodata):
884
- self.nodata = src.nodata
885
- else:
886
- dtype_min_value = _get_dtype_min(src.dtypes[0])
887
- dtype_max_value = _get_dtype_max(src.dtypes[0])
888
- if (
889
- self.nodata > dtype_max_value
890
- or self.nodata < dtype_min_value
891
- ):
892
- src._dtypes = tuple(
893
- rasterio.dtypes.get_minimum_dtype(self.nodata)
894
- for _ in range(len(_indexes))
895
- )
896
-
897
- if bounds is None:
898
- if self._res != int(src.res[0]):
899
- if out_shape is None:
900
- out_shape = _get_shape_from_bounds(
901
- to_bbox(src.bounds), self.res, indexes
902
- )
903
- self.transform = _get_transform_from_bounds(
904
- to_bbox(src.bounds), shape=out_shape
905
- )
906
- else:
907
- self.transform = src.transform
908
-
909
- self._values = src.read(
910
- indexes=indexes,
911
- out_shape=out_shape,
912
- masked=masked,
913
- **kwargs,
914
- )
915
- else:
916
- window = rasterio.windows.from_bounds(
917
- *bounds, transform=src.transform
1009
+ if self.nodata is None or np.isnan(self.nodata):
1010
+ self.nodata = src.nodata
1011
+ else:
1012
+ dtype_min_value = _get_dtype_min(src.dtypes[0])
1013
+ dtype_max_value = _get_dtype_max(src.dtypes[0])
1014
+ if self.nodata > dtype_max_value or self.nodata < dtype_min_value:
1015
+ src._dtypes = tuple(
1016
+ rasterio.dtypes.get_minimum_dtype(self.nodata)
1017
+ for _ in range(len(_indexes))
918
1018
  )
919
1019
 
1020
+ if bounds is None:
1021
+ if self._res != int(src.res[0]):
920
1022
  if out_shape is None:
921
1023
  out_shape = _get_shape_from_bounds(
922
- bounds, self.res, indexes
1024
+ to_bbox(src.bounds), self.res, indexes
923
1025
  )
924
-
925
- self._values = src.read(
926
- indexes=indexes,
927
- window=window,
928
- boundless=False,
929
- out_shape=out_shape,
930
- masked=masked,
931
- **kwargs,
1026
+ self.transform = _get_transform_from_bounds(
1027
+ to_bbox(src.bounds), shape=out_shape
932
1028
  )
1029
+ else:
1030
+ self.transform = src.transform
933
1031
 
934
- assert out_shape == self._values.shape, (
935
- out_shape,
936
- self._values.shape,
937
- )
1032
+ values = src.read(
1033
+ indexes=indexes,
1034
+ out_shape=out_shape,
1035
+ masked=masked,
1036
+ **kwargs,
1037
+ )
1038
+ else:
1039
+ window = rasterio.windows.from_bounds(
1040
+ *bounds, transform=src.transform
1041
+ )
938
1042
 
1043
+ if out_shape is None:
1044
+ out_shape = _get_shape_from_bounds(bounds, self.res, indexes)
1045
+
1046
+ values = src.read(
1047
+ indexes=indexes,
1048
+ window=window,
1049
+ boundless=False,
1050
+ out_shape=out_shape,
1051
+ masked=masked,
1052
+ **kwargs,
1053
+ )
1054
+
1055
+ assert out_shape == values.shape, (
1056
+ out_shape,
1057
+ values.shape,
1058
+ )
1059
+
1060
+ width, height = values.shape[-2:]
1061
+
1062
+ if width and height:
939
1063
  self.transform = rasterio.transform.from_bounds(
940
- *bounds, self.width, self.height
1064
+ *bounds, width, height
941
1065
  )
942
- self._bounds = bounds
943
1066
 
944
- if self.nodata is not None and not np.isnan(self.nodata):
945
- if isinstance(self.values, np.ma.core.MaskedArray):
946
- self.values.data[self.values.data == src.nodata] = (
947
- self.nodata
948
- )
949
- else:
950
- self.values[self.values == src.nodata] = self.nodata
1067
+ if self.nodata is not None and not np.isnan(self.nodata):
1068
+ if isinstance(values, np.ma.core.MaskedArray):
1069
+ values.data[values.data == src.nodata] = self.nodata
1070
+ else:
1071
+ values[values == src.nodata] = self.nodata
951
1072
 
952
1073
  if self.masking and self.is_mask:
953
- self.values = np.isin(self.values, self.masking["values"])
1074
+ values = np.isin(values, list(self.masking["values"]))
954
1075
 
955
- elif self.mask is not None and not isinstance(
956
- self.values, np.ma.core.MaskedArray
957
- ):
958
- self.mask = self.mask.copy().load(
959
- bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
960
- )
1076
+ elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
1077
+
1078
+ if not self.mask.has_array:
1079
+ self._mask = self.mask.load(
1080
+ bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
1081
+ )
961
1082
  mask_arr = self.mask.values
962
1083
 
963
- self._values = np.ma.array(
964
- self._values, mask=mask_arr, fill_value=self.nodata
965
- )
1084
+ values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
966
1085
 
967
- try:
968
- self._image._mask = self._mask
969
- except AttributeError:
970
- pass
1086
+ if bounds is not None:
1087
+ self._bounds = to_bbox(bounds)
1088
+
1089
+ self._values = values
1090
+ # trigger the setter
1091
+ self.values = values
971
1092
 
972
1093
  return self
973
1094
 
974
1095
  @property
975
1096
  def is_mask(self) -> bool:
976
1097
  """True if the band_id is equal to the masking band_id."""
1098
+ if self.masking is None:
1099
+ return False
977
1100
  return self.band_id == self.masking["band_id"]
978
1101
 
979
1102
  @property
980
1103
  def has_array(self) -> bool:
981
1104
  """Whether the array is loaded."""
982
1105
  try:
983
- if not isinstance(self.values, np.ndarray):
1106
+ if not isinstance(self.values, (np.ndarray | DataArray)):
984
1107
  raise ValueError()
985
1108
  return True
986
1109
  except ValueError: # also catches ArrayNotLoadedError
987
1110
  return False
988
1111
 
989
1112
  def write(
990
- self, path: str | Path, driver: str = "GTiff", compress: str = "LZW", **kwargs
1113
+ self,
1114
+ path: str | Path,
1115
+ driver: str = "GTiff",
1116
+ compress: str = "LZW",
1117
+ file_system=None,
1118
+ **kwargs,
991
1119
  ) -> None:
992
1120
  """Write the array as an image file."""
993
1121
  if not hasattr(self, "_values"):
@@ -1010,7 +1138,8 @@ class Band(_ImageBandBase):
1010
1138
  "width": self.width,
1011
1139
  } | kwargs
1012
1140
 
1013
- with opener(path, "wb", file_system=self.file_system) as f:
1141
+ # with opener(path, "wb", file_system=self.file_system) as f:
1142
+ with opener(path, "wb", file_system=file_system) as f:
1014
1143
  with rasterio.open(f, "w", **profile) as dst:
1015
1144
 
1016
1145
  if dst.nodata is None:
@@ -1032,17 +1161,14 @@ class Band(_ImageBandBase):
1032
1161
  if isinstance(self.values, np.ma.core.MaskedArray):
1033
1162
  dst.write_mask(self.values.mask)
1034
1163
 
1035
- self._path = str(path)
1164
+ self._path = _fix_path(str(path))
1036
1165
 
1037
1166
  def apply(self, func: Callable, **kwargs) -> "Band":
1038
- """Apply a function to the array."""
1039
- self.values = func(self.values, **kwargs)
1040
- return self
1041
-
1042
- def normalize(self) -> "Band":
1043
- """Normalize array values between 0 and 1."""
1044
- arr = self.values
1045
- self.values = (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
1167
+ """Apply a function to the Band."""
1168
+ results = func(self, **kwargs)
1169
+ if isinstance(results, Band):
1170
+ return results
1171
+ self.values = results
1046
1172
  return self
1047
1173
 
1048
1174
  def sample(self, size: int = 1000, mask: Any = None, **kwargs) -> "Image":
@@ -1173,7 +1299,7 @@ class Band(_ImageBandBase):
1173
1299
  dropna=dropna,
1174
1300
  )
1175
1301
 
1176
- def to_gdf(self, column: str = "value") -> GeoDataFrame:
1302
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
1177
1303
  """Create a GeoDataFrame from the image Band.
1178
1304
 
1179
1305
  Args:
@@ -1200,23 +1326,61 @@ class Band(_ImageBandBase):
1200
1326
  )
1201
1327
 
1202
1328
  def to_xarray(self) -> DataArray:
1203
- """Convert the raster to an xarray.DataArray."""
1204
- name = self.name or self.__class__.__name__.lower()
1205
- coords = _generate_spatial_coords(self.transform, self.width, self.height)
1206
- if len(self.values.shape) == 2:
1207
- dims = ["y", "x"]
1208
- elif len(self.values.shape) == 3:
1209
- dims = ["band", "y", "x"]
1210
- else:
1211
- raise ValueError("Array must be 2 or 3 dimensional.")
1212
- return xr.DataArray(
1329
+ """Convert the raster to an xarray.DataArray."""
1330
+ if self.backend == "xarray":
1331
+ return self.values
1332
+ return self._to_xarray(
1213
1333
  self.values,
1214
- coords=coords,
1215
- dims=dims,
1216
- name=name,
1217
- attrs={"crs": self.crs},
1334
+ transform=self.transform,
1335
+ # name=self.name or self.__class__.__name__.lower(),
1218
1336
  )
1219
1337
 
1338
+ def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
1339
+ """Convert the raster to a numpy.ndarray."""
1340
+ return self._to_numpy(self.values).copy()
1341
+
1342
+ def _to_numpy(
1343
+ self, arr: np.ndarray | DataArray, masked: bool = True
1344
+ ) -> np.ndarray | np.ma.core.MaskedArray:
1345
+ if not isinstance(arr, np.ndarray):
1346
+ mask_arr = None
1347
+ if masked:
1348
+ # if self.mask is not None:
1349
+ # print(self.mask.values.shape, arr.shape)
1350
+ # if self.mask is not None and self.mask.values.shape == arr.shape:
1351
+ # print("hei", self.mask.values.sum())
1352
+ # mask_arr = self.mask.values
1353
+ # else:
1354
+ # mask_arr = np.full(arr.shape, False)
1355
+ # try:
1356
+ # print("hei222", arr.isnull().values.sum())
1357
+ # mask_arr |= arr.isnull().values
1358
+ # except AttributeError:
1359
+ # pass
1360
+ # mask_arr = np.full(arr.shape, False)
1361
+ try:
1362
+ mask_arr = arr.isnull().values
1363
+ except AttributeError:
1364
+ pass
1365
+ try:
1366
+ arr = arr.to_numpy()
1367
+ except AttributeError:
1368
+ arr = arr.values
1369
+ if mask_arr is not None:
1370
+ arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
1371
+
1372
+ if not isinstance(arr, np.ndarray):
1373
+ arr = np.array(arr)
1374
+
1375
+ if (
1376
+ masked
1377
+ and self.mask is not None
1378
+ and not self.is_mask
1379
+ and not isinstance(arr, np.ma.core.MaskedArray)
1380
+ ):
1381
+ arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
1382
+ return arr
1383
+
1220
1384
  def __repr__(self) -> str:
1221
1385
  """String representation."""
1222
1386
  try:
@@ -1252,12 +1416,12 @@ class Image(_ImageBandBase):
1252
1416
  """Image consisting of one or more Bands."""
1253
1417
 
1254
1418
  band_class: ClassVar[Band] = Band
1419
+ backend: str = "numpy"
1255
1420
 
1256
1421
  def __init__(
1257
1422
  self,
1258
- data: str | Path | Sequence[Band],
1423
+ data: str | Path | Sequence[Band] | None = None,
1259
1424
  res: int | None = None,
1260
- file_system: GCSFileSystem | None = None,
1261
1425
  processes: int = 1,
1262
1426
  df: pd.DataFrame | None = None,
1263
1427
  nodata: int | None = None,
@@ -1265,44 +1429,38 @@ class Image(_ImageBandBase):
1265
1429
  **kwargs,
1266
1430
  ) -> None:
1267
1431
  """Image initialiser."""
1432
+ if data is None:
1433
+ # allowing 'bands' to replace 'data' as argument
1434
+ # to make the print repr. valid as initialiser
1435
+ if "bands" not in kwargs:
1436
+ raise TypeError("Must specify either 'data' or 'bands'.")
1437
+ data = kwargs.pop("bands")
1438
+
1268
1439
  super().__init__(**kwargs)
1269
1440
 
1270
1441
  self.nodata = nodata
1271
- self._res = res
1272
- self._crs = None
1273
- self.file_system = file_system
1274
1442
  self.processes = processes
1443
+ self._crs = None
1444
+ self._bands = None
1275
1445
 
1276
1446
  if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
1277
- self._bands = list(data)
1278
- if res is None:
1279
- res = list({band.res for band in self.bands})
1280
- if len(res) == 1:
1281
- self._res = res[0]
1282
- else:
1283
- raise ValueError(f"Different resolutions for the bands: {res}")
1284
- else:
1285
- self._res = res
1447
+ self._construct_image_from_bands(data, res)
1286
1448
  return
1287
-
1288
- if not isinstance(data, (str | Path | os.PathLike)):
1449
+ elif not isinstance(data, (str | Path | os.PathLike)):
1289
1450
  raise TypeError("'data' must be string, Path-like or a sequence of Band.")
1290
1451
 
1291
- self._bands = None
1292
- self._path = _fix_path(data) # str(data).rstrip("/").rstrip(r"\"")
1452
+ self._res = res
1453
+ self._path = _fix_path(data)
1293
1454
 
1294
1455
  if all_file_paths is None and self.path:
1295
1456
  self._all_file_paths = _get_all_file_paths(self.path)
1296
1457
  elif self.path:
1297
- self._all_file_paths = [
1298
- x for x in all_file_paths if self.path in _fix_path(x)
1299
- ]
1458
+ all_file_paths = {_fix_path(x) for x in all_file_paths}
1459
+ self._all_file_paths = {x for x in all_file_paths if self.path in x}
1300
1460
  else:
1301
1461
  self._all_file_paths = None
1302
1462
 
1303
1463
  if df is None:
1304
- # file_paths = _get_all_file_paths(self.path)
1305
-
1306
1464
  if not self._all_file_paths:
1307
1465
  self._all_file_paths = [self.path]
1308
1466
  df = self._create_metadata_df(self._all_file_paths)
@@ -1311,7 +1469,7 @@ class Image(_ImageBandBase):
1311
1469
 
1312
1470
  cols_to_explode = [
1313
1471
  "file_path",
1314
- "filename",
1472
+ "file_name",
1315
1473
  *[x for x in df if FILENAME_COL_SUFFIX in x],
1316
1474
  ]
1317
1475
  try:
@@ -1319,34 +1477,82 @@ class Image(_ImageBandBase):
1319
1477
  except ValueError:
1320
1478
  for col in cols_to_explode:
1321
1479
  df = df.explode(col)
1322
- df = df.loc[lambda x: ~x["filename"].duplicated()].reset_index(drop=True)
1480
+ df = df.loc[lambda x: ~x["file_name"].duplicated()].reset_index(drop=True)
1323
1481
 
1324
- df = df.loc[lambda x: x["image_path"] == _fix_path(self.path)]
1482
+ df = df.loc[lambda x: x["image_path"] == self.path]
1325
1483
 
1326
1484
  self._df = df
1327
1485
 
1486
+ if self.path is not None and self.metadata:
1487
+ self.metadata = {
1488
+ key: value for key, value in self.metadata.items() if self.path in key
1489
+ }
1490
+
1491
+ if self.metadata:
1492
+ try:
1493
+ metadata = self.metadata[self.path]
1494
+ except KeyError:
1495
+ metadata = {}
1496
+ for key, value in metadata.items():
1497
+ if key in dir(self):
1498
+ setattr(self, f"_{key}", value)
1499
+ else:
1500
+ setattr(self, key, value)
1501
+
1502
+ else:
1503
+ for key, value in self._get_metadata_attributes(
1504
+ self.metadata_attributes
1505
+ ).items():
1506
+ setattr(self, key, value)
1507
+
1508
+ def _construct_image_from_bands(
1509
+ self, data: Sequence[Band], res: int | None
1510
+ ) -> None:
1511
+ self._bands = list(data)
1512
+ if res is None:
1513
+ res = list({band.res for band in self.bands})
1514
+ if len(res) == 1:
1515
+ self._res = res[0]
1516
+ else:
1517
+ raise ValueError(f"Different resolutions for the bands: {res}")
1518
+ else:
1519
+ self._res = res
1328
1520
  for key in self.metadata_attributes:
1329
- setattr(self, key, None)
1521
+ band_values = {getattr(band, key) for band in self if hasattr(band, key)}
1522
+ band_values = {x for x in band_values if x is not None}
1523
+ if len(band_values) > 1:
1524
+ raise ValueError(f"Different {key} values in bands: {band_values}")
1525
+ elif len(band_values):
1526
+ try:
1527
+ setattr(self, key, next(iter(band_values)))
1528
+ except AttributeError:
1529
+ setattr(self, f"_{key}", next(iter(band_values)))
1330
1530
 
1331
- if self.metadata_attributes:
1332
- self._add_metadata_attributes()
1531
+ def copy(self) -> "Image":
1532
+ """Copy the instance and its attributes."""
1533
+ copied = super().copy()
1534
+ for band in copied:
1535
+ band._mask = copied._mask
1536
+ return copied
1333
1537
 
1334
- @property
1335
- def values(self) -> np.ndarray:
1336
- """3 dimensional numpy array."""
1337
- values = [band.values for band in self]
1338
- if self.mask is not None:
1339
- mask = [band.mask.values for band in self]
1340
- return np.ma.array(values, mask=mask, fill_value=self.nodata)
1341
- return np.array(values)
1342
-
1343
- def ndvi(self, red_band: str, nir_band: str, copy: bool = True) -> NDVIBand:
1538
+ def apply(self, func: Callable, **kwargs) -> "Image":
1539
+ """Apply a function to each band of the Image."""
1540
+ with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
1541
+ parallel(joblib.delayed(_band_apply)(band, func, **kwargs) for band in self)
1542
+
1543
+ return self
1544
+
1545
+ def ndvi(
1546
+ self, red_band: str, nir_band: str, padding: int = 0, copy: bool = True
1547
+ ) -> NDVIBand:
1344
1548
  """Calculate the NDVI for the Image."""
1345
1549
  copied = self.copy() if copy else self
1346
1550
  red = copied[red_band].load()
1347
1551
  nir = copied[nir_band].load()
1348
1552
 
1349
- arr: np.ndarray | np.ma.core.MaskedArray = ndvi(red.values, nir.values)
1553
+ arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
1554
+ red.values, nir.values, padding=padding
1555
+ )
1350
1556
 
1351
1557
  return NDVIBand(
1352
1558
  arr,
@@ -1390,56 +1596,61 @@ class Image(_ImageBandBase):
1390
1596
 
1391
1597
  def to_xarray(self) -> DataArray:
1392
1598
  """Convert the raster to an xarray.DataArray."""
1393
- name = self.name or self.__class__.__name__.lower()
1394
- coords = _generate_spatial_coords(
1395
- self[0].transform, self[0].width, self[0].height
1396
- )
1397
- dims = ["band", "y", "x"]
1398
- return xr.DataArray(
1399
- self.values,
1400
- coords=coords,
1401
- dims=dims,
1402
- name=name,
1403
- attrs={"crs": self.crs},
1599
+ if self.backend == "xarray":
1600
+ return self.values
1601
+
1602
+ return self._to_xarray(
1603
+ np.array([band.values for band in self]),
1604
+ transform=self[0].transform,
1404
1605
  )
1405
1606
 
1406
1607
  @property
1407
1608
  def mask(self) -> Band | None:
1408
1609
  """Mask Band."""
1409
- if self._mask is not None:
1410
- # if not self._mask.has_array:
1411
- # try:
1412
- # self._mask.values = self[0]._mask.values
1413
- # except Exception:
1414
- # pass
1415
- return self._mask
1416
1610
  if self.masking is None:
1417
1611
  return None
1418
1612
 
1613
+ elif self._mask is not None:
1614
+ return self._mask
1615
+
1616
+ elif self._bands is not None and all(band.mask is not None for band in self):
1617
+ if len({id(band.mask) for band in self}) > 1:
1618
+ raise ValueError(
1619
+ "Image bands must have same mask.",
1620
+ {id(band.mask) for band in self},
1621
+ ) # TODO
1622
+ self._mask = next(
1623
+ iter([band.mask for band in self if band.mask is not None])
1624
+ )
1625
+ return self._mask
1626
+
1419
1627
  mask_band_id = self.masking["band_id"]
1420
- mask_paths = [path for path in self._df["file_path"] if mask_band_id in path]
1628
+ mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
1421
1629
  if len(mask_paths) > 1:
1422
1630
  raise ValueError(
1423
1631
  f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
1424
1632
  )
1425
1633
  elif not mask_paths:
1426
1634
  raise ValueError(
1427
- f"No file_paths match mask band_id {mask_band_id} for {self.path}"
1635
+ f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
1636
+ + str([Path(x).name for x in _ls_func(self.path)])
1428
1637
  )
1638
+
1429
1639
  self._mask = self.band_class(
1430
1640
  mask_paths[0],
1431
- _add_metadata_attributes=False,
1432
1641
  **self._common_init_kwargs,
1433
1642
  )
1434
-
1643
+ if self._bands is not None:
1644
+ for band in self:
1645
+ band._mask = self._mask
1435
1646
  return self._mask
1436
1647
 
1437
1648
  @mask.setter
1438
- def mask(self, values: Band) -> None:
1649
+ def mask(self, values: Band | None) -> None:
1439
1650
  if values is None:
1440
1651
  self._mask = None
1441
1652
  for band in self:
1442
- band.mask = None
1653
+ band._mask = None
1443
1654
  return
1444
1655
  if not isinstance(values, Band):
1445
1656
  raise TypeError(f"mask must be Band. Got {type(values)}")
@@ -1449,7 +1660,7 @@ class Image(_ImageBandBase):
1449
1660
  band._mask = self._mask
1450
1661
  try:
1451
1662
  band.values = np.ma.array(
1452
- band.values, mask=mask_arr, fill_value=band.nodata
1663
+ band.values.data, mask=mask_arr, fill_value=band.nodata
1453
1664
  )
1454
1665
  except ArrayNotLoadedError:
1455
1666
  pass
@@ -1470,22 +1681,24 @@ class Image(_ImageBandBase):
1470
1681
  if self._bands is not None:
1471
1682
  return self._bands
1472
1683
 
1684
+ if self.masking:
1685
+ mask_band_id = self.masking["band_id"]
1686
+ paths = [path for path in self._df["file_path"] if mask_band_id not in path]
1687
+ else:
1688
+ paths = self._df["file_path"]
1689
+
1690
+ mask = self.mask
1691
+
1473
1692
  self._bands = [
1474
1693
  self.band_class(
1475
1694
  path,
1476
- mask=self.mask,
1477
- _add_metadata_attributes=False,
1695
+ mask=mask,
1696
+ all_file_paths=self._all_file_paths,
1478
1697
  **self._common_init_kwargs,
1479
1698
  )
1480
- for path in (self._df["file_path"])
1699
+ for path in paths
1481
1700
  ]
1482
1701
 
1483
- if self.masking:
1484
- mask_band_id = self.masking["band_id"]
1485
- self._bands = [
1486
- band for band in self._bands if mask_band_id not in band.path
1487
- ]
1488
-
1489
1702
  if (
1490
1703
  self.filename_patterns
1491
1704
  and any(_get_non_optional_groups(pat) for pat in self.filename_patterns)
@@ -1514,30 +1727,19 @@ class Image(_ImageBandBase):
1514
1727
  if self._should_be_sorted:
1515
1728
  self._bands = list(sorted(self._bands))
1516
1729
 
1517
- for key in self.metadata_attributes:
1518
- for band in self:
1519
- value = getattr(self, key)
1520
- if value is None:
1521
- continue
1522
- if isinstance(value, BandIdDict):
1523
- try:
1524
- value = value[band.band_id]
1525
- except KeyError:
1526
- continue
1527
- setattr(band, key, value)
1528
-
1529
- for band in self:
1530
- band._image = self
1531
-
1532
1730
  return self._bands
1533
1731
 
1534
1732
  @property
1535
1733
  def _should_be_sorted(self) -> bool:
1536
1734
  sort_groups = ["band", "band_id"]
1537
- return self.filename_patterns and any(
1538
- group in _get_non_optional_groups(pat)
1539
- for group in sort_groups
1540
- for pat in self.filename_patterns
1735
+ return (
1736
+ self.filename_patterns
1737
+ and any(
1738
+ group in _get_non_optional_groups(pat)
1739
+ for group in sort_groups
1740
+ for pat in self.filename_patterns
1741
+ )
1742
+ or all(band.band_id is not None for band in self)
1541
1743
  )
1542
1744
 
1543
1745
  @property
@@ -1581,10 +1783,10 @@ class Image(_ImageBandBase):
1581
1783
  bounds.append(band.bounds)
1582
1784
  return get_total_bounds(bounds)
1583
1785
 
1584
- def to_gdf(self, column: str = "value") -> GeoDataFrame:
1786
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
1585
1787
  """Convert the array to a GeoDataFrame of grid polygons and values."""
1586
1788
  return pd.concat(
1587
- [band.to_gdf(column=column) for band in self], ignore_index=True
1789
+ [band.to_geopandas(column=column) for band in self], ignore_index=True
1588
1790
  )
1589
1791
 
1590
1792
  def sample(
@@ -1613,7 +1815,7 @@ class Image(_ImageBandBase):
1613
1815
  if isinstance(band, str):
1614
1816
  return self._get_band(band)
1615
1817
  if isinstance(band, int):
1616
- return self.bands[band] # .copy()
1818
+ return self.bands[band]
1617
1819
 
1618
1820
  copied = self.copy()
1619
1821
  try:
@@ -1639,10 +1841,7 @@ class Image(_ImageBandBase):
1639
1841
  try:
1640
1842
  return self.date < other.date
1641
1843
  except Exception as e:
1642
- print(self.path)
1643
- print(self.date)
1644
- print(other.path)
1645
- print(other.date)
1844
+ print("", self.path, self.date, other.path, other.date, sep="\n")
1646
1845
  raise e
1647
1846
 
1648
1847
  def __iter__(self) -> Iterator[Band]:
@@ -1702,36 +1901,36 @@ class ImageCollection(_ImageBase):
1702
1901
  image_class: ClassVar[Image] = Image
1703
1902
  band_class: ClassVar[Band] = Band
1704
1903
  _metadata_attribute_collection_type: ClassVar[type] = pd.Series
1904
+ backend: str = "numpy"
1705
1905
 
1706
1906
  def __init__(
1707
1907
  self,
1708
1908
  data: str | Path | Sequence[Image] | Sequence[str | Path],
1709
1909
  res: int,
1710
- level: str | None = NoLevel,
1910
+ level: str | None = None_,
1711
1911
  processes: int = 1,
1712
- file_system: GCSFileSystem | None = None,
1713
1912
  metadata: str | dict | pd.DataFrame | None = None,
1714
1913
  nodata: int | None = None,
1715
1914
  **kwargs,
1716
1915
  ) -> None:
1717
1916
  """Initialiser."""
1718
- super().__init__(**kwargs)
1917
+ if data is not None and kwargs.get("root"):
1918
+ root = _fix_path(kwargs.pop("root"))
1919
+ data = [f"{root}/{name}" for name in data]
1920
+ _from_root = True
1921
+ else:
1922
+ _from_root = False
1923
+
1924
+ super().__init__(metadata=metadata, **kwargs)
1925
+
1926
+ if callable(level) and isinstance(level(), None_):
1927
+ level = None
1719
1928
 
1720
1929
  self.nodata = nodata
1721
- self.level = level if not isinstance(level, NoLevel) else None
1930
+ self.level = level
1722
1931
  self.processes = processes
1723
- self.file_system = file_system
1724
1932
  self._res = res
1725
- self._band_ids = None
1726
- self._crs = None # crs
1727
-
1728
- if metadata is not None:
1729
- if isinstance(metadata, (str | Path | os.PathLike)):
1730
- self.metadata = _read_parquet_func(metadata)
1731
- else:
1732
- self.metadata = metadata
1733
- else:
1734
- self.metadata = metadata
1933
+ self._crs = None
1735
1934
 
1736
1935
  self._df = None
1737
1936
  self._all_file_paths = None
@@ -1743,18 +1942,22 @@ class ImageCollection(_ImageBase):
1743
1942
  self.images = [x.copy() for x in data]
1744
1943
  return
1745
1944
  elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
1746
- self._all_file_paths = list(
1747
- itertools.chain.from_iterable(
1748
- _get_all_file_paths(str(path)) for path in data
1749
- )
1750
- )
1751
- self._df = self._create_metadata_df([str(x) for x in data])
1945
+ # adding band paths (asuming 'data' is a sequence of image paths)
1946
+ try:
1947
+ self._all_file_paths = _get_child_paths_threaded(data) | set(data)
1948
+ except FileNotFoundError as e:
1949
+ if _from_root:
1950
+ raise TypeError(
1951
+ "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
1952
+ ) from e
1953
+ raise e
1954
+ self._df = self._create_metadata_df(self._all_file_paths)
1752
1955
  return
1753
1956
 
1754
1957
  if not isinstance(data, (str | Path | os.PathLike)):
1755
1958
  raise TypeError("'data' must be string, Path-like or a sequence of Image.")
1756
1959
 
1757
- self._path = str(data)
1960
+ self._path = _fix_path(str(data))
1758
1961
 
1759
1962
  self._all_file_paths = _get_all_file_paths(self.path)
1760
1963
 
@@ -1765,18 +1968,6 @@ class ImageCollection(_ImageBase):
1765
1968
 
1766
1969
  self._df = self._create_metadata_df(self._all_file_paths)
1767
1970
 
1768
- @property
1769
- def values(self) -> np.ndarray:
1770
- """4 dimensional numpy array."""
1771
- if isinstance(self[0].values, np.ma.core.MaskedArray):
1772
- return np.ma.array([img.values for img in self])
1773
- return np.array([img.values for img in self])
1774
-
1775
- @property
1776
- def mask(self) -> np.ndarray:
1777
- """4 dimensional numpy array."""
1778
- return np.array([img.mask.values for img in self])
1779
-
1780
1971
  def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
1781
1972
  """Group the Collection by Image or Band attribute(s)."""
1782
1973
  df = pd.DataFrame(
@@ -1830,15 +2021,20 @@ class ImageCollection(_ImageBase):
1830
2021
  for img in copied:
1831
2022
  assert len(img) == 1
1832
2023
  try:
1833
- img._path = img[0].path
2024
+ img._path = _fix_path(img[0].path)
1834
2025
  except PathlessImageError:
1835
2026
  pass
1836
2027
  return copied
1837
2028
 
1838
2029
  def apply(self, func: Callable, **kwargs) -> "ImageCollection":
1839
2030
  """Apply a function to all bands in each image of the collection."""
1840
- for img in self:
1841
- img._bands = [func(band, **kwargs) for band in img]
2031
+ with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2032
+ parallel(
2033
+ joblib.delayed(_band_apply)(band, func, **kwargs)
2034
+ for img in self
2035
+ for band in img
2036
+ )
2037
+
1842
2038
  return self
1843
2039
 
1844
2040
  def get_unique_band_ids(self) -> list[str]:
@@ -1851,7 +2047,7 @@ class ImageCollection(_ImageBase):
1851
2047
  date_ranges: DATE_RANGES_TYPE = None,
1852
2048
  bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
1853
2049
  intersects: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
1854
- max_cloud_coverage: int | None = None,
2050
+ max_cloud_cover: int | None = None,
1855
2051
  copy: bool = True,
1856
2052
  ) -> "ImageCollection":
1857
2053
  """Filter images and bands in the collection."""
@@ -1860,11 +2056,11 @@ class ImageCollection(_ImageBase):
1860
2056
  if date_ranges:
1861
2057
  copied = copied._filter_dates(date_ranges)
1862
2058
 
1863
- if max_cloud_coverage is not None:
2059
+ if max_cloud_cover is not None:
1864
2060
  copied.images = [
1865
2061
  image
1866
2062
  for image in copied.images
1867
- if image.cloud_coverage_percentage < max_cloud_coverage
2063
+ if image.cloud_cover_percentage < max_cloud_cover
1868
2064
  ]
1869
2065
 
1870
2066
  if bbox is not None:
@@ -1878,7 +2074,6 @@ class ImageCollection(_ImageBase):
1878
2074
  if isinstance(bands, str):
1879
2075
  bands = [bands]
1880
2076
  bands = set(bands)
1881
- copied._band_ids = bands
1882
2077
  copied.images = [img[bands] for img in copied.images if bands in img]
1883
2078
 
1884
2079
  return copied
@@ -1892,7 +2087,7 @@ class ImageCollection(_ImageBase):
1892
2087
  **kwargs,
1893
2088
  ) -> Band:
1894
2089
  """Merge all areas and all bands to a single Band."""
1895
- bounds = _get_bounds(bounds, self._bbox)
2090
+ bounds = _get_bounds(bounds, self._bbox, self.union_all())
1896
2091
  if bounds is not None:
1897
2092
  bounds = to_bbox(bounds)
1898
2093
 
@@ -1930,14 +2125,14 @@ class ImageCollection(_ImageBase):
1930
2125
  **kwargs,
1931
2126
  )
1932
2127
 
1933
- if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
1934
- arr = arr[0]
2128
+ if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
2129
+ arr = arr[0]
1935
2130
 
1936
- if method == "mean":
1937
- if as_int:
1938
- arr = arr // len(datasets)
1939
- else:
1940
- arr = arr / len(datasets)
2131
+ if method == "mean":
2132
+ if as_int:
2133
+ arr = arr // len(datasets)
2134
+ else:
2135
+ arr = arr / len(datasets)
1941
2136
 
1942
2137
  if bounds is None:
1943
2138
  bounds = self.bounds
@@ -1963,7 +2158,7 @@ class ImageCollection(_ImageBase):
1963
2158
  **kwargs,
1964
2159
  ) -> Image:
1965
2160
  """Merge all areas to a single tile, one band per band_id."""
1966
- bounds = _get_bounds(bounds, self._bbox)
2161
+ bounds = _get_bounds(bounds, self._bbox, self.union_all())
1967
2162
  if bounds is not None:
1968
2163
  bounds = to_bbox(bounds)
1969
2164
  bounds = self.bounds if bounds is None else bounds
@@ -2021,7 +2216,6 @@ class ImageCollection(_ImageBase):
2021
2216
  bounds=out_bounds,
2022
2217
  crs=crs,
2023
2218
  band_id=band_id,
2024
- _add_metadata_attributes=False,
2025
2219
  **self._common_init_kwargs,
2026
2220
  )
2027
2221
  )
@@ -2061,10 +2255,13 @@ class ImageCollection(_ImageBase):
2061
2255
  arr = np.array(
2062
2256
  [
2063
2257
  (
2064
- band.load(
2065
- bounds=(_bounds if _bounds is not None else None),
2066
- **kwargs,
2067
- )
2258
+ # band.load(
2259
+ # bounds=(_bounds if _bounds is not None else None),
2260
+ # **kwargs,
2261
+ # )
2262
+ # if not band.has_array
2263
+ # else
2264
+ band
2068
2265
  ).values
2069
2266
  for img in collection
2070
2267
  for band in img
@@ -2087,7 +2284,7 @@ class ImageCollection(_ImageBase):
2087
2284
  coords = _generate_spatial_coords(transform, width, height)
2088
2285
 
2089
2286
  arrs.append(
2090
- xr.DataArray(
2287
+ DataArray(
2091
2288
  arr,
2092
2289
  coords=coords,
2093
2290
  dims=["y", "x"],
@@ -2104,7 +2301,7 @@ class ImageCollection(_ImageBase):
2104
2301
  return merged.to_numpy()
2105
2302
 
2106
2303
  def sort_images(self, ascending: bool = True) -> "ImageCollection":
2107
- """Sort Images by date."""
2304
+ """Sort Images by date, then file path if date attribute is missing."""
2108
2305
  self._images = (
2109
2306
  list(sorted([img for img in self if img.date is not None]))
2110
2307
  + sorted(
@@ -2121,6 +2318,7 @@ class ImageCollection(_ImageBase):
2121
2318
  self,
2122
2319
  bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
2123
2320
  indexes: int | tuple[int] | None = None,
2321
+ file_system=None,
2124
2322
  **kwargs,
2125
2323
  ) -> "ImageCollection":
2126
2324
  """Load all image Bands with threading."""
@@ -2130,10 +2328,46 @@ class ImageCollection(_ImageBase):
2130
2328
  and all(band.has_array for img in self for band in img)
2131
2329
  ):
2132
2330
  return self
2331
+
2332
+ # if self.processes == 1:
2333
+ # for img in self:
2334
+ # for band in img:
2335
+ # band.load(
2336
+ # bounds=bounds,
2337
+ # indexes=indexes,
2338
+ # file_system=file_system,
2339
+ # **kwargs,
2340
+ # )
2341
+ # return self
2342
+
2133
2343
  with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2344
+ if self.masking:
2345
+ parallel(
2346
+ joblib.delayed(_load_band)(
2347
+ img.mask,
2348
+ bounds=bounds,
2349
+ indexes=indexes,
2350
+ file_system=file_system,
2351
+ **kwargs,
2352
+ )
2353
+ for img in self
2354
+ )
2355
+ for img in self:
2356
+ for band in img:
2357
+ band._mask = img.mask
2358
+
2359
+ # print({img.mask.has_array for img in self })
2360
+ # print({band.mask.has_array for img in self for band in img})
2361
+
2362
+ # with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2363
+
2134
2364
  parallel(
2135
2365
  joblib.delayed(_load_band)(
2136
- band, bounds=bounds, indexes=indexes, **kwargs
2366
+ band,
2367
+ bounds=bounds,
2368
+ indexes=indexes,
2369
+ file_system=file_system,
2370
+ **kwargs,
2137
2371
  )
2138
2372
  for img in self
2139
2373
  for band in img
@@ -2141,6 +2375,27 @@ class ImageCollection(_ImageBase):
2141
2375
 
2142
2376
  return self
2143
2377
 
2378
+ def clip(
2379
+ self,
2380
+ mask: Geometry | GeoDataFrame | GeoSeries,
2381
+ **kwargs,
2382
+ ) -> "ImageCollection":
2383
+ """Clip all image Bands with 'loky'."""
2384
+ if self.processes == 1:
2385
+ for img in self:
2386
+ for band in img:
2387
+ band.clip(mask, **kwargs)
2388
+ return self
2389
+
2390
+ with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2391
+ parallel(
2392
+ joblib.delayed(_clip_band)(band, mask, **kwargs)
2393
+ for img in self
2394
+ for band in img
2395
+ )
2396
+
2397
+ return self
2398
+
2144
2399
  def _set_bbox(
2145
2400
  self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
2146
2401
  ) -> "ImageCollection":
@@ -2150,12 +2405,17 @@ class ImageCollection(_ImageBase):
2150
2405
  if self._images is not None:
2151
2406
  for img in self._images:
2152
2407
  img._bbox = self._bbox
2408
+ if img.mask is not None:
2409
+ img.mask._bbox = self._bbox
2153
2410
  if img.bands is None:
2154
2411
  continue
2155
2412
  for band in img:
2156
2413
  band._bbox = self._bbox
2157
2414
  bounds = box(*band._bbox).intersection(box(*band.bounds))
2158
2415
  band._bounds = to_bbox(bounds) if not bounds.is_empty else None
2416
+ if band.mask is not None:
2417
+ band.mask._bbox = self._bbox
2418
+ band.mask._bounds = band._bounds
2159
2419
 
2160
2420
  return self
2161
2421
 
@@ -2184,11 +2444,15 @@ class ImageCollection(_ImageBase):
2184
2444
 
2185
2445
  other = to_shapely(other)
2186
2446
 
2187
- # intersects_list = GeoSeries([img.union_all() for img in self]).intersects(other)
2188
- with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2189
- intersects_list: list[bool] = parallel(
2190
- joblib.delayed(_intesects)(image, other) for image in self
2191
- )
2447
+ if self.processes == 1:
2448
+ intersects_list: pd.Series = GeoSeries(
2449
+ [img.union_all() for img in self]
2450
+ ).intersects(other)
2451
+ else:
2452
+ with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2453
+ intersects_list: list[bool] = parallel(
2454
+ joblib.delayed(_intesects)(image, other) for image in self
2455
+ )
2192
2456
 
2193
2457
  self.images = [
2194
2458
  image
@@ -2197,39 +2461,70 @@ class ImageCollection(_ImageBase):
2197
2461
  ]
2198
2462
  return self
2199
2463
 
2200
- def to_xarray(self, **kwargs) -> DataArray:
2201
- """Convert the raster to an xarray.DataArray."""
2202
- # arrs = []
2203
- # for img in self:
2204
- # for band in img:
2205
- # arr = band.load(**kwargs).values
2206
- # arrs.append(arr)
2207
-
2208
- # n_images = len(self)
2209
- # n_bands = len(img)
2210
- # height, width = arr.shape
2211
-
2212
- # arr_4d = np.array(arrs).reshape(n_images, n_bands, height, width)
2464
def to_xarray(
    self,
    **kwargs,
) -> Dataset:
    """Convert the collection to an xarray.Dataset.

    One DataArray is built for each unique combination of bounds and
    band_id, and the arrays are then combined with
    xarray.combine_by_coords. The spatial dimensions are labeled "x" and
    "y". The third dimension is "date" if all images have a truthy date
    attribute, otherwise "name".

    Args:
        **kwargs: Additional keyword arguments passed to the DataArray
            constructor.

    Returns:
        The result of xarray.combine_by_coords on the per-group arrays.

    Raises:
        ValueError: If any band lacks a loaded array, or if the images
            have no dates and their names are not unique.
    """
    if any(not band.has_array for img in self for band in img):
        raise ValueError("Arrays must be loaded.")

    if all(img.date for img in self):
        by = ["date"]
    elif not pd.Index([img.name for img in self]).is_unique:
        raise ValueError("Images must have unique names.")
    else:
        by = ["name"]

    dims = [*by, "y", "x"]

    # Collected in a list: keying on integer-truncated bounds could make two
    # groups with slightly different bounds overwrite each other.
    xarrs: list[DataArray] = []
    for (_bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
        first_band = collection[0][0]
        coords = _generate_spatial_coords(
            first_band.transform, first_band.width, first_band.height
        )
        values = np.array([band.to_numpy() for img in collection for band in img])
        # sanity check: exactly one array per image in the group
        assert len(values) == len(collection)

        for attr in by:
            coords[attr] = [getattr(img, attr) for img in collection]

        xarrs.append(
            DataArray(
                values,
                coords=coords,
                dims=dims,
                name=band_id,
                attrs={
                    "crs": collection.crs,
                    "band_id": band_id,
                },
                **kwargs,
            )
        )

    return xr.combine_by_coords(xarrs)
2231
2526
 
2232
- def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
2527
+ def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
2233
2528
  """Convert each band in each Image to a GeoDataFrame."""
2234
2529
  out = {}
2235
2530
  i = 0
@@ -2241,10 +2536,8 @@ class ImageCollection(_ImageBase):
2241
2536
  except AttributeError:
2242
2537
  name = f"{self.__class__.__name__}({i})"
2243
2538
 
2244
- # band.load()
2245
-
2246
2539
  if name not in out:
2247
- out[name] = band.to_gdf(column=column)
2540
+ out[name] = band.to_geopandas(column=column)
2248
2541
  return out
2249
2542
 
2250
2543
  def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
@@ -2384,36 +2677,22 @@ class ImageCollection(_ImageBase):
2384
2677
  masking=self.masking,
2385
2678
  **self._common_init_kwargs,
2386
2679
  )
2680
+
2387
2681
  if self.masking is not None:
2388
2682
  images = []
2389
2683
  for image in self._images:
2684
+ # TODO why this loop?
2390
2685
  try:
2391
2686
  if not isinstance(image.mask, Band):
2392
2687
  raise ValueError()
2393
2688
  images.append(image)
2394
- except ValueError:
2689
+ except ValueError as e:
2690
+ raise e
2395
2691
  continue
2396
2692
  self._images = images
2397
2693
  for image in self._images:
2398
2694
  image._bands = [band for band in image if band.band_id is not None]
2399
2695
 
2400
- if self.metadata is not None:
2401
- attributes_to_add = ["crs", "bounds"] + list(self.metadata_attributes)
2402
- for img in self:
2403
- for band in img:
2404
- for key in attributes_to_add:
2405
- try:
2406
- value = self.metadata[band.path][key]
2407
- except KeyError:
2408
- try:
2409
- value = self.metadata[key][band.path]
2410
- except KeyError:
2411
- continue
2412
- try:
2413
- setattr(band, key, value)
2414
- except Exception:
2415
- setattr(band, f"_{key}", value)
2416
-
2417
2696
  self._images = [img for img in self if len(img)]
2418
2697
 
2419
2698
  if self._should_be_sorted:
@@ -2438,7 +2717,7 @@ class ImageCollection(_ImageBase):
2438
2717
  and sort_group in _get_non_optional_groups(pat)
2439
2718
  for pat in self.image_patterns
2440
2719
  )
2441
- or all(img.date is not None for img in self)
2720
+ or all(getattr(img, sort_group) is not None for img in self)
2442
2721
  )
2443
2722
 
2444
2723
  @images.setter
@@ -2449,7 +2728,18 @@ class ImageCollection(_ImageBase):
2449
2728
 
2450
2729
  def __repr__(self) -> str:
2451
2730
  """String representation."""
2452
- return f"{self.__class__.__name__}({len(self)}, path='{self.path}')"
2731
+ root = ""
2732
+ if self.path is not None:
2733
+ data = f"'{self.path}'"
2734
+ elif all(img.path is not None for img in self):
2735
+ data = [img.path for img in self]
2736
+ parents = {str(Path(path).parent) for path in data}
2737
+ if len(parents) == 1:
2738
+ data = [Path(path).name for path in data]
2739
+ root = f" root='{next(iter(parents))}',"
2740
+ else:
2741
+ data = [img for img in self]
2742
+ return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
2453
2743
 
2454
2744
  def union_all(self) -> Polygon | MultiPolygon:
2455
2745
  """(Multi)Polygon representing the union of all image bounds."""
@@ -2500,12 +2790,8 @@ class ImageCollection(_ImageBase):
2500
2790
 
2501
2791
  alpha = 1 - p
2502
2792
 
2503
- # for img in self:
2504
- # for band in img:
2505
- # band.load()
2506
-
2507
2793
  for group_values, subcollection in self.groupby(by):
2508
- print("group_values:", *group_values)
2794
+ print("subcollection group values:", group_values)
2509
2795
 
2510
2796
  if "date" in x_var and subcollection._should_be_sorted:
2511
2797
  subcollection._images = list(sorted(subcollection._images))
@@ -2519,6 +2805,7 @@ class ImageCollection(_ImageBase):
2519
2805
  for band in img
2520
2806
  ]
2521
2807
  )
2808
+ first_date = pd.Timestamp(x[0])
2522
2809
  x = (
2523
2810
  pd.to_datetime(
2524
2811
  [band.date[:8] for img in subcollection for band in img]
@@ -2611,6 +2898,23 @@ class ImageCollection(_ImageBase):
2611
2898
  )
2612
2899
  plt.xlabel(x_var)
2613
2900
  plt.ylabel(y_label)
2901
+
2902
+ if x_var == "date":
2903
+ date_labels = pd.to_datetime(
2904
+ [first_date + pd.Timedelta(days=int(day)) for day in this_x]
2905
+ )
2906
+
2907
+ _, unique_indices = np.unique(
2908
+ date_labels.strftime("%Y-%m"), return_index=True
2909
+ )
2910
+
2911
+ unique_x = np.array(this_x)[unique_indices]
2912
+ unique_labels = date_labels[unique_indices].strftime("%Y-%m")
2913
+
2914
+ ax.set_xticks(unique_x)
2915
+ ax.set_xticklabels(unique_labels, rotation=45, ha="right")
2916
+ # ax.tick_params(axis="x", length=10, width=2)
2917
+
2614
2918
  plt.show()
2615
2919
 
2616
2920
 
@@ -2629,10 +2933,7 @@ class Sentinel2Config:
2629
2933
  """Holder of Sentinel 2 regexes, band_ids etc."""
2630
2934
 
2631
2935
  image_regexes: ClassVar[str] = (config.SENTINEL2_IMAGE_REGEX,)
2632
- filename_regexes: ClassVar[str] = (
2633
- config.SENTINEL2_FILENAME_REGEX,
2634
- config.SENTINEL2_CLOUD_FILENAME_REGEX,
2635
- )
2936
+ filename_regexes: ClassVar[str] = (config.SENTINEL2_FILENAME_REGEX,)
2636
2937
  metadata_attributes: ClassVar[
2637
2938
  dict[str, Callable | functools.partial | tuple[str]]
2638
2939
  ] = {
@@ -2640,22 +2941,69 @@ class Sentinel2Config:
2640
2941
  _extract_regex_match_from_string,
2641
2942
  regexes=(r"<PROCESSING_BASELINE>(.*?)</PROCESSING_BASELINE>",),
2642
2943
  ),
2643
- "cloud_coverage_percentage": "_get_cloud_coverage_percentage",
2644
- "is_refined": functools.partial(
2645
- _any_regex_matches, regexes=(r'<Image_Refining flag="REFINED">',)
2646
- ),
2647
- "boa_add_offset": "_get_boa_add_offset_dict",
2944
+ "cloud_cover_percentage": "_get_cloud_cover_percentage",
2945
+ "is_refined": "_get_image_refining_flag",
2946
+ "boa_quantification_value": "_get_boa_quantification_value",
2648
2947
  }
2649
- all_bands: ClassVar[list[str]] = list(config.SENTINEL2_BANDS)
2650
- rbg_bands: ClassVar[list[str]] = config.SENTINEL2_RBG_BANDS
2651
- ndvi_bands: ClassVar[list[str]] = config.SENTINEL2_NDVI_BANDS
2652
- l2a_bands: ClassVar[dict[str, int]] = config.SENTINEL2_L2A_BANDS
2653
- l1c_bands: ClassVar[dict[str, int]] = config.SENTINEL2_L1C_BANDS
2948
+ l1c_bands: ClassVar[set[str]] = {
2949
+ "B01": 60,
2950
+ "B02": 10,
2951
+ "B03": 10,
2952
+ "B04": 10,
2953
+ "B05": 20,
2954
+ "B06": 20,
2955
+ "B07": 20,
2956
+ "B08": 10,
2957
+ "B8A": 20,
2958
+ "B09": 60,
2959
+ "B10": 60,
2960
+ "B11": 20,
2961
+ "B12": 20,
2962
+ }
2963
+ l2a_bands: ClassVar[set[str]] = {
2964
+ key: res for key, res in l1c_bands.items() if key != "B10"
2965
+ }
2966
+ all_bands: ClassVar[set[str]] = l1c_bands
2967
+ rbg_bands: ClassVar[tuple[str]] = ("B04", "B02", "B03")
2968
+ ndvi_bands: ClassVar[tuple[str]] = ("B04", "B08")
2654
2969
  masking: ClassVar[BandMasking] = BandMasking(
2655
- band_id="SCL", values=(3, 8, 9, 10, 11)
2970
+ band_id="SCL",
2971
+ values={
2972
+ 2: "Topographic casted shadows",
2973
+ 3: "Cloud shadows",
2974
+ 8: "Cloud medium probability",
2975
+ 9: "Cloud high probability",
2976
+ 10: "Thin cirrus",
2977
+ 11: "Snow or ice",
2978
+ },
2656
2979
  )
2657
2980
 
2658
- def _get_cloud_coverage_percentage(self, xml_file: str) -> float:
2981
+ def _get_image_refining_flag(self, xml_file: str) -> bool:
2982
+ match_ = re.search(
2983
+ r'Image_Refining flag="(?:REFINED|NOT_REFINED)"',
2984
+ xml_file,
2985
+ )
2986
+ if match_ is None:
2987
+ raise _RegexError()
2988
+
2989
+ if "NOT_REFINED" in match_.group(0):
2990
+ return False
2991
+ elif "REFINED" in match_.group(0):
2992
+ return True
2993
+ else:
2994
+ raise _RegexError()
2995
+
2996
def _get_boa_quantification_value(self, xml_file: str) -> int:
    """Extract the BOA_QUANTIFICATION_VALUE from the xml text as an integer.

    Args:
        xml_file: Text content to search.

    Returns:
        The result of _extract_regex_match_from_string converted with int().
    """
    # NOTE(review): the optional minus sign sits outside the capture group,
    # so a negative value would presumably be returned without its sign.
    patterns = (
        r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
    )
    raw_value = _extract_regex_match_from_string(xml_file, patterns)
    return int(raw_value)
3005
+
3006
+ def _get_cloud_cover_percentage(self, xml_file: str) -> float:
2659
3007
  return float(
2660
3008
  _extract_regex_match_from_string(
2661
3009
  xml_file,
@@ -2666,7 +3014,35 @@ class Sentinel2Config:
2666
3014
  )
2667
3015
  )
2668
3016
 
2669
- def _get_boa_add_offset_dict(self, xml_file: str) -> BandIdDict:
3017
+
3018
class Sentinel2CloudlessConfig(Sentinel2Config):
    """Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic.

    Band ids are derived from Sentinel2Config by replacing "B0" with "B"
    (e.g. "B04" becomes "B4"). Masking is disabled.
    """

    image_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
    filename_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_FILENAME_REGEX,)
    masking: ClassVar[None] = None
    # iterating Sentinel2Config.all_bands yields the band ids
    all_bands: ClassVar[list[str]] = [
        x.replace("B0", "B") for x in Sentinel2Config.all_bands
    ]
    # Tuples, like in Sentinel2Config: the band order is significant
    # and a set would not preserve it.
    rbg_bands: ClassVar[tuple[str, ...]] = tuple(
        key.replace("B0", "B") for key in Sentinel2Config.rbg_bands
    )
    ndvi_bands: ClassVar[tuple[str, ...]] = tuple(
        key.replace("B0", "B") for key in Sentinel2Config.ndvi_bands
    )
3033
+
3034
+
3035
+ class Sentinel2Band(Sentinel2Config, Band):
3036
+ """Band with Sentinel2 specific name variables and regexes."""
3037
+
3038
+ metadata_attributes = Sentinel2Config.metadata_attributes | {
3039
+ "boa_add_offset": "_get_boa_add_offset_dict",
3040
+ }
3041
+
3042
+ def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
3043
+ if self.is_mask:
3044
+ return None
3045
+
2670
3046
  pat = re.compile(
2671
3047
  r"""
2672
3048
  <BOA_ADD_OFFSET\s*
@@ -2683,30 +3059,39 @@ class Sentinel2Config:
2683
3059
  raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
2684
3060
  if not matches:
2685
3061
  raise _RegexError(f"Could not find boa_add_offset info from {pat}")
2686
- return BandIdDict(
3062
+
3063
+ dict_ = (
2687
3064
  pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
2688
3065
  )
2689
3066
 
3067
+ # some xml files have band ids in range index form
3068
+ # converting these to actual band ids (B01 etc.)
3069
+ is_integer_coded = [int(i) for i in dict_] == list(range(len(dict_)))
2690
3070
 
2691
- class Sentinel2CloudlessConfig(Sentinel2Config):
2692
- """Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic."""
2693
-
2694
- image_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
2695
- filename_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_FILENAME_REGEX,)
2696
- masking: ClassVar[None] = None
2697
- all_bands: ClassVar[list[str]] = [
2698
- x.replace("B0", "B") for x in Sentinel2Config.all_bands
2699
- ]
2700
- rbg_bands: ClassVar[list[str]] = [
2701
- x.replace("B0", "B") for x in Sentinel2Config.rbg_bands
2702
- ]
2703
- ndvi_bands: ClassVar[list[str]] = [
2704
- x.replace("B0", "B") for x in Sentinel2Config.ndvi_bands
2705
- ]
2706
-
3071
+ if is_integer_coded:
3072
+ # the xml files contain 13 bandIds for both L1C and L2A
3073
+ # even though L2A doesn't have band B10
3074
+ all_bands = list(self.l1c_bands)
3075
+ if len(all_bands) != len(dict_):
3076
+ raise ValueError(
3077
+ f"Different number of bands in xml file and config for {self.name}: {all_bands}, {list(dict_)}"
3078
+ )
3079
+ dict_ = {
3080
+ band_id: value
3081
+ for band_id, value in zip(all_bands, dict_.values(), strict=True)
3082
+ }
2707
3083
 
2708
- class Sentinel2Band(Sentinel2Config, Band):
2709
- """Band with Sentinel2 specific name variables and regexes."""
3084
+ try:
3085
+ return dict_[self.band_id]
3086
+ except KeyError as e:
3087
+ band_id = self.band_id.upper()
3088
+ for txt in ["B0", "B", "A"]:
3089
+ band_id = band_id.replace(txt, "")
3090
+ try:
3091
+ return dict_[band_id]
3092
+ except KeyError:
3093
+ continue
3094
+ raise KeyError(self.band_id, dict_) from e
2710
3095
 
2711
3096
 
2712
3097
  class Sentinel2Image(Sentinel2Config, Image):
@@ -2716,12 +3101,15 @@ class Sentinel2Image(Sentinel2Config, Image):
2716
3101
 
2717
3102
def ndvi(
    self,
    red_band: str = "B04",
    nir_band: str = "B08",
    padding: int = 0,
    copy: bool = True,
) -> NDVIBand:
    """Calculate the NDVI for the Image.

    Delegates to the parent class' ndvi method, with "B04" and "B08"
    as default red and near-infrared band ids.

    Args:
        red_band: Band id of the red band.
        nir_band: Band id of the near-infrared band.
        padding: Forwarded unchanged to the parent method.
        copy: Forwarded unchanged to the parent method.
    """
    band_ids = {"red_band": red_band, "nir_band": nir_band}
    return super().ndvi(**band_ids, padding=padding, copy=copy)
2725
3113
 
2726
3114
 
2727
3115
  class Sentinel2Collection(Sentinel2Config, ImageCollection):
@@ -2732,8 +3120,8 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
2732
3120
 
2733
3121
def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
    """ImageCollection with Sentinel2 specific name variables and path regexes.

    Raises:
        ValueError: If no 'level' keyword argument is given.
    """
    level = kwargs.get("level", None_)
    # a missing level shows up as the None_ placeholder class itself
    level_is_missing = callable(level) and isinstance(level(), None_)
    if level_is_missing:
        raise ValueError("Must specify level for Sentinel2Collection.")
    super().__init__(data=data, **kwargs)
2739
3127
 
@@ -2797,29 +3185,6 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
2797
3185
  raise ValueError("array must be 2 or 3 dimensional")
2798
3186
 
2799
3187
 
2800
- def to_xarray(
2801
- array: np.ndarray, transform: Affine, crs: Any, name: str | None = None
2802
- ) -> DataArray:
2803
- """Convert the raster to an xarray.DataArray."""
2804
- if len(array.shape) == 2:
2805
- height, width = array.shape
2806
- dims = ["y", "x"]
2807
- elif len(array.shape) == 3:
2808
- height, width = array.shape[1:]
2809
- dims = ["band", "y", "x"]
2810
- else:
2811
- raise ValueError(f"Array should be 2 or 3 dimensional. Got shape {array.shape}")
2812
-
2813
- coords = _generate_spatial_coords(transform, width, height)
2814
- return xr.DataArray(
2815
- array,
2816
- coords=coords,
2817
- dims=dims,
2818
- name=name,
2819
- attrs={"crs": crs},
2820
- )
2821
-
2822
-
2823
3188
  def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
2824
3189
  gradient_x, gradient_y = np.gradient(array, res, res)
2825
3190
 
@@ -2836,47 +3201,31 @@ def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
2836
3201
  return degrees
2837
3202
 
2838
3203
 
2839
- def _clip_loaded_array(
2840
- arr: np.ndarray,
2841
- bounds: tuple[int, int, int, int],
2842
- transform: Affine,
3204
def _clip_xarray(
    xarr: DataArray,
    mask: tuple[int, int, int, int],
    crs: Any,
    **kwargs,
) -> DataArray | np.ndarray:
    """Clip a DataArray to a mask with rioxarray.

    Args:
        xarr: The array to clip.
        mask: Geometry-like object, converted with to_geoseries.
        crs: Coordinate reference system passed to rio.clip.
        **kwargs: Additional keyword arguments passed to rio.clip.

    Returns:
        The clipped DataArray, or an empty numpy array if rio.clip raises
        NoDataInBounds.
    """
    # xarray needs a numpy array of polygons
    mask_arr: np.ndarray = to_geoseries(mask).values
    try:
        return xarr.rio.clip(
            mask_arr,
            crs=crs,
            **kwargs,
        )
    except NoDataInBounds:
        return np.array([])
2866
3220
 
2867
3221
 
2868
- def _fix_path(path: str) -> str:
2869
- return (
2870
- str(path).replace("\\", "/").replace(r"\"", "/").replace("//", "/").rstrip("/")
2871
- )
2872
-
2873
-
2874
- def _get_all_file_paths(path: str) -> list[str]:
3222
+ def _get_all_file_paths(path: str) -> set[str]:
2875
3223
  if is_dapla():
2876
- return list(sorted(set(_glob_func(path + "/**"))))
3224
+ return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
2877
3225
  else:
2878
- return list(
2879
- sorted(
3226
+ return {
3227
+ _fix_path(x)
3228
+ for x in sorted(
2880
3229
  set(
2881
3230
  _glob_func(path + "/**")
2882
3231
  + _glob_func(path + "/**/**")
@@ -2885,7 +3234,7 @@ def _get_all_file_paths(path: str) -> list[str]:
2885
3234
  + _glob_func(path + "/**/**/**/**/**")
2886
3235
  )
2887
3236
  )
2888
- )
3237
+ }
2889
3238
 
2890
3239
 
2891
3240
  def _get_images(
@@ -2900,9 +3249,8 @@ def _get_images(
2900
3249
  masking: BandMasking | None,
2901
3250
  **kwargs,
2902
3251
  ) -> list[Image]:
2903
-
2904
- with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
2905
- images = parallel(
3252
+ with joblib.Parallel(n_jobs=processes, backend="threading") as parallel:
3253
+ images: list[Image] = parallel(
2906
3254
  joblib.delayed(image_class)(
2907
3255
  path,
2908
3256
  df=df,
@@ -2942,7 +3290,7 @@ class PathlessImageError(ValueError):
2942
3290
  what = "that have been merged"
2943
3291
  elif self.instance._from_array:
2944
3292
  what = "from arrays"
2945
- elif self.instance._from_gdf:
3293
+ elif self.instance._from_geopandas:
2946
3294
  what = "from GeoDataFrames"
2947
3295
  else:
2948
3296
  raise ValueError(self.instance)
@@ -3017,13 +3365,13 @@ def _copy_and_add_df_parallel(
3017
3365
  return (i, copied)
3018
3366
 
3019
3367
 
3020
- def _get_bounds(bounds, bbox) -> None | Polygon:
3368
def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
    """Combine bounds and bbox into one geometry, or None if neither is given.

    If only one of bounds and bbox is given, it is intersected with
    band_bounds. If both are given, they are intersected with each other.
    """
    if bounds is None and bbox is None:
        return None

    if bounds is not None and bbox is not None:
        # NOTE(review): band_bounds is not applied in this case - confirm intended
        return to_shapely(bounds).intersection(to_shapely(bbox))

    only_given = bbox if bounds is None else bounds
    return to_shapely(only_given).intersection(band_bounds)
3029
3377
 
@@ -3041,7 +3389,15 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
3041
3389
 
3042
3390
 
3043
3391
  def _load_band(band: Band, **kwargs) -> None:
3044
- band.load(**kwargs)
3392
+ return band.load(**kwargs)
3393
+
3394
+
3395
+ def _band_apply(band: Band, func: Callable, **kwargs) -> None:
3396
+ return band.apply(func, **kwargs)
3397
+
3398
+
3399
+ def _clip_band(band: Band, mask, **kwargs) -> None:
3400
+ return band.clip(mask, **kwargs)
3045
3401
 
3046
3402
 
3047
3403
  def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
@@ -3053,7 +3409,7 @@ def _merge(collection: ImageCollection, **kwargs) -> Band:
3053
3409
 
3054
3410
 
3055
3411
def _zonal_one_pair(i: int, poly: Polygon, band: Band, aggfunc, array_func, func_names):
    """Aggregate the values of one band clipped to one polygon.

    The band is copied before clipping, so the input band is not clipped
    in place. Returns the result of _no_overlap_df if the clipped band has
    no values, otherwise the result of _aggregate.
    """
    clipped_band = band.copy().clip(poly)
    if np.size(clipped_band.values) == 0:
        return _no_overlap_df(func_names, i, date=band.date)
    return _aggregate(
        clipped_band.values, array_func, aggfunc, func_names, band.date, i
    )