ssb-sgis 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import os
6
6
  import random
7
7
  import re
8
8
  import time
9
+ from abc import abstractmethod
9
10
  from collections.abc import Callable
10
11
  from collections.abc import Iterable
11
12
  from collections.abc import Iterator
@@ -26,7 +27,6 @@ import rasterio
26
27
  from affine import Affine
27
28
  from geopandas import GeoDataFrame
28
29
  from geopandas import GeoSeries
29
- from matplotlib.colors import LinearSegmentedColormap
30
30
  from pandas.api.types import is_dict_like
31
31
  from rasterio.enums import MergeAlg
32
32
  from scipy import stats
@@ -41,11 +41,8 @@ from shapely.geometry import Polygon
41
41
 
42
42
  try:
43
43
  import dapla as dp
44
- from dapla.gcs import GCSFileSystem
45
44
  except ImportError:
46
-
47
- class GCSFileSystem:
48
- """Placeholder."""
45
+ pass
49
46
 
50
47
 
51
48
  try:
@@ -55,7 +52,7 @@ except ImportError:
55
52
  class exceptions:
56
53
  """Placeholder."""
57
54
 
58
- class RefreshError:
55
+ class RefreshError(Exception):
59
56
  """Placeholder."""
60
57
 
61
58
 
@@ -74,9 +71,9 @@ try:
74
71
  except ImportError:
75
72
  pass
76
73
  try:
77
- import xarray as xr
78
74
  from xarray import DataArray
79
75
  from xarray import Dataset
76
+ from xarray import combine_by_coords
80
77
  except ImportError:
81
78
 
82
79
  class DataArray:
@@ -85,6 +82,9 @@ except ImportError:
85
82
  class Dataset:
86
83
  """Placeholder."""
87
84
 
85
+ def combine_by_coords(*args, **kwargs) -> None:
86
+ raise ImportError("xarray")
87
+
88
88
 
89
89
  from ..geopandas_tools.bounds import get_total_bounds
90
90
  from ..geopandas_tools.conversion import to_bbox
@@ -95,13 +95,17 @@ from ..geopandas_tools.general import get_common_crs
95
95
  from ..helpers import _fix_path
96
96
  from ..helpers import get_all_files
97
97
  from ..helpers import get_numpy_func
98
+ from ..helpers import is_method
99
+ from ..helpers import is_property
98
100
  from ..io._is_dapla import is_dapla
99
101
  from ..io.opener import opener
100
102
  from . import sentinel_config as config
101
103
  from .base import _array_to_geojson
102
104
  from .base import _gdf_to_arr
105
+ from .base import _get_res_from_bounds
103
106
  from .base import _get_shape_from_bounds
104
107
  from .base import _get_transform_from_bounds
108
+ from .base import _res_as_tuple
105
109
  from .base import get_index_mapper
106
110
  from .indices import ndvi
107
111
  from .regex import _extract_regex_match_from_string
@@ -140,8 +144,6 @@ DATE_RANGES_TYPE = (
140
144
  | tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
141
145
  )
142
146
 
143
- FILENAME_COL_SUFFIX = "_filename"
144
-
145
147
  DEFAULT_FILENAME_REGEX = r"""
146
148
  .*?
147
149
  (?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group
@@ -161,12 +163,12 @@ ALLOWED_INIT_KWARGS = [
161
163
  "filename_regexes",
162
164
  "all_bands",
163
165
  "crs",
164
- "backend",
165
166
  "masking",
166
167
  "_merged",
168
+ "date",
167
169
  ]
168
170
 
169
- _load_counter: int = 0
171
+ _LOAD_COUNTER: int = 0
170
172
 
171
173
 
172
174
  def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
@@ -193,7 +195,7 @@ class ImageCollectionGroupBy:
193
195
  Args:
194
196
  data: Iterable of group values and ImageCollection groups.
195
197
  by: list of group attributes.
196
- collection: ImageCollection instance. Used to pass attributes.
198
+ collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
197
199
  """
198
200
  self.data = list(data)
199
201
  self.by = by
@@ -288,7 +290,7 @@ class ImageCollectionGroupBy:
288
290
 
289
291
  def __repr__(self) -> str:
290
292
  """String representation."""
291
- return f"{self.__class__.__name__}({len(self)})"
293
+ return f"{self.__class__.__name__}({len(self)}, by={self.by})"
292
294
 
293
295
 
294
296
  @dataclass(frozen=True)
@@ -304,7 +306,11 @@ class BandMasking:
304
306
 
305
307
 
306
308
  class None_:
307
- """Default value for keyword arguments that should not have a default."""
309
+ """Default None for args that are not allowed to be None."""
310
+
311
+ def __new__(cls) -> None:
312
+ """Always returns None."""
313
+ return None
308
314
 
309
315
 
310
316
  class _ImageBase:
@@ -315,58 +321,71 @@ class _ImageBase:
315
321
 
316
322
  def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
317
323
 
318
- self._mask = None
319
324
  self._bounds = None
320
- self._merged = False
321
- self._from_array = False
322
- self._from_gdf = False
323
- self.metadata_attributes = self.metadata_attributes or {}
324
325
  self._path = None
325
- self._metadata_from_xml = False
326
-
327
326
  self._bbox = to_bbox(bbox) if bbox is not None else None
328
327
 
329
- self.metadata = self._metadata_to_nested_dict(metadata)
328
+ self.metadata_attributes = self.metadata_attributes or {}
330
329
 
331
- if self.filename_regexes:
332
- if isinstance(self.filename_regexes, str):
333
- self.filename_regexes = (self.filename_regexes,)
334
- self.filename_patterns = [
335
- re.compile(regexes, flags=re.VERBOSE)
336
- for regexes in self.filename_regexes
337
- ]
330
+ if metadata is not None:
331
+ self.metadata = self._metadata_to_nested_dict(metadata)
338
332
  else:
339
- self.filename_patterns = ()
333
+ self.metadata = {}
340
334
 
341
- if self.image_regexes:
342
- if isinstance(self.image_regexes, str):
343
- self.image_regexes = (self.image_regexes,)
344
- self.image_patterns = [
345
- re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
346
- ]
347
- else:
348
- self.image_patterns = ()
335
+ self.image_patterns = self._compile_regexes("image_regexes")
336
+ self.filename_patterns = self._compile_regexes("filename_regexes")
349
337
 
350
338
  for key, value in kwargs.items():
339
+ error_obj = ValueError(
340
+ f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
341
+ )
351
342
  if key in ALLOWED_INIT_KWARGS and key in dir(self):
352
- setattr(self, key, value)
343
+ self._safe_setattr(key, value, error_obj)
353
344
  else:
354
- raise ValueError(
355
- f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
356
- )
345
+ raise error_obj
346
+
347
+ # attributes for debugging
348
+ self._metadata_from_xml = False
349
+ self._merged = False
350
+ self._from_array = False
351
+ self._from_geopandas = False
352
+
353
+ def _safe_setattr(
354
+ self, key: str, value: Any, error_obj: Exception | None = None
355
+ ) -> None:
356
+ if is_property(self, key):
357
+ setattr(self, f"_{key}", value)
358
+ elif is_method(self, key):
359
+ if error_obj is None:
360
+ raise AttributeError(f"Cannot set method '{key}'.")
361
+ raise error_obj
362
+ else:
363
+ setattr(self, key, value)
364
+
365
+ def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
366
+ regexes: tuple[str] | str = getattr(self, regex_attr)
367
+ if not regexes:
368
+ return ()
369
+ if isinstance(regexes, str):
370
+ regexes = (regexes,)
371
+ return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
357
372
 
358
373
  @staticmethod
359
374
  def _metadata_to_nested_dict(
360
375
  metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
361
- ) -> dict[str, dict[str, Any]] | None:
362
- if metadata is None:
363
- return {}
376
+ ) -> dict[str, dict[str, Any]]:
377
+ """Construct metadata dict from dictlike, DataFrame or file path.
378
+
379
+ Extract metadata value:
380
+ >>> self.metadata[self.path]['cloud_cover_percentage']
381
+ """
364
382
  if isinstance(metadata, (str | Path | os.PathLike)):
365
383
  metadata = _read_parquet_func(metadata)
366
384
 
367
385
  if isinstance(metadata, pd.DataFrame):
368
386
 
369
387
  def is_scalar(x) -> bool:
388
+ """Check if scalar because 'truth value of Series is ambigous'."""
370
389
  return not hasattr(x, "__len__") or len(x) <= 1
371
390
 
372
391
  def na_to_none(x) -> None:
@@ -374,15 +393,16 @@ class _ImageBase:
374
393
  return x if not (is_scalar(x) and pd.isna(x)) else None
375
394
 
376
395
  # to nested dict because pandas indexing gives rare KeyError with long strings
377
- metadata = {
396
+ return {
378
397
  _fix_path(path): {
379
398
  attr: na_to_none(value) for attr, value in row.items()
380
399
  }
381
400
  for path, row in metadata.iterrows()
382
401
  }
383
402
  elif is_dict_like(metadata):
384
- metadata = {_fix_path(path): value for path, value in metadata.items()}
403
+ return {_fix_path(path): value for path, value in metadata.items()}
385
404
 
405
+ # try to allow custom types with dict-like indexing
386
406
  return metadata
387
407
 
388
408
  @property
@@ -392,7 +412,6 @@ class _ImageBase:
392
412
  "res": self.res,
393
413
  "bbox": self._bbox,
394
414
  "nodata": self.nodata,
395
- "backend": self.backend,
396
415
  "metadata": self.metadata,
397
416
  }
398
417
 
@@ -406,19 +425,22 @@ class _ImageBase:
406
425
  @property
407
426
  def res(self) -> int:
408
427
  """Pixel resolution."""
428
+ # if self._res is None:
429
+ # if self.has_array:
430
+ # self._res = _get_res_from_bounds(self.bounds, self.values.shape)
431
+ # else:
432
+ # with opener(self.path) as file:
433
+ # with rasterio.open(file) as src:
434
+ # self._res = src.res
409
435
  return self._res
410
436
 
411
- @property
412
- def centroid(self) -> Point:
413
- """Centerpoint of the object."""
414
- return self.union_all().centroid
437
+ @abstractmethod
438
+ def union_all(self) -> Polygon | MultiPolygon:
439
+ pass
415
440
 
416
441
  def assign(self, **kwargs) -> "_ImageBase":
417
442
  for key, value in kwargs.items():
418
- try:
419
- setattr(self, key, value)
420
- except AttributeError:
421
- setattr(self, f"_{key}", value)
443
+ self._safe_setattr(key, value)
422
444
  return self
423
445
 
424
446
  def _name_regex_searcher(
@@ -449,7 +471,10 @@ class _ImageBase:
449
471
  )
450
472
 
451
473
  def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
452
- """Create a dataframe with file paths and image paths that match regexes."""
474
+ """Create a dataframe with file paths and image paths that match regexes.
475
+
476
+ Used in __init__ to select relevant paths fast.
477
+ """
453
478
  df = pd.DataFrame({"file_path": list(file_paths)})
454
479
 
455
480
  df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
@@ -516,12 +541,14 @@ class _ImageBase:
516
541
  class _ImageBandBase(_ImageBase):
517
542
  """Common parent class of Image and Band."""
518
543
 
519
- def intersects(self, other: GeoDataFrame | GeoSeries | Geometry) -> bool:
520
- if hasattr(other, "crs") and not pyproj.CRS(self.crs).equals(
521
- pyproj.CRS(other.crs)
544
+ def intersects(
545
+ self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
546
+ ) -> bool:
547
+ if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
548
+ pyproj.CRS(geometry.crs)
522
549
  ):
523
- raise ValueError(f"crs mismatch: {self.crs} and {other.crs}")
524
- return self.union_all().intersects(to_shapely(other))
550
+ raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
551
+ return self.union_all().intersects(to_shapely(geometry))
525
552
 
526
553
  def union_all(self) -> Polygon:
527
554
  try:
@@ -530,20 +557,21 @@ class _ImageBandBase(_ImageBase):
530
557
  return Polygon()
531
558
 
532
559
  @property
533
- def mask_percentage(self) -> float:
534
- return self.mask.values.sum() / (self.mask.width * self.mask.height) * 100
560
+ def centroid(self) -> Point:
561
+ """Centerpoint of the object."""
562
+ return self.union_all().centroid
535
563
 
536
564
  @property
537
565
  def year(self) -> str:
538
566
  if hasattr(self, "_year") and self._year:
539
567
  return self._year
540
- return self.date[:4]
568
+ return str(self.date)[:4]
541
569
 
542
570
  @property
543
571
  def month(self) -> str:
544
572
  if hasattr(self, "_month") and self._month:
545
573
  return self._month
546
- return "".join(self.date.split("-"))[4:6]
574
+ return str(self.date).replace("-", "").replace("/", "")[4:6]
547
575
 
548
576
  @property
549
577
  def name(self) -> str | None:
@@ -570,24 +598,25 @@ class _ImageBandBase(_ImageBase):
570
598
  return self._name_regex_searcher("level", self.image_patterns)
571
599
 
572
600
  def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
573
-
601
+ """Search through xml files for missing metadata attributes."""
574
602
  self._metadata_from_xml = True
575
603
 
576
604
  missing_metadata_attributes = {
577
- key: value
578
- for key, value in metadata_attributes.items()
579
- if not hasattr(self, key) or getattr(self, key) is None
605
+ attr: constructor_func
606
+ for attr, constructor_func in metadata_attributes.items()
607
+ if not hasattr(self, attr) or getattr(self, attr) is None
580
608
  }
581
609
 
582
610
  nonmissing_metadata_attributes = {
583
- key: getattr(self, key)
584
- for key in metadata_attributes
585
- if key not in missing_metadata_attributes
611
+ attr: getattr(self, attr)
612
+ for attr in metadata_attributes
613
+ if attr not in missing_metadata_attributes
586
614
  }
587
615
 
588
616
  if not missing_metadata_attributes:
589
617
  return nonmissing_metadata_attributes
590
618
 
619
+ # read all xml content once
591
620
  file_contents: list[str] = []
592
621
  for path in self._all_file_paths:
593
622
  if ".xml" not in path:
@@ -595,48 +624,63 @@ class _ImageBandBase(_ImageBase):
595
624
  with _open_func(path, "rb") as file:
596
625
  file_contents.append(file.read().decode("utf-8"))
597
626
 
598
- for key, value in missing_metadata_attributes.items():
627
+ def is_last_xml(i: int) -> bool:
628
+ return i == len(file_contents) - 1
629
+
630
+ for attr, value in missing_metadata_attributes.items():
599
631
  results = None
600
- for i, filetext in enumerate(file_contents):
632
+ for i, file_content in enumerate(file_contents):
601
633
  if isinstance(value, str) and value in dir(self):
602
- method = getattr(self, value)
634
+ # method or a hardcoded value
635
+ value: Callable | Any = getattr(self, value)
636
+
637
+ if callable(value):
603
638
  try:
604
- results = method(filetext)
639
+ results = value(file_content)
605
640
  except _RegexError as e:
606
- if i == len(self._all_file_paths) - 1:
607
- raise e
641
+ if is_last_xml(i):
642
+ raise e.__class__(self.path, e) from e
608
643
  continue
609
644
  if results is not None:
610
645
  break
611
-
612
- if callable(value):
646
+ elif (
647
+ isinstance(value, str)
648
+ or hasattr(value, "__iter__")
649
+ and all(isinstance(x, str | re.Pattern) for x in value)
650
+ ):
613
651
  try:
614
- results = value(filetext)
652
+ results = _extract_regex_match_from_string(file_content, value)
615
653
  except _RegexError as e:
616
- if i == len(self._all_file_paths) - 1:
654
+ if is_last_xml(i):
617
655
  raise e
618
- continue
619
- if results is not None:
620
- break
656
+ elif value is not None:
657
+ results = value
658
+ break
621
659
 
622
- try:
623
- results = _extract_regex_match_from_string(filetext, value)
624
- except _RegexError as e:
625
- if i == len(self._all_file_paths) - 1:
626
- raise e
627
-
628
- missing_metadata_attributes[key] = results
660
+ missing_metadata_attributes[attr] = results
629
661
 
630
662
  return missing_metadata_attributes | nonmissing_metadata_attributes
631
663
 
632
664
  def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
633
665
  """Convert the raster to an xarray.DataArray."""
666
+ attrs = {"crs": self.crs}
667
+ for attr in set(self.metadata_attributes).union({"date"}):
668
+ try:
669
+ attrs[attr] = getattr(self, attr)
670
+ except Exception:
671
+ pass
672
+
634
673
  if len(array.shape) == 2:
635
674
  height, width = array.shape
636
675
  dims = ["y", "x"]
637
676
  elif len(array.shape) == 3:
638
677
  height, width = array.shape[1:]
639
678
  dims = ["band", "y", "x"]
679
+ elif not any(dim for dim in array.shape):
680
+ DataArray(
681
+ name=self.name or self.__class__.__name__,
682
+ attrs=attrs,
683
+ )
640
684
  else:
641
685
  raise ValueError(
642
686
  f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
@@ -644,13 +688,6 @@ class _ImageBandBase(_ImageBase):
644
688
 
645
689
  coords = _generate_spatial_coords(transform, width, height)
646
690
 
647
- attrs = {"crs": self.crs}
648
- for attr in set(self.metadata_attributes).union({"date"}):
649
- try:
650
- attrs[attr] = getattr(self, attr)
651
- except Exception:
652
- pass
653
-
654
691
  return DataArray(
655
692
  array,
656
693
  coords=coords,
@@ -664,14 +701,15 @@ class Band(_ImageBandBase):
664
701
  """Band holding a single 2 dimensional array representing an image band."""
665
702
 
666
703
  cmap: ClassVar[str | None] = None
667
- backend: str = "numpy"
668
704
 
669
705
  @classmethod
670
- def from_gdf(
706
+ def from_geopandas(
671
707
  cls,
672
708
  gdf: GeoDataFrame | GeoSeries,
673
- res: int,
674
709
  *,
710
+ res: int | None = None,
711
+ out_shape: tuple[int, int] | None = None,
712
+ bounds: Any | None = None,
675
713
  fill: int = 0,
676
714
  all_touched: bool = False,
677
715
  merge_alg: Callable = MergeAlg.replace,
@@ -680,18 +718,28 @@ class Band(_ImageBandBase):
680
718
  **kwargs,
681
719
  ) -> None:
682
720
  """Create Band from a GeoDataFrame."""
683
- arr: np.ndarray = _gdf_to_arr(
684
- gdf,
685
- res=res,
686
- fill=fill,
687
- all_touched=all_touched,
688
- merge_alg=merge_alg,
689
- default_value=default_value,
690
- dtype=dtype,
691
- )
721
+ if bounds is not None:
722
+ bounds = to_bbox(bounds)
692
723
 
693
- obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
694
- obj._from_gdf = True
724
+ if out_shape == (0,):
725
+ arr = np.array([])
726
+ else:
727
+ arr = _gdf_to_arr(
728
+ gdf,
729
+ res=res,
730
+ bounds=bounds,
731
+ fill=fill,
732
+ all_touched=all_touched,
733
+ merge_alg=merge_alg,
734
+ default_value=default_value,
735
+ dtype=dtype,
736
+ out_shape=out_shape,
737
+ )
738
+ if bounds is None:
739
+ bounds = gdf.total_bounds
740
+
741
+ obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
742
+ obj._from_geopandas = True
695
743
  return obj
696
744
 
697
745
  def __init__(
@@ -710,9 +758,6 @@ class Band(_ImageBandBase):
710
758
  **kwargs,
711
759
  ) -> None:
712
760
  """Band initialiser."""
713
- if callable(res) and isinstance(res(), None_):
714
- raise TypeError("Must specify 'res'")
715
-
716
761
  if data is None:
717
762
  # allowing 'path' to replace 'data' as argument
718
763
  # to make the print repr. valid as initialiser
@@ -738,11 +783,20 @@ class Band(_ImageBandBase):
738
783
  if isinstance(data, np.ndarray):
739
784
  if self._bounds is None:
740
785
  raise ValueError("Must specify bounds when data is an array.")
786
+ if not (res is None or (callable(res) and res() is None)):
787
+ # if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
788
+ # res
789
+ # ) != _get_res_from_bounds(self._bounds, data.shape):
790
+ raise ValueError(
791
+ f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
792
+ )
741
793
  self._crs = crs
742
794
  self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
743
795
  self._from_array = True
744
796
  self.values = data
745
797
 
798
+ self._res = _get_res_from_bounds(self._bounds, self.values.shape)
799
+
746
800
  elif not isinstance(data, (str | Path | os.PathLike)):
747
801
  raise TypeError(
748
802
  "'data' must be string, Path-like or numpy.ndarray. "
@@ -750,8 +804,10 @@ class Band(_ImageBandBase):
750
804
  )
751
805
  else:
752
806
  self._path = _fix_path(str(data))
807
+ if callable(res) and res() is None:
808
+ res = None
809
+ self._res = res
753
810
 
754
- self._res = res
755
811
  if cmap is not None:
756
812
  self.cmap = cmap
757
813
  self._name = name
@@ -779,7 +835,7 @@ class Band(_ImageBandBase):
779
835
  else:
780
836
  setattr(self, key, value)
781
837
 
782
- elif self.metadata_attributes and self.path is not None and not self.is_mask:
838
+ elif self.metadata_attributes and self.path is not None:
783
839
  if self._all_file_paths is None:
784
840
  self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
785
841
  for key, value in self._get_metadata_attributes(
@@ -791,43 +847,28 @@ class Band(_ImageBandBase):
791
847
  """Makes Bands sortable by band_id."""
792
848
  return self.band_id < other.band_id
793
849
 
850
+ def value_counts(self) -> pd.Series:
851
+ """Value count of each value of the band's array."""
852
+ try:
853
+ values = self.values.data[self.values.mask == False]
854
+ except AttributeError:
855
+ values = self.values
856
+ unique_values, counts = np.unique(values, return_counts=True)
857
+ return pd.Series(counts, index=unique_values).sort_values(ascending=False)
858
+
794
859
  @property
795
860
  def values(self) -> np.ndarray:
796
861
  """The numpy array, if loaded."""
797
862
  if self._values is None:
798
- raise ArrayNotLoadedError("array is not loaded.")
863
+ raise _ArrayNotLoadedError("array is not loaded.")
799
864
  return self._values
800
865
 
801
866
  @values.setter
802
867
  def values(self, new_val):
803
- if self.backend == "numpy" and isinstance(new_val, np.ndarray):
804
- self._values = new_val
805
- return
806
- elif self.backend == "xarray" and isinstance(new_val, DataArray):
807
- # attrs can dissappear, so doing a union
808
- attrs = self._values.attrs | new_val.attrs
868
+ if isinstance(new_val, np.ndarray):
809
869
  self._values = new_val
810
- self._values.attrs = attrs
811
- return
812
-
813
- if self.backend == "numpy":
870
+ else:
814
871
  self._values = self._to_numpy(new_val)
815
- if self.backend == "xarray":
816
- if not isinstance(self._values, DataArray):
817
- self._values = self._to_xarray(
818
- new_val,
819
- transform=self.transform,
820
- )
821
-
822
- elif isinstance(new_val, np.ndarray):
823
- self._values.values = new_val
824
- else:
825
- self._values = new_val
826
-
827
- @property
828
- def mask(self) -> "Band":
829
- """Mask Band."""
830
- return self._mask
831
872
 
832
873
  @property
833
874
  def band_id(self) -> str:
@@ -839,12 +880,18 @@ class Band(_ImageBandBase):
839
880
  @property
840
881
  def height(self) -> int:
841
882
  """Pixel heigth of the image band."""
842
- return self.values.shape[-2]
883
+ try:
884
+ return self.values.shape[-2]
885
+ except IndexError:
886
+ return 0
843
887
 
844
888
  @property
845
889
  def width(self) -> int:
846
890
  """Pixel width of the image band."""
847
- return self.values.shape[-1]
891
+ try:
892
+ return self.values.shape[-1]
893
+ except IndexError:
894
+ return 0
848
895
 
849
896
  @property
850
897
  def tile(self) -> str:
@@ -892,7 +939,7 @@ class Band(_ImageBandBase):
892
939
  copied = self.copy()
893
940
  value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
894
941
  copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
895
- df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
942
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
896
943
  df[column] = f"largest_{n}"
897
944
  return df
898
945
 
@@ -903,30 +950,44 @@ class Band(_ImageBandBase):
903
950
  copied = self.copy()
904
951
  value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
905
952
  copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
906
- df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
953
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
907
954
  df[column] = f"smallest_{n}"
908
955
  return df
909
956
 
910
957
  def clip(
911
- self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
958
+ self,
959
+ mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
912
960
  ) -> "Band":
913
- """Clip band values to geometry mask."""
914
- values = _clip_xarray(
915
- self.to_xarray(),
916
- mask,
917
- crs=self.crs,
918
- **kwargs,
919
- )
920
- self._bounds = to_bbox(mask)
921
- self.transform = _get_transform_from_bounds(self._bounds, values.shape)
922
- self.values = values
961
+ """Clip band values to geometry mask while preserving bounds."""
962
+ if not self.height or not self.width:
963
+ return self
964
+
965
+ fill: int = self.nodata or 0
966
+
967
+ mask_array: np.ndarray = Band.from_geopandas(
968
+ gdf=to_gdf(mask)[["geometry"]],
969
+ default_value=1,
970
+ fill=fill,
971
+ out_shape=self.values.shape,
972
+ bounds=mask,
973
+ ).values
974
+
975
+ is_not_polygon = mask_array == fill
976
+
977
+ if isinstance(self.values, np.ma.core.MaskedArray):
978
+ self._values.mask |= is_not_polygon
979
+ else:
980
+ self._values = np.ma.array(
981
+ self.values, mask=is_not_polygon, fill_value=self.nodata
982
+ )
983
+
923
984
  return self
924
985
 
925
986
  def load(
926
987
  self,
927
988
  bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
928
989
  indexes: int | tuple[int] | None = None,
929
- masked: bool | None = None,
990
+ masked: bool = True,
930
991
  file_system=None,
931
992
  **kwargs,
932
993
  ) -> "Band":
@@ -934,11 +995,10 @@ class Band(_ImageBandBase):
934
995
 
935
996
  The array is stored in the 'values' property.
936
997
  """
937
- global _load_counter
938
- _load_counter += 1
998
+ global _LOAD_COUNTER
999
+ _LOAD_COUNTER += 1
939
1000
 
940
- if masked is None:
941
- masked = True if self.mask is None else False
1001
+ _masking = kwargs.pop("_masking", self.masking)
942
1002
 
943
1003
  bounds_was_none = bounds is None
944
1004
 
@@ -947,12 +1007,9 @@ class Band(_ImageBandBase):
947
1007
  should_return_empty: bool = bounds is not None and bounds.area == 0
948
1008
  if should_return_empty:
949
1009
  self._values = np.array([])
950
- if self.mask is not None and not self.is_mask:
951
- self._mask = self._mask.load(
952
- bounds=bounds, indexes=indexes, file_system=file_system
953
- )
954
1010
  self._bounds = None
955
1011
  self.transform = None
1012
+ # activate setter
956
1013
  self.values = self._values
957
1014
 
958
1015
  return self
@@ -962,7 +1019,6 @@ class Band(_ImageBandBase):
962
1019
 
963
1020
  if bounds is not None:
964
1021
  minx, miny, maxx, maxy = to_bbox(bounds)
965
- ## round down/up to integer to avoid precision trouble
966
1022
  # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
967
1023
  bounds = minx, miny, maxx, maxy
968
1024
 
@@ -976,21 +1032,19 @@ class Band(_ImageBandBase):
976
1032
  out_shape = kwargs.pop("out_shape", None)
977
1033
 
978
1034
  if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
979
- print(self)
980
- print(self.mask)
981
- print(self.mask.values.shape)
982
- print(self.values.shape)
983
- print([int(x) for x in bounds], [int(x) for x in self.bounds])
984
1035
  raise ValueError(
985
1036
  "Cannot re-load array with different bounds. "
986
1037
  "Use .copy() to read with different bounds. "
987
- "Or .clip(mask) to clip."
1038
+ "Or .clip(mask) to clip.",
1039
+ self,
1040
+ self.values.shape,
1041
+ [int(x) for x in bounds],
1042
+ [int(x) for x in self.bounds],
988
1043
  )
989
- # with opener(self.path, file_system=self.file_system) as f:
1044
+
990
1045
  with opener(self.path, file_system=file_system) as f:
991
1046
  with rasterio.open(f, nodata=self.nodata) as src:
992
- self._res = int(src.res[0]) if not self.res else self.res
993
-
1047
+ self._res = src.res if not self.res else self.res
994
1048
  if self.nodata is None or np.isnan(self.nodata):
995
1049
  self.nodata = src.nodata
996
1050
  else:
@@ -1003,7 +1057,7 @@ class Band(_ImageBandBase):
1003
1057
  )
1004
1058
 
1005
1059
  if bounds is None:
1006
- if self._res != int(src.res[0]):
1060
+ if self._res != src.res:
1007
1061
  if out_shape is None:
1008
1062
  out_shape = _get_shape_from_bounds(
1009
1063
  to_bbox(src.bounds), self.res, indexes
@@ -1055,18 +1109,12 @@ class Band(_ImageBandBase):
1055
1109
  else:
1056
1110
  values[values == src.nodata] = self.nodata
1057
1111
 
1058
- if self.masking and self.is_mask:
1059
- values = np.isin(values, list(self.masking["values"]))
1060
-
1061
- elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
1062
-
1063
- if not self.mask.has_array:
1064
- self._mask = self.mask.load(
1065
- bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
1066
- )
1067
- mask_arr = self.mask.values
1068
-
1112
+ if _masking and not isinstance(values, np.ma.core.MaskedArray):
1113
+ mask_arr = _read_mask_array(self, bounds=bounds)
1069
1114
  values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
1115
+ elif _masking:
1116
+ mask_arr = _read_mask_array(self, bounds=bounds)
1117
+ values.mask |= mask_arr
1070
1118
 
1071
1119
  if bounds is not None:
1072
1120
  self._bounds = to_bbox(bounds)
@@ -1077,13 +1125,6 @@ class Band(_ImageBandBase):
1077
1125
 
1078
1126
  return self
1079
1127
 
1080
- @property
1081
- def is_mask(self) -> bool:
1082
- """True if the band_id is equal to the masking band_id."""
1083
- if self.masking is None:
1084
- return False
1085
- return self.band_id == self.masking["band_id"]
1086
-
1087
1128
  @property
1088
1129
  def has_array(self) -> bool:
1089
1130
  """Whether the array is loaded."""
@@ -1091,7 +1132,7 @@ class Band(_ImageBandBase):
1091
1132
  if not isinstance(self.values, (np.ndarray | DataArray)):
1092
1133
  raise ValueError()
1093
1134
  return True
1094
- except ValueError: # also catches ArrayNotLoadedError
1135
+ except ValueError: # also catches _ArrayNotLoadedError
1095
1136
  return False
1096
1137
 
1097
1138
  def write(
@@ -1111,10 +1152,17 @@ class Band(_ImageBandBase):
1111
1152
  if self.crs is None:
1112
1153
  raise ValueError("Cannot write None crs to image.")
1113
1154
 
1155
+ if self.nodata:
1156
+ # TODO take out .data if masked?
1157
+ values_with_nodata = np.concatenate(
1158
+ [self.values.flatten(), np.array([self.nodata])]
1159
+ )
1160
+ else:
1161
+ values_with_nodata = self.values
1114
1162
  profile = {
1115
1163
  "driver": driver,
1116
1164
  "compress": compress,
1117
- "dtype": rasterio.dtypes.get_minimum_dtype(self.values),
1165
+ "dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
1118
1166
  "crs": self.crs,
1119
1167
  "transform": self.transform,
1120
1168
  "nodata": self.nodata,
@@ -1123,19 +1171,18 @@ class Band(_ImageBandBase):
1123
1171
  "width": self.width,
1124
1172
  } | kwargs
1125
1173
 
1126
- # with opener(path, "wb", file_system=self.file_system) as f:
1127
1174
  with opener(path, "wb", file_system=file_system) as f:
1128
1175
  with rasterio.open(f, "w", **profile) as dst:
1129
1176
 
1130
1177
  if dst.nodata is None:
1131
1178
  dst.nodata = _get_dtype_min(dst.dtypes[0])
1132
1179
 
1133
- # if (
1134
- # isinstance(self.values, np.ma.core.MaskedArray)
1135
- # # and dst.nodata is not None
1136
- # ):
1137
- # self.values.data[np.isnan(self.values.data)] = dst.nodata
1138
- # self.values.data[self.values.mask] = dst.nodata
1180
+ if (
1181
+ isinstance(self.values, np.ma.core.MaskedArray)
1182
+ and dst.nodata is not None
1183
+ ):
1184
+ self.values.data[np.isnan(self.values.data)] = dst.nodata
1185
+ self.values.data[self.values.mask] = dst.nodata
1139
1186
 
1140
1187
  if len(self.values.shape) == 2:
1141
1188
  dst.write(self.values, indexes=1)
@@ -1223,7 +1270,7 @@ class Band(_ImageBandBase):
1223
1270
  The gradient will be 1 (1 meter up for every meter forward).
1224
1271
  The calculation is by default done in place to save memory.
1225
1272
 
1226
- >>> band.gradient()
1273
+ >>> band.gradient(copy=False)
1227
1274
  >>> band.values
1228
1275
  array([[0., 1., 1., 1., 0.],
1229
1276
  [1., 1., 1., 1., 1.],
@@ -1284,11 +1331,13 @@ class Band(_ImageBandBase):
1284
1331
  dropna=dropna,
1285
1332
  )
1286
1333
 
1287
- def to_gdf(self, column: str = "value") -> GeoDataFrame:
1334
+ def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
1288
1335
  """Create a GeoDataFrame from the image Band.
1289
1336
 
1290
1337
  Args:
1291
1338
  column: Name of resulting column that holds the raster values.
1339
+ dropna: Whether to remove values that are NA or equal to the nodata
1340
+ value.
1292
1341
 
1293
1342
  Returns:
1294
1343
  A GeoDataFrame with a geometry column and array values.
@@ -1296,24 +1345,28 @@ class Band(_ImageBandBase):
1296
1345
  if not hasattr(self, "_values"):
1297
1346
  raise ValueError("Array is not loaded.")
1298
1347
 
1348
+ if isinstance(self.values, np.ma.core.MaskedArray):
1349
+ self.values.data[self.values.mask] = self.nodata or 0
1299
1350
  if self.values.shape[0] == 0:
1300
- return GeoDataFrame({"geometry": []}, crs=self.crs)
1301
-
1302
- return GeoDataFrame(
1303
- pd.DataFrame(
1304
- _array_to_geojson(
1305
- self.values, self.transform, processes=self.processes
1351
+ df = GeoDataFrame({"geometry": []}, crs=self.crs)
1352
+ else:
1353
+ df = GeoDataFrame(
1354
+ pd.DataFrame(
1355
+ _array_to_geojson(
1356
+ self.values, self.transform, processes=self.processes
1357
+ ),
1358
+ columns=[column, "geometry"],
1306
1359
  ),
1307
- columns=[column, "geometry"],
1308
- ),
1309
- geometry="geometry",
1310
- crs=self.crs,
1311
- )
1360
+ geometry="geometry",
1361
+ crs=self.crs,
1362
+ )
1363
+
1364
+ if dropna:
1365
+ return df[(df[column] != self.nodata) & (df[column].notna())]
1366
+ return df
1312
1367
 
1313
1368
  def to_xarray(self) -> DataArray:
1314
1369
  """Convert the raster to an xarray.DataArray."""
1315
- if self.backend == "xarray":
1316
- return self.values
1317
1370
  return self._to_xarray(
1318
1371
  self.values,
1319
1372
  transform=self.transform,
@@ -1328,24 +1381,29 @@ class Band(_ImageBandBase):
1328
1381
  self, arr: np.ndarray | DataArray, masked: bool = True
1329
1382
  ) -> np.ndarray | np.ma.core.MaskedArray:
1330
1383
  if not isinstance(arr, np.ndarray):
1384
+ mask_arr = None
1331
1385
  if masked:
1332
1386
  try:
1333
1387
  mask_arr = arr.isnull().values
1334
1388
  except AttributeError:
1335
- mask_arr = np.full(arr.shape, False)
1389
+ pass
1336
1390
  try:
1337
1391
  arr = arr.to_numpy()
1338
1392
  except AttributeError:
1339
1393
  arr = arr.values
1394
+ if mask_arr is not None:
1395
+ arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
1396
+
1340
1397
  if not isinstance(arr, np.ndarray):
1341
1398
  arr = np.array(arr)
1399
+
1342
1400
  if (
1343
1401
  masked
1344
- and self.mask is not None
1345
- and not self.is_mask
1346
1402
  and not isinstance(arr, np.ma.core.MaskedArray)
1403
+ and mask_arr is not None
1347
1404
  ):
1348
1405
  arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
1406
+
1349
1407
  return arr
1350
1408
 
1351
1409
  def __repr__(self) -> str:
@@ -1368,10 +1426,6 @@ class NDVIBand(Band):
1368
1426
 
1369
1427
  cmap: str = "Greens"
1370
1428
 
1371
- # @staticmethod
1372
- # def get_cmap(arr: np.ndarray):
1373
- # return get_cmap(arr)
1374
-
1375
1429
 
1376
1430
  def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
1377
1431
  arr = np.median(arr, axis=0).astype(int)
@@ -1383,12 +1437,12 @@ class Image(_ImageBandBase):
1383
1437
  """Image consisting of one or more Bands."""
1384
1438
 
1385
1439
  band_class: ClassVar[Band] = Band
1386
- backend: str = "numpy"
1387
1440
 
1388
1441
  def __init__(
1389
1442
  self,
1390
1443
  data: str | Path | Sequence[Band] | None = None,
1391
- res: int | None = None,
1444
+ res: int | None_ = None_,
1445
+ mask: "Band | None" = None,
1392
1446
  processes: int = 1,
1393
1447
  df: pd.DataFrame | None = None,
1394
1448
  nodata: int | None = None,
@@ -1409,12 +1463,18 @@ class Image(_ImageBandBase):
1409
1463
  self.processes = processes
1410
1464
  self._crs = None
1411
1465
  self._bands = None
1466
+ self._mask = mask
1467
+
1468
+ if isinstance(data, Band):
1469
+ data = [data]
1412
1470
 
1413
1471
  if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
1414
1472
  self._construct_image_from_bands(data, res)
1415
1473
  return
1416
1474
  elif not isinstance(data, (str | Path | os.PathLike)):
1417
- raise TypeError("'data' must be string, Path-like or a sequence of Band.")
1475
+ raise TypeError(
1476
+ f"'data' must be string, Path-like or a sequence of Band. Got {data}"
1477
+ )
1418
1478
 
1419
1479
  self._res = res
1420
1480
  self._path = _fix_path(data)
@@ -1422,7 +1482,8 @@ class Image(_ImageBandBase):
1422
1482
  if all_file_paths is None and self.path:
1423
1483
  self._all_file_paths = _get_all_file_paths(self.path)
1424
1484
  elif self.path:
1425
- all_file_paths = {_fix_path(x) for x in all_file_paths}
1485
+ name = Path(self.path).name
1486
+ all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
1426
1487
  self._all_file_paths = {x for x in all_file_paths if self.path in x}
1427
1488
  else:
1428
1489
  self._all_file_paths = None
@@ -1434,11 +1495,7 @@ class Image(_ImageBandBase):
1434
1495
 
1435
1496
  df["image_path"] = df["image_path"].astype(str)
1436
1497
 
1437
- cols_to_explode = [
1438
- "file_path",
1439
- "file_name",
1440
- *[x for x in df if FILENAME_COL_SUFFIX in x],
1441
- ]
1498
+ cols_to_explode = ["file_path", "file_name"]
1442
1499
  try:
1443
1500
  df = df.explode(cols_to_explode, ignore_index=True)
1444
1501
  except ValueError:
@@ -1466,20 +1523,92 @@ class Image(_ImageBandBase):
1466
1523
  else:
1467
1524
  setattr(self, key, value)
1468
1525
 
1469
- else:
1526
+ elif self.metadata_attributes and self.path is not None:
1470
1527
  for key, value in self._get_metadata_attributes(
1471
1528
  self.metadata_attributes
1472
1529
  ).items():
1473
1530
  setattr(self, key, value)
1474
1531
 
1532
+ def clip(
1533
+ self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
1534
+ ) -> "Image":
1535
+ """Clip band values to geometry mask while preserving bounds."""
1536
+ copied = self.copy() if copy else self
1537
+
1538
+ fill: int = self.nodata or 0
1539
+
1540
+ mask_array: np.ndarray = Band.from_geopandas(
1541
+ gdf=to_gdf(mask)[["geometry"]],
1542
+ default_value=1,
1543
+ fill=fill,
1544
+ out_shape=next(iter(self)).values.shape,
1545
+ bounds=self.bounds,
1546
+ ).values
1547
+
1548
+ is_not_polygon = mask_array == fill
1549
+
1550
+ for band in copied:
1551
+ if isinstance(band.values, np.ma.core.MaskedArray):
1552
+ band._values.mask |= is_not_polygon
1553
+ else:
1554
+ band._values = np.ma.array(
1555
+ band.values, mask=is_not_polygon, fill_value=band.nodata
1556
+ )
1557
+
1558
+ return copied
1559
+
1560
+ def load(
1561
+ self,
1562
+ bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
1563
+ indexes: int | tuple[int] | None = None,
1564
+ file_system=None,
1565
+ **kwargs,
1566
+ ) -> "ImageCollection":
1567
+ """Load all image Bands with threading."""
1568
+ if bounds is None and indexes is None and all(band.has_array for band in self):
1569
+ return self
1570
+
1571
+ if self.masking:
1572
+ mask_array: np.ndarray = _read_mask_array(
1573
+ self,
1574
+ bounds=bounds,
1575
+ indexes=indexes,
1576
+ file_system=file_system,
1577
+ **kwargs,
1578
+ )
1579
+
1580
+ with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
1581
+ parallel(
1582
+ joblib.delayed(_load_band)(
1583
+ band,
1584
+ bounds=bounds,
1585
+ indexes=indexes,
1586
+ file_system=file_system,
1587
+ _masking=None,
1588
+ **kwargs,
1589
+ )
1590
+ for band in self
1591
+ )
1592
+
1593
+ if self.masking:
1594
+ for band in self:
1595
+ if isinstance(band.values, np.ma.core.MaskedArray):
1596
+ band.values.mask |= mask_array
1597
+ else:
1598
+ band.values = np.ma.array(
1599
+ band.values, mask=mask_array, fill_value=self.nodata
1600
+ )
1601
+
1602
+ return self
1603
+
1475
1604
  def _construct_image_from_bands(
1476
1605
  self, data: Sequence[Band], res: int | None
1477
1606
  ) -> None:
1478
1607
  self._bands = list(data)
1479
1608
  if res is None:
1480
- res = list({band.res for band in self.bands})
1609
+ res = {band.res for band in self.bands}
1481
1610
  if len(res) == 1:
1482
- self._res = res[0]
1611
+ self._res = next(iter(res))
1483
1612
  else:
1484
1613
  raise ValueError(f"Different resolutions for the bands: {res}")
1485
1614
  else:
@@ -1525,8 +1654,7 @@ class Image(_ImageBandBase):
1525
1654
  arr,
1526
1655
  bounds=red.bounds,
1527
1656
  crs=red.crs,
1528
- mask=red.mask,
1529
- **red._common_init_kwargs,
1657
+ **{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
1530
1658
  )
1531
1659
 
1532
1660
  def get_brightness(
@@ -1557,81 +1685,16 @@ class Image(_ImageBandBase):
1557
1685
  brightness,
1558
1686
  bounds=red.bounds,
1559
1687
  crs=self.crs,
1560
- mask=self.mask,
1561
- **self._common_init_kwargs,
1688
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
1562
1689
  )
1563
1690
 
1564
1691
  def to_xarray(self) -> DataArray:
1565
1692
  """Convert the raster to an xarray.DataArray."""
1566
- if self.backend == "xarray":
1567
- return self.values
1568
-
1569
1693
  return self._to_xarray(
1570
1694
  np.array([band.values for band in self]),
1571
1695
  transform=self[0].transform,
1572
1696
  )
1573
1697
 
1574
- @property
1575
- def mask(self) -> Band | None:
1576
- """Mask Band."""
1577
- if self.masking is None:
1578
- return None
1579
-
1580
- elif self._mask is not None:
1581
- return self._mask
1582
-
1583
- elif self._bands is not None and all(band.mask is not None for band in self):
1584
- if len({id(band.mask) for band in self}) > 1:
1585
- raise ValueError(
1586
- "Image bands must have same mask.",
1587
- {id(band.mask) for band in self},
1588
- ) # TODO
1589
- self._mask = next(
1590
- iter([band.mask for band in self if band.mask is not None])
1591
- )
1592
- return self._mask
1593
-
1594
- mask_band_id = self.masking["band_id"]
1595
- mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
1596
- if len(mask_paths) > 1:
1597
- raise ValueError(
1598
- f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
1599
- )
1600
- elif not mask_paths:
1601
- raise ValueError(
1602
- f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
1603
- + str([Path(x).name for x in _ls_func(self.path)])
1604
- )
1605
-
1606
- self._mask = self.band_class(
1607
- mask_paths[0],
1608
- **self._common_init_kwargs,
1609
- )
1610
- if self._bands is not None:
1611
- for band in self:
1612
- band._mask = self._mask
1613
- return self._mask
1614
-
1615
- @mask.setter
1616
- def mask(self, values: Band | None) -> None:
1617
- if values is None:
1618
- self._mask = None
1619
- for band in self:
1620
- band._mask = None
1621
- return
1622
- if not isinstance(values, Band):
1623
- raise TypeError(f"mask must be Band. Got {type(values)}")
1624
- self._mask = values
1625
- mask_arr = self._mask.values
1626
- for band in self:
1627
- band._mask = self._mask
1628
- try:
1629
- band.values = np.ma.array(
1630
- band.values.data, mask=mask_arr, fill_value=band.nodata
1631
- )
1632
- except ArrayNotLoadedError:
1633
- pass
1634
-
1635
1698
  @property
1636
1699
  def band_ids(self) -> list[str]:
1637
1700
  """The Band ids."""
@@ -1654,12 +1717,9 @@ class Image(_ImageBandBase):
1654
1717
  else:
1655
1718
  paths = self._df["file_path"]
1656
1719
 
1657
- mask = self.mask
1658
-
1659
1720
  self._bands = [
1660
1721
  self.band_class(
1661
1722
  path,
1662
- mask=mask,
1663
1723
  all_file_paths=self._all_file_paths,
1664
1724
  **self._common_init_kwargs,
1665
1725
  )
@@ -1750,10 +1810,10 @@ class Image(_ImageBandBase):
1750
1810
  bounds.append(band.bounds)
1751
1811
  return get_total_bounds(bounds)
1752
1812
 
1753
- def to_gdf(self, column: str = "value") -> GeoDataFrame:
1813
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
1754
1814
  """Convert the array to a GeoDataFrame of grid polygons and values."""
1755
1815
  return pd.concat(
1756
- [band.to_gdf(column=column) for band in self], ignore_index=True
1816
+ [band.to_geopandas(column=column) for band in self], ignore_index=True
1757
1817
  )
1758
1818
 
1759
1819
  def sample(
@@ -1868,13 +1928,12 @@ class ImageCollection(_ImageBase):
1868
1928
  image_class: ClassVar[Image] = Image
1869
1929
  band_class: ClassVar[Band] = Band
1870
1930
  _metadata_attribute_collection_type: ClassVar[type] = pd.Series
1871
- backend: str = "numpy"
1872
1931
 
1873
1932
  def __init__(
1874
1933
  self,
1875
1934
  data: str | Path | Sequence[Image] | Sequence[str | Path],
1876
- res: int,
1877
- level: str | None = None_,
1935
+ res: int | None_ = None_,
1936
+ level: str | None_ | None = None_,
1878
1937
  processes: int = 1,
1879
1938
  metadata: str | dict | pd.DataFrame | None = None,
1880
1939
  nodata: int | None = None,
@@ -1890,7 +1949,7 @@ class ImageCollection(_ImageBase):
1890
1949
 
1891
1950
  super().__init__(metadata=metadata, **kwargs)
1892
1951
 
1893
- if callable(level) and isinstance(level(), None_):
1952
+ if callable(level) and level() is None:
1894
1953
  level = None
1895
1954
 
1896
1955
  self.nodata = nodata
@@ -1911,13 +1970,19 @@ class ImageCollection(_ImageBase):
1911
1970
  elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
1912
1971
  # adding band paths (asuming 'data' is a sequence of image paths)
1913
1972
  try:
1914
- self._all_file_paths = _get_child_paths_threaded(data) | set(data)
1973
+ self._all_file_paths = _get_child_paths_threaded(data) | {
1974
+ _fix_path(x) for x in data
1975
+ }
1915
1976
  except FileNotFoundError as e:
1916
1977
  if _from_root:
1917
1978
  raise TypeError(
1918
- "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
1979
+ "When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
1919
1980
  ) from e
1920
1981
  raise e
1982
+ if self.level:
1983
+ self._all_file_paths = [
1984
+ path for path in self._all_file_paths if self.level in path
1985
+ ]
1921
1986
  self._df = self._create_metadata_df(self._all_file_paths)
1922
1987
  return
1923
1988
 
@@ -1935,7 +2000,9 @@ class ImageCollection(_ImageBase):
1935
2000
 
1936
2001
  self._df = self._create_metadata_df(self._all_file_paths)
1937
2002
 
1938
- def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
2003
+ def groupby(
2004
+ self, by: str | list[str], copy: bool = True, **kwargs
2005
+ ) -> ImageCollectionGroupBy:
1939
2006
  """Group the Collection by Image or Band attribute(s)."""
1940
2007
  df = pd.DataFrame(
1941
2008
  [(i, img) for i, img in enumerate(self) for _ in img],
@@ -1962,8 +2029,10 @@ class ImageCollection(_ImageBase):
1962
2029
  return ImageCollectionGroupBy(
1963
2030
  sorted(
1964
2031
  parallel(
1965
- joblib.delayed(_copy_and_add_df_parallel)(i, group, self)
1966
- for i, group in df.groupby(by, **kwargs)
2032
+ joblib.delayed(_copy_and_add_df_parallel)(
2033
+ group_values, group_df, self, copy
2034
+ )
2035
+ for group_values, group_df in df.groupby(by, **kwargs)
1967
2036
  )
1968
2037
  ),
1969
2038
  by=by,
@@ -2004,6 +2073,51 @@ class ImageCollection(_ImageBase):
2004
2073
 
2005
2074
  return self
2006
2075
 
2076
+ def pixelwise(
2077
+ self,
2078
+ func: Callable,
2079
+ kwargs: dict | None = None,
2080
+ index_aligned_kwargs: dict | None = None,
2081
+ masked: bool = True,
2082
+ ) -> np.ndarray | tuple[np.ndarray] | None:
2083
+ """Run a function for each pixel.
2084
+
2085
+ The function should take a 1d array as first argument. This will be
2086
+ the pixel values for all bands in all images in the collection.
2087
+ """
2088
+ values = np.array([band.values for img in self for band in img])
2089
+
2090
+ if (
2091
+ masked
2092
+ and self.nodata is not None
2093
+ and hasattr(next(iter(next(iter(self)))).values, "mask")
2094
+ ):
2095
+ mask_array = np.array(
2096
+ [
2097
+ (band.values.mask) | (band.values.data == self.nodata)
2098
+ for img in self
2099
+ for band in img
2100
+ ]
2101
+ )
2102
+ elif masked and self.nodata is not None:
2103
+ mask_array = np.array(
2104
+ [band.values == self.nodata for img in self for band in img]
2105
+ )
2106
+ elif masked:
2107
+ mask_array = np.array([band.values.mask for img in self for band in img])
2108
+ else:
2109
+ mask_array = None
2110
+
2111
+ return pixelwise(
2112
+ func=func,
2113
+ values=values,
2114
+ mask_array=mask_array,
2115
+ index_aligned_kwargs=index_aligned_kwargs,
2116
+ kwargs=kwargs,
2117
+ processes=self.processes,
2118
+ nodata=self.nodata or np.nan,
2119
+ )
2120
+
2007
2121
  def get_unique_band_ids(self) -> list[str]:
2008
2122
  """Get a list of unique band_ids across all images."""
2009
2123
  return list({band.band_id for img in self for band in img})
@@ -2109,8 +2223,7 @@ class ImageCollection(_ImageBase):
2109
2223
  arr,
2110
2224
  bounds=bounds,
2111
2225
  crs=crs,
2112
- mask=self.mask,
2113
- **self._common_init_kwargs,
2226
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
2114
2227
  )
2115
2228
 
2116
2229
  band._merged = True
@@ -2183,7 +2296,7 @@ class ImageCollection(_ImageBase):
2183
2296
  bounds=out_bounds,
2184
2297
  crs=crs,
2185
2298
  band_id=band_id,
2186
- **self._common_init_kwargs,
2299
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
2187
2300
  )
2188
2301
  )
2189
2302
 
@@ -2296,22 +2409,11 @@ class ImageCollection(_ImageBase):
2296
2409
  ):
2297
2410
  return self
2298
2411
 
2299
- # if self.processes == 1:
2300
- # for img in self:
2301
- # for band in img:
2302
- # band.load(
2303
- # bounds=bounds,
2304
- # indexes=indexes,
2305
- # file_system=file_system,
2306
- # **kwargs,
2307
- # )
2308
- # return self
2309
-
2310
2412
  with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2311
2413
  if self.masking:
2312
- parallel(
2313
- joblib.delayed(_load_band)(
2314
- img.mask,
2414
+ masks: list[np.ndarray] = parallel(
2415
+ joblib.delayed(_read_mask_array)(
2416
+ img,
2315
2417
  bounds=bounds,
2316
2418
  indexes=indexes,
2317
2419
  file_system=file_system,
@@ -2319,14 +2421,6 @@ class ImageCollection(_ImageBase):
2319
2421
  )
2320
2422
  for img in self
2321
2423
  )
2322
- for img in self:
2323
- for band in img:
2324
- band._mask = img.mask
2325
-
2326
- # print({img.mask.has_array for img in self })
2327
- # print({band.mask.has_array for img in self for band in img})
2328
-
2329
- # with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2330
2424
 
2331
2425
  parallel(
2332
2426
  joblib.delayed(_load_band)(
@@ -2334,34 +2428,86 @@ class ImageCollection(_ImageBase):
2334
2428
  bounds=bounds,
2335
2429
  indexes=indexes,
2336
2430
  file_system=file_system,
2431
+ _masking=None,
2337
2432
  **kwargs,
2338
2433
  )
2339
2434
  for img in self
2340
2435
  for band in img
2341
2436
  )
2342
2437
 
2438
+ if self.masking:
2439
+ for img, mask_array in zip(self, masks, strict=True):
2440
+ for band in img:
2441
+ if isinstance(band.values, np.ma.core.MaskedArray):
2442
+ band.values.mask |= mask_array
2443
+ else:
2444
+ band.values = np.ma.array(
2445
+ band.values, mask=mask_array, fill_value=self.nodata
2446
+ )
2447
+
2343
2448
  return self
2344
2449
 
2345
2450
  def clip(
2346
2451
  self,
2347
2452
  mask: Geometry | GeoDataFrame | GeoSeries,
2348
- **kwargs,
2453
+ dropna: bool = True,
2454
+ copy: bool = True,
2349
2455
  ) -> "ImageCollection":
2350
- """Clip all image Bands with 'loky'."""
2351
- if self.processes == 1:
2352
- for img in self:
2353
- for band in img:
2354
- band.clip(mask, **kwargs)
2355
- return self
2456
+ """Clip all image Bands while preserving bounds."""
2457
+ copied = self.copy() if copy else self
2356
2458
 
2357
- with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2358
- parallel(
2359
- joblib.delayed(_clip_band)(band, mask, **kwargs)
2360
- for img in self
2459
+ copied._images = [img for img in copied if img.union_all()]
2460
+
2461
+ fill: int = self.nodata or 0
2462
+
2463
+ common_band_from_geopandas_kwargs = dict(
2464
+ gdf=to_gdf(mask)[["geometry"]],
2465
+ default_value=1,
2466
+ fill=fill,
2467
+ )
2468
+
2469
+ for img in copied:
2470
+ img._rounded_bounds = tuple(int(x) for x in img.bounds)
2471
+
2472
+ for bounds in {img._rounded_bounds for img in copied}:
2473
+ shapes = {
2474
+ band.values.shape
2475
+ for img in copied
2361
2476
  for band in img
2362
- )
2477
+ if img._rounded_bounds == bounds
2478
+ }
2479
+ if len(shapes) != 1:
2480
+ raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")
2363
2481
 
2364
- return self
2482
+ mask_array: np.ndarray = Band.from_geopandas(
2483
+ **common_band_from_geopandas_kwargs,
2484
+ out_shape=next(iter(shapes)),
2485
+ bounds=bounds,
2486
+ ).values
2487
+
2488
+ is_not_polygon = mask_array == fill
2489
+
2490
+ for img in copied:
2491
+ if img._rounded_bounds != bounds:
2492
+ continue
2493
+
2494
+ for band in img:
2495
+ if isinstance(band.values, np.ma.core.MaskedArray):
2496
+ band._values.mask |= is_not_polygon
2497
+ else:
2498
+ band._values = np.ma.array(
2499
+ band.values, mask=is_not_polygon, fill_value=band.nodata
2500
+ )
2501
+
2502
+ for img in copied:
2503
+ del img._rounded_bounds
2504
+
2505
+ if dropna:
2506
+ copied.images = [
2507
+ img for img in copied if any(np.sum(band.values) for band in img)
2508
+ ]
2509
+
2510
+ return copied
2365
2511
 
2366
2512
  def _set_bbox(
2367
2513
  self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
@@ -2372,17 +2518,12 @@ class ImageCollection(_ImageBase):
2372
2518
  if self._images is not None:
2373
2519
  for img in self._images:
2374
2520
  img._bbox = self._bbox
2375
- if img.mask is not None:
2376
- img.mask._bbox = self._bbox
2377
2521
  if img.bands is None:
2378
2522
  continue
2379
2523
  for band in img:
2380
2524
  band._bbox = self._bbox
2381
2525
  bounds = box(*band._bbox).intersection(box(*band.bounds))
2382
2526
  band._bounds = to_bbox(bounds) if not bounds.is_empty else None
2383
- if band.mask is not None:
2384
- band.mask._bbox = self._bbox
2385
- band.mask._bounds = band._bounds
2386
2527
 
2387
2528
  return self
2388
2529
 
@@ -2488,10 +2629,10 @@ class ImageCollection(_ImageBase):
2488
2629
  **kwargs,
2489
2630
  )
2490
2631
 
2491
- return xr.combine_by_coords(list(xarrs.values()))
2632
+ return combine_by_coords(list(xarrs.values()))
2492
2633
  # return Dataset(xarrs)
2493
2634
 
2494
- def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
2635
+ def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
2495
2636
  """Convert each band in each Image to a GeoDataFrame."""
2496
2637
  out = {}
2497
2638
  i = 0
@@ -2501,10 +2642,13 @@ class ImageCollection(_ImageBase):
2501
2642
  try:
2502
2643
  name = band.name
2503
2644
  except AttributeError:
2645
+ name = None
2646
+
2647
+ if name is None:
2504
2648
  name = f"{self.__class__.__name__}({i})"
2505
2649
 
2506
2650
  if name not in out:
2507
- out[name] = band.to_gdf(column=column)
2651
+ out[name] = band.to_geopandas(column=column)
2508
2652
  return out
2509
2653
 
2510
2654
  def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
@@ -2561,10 +2705,6 @@ class ImageCollection(_ImageBase):
2561
2705
 
2562
2706
  return copied
2563
2707
 
2564
- def __or__(self, collection: "ImageCollection") -> "ImageCollection":
2565
- """Concatenate the collection with another collection."""
2566
- return concat_image_collections([self, collection])
2567
-
2568
2708
  def __iter__(self) -> Iterator[Image]:
2569
2709
  """Iterate over the images."""
2570
2710
  return iter(self.images)
@@ -2574,14 +2714,16 @@ class ImageCollection(_ImageBase):
2574
2714
  return len(self.images)
2575
2715
 
2576
2716
  def __getattr__(self, attr: str) -> Any:
2577
- """Make iterable of metadata_attribute."""
2717
+ """Make iterable of metadata attribute."""
2578
2718
  if attr in (self.metadata_attributes or {}):
2579
2719
  return self._metadata_attribute_collection_type(
2580
2720
  [getattr(img, attr) for img in self]
2581
2721
  )
2582
2722
  return super().__getattribute__(attr)
2583
2723
 
2584
- def __getitem__(self, item: int | slice | Sequence[int | bool]) -> Image:
2724
+ def __getitem__(
2725
+ self, item: int | slice | Sequence[int | bool]
2726
+ ) -> "Image | ImageCollection":
2585
2727
  """Select one Image by integer index, or multiple Images by slice, list of int."""
2586
2728
  if isinstance(item, int):
2587
2729
  return self.images[item]
@@ -2620,14 +2762,14 @@ class ImageCollection(_ImageBase):
2620
2762
  return copied
2621
2763
 
2622
2764
  @property
2623
- def dates(self) -> list[str]:
2765
+ def date(self) -> Any:
2624
2766
  """List of image dates."""
2625
- return [img.date for img in self]
2767
+ return self._metadata_attribute_collection_type([img.date for img in self])
2626
2768
 
2627
2769
  @property
2628
- def image_paths(self) -> list[str]:
2770
+ def image_paths(self) -> Any:
2629
2771
  """List of image paths."""
2630
- return [img.path for img in self]
2772
+ return self._metadata_attribute_collection_type([img.path for img in self])
2631
2773
 
2632
2774
  @property
2633
2775
  def images(self) -> list["Image"]:
@@ -2645,21 +2787,6 @@ class ImageCollection(_ImageBase):
2645
2787
  **self._common_init_kwargs,
2646
2788
  )
2647
2789
 
2648
- if self.masking is not None:
2649
- images = []
2650
- for image in self._images:
2651
- # TODO why this loop?
2652
- try:
2653
- if not isinstance(image.mask, Band):
2654
- raise ValueError()
2655
- images.append(image)
2656
- except ValueError as e:
2657
- raise e
2658
- continue
2659
- self._images = images
2660
- for image in self._images:
2661
- image._bands = [band for band in image if band.band_id is not None]
2662
-
2663
2790
  self._images = [img for img in self if len(img)]
2664
2791
 
2665
2792
  if self._should_be_sorted:
@@ -2689,24 +2816,22 @@ class ImageCollection(_ImageBase):
2689
2816
 
2690
2817
  @images.setter
2691
2818
  def images(self, new_value: list["Image"]) -> list["Image"]:
2692
- self._images = list(new_value)
2693
- if not all(isinstance(x, Image) for x in self._images):
2819
+ new_value = list(new_value)
2820
+ if not new_value:
2821
+ self._images = new_value
2822
+ return
2823
+ if all(isinstance(x, Band) for x in new_value):
2824
+ if len(new_value) != len(self):
2825
+ raise ValueError("'images' must have same length as number of images.")
2826
+ new_images = []
2827
+ for i, img in enumerate(self):
2828
+ img._bands = [new_value[i]]
2829
+ new_images.append(img)
2830
+ self._images = new_images
2831
+ return
2832
+ if not all(isinstance(x, Image) for x in new_value):
2694
2833
  raise TypeError("images should be a sequence of Image.")
2695
-
2696
- def __repr__(self) -> str:
2697
- """String representation."""
2698
- root = ""
2699
- if self.path is not None:
2700
- data = f"'{self.path}'"
2701
- elif all(img.path is not None for img in self):
2702
- data = [img.path for img in self]
2703
- parents = {str(Path(path).parent) for path in data}
2704
- if len(parents) == 1:
2705
- data = [Path(path).name for path in data]
2706
- root = f" root='{next(iter(parents))}',"
2707
- else:
2708
- data = [img for img in self]
2709
- return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
2834
+ self._images = new_value
2710
2835
 
2711
2836
  def union_all(self) -> Polygon | MultiPolygon:
2712
2837
  """(Multi)Polygon representing the union of all image bounds."""
@@ -2763,7 +2888,6 @@ class ImageCollection(_ImageBase):
2763
2888
  if "date" in x_var and subcollection._should_be_sorted:
2764
2889
  subcollection._images = list(sorted(subcollection._images))
2765
2890
 
2766
- y = np.array([band.values for img in subcollection for band in img])
2767
2891
  if "date" in x_var and subcollection._should_be_sorted:
2768
2892
  x = np.array(
2769
2893
  [
@@ -2780,120 +2904,35 @@ class ImageCollection(_ImageBase):
2780
2904
  - pd.Timestamp(np.min(x))
2781
2905
  ).days
2782
2906
  else:
2783
- x = np.arange(0, len(y))
2784
-
2785
- mask = np.array(
2786
- [
2787
- (
2788
- band.values.mask
2789
- if hasattr(band.values, "mask")
2790
- else np.full(band.values.shape, False)
2791
- )
2792
- for img in subcollection
2793
- for band in img
2794
- ]
2907
+ x = np.arange(0, sum(1 for img in subcollection for band in img))
2908
+
2909
+ subcollection.pixelwise(
2910
+ _plot_pixels_1d,
2911
+ kwargs=dict(
2912
+ alpha=alpha,
2913
+ x_var=x_var,
2914
+ y_label=y_label,
2915
+ rounding=rounding,
2916
+ first_date=first_date,
2917
+ figsize=figsize,
2918
+ ),
2919
+ index_aligned_kwargs=dict(x=x),
2795
2920
  )
2796
2921
 
2797
- if x_var == "days_since_start":
2798
- x = x - np.min(x)
2799
-
2800
- for i in range(y.shape[1]):
2801
- for j in range(y.shape[2]):
2802
- this_y = y[:, i, j]
2803
-
2804
- this_mask = mask[:, i, j]
2805
- this_x = x[~this_mask]
2806
- this_y = this_y[~this_mask]
2807
-
2808
- if ylim:
2809
- condition = (this_y >= ylim[0]) & (this_y <= ylim[1])
2810
- this_y = this_y[condition]
2811
- this_x = this_x[condition]
2812
-
2813
- coef, intercept = np.linalg.lstsq(
2814
- np.vstack([this_x, np.ones(this_x.shape[0])]).T,
2815
- this_y,
2816
- rcond=None,
2817
- )[0]
2818
- predicted = np.array([intercept + coef * x for x in this_x])
2819
-
2820
- predicted_start = predicted[0]
2821
- predicted_end = predicted[-1]
2822
- predicted_change = predicted_end - predicted_start
2823
-
2824
- # Degrees of freedom
2825
- dof = len(this_x) - 2
2826
-
2827
- # 95% confidence interval
2828
- t_val = stats.t.ppf(1 - alpha / 2, dof)
2829
-
2830
- # Mean squared error of the residuals
2831
- mse = np.sum((this_y - predicted) ** 2) / dof
2832
-
2833
- # Calculate the standard error of predictions
2834
- pred_stderr = np.sqrt(
2835
- mse
2836
- * (
2837
- 1 / len(this_x)
2838
- + (this_x - np.mean(this_x)) ** 2
2839
- / np.sum((this_x - np.mean(this_x)) ** 2)
2840
- )
2841
- )
2842
-
2843
- # Calculate the confidence interval for predictions
2844
- ci_lower = predicted - t_val * pred_stderr
2845
- ci_upper = predicted + t_val * pred_stderr
2846
-
2847
- fig = plt.figure(figsize=figsize)
2848
- ax = fig.add_subplot(1, 1, 1)
2849
-
2850
- ax.scatter(this_x, this_y, color="#2c93db")
2851
- ax.plot(this_x, predicted, color="#e0436b")
2852
- ax.fill_between(
2853
- this_x,
2854
- ci_lower,
2855
- ci_upper,
2856
- color="#e0436b",
2857
- alpha=0.2,
2858
- label=f"{int(alpha*100)}% CI",
2859
- )
2860
- plt.title(
2861
- f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
2862
- f"pred change: {round(predicted_change, rounding)}, "
2863
- f"pred start: {round(predicted_start, rounding)}, "
2864
- f"pred end: {round(predicted_end, rounding)}"
2865
- )
2866
- plt.xlabel(x_var)
2867
- plt.ylabel(y_label)
2868
-
2869
- if x_var == "date":
2870
- date_labels = pd.to_datetime(
2871
- [first_date + pd.Timedelta(days=int(day)) for day in this_x]
2872
- )
2873
-
2874
- _, unique_indices = np.unique(
2875
- date_labels.strftime("%Y-%m"), return_index=True
2876
- )
2877
-
2878
- unique_x = np.array(this_x)[unique_indices]
2879
- unique_labels = date_labels[unique_indices].strftime("%Y-%m")
2880
-
2881
- ax.set_xticks(unique_x)
2882
- ax.set_xticklabels(unique_labels, rotation=45, ha="right")
2883
- # ax.tick_params(axis="x", length=10, width=2)
2884
-
2885
- plt.show()
2886
-
2887
-
2888
- def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
2889
- for regex in regexes:
2890
- try:
2891
- return re.search(regex, xml_file)
2892
- except (TypeError, AttributeError):
2893
- continue
2894
- raise ValueError(
2895
- f"Could not find processing_baseline info from {regexes} in {xml_file}"
2896
- )
2922
+ def __repr__(self) -> str:
2923
+ """String representation."""
2924
+ root = ""
2925
+ if self.path is not None:
2926
+ data = f"'{self.path}'"
2927
+ elif all(img.path is not None for img in self):
2928
+ data = [img.path for img in self]
2929
+ parents = {str(Path(path).parent) for path in data}
2930
+ if len(parents) == 1:
2931
+ data = [Path(path).name for path in data]
2932
+ root = f" root='{next(iter(parents))}',"
2933
+ else:
2934
+ data = [img for img in self]
2935
+ return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
2897
2936
 
2898
2937
 
2899
2938
  class Sentinel2Config:
@@ -3007,9 +3046,6 @@ class Sentinel2Band(Sentinel2Config, Band):
3007
3046
  }
3008
3047
 
3009
3048
  def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
3010
- if self.is_mask:
3011
- return None
3012
-
3013
3049
  pat = re.compile(
3014
3050
  r"""
3015
3051
  <BOA_ADD_OFFSET\s*
@@ -3025,7 +3061,7 @@ class Sentinel2Band(Sentinel2Config, Band):
3025
3061
  except (TypeError, AttributeError, KeyError) as e:
3026
3062
  raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
3027
3063
  if not matches:
3028
- raise _RegexError(f"Could not find boa_add_offset info from {pat}")
3064
+ return None
3029
3065
 
3030
3066
  dict_ = (
3031
3067
  pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
@@ -3088,7 +3124,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
3088
3124
  def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
3089
3125
  """ImageCollection with Sentinel2 specific name variables and path regexes."""
3090
3126
  level = kwargs.get("level", None_)
3091
- if callable(level) and isinstance(level(), None_):
3127
+ if callable(level) and level() is None:
3092
3128
  raise ValueError("Must specify level for Sentinel2Collection.")
3093
3129
  super().__init__(data=data, **kwargs)
3094
3130
 
@@ -3113,10 +3149,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
3113
3149
 
3114
3150
 
3115
3151
  def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
3116
- """Union multiple ImageCollections together.
3117
-
3118
- Same as using the union operator |.
3119
- """
3152
+ """Concatenate ImageCollections."""
3120
3153
  resolutions = {x.res for x in collections}
3121
3154
  if len(resolutions) > 1:
3122
3155
  raise ValueError(f"resoultion mismatch. {resolutions}")
@@ -3152,8 +3185,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
3152
3185
  raise ValueError("array must be 2 or 3 dimensional")
3153
3186
 
3154
3187
 
3155
- def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
3156
- gradient_x, gradient_y = np.gradient(array, res, res)
3188
+ def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
3189
+ resx, resy = _res_as_tuple(res)
3190
+
3191
+ gradient_x, gradient_y = np.gradient(array, resx, resy)
3157
3192
 
3158
3193
  gradient = abs(gradient_x) + abs(gradient_y)
3159
3194
 
@@ -3240,7 +3275,7 @@ def _get_images(
3240
3275
  return images
3241
3276
 
3242
3277
 
3243
- class ArrayNotLoadedError(ValueError):
3278
class _ArrayNotLoadedError(ValueError):
    """Arrays are not loaded.

    Subclass of ValueError, so handlers that catch ValueError also catch
    this error.
    """
3245
3280
 
3246
3281
 
@@ -3257,7 +3292,7 @@ class PathlessImageError(ValueError):
3257
3292
  what = "that have been merged"
3258
3293
  elif self.instance._from_array:
3259
3294
  what = "from arrays"
3260
- elif self.instance._from_gdf:
3295
+ elif self.instance._from_geopandas:
3261
3296
  what = "from GeoDataFrames"
3262
3297
  else:
3263
3298
  raise ValueError(self.instance)
@@ -3318,18 +3353,22 @@ def _intesects(x, other) -> bool:
3318
3353
 
3319
3354
 
3320
3355
def _copy_and_add_df_parallel(
    group_values: tuple[Any, ...],
    group_df: pd.DataFrame,
    self: ImageCollection,
    copy: bool,
) -> tuple[tuple[Any], ImageCollection]:
    """Build the collection belonging to one group of an image table.

    Args:
        group_values: Key identifying the group. Returned unchanged.
        group_df: Rows of the group. Must contain the columns "_image_idx"
            and "_image_instance"; a "band_id" column is optional.
        self: Collection whose ``images`` are replaced by the images of the
            group (one per unique "_image_idx").
        copy: If True, the collection and each image are copied first. If
            False, ``self`` and the images referenced by ``group_df`` are
            modified in place.

    Returns:
        A tuple of ``group_values`` and the resulting collection.
    """
    collection = self.copy() if copy else self

    # One row per image: the table may hold several rows for the same image.
    unique_images = group_df.drop_duplicates("_image_idx")["_image_instance"]
    if copy:
        collection.images = [image.copy() for image in unique_images]
    else:
        collection.images = list(unique_images)

    if "band_id" in group_df:
        # Keep only the bands that occur in this group.
        wanted_band_ids = set(group_df["band_id"].values)
        for image in collection.images:
            image._bands = [
                band for band in image if band.band_id in wanted_band_ids
            ]

    return group_values, collection
3333
3372
 
3334
3373
 
3335
3374
  def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
@@ -3355,15 +3394,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
3355
3394
  return rasterio.open(file)
3356
3395
 
3357
3396
 
3358
- def _load_band(band: Band, **kwargs) -> None:
3397
def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
    """Load the mask band file and return a boolean array from it.

    The mask file is the single entry of ``self._all_file_paths`` that
    contains ``self.masking["band_id"]``. It is loaded as a ``Band`` built
    from ``self._common_init_kwargs`` with ``metadata`` set to None.

    Args:
        self: Object with the attributes ``masking`` (with the keys "band_id"
            and "values"), ``_all_file_paths``, ``_common_init_kwargs`` and
            ``path``.
        **kwargs: Passed on to ``Band.load``.

    Returns:
        Boolean array that is True where the loaded mask band's values are
        among ``self.masking["values"]``.

    Raises:
        ValueError: If zero or more than one file path matches the mask
            band id.
    """
    mask_band_id = self.masking["band_id"]
    candidates = [path for path in self._all_file_paths if mask_band_id in path]

    if not candidates:
        raise ValueError(
            f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
            + str([Path(x).name for x in _ls_func(self.path)])
        )
    if len(candidates) > 1:
        raise ValueError(
            f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
        )

    (mask_path,) = candidates
    init_kwargs = {**self._common_init_kwargs, "metadata": None}
    mask_band = Band(mask_path, **init_kwargs)
    mask_band.load(**kwargs)

    return np.isin(mask_band.values, list(self.masking["values"]))
3417
+
3418
+
3419
def _load_band(band: Band, **kwargs) -> Band:
    """Call ``band.load(**kwargs)`` and return whatever it returns."""
    loaded = band.load(**kwargs)
    return loaded
3360
3421
 
3361
3422
 
3362
- def _band_apply(band: Band, func: Callable, **kwargs) -> None:
3423
def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
    """Call ``band.apply(func, **kwargs)`` and return whatever it returns."""
    applied = band.apply(func, **kwargs)
    return applied
3364
3425
 
3365
3426
 
3366
- def _clip_band(band: Band, mask, **kwargs) -> None:
3427
def _clip_band(band: Band, mask, **kwargs) -> Band:
    """Call ``band.clip(mask, **kwargs)`` and return whatever it returns."""
    clipped = band.clip(mask, **kwargs)
    return clipped
3368
3429
 
3369
3430
 
@@ -3408,126 +3469,148 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
3408
3469
  return binary_erosion(arr, structure=structure).astype(dtype)
3409
3470
 
3410
3471
 
3411
- def get_cmap(arr: np.ndarray) -> LinearSegmentedColormap:
3472
def _plot_pixels_1d(
    y: np.ndarray,
    x: np.ndarray,
    alpha: float,
    x_var: str,
    y_label: str,
    rounding: int,
    figsize: tuple,
    first_date: pd.Timestamp,
) -> None:
    """Scatter-plot y against x with a fitted line and its confidence band.

    A straight line is fitted with least squares. The line, the observations
    and a two-sided confidence band for the fitted values are drawn, and the
    figure is shown with ``plt.show()``.

    Args:
        y: Observed values.
        x: Explanatory values, one per observation.
        alpha: Significance level of the confidence band. The band covers
            ``1 - alpha`` (e.g. 0.05 gives a 95% band).
        x_var: Label of the x axis. If "date", ``x`` is treated as day
            offsets from ``first_date`` and the ticks are labelled with the
            first observation of each year-month.
        y_label: Label of the y axis.
        rounding: Number of decimals for the predicted values in the title,
            and the minimum number of decimals for the slope.
        figsize: Size of the figure.
        first_date: Date that day offset 0 corresponds to. Only used when
            ``x_var`` is "date".

    Note:
        The residual degrees of freedom are ``len(x) - 2`` and are used as a
        divisor, so more than two observations are needed for a finite band.
    """
    design_matrix = np.vstack([x, np.ones(x.shape[0])]).T
    coef, intercept = np.linalg.lstsq(design_matrix, y, rcond=None)[0]
    predicted = intercept + coef * x

    predicted_start = predicted[0]
    predicted_end = predicted[-1]
    predicted_change = predicted_end - predicted_start

    # Degrees of freedom: observations minus the two fitted parameters.
    dof = len(x) - 2

    # Two-sided critical value for a (1 - alpha) confidence interval.
    t_val = stats.t.ppf(1 - alpha / 2, dof)

    # Mean squared error of the residuals
    mse = np.sum((y - predicted) ** 2) / dof

    # Standard error of the fitted values
    x_centered = x - np.mean(x)
    pred_stderr = np.sqrt(
        mse * (1 / len(x) + x_centered**2 / np.sum(x_centered**2))
    )

    # Confidence interval for the fitted values
    ci_lower = predicted - t_val * pred_stderr
    ci_upper = predicted + t_val * pred_stderr

    # Decimals for the slope in the title. Small slopes get extra decimals so
    # they are not rounded to zero. Never fewer than `rounding`: a zero or
    # negative digit count would round slopes like 0.5 or 2.9 to 0.0, and a
    # slope of exactly 0 has no finite logarithm.
    if coef != 0 and np.isfinite(coef):
        coef_decimals = max(rounding, int(np.log(1 / abs(coef))))
    else:
        coef_decimals = rounding

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)

    ax.scatter(x, y, color="#2c93db")
    ax.plot(x, predicted, color="#e0436b")
    ax.fill_between(
        x,
        ci_lower,
        ci_upper,
        color="#e0436b",
        alpha=0.2,  # transparency of the band, unrelated to the alpha argument
        # alpha is the significance level, so the band covers 1 - alpha.
        # The label only becomes visible if a legend is added to the axes.
        label=f"{round((1 - alpha) * 100)}% CI",
    )
    plt.title(
        f"coef: {round(coef, coef_decimals)}, "
        f"pred change: {round(predicted_change, rounding)}, "
        f"pred start: {round(predicted_start, rounding)}, "
        f"pred end: {round(predicted_end, rounding)}"
    )
    plt.xlabel(x_var)
    plt.ylabel(y_label)

    if x_var == "date":
        date_labels = pd.to_datetime(
            [first_date + pd.Timedelta(days=int(day)) for day in x]
        )

        # One tick per year-month, at the first observation of that month.
        _, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)

        unique_x = np.array(x)[unique_indices]
        unique_labels = date_labels[unique_indices].strftime("%Y-%m")

        ax.set_xticks(unique_x)
        ax.set_xticklabels(unique_labels, rotation=45, ha="right")

    plt.show()
3547
+
3548
+
3549
def pixelwise(
    func: Callable,
    values: np.ndarray,
    mask_array: np.ndarray | None = None,
    index_aligned_kwargs: dict | None = None,
    kwargs: dict | None = None,
    processes: int = 1,
    nodata=np.nan,
) -> Any:
    """Run a function for each pixel of a 3d array.

    For every (row, col) position, the values along the first axis are passed
    to ``func`` as a 1d array. The results are written into 2d output arrays
    with the shape ``values.shape[1:]``.

    Args:
        func: Function called once per pixel with the pixel's 1d array as
            first argument. It may return a scalar, a sequence (one element
            per output array) or None (the pixel is left as ``nodata``).
        values: 3d array. The first axis is the one passed to ``func``.
        mask_array: Optional boolean array with the same shape as ``values``.
            Entries that are True are left out of the 1d array passed to
            ``func``. Pixels that are True along the entire first axis are
            skipped.
        index_aligned_kwargs: Optional dict of arrays aligned with the first
            axis of ``values``. Each array is subset with the same per-pixel
            mask as the values and passed to ``func`` as keyword argument.
        kwargs: Optional keyword arguments passed unchanged to ``func``.
        processes: Passed to ``joblib.Parallel`` as ``n_jobs``.
        nodata: Fill value of the output arrays.

    Returns:
        None if ``func`` returned None for all pixels (or no pixel was
        processed). A single 2d array if ``func`` returns a scalar or a
        sequence of length 1. Otherwise a tuple of 2d arrays, one per
        element returned by ``func``.
    """
    index_aligned_kwargs = index_aligned_kwargs or {}
    kwargs = kwargs or {}

    if mask_array is not None:
        not_all_missing = ~np.all(mask_array, axis=0)
    else:
        mask_array = np.full(values.shape, False)
        not_all_missing = np.full(values.shape[1:], True)

    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()

    def has_length(result: Any) -> bool:
        """True for sequence-like results, False for scalars."""
        try:
            len(result)
        except TypeError:
            return False
        return True

    def delayed_calls():
        """Yield one delayed call of func per pixel that is not fully masked."""
        for row, col in zip(
            nonmissing_row_indices, nonmissing_col_indices, strict=True
        ):
            # Compute the per-pixel selection once and reuse it for the
            # values and all index-aligned keyword arguments.
            keep = ~mask_array[:, row, col]
            aligned_kwargs = {
                key: value[keep] for key, value in index_aligned_kwargs.items()
            }
            yield joblib.delayed(func)(
                values[keep, row, col], **kwargs, **aligned_kwargs
            )

    with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
        results: list[Any] = parallel(delayed_calls())

    if all(result is None for result in results):
        return None

    # Infer the number of outputs from the first pixel that gave a result.
    # The very first result may be None even though later ones are sequences.
    first_result = next(result for result in results if result is not None)
    n_out_arrays = len(first_result) if has_length(first_result) else 1

    out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))

    for row, col, result in zip(
        nonmissing_row_indices, nonmissing_col_indices, results, strict=True
    ):
        if result is None:
            continue
        if has_length(result):
            for i, arr in enumerate(out_arrays):
                arr[row, col] = result[i]
        else:
            # Scalars cannot be indexed. NumPy scalars raise IndexError (not
            # TypeError) on integer indexing, so they are assigned directly.
            for arr in out_arrays:
                arr[row, col] = result

    if len(out_arrays) == 1:
        return out_arrays[0]

    return out_arrays