ssb-sgis 1.0.8-py3-none-any.whl → 1.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import os
  import random
  import re
  import time
+ from abc import abstractmethod
  from collections.abc import Callable
  from collections.abc import Iterable
  from collections.abc import Iterator
@@ -26,7 +27,6 @@ import rasterio
  from affine import Affine
  from geopandas import GeoDataFrame
  from geopandas import GeoSeries
- from matplotlib.colors import LinearSegmentedColormap
  from pandas.api.types import is_dict_like
  from rasterio.enums import MergeAlg
  from scipy import stats
@@ -41,11 +41,8 @@ from shapely.geometry import Polygon

  try:
  import dapla as dp
- from dapla.gcs import GCSFileSystem
  except ImportError:
-
- class GCSFileSystem:
- """Placeholder."""
+ pass


  try:
@@ -55,7 +52,7 @@ except ImportError:
  class exceptions:
  """Placeholder."""

- class RefreshError:
+ class RefreshError(Exception):
  """Placeholder."""


@@ -74,9 +71,9 @@ try:
  except ImportError:
  pass
  try:
- import xarray as xr
  from xarray import DataArray
  from xarray import Dataset
+ from xarray import combine_by_coords
  except ImportError:

  class DataArray:
@@ -85,6 +82,9 @@ except ImportError:
  class Dataset:
  """Placeholder."""

+ def combine_by_coords(*args, **kwargs) -> None:
+ raise ImportError("xarray")
+

  from ..geopandas_tools.bounds import get_total_bounds
  from ..geopandas_tools.conversion import to_bbox
@@ -102,8 +102,10 @@ from ..io.opener import opener
  from . import sentinel_config as config
  from .base import _array_to_geojson
  from .base import _gdf_to_arr
+ from .base import _get_res_from_bounds
  from .base import _get_shape_from_bounds
  from .base import _get_transform_from_bounds
+ from .base import _res_as_tuple
  from .base import get_index_mapper
  from .indices import ndvi
  from .regex import _extract_regex_match_from_string
@@ -142,8 +144,6 @@ DATE_RANGES_TYPE = (
  | tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
  )

- FILENAME_COL_SUFFIX = "_filename"
-
  DEFAULT_FILENAME_REGEX = r"""
  .*?
  (?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group
@@ -163,13 +163,12 @@ ALLOWED_INIT_KWARGS = [
  "filename_regexes",
  "all_bands",
  "crs",
- "backend",
  "masking",
  "_merged",
  "date",
  ]

- _load_counter: int = 0
+ _LOAD_COUNTER: int = 0


  def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
@@ -196,7 +195,7 @@ class ImageCollectionGroupBy:
  Args:
  data: Iterable of group values and ImageCollection groups.
  by: list of group attributes.
- collection: ImageCollection instance. Used to pass attributes.
+ collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
  """
  self.data = list(data)
  self.by = by
@@ -291,7 +290,7 @@ class ImageCollectionGroupBy:

  def __repr__(self) -> str:
  """String representation."""
- return f"{self.__class__.__name__}({len(self)})"
+ return f"{self.__class__.__name__}({len(self)}, by={self.by})"


  @dataclass(frozen=True)
@@ -307,7 +306,11 @@ class BandMasking:


  class None_:
- """Default value for keyword arguments that should not have a default."""
+ """Default None for args that are not allowed to be None."""
+
+ def __new__(cls) -> None:
+ """Always returns None."""
+ return None


  class _ImageBase:
@@ -318,18 +321,16 @@ class _ImageBase:

  def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:

- self._mask = None
  self._bounds = None
- self._merged = False
- self._from_array = False
- self._from_geopandas = False
- self.metadata_attributes = self.metadata_attributes or {}
  self._path = None
- self._metadata_from_xml = False
-
  self._bbox = to_bbox(bbox) if bbox is not None else None

- self.metadata = self._metadata_to_nested_dict(metadata)
+ self.metadata_attributes = self.metadata_attributes or {}
+
+ if metadata is not None:
+ self.metadata = self._metadata_to_nested_dict(metadata)
+ else:
+ self.metadata = {}

  self.image_patterns = self._compile_regexes("image_regexes")
  self.filename_patterns = self._compile_regexes("filename_regexes")
@@ -339,29 +340,45 @@ class _ImageBase:
  f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
  )
  if key in ALLOWED_INIT_KWARGS and key in dir(self):
- if is_property(self, key):
- setattr(self, f"_{key}", value)
- elif is_method(self, key):
- raise error_obj
- else:
- setattr(self, key, value)
+ self._safe_setattr(key, value, error_obj)
  else:
  raise error_obj

+ # attributes for debugging
+ self._metadata_from_xml = False
+ self._merged = False
+ self._from_array = False
+ self._from_geopandas = False
+
+ def _safe_setattr(
+ self, key: str, value: Any, error_obj: Exception | None = None
+ ) -> None:
+ if is_property(self, key):
+ setattr(self, f"_{key}", value)
+ elif is_method(self, key):
+ if error_obj is None:
+ raise AttributeError(f"Cannot set method '{key}'.")
+ raise error_obj
+ else:
+ setattr(self, key, value)
+
  def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
- regexes = getattr(self, regex_attr)
- if regexes:
- if isinstance(regexes, str):
- regexes = (regexes,)
- return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
- return ()
+ regexes: tuple[str] | str = getattr(self, regex_attr)
+ if not regexes:
+ return ()
+ if isinstance(regexes, str):
+ regexes = (regexes,)
+ return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)

  @staticmethod
  def _metadata_to_nested_dict(
  metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
- ) -> dict[str, dict[str, Any]] | None:
- if metadata is None:
- return {}
+ ) -> dict[str, dict[str, Any]]:
+ """Construct metadata dict from dictlike, DataFrame or file path.
+
+ Extract metadata value:
+ >>> self.metadata[self.path]['cloud_cover_percentage']
+ """
  if isinstance(metadata, (str | Path | os.PathLike)):
  metadata = _read_parquet_func(metadata)

@@ -376,15 +393,16 @@ class _ImageBase:
  return x if not (is_scalar(x) and pd.isna(x)) else None

  # to nested dict because pandas indexing gives rare KeyError with long strings
- metadata = {
+ return {
  _fix_path(path): {
  attr: na_to_none(value) for attr, value in row.items()
  }
  for path, row in metadata.iterrows()
  }
  elif is_dict_like(metadata):
- metadata = {_fix_path(path): value for path, value in metadata.items()}
+ return {_fix_path(path): value for path, value in metadata.items()}

+ # try to allow custom types with dict-like indexing
  return metadata

  @property
@@ -394,7 +412,6 @@ class _ImageBase:
  "res": self.res,
  "bbox": self._bbox,
  "nodata": self.nodata,
- "backend": self.backend,
  "metadata": self.metadata,
  }

@@ -408,19 +425,22 @@ class _ImageBase:
  @property
  def res(self) -> int:
  """Pixel resolution."""
+ # if self._res is None:
+ # if self.has_array:
+ # self._res = _get_res_from_bounds(self.bounds, self.values.shape)
+ # else:
+ # with opener(self.path) as file:
+ # with rasterio.open(file) as src:
+ # self._res = src.res
  return self._res

- @property
- def centroid(self) -> Point:
- """Centerpoint of the object."""
- return self.union_all().centroid
+ @abstractmethod
+ def union_all(self) -> Polygon | MultiPolygon:
+ pass

  def assign(self, **kwargs) -> "_ImageBase":
  for key, value in kwargs.items():
- try:
- setattr(self, key, value)
- except AttributeError:
- setattr(self, f"_{key}", value)
+ self._safe_setattr(key, value)
  return self

  def _name_regex_searcher(
@@ -451,7 +471,10 @@ class _ImageBase:
  )

  def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
- """Create a dataframe with file paths and image paths that match regexes."""
+ """Create a dataframe with file paths and image paths that match regexes.
+
+ Used in __init__ to select relevant paths fast.
+ """
  df = pd.DataFrame({"file_path": list(file_paths)})

  df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
@@ -518,12 +541,14 @@ class _ImageBase:
  class _ImageBandBase(_ImageBase):
  """Common parent class of Image and Band."""

- def intersects(self, other: GeoDataFrame | GeoSeries | Geometry) -> bool:
- if hasattr(other, "crs") and not pyproj.CRS(self.crs).equals(
- pyproj.CRS(other.crs)
+ def intersects(
+ self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
+ ) -> bool:
+ if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
+ pyproj.CRS(geometry.crs)
  ):
- raise ValueError(f"crs mismatch: {self.crs} and {other.crs}")
- return self.union_all().intersects(to_shapely(other))
+ raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
+ return self.union_all().intersects(to_shapely(geometry))

  def union_all(self) -> Polygon:
  try:
@@ -532,20 +557,21 @@ class _ImageBandBase(_ImageBase):
  return Polygon()

  @property
- def mask_percentage(self) -> float:
- return self.mask.values.sum() / (self.mask.width * self.mask.height) * 100
+ def centroid(self) -> Point:
+ """Centerpoint of the object."""
+ return self.union_all().centroid

  @property
  def year(self) -> str:
  if hasattr(self, "_year") and self._year:
  return self._year
- return self.date[:4]
+ return str(self.date)[:4]

  @property
  def month(self) -> str:
  if hasattr(self, "_month") and self._month:
  return self._month
- return "".join(self.date.split("-"))[4:6]
+ return str(self.date).replace("-", "").replace("/", "")[4:6]

  @property
  def name(self) -> str | None:
@@ -572,24 +598,25 @@ class _ImageBandBase(_ImageBase):
  return self._name_regex_searcher("level", self.image_patterns)

  def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
-
+ """Search through xml files for missing metadata attributes."""
  self._metadata_from_xml = True

  missing_metadata_attributes = {
- key: value
- for key, value in metadata_attributes.items()
- if not hasattr(self, key) or getattr(self, key) is None
+ attr: constructor_func
+ for attr, constructor_func in metadata_attributes.items()
+ if not hasattr(self, attr) or getattr(self, attr) is None
  }

  nonmissing_metadata_attributes = {
- key: getattr(self, key)
- for key in metadata_attributes
- if key not in missing_metadata_attributes
+ attr: getattr(self, attr)
+ for attr in metadata_attributes
+ if attr not in missing_metadata_attributes
  }

  if not missing_metadata_attributes:
  return nonmissing_metadata_attributes

+ # read all xml content once
  file_contents: list[str] = []
  for path in self._all_file_paths:
  if ".xml" not in path:
@@ -597,37 +624,40 @@ class _ImageBandBase(_ImageBase):
  with _open_func(path, "rb") as file:
  file_contents.append(file.read().decode("utf-8"))

- for key, value in missing_metadata_attributes.items():
+ def is_last_xml(i: int) -> bool:
+ return i == len(file_contents) - 1
+
+ for attr, value in missing_metadata_attributes.items():
  results = None
- for i, filetext in enumerate(file_contents):
+ for i, file_content in enumerate(file_contents):
  if isinstance(value, str) and value in dir(self):
- method = getattr(self, value)
+ # method or a hardcoded value
+ value: Callable | Any = getattr(self, value)
+
+ if callable(value):
  try:
- results = method(filetext)
+ results = value(file_content)
  except _RegexError as e:
- if i == len(self._all_file_paths) - 1:
- raise e
+ if is_last_xml(i):
+ raise e.__class__(self.path, e) from e
  continue
  if results is not None:
  break
-
- if callable(value):
+ elif (
+ isinstance(value, str)
+ or hasattr(value, "__iter__")
+ and all(isinstance(x, str | re.Pattern) for x in value)
+ ):
  try:
- results = value(filetext)
+ results = _extract_regex_match_from_string(file_content, value)
  except _RegexError as e:
- if i == len(self._all_file_paths) - 1:
+ if is_last_xml(i):
  raise e
- continue
- if results is not None:
- break
-
- try:
- results = _extract_regex_match_from_string(filetext, value)
- except _RegexError as e:
- if i == len(self._all_file_paths) - 1:
- raise e
+ elif value is not None:
+ results = value
+ break

- missing_metadata_attributes[key] = results
+ missing_metadata_attributes[attr] = results

  return missing_metadata_attributes | nonmissing_metadata_attributes

@@ -671,14 +701,15 @@ class Band(_ImageBandBase):
  """Band holding a single 2 dimensional array representing an image band."""

  cmap: ClassVar[str | None] = None
- backend: str = "numpy"

  @classmethod
  def from_geopandas(
  cls,
  gdf: GeoDataFrame | GeoSeries,
- res: int,
  *,
+ res: int | None = None,
+ out_shape: tuple[int, int] | None = None,
+ bounds: Any | None = None,
  fill: int = 0,
  all_touched: bool = False,
  merge_alg: Callable = MergeAlg.replace,
@@ -687,17 +718,27 @@ class Band(_ImageBandBase):
  **kwargs,
  ) -> None:
  """Create Band from a GeoDataFrame."""
- arr: np.ndarray = _gdf_to_arr(
- gdf,
- res=res,
- fill=fill,
- all_touched=all_touched,
- merge_alg=merge_alg,
- default_value=default_value,
- dtype=dtype,
- )
+ if bounds is not None:
+ bounds = to_bbox(bounds)

- obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
+ if out_shape == (0,):
+ arr = np.array([])
+ else:
+ arr = _gdf_to_arr(
+ gdf,
+ res=res,
+ bounds=bounds,
+ fill=fill,
+ all_touched=all_touched,
+ merge_alg=merge_alg,
+ default_value=default_value,
+ dtype=dtype,
+ out_shape=out_shape,
+ )
+ if bounds is None:
+ bounds = gdf.total_bounds
+
+ obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
  obj._from_geopandas = True
  return obj

@@ -717,9 +758,6 @@ class Band(_ImageBandBase):
  **kwargs,
  ) -> None:
  """Band initialiser."""
- if callable(res) and isinstance(res(), None_):
- raise TypeError("Must specify 'res'")
-
  if data is None:
  # allowing 'path' to replace 'data' as argument
  # to make the print repr. valid as initialiser
@@ -745,11 +783,20 @@ class Band(_ImageBandBase):
  if isinstance(data, np.ndarray):
  if self._bounds is None:
  raise ValueError("Must specify bounds when data is an array.")
+ if not (res is None or (callable(res) and res() is None)):
+ # if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
+ # res
+ # ) != _get_res_from_bounds(self._bounds, data.shape):
+ raise ValueError(
+ f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
+ )
  self._crs = crs
  self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
  self._from_array = True
  self.values = data

+ self._res = _get_res_from_bounds(self._bounds, self.values.shape)
+
  elif not isinstance(data, (str | Path | os.PathLike)):
  raise TypeError(
  "'data' must be string, Path-like or numpy.ndarray. "
@@ -757,8 +804,10 @@ class Band(_ImageBandBase):
  )
  else:
  self._path = _fix_path(str(data))
+ if callable(res) and res() is None:
+ res = None
+ self._res = res

- self._res = res
  if cmap is not None:
  self.cmap = cmap
  self._name = name
@@ -786,7 +835,7 @@ class Band(_ImageBandBase):
  else:
  setattr(self, key, value)

- elif self.metadata_attributes and self.path is not None and not self.is_mask:
+ elif self.metadata_attributes and self.path is not None:
  if self._all_file_paths is None:
  self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
  for key, value in self._get_metadata_attributes(
@@ -798,43 +847,28 @@ class Band(_ImageBandBase):
  """Makes Bands sortable by band_id."""
  return self.band_id < other.band_id

+ def value_counts(self) -> pd.Series:
+ """Value count of each value of the band's array."""
+ try:
+ values = self.values.data[self.values.mask == False]
+ except AttributeError:
+ values = self.values
+ unique_values, counts = np.unique(values, return_counts=True)
+ return pd.Series(counts, index=unique_values).sort_values(ascending=False)
+
  @property
  def values(self) -> np.ndarray:
  """The numpy array, if loaded."""
  if self._values is None:
- raise ArrayNotLoadedError("array is not loaded.")
+ raise _ArrayNotLoadedError("array is not loaded.")
  return self._values

  @values.setter
  def values(self, new_val):
- if self.backend == "numpy" and isinstance(new_val, np.ndarray):
- self._values = new_val
- return
- elif self.backend == "xarray" and isinstance(new_val, DataArray):
- # attrs can dissappear, so doing a union
- attrs = self._values.attrs | new_val.attrs
+ if isinstance(new_val, np.ndarray):
  self._values = new_val
- self._values.attrs = attrs
- return
-
- if self.backend == "numpy":
+ else:
  self._values = self._to_numpy(new_val)
- if self.backend == "xarray":
- if not isinstance(self._values, DataArray):
- self._values = self._to_xarray(
- new_val,
- transform=self.transform,
- )
-
- elif isinstance(new_val, np.ndarray):
- self._values.values = new_val
- else:
- self._values = new_val
-
- @property
- def mask(self) -> "Band":
- """Mask Band."""
- return self._mask

  @property
  def band_id(self) -> str:
@@ -921,28 +955,39 @@ class Band(_ImageBandBase):
  return df

  def clip(
- self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
+ self,
+ mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
  ) -> "Band":
- """Clip band values to geometry mask."""
+ """Clip band values to geometry mask while preserving bounds."""
  if not self.height or not self.width:
  return self

- values = _clip_xarray(
- self.to_xarray(),
- mask,
- crs=self.crs,
- **kwargs,
- )
- self._bounds = to_bbox(mask)
- self.transform = _get_transform_from_bounds(self._bounds, values.shape)
- self.values = values
+ fill: int = self.nodata or 0
+
+ mask_array: np.ndarray = Band.from_geopandas(
+ gdf=to_gdf(mask)[["geometry"]],
+ default_value=1,
+ fill=fill,
+ out_shape=self.values.shape,
+ bounds=mask,
+ ).values
+
+ is_not_polygon = mask_array == fill
+
+ if isinstance(self.values, np.ma.core.MaskedArray):
+ self._values.mask |= is_not_polygon
+ else:
+ self._values = np.ma.array(
+ self.values, mask=is_not_polygon, fill_value=self.nodata
+ )
+
  return self

  def load(
  self,
  bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
  indexes: int | tuple[int] | None = None,
- masked: bool | None = None,
+ masked: bool = True,
  file_system=None,
  **kwargs,
  ) -> "Band":
@@ -950,11 +995,10 @@ class Band(_ImageBandBase):

  The array is stored in the 'values' property.
  """
- global _load_counter
- _load_counter += 1
+ global _LOAD_COUNTER
+ _LOAD_COUNTER += 1

- if masked is None:
- masked = True if self.mask is None else False
+ _masking = kwargs.pop("_masking", self.masking)

  bounds_was_none = bounds is None

@@ -963,12 +1007,9 @@ class Band(_ImageBandBase):
  should_return_empty: bool = bounds is not None and bounds.area == 0
  if should_return_empty:
  self._values = np.array([])
- if self.mask is not None and not self.is_mask:
- self._mask = self._mask.load(
- bounds=bounds, indexes=indexes, file_system=file_system
- )
  self._bounds = None
  self.transform = None
+ # activate setter
  self.values = self._values

  return self
@@ -978,7 +1019,6 @@ class Band(_ImageBandBase):

  if bounds is not None:
  minx, miny, maxx, maxy = to_bbox(bounds)
- ## round down/up to integer to avoid precision trouble
  # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
  bounds = minx, miny, maxx, maxy

@@ -992,20 +1032,19 @@ class Band(_ImageBandBase):
  out_shape = kwargs.pop("out_shape", None)

  if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
- print(self)
- print(self.mask)
- print(self.values.shape)
- print([int(x) for x in bounds], [int(x) for x in self.bounds])
  raise ValueError(
  "Cannot re-load array with different bounds. "
  "Use .copy() to read with different bounds. "
- "Or .clip(mask) to clip."
+ "Or .clip(mask) to clip.",
+ self,
+ self.values.shape,
+ [int(x) for x in bounds],
+ [int(x) for x in self.bounds],
  )
- # with opener(self.path, file_system=self.file_system) as f:
+
  with opener(self.path, file_system=file_system) as f:
  with rasterio.open(f, nodata=self.nodata) as src:
- self._res = int(src.res[0]) if not self.res else self.res
-
+ self._res = src.res if not self.res else self.res
  if self.nodata is None or np.isnan(self.nodata):
  self.nodata = src.nodata
  else:
@@ -1018,7 +1057,7 @@ class Band(_ImageBandBase):
  )

  if bounds is None:
- if self._res != int(src.res[0]):
+ if self._res != src.res:
  if out_shape is None:
  out_shape = _get_shape_from_bounds(
  to_bbox(src.bounds), self.res, indexes
@@ -1070,18 +1109,12 @@ class Band(_ImageBandBase):
  else:
  values[values == src.nodata] = self.nodata

- if self.masking and self.is_mask:
- values = np.isin(values, list(self.masking["values"]))
-
- elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
-
- if not self.mask.has_array:
- self._mask = self.mask.load(
- bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
- )
- mask_arr = self.mask.values
-
+ if _masking and not isinstance(values, np.ma.core.MaskedArray):
+ mask_arr = _read_mask_array(self, bounds=bounds)
  values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
+ elif _masking:
+ mask_arr = _read_mask_array(self, bounds=bounds)
+ values.mask |= mask_arr

  if bounds is not None:
  self._bounds = to_bbox(bounds)
@@ -1092,13 +1125,6 @@ class Band(_ImageBandBase):

  return self

- @property
- def is_mask(self) -> bool:
- """True if the band_id is equal to the masking band_id."""
- if self.masking is None:
- return False
- return self.band_id == self.masking["band_id"]
-
  @property
  def has_array(self) -> bool:
  """Whether the array is loaded."""
@@ -1106,7 +1132,7 @@ class Band(_ImageBandBase):
  if not isinstance(self.values, (np.ndarray | DataArray)):
  raise ValueError()
  return True
- except ValueError: # also catches ArrayNotLoadedError
+ except ValueError: # also catches _ArrayNotLoadedError
  return False

  def write(
@@ -1126,10 +1152,17 @@
  if self.crs is None:
  raise ValueError("Cannot write None crs to image.")

+ if self.nodata:
+ # TODO take out .data if masked?
+ values_with_nodata = np.concatenate(
+ [self.values.flatten(), np.array([self.nodata])]
+ )
+ else:
+ values_with_nodata = self.values
  profile = {
  "driver": driver,
  "compress": compress,
- "dtype": rasterio.dtypes.get_minimum_dtype(self.values),
+ "dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
  "crs": self.crs,
  "transform": self.transform,
  "nodata": self.nodata,
@@ -1138,19 +1171,18 @@ class Band(_ImageBandBase):
  "width": self.width,
  } | kwargs

- # with opener(path, "wb", file_system=self.file_system) as f:
  with opener(path, "wb", file_system=file_system) as f:
  with rasterio.open(f, "w", **profile) as dst:

  if dst.nodata is None:
  dst.nodata = _get_dtype_min(dst.dtypes[0])

- # if (
- # isinstance(self.values, np.ma.core.MaskedArray)
- # # and dst.nodata is not None
- # ):
- # self.values.data[np.isnan(self.values.data)] = dst.nodata
- # self.values.data[self.values.mask] = dst.nodata
+ if (
+ isinstance(self.values, np.ma.core.MaskedArray)
+ and dst.nodata is not None
+ ):
+ self.values.data[np.isnan(self.values.data)] = dst.nodata
+ self.values.data[self.values.mask] = dst.nodata

  if len(self.values.shape) == 2:
  dst.write(self.values, indexes=1)
@@ -1238,7 +1270,7 @@ class Band(_ImageBandBase):
  The gradient will be 1 (1 meter up for every meter forward).
  The calculation is by default done in place to save memory.

- >>> band.gradient()
+ >>> band.gradient(copy=False)
  >>> band.values
  array([[0., 1., 1., 1., 0.],
  [1., 1., 1., 1., 1.],
@@ -1299,11 +1331,13 @@ class Band(_ImageBandBase):
  dropna=dropna,
  )

- def to_geopandas(self, column: str = "value") -> GeoDataFrame:
+ def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
  """Create a GeoDataFrame from the image Band.

  Args:
  column: Name of resulting column that holds the raster values.
+ dropna: Whether to remove values that are NA or equal to the nodata
+ value.

  Returns:
  A GeoDataFrame with a geometry column and array values.
@@ -1311,24 +1345,28 @@ class Band(_ImageBandBase):
  if not hasattr(self, "_values"):
  raise ValueError("Array is not loaded.")

+ if isinstance(self.values, np.ma.core.MaskedArray):
+ self.values.data[self.values.mask] = self.nodata or 0
  if self.values.shape[0] == 0:
- return GeoDataFrame({"geometry": []}, crs=self.crs)
-
- return GeoDataFrame(
- pd.DataFrame(
- _array_to_geojson(
- self.values, self.transform, processes=self.processes
+ df = GeoDataFrame({"geometry": []}, crs=self.crs)
+ else:
+ df = GeoDataFrame(
+ pd.DataFrame(
+ _array_to_geojson(
+ self.values, self.transform, processes=self.processes
+ ),
+ columns=[column, "geometry"],
  ),
- columns=[column, "geometry"],
- ),
- geometry="geometry",
- crs=self.crs,
- )
+ geometry="geometry",
+ crs=self.crs,
+ )
+
+ if dropna:
+ return df[(df[column] != self.nodata) & (df[column].notna())]
+ return df

  def to_xarray(self) -> DataArray:
  """Convert the raster to an xarray.DataArray."""
- if self.backend == "xarray":
- return self.values
  return self._to_xarray(
  self.values,
  transform=self.transform,
@@ -1345,19 +1383,6 @@ class Band(_ImageBandBase):
  if not isinstance(arr, np.ndarray):
  mask_arr = None
  if masked:
- # if self.mask is not None:
- # print(self.mask.values.shape, arr.shape)
- # if self.mask is not None and self.mask.values.shape == arr.shape:
- # print("hei", self.mask.values.sum())
- # mask_arr = self.mask.values
- # else:
- # mask_arr = np.full(arr.shape, False)
- # try:
- # print("hei222", arr.isnull().values.sum())
- # mask_arr |= arr.isnull().values
- # except AttributeError:
- # pass
- # mask_arr = np.full(arr.shape, False)
  try:
  mask_arr = arr.isnull().values
  except AttributeError:
@@ -1374,11 +1399,11 @@ class Band(_ImageBandBase):

  if (
  masked
- and self.mask is not None
- and not self.is_mask
  and not isinstance(arr, np.ma.core.MaskedArray)
+ and mask_arr is not None
  ):
  arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
  return arr

  def __repr__(self) -> str:
@@ -1401,10 +1426,6 @@ class NDVIBand(Band):

  cmap: str = "Greens"

- # @staticmethod
- # def get_cmap(arr: np.ndarray):
- # return get_cmap(arr)
-

  def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
  arr = np.median(arr, axis=0).astype(int)
@@ -1416,12 +1437,12 @@ class Image(_ImageBandBase):
  """Image consisting of one or more Bands."""

  band_class: ClassVar[Band] = Band
- backend: str = "numpy"

  def __init__(
  self,
  data: str | Path | Sequence[Band] | None = None,
- res: int | None = None,
+ res: int | None_ = None_,
+ mask: "Band | None" = None,
  processes: int = 1,
  df: pd.DataFrame | None = None,
  nodata: int | None = None,
@@ -1442,12 +1463,18 @@ class Image(_ImageBandBase):
  self.processes = processes
  self._crs = None
  self._bands = None
+ self._mask = mask
+
+ if isinstance(data, Band):
+ data = [data]

  if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
  self._construct_image_from_bands(data, res)
  return
  elif not isinstance(data, (str | Path | os.PathLike)):
- raise TypeError("'data' must be string, Path-like or a sequence of Band.")
+ raise TypeError(
+ f"'data' must be string, Path-like or a sequence of Band. Got {data}"
+ )

  self._res = res
  self._path = _fix_path(data)
@@ -1455,7 +1482,8 @@ class Image(_ImageBandBase):
  if all_file_paths is None and self.path:
  self._all_file_paths = _get_all_file_paths(self.path)
  elif self.path:
- all_file_paths = {_fix_path(x) for x in all_file_paths}
+ name = Path(self.path).name
+ all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
  self._all_file_paths = {x for x in all_file_paths if self.path in x}
  else:
  self._all_file_paths = None
@@ -1467,11 +1495,7 @@ class Image(_ImageBandBase):

  df["image_path"] = df["image_path"].astype(str)

- cols_to_explode = [
- "file_path",
- "file_name",
- *[x for x in df if FILENAME_COL_SUFFIX in x],
- ]
+ cols_to_explode = ["file_path", "file_name"]
  try:
  df = df.explode(cols_to_explode, ignore_index=True)
  except ValueError:
@@ -1499,20 +1523,92 @@ class Image(_ImageBandBase):
  else:
  setattr(self, key, value)

- else:
+ elif self.metadata_attributes and self.path is not None:
  for key, value in self._get_metadata_attributes(
  self.metadata_attributes
  ).items():
  setattr(self, key, value)

+ def clip(
+ self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
+ ) -> "Image":
+ """Clip band values to geometry mask while preserving bounds."""
+ copied = self.copy() if copy else self
+
+ fill: int = self.nodata or 0
+
+ mask_array: np.ndarray = Band.from_geopandas(
+ gdf=to_gdf(mask)[["geometry"]],
+ default_value=1,
+ fill=fill,
+ out_shape=next(iter(self)).values.shape,
+ bounds=self.bounds,
+ ).values
+
+ is_not_polygon = mask_array == fill
+
+ for band in copied:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band._values.mask |= is_not_polygon
+ else:
+ band._values = np.ma.array(
+ band.values, mask=is_not_polygon, fill_value=band.nodata
+ )
+
+ return copied
+
+ def load(
+ self,
+ bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
+ indexes: int | tuple[int] | None = None,
+ file_system=None,
+ **kwargs,
+ ) -> "ImageCollection":
+ """Load all image Bands with threading."""
+ if bounds is None and indexes is None and all(band.has_array for band in self):
+ return self
+
+ if self.masking:
+ mask_array: np.ndarray = _read_mask_array(
+ self,
+ bounds=bounds,
+ indexes=indexes,
+ file_system=file_system,
+ **kwargs,
+ )
+
+ with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
+ parallel(
+ joblib.delayed(_load_band)(
+ band,
+ bounds=bounds,
+ indexes=indexes,
+ file_system=file_system,
+ _masking=None,
+ **kwargs,
+ )
+ for band in self
+ )
+
+ if self.masking:
+ for band in self:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band.values.mask |= mask_array
+ else:
+ band.values = np.ma.array(
+ band.values, mask=mask_array, fill_value=self.nodata
+ )
+
+ return self
+
  def _construct_image_from_bands(
  self, data: Sequence[Band], res: int | None
  ) -> None:
  self._bands = list(data)
  if res is None:
- res = list({band.res for band in self.bands})
+ res = {band.res for band in self.bands}
  if len(res) == 1:
- self._res = res[0]
+ self._res = next(iter(res))
  else:
  raise ValueError(f"Different resolutions for the bands: {res}")
  else:
@@ -1558,8 +1654,7 @@ class Image(_ImageBandBase):
  arr,
  bounds=red.bounds,
  crs=red.crs,
- mask=red.mask,
- **red._common_init_kwargs,
+ **{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
  )

  def get_brightness(
@@ -1590,81 +1685,16 @@ class Image(_ImageBandBase):
  brightness,
  bounds=red.bounds,
  crs=self.crs,
- mask=self.mask,
- **self._common_init_kwargs,
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
  )

  def to_xarray(self) -> DataArray:
  """Convert the raster to an xarray.DataArray."""
- if self.backend == "xarray":
- return self.values
-
  return self._to_xarray(
  np.array([band.values for band in self]),
  transform=self[0].transform,
  )

- @property
- def mask(self) -> Band | None:
- """Mask Band."""
- if self.masking is None:
- return None
-
- elif self._mask is not None:
- return self._mask
-
- elif self._bands is not None and all(band.mask is not None for band in self):
- if len({id(band.mask) for band in self}) > 1:
- raise ValueError(
- "Image bands must have same mask.",
- {id(band.mask) for band in self},
- ) # TODO
- self._mask = next(
- iter([band.mask for band in self if band.mask is not None])
- )
- return self._mask
-
- mask_band_id = self.masking["band_id"]
- mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
- if len(mask_paths) > 1:
- raise ValueError(
- f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
- )
- elif not mask_paths:
- raise ValueError(
- f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
- + str([Path(x).name for x in _ls_func(self.path)])
- )
-
- self._mask = self.band_class(
- mask_paths[0],
- **self._common_init_kwargs,
- )
- if self._bands is not None:
- for band in self:
- band._mask = self._mask
- return self._mask
-
- @mask.setter
- def mask(self, values: Band | None) -> None:
- if values is None:
- self._mask = None
- for band in self:
- band._mask = None
- return
- if not isinstance(values, Band):
- raise TypeError(f"mask must be Band. Got {type(values)}")
- self._mask = values
- mask_arr = self._mask.values
- for band in self:
- band._mask = self._mask
- try:
- band.values = np.ma.array(
- band.values.data, mask=mask_arr, fill_value=band.nodata
- )
- except ArrayNotLoadedError:
- pass
-
  @property
  def band_ids(self) -> list[str]:
  """The Band ids."""
@@ -1687,12 +1717,9 @@ class Image(_ImageBandBase):
  else:
  paths = self._df["file_path"]

- mask = self.mask
-
  self._bands = [
  self.band_class(
  path,
- mask=mask,
  all_file_paths=self._all_file_paths,
  **self._common_init_kwargs,
  )
@@ -1901,13 +1928,12 @@ class ImageCollection(_ImageBase):
  image_class: ClassVar[Image] = Image
  band_class: ClassVar[Band] = Band
  _metadata_attribute_collection_type: ClassVar[type] = pd.Series
- backend: str = "numpy"

  def __init__(
  self,
  data: str | Path | Sequence[Image] | Sequence[str | Path],
- res: int,
- level: str | None = None_,
+ res: int | None_ = None_,
+ level: str | None_ | None = None_,
  processes: int = 1,
  metadata: str | dict | pd.DataFrame | None = None,
  nodata: int | None = None,
@@ -1923,7 +1949,7 @@ class ImageCollection(_ImageBase):

  super().__init__(metadata=metadata, **kwargs)

- if callable(level) and isinstance(level(), None_):
+ if callable(level) and level() is None:
  level = None

  self.nodata = nodata
@@ -1944,13 +1970,19 @@ class ImageCollection(_ImageBase):
  elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
  # adding band paths (asuming 'data' is a sequence of image paths)
  try:
- self._all_file_paths = _get_child_paths_threaded(data) | set(data)
+ self._all_file_paths = _get_child_paths_threaded(data) | {
+ _fix_path(x) for x in data
+ }
  except FileNotFoundError as e:
  if _from_root:
  raise TypeError(
- "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
+ "When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
  ) from e
  raise e
+ if self.level:
+ self._all_file_paths = [
+ path for path in self._all_file_paths if self.level in path
+ ]
  self._df = self._create_metadata_df(self._all_file_paths)
  return

@@ -1968,7 +2000,9 @@ class ImageCollection(_ImageBase):

  self._df = self._create_metadata_df(self._all_file_paths)

- def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
+ def groupby(
+ self, by: str | list[str], copy: bool = True, **kwargs
+ ) -> ImageCollectionGroupBy:
  """Group the Collection by Image or Band attribute(s)."""
  df = pd.DataFrame(
  [(i, img) for i, img in enumerate(self) for _ in img],
@@ -1995,8 +2029,10 @@ class ImageCollection(_ImageBase):
  return ImageCollectionGroupBy(
  sorted(
  parallel(
- joblib.delayed(_copy_and_add_df_parallel)(i, group, self)
- for i, group in df.groupby(by, **kwargs)
+ joblib.delayed(_copy_and_add_df_parallel)(
+ group_values, group_df, self, copy
+ )
+ for group_values, group_df in df.groupby(by, **kwargs)
  )
  ),
  by=by,
@@ -2037,6 +2073,51 @@ class ImageCollection(_ImageBase):

  return self

+ def pixelwise(
+ self,
+ func: Callable,
+ kwargs: dict | None = None,
+ index_aligned_kwargs: dict | None = None,
+ masked: bool = True,
+ ) -> np.ndarray | tuple[np.ndarray] | None:
+ """Run a function for each pixel.
+
+ The function should take a 1d array as first argument. This will be
+ the pixel values for all bands in all images in the collection.
+ """
+ values = np.array([band.values for img in self for band in img])
+
+ if (
+ masked
+ and self.nodata is not None
+ and hasattr(next(iter(next(iter(self)))).values, "mask")
+ ):
+ mask_array = np.array(
+ [
+ (band.values.mask) | (band.values.data == self.nodata)
+ for img in self
+ for band in img
+ ]
+ )
+ elif masked and self.nodata is not None:
+ mask_array = np.array(
+ [band.values == self.nodata for img in self for band in img]
+ )
+ elif masked:
+ mask_array = np.array([band.values.mask for img in self for band in img])
+ else:
+ mask_array = None
+
+ return pixelwise(
+ func=func,
+ values=values,
+ mask_array=mask_array,
+ index_aligned_kwargs=index_aligned_kwargs,
+ kwargs=kwargs,
+ processes=self.processes,
+ nodata=self.nodata or np.nan,
+ )
+
  def get_unique_band_ids(self) -> list[str]:
  """Get a list of unique band_ids across all images."""
  return list({band.band_id for img in self for band in img})
@@ -2142,8 +2223,7 @@ class ImageCollection(_ImageBase):
  arr,
  bounds=bounds,
  crs=crs,
- mask=self.mask,
- **self._common_init_kwargs,
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
  )

  band._merged = True
@@ -2216,7 +2296,7 @@ class ImageCollection(_ImageBase):
  bounds=out_bounds,
  crs=crs,
  band_id=band_id,
- **self._common_init_kwargs,
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
  )
  )

@@ -2329,22 +2409,11 @@ class ImageCollection(_ImageBase):
  ):
  return self

- # if self.processes == 1:
- # for img in self:
- # for band in img:
- # band.load(
- # bounds=bounds,
- # indexes=indexes,
- # file_system=file_system,
- # **kwargs,
- # )
- # return self
-
  with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
  if self.masking:
- parallel(
- joblib.delayed(_load_band)(
- img.mask,
+ masks: list[np.ndarray] = parallel(
+ joblib.delayed(_read_mask_array)(
+ img,
  bounds=bounds,
  indexes=indexes,
  file_system=file_system,
@@ -2352,14 +2421,6 @@ class ImageCollection(_ImageBase):
  )
  for img in self
  )
- for img in self:
- for band in img:
- band._mask = img.mask
-
- # print({img.mask.has_array for img in self })
- # print({band.mask.has_array for img in self for band in img})
-
- # with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:

  parallel(
  joblib.delayed(_load_band)(
@@ -2367,34 +2428,86 @@ class ImageCollection(_ImageBase):
  bounds=bounds,
  indexes=indexes,
  file_system=file_system,
+ _masking=None,
  **kwargs,
  )
  for img in self
  for band in img
  )

+ if self.masking:
+ for img, mask_array in zip(self, masks, strict=True):
+ for band in img:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band.values.mask |= mask_array
+ else:
+ band.values = np.ma.array(
+ band.values, mask=mask_array, fill_value=self.nodata
+ )
+
  return self

  def clip(
  self,
  mask: Geometry | GeoDataFrame | GeoSeries,
- **kwargs,
+ dropna: bool = True,
+ copy: bool = True,
  ) -> "ImageCollection":
- """Clip all image Bands with 'loky'."""
- if self.processes == 1:
- for img in self:
- for band in img:
- band.clip(mask, **kwargs)
- return self
+ """Clip all image Bands while preserving bounds."""
+ copied = self.copy() if copy else self

- with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
- parallel(
- joblib.delayed(_clip_band)(band, mask, **kwargs)
- for img in self
+ copied._images = [img for img in copied if img.union_all()]
+
+ fill: int = self.nodata or 0
+
+ common_band_from_geopandas_kwargs = dict(
+ gdf=to_gdf(mask)[["geometry"]],
+ default_value=1,
+ fill=fill,
+ )
+
+ for img in copied:
+ img._rounded_bounds = tuple(int(x) for x in img.bounds)
+
+ for bounds in {img._rounded_bounds for img in copied}:
+ shapes = {
+ band.values.shape
+ for img in copied
  for band in img
- )
+ if img._rounded_bounds == bounds
+ }
+ if len(shapes) != 1:
+ raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")

- return self
+ mask_array: np.ndarray = Band.from_geopandas(
+ **common_band_from_geopandas_kwargs,
+ out_shape=next(iter(shapes)),
+ bounds=bounds,
+ ).values
+
+ is_not_polygon = mask_array == fill
+
+ for img in copied:
+ if img._rounded_bounds != bounds:
+ continue
+
+ for band in img:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band._values.mask |= is_not_polygon
+ else:
+ band._values = np.ma.array(
+ band.values, mask=is_not_polygon, fill_value=band.nodata
+ )
+
+ for img in copied:
+ del img._rounded_bounds
+
+ if dropna:
+ copied.images = [
+ img for img in copied if any(np.sum(band.values) for band in img)
+ ]
+
+ return copied

  def _set_bbox(
  self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
@@ -2405,17 +2518,12 @@ class ImageCollection(_ImageBase):
  if self._images is not None:
  for img in self._images:
  img._bbox = self._bbox
- if img.mask is not None:
- img.mask._bbox = self._bbox
  if img.bands is None:
  continue
  for band in img:
  band._bbox = self._bbox
  bounds = box(*band._bbox).intersection(box(*band.bounds))
  band._bounds = to_bbox(bounds) if not bounds.is_empty else None
- if band.mask is not None:
- band.mask._bbox = self._bbox
- band.mask._bounds = band._bounds

  return self

@@ -2521,7 +2629,7 @@ class ImageCollection(_ImageBase):
  **kwargs,
  )

- return xr.combine_by_coords(list(xarrs.values()))
+ return combine_by_coords(list(xarrs.values()))
  # return Dataset(xarrs)

  def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
@@ -2534,6 +2642,9 @@ class ImageCollection(_ImageBase):
  try:
  name = band.name
  except AttributeError:
+ name = None
+
+ if name is None:
  name = f"{self.__class__.__name__}({i})"

  if name not in out:
@@ -2594,10 +2705,6 @@ class ImageCollection(_ImageBase):

  return copied

- def __or__(self, collection: "ImageCollection") -> "ImageCollection":
- """Concatenate the collection with another collection."""
- return concat_image_collections([self, collection])
-
  def __iter__(self) -> Iterator[Image]:
  """Iterate over the images."""
  return iter(self.images)
@@ -2607,14 +2714,16 @@ class ImageCollection(_ImageBase):
  return len(self.images)

  def __getattr__(self, attr: str) -> Any:
- """Make iterable of metadata_attribute."""
+ """Make iterable of metadata attribute."""
  if attr in (self.metadata_attributes or {}):
  return self._metadata_attribute_collection_type(
  [getattr(img, attr) for img in self]
  )
  return super().__getattribute__(attr)

- def __getitem__(self, item: int | slice | Sequence[int | bool]) -> Image:
+ def __getitem__(
+ self, item: int | slice | Sequence[int | bool]
+ ) -> "Image | ImageCollection":
  """Select one Image by integer index, or multiple Images by slice, list of int."""
  if isinstance(item, int):
  return self.images[item]
@@ -2653,14 +2762,14 @@ class ImageCollection(_ImageBase):
  return copied

  @property
- def dates(self) -> list[str]:
+ def date(self) -> Any:
  """List of image dates."""
- return [img.date for img in self]
+ return self._metadata_attribute_collection_type([img.date for img in self])

  @property
- def image_paths(self) -> list[str]:
+ def image_paths(self) -> Any:
  """List of image paths."""
- return [img.path for img in self]
+ return self._metadata_attribute_collection_type([img.path for img in self])

  @property
  def images(self) -> list["Image"]:
@@ -2678,21 +2787,6 @@ class ImageCollection(_ImageBase):
  **self._common_init_kwargs,
  )

- if self.masking is not None:
- images = []
- for image in self._images:
- # TODO why this loop?
- try:
- if not isinstance(image.mask, Band):
- raise ValueError()
- images.append(image)
- except ValueError as e:
- raise e
- continue
- self._images = images
- for image in self._images:
- image._bands = [band for band in image if band.band_id is not None]
-
  self._images = [img for img in self if len(img)]

  if self._should_be_sorted:
@@ -2722,24 +2816,22 @@ class ImageCollection(_ImageBase):

  @images.setter
  def images(self, new_value: list["Image"]) -> list["Image"]:
- self._images = list(new_value)
- if not all(isinstance(x, Image) for x in self._images):
+ new_value = list(new_value)
+ if not new_value:
+ self._images = new_value
+ return
+ if all(isinstance(x, Band) for x in new_value):
+ if len(new_value) != len(self):
+ raise ValueError("'images' must have same length as number of images.")
+ new_images = []
+ for i, img in enumerate(self):
+ img._bands = [new_value[i]]
+ new_images.append(img)
+ self._images = new_images
+ return
+ if not all(isinstance(x, Image) for x in new_value):
  raise TypeError("images should be a sequence of Image.")
-
- def __repr__(self) -> str:
- """String representation."""
- root = ""
- if self.path is not None:
- data = f"'{self.path}'"
- elif all(img.path is not None for img in self):
- data = [img.path for img in self]
- parents = {str(Path(path).parent) for path in data}
- if len(parents) == 1:
- data = [Path(path).name for path in data]
- root = f" root='{next(iter(parents))}',"
- else:
- data = [img for img in self]
- return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
+ self._images = new_value

  def union_all(self) -> Polygon | MultiPolygon:
  """(Multi)Polygon representing the union of all image bounds."""
@@ -2796,7 +2888,6 @@ class ImageCollection(_ImageBase):
  if "date" in x_var and subcollection._should_be_sorted:
  subcollection._images = list(sorted(subcollection._images))

- y = np.array([band.values for img in subcollection for band in img])
  if "date" in x_var and subcollection._should_be_sorted:
  x = np.array(
  [
@@ -2813,120 +2904,35 @@ class ImageCollection(_ImageBase):
2813
2904
  - pd.Timestamp(np.min(x))
2814
2905
  ).days
2815
2906
  else:
2816
- x = np.arange(0, len(y))
2817
-
2818
- mask = np.array(
2819
- [
2820
- (
2821
- band.values.mask
2822
- if hasattr(band.values, "mask")
2823
- else np.full(band.values.shape, False)
2824
- )
2825
- for img in subcollection
2826
- for band in img
2827
- ]
2907
+ x = np.arange(0, sum(1 for img in subcollection for band in img))
2908
+
2909
+ subcollection.pixelwise(
2910
+ _plot_pixels_1d,
2911
+ kwargs=dict(
2912
+ alpha=alpha,
2913
+ x_var=x_var,
2914
+ y_label=y_label,
2915
+ rounding=rounding,
2916
+ first_date=first_date,
2917
+ figsize=figsize,
2918
+ ),
2919
+ index_aligned_kwargs=dict(x=x),
2828
2920
  )
2829
2921
 
2830
- if x_var == "days_since_start":
2831
- x = x - np.min(x)
2832
-
2833
- for i in range(y.shape[1]):
2834
- for j in range(y.shape[2]):
2835
- this_y = y[:, i, j]
2836
-
2837
- this_mask = mask[:, i, j]
2838
- this_x = x[~this_mask]
2839
- this_y = this_y[~this_mask]
2840
-
2841
- if ylim:
2842
- condition = (this_y >= ylim[0]) & (this_y <= ylim[1])
2843
- this_y = this_y[condition]
2844
- this_x = this_x[condition]
2845
-
2846
- coef, intercept = np.linalg.lstsq(
2847
- np.vstack([this_x, np.ones(this_x.shape[0])]).T,
2848
- this_y,
2849
- rcond=None,
2850
- )[0]
2851
- predicted = np.array([intercept + coef * x for x in this_x])
2852
-
2853
- predicted_start = predicted[0]
2854
- predicted_end = predicted[-1]
2855
- predicted_change = predicted_end - predicted_start
2856
-
2857
- # Degrees of freedom
2858
- dof = len(this_x) - 2
2859
-
2860
- # 95% confidence interval
2861
- t_val = stats.t.ppf(1 - alpha / 2, dof)
2862
-
2863
- # Mean squared error of the residuals
2864
- mse = np.sum((this_y - predicted) ** 2) / dof
2865
-
2866
- # Calculate the standard error of predictions
2867
- pred_stderr = np.sqrt(
2868
- mse
2869
- * (
2870
- 1 / len(this_x)
2871
- + (this_x - np.mean(this_x)) ** 2
2872
- / np.sum((this_x - np.mean(this_x)) ** 2)
2873
- )
2874
- )
2875
-
2876
- # Calculate the confidence interval for predictions
2877
- ci_lower = predicted - t_val * pred_stderr
2878
- ci_upper = predicted + t_val * pred_stderr
2879
-
2880
- fig = plt.figure(figsize=figsize)
2881
- ax = fig.add_subplot(1, 1, 1)
2882
-
2883
- ax.scatter(this_x, this_y, color="#2c93db")
2884
- ax.plot(this_x, predicted, color="#e0436b")
2885
- ax.fill_between(
2886
- this_x,
2887
- ci_lower,
2888
- ci_upper,
2889
- color="#e0436b",
2890
- alpha=0.2,
2891
- label=f"{int(alpha*100)}% CI",
2892
- )
2893
- plt.title(
2894
- f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
2895
- f"pred change: {round(predicted_change, rounding)}, "
2896
- f"pred start: {round(predicted_start, rounding)}, "
2897
- f"pred end: {round(predicted_end, rounding)}"
2898
- )
2899
- plt.xlabel(x_var)
2900
- plt.ylabel(y_label)
2901
-
2902
- if x_var == "date":
2903
- date_labels = pd.to_datetime(
2904
- [first_date + pd.Timedelta(days=int(day)) for day in this_x]
2905
- )
2906
-
2907
- _, unique_indices = np.unique(
2908
- date_labels.strftime("%Y-%m"), return_index=True
2909
- )
2910
-
2911
- unique_x = np.array(this_x)[unique_indices]
2912
- unique_labels = date_labels[unique_indices].strftime("%Y-%m")
2913
-
2914
- ax.set_xticks(unique_x)
2915
- ax.set_xticklabels(unique_labels, rotation=45, ha="right")
2916
- # ax.tick_params(axis="x", length=10, width=2)
2917
-
2918
- plt.show()
2919
-
2920
-
2921
- def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
2922
- for regex in regexes:
2923
- try:
2924
- return re.search(regex, xml_file)
2925
- except (TypeError, AttributeError):
2926
- continue
2927
- raise ValueError(
2928
- f"Could not find processing_baseline info from {regexes} in {xml_file}"
2929
- )
2922
+ def __repr__(self) -> str:
2923
+ """String representation."""
2924
+ root = ""
2925
+ if self.path is not None:
2926
+ data = f"'{self.path}'"
2927
+ elif all(img.path is not None for img in self):
2928
+ data = [img.path for img in self]
2929
+ parents = {str(Path(path).parent) for path in data}
2930
+ if len(parents) == 1:
2931
+ data = [Path(path).name for path in data]
2932
+ root = f" root='{next(iter(parents))}',"
2933
+ else:
2934
+ data = [img for img in self]
2935
+ return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
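For illustration only, a hypothetical repr when all images share a parent directory (file names and attribute values are made up):

    Sentinel2Collection(['S2A_T32VNM_20230601.SAFE', 'S2B_T32VNM_20230611.SAFE'], root='/data/s2', res=10, level='L2A')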
2930
2936
 
2931
2937
 
2932
2938
  class Sentinel2Config:
@@ -3040,9 +3046,6 @@ class Sentinel2Band(Sentinel2Config, Band):
3040
3046
  }
3041
3047
 
3042
3048
  def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
3043
- if self.is_mask:
3044
- return None
3045
-
3046
3049
  pat = re.compile(
3047
3050
  r"""
3048
3051
  <BOA_ADD_OFFSET\s*
@@ -3058,7 +3061,7 @@ class Sentinel2Band(Sentinel2Config, Band):
3058
3061
  except (TypeError, AttributeError, KeyError) as e:
3059
3062
  raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
3060
3063
  if not matches:
3061
- raise _RegexError(f"Could not find boa_add_offset info from {pat}")
3064
+ return None
3062
3065
 
3063
3066
  dict_ = (
3064
3067
  pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
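With this change, missing matches return None instead of raising _RegexError; when matches are found, the list of regex match dicts is pivoted into a plain mapping via pandas. A stand-alone illustration of that pivot step, with made-up band ids and offsets:

    import pandas as pd

    matches = [{"band_id": "1", "value": "-1000"}, {"band_id": "2", "value": "-1000"}]
    pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
    # {'1': -1000, '2': -1000}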
@@ -3121,7 +3124,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
3121
3124
  def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
3122
3125
  """ImageCollection with Sentinel2 specific name variables and path regexes."""
3123
3126
  level = kwargs.get("level", None_)
3124
- if callable(level) and isinstance(level(), None_):
3127
+ if callable(level) and level() is None:
3125
3128
  raise ValueError("Must specify level for Sentinel2Collection.")
3126
3129
  super().__init__(data=data, **kwargs)
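A hedged usage sketch; the path, resolution and level value are assumptions for illustration. Omitting level (or passing a callable that resolves to None) raises the ValueError above:

    collection = Sentinel2Collection(
        "/data/sentinel2",  # hypothetical directory of SAFE folders
        level="L2A",
        res=10,
    )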
3127
3130
 
@@ -3146,10 +3149,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
3146
3149
 
3147
3150
 
3148
3151
  def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
3149
- """Union multiple ImageCollections together.
3150
-
3151
- Same as using the union operator |.
3152
- """
3152
+ """Concatenate ImageCollections."""
3153
3153
  resolutions = {x.res for x in collections}
3154
3154
  if len(resolutions) > 1:
3155
3155
  raise ValueError(f"resolution mismatch. {resolutions}")
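A hedged sketch with hypothetical collection variables; all inputs must share the same resolution, otherwise the ValueError above is raised:

    merged = concat_image_collections([collection_2023, collection_2024])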
@@ -3185,8 +3185,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
3185
3185
  raise ValueError("array must be 2 or 3 dimensional")
3186
3186
 
3187
3187
 
3188
- def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
3189
- gradient_x, gradient_y = np.gradient(array, res, res)
3188
+ def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
3189
+ resx, resy = _res_as_tuple(res)
3190
+
3191
+ gradient_x, gradient_y = np.gradient(array, resx, resy)
3190
3192
 
3191
3193
  gradient = abs(gradient_x) + abs(gradient_y)
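A toy recomputation of the slope step under an assumed anisotropic resolution, showing how np.gradient takes one spacing value per axis:

    import numpy as np

    arr = np.array([[0.0, 10.0], [20.0, 40.0]])
    grad0, grad1 = np.gradient(arr, 10, 20)  # spacing 10 along axis 0, 20 along axis 1
    slope = np.abs(grad0) + np.abs(grad1)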
3192
3194
 
@@ -3273,7 +3275,7 @@ def _get_images(
3273
3275
  return images
3274
3276
 
3275
3277
 
3276
- class ArrayNotLoadedError(ValueError):
3278
+ class _ArrayNotLoadedError(ValueError):
3277
3279
  """Arrays are not loaded."""
3278
3280
 
3279
3281
 
@@ -3351,18 +3353,22 @@ def _intesects(x, other) -> bool:
3351
3353
 
3352
3354
 
3353
3355
  def _copy_and_add_df_parallel(
3354
- i: tuple[Any, ...], group: pd.DataFrame, self: ImageCollection
3356
+ group_values: tuple[Any, ...],
3357
+ group_df: pd.DataFrame,
3358
+ self: ImageCollection,
3359
+ copy: bool,
3355
3360
  ) -> tuple[tuple[Any], ImageCollection]:
3356
- copied = self.copy()
3361
+ copied = self.copy() if copy else self
3357
3362
  copied.images = [
3358
- img.copy() for img in group.drop_duplicates("_image_idx")["_image_instance"]
3363
+ img.copy() if copy else img
3364
+ for img in group_df.drop_duplicates("_image_idx")["_image_instance"]
3359
3365
  ]
3360
- if "band_id" in group:
3361
- band_ids = set(group["band_id"].values)
3366
+ if "band_id" in group_df:
3367
+ band_ids = set(group_df["band_id"].values)
3362
3368
  for img in copied.images:
3363
3369
  img._bands = [band for band in img if band.band_id in band_ids]
3364
3370
 
3365
- return (i, copied)
3371
+ return (group_values, copied)
3366
3372
 
3367
3373
 
3368
3374
  def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
@@ -3388,15 +3394,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
3388
3394
  return rasterio.open(file)
3389
3395
 
3390
3396
 
3391
- def _load_band(band: Band, **kwargs) -> None:
3397
+ def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
3398
+ mask_band_id = self.masking["band_id"]
3399
+ mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
3400
+ if len(mask_paths) > 1:
3401
+ raise ValueError(
3402
+ f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
3403
+ )
3404
+ elif not mask_paths:
3405
+ raise ValueError(
3406
+ f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
3407
+ + str([Path(x).name for x in _ls_func(self.path)])
3408
+ )
3409
+
3410
+ band = Band(
3411
+ next(iter(mask_paths)),
3412
+ **{**self._common_init_kwargs, "metadata": None},
3413
+ )
3414
+ band.load(**kwargs)
3415
+ boolean_mask = np.isin(band.values, list(self.masking["values"]))
3416
+ return boolean_mask
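A small stand-alone illustration of the final masking step (the mask band values and the masking["values"] set are made up): pixels whose mask value is in the set become True in the boolean mask:

    import numpy as np

    mask_band_values = np.array([[3, 8, 9], [0, 3, 10]])
    boolean_mask = np.isin(mask_band_values, [3, 8, 9, 10])
    # [[ True  True  True]
    #  [False  True  True]]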
3417
+
3418
+
3419
+ def _load_band(band: Band, **kwargs) -> Band:
3392
3420
  return band.load(**kwargs)
3393
3421
 
3394
3422
 
3395
- def _band_apply(band: Band, func: Callable, **kwargs) -> None:
3423
+ def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
3396
3424
  return band.apply(func, **kwargs)
3397
3425
 
3398
3426
 
3399
- def _clip_band(band: Band, mask, **kwargs) -> None:
3427
+ def _clip_band(band: Band, mask, **kwargs) -> Band:
3400
3428
  return band.clip(mask, **kwargs)
3401
3429
 
3402
3430
 
@@ -3441,126 +3469,148 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
3441
3469
  return binary_erosion(arr, structure=structure).astype(dtype)
3442
3470
 
3443
3471
 
3444
- def get_cmap(arr: np.ndarray) -> LinearSegmentedColormap:
3472
+ def _plot_pixels_1d(
3473
+ y: np.ndarray,
3474
+ x: np.ndarray,
3475
+ alpha: float,
3476
+ x_var: str,
3477
+ y_label: str,
3478
+ rounding: int,
3479
+ figsize: tuple,
3480
+ first_date: pd.Timestamp,
3481
+ ) -> None:
3482
+ coef, intercept = np.linalg.lstsq(
3483
+ np.vstack([x, np.ones(x.shape[0])]).T,
3484
+ y,
3485
+ rcond=None,
3486
+ )[0]
3487
+ predicted = intercept + coef * x  # vectorized; avoids shadowing x inside a comprehension
3488
+
3489
+ predicted_start = predicted[0]
3490
+ predicted_end = predicted[-1]
3491
+ predicted_change = predicted_end - predicted_start
3492
+
3493
+ # Degrees of freedom
3494
+ dof = len(x) - 2
3495
+
3496
+ # 95% confidence interval
3497
+ t_val = stats.t.ppf(1 - alpha / 2, dof)
3498
+
3499
+ # Mean squared error of the residuals
3500
+ mse = np.sum((y - predicted) ** 2) / dof
3501
+
3502
+ # Calculate the standard error of predictions
3503
+ pred_stderr = np.sqrt(
3504
+ mse * (1 / len(x) + (x - np.mean(x)) ** 2 / np.sum((x - np.mean(x)) ** 2))
3505
+ )
3445
3506
 
3446
- # blue = [[i / 10 + 0.1, i / 10 + 0.1, 1 - (i / 10) + 0.1] for i in range(11)][1:]
3447
- blue = [
3448
- [0.1, 0.1, 1.0],
3449
- [0.2, 0.2, 0.9],
3450
- [0.3, 0.3, 0.8],
3451
- [0.4, 0.4, 0.7],
3452
- [0.6, 0.6, 0.6],
3453
- [0.6, 0.6, 0.6],
3454
- [0.7, 0.7, 0.7],
3455
- [0.8, 0.8, 0.8],
3456
- ]
3457
- # gray = list(reversed([[i / 10 - 0.1, i / 10, i / 10 - 0.1] for i in range(11)][1:]))
3458
- gray = [
3459
- [0.6, 0.6, 0.6],
3460
- [0.6, 0.6, 0.6],
3461
- [0.6, 0.6, 0.6],
3462
- [0.6, 0.6, 0.6],
3463
- [0.6, 0.6, 0.6],
3464
- [0.4, 0.7, 0.4],
3465
- [0.3, 0.7, 0.3],
3466
- [0.2, 0.8, 0.2],
3467
- ]
3468
- # gray = [[0.6, 0.6, 0.6] for i in range(10)]
3469
- # green = [[0.2 + i/20, i / 10 - 0.1, + i/20] for i in range(11)][1:]
3470
- green = [
3471
- [0.25, 0.0, 0.05],
3472
- [0.3, 0.1, 0.1],
3473
- [0.35, 0.2, 0.15],
3474
- [0.4, 0.3, 0.2],
3475
- [0.45, 0.4, 0.25],
3476
- [0.5, 0.5, 0.3],
3477
- [0.55, 0.6, 0.35],
3478
- [0.7, 0.9, 0.5],
3479
- ]
3480
- green = [
3481
- [0.6, 0.6, 0.6],
3482
- [0.4, 0.7, 0.4],
3483
- [0.3, 0.8, 0.3],
3484
- [0.25, 0.4, 0.25],
3485
- [0.2, 0.5, 0.2],
3486
- [0.10, 0.7, 0.10],
3487
- [0, 0.9, 0],
3488
- ]
3507
+ # Calculate the confidence interval for predictions
3508
+ ci_lower = predicted - t_val * pred_stderr
3509
+ ci_upper = predicted + t_val * pred_stderr
3510
+
3511
+ fig = plt.figure(figsize=figsize)
3512
+ ax = fig.add_subplot(1, 1, 1)
3513
+
3514
+ ax.scatter(x, y, color="#2c93db")
3515
+ ax.plot(x, predicted, color="#e0436b")
3516
+ ax.fill_between(
3517
+ x,
3518
+ ci_lower,
3519
+ ci_upper,
3520
+ color="#e0436b",
3521
+ alpha=0.2,
3522
+ label=f"{int((1 - alpha) * 100)}% CI",
3523
+ )
3524
+ plt.title(
3525
+ f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
3526
+ f"pred change: {round(predicted_change, rounding)}, "
3527
+ f"pred start: {round(predicted_start, rounding)}, "
3528
+ f"pred end: {round(predicted_end, rounding)}"
3529
+ )
3530
+ plt.xlabel(x_var)
3531
+ plt.ylabel(y_label)
3489
3532
 
3490
- def get_start(arr):
3491
- min_value = np.min(arr)
3492
- if min_value < -0.75:
3493
- return 0
3494
- if min_value < -0.5:
3495
- return 1
3496
- if min_value < -0.25:
3497
- return 2
3498
- if min_value < 0:
3499
- return 3
3500
- if min_value < 0.25:
3501
- return 4
3502
- if min_value < 0.5:
3503
- return 5
3504
- if min_value < 0.75:
3505
- return 6
3506
- return 7
3507
-
3508
- def get_stop(arr):
3509
- max_value = np.max(arr)
3510
- if max_value <= 0.05:
3511
- return 0
3512
- if max_value < 0.175:
3513
- return 1
3514
- if max_value < 0.25:
3515
- return 2
3516
- if max_value < 0.375:
3517
- return 3
3518
- if max_value < 0.5:
3519
- return 4
3520
- if max_value < 0.75:
3521
- return 5
3522
- return 6
3523
-
3524
- cmap_name = "blue_gray_green"
3525
-
3526
- start = get_start(arr)
3527
- stop = get_stop(arr)
3528
- blue = blue[start]
3529
- gray = gray[start]
3530
- # green = green[start]
3531
- green = green[stop]
3532
-
3533
- # green[0] = np.arange(0, 1, 0.1)[::-1][stop]
3534
- # green[1] = np.arange(0, 1, 0.1)[stop]
3535
- # green[2] = np.arange(0, 1, 0.1)[::-1][stop]
3536
-
3537
- print(green)
3538
- print(start, stop)
3539
- print("blue gray green")
3540
- print(blue)
3541
- print(gray)
3542
- print(green)
3543
-
3544
- # Define the segments of the colormap
3545
- cdict = {
3546
- "red": [
3547
- (0.0, blue[0], blue[0]),
3548
- (0.3, gray[0], gray[0]),
3549
- (0.7, gray[0], gray[0]),
3550
- (1.0, green[0], green[0]),
3551
- ],
3552
- "green": [
3553
- (0.0, blue[1], blue[1]),
3554
- (0.3, gray[1], gray[1]),
3555
- (0.7, gray[1], gray[1]),
3556
- (1.0, green[1], green[1]),
3557
- ],
3558
- "blue": [
3559
- (0.0, blue[2], blue[2]),
3560
- (0.3, gray[2], gray[2]),
3561
- (0.7, gray[2], gray[2]),
3562
- (1.0, green[2], green[2]),
3563
- ],
3564
- }
3533
+ if x_var == "date":
3534
+ date_labels = pd.to_datetime(
3535
+ [first_date + pd.Timedelta(days=int(day)) for day in x]
3536
+ )
3537
+
3538
+ _, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)
3539
+
3540
+ unique_x = np.array(x)[unique_indices]
3541
+ unique_labels = date_labels[unique_indices].strftime("%Y-%m")
3542
+
3543
+ ax.set_xticks(unique_x)
3544
+ ax.set_xticklabels(unique_labels, rotation=45, ha="right")
3545
+
3546
+ plt.show()
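A self-contained toy recomputation of the least-squares fit and confidence band computed above, with made-up data and alpha=0.05:

    import numpy as np
    from scipy import stats

    x = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
    y = np.array([0.1, 0.3, 0.45, 0.7, 0.85])
    coef, intercept = np.linalg.lstsq(np.vstack([x, np.ones(len(x))]).T, y, rcond=None)[0]
    predicted = intercept + coef * x
    dof = len(x) - 2
    t_val = stats.t.ppf(1 - 0.05 / 2, dof)  # two-sided 95% interval
    mse = np.sum((y - predicted) ** 2) / dof
    stderr = np.sqrt(mse * (1 / len(x) + (x - x.mean()) ** 2 / np.sum((x - x.mean()) ** 2)))
    ci_lower, ci_upper = predicted - t_val * stderr, predicted + t_val * stderr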
3547
+
3548
+
3549
+ def pixelwise(
3550
+ func: Callable,
3551
+ values: np.ndarray,
3552
+ mask_array: np.ndarray | None = None,
3553
+ index_aligned_kwargs: dict | None = None,
3554
+ kwargs: dict | None = None,
3555
+ processes: int = 1,
3556
+ nodata=np.nan,
3557
+ ) -> Any:
3558
+ """Run a function for each pixel of a 3d array."""
3559
+ index_aligned_kwargs = index_aligned_kwargs or {}
3560
+ kwargs = kwargs or {}
3561
+
3562
+ if mask_array is not None:
3563
+ not_all_missing = np.all(mask_array, axis=0) == False
3564
+
3565
+ else:
3566
+ mask_array = np.full(values.shape, False)
3567
+ not_all_missing = np.full(values.shape[1:], True)
3568
+
3569
+ nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
3570
+
3571
+ def select_pixel_values(row: int, col: int) -> np.ndarray:
3572
+ return values[~mask_array[:, row, col], row, col]
3573
+
3574
+ with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
3575
+ results: list[tuple[np.float64, np.float64]] = parallel(
3576
+ joblib.delayed(func)(
3577
+ select_pixel_values(row, col),
3578
+ **kwargs,
3579
+ **{
3580
+ key: value[~mask_array[:, row, col]]
3581
+ for key, value in index_aligned_kwargs.items()
3582
+ },
3583
+ )
3584
+ for row, col in (
3585
+ zip(nonmissing_row_indices, nonmissing_col_indices, strict=True)
3586
+ )
3587
+ )
3588
+
3589
+ if all(x is None for x in results):
3590
+ return
3591
+
3592
+ try:
3593
+ n_out_arrays = len(next(iter(results)))
3594
+ except TypeError:
3595
+ n_out_arrays = 1
3596
+
3597
+ out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))
3598
+
3599
+ counter = 0
3600
+ for row, col in zip(nonmissing_row_indices, nonmissing_col_indices, strict=True):
3601
+ these_results = results[counter]
3602
+ if these_results is None:
3603
+ counter += 1
3604
+ continue
3605
+ for i, arr in enumerate(out_arrays):
3606
+ try:
3607
+ arr[row, col] = these_results[i]
3608
+ except TypeError:
3609
+ arr[row, col] = these_results
3610
+ counter += 1
3611
+ assert counter == len(results), (counter, len(results))
3612
+
3613
+ if len(out_arrays) == 1:
3614
+ return out_arrays[0]
3565
3615
 
3566
- return LinearSegmentedColormap(cmap_name, segmentdata=cdict, N=50)
3616
+ return out_arrays
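A hedged usage sketch with a toy array: pixelwise feeds each pixel's 1d series of unmasked values to func and reassembles the scalar (or tuple) results into 2d arrays, filling skipped pixels with nodata:

    import numpy as np

    values = np.random.rand(5, 2, 3)     # 5 "dates", 2x3 pixels
    means = pixelwise(np.mean, values)   # -> (2, 3) array of per-pixel means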