ssb-sgis 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import os
6
6
  import random
7
7
  import re
8
8
  import time
9
+ from abc import abstractmethod
9
10
  from collections.abc import Callable
10
11
  from collections.abc import Iterable
11
12
  from collections.abc import Iterator
@@ -26,7 +27,6 @@ import rasterio
26
27
  from affine import Affine
27
28
  from geopandas import GeoDataFrame
28
29
  from geopandas import GeoSeries
29
- from matplotlib.colors import LinearSegmentedColormap
30
30
  from pandas.api.types import is_dict_like
31
31
  from rasterio.enums import MergeAlg
32
32
  from scipy import stats
@@ -41,11 +41,8 @@ from shapely.geometry import Polygon
41
41
 
42
42
  try:
43
43
  import dapla as dp
44
- from dapla.gcs import GCSFileSystem
45
44
  except ImportError:
46
-
47
- class GCSFileSystem:
48
- """Placeholder."""
45
+ pass
49
46
 
50
47
 
51
48
  try:
@@ -55,7 +52,7 @@ except ImportError:
55
52
  class exceptions:
56
53
  """Placeholder."""
57
54
 
58
- class RefreshError:
55
+ class RefreshError(Exception):
59
56
  """Placeholder."""
60
57
 
61
58
 
@@ -74,9 +71,9 @@ try:
74
71
  except ImportError:
75
72
  pass
76
73
  try:
77
- import xarray as xr
78
74
  from xarray import DataArray
79
75
  from xarray import Dataset
76
+ from xarray import combine_by_coords
80
77
  except ImportError:
81
78
 
82
79
  class DataArray:
@@ -85,6 +82,9 @@ except ImportError:
85
82
  class Dataset:
86
83
  """Placeholder."""
87
84
 
85
def combine_by_coords(*args, **kwargs) -> None:
    """Placeholder for xarray.combine_by_coords when xarray cannot be imported.

    Accepts and ignores any arguments and always raises ImportError.
    """
    missing_dependency = ImportError("xarray")
    raise missing_dependency
87
+
88
88
 
89
89
  from ..geopandas_tools.bounds import get_total_bounds
90
90
  from ..geopandas_tools.conversion import to_bbox
@@ -102,8 +102,10 @@ from ..io.opener import opener
102
102
  from . import sentinel_config as config
103
103
  from .base import _array_to_geojson
104
104
  from .base import _gdf_to_arr
105
+ from .base import _get_res_from_bounds
105
106
  from .base import _get_shape_from_bounds
106
107
  from .base import _get_transform_from_bounds
108
+ from .base import _res_as_tuple
107
109
  from .base import get_index_mapper
108
110
  from .indices import ndvi
109
111
  from .regex import _extract_regex_match_from_string
@@ -142,8 +144,6 @@ DATE_RANGES_TYPE = (
142
144
  | tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
143
145
  )
144
146
 
145
- FILENAME_COL_SUFFIX = "_filename"
146
-
147
147
  DEFAULT_FILENAME_REGEX = r"""
148
148
  .*?
149
149
  (?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group
@@ -163,13 +163,12 @@ ALLOWED_INIT_KWARGS = [
163
163
  "filename_regexes",
164
164
  "all_bands",
165
165
  "crs",
166
- "backend",
167
166
  "masking",
168
167
  "_merged",
169
168
  "date",
170
169
  ]
171
170
 
172
- _load_counter: int = 0
171
+ _LOAD_COUNTER: int = 0
173
172
 
174
173
 
175
174
  def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
@@ -178,6 +177,90 @@ def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
178
177
  return set(itertools.chain.from_iterable(all_paths))
179
178
 
180
179
 
180
+ @dataclass
181
+ class PixelwiseResults:
182
+ """Container of results from pixelwise operation to be converted."""
183
+
184
+ row_indices: np.ndarray
185
+ col_indices: np.ndarray
186
+ results: list[Any]
187
+ res: int | tuple[int, int]
188
+ bounds: tuple[float, float, float, float]
189
+ shape: tuple[int, int]
190
+ crs: Any
191
+ nodata: int | float | None
192
+
193
+ def to_tuple(self) -> tuple[int, int, Any]:
194
+ """Return 3-length tuple of row indices, column indices and pixelwise results."""
195
+ return self.row_indices, self.col_indices, self.results
196
+
197
+ def to_dict(self) -> dict[tuple[int, int], Any]:
198
+ """Return dictionary with row and column indices as keys and pixelwise results as values."""
199
+ return {
200
+ (int(row), int(col)): value
201
+ for row, col, value in zip(
202
+ self.row_indices, self.col_indices, self.results, strict=True
203
+ )
204
+ }
205
+
206
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
207
+ """Return GeoDataFrame with pixel geometries and values from the pixelwise operation."""
208
+ minx, miny = self.bounds[:2]
209
+ resx, resy = _res_as_tuple(self.res)
210
+
211
+ minxs = np.full(self.row_indices.shape, minx) + (minx * self.row_indices * resx)
212
+ minys = np.full(self.col_indices.shape, miny) + (miny * self.col_indices * resy)
213
+ maxxs = minxs + resx
214
+ maxys = minys + resy
215
+
216
+ return GeoDataFrame(
217
+ {
218
+ column: self.results,
219
+ "geometry": [
220
+ box(minx, miny, maxx, maxy)
221
+ for minx, miny, maxx, maxy in zip(
222
+ minxs, minys, maxxs, maxys, strict=True
223
+ )
224
+ ],
225
+ },
226
+ index=[self.row_indices, self.col_indices],
227
+ crs=self.crs,
228
+ )
229
+
230
+ def to_numpy(self) -> np.ndarray | tuple[np.ndarray, ...]:
231
+ """Reshape pixelwise results to 2d numpy arrays in the shape of the full arrays of the image bands."""
232
+ try:
233
+ n_out_arrays = len(next(iter(self.results)))
234
+ except TypeError:
235
+ n_out_arrays = 1
236
+
237
+ out_arrays = [
238
+ np.full(self.shape, self.nodata).astype(np.float64)
239
+ for _ in range(n_out_arrays)
240
+ ]
241
+
242
+ for row, col, these_results in zip(
243
+ self.row_indices, self.col_indices, self.results, strict=True
244
+ ):
245
+ if these_results is None:
246
+ continue
247
+ for i, arr in enumerate(out_arrays):
248
+ try:
249
+ arr[row, col] = these_results[i]
250
+ except TypeError:
251
+ arr[row, col] = these_results
252
+
253
+ for i, array in enumerate(out_arrays):
254
+ all_are_integers = np.all(np.mod(array, 1) == 0)
255
+ if all_are_integers:
256
+ out_arrays[i] = array.astype(int)
257
+
258
+ if len(out_arrays) == 1:
259
+ return out_arrays[0]
260
+
261
+ return tuple(out_arrays)
262
+
263
+
181
264
  class ImageCollectionGroupBy:
182
265
  """Iterator and merger class returned from groupby.
183
266
 
@@ -196,7 +279,7 @@ class ImageCollectionGroupBy:
196
279
  Args:
197
280
  data: Iterable of group values and ImageCollection groups.
198
281
  by: list of group attributes.
199
- collection: ImageCollection instance. Used to pass attributes.
282
+ collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
200
283
  """
201
284
  self.data = list(data)
202
285
  self.by = by
@@ -291,7 +374,7 @@ class ImageCollectionGroupBy:
291
374
 
292
375
def __repr__(self) -> str:
    """Return the class name followed by len(self) and the 'by' attribute."""
    class_name = self.__class__.__name__
    n_items = len(self)
    return f"{class_name}({n_items}, by={self.by})"
295
378
 
296
379
 
297
380
  @dataclass(frozen=True)
@@ -307,7 +390,11 @@ class BandMasking:
307
390
 
308
391
 
309
392
class None_:
    """Default None for arguments that are not allowed to be None.

    Calling the class never creates an instance: 'None_()' evaluates to None.
    """

    def __new__(cls) -> None:
        """Return None instead of a new instance."""
        return None
311
398
 
312
399
 
313
400
  class _ImageBase:
@@ -318,18 +405,16 @@ class _ImageBase:
318
405
 
319
406
  def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
320
407
 
321
- self._mask = None
322
408
  self._bounds = None
323
- self._merged = False
324
- self._from_array = False
325
- self._from_geopandas = False
326
- self.metadata_attributes = self.metadata_attributes or {}
327
409
  self._path = None
328
- self._metadata_from_xml = False
329
-
330
410
  self._bbox = to_bbox(bbox) if bbox is not None else None
331
411
 
332
- self.metadata = self._metadata_to_nested_dict(metadata)
412
+ self.metadata_attributes = self.metadata_attributes or {}
413
+
414
+ if metadata is not None:
415
+ self.metadata = self._metadata_to_nested_dict(metadata)
416
+ else:
417
+ self.metadata = {}
333
418
 
334
419
  self.image_patterns = self._compile_regexes("image_regexes")
335
420
  self.filename_patterns = self._compile_regexes("filename_regexes")
@@ -339,29 +424,45 @@ class _ImageBase:
339
424
  f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
340
425
  )
341
426
  if key in ALLOWED_INIT_KWARGS and key in dir(self):
342
- if is_property(self, key):
343
- setattr(self, f"_{key}", value)
344
- elif is_method(self, key):
345
- raise error_obj
346
- else:
347
- setattr(self, key, value)
427
+ self._safe_setattr(key, value, error_obj)
348
428
  else:
349
429
  raise error_obj
350
430
 
431
+ # attributes for debugging
432
+ self._metadata_from_xml = False
433
+ self._merged = False
434
+ self._from_array = False
435
+ self._from_geopandas = False
436
+
437
def _safe_setattr(
    self, key: str, value: Any, error_obj: Exception | None = None
) -> None:
    """Set an attribute without overwriting properties or methods.

    Args:
        key: Name of the attribute to set.
        value: Value to assign.
        error_obj: Exception to raise if 'key' is a method. An AttributeError
            is raised if not given.

    Raises:
        The 'error_obj' (or an AttributeError) if 'key' is a method.
    """
    if is_property(self, key):
        # properties are set via their underscore-prefixed attribute
        setattr(self, f"_{key}", value)
        return

    if not is_method(self, key):
        setattr(self, key, value)
        return

    if error_obj is not None:
        raise error_obj
    raise AttributeError(f"Cannot set method '{key}'.")
448
+
351
449
def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern, ...]:
    """Compile the regex string(s) stored in the attribute named 'regex_attr'.

    Args:
        regex_attr: Name of an attribute holding a regex string, an iterable
            of regex strings or a falsy value.

    Returns:
        Tuple of patterns compiled with the re.VERBOSE flag. The tuple is
        empty if the attribute is falsy.
    """
    regexes: tuple[str, ...] | str = getattr(self, regex_attr)
    if not regexes:
        return ()
    if isinstance(regexes, str):
        # a single string is treated as a one-element collection
        regexes = (regexes,)
    return tuple(re.compile(regex, flags=re.VERBOSE) for regex in regexes)
358
456
 
359
457
  @staticmethod
360
458
  def _metadata_to_nested_dict(
361
459
  metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
362
- ) -> dict[str, dict[str, Any]] | None:
363
- if metadata is None:
364
- return {}
460
+ ) -> dict[str, dict[str, Any]]:
461
+ """Construct metadata dict from dictlike, DataFrame or file path.
462
+
463
+ Extract metadata value:
464
+ >>> self.metadata[self.path]['cloud_cover_percentage']
465
+ """
365
466
  if isinstance(metadata, (str | Path | os.PathLike)):
366
467
  metadata = _read_parquet_func(metadata)
367
468
 
@@ -376,15 +477,16 @@ class _ImageBase:
376
477
  return x if not (is_scalar(x) and pd.isna(x)) else None
377
478
 
378
479
  # to nested dict because pandas indexing gives rare KeyError with long strings
379
- metadata = {
480
+ return {
380
481
  _fix_path(path): {
381
482
  attr: na_to_none(value) for attr, value in row.items()
382
483
  }
383
484
  for path, row in metadata.iterrows()
384
485
  }
385
486
  elif is_dict_like(metadata):
386
- metadata = {_fix_path(path): value for path, value in metadata.items()}
487
+ return {_fix_path(path): value for path, value in metadata.items()}
387
488
 
489
+ # try to allow custom types with dict-like indexing
388
490
  return metadata
389
491
 
390
492
  @property
@@ -394,7 +496,6 @@ class _ImageBase:
394
496
  "res": self.res,
395
497
  "bbox": self._bbox,
396
498
  "nodata": self.nodata,
397
- "backend": self.backend,
398
499
  "metadata": self.metadata,
399
500
  }
400
501
 
@@ -408,19 +509,22 @@ class _ImageBase:
408
509
  @property
409
510
  def res(self) -> int:
410
511
  """Pixel resolution."""
512
+ # if self._res is None:
513
+ # if self.has_array:
514
+ # self._res = _get_res_from_bounds(self.bounds, self.values.shape)
515
+ # else:
516
+ # with opener(self.path) as file:
517
+ # with rasterio.open(file) as src:
518
+ # self._res = src.res
411
519
  return self._res
412
520
 
413
- @property
414
- def centroid(self) -> Point:
415
- """Centerpoint of the object."""
416
- return self.union_all().centroid
521
@abstractmethod
def union_all(self) -> Polygon | MultiPolygon:
    """Return the geometry of the object. To be implemented by subclasses."""
417
524
 
418
525
def assign(self, **kwargs) -> "_ImageBase":
    """Set each keyword argument as an attribute and return the object itself.

    Each name/value pair is passed to '_safe_setattr', in the order given.
    """
    for attr_name in kwargs:
        self._safe_setattr(attr_name, kwargs[attr_name])
    return self
425
529
 
426
530
  def _name_regex_searcher(
@@ -451,7 +555,10 @@ class _ImageBase:
451
555
  )
452
556
 
453
557
  def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
454
- """Create a dataframe with file paths and image paths that match regexes."""
558
+ """Create a dataframe with file paths and image paths that match regexes.
559
+
560
+ Used in __init__ to select relevant paths fast.
561
+ """
455
562
  df = pd.DataFrame({"file_path": list(file_paths)})
456
563
 
457
564
  df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
@@ -518,12 +625,14 @@ class _ImageBase:
518
625
  class _ImageBandBase(_ImageBase):
519
626
  """Common parent class of Image and Band."""
520
627
 
521
def intersects(
    self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
) -> bool:
    """Check whether the object's geometry intersects another geometry.

    Args:
        geometry: Geometry-like object, converted with 'to_shapely'.

    Returns:
        True if the result of 'union_all' intersects the geometry.

    Raises:
        ValueError: If 'geometry' has a 'crs' attribute that is not equal
            to the crs of the object.
    """
    if hasattr(geometry, "crs"):
        same_crs = pyproj.CRS(self.crs).equals(pyproj.CRS(geometry.crs))
        if not same_crs:
            raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")

    own_geometry = self.union_all()
    return own_geometry.intersects(to_shapely(geometry))
527
636
 
528
637
  def union_all(self) -> Polygon:
529
638
  try:
@@ -532,20 +641,21 @@ class _ImageBandBase(_ImageBase):
532
641
  return Polygon()
533
642
 
534
643
@property
def centroid(self) -> Point:
    """Centerpoint of the geometry returned by 'union_all'."""
    own_geometry = self.union_all()
    return own_geometry.centroid
537
647
 
538
648
@property
def year(self) -> str:
    """Year of the object.

    Returns the '_year' attribute if it exists and is truthy. Otherwise the
    four first characters of the date converted to string.
    """
    explicit_year = getattr(self, "_year", None)
    if explicit_year:
        return explicit_year
    date_text = str(self.date)
    return date_text[:4]
543
653
 
544
654
@property
def month(self) -> str:
    """Month of the object.

    Returns the '_month' attribute if it exists and is truthy. Otherwise
    characters five and six of the date as string, after hyphens and slashes
    are removed.
    """
    explicit_month = getattr(self, "_month", None)
    if explicit_month:
        return explicit_month
    date_digits = "".join(char for char in str(self.date) if char not in "-/")
    return date_digits[4:6]
549
659
 
550
660
  @property
551
661
  def name(self) -> str | None:
@@ -572,62 +682,66 @@ class _ImageBandBase(_ImageBase):
572
682
  return self._name_regex_searcher("level", self.image_patterns)
573
683
 
574
684
def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
    """Search through xml files for missing metadata attributes.

    Args:
        metadata_attributes: Dict of attribute names and how to obtain the
            value: the name of an attribute/method of the object, a callable
            taking the xml text, regex string(s)/pattern(s), or another
            non-None value that is used as is.

    Returns:
        Dict with all attribute names of 'metadata_attributes' as keys, with
        values that are either already set on the object or found here.
    """
    self._metadata_from_xml = True

    # split into attributes that must be searched for and those already set
    missing_metadata_attributes = {}
    nonmissing_metadata_attributes = {}
    for attr, constructor_func in metadata_attributes.items():
        current_value = getattr(self, attr, None)
        if current_value is None:
            missing_metadata_attributes[attr] = constructor_func
        else:
            nonmissing_metadata_attributes[attr] = current_value

    if not missing_metadata_attributes:
        return nonmissing_metadata_attributes

    # read all xml content once
    file_contents: dict[str, str] = {}
    for path in self._all_file_paths:
        if ".xml" not in path:
            continue
        with _open_func(path, "rb") as file:
            file_contents[path] = file.read().decode("utf-8")

    last_xml_index = len(file_contents) - 1

    for attr, value in missing_metadata_attributes.items():
        results = None
        for xml_index, file_content in enumerate(file_contents.values()):
            if isinstance(value, str) and value in dir(self):
                # method or a hardcoded value
                value = getattr(self, value)

            if callable(value):
                try:
                    results = value(file_content)
                except _RegexError as e:
                    # only fail when no xml files are left to try
                    if xml_index == last_xml_index:
                        raise e.__class__(self.path, list(file_contents), e) from e
                    continue
                if results is not None:
                    break
            elif (
                isinstance(value, str)
                or hasattr(value, "__iter__")
                and all(isinstance(x, str | re.Pattern) for x in value)
            ):
                try:
                    results = _extract_regex_match_from_string(file_content, value)
                except _RegexError as e:
                    if xml_index == last_xml_index:
                        raise e
            elif value is not None:
                results = value
                break

        missing_metadata_attributes[attr] = results

    return missing_metadata_attributes | nonmissing_metadata_attributes
633
747
 
@@ -671,14 +785,15 @@ class Band(_ImageBandBase):
671
785
  """Band holding a single 2 dimensional array representing an image band."""
672
786
 
673
787
  cmap: ClassVar[str | None] = None
674
- backend: str = "numpy"
675
788
 
676
789
  @classmethod
677
790
  def from_geopandas(
678
791
  cls,
679
792
  gdf: GeoDataFrame | GeoSeries,
680
- res: int,
681
793
  *,
794
+ res: int | None = None,
795
+ out_shape: tuple[int, int] | None = None,
796
+ bounds: Any | None = None,
682
797
  fill: int = 0,
683
798
  all_touched: bool = False,
684
799
  merge_alg: Callable = MergeAlg.replace,
@@ -687,17 +802,27 @@ class Band(_ImageBandBase):
687
802
  **kwargs,
688
803
  ) -> None:
689
804
  """Create Band from a GeoDataFrame."""
690
- arr: np.ndarray = _gdf_to_arr(
691
- gdf,
692
- res=res,
693
- fill=fill,
694
- all_touched=all_touched,
695
- merge_alg=merge_alg,
696
- default_value=default_value,
697
- dtype=dtype,
698
- )
805
+ if bounds is not None:
806
+ bounds = to_bbox(bounds)
807
+
808
+ if out_shape == (0,):
809
+ arr = np.array([])
810
+ else:
811
+ arr = _gdf_to_arr(
812
+ gdf,
813
+ res=res,
814
+ bounds=bounds,
815
+ fill=fill,
816
+ all_touched=all_touched,
817
+ merge_alg=merge_alg,
818
+ default_value=default_value,
819
+ dtype=dtype,
820
+ out_shape=out_shape,
821
+ )
822
+ if bounds is None:
823
+ bounds = gdf.total_bounds
699
824
 
700
- obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
825
+ obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
701
826
  obj._from_geopandas = True
702
827
  return obj
703
828
 
@@ -717,9 +842,6 @@ class Band(_ImageBandBase):
717
842
  **kwargs,
718
843
  ) -> None:
719
844
  """Band initialiser."""
720
- if callable(res) and isinstance(res(), None_):
721
- raise TypeError("Must specify 'res'")
722
-
723
845
  if data is None:
724
846
  # allowing 'path' to replace 'data' as argument
725
847
  # to make the print repr. valid as initialiser
@@ -745,11 +867,20 @@ class Band(_ImageBandBase):
745
867
  if isinstance(data, np.ndarray):
746
868
  if self._bounds is None:
747
869
  raise ValueError("Must specify bounds when data is an array.")
870
+ if not (res is None or (callable(res) and res() is None)):
871
+ # if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
872
+ # res
873
+ # ) != _get_res_from_bounds(self._bounds, data.shape):
874
+ raise ValueError(
875
+ f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
876
+ )
748
877
  self._crs = crs
749
878
  self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
750
879
  self._from_array = True
751
880
  self.values = data
752
881
 
882
+ self._res = _get_res_from_bounds(self._bounds, self.values.shape)
883
+
753
884
  elif not isinstance(data, (str | Path | os.PathLike)):
754
885
  raise TypeError(
755
886
  "'data' must be string, Path-like or numpy.ndarray. "
@@ -757,8 +888,8 @@ class Band(_ImageBandBase):
757
888
  )
758
889
  else:
759
890
  self._path = _fix_path(str(data))
891
+ self._res = res if not (callable(res) and res() is None) else None
760
892
 
761
- self._res = res
762
893
  if cmap is not None:
763
894
  self.cmap = cmap
764
895
  self._name = name
@@ -786,7 +917,7 @@ class Band(_ImageBandBase):
786
917
  else:
787
918
  setattr(self, key, value)
788
919
 
789
- elif self.metadata_attributes and self.path is not None and not self.is_mask:
920
+ elif self.metadata_attributes and self.path is not None:
790
921
  if self._all_file_paths is None:
791
922
  self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
792
923
  for key, value in self._get_metadata_attributes(
@@ -798,43 +929,28 @@ class Band(_ImageBandBase):
798
929
  """Makes Bands sortable by band_id."""
799
930
  return self.band_id < other.band_id
800
931
 
932
def value_counts(self) -> pd.Series:
    """Value count of each value of the band's array.

    Returns:
        Series with the unique array values as index and the number of
        occurrences as values, sorted by descending count. Masked values
        are not counted if the array is a masked array.
    """
    values = self.values
    if isinstance(values, np.ma.core.MaskedArray):
        # keep only the values that are not masked out
        values = values.compressed()
    unique_values, counts = np.unique(values, return_counts=True)
    return pd.Series(counts, index=unique_values).sort_values(ascending=False)
940
+
801
941
@property
def values(self) -> np.ndarray:
    """The numpy array, if loaded."""
    loaded_array = self._values
    if loaded_array is None:
        raise _ArrayNotLoadedError("array is not loaded.")
    return loaded_array

@values.setter
def values(self, new_val):
    """Store the new array, converted with '_to_numpy' if not a numpy array."""
    self._values = (
        new_val if isinstance(new_val, np.ndarray) else self._to_numpy(new_val)
    )
822
- if self.backend == "xarray":
823
- if not isinstance(self._values, DataArray):
824
- self._values = self._to_xarray(
825
- new_val,
826
- transform=self.transform,
827
- )
828
-
829
- elif isinstance(new_val, np.ndarray):
830
- self._values.values = new_val
831
- else:
832
- self._values = new_val
833
-
834
- @property
835
- def mask(self) -> "Band":
836
- """Mask Band."""
837
- return self._mask
838
954
 
839
955
  @property
840
956
  def band_id(self) -> str:
@@ -921,28 +1037,39 @@ class Band(_ImageBandBase):
921
1037
  return df
922
1038
 
923
1039
def clip(
    self,
    mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
) -> "Band":
    """Clip band values to geometry mask while preserving bounds.

    Pixels outside the mask geometry are masked out. The shape of the array
    is not changed.

    Args:
        mask: Geometry-like object to keep values within.

    Returns:
        The Band itself, with 'values' as a masked array. The Band is
        returned unchanged if its height or width is 0.
    """
    if not self.height or not self.width:
        return self

    # Rasterize with fixed marker values rather than the band's nodata value.
    # With 'fill = self.nodata or 0', nodata=1 would equal the polygon value
    # (everything masked), and nodata=NaN would never compare equal
    # (nothing masked).
    outside_value, inside_value = 0, 1

    mask_array: np.ndarray = Band.from_geopandas(
        gdf=to_gdf(mask)[["geometry"]],
        default_value=inside_value,
        fill=outside_value,
        out_shape=self.values.shape,
        # NOTE(review): the geometry is rasterized over the bounds of 'mask',
        # not the band's own bounds - confirm that the resulting array is
        # pixel-aligned with 'self.values'.
        bounds=mask,
    ).values

    is_not_polygon = mask_array == outside_value

    if isinstance(self.values, np.ma.core.MaskedArray):
        # add to the existing mask
        self._values.mask |= is_not_polygon
    else:
        self._values = np.ma.array(
            self.values, mask=is_not_polygon, fill_value=self.nodata
        )

    return self
940
1067
 
941
1068
  def load(
942
1069
  self,
943
1070
  bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
944
1071
  indexes: int | tuple[int] | None = None,
945
- masked: bool | None = None,
1072
+ masked: bool = True,
946
1073
  file_system=None,
947
1074
  **kwargs,
948
1075
  ) -> "Band":
@@ -950,11 +1077,10 @@ class Band(_ImageBandBase):
950
1077
 
951
1078
  The array is stored in the 'values' property.
952
1079
  """
953
- global _load_counter
954
- _load_counter += 1
1080
+ global _LOAD_COUNTER
1081
+ _LOAD_COUNTER += 1
955
1082
 
956
- if masked is None:
957
- masked = True if self.mask is None else False
1083
+ _masking = kwargs.pop("_masking", self.masking)
958
1084
 
959
1085
  bounds_was_none = bounds is None
960
1086
 
@@ -963,12 +1089,9 @@ class Band(_ImageBandBase):
963
1089
  should_return_empty: bool = bounds is not None and bounds.area == 0
964
1090
  if should_return_empty:
965
1091
  self._values = np.array([])
966
- if self.mask is not None and not self.is_mask:
967
- self._mask = self._mask.load(
968
- bounds=bounds, indexes=indexes, file_system=file_system
969
- )
970
1092
  self._bounds = None
971
1093
  self.transform = None
1094
+ # activate setter
972
1095
  self.values = self._values
973
1096
 
974
1097
  return self
@@ -978,7 +1101,6 @@ class Band(_ImageBandBase):
978
1101
 
979
1102
  if bounds is not None:
980
1103
  minx, miny, maxx, maxy = to_bbox(bounds)
981
- ## round down/up to integer to avoid precision trouble
982
1104
  # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
983
1105
  bounds = minx, miny, maxx, maxy
984
1106
 
@@ -992,20 +1114,19 @@ class Band(_ImageBandBase):
992
1114
  out_shape = kwargs.pop("out_shape", None)
993
1115
 
994
1116
  if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
995
- print(self)
996
- print(self.mask)
997
- print(self.values.shape)
998
- print([int(x) for x in bounds], [int(x) for x in self.bounds])
999
1117
  raise ValueError(
1000
1118
  "Cannot re-load array with different bounds. "
1001
1119
  "Use .copy() to read with different bounds. "
1002
- "Or .clip(mask) to clip."
1120
+ "Or .clip(mask) to clip.",
1121
+ self,
1122
+ self.values.shape,
1123
+ [int(x) for x in bounds],
1124
+ [int(x) for x in self.bounds],
1003
1125
  )
1004
- # with opener(self.path, file_system=self.file_system) as f:
1126
+
1005
1127
  with opener(self.path, file_system=file_system) as f:
1006
1128
  with rasterio.open(f, nodata=self.nodata) as src:
1007
- self._res = int(src.res[0]) if not self.res else self.res
1008
-
1129
+ self._res = src.res if not self.res else self.res
1009
1130
  if self.nodata is None or np.isnan(self.nodata):
1010
1131
  self.nodata = src.nodata
1011
1132
  else:
@@ -1018,7 +1139,7 @@ class Band(_ImageBandBase):
1018
1139
  )
1019
1140
 
1020
1141
  if bounds is None:
1021
- if self._res != int(src.res[0]):
1142
+ if self._res != src.res:
1022
1143
  if out_shape is None:
1023
1144
  out_shape = _get_shape_from_bounds(
1024
1145
  to_bbox(src.bounds), self.res, indexes
@@ -1070,18 +1191,12 @@ class Band(_ImageBandBase):
1070
1191
  else:
1071
1192
  values[values == src.nodata] = self.nodata
1072
1193
 
1073
- if self.masking and self.is_mask:
1074
- values = np.isin(values, list(self.masking["values"]))
1075
-
1076
- elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
1077
-
1078
- if not self.mask.has_array:
1079
- self._mask = self.mask.load(
1080
- bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
1081
- )
1082
- mask_arr = self.mask.values
1083
-
1194
+ if _masking and not isinstance(values, np.ma.core.MaskedArray):
1195
+ mask_arr = _read_mask_array(self, bounds=bounds)
1084
1196
  values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
1197
+ elif _masking:
1198
+ mask_arr = _read_mask_array(self, bounds=bounds)
1199
+ values.mask |= mask_arr
1085
1200
 
1086
1201
  if bounds is not None:
1087
1202
  self._bounds = to_bbox(bounds)
@@ -1092,13 +1207,6 @@ class Band(_ImageBandBase):
1092
1207
 
1093
1208
  return self
1094
1209
 
1095
- @property
1096
- def is_mask(self) -> bool:
1097
- """True if the band_id is equal to the masking band_id."""
1098
- if self.masking is None:
1099
- return False
1100
- return self.band_id == self.masking["band_id"]
1101
-
1102
1210
  @property
1103
1211
  def has_array(self) -> bool:
1104
1212
  """Whether the array is loaded."""
@@ -1106,7 +1214,7 @@ class Band(_ImageBandBase):
1106
1214
  if not isinstance(self.values, (np.ndarray | DataArray)):
1107
1215
  raise ValueError()
1108
1216
  return True
1109
- except ValueError: # also catches ArrayNotLoadedError
1217
+ except ValueError: # also catches _ArrayNotLoadedError
1110
1218
  return False
1111
1219
 
1112
1220
  def write(
@@ -1126,10 +1234,17 @@ class Band(_ImageBandBase):
1126
1234
  if self.crs is None:
1127
1235
  raise ValueError("Cannot write None crs to image.")
1128
1236
 
1237
+ if self.nodata:
1238
+ # TODO take out .data if masked?
1239
+ values_with_nodata = np.concatenate(
1240
+ [self.values.flatten(), np.array([self.nodata])]
1241
+ )
1242
+ else:
1243
+ values_with_nodata = self.values
1129
1244
  profile = {
1130
1245
  "driver": driver,
1131
1246
  "compress": compress,
1132
- "dtype": rasterio.dtypes.get_minimum_dtype(self.values),
1247
+ "dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
1133
1248
  "crs": self.crs,
1134
1249
  "transform": self.transform,
1135
1250
  "nodata": self.nodata,
@@ -1138,19 +1253,18 @@ class Band(_ImageBandBase):
1138
1253
  "width": self.width,
1139
1254
  } | kwargs
1140
1255
 
1141
- # with opener(path, "wb", file_system=self.file_system) as f:
1142
1256
  with opener(path, "wb", file_system=file_system) as f:
1143
1257
  with rasterio.open(f, "w", **profile) as dst:
1144
1258
 
1145
1259
  if dst.nodata is None:
1146
1260
  dst.nodata = _get_dtype_min(dst.dtypes[0])
1147
1261
 
1148
- # if (
1149
- # isinstance(self.values, np.ma.core.MaskedArray)
1150
- # # and dst.nodata is not None
1151
- # ):
1152
- # self.values.data[np.isnan(self.values.data)] = dst.nodata
1153
- # self.values.data[self.values.mask] = dst.nodata
1262
+ if (
1263
+ isinstance(self.values, np.ma.core.MaskedArray)
1264
+ and dst.nodata is not None
1265
+ ):
1266
+ self.values.data[np.isnan(self.values.data)] = dst.nodata
1267
+ self.values.data[self.values.mask] = dst.nodata
1154
1268
 
1155
1269
  if len(self.values.shape) == 2:
1156
1270
  dst.write(self.values, indexes=1)
@@ -1238,7 +1352,7 @@ class Band(_ImageBandBase):
1238
1352
  The gradient will be 1 (1 meter up for every meter forward).
1239
1353
  The calculation is by default done in place to save memory.
1240
1354
 
1241
- >>> band.gradient()
1355
+ >>> band.gradient(copy=False)
1242
1356
  >>> band.values
1243
1357
  array([[0., 1., 1., 1., 0.],
1244
1358
  [1., 1., 1., 1., 1.],
@@ -1299,11 +1413,13 @@ class Band(_ImageBandBase):
1299
1413
  dropna=dropna,
1300
1414
  )
1301
1415
 
1302
def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
    """Create a GeoDataFrame from the image Band.

    Args:
        column: Name of resulting column that holds the raster values.
        dropna: Whether to remove values that are NA or equal to the nodata
            value.

    Returns:
        A GeoDataFrame with a geometry column and array values. If the array
        is empty, an empty GeoDataFrame with only a geometry column.

    Raises:
        ValueError: If the object has no '_values' attribute.
    """
    if not hasattr(self, "_values"):
        raise ValueError("Array is not loaded.")

    if isinstance(self.values, np.ma.core.MaskedArray):
        # overwrite the data of masked cells with nodata (0 if nodata is falsy)
        self.values.data[self.values.mask] = self.nodata or 0

    if self.values.shape[0] == 0:
        # Return early: the empty frame has no value column, so the 'dropna'
        # filter below would raise KeyError.
        return GeoDataFrame({"geometry": []}, crs=self.crs)

    df = GeoDataFrame(
        pd.DataFrame(
            _array_to_geojson(
                self.values, self.transform, processes=self.processes
            ),
            columns=[column, "geometry"],
        ),
        geometry="geometry",
        crs=self.crs,
    )

    if dropna:
        return df[(df[column] != self.nodata) & (df[column].notna())]
    return df
1327
1449
 
1328
1450
  def to_xarray(self) -> DataArray:
1329
1451
  """Convert the raster to an xarray.DataArray."""
1330
- if self.backend == "xarray":
1331
- return self.values
1332
1452
  return self._to_xarray(
1333
1453
  self.values,
1334
1454
  transform=self.transform,
@@ -1345,19 +1465,6 @@ class Band(_ImageBandBase):
1345
1465
  if not isinstance(arr, np.ndarray):
1346
1466
  mask_arr = None
1347
1467
  if masked:
1348
- # if self.mask is not None:
1349
- # print(self.mask.values.shape, arr.shape)
1350
- # if self.mask is not None and self.mask.values.shape == arr.shape:
1351
- # print("hei", self.mask.values.sum())
1352
- # mask_arr = self.mask.values
1353
- # else:
1354
- # mask_arr = np.full(arr.shape, False)
1355
- # try:
1356
- # print("hei222", arr.isnull().values.sum())
1357
- # mask_arr |= arr.isnull().values
1358
- # except AttributeError:
1359
- # pass
1360
- # mask_arr = np.full(arr.shape, False)
1361
1468
  try:
1362
1469
  mask_arr = arr.isnull().values
1363
1470
  except AttributeError:
@@ -1374,11 +1481,11 @@ class Band(_ImageBandBase):
1374
1481
 
1375
1482
  if (
1376
1483
  masked
1377
- and self.mask is not None
1378
- and not self.is_mask
1379
1484
  and not isinstance(arr, np.ma.core.MaskedArray)
1485
+ and mask_arr is not None
1380
1486
  ):
1381
1487
  arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
1488
+
1382
1489
  return arr
1383
1490
 
1384
1491
  def __repr__(self) -> str:
@@ -1401,10 +1508,6 @@ class NDVIBand(Band):
1401
1508
 
1402
1509
  cmap: str = "Greens"
1403
1510
 
1404
- # @staticmethod
1405
- # def get_cmap(arr: np.ndarray):
1406
- # return get_cmap(arr)
1407
-
1408
1511
 
1409
1512
  def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
1410
1513
  arr = np.median(arr, axis=0).astype(int)
@@ -1416,12 +1519,12 @@ class Image(_ImageBandBase):
1416
1519
  """Image consisting of one or more Bands."""
1417
1520
 
1418
1521
  band_class: ClassVar[Band] = Band
1419
- backend: str = "numpy"
1420
1522
 
1421
1523
  def __init__(
1422
1524
  self,
1423
1525
  data: str | Path | Sequence[Band] | None = None,
1424
- res: int | None = None,
1526
+ res: int | None_ = None_,
1527
+ mask: "Band | None" = None,
1425
1528
  processes: int = 1,
1426
1529
  df: pd.DataFrame | None = None,
1427
1530
  nodata: int | None = None,
@@ -1442,20 +1545,27 @@ class Image(_ImageBandBase):
1442
1545
  self.processes = processes
1443
1546
  self._crs = None
1444
1547
  self._bands = None
1548
+ self._mask = mask
1549
+
1550
+ if isinstance(data, Band):
1551
+ data = [data]
1445
1552
 
1446
1553
  if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
1447
1554
  self._construct_image_from_bands(data, res)
1448
1555
  return
1449
1556
  elif not isinstance(data, (str | Path | os.PathLike)):
1450
- raise TypeError("'data' must be string, Path-like or a sequence of Band.")
1557
+ raise TypeError(
1558
+ f"'data' must be string, Path-like or a sequence of Band. Got {data}"
1559
+ )
1451
1560
 
1452
- self._res = res
1561
+ self._res = res if not (callable(res) and res() is None) else None
1453
1562
  self._path = _fix_path(data)
1454
1563
 
1455
1564
  if all_file_paths is None and self.path:
1456
1565
  self._all_file_paths = _get_all_file_paths(self.path)
1457
1566
  elif self.path:
1458
- all_file_paths = {_fix_path(x) for x in all_file_paths}
1567
+ name = Path(self.path).name
1568
+ all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
1459
1569
  self._all_file_paths = {x for x in all_file_paths if self.path in x}
1460
1570
  else:
1461
1571
  self._all_file_paths = None
@@ -1467,11 +1577,7 @@ class Image(_ImageBandBase):
1467
1577
 
1468
1578
  df["image_path"] = df["image_path"].astype(str)
1469
1579
 
1470
- cols_to_explode = [
1471
- "file_path",
1472
- "file_name",
1473
- *[x for x in df if FILENAME_COL_SUFFIX in x],
1474
- ]
1580
+ cols_to_explode = ["file_path", "file_name"]
1475
1581
  try:
1476
1582
  df = df.explode(cols_to_explode, ignore_index=True)
1477
1583
  except ValueError:
@@ -1499,20 +1605,92 @@ class Image(_ImageBandBase):
1499
1605
  else:
1500
1606
  setattr(self, key, value)
1501
1607
 
1502
- else:
1608
+ elif self.metadata_attributes and self.path is not None:
1503
1609
  for key, value in self._get_metadata_attributes(
1504
1610
  self.metadata_attributes
1505
1611
  ).items():
1506
1612
  setattr(self, key, value)
1507
1613
 
1614
+ def clip(
1615
+ self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
1616
+ ) -> "Image":
1617
+ """Clip band values to geometry mask while preserving bounds."""
1618
+ copied = self.copy() if copy else self
1619
+
1620
+ fill: int = self.nodata or 0
1621
+
1622
+ mask_array: np.ndarray = Band.from_geopandas(
1623
+ gdf=to_gdf(mask)[["geometry"]],
1624
+ default_value=1,
1625
+ fill=fill,
1626
+ out_shape=next(iter(self)).values.shape,
1627
+ bounds=self.bounds,
1628
+ ).values
1629
+
1630
+ is_not_polygon = mask_array == fill
1631
+
1632
+ for band in copied:
1633
+ if isinstance(band.values, np.ma.core.MaskedArray):
1634
+ band._values.mask |= is_not_polygon
1635
+ else:
1636
+ band._values = np.ma.array(
1637
+ band.values, mask=is_not_polygon, fill_value=band.nodata
1638
+ )
1639
+
1640
+ return copied
1641
+
1642
+ def load(
1643
+ self,
1644
+ bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
1645
+ indexes: int | tuple[int] | None = None,
1646
+ file_system=None,
1647
+ **kwargs,
1648
+ ) -> "ImageCollection":
1649
+ """Load all image Bands with threading."""
1650
+ if bounds is None and indexes is None and all(band.has_array for band in self):
1651
+ return self
1652
+
1653
+ if self.masking:
1654
+ mask_array: np.ndarray = _read_mask_array(
1655
+ self,
1656
+ bounds=bounds,
1657
+ indexes=indexes,
1658
+ file_system=file_system,
1659
+ **kwargs,
1660
+ )
1661
+
1662
+ with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
1663
+ parallel(
1664
+ joblib.delayed(_load_band)(
1665
+ band,
1666
+ bounds=bounds,
1667
+ indexes=indexes,
1668
+ file_system=file_system,
1669
+ _masking=None,
1670
+ **kwargs,
1671
+ )
1672
+ for band in self
1673
+ )
1674
+
1675
+ if self.masking:
1676
+ for band in self:
1677
+ if isinstance(band.values, np.ma.core.MaskedArray):
1678
+ band.values.mask |= mask_array
1679
+ else:
1680
+ band.values = np.ma.array(
1681
+ band.values, mask=mask_array, fill_value=self.nodata
1682
+ )
1683
+
1684
+ return self
1685
+
1508
1686
  def _construct_image_from_bands(
1509
1687
  self, data: Sequence[Band], res: int | None
1510
1688
  ) -> None:
1511
1689
  self._bands = list(data)
1512
1690
  if res is None:
1513
- res = list({band.res for band in self.bands})
1691
+ res = {band.res for band in self.bands}
1514
1692
  if len(res) == 1:
1515
- self._res = res[0]
1693
+ self._res = next(iter(res))
1516
1694
  else:
1517
1695
  raise ValueError(f"Different resolutions for the bands: {res}")
1518
1696
  else:
@@ -1558,8 +1736,7 @@ class Image(_ImageBandBase):
1558
1736
  arr,
1559
1737
  bounds=red.bounds,
1560
1738
  crs=red.crs,
1561
- mask=red.mask,
1562
- **red._common_init_kwargs,
1739
+ **{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
1563
1740
  )
1564
1741
 
1565
1742
  def get_brightness(
@@ -1590,81 +1767,16 @@ class Image(_ImageBandBase):
1590
1767
  brightness,
1591
1768
  bounds=red.bounds,
1592
1769
  crs=self.crs,
1593
- mask=self.mask,
1594
- **self._common_init_kwargs,
1770
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
1595
1771
  )
1596
1772
 
1597
1773
  def to_xarray(self) -> DataArray:
1598
1774
  """Convert the raster to an xarray.DataArray."""
1599
- if self.backend == "xarray":
1600
- return self.values
1601
-
1602
1775
  return self._to_xarray(
1603
1776
  np.array([band.values for band in self]),
1604
1777
  transform=self[0].transform,
1605
1778
  )
1606
1779
 
1607
- @property
1608
- def mask(self) -> Band | None:
1609
- """Mask Band."""
1610
- if self.masking is None:
1611
- return None
1612
-
1613
- elif self._mask is not None:
1614
- return self._mask
1615
-
1616
- elif self._bands is not None and all(band.mask is not None for band in self):
1617
- if len({id(band.mask) for band in self}) > 1:
1618
- raise ValueError(
1619
- "Image bands must have same mask.",
1620
- {id(band.mask) for band in self},
1621
- ) # TODO
1622
- self._mask = next(
1623
- iter([band.mask for band in self if band.mask is not None])
1624
- )
1625
- return self._mask
1626
-
1627
- mask_band_id = self.masking["band_id"]
1628
- mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
1629
- if len(mask_paths) > 1:
1630
- raise ValueError(
1631
- f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
1632
- )
1633
- elif not mask_paths:
1634
- raise ValueError(
1635
- f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
1636
- + str([Path(x).name for x in _ls_func(self.path)])
1637
- )
1638
-
1639
- self._mask = self.band_class(
1640
- mask_paths[0],
1641
- **self._common_init_kwargs,
1642
- )
1643
- if self._bands is not None:
1644
- for band in self:
1645
- band._mask = self._mask
1646
- return self._mask
1647
-
1648
- @mask.setter
1649
- def mask(self, values: Band | None) -> None:
1650
- if values is None:
1651
- self._mask = None
1652
- for band in self:
1653
- band._mask = None
1654
- return
1655
- if not isinstance(values, Band):
1656
- raise TypeError(f"mask must be Band. Got {type(values)}")
1657
- self._mask = values
1658
- mask_arr = self._mask.values
1659
- for band in self:
1660
- band._mask = self._mask
1661
- try:
1662
- band.values = np.ma.array(
1663
- band.values.data, mask=mask_arr, fill_value=band.nodata
1664
- )
1665
- except ArrayNotLoadedError:
1666
- pass
1667
-
1668
1780
  @property
1669
1781
  def band_ids(self) -> list[str]:
1670
1782
  """The Band ids."""
@@ -1687,12 +1799,9 @@ class Image(_ImageBandBase):
1687
1799
  else:
1688
1800
  paths = self._df["file_path"]
1689
1801
 
1690
- mask = self.mask
1691
-
1692
1802
  self._bands = [
1693
1803
  self.band_class(
1694
1804
  path,
1695
- mask=mask,
1696
1805
  all_file_paths=self._all_file_paths,
1697
1806
  **self._common_init_kwargs,
1698
1807
  )
@@ -1901,13 +2010,12 @@ class ImageCollection(_ImageBase):
1901
2010
  image_class: ClassVar[Image] = Image
1902
2011
  band_class: ClassVar[Band] = Band
1903
2012
  _metadata_attribute_collection_type: ClassVar[type] = pd.Series
1904
- backend: str = "numpy"
1905
2013
 
1906
2014
  def __init__(
1907
2015
  self,
1908
2016
  data: str | Path | Sequence[Image] | Sequence[str | Path],
1909
- res: int,
1910
- level: str | None = None_,
2017
+ res: int | None_ = None_,
2018
+ level: str | None_ | None = None_,
1911
2019
  processes: int = 1,
1912
2020
  metadata: str | dict | pd.DataFrame | None = None,
1913
2021
  nodata: int | None = None,
@@ -1923,13 +2031,13 @@ class ImageCollection(_ImageBase):
1923
2031
 
1924
2032
  super().__init__(metadata=metadata, **kwargs)
1925
2033
 
1926
- if callable(level) and isinstance(level(), None_):
2034
+ if callable(level) and level() is None:
1927
2035
  level = None
1928
2036
 
1929
2037
  self.nodata = nodata
1930
2038
  self.level = level
1931
2039
  self.processes = processes
1932
- self._res = res
2040
+ self._res = res if not (callable(res) and res() is None) else None
1933
2041
  self._crs = None
1934
2042
 
1935
2043
  self._df = None
@@ -1944,13 +2052,19 @@ class ImageCollection(_ImageBase):
1944
2052
  elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
1945
2053
  # adding band paths (asuming 'data' is a sequence of image paths)
1946
2054
  try:
1947
- self._all_file_paths = _get_child_paths_threaded(data) | set(data)
2055
+ self._all_file_paths = _get_child_paths_threaded(data) | {
2056
+ _fix_path(x) for x in data
2057
+ }
1948
2058
  except FileNotFoundError as e:
1949
2059
  if _from_root:
1950
2060
  raise TypeError(
1951
- "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
2061
+ "When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
1952
2062
  ) from e
1953
2063
  raise e
2064
+ if self.level:
2065
+ self._all_file_paths = [
2066
+ path for path in self._all_file_paths if self.level in path
2067
+ ]
1954
2068
  self._df = self._create_metadata_df(self._all_file_paths)
1955
2069
  return
1956
2070
 
@@ -1968,7 +2082,9 @@ class ImageCollection(_ImageBase):
1968
2082
 
1969
2083
  self._df = self._create_metadata_df(self._all_file_paths)
1970
2084
 
1971
- def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
2085
+ def groupby(
2086
+ self, by: str | list[str], copy: bool = True, **kwargs
2087
+ ) -> ImageCollectionGroupBy:
1972
2088
  """Group the Collection by Image or Band attribute(s)."""
1973
2089
  df = pd.DataFrame(
1974
2090
  [(i, img) for i, img in enumerate(self) for _ in img],
@@ -1995,8 +2111,10 @@ class ImageCollection(_ImageBase):
1995
2111
  return ImageCollectionGroupBy(
1996
2112
  sorted(
1997
2113
  parallel(
1998
- joblib.delayed(_copy_and_add_df_parallel)(i, group, self)
1999
- for i, group in df.groupby(by, **kwargs)
2114
+ joblib.delayed(_copy_and_add_df_parallel)(
2115
+ group_values, group_df, self, copy
2116
+ )
2117
+ for group_values, group_df in df.groupby(by, **kwargs)
2000
2118
  )
2001
2119
  ),
2002
2120
  by=by,
@@ -2037,6 +2155,62 @@ class ImageCollection(_ImageBase):
2037
2155
 
2038
2156
  return self
2039
2157
 
2158
+ def pixelwise(
2159
+ self,
2160
+ func: Callable,
2161
+ kwargs: dict | None = None,
2162
+ index_aligned_kwargs: dict | None = None,
2163
+ masked: bool = True,
2164
+ processes: int | None = None,
2165
+ ) -> np.ndarray | tuple[np.ndarray] | None:
2166
+ """Run a function for each pixel.
2167
+
2168
+ The function should take a 1d array as first argument. This will be
2169
+ the pixel values for all bands in all images in the collection.
2170
+ """
2171
+ values = np.array([band.values for img in self for band in img])
2172
+
2173
+ if (
2174
+ masked
2175
+ and self.nodata is not None
2176
+ and hasattr(next(iter(next(iter(self)))).values, "mask")
2177
+ ):
2178
+ mask_array = np.array(
2179
+ [
2180
+ (band.values.mask) | (band.values.data == self.nodata)
2181
+ for img in self
2182
+ for band in img
2183
+ ]
2184
+ )
2185
+ elif masked and self.nodata is not None:
2186
+ mask_array = np.array(
2187
+ [band.values == self.nodata for img in self for band in img]
2188
+ )
2189
+ elif masked:
2190
+ mask_array = np.array([band.values.mask for img in self for band in img])
2191
+ else:
2192
+ mask_array = None
2193
+
2194
+ nonmissing_row_indices, nonmissing_col_indices, results = pixelwise(
2195
+ func=func,
2196
+ values=values,
2197
+ mask_array=mask_array,
2198
+ index_aligned_kwargs=index_aligned_kwargs,
2199
+ kwargs=kwargs,
2200
+ processes=processes or self.processes,
2201
+ )
2202
+
2203
+ return PixelwiseResults(
2204
+ nonmissing_row_indices,
2205
+ nonmissing_col_indices,
2206
+ results,
2207
+ shape=values.shape[1:],
2208
+ res=self.res,
2209
+ bounds=self.bounds,
2210
+ crs=self.crs,
2211
+ nodata=self.nodata or np.nan,
2212
+ )
2213
+
2040
2214
  def get_unique_band_ids(self) -> list[str]:
2041
2215
  """Get a list of unique band_ids across all images."""
2042
2216
  return list({band.band_id for img in self for band in img})
@@ -2142,8 +2316,7 @@ class ImageCollection(_ImageBase):
2142
2316
  arr,
2143
2317
  bounds=bounds,
2144
2318
  crs=crs,
2145
- mask=self.mask,
2146
- **self._common_init_kwargs,
2319
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
2147
2320
  )
2148
2321
 
2149
2322
  band._merged = True
@@ -2216,7 +2389,7 @@ class ImageCollection(_ImageBase):
2216
2389
  bounds=out_bounds,
2217
2390
  crs=crs,
2218
2391
  band_id=band_id,
2219
- **self._common_init_kwargs,
2392
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
2220
2393
  )
2221
2394
  )
2222
2395
 
@@ -2329,22 +2502,11 @@ class ImageCollection(_ImageBase):
2329
2502
  ):
2330
2503
  return self
2331
2504
 
2332
- # if self.processes == 1:
2333
- # for img in self:
2334
- # for band in img:
2335
- # band.load(
2336
- # bounds=bounds,
2337
- # indexes=indexes,
2338
- # file_system=file_system,
2339
- # **kwargs,
2340
- # )
2341
- # return self
2342
-
2343
2505
  with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2344
2506
  if self.masking:
2345
- parallel(
2346
- joblib.delayed(_load_band)(
2347
- img.mask,
2507
+ masks: list[np.ndarray] = parallel(
2508
+ joblib.delayed(_read_mask_array)(
2509
+ img,
2348
2510
  bounds=bounds,
2349
2511
  indexes=indexes,
2350
2512
  file_system=file_system,
@@ -2352,14 +2514,6 @@ class ImageCollection(_ImageBase):
2352
2514
  )
2353
2515
  for img in self
2354
2516
  )
2355
- for img in self:
2356
- for band in img:
2357
- band._mask = img.mask
2358
-
2359
- # print({img.mask.has_array for img in self })
2360
- # print({band.mask.has_array for img in self for band in img})
2361
-
2362
- # with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
2363
2517
 
2364
2518
  parallel(
2365
2519
  joblib.delayed(_load_band)(
@@ -2367,34 +2521,86 @@ class ImageCollection(_ImageBase):
2367
2521
  bounds=bounds,
2368
2522
  indexes=indexes,
2369
2523
  file_system=file_system,
2524
+ _masking=None,
2370
2525
  **kwargs,
2371
2526
  )
2372
2527
  for img in self
2373
2528
  for band in img
2374
2529
  )
2375
2530
 
2531
+ if self.masking:
2532
+ for img, mask_array in zip(self, masks, strict=True):
2533
+ for band in img:
2534
+ if isinstance(band.values, np.ma.core.MaskedArray):
2535
+ band.values.mask |= mask_array
2536
+ else:
2537
+ band.values = np.ma.array(
2538
+ band.values, mask=mask_array, fill_value=self.nodata
2539
+ )
2540
+
2376
2541
  return self
2377
2542
 
2378
2543
  def clip(
2379
2544
  self,
2380
2545
  mask: Geometry | GeoDataFrame | GeoSeries,
2381
- **kwargs,
2546
+ dropna: bool = True,
2547
+ copy: bool = True,
2382
2548
  ) -> "ImageCollection":
2383
- """Clip all image Bands with 'loky'."""
2384
- if self.processes == 1:
2385
- for img in self:
2386
- for band in img:
2387
- band.clip(mask, **kwargs)
2388
- return self
2549
+ """Clip all image Bands while preserving bounds."""
2550
+ copied = self.copy() if copy else self
2389
2551
 
2390
- with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2391
- parallel(
2392
- joblib.delayed(_clip_band)(band, mask, **kwargs)
2393
- for img in self
2552
+ copied._images = [img for img in copied if img.union_all()]
2553
+
2554
+ fill: int = self.nodata or 0
2555
+
2556
+ common_band_from_geopandas_kwargs = dict(
2557
+ gdf=to_gdf(mask)[["geometry"]],
2558
+ default_value=1,
2559
+ fill=fill,
2560
+ )
2561
+
2562
+ for img in copied:
2563
+ img._rounded_bounds = tuple(int(x) for x in img.bounds)
2564
+
2565
+ for bounds in {img._rounded_bounds for img in copied}:
2566
+ shapes = {
2567
+ band.values.shape
2568
+ for img in copied
2394
2569
  for band in img
2395
- )
2570
+ if img._rounded_bounds == bounds
2571
+ }
2572
+ if len(shapes) != 1:
2573
+ raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")
2396
2574
 
2397
- return self
2575
+ mask_array: np.ndarray = Band.from_geopandas(
2576
+ **common_band_from_geopandas_kwargs,
2577
+ out_shape=next(iter(shapes)),
2578
+ bounds=bounds,
2579
+ ).values
2580
+
2581
+ is_not_polygon = mask_array == fill
2582
+
2583
+ for img in copied:
2584
+ if img._rounded_bounds != bounds:
2585
+ continue
2586
+
2587
+ for band in img:
2588
+ if isinstance(band.values, np.ma.core.MaskedArray):
2589
+ band._values.mask |= is_not_polygon
2590
+ else:
2591
+ band._values = np.ma.array(
2592
+ band.values, mask=is_not_polygon, fill_value=band.nodata
2593
+ )
2594
+
2595
+ for img in copied:
2596
+ del img._rounded_bounds
2597
+
2598
+ if dropna:
2599
+ copied.images = [
2600
+ img for img in copied if any(np.sum(band.values) for band in img)
2601
+ ]
2602
+
2603
+ return copied
2398
2604
 
2399
2605
  def _set_bbox(
2400
2606
  self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
@@ -2405,17 +2611,12 @@ class ImageCollection(_ImageBase):
2405
2611
  if self._images is not None:
2406
2612
  for img in self._images:
2407
2613
  img._bbox = self._bbox
2408
- if img.mask is not None:
2409
- img.mask._bbox = self._bbox
2410
2614
  if img.bands is None:
2411
2615
  continue
2412
2616
  for band in img:
2413
2617
  band._bbox = self._bbox
2414
2618
  bounds = box(*band._bbox).intersection(box(*band.bounds))
2415
2619
  band._bounds = to_bbox(bounds) if not bounds.is_empty else None
2416
- if band.mask is not None:
2417
- band.mask._bbox = self._bbox
2418
- band.mask._bounds = band._bounds
2419
2620
 
2420
2621
  return self
2421
2622
 
@@ -2521,7 +2722,7 @@ class ImageCollection(_ImageBase):
2521
2722
  **kwargs,
2522
2723
  )
2523
2724
 
2524
- return xr.combine_by_coords(list(xarrs.values()))
2725
+ return combine_by_coords(list(xarrs.values()))
2525
2726
  # return Dataset(xarrs)
2526
2727
 
2527
2728
  def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
@@ -2534,6 +2735,9 @@ class ImageCollection(_ImageBase):
2534
2735
  try:
2535
2736
  name = band.name
2536
2737
  except AttributeError:
2738
+ name = None
2739
+
2740
+ if name is None:
2537
2741
  name = f"{self.__class__.__name__}({i})"
2538
2742
 
2539
2743
  if name not in out:
@@ -2594,10 +2798,6 @@ class ImageCollection(_ImageBase):
2594
2798
 
2595
2799
  return copied
2596
2800
 
2597
- def __or__(self, collection: "ImageCollection") -> "ImageCollection":
2598
- """Concatenate the collection with another collection."""
2599
- return concat_image_collections([self, collection])
2600
-
2601
2801
  def __iter__(self) -> Iterator[Image]:
2602
2802
  """Iterate over the images."""
2603
2803
  return iter(self.images)
@@ -2607,14 +2807,16 @@ class ImageCollection(_ImageBase):
2607
2807
  return len(self.images)
2608
2808
 
2609
2809
  def __getattr__(self, attr: str) -> Any:
2610
- """Make iterable of metadata_attribute."""
2810
+ """Make iterable of metadata attribute."""
2611
2811
  if attr in (self.metadata_attributes or {}):
2612
2812
  return self._metadata_attribute_collection_type(
2613
2813
  [getattr(img, attr) for img in self]
2614
2814
  )
2615
2815
  return super().__getattribute__(attr)
2616
2816
 
2617
- def __getitem__(self, item: int | slice | Sequence[int | bool]) -> Image:
2817
+ def __getitem__(
2818
+ self, item: int | slice | Sequence[int | bool]
2819
+ ) -> "Image | ImageCollection":
2618
2820
  """Select one Image by integer index, or multiple Images by slice, list of int."""
2619
2821
  if isinstance(item, int):
2620
2822
  return self.images[item]
@@ -2653,14 +2855,14 @@ class ImageCollection(_ImageBase):
2653
2855
  return copied
2654
2856
 
2655
2857
  @property
2656
- def dates(self) -> list[str]:
2858
+ def date(self) -> Any:
2657
2859
  """List of image dates."""
2658
- return [img.date for img in self]
2860
+ return self._metadata_attribute_collection_type([img.date for img in self])
2659
2861
 
2660
2862
  @property
2661
- def image_paths(self) -> list[str]:
2863
+ def image_paths(self) -> Any:
2662
2864
  """List of image paths."""
2663
- return [img.path for img in self]
2865
+ return self._metadata_attribute_collection_type([img.path for img in self])
2664
2866
 
2665
2867
  @property
2666
2868
  def images(self) -> list["Image"]:
@@ -2678,21 +2880,6 @@ class ImageCollection(_ImageBase):
2678
2880
  **self._common_init_kwargs,
2679
2881
  )
2680
2882
 
2681
- if self.masking is not None:
2682
- images = []
2683
- for image in self._images:
2684
- # TODO why this loop?
2685
- try:
2686
- if not isinstance(image.mask, Band):
2687
- raise ValueError()
2688
- images.append(image)
2689
- except ValueError as e:
2690
- raise e
2691
- continue
2692
- self._images = images
2693
- for image in self._images:
2694
- image._bands = [band for band in image if band.band_id is not None]
2695
-
2696
2883
  self._images = [img for img in self if len(img)]
2697
2884
 
2698
2885
  if self._should_be_sorted:
@@ -2722,24 +2909,22 @@ class ImageCollection(_ImageBase):
2722
2909
 
2723
2910
  @images.setter
2724
2911
  def images(self, new_value: list["Image"]) -> list["Image"]:
2725
- self._images = list(new_value)
2726
- if not all(isinstance(x, Image) for x in self._images):
2912
+ new_value = list(new_value)
2913
+ if not new_value:
2914
+ self._images = new_value
2915
+ return
2916
+ if all(isinstance(x, Band) for x in new_value):
2917
+ if len(new_value) != len(self):
2918
+ raise ValueError("'images' must have same length as number of images.")
2919
+ new_images = []
2920
+ for i, img in enumerate(self):
2921
+ img._bands = [new_value[i]]
2922
+ new_images.append(img)
2923
+ self._images = new_images
2924
+ return
2925
+ if not all(isinstance(x, Image) for x in new_value):
2727
2926
  raise TypeError("images should be a sequence of Image.")
2728
-
2729
- def __repr__(self) -> str:
2730
- """String representation."""
2731
- root = ""
2732
- if self.path is not None:
2733
- data = f"'{self.path}'"
2734
- elif all(img.path is not None for img in self):
2735
- data = [img.path for img in self]
2736
- parents = {str(Path(path).parent) for path in data}
2737
- if len(parents) == 1:
2738
- data = [Path(path).name for path in data]
2739
- root = f" root='{next(iter(parents))}',"
2740
- else:
2741
- data = [img for img in self]
2742
- return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
2927
+ self._images = new_value
2743
2928
 
2744
2929
  def union_all(self) -> Polygon | MultiPolygon:
2745
2930
  """(Multi)Polygon representing the union of all image bounds."""
@@ -2796,7 +2981,6 @@ class ImageCollection(_ImageBase):
2796
2981
  if "date" in x_var and subcollection._should_be_sorted:
2797
2982
  subcollection._images = list(sorted(subcollection._images))
2798
2983
 
2799
- y = np.array([band.values for img in subcollection for band in img])
2800
2984
  if "date" in x_var and subcollection._should_be_sorted:
2801
2985
  x = np.array(
2802
2986
  [
@@ -2813,120 +2997,35 @@ class ImageCollection(_ImageBase):
2813
2997
  - pd.Timestamp(np.min(x))
2814
2998
  ).days
2815
2999
  else:
2816
- x = np.arange(0, len(y))
2817
-
2818
- mask = np.array(
2819
- [
2820
- (
2821
- band.values.mask
2822
- if hasattr(band.values, "mask")
2823
- else np.full(band.values.shape, False)
2824
- )
2825
- for img in subcollection
2826
- for band in img
2827
- ]
3000
+ x = np.arange(0, sum(1 for img in subcollection for band in img))
3001
+
3002
+ subcollection.pixelwise(
3003
+ _plot_pixels_1d,
3004
+ kwargs=dict(
3005
+ alpha=alpha,
3006
+ x_var=x_var,
3007
+ y_label=y_label,
3008
+ rounding=rounding,
3009
+ first_date=first_date,
3010
+ figsize=figsize,
3011
+ ),
3012
+ index_aligned_kwargs=dict(x=x),
2828
3013
  )
2829
3014
 
2830
- if x_var == "days_since_start":
2831
- x = x - np.min(x)
2832
-
2833
- for i in range(y.shape[1]):
2834
- for j in range(y.shape[2]):
2835
- this_y = y[:, i, j]
2836
-
2837
- this_mask = mask[:, i, j]
2838
- this_x = x[~this_mask]
2839
- this_y = this_y[~this_mask]
2840
-
2841
- if ylim:
2842
- condition = (this_y >= ylim[0]) & (this_y <= ylim[1])
2843
- this_y = this_y[condition]
2844
- this_x = this_x[condition]
2845
-
2846
- coef, intercept = np.linalg.lstsq(
2847
- np.vstack([this_x, np.ones(this_x.shape[0])]).T,
2848
- this_y,
2849
- rcond=None,
2850
- )[0]
2851
- predicted = np.array([intercept + coef * x for x in this_x])
2852
-
2853
- predicted_start = predicted[0]
2854
- predicted_end = predicted[-1]
2855
- predicted_change = predicted_end - predicted_start
2856
-
2857
- # Degrees of freedom
2858
- dof = len(this_x) - 2
2859
-
2860
- # 95% confidence interval
2861
- t_val = stats.t.ppf(1 - alpha / 2, dof)
2862
-
2863
- # Mean squared error of the residuals
2864
- mse = np.sum((this_y - predicted) ** 2) / dof
2865
-
2866
- # Calculate the standard error of predictions
2867
- pred_stderr = np.sqrt(
2868
- mse
2869
- * (
2870
- 1 / len(this_x)
2871
- + (this_x - np.mean(this_x)) ** 2
2872
- / np.sum((this_x - np.mean(this_x)) ** 2)
2873
- )
2874
- )
2875
-
2876
- # Calculate the confidence interval for predictions
2877
- ci_lower = predicted - t_val * pred_stderr
2878
- ci_upper = predicted + t_val * pred_stderr
2879
-
2880
- fig = plt.figure(figsize=figsize)
2881
- ax = fig.add_subplot(1, 1, 1)
2882
-
2883
- ax.scatter(this_x, this_y, color="#2c93db")
2884
- ax.plot(this_x, predicted, color="#e0436b")
2885
- ax.fill_between(
2886
- this_x,
2887
- ci_lower,
2888
- ci_upper,
2889
- color="#e0436b",
2890
- alpha=0.2,
2891
- label=f"{int(alpha*100)}% CI",
2892
- )
2893
- plt.title(
2894
- f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
2895
- f"pred change: {round(predicted_change, rounding)}, "
2896
- f"pred start: {round(predicted_start, rounding)}, "
2897
- f"pred end: {round(predicted_end, rounding)}"
2898
- )
2899
- plt.xlabel(x_var)
2900
- plt.ylabel(y_label)
2901
-
2902
- if x_var == "date":
2903
- date_labels = pd.to_datetime(
2904
- [first_date + pd.Timedelta(days=int(day)) for day in this_x]
2905
- )
2906
-
2907
- _, unique_indices = np.unique(
2908
- date_labels.strftime("%Y-%m"), return_index=True
2909
- )
2910
-
2911
- unique_x = np.array(this_x)[unique_indices]
2912
- unique_labels = date_labels[unique_indices].strftime("%Y-%m")
2913
-
2914
- ax.set_xticks(unique_x)
2915
- ax.set_xticklabels(unique_labels, rotation=45, ha="right")
2916
- # ax.tick_params(axis="x", length=10, width=2)
2917
-
2918
- plt.show()
2919
-
2920
-
2921
- def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
2922
- for regex in regexes:
2923
- try:
2924
- return re.search(regex, xml_file)
2925
- except (TypeError, AttributeError):
2926
- continue
2927
- raise ValueError(
2928
- f"Could not find processing_baseline info from {regexes} in {xml_file}"
2929
- )
3015
+ def __repr__(self) -> str:
3016
+ """String representation."""
3017
+ root = ""
3018
+ if self.path is not None:
3019
+ data = f"'{self.path}'"
3020
+ elif all(img.path is not None for img in self):
3021
+ data = [img.path for img in self]
3022
+ parents = {str(Path(path).parent) for path in data}
3023
+ if len(parents) == 1:
3024
+ data = [Path(path).name for path in data]
3025
+ root = f" root='{next(iter(parents))}',"
3026
+ else:
3027
+ data = [img for img in self]
3028
+ return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
2930
3029
 
2931
3030
 
2932
3031
  class Sentinel2Config:
@@ -2984,14 +3083,14 @@ class Sentinel2Config:
2984
3083
  xml_file,
2985
3084
  )
2986
3085
  if match_ is None:
2987
- raise _RegexError()
3086
+ return None
2988
3087
 
2989
3088
  if "NOT_REFINED" in match_.group(0):
2990
3089
  return False
2991
3090
  elif "REFINED" in match_.group(0):
2992
3091
  return True
2993
3092
  else:
2994
- raise _RegexError()
3093
+ raise _RegexError(xml_file)
2995
3094
 
2996
3095
  def _get_boa_quantification_value(self, xml_file: str) -> int:
2997
3096
  return int(
@@ -3040,9 +3139,6 @@ class Sentinel2Band(Sentinel2Config, Band):
3040
3139
  }
3041
3140
 
3042
3141
  def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
3043
- if self.is_mask:
3044
- return None
3045
-
3046
3142
  pat = re.compile(
3047
3143
  r"""
3048
3144
  <BOA_ADD_OFFSET\s*
@@ -3058,7 +3154,7 @@ class Sentinel2Band(Sentinel2Config, Band):
3058
3154
  except (TypeError, AttributeError, KeyError) as e:
3059
3155
  raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
3060
3156
  if not matches:
3061
- raise _RegexError(f"Could not find boa_add_offset info from {pat}")
3157
+ return None
3062
3158
 
3063
3159
  dict_ = (
3064
3160
  pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
@@ -3121,7 +3217,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
3121
3217
  def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
3122
3218
  """ImageCollection with Sentinel2 specific name variables and path regexes."""
3123
3219
  level = kwargs.get("level", None_)
3124
- if callable(level) and isinstance(level(), None_):
3220
+ if callable(level) and level() is None:
3125
3221
  raise ValueError("Must specify level for Sentinel2Collection.")
3126
3222
  super().__init__(data=data, **kwargs)
3127
3223
 
@@ -3146,10 +3242,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
3146
3242
 
3147
3243
 
3148
3244
  def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
3149
- """Union multiple ImageCollections together.
3150
-
3151
- Same as using the union operator |.
3152
- """
3245
+ """Concatenate ImageCollections."""
3153
3246
  resolutions = {x.res for x in collections}
3154
3247
  if len(resolutions) > 1:
3155
3248
  raise ValueError(f"resoultion mismatch. {resolutions}")
@@ -3185,8 +3278,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
3185
3278
  raise ValueError("array must be 2 or 3 dimensional")
3186
3279
 
3187
3280
 
3188
- def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
3189
- gradient_x, gradient_y = np.gradient(array, res, res)
3281
+ def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
3282
+ resx, resy = _res_as_tuple(res)
3283
+
3284
+ gradient_x, gradient_y = np.gradient(array, resx, resy)
3190
3285
 
3191
3286
  gradient = abs(gradient_x) + abs(gradient_y)
3192
3287
 
@@ -3273,7 +3368,7 @@ def _get_images(
3273
3368
  return images
3274
3369
 
3275
3370
 
3276
- class ArrayNotLoadedError(ValueError):
3371
+ class _ArrayNotLoadedError(ValueError):
3277
3372
  """Arrays are not loaded."""
3278
3373
 
3279
3374
 
@@ -3351,18 +3446,22 @@ def _intesects(x, other) -> bool:
3351
3446
 
3352
3447
 
3353
3448
def _copy_and_add_df_parallel(
    group_values: tuple[Any, ...],
    group_df: pd.DataFrame,
    self: ImageCollection,
    copy: bool,
) -> tuple[tuple[Any], ImageCollection]:
    """Build the collection belonging to one group of a grouped DataFrame.

    Args:
        group_values: The key of the group; returned unchanged.
        group_df: Rows of the group. Must have the columns "_image_idx" and
            "_image_instance"; an optional "band_id" column restricts which
            bands each image keeps.
        self: The collection the group was derived from.
        copy: If True, the collection and its images are copied before being
            modified. If False, they are modified in place.

    Returns:
        The group key and the collection holding the group's images.
    """
    collection = self.copy() if copy else self

    # One row per image: the frame may repeat an image once per band.
    unique_images = group_df.drop_duplicates("_image_idx")["_image_instance"]
    if copy:
        collection.images = [image.copy() for image in unique_images]
    else:
        collection.images = list(unique_images)

    if "band_id" not in group_df:
        return (group_values, collection)

    wanted_band_ids = set(group_df["band_id"].values)
    for image in collection.images:
        image._bands = [band for band in image if band.band_id in wanted_band_ids]

    return (group_values, collection)
3366
3465
 
3367
3466
 
3368
3467
  def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
@@ -3388,15 +3487,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
3388
3487
  return rasterio.open(file)
3389
3488
 
3390
3489
 
3391
- def _load_band(band: Band, **kwargs) -> None:
3490
def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
    """Load the mask band of a Band or Image and return it as a boolean array.

    The mask file is the single entry of ``self._all_file_paths`` that
    contains ``self.masking["band_id"]``. It is opened as a Band using the
    common init kwargs of ``self`` (with metadata set to None) and loaded
    with ``kwargs``.

    Returns:
        Array that is True where the mask band's values are among
        ``self.masking["values"]``.

    Raises:
        ValueError: If no file path, or more than one, matches the mask
            band_id.
    """
    mask_band_id = self.masking["band_id"]
    candidates = [
        file_path for file_path in self._all_file_paths if mask_band_id in file_path
    ]

    if not candidates:
        raise ValueError(
            f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
            + str([Path(x).name for x in _ls_func(self.path)])
        )
    if len(candidates) > 1:
        raise ValueError(
            f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
        )

    (mask_path,) = candidates
    init_kwargs = {**self._common_init_kwargs, "metadata": None}
    mask_band = Band(mask_path, **init_kwargs)
    mask_band.load(**kwargs)
    return np.isin(mask_band.values, list(self.masking["values"]))
3510
+
3511
+
3512
def _load_band(band: Band, **kwargs) -> Band:
    """Call ``band.load`` with the given keyword arguments and return the result.

    NOTE(review): presumably a module-level wrapper so the call can be handed
    to parallel workers - confirm against the callers.
    """
    loaded = band.load(**kwargs)
    return loaded
3393
3514
 
3394
3515
 
3395
def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
    """Call ``band.apply`` with ``func`` and the keyword arguments; return the result.

    NOTE(review): presumably a module-level wrapper so the call can be handed
    to parallel workers - confirm against the callers.
    """
    applied = band.apply(func, **kwargs)
    return applied
3397
3518
 
3398
3519
 
3399
def _clip_band(band: Band, mask, **kwargs) -> Band:
    """Call ``band.clip`` with ``mask`` and the keyword arguments; return the result.

    NOTE(review): presumably a module-level wrapper so the call can be handed
    to parallel workers - confirm against the callers.
    """
    clipped = band.clip(mask, **kwargs)
    return clipped
3401
3522
 
3402
3523
 
@@ -3441,126 +3562,120 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
3441
3562
  return binary_erosion(arr, structure=structure).astype(dtype)
3442
3563
 
3443
3564
 
3444
- def get_cmap(arr: np.ndarray) -> LinearSegmentedColormap:
3565
def _plot_pixels_1d(
    y: np.ndarray,
    x: np.ndarray,
    alpha: float,
    x_var: str,
    y_label: str,
    rounding: int,
    figsize: tuple,
    first_date: pd.Timestamp,
) -> None:
    """Scatter-plot y against x with a least-squares line and confidence band.

    Args:
        y: Observed values, one per x value.
        x: 1d array of x values. When x_var is "date", the values are read as
            day offsets from first_date.
        alpha: Significance level. The shaded band is the (1 - alpha)
            confidence interval of the fitted line.
        x_var: Label of the x axis. The value "date" additionally replaces the
            x ticks with one year-month label per month.
        y_label: Label of the y axis.
        rounding: Number of decimals for the numbers in the title.
        figsize: Figure size passed to matplotlib.
        first_date: Date corresponding to x == 0. Only used when x_var is
            "date".
    """
    design_matrix = np.vstack([x, np.ones(x.shape[0])]).T
    coef, intercept = np.linalg.lstsq(design_matrix, y, rcond=None)[0]
    predicted = intercept + coef * x

    predicted_start = predicted[0]
    predicted_end = predicted[-1]
    predicted_change = predicted_end - predicted_start

    n_obs = len(x)
    # Degrees of freedom: two parameters (slope and intercept) are estimated.
    dof = n_obs - 2
    x_centered = x - np.mean(x)

    # With fewer than three points, or constant x, the interval is undefined.
    # Let it become NaN quietly so the points and the line are still drawn.
    with np.errstate(divide="ignore", invalid="ignore"):
        t_val = stats.t.ppf(1 - alpha / 2, dof)

        # Mean squared error of the residuals
        mse = np.sum((y - predicted) ** 2) / dof

        # Standard error of the predictions
        pred_stderr = np.sqrt(
            mse * (1 / n_obs + x_centered**2 / np.sum(x_centered**2))
        )

    # Confidence interval for the predictions
    ci_lower = predicted - t_val * pred_stderr
    ci_upper = predicted + t_val * pred_stderr

    # Small slopes need more decimals than `rounding` to stay visible. Never
    # use fewer decimals than `rounding`: a negative digit count would round
    # slopes above ~2.7 down to zero, and a slope of exactly 0 has no log.
    if coef != 0 and np.isfinite(coef):
        coef_decimals = max(rounding, int(np.ceil(-np.log10(abs(coef)))) + 2)
    else:
        coef_decimals = rounding

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)

    ax.scatter(x, y, color="#2c93db")
    ax.plot(x, predicted, color="#e0436b")
    ax.fill_between(
        x,
        ci_lower,
        ci_upper,
        color="#e0436b",
        alpha=0.2,
        # The band is the (1 - alpha) interval, e.g. 95% for alpha=0.05.
        label=f"{(1 - alpha) * 100:g}% CI",
    )
    plt.title(
        f"coef: {round(coef, coef_decimals)}, "
        f"pred change: {round(predicted_change, rounding)}, "
        f"pred start: {round(predicted_start, rounding)}, "
        f"pred end: {round(predicted_end, rounding)}"
    )
    plt.xlabel(x_var)
    plt.ylabel(y_label)

    if x_var == "date":
        date_labels = pd.to_datetime(
            [first_date + pd.Timedelta(days=int(day)) for day in x]
        )

        # One tick per month, placed at the first observation of that month.
        _, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)

        unique_x = np.array(x)[unique_indices]
        unique_labels = date_labels[unique_indices].strftime("%Y-%m")

        ax.set_xticks(unique_x)
        ax.set_xticklabels(unique_labels, rotation=45, ha="right")

    plt.show()
3640
+
3641
+
3642
def pixelwise(
    func: Callable,
    values: np.ndarray,
    mask_array: np.ndarray | None = None,
    index_aligned_kwargs: dict | None = None,
    kwargs: dict | None = None,
    processes: int = 1,
) -> tuple[np.ndarray, np.ndarray, list[Any]]:
    """Run a function for each pixel of a 3d array.

    Args:
        func: Function called once per pixel. Its first argument is the 1d
            array of the pixel's unmasked values along the first axis.
        values: 3d array. Each pixel's values are taken along the first axis.
        mask_array: Optional array of the same shape as 'values', truthy where
            a value should be left out. Pixels that are masked along the whole
            first axis are skipped.
        index_aligned_kwargs: Keyword arguments for 'func' whose values are
            aligned with the first axis of 'values'. Each value is subset with
            the pixel's mask before being passed on.
        kwargs: Keyword arguments passed unchanged to 'func'.
        processes: Number of parallel jobs.

    Returns:
        Three aligned sequences: row indices, column indices and the result
        of 'func' for each pixel that was not skipped.
    """
    index_aligned_kwargs = index_aligned_kwargs or {}
    kwargs = kwargs or {}

    if mask_array is None:
        mask_array = np.full(values.shape, False)
    else:
        # Coerce to bool so the inversion below is logical. On an integer
        # mask, `~` is bitwise and would yield integer indices instead.
        mask_array = np.asarray(mask_array, dtype=bool)

    unmasked = ~mask_array
    # skip pixels where all values are masked
    not_all_missing = unmasked.any(axis=0)

    def delayed_call(row: int, col: int) -> Any:
        valid = unmasked[:, row, col]
        return joblib.delayed(func)(
            values[valid, row, col],
            **kwargs,
            **{key: value[valid] for key, value in index_aligned_kwargs.items()},
        )

    # loop through long 1d arrays of aligned row and col indices
    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
    with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
        results: list[Any] = parallel(
            delayed_call(row, col)
            for row, col in zip(
                nonmissing_row_indices, nonmissing_col_indices, strict=True
            )
        )

    return nonmissing_row_indices, nonmissing_col_indices, results