ssb-sgis 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/geopandas_tools/conversion.py +6 -5
- sgis/io/dapla_functions.py +2 -0
- sgis/io/opener.py +2 -0
- sgis/maps/explore.py +18 -8
- sgis/maps/legend.py +3 -1
- sgis/maps/map.py +4 -0
- sgis/maps/thematicmap.py +53 -26
- sgis/raster/base.py +60 -23
- sgis/raster/image_collection.py +702 -652
- sgis/raster/regex.py +2 -2
- sgis/raster/zonal.py +1 -58
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.9.dist-info}/METADATA +1 -2
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.9.dist-info}/RECORD +15 -15
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.9.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.9.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
@@ -6,6 +6,7 @@ import os
 import random
 import re
 import time
+from abc import abstractmethod
 from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Iterator
@@ -26,7 +27,6 @@ import rasterio
 from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
-from matplotlib.colors import LinearSegmentedColormap
 from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
 from scipy import stats
@@ -41,11 +41,8 @@ from shapely.geometry import Polygon

 try:
     import dapla as dp
-    from dapla.gcs import GCSFileSystem
 except ImportError:
-
-    class GCSFileSystem:
-        """Placeholder."""
+    pass


 try:
@@ -55,7 +52,7 @@ except ImportError:
     class exceptions:
         """Placeholder."""

-        class RefreshError:
+        class RefreshError(Exception):
             """Placeholder."""


@@ -74,9 +71,9 @@ try:
 except ImportError:
     pass
 try:
-    import xarray as xr
     from xarray import DataArray
     from xarray import Dataset
+    from xarray import combine_by_coords
 except ImportError:

     class DataArray:
@@ -85,6 +82,9 @@ except ImportError:
     class Dataset:
         """Placeholder."""

+    def combine_by_coords(*args, **kwargs) -> None:
+        raise ImportError("xarray")
+

 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
@@ -102,8 +102,10 @@ from ..io.opener import opener
 from . import sentinel_config as config
 from .base import _array_to_geojson
 from .base import _gdf_to_arr
+from .base import _get_res_from_bounds
 from .base import _get_shape_from_bounds
 from .base import _get_transform_from_bounds
+from .base import _res_as_tuple
 from .base import get_index_mapper
 from .indices import ndvi
 from .regex import _extract_regex_match_from_string
@@ -142,8 +144,6 @@ DATE_RANGES_TYPE = (
     | tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
 )

-FILENAME_COL_SUFFIX = "_filename"
-
 DEFAULT_FILENAME_REGEX = r"""
 .*?
 (?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group
@@ -163,13 +163,12 @@ ALLOWED_INIT_KWARGS = [
     "filename_regexes",
     "all_bands",
     "crs",
-    "backend",
     "masking",
     "_merged",
     "date",
 ]

-
+_LOAD_COUNTER: int = 0


 def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
@@ -196,7 +195,7 @@ class ImageCollectionGroupBy:
         Args:
             data: Iterable of group values and ImageCollection groups.
             by: list of group attributes.
-            collection: ImageCollection
+            collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
         """
         self.data = list(data)
         self.by = by
@@ -291,7 +290,7 @@ class ImageCollectionGroupBy:

     def __repr__(self) -> str:
         """String representation."""
-        return f"{self.__class__.__name__}({len(self)})"
+        return f"{self.__class__.__name__}({len(self)}, by={self.by})"


 @dataclass(frozen=True)
@@ -307,7 +306,11 @@ class BandMasking:


 class None_:
-    """Default
+    """Default None for args that are not allowed to be None."""
+
+    def __new__(cls) -> None:
+        """Always returns None."""
+        return None


 class _ImageBase:
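For context, the `None_` class added above is a required-argument sentinel: the class itself is used as a default value, and calling it yields plain `None`, so code like `callable(res) and res() is None` can tell "argument omitted" apart from an explicit `None`. A minimal standalone sketch of the same pattern, with illustrative names rather than the library's API:

    class _RequiredNone:
        """Sentinel default: calling it always returns None."""

        def __new__(cls) -> None:
            return None


    def load(res=_RequiredNone):
        # The default is the class itself, so it is callable and returns None.
        if callable(res) and res() is None:
            res = None  # treat as "not provided"
        return res


    assert load() is None      # default sentinel collapses to None
    assert load(res=10) == 10  # explicit value passes through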
@@ -318,18 +321,16 @@ class _ImageBase:

     def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:

-        self._mask = None
         self._bounds = None
-        self._merged = False
-        self._from_array = False
-        self._from_geopandas = False
-        self.metadata_attributes = self.metadata_attributes or {}
         self._path = None
-        self._metadata_from_xml = False
-
         self._bbox = to_bbox(bbox) if bbox is not None else None

-        self.
+        self.metadata_attributes = self.metadata_attributes or {}
+
+        if metadata is not None:
+            self.metadata = self._metadata_to_nested_dict(metadata)
+        else:
+            self.metadata = {}

         self.image_patterns = self._compile_regexes("image_regexes")
         self.filename_patterns = self._compile_regexes("filename_regexes")
@@ -339,29 +340,45 @@ class _ImageBase:
                 f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
             )
             if key in ALLOWED_INIT_KWARGS and key in dir(self):
-
-                setattr(self, f"_{key}", value)
-            elif is_method(self, key):
-                raise error_obj
-            else:
-                setattr(self, key, value)
+                self._safe_setattr(key, value, error_obj)
             else:
                 raise error_obj

+        # attributes for debugging
+        self._metadata_from_xml = False
+        self._merged = False
+        self._from_array = False
+        self._from_geopandas = False
+
+    def _safe_setattr(
+        self, key: str, value: Any, error_obj: Exception | None = None
+    ) -> None:
+        if is_property(self, key):
+            setattr(self, f"_{key}", value)
+        elif is_method(self, key):
+            if error_obj is None:
+                raise AttributeError(f"Cannot set method '{key}'.")
+            raise error_obj
+        else:
+            setattr(self, key, value)
+
     def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
-        regexes = getattr(self, regex_attr)
-        if regexes:
-
-
-
-        return ()
+        regexes: tuple[str] | str = getattr(self, regex_attr)
+        if not regexes:
+            return ()
+        if isinstance(regexes, str):
+            regexes = (regexes,)
+        return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)

     @staticmethod
     def _metadata_to_nested_dict(
         metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
-    ) -> dict[str, dict[str, Any]]
-
-
+    ) -> dict[str, dict[str, Any]]:
+        """Construct metadata dict from dictlike, DataFrame or file path.
+
+        Extract metadata value:
+        >>> self.metadata[self.path]['cloud_cover_percentage']
+        """
         if isinstance(metadata, (str | Path | os.PathLike)):
             metadata = _read_parquet_func(metadata)

@@ -376,15 +393,16 @@ class _ImageBase:
                 return x if not (is_scalar(x) and pd.isna(x)) else None

            # to nested dict because pandas indexing gives rare KeyError with long strings
-
+            return {
                 _fix_path(path): {
                     attr: na_to_none(value) for attr, value in row.items()
                 }
                 for path, row in metadata.iterrows()
             }
         elif is_dict_like(metadata):
-
+            return {_fix_path(path): value for path, value in metadata.items()}

+        # try to allow custom types with dict-like indexing
         return metadata

     @property
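The rewritten `_metadata_to_nested_dict` turns a path-indexed DataFrame into a `{path: {attribute: value}}` dict with NA replaced by `None`, so lookups like `self.metadata[path]['cloud_cover_percentage']` bypass pandas indexing. A rough standalone sketch of that conversion, with hypothetical columns and without the library's `_fix_path` or parquet reader:

    import numpy as np
    import pandas as pd

    metadata = pd.DataFrame(
        {"cloud_cover_percentage": [12.5, np.nan]},
        index=["img_a.tif", "img_b.tif"],
    )

    def na_to_none(x):
        # Scalars that are NA become None; everything else passes through.
        return None if pd.api.types.is_scalar(x) and pd.isna(x) else x

    nested = {
        path: {attr: na_to_none(value) for attr, value in row.items()}
        for path, row in metadata.iterrows()
    }

    assert nested["img_a.tif"]["cloud_cover_percentage"] == 12.5
    assert nested["img_b.tif"]["cloud_cover_percentage"] is None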
@@ -394,7 +412,6 @@ class _ImageBase:
             "res": self.res,
             "bbox": self._bbox,
             "nodata": self.nodata,
-            "backend": self.backend,
             "metadata": self.metadata,
         }

@@ -408,19 +425,22 @@ class _ImageBase:
     @property
     def res(self) -> int:
         """Pixel resolution."""
+        # if self._res is None:
+        #     if self.has_array:
+        #         self._res = _get_res_from_bounds(self.bounds, self.values.shape)
+        #     else:
+        #         with opener(self.path) as file:
+        #             with rasterio.open(file) as src:
+        #                 self._res = src.res
         return self._res

-    @
-    def
-
-        return self.union_all().centroid
+    @abstractmethod
+    def union_all(self) -> Polygon | MultiPolygon:
+        pass

     def assign(self, **kwargs) -> "_ImageBase":
         for key, value in kwargs.items():
-
-            setattr(self, key, value)
-            except AttributeError:
-                setattr(self, f"_{key}", value)
+            self._safe_setattr(key, value)
         return self

     def _name_regex_searcher(
@@ -451,7 +471,10 @@ class _ImageBase:
         )

     def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
-        """Create a dataframe with file paths and image paths that match regexes.
+        """Create a dataframe with file paths and image paths that match regexes.
+
+        Used in __init__ to select relevant paths fast.
+        """
         df = pd.DataFrame({"file_path": list(file_paths)})

         df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
@@ -518,12 +541,14 @@ class _ImageBase:
 class _ImageBandBase(_ImageBase):
     """Common parent class of Image and Band."""

-    def intersects(
-
-
+    def intersects(
+        self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
+    ) -> bool:
+        if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
+            pyproj.CRS(geometry.crs)
         ):
-            raise ValueError(f"crs mismatch: {self.crs} and {
-        return self.union_all().intersects(to_shapely(
+            raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
+        return self.union_all().intersects(to_shapely(geometry))

     def union_all(self) -> Polygon:
         try:
@@ -532,20 +557,21 @@ class _ImageBandBase(_ImageBase):
             return Polygon()

     @property
-    def
-
+    def centroid(self) -> Point:
+        """Centerpoint of the object."""
+        return self.union_all().centroid

     @property
     def year(self) -> str:
         if hasattr(self, "_year") and self._year:
             return self._year
-        return self.date[:4]
+        return str(self.date)[:4]

     @property
     def month(self) -> str:
         if hasattr(self, "_month") and self._month:
             return self._month
-        return
+        return str(self.date).replace("-", "").replace("/", "")[4:6]

     @property
     def name(self) -> str | None:
@@ -572,24 +598,25 @@ class _ImageBandBase(_ImageBase):
         return self._name_regex_searcher("level", self.image_patterns)

     def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
-
+        """Search through xml files for missing metadata attributes."""
         self._metadata_from_xml = True

         missing_metadata_attributes = {
-
-            for
-            if not hasattr(self,
+            attr: constructor_func
+            for attr, constructor_func in metadata_attributes.items()
+            if not hasattr(self, attr) or getattr(self, attr) is None
         }

         nonmissing_metadata_attributes = {
-
-            for
-            if
+            attr: getattr(self, attr)
+            for attr in metadata_attributes
+            if attr not in missing_metadata_attributes
         }

         if not missing_metadata_attributes:
             return nonmissing_metadata_attributes

+        # read all xml content once
         file_contents: list[str] = []
         for path in self._all_file_paths:
             if ".xml" not in path:
@@ -597,37 +624,40 @@ class _ImageBandBase(_ImageBase):
             with _open_func(path, "rb") as file:
                 file_contents.append(file.read().decode("utf-8"))

-
+        def is_last_xml(i: int) -> bool:
+            return i == len(file_contents) - 1
+
+        for attr, value in missing_metadata_attributes.items():
             results = None
-            for i,
+            for i, file_content in enumerate(file_contents):
                 if isinstance(value, str) and value in dir(self):
-                    method
+                    # method or a hardcoded value
+                    value: Callable | Any = getattr(self, value)
+
+                if callable(value):
                     try:
-                        results =
+                        results = value(file_content)
                     except _RegexError as e:
-                        if i
-                            raise e
+                        if is_last_xml(i):
+                            raise e.__class__(self.path, e) from e
                         continue
                     if results is not None:
                         break
-
-
+                elif (
+                    isinstance(value, str)
+                    or hasattr(value, "__iter__")
+                    and all(isinstance(x, str | re.Pattern) for x in value)
+                ):
                     try:
-                        results = value
+                        results = _extract_regex_match_from_string(file_content, value)
                     except _RegexError as e:
-                        if i
+                        if is_last_xml(i):
                             raise e
-
-
-
-
-                    try:
-                        results = _extract_regex_match_from_string(filetext, value)
-                    except _RegexError as e:
-                        if i == len(self._all_file_paths) - 1:
-                            raise e
+                elif value is not None:
+                    results = value
+                    break

-            missing_metadata_attributes[
+            missing_metadata_attributes[attr] = results

         return missing_metadata_attributes | nonmissing_metadata_attributes

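The reworked `_get_metadata_attributes` loop tries each attribute's constructor (a method, a regex pattern, or a hardcoded value) against the text of every XML file and keeps the first hit, re-raising only when the last file also fails. A simplified sketch of that first-match-wins search with plain `re`, using sample patterns and XML rather than the real Sentinel metadata:

    import re

    file_contents = [
        "<root><OTHER>1</OTHER></root>",
        "<root><CLOUDY_PIXEL_PERCENTAGE>8.4</CLOUDY_PIXEL_PERCENTAGE></root>",
    ]
    pattern = re.compile(
        r"<CLOUDY_PIXEL_PERCENTAGE>(?P<value>[\d.]+)</CLOUDY_PIXEL_PERCENTAGE>"
    )

    def first_match(contents: list[str], pat: re.Pattern) -> str:
        for i, text in enumerate(contents):
            match = pat.search(text)
            if match is not None:
                return match.group("value")
            if i == len(contents) - 1:  # last file: give up loudly
                raise ValueError(f"no match for {pat.pattern}")
        raise ValueError("no xml content")

    assert first_match(file_contents, pattern) == "8.4"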
@@ -671,14 +701,15 @@ class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""

     cmap: ClassVar[str | None] = None
-    backend: str = "numpy"

     @classmethod
     def from_geopandas(
         cls,
         gdf: GeoDataFrame | GeoSeries,
-        res: int,
         *,
+        res: int | None = None,
+        out_shape: tuple[int, int] | None = None,
+        bounds: Any | None = None,
         fill: int = 0,
         all_touched: bool = False,
         merge_alg: Callable = MergeAlg.replace,
@@ -687,17 +718,27 @@ class Band(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Create Band from a GeoDataFrame."""
-
-
-            res=res,
-            fill=fill,
-            all_touched=all_touched,
-            merge_alg=merge_alg,
-            default_value=default_value,
-            dtype=dtype,
-        )
+        if bounds is not None:
+            bounds = to_bbox(bounds)

-
+        if out_shape == (0,):
+            arr = np.array([])
+        else:
+            arr = _gdf_to_arr(
+                gdf,
+                res=res,
+                bounds=bounds,
+                fill=fill,
+                all_touched=all_touched,
+                merge_alg=merge_alg,
+                default_value=default_value,
+                dtype=dtype,
+                out_shape=out_shape,
+            )
+        if bounds is None:
+            bounds = gdf.total_bounds
+
+        obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
         obj._from_geopandas = True
         return obj

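`Band.from_geopandas` now accepts `out_shape` and `bounds` in addition to `res` and forwards them to the rasterisation helper. The helper `_gdf_to_arr` is not shown in this diff, so the following is only an approximation of the underlying idea using rasterio directly, with illustrative bounds and shape:

    import numpy as np
    from affine import Affine
    from rasterio import features
    from shapely.geometry import box

    bounds = (0.0, 0.0, 10.0, 10.0)  # minx, miny, maxx, maxy
    out_shape = (10, 10)             # rows, cols -> 1x1 pixels here
    transform = Affine(
        (bounds[2] - bounds[0]) / out_shape[1], 0, bounds[0],
        0, -(bounds[3] - bounds[1]) / out_shape[0], bounds[3],
    )

    # Burn value 1 inside the polygon, 0 (the fill) outside.
    arr = features.rasterize(
        [(box(2, 2, 8, 8), 1)],
        out_shape=out_shape,
        transform=transform,
        fill=0,
        all_touched=False,
        dtype="uint8",
    )
    assert arr.shape == out_shape and arr.max() == 1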
@@ -717,9 +758,6 @@ class Band(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Band initialiser."""
-        if callable(res) and isinstance(res(), None_):
-            raise TypeError("Must specify 'res'")
-
         if data is None:
             # allowing 'path' to replace 'data' as argument
             # to make the print repr. valid as initialiser
@@ -745,11 +783,20 @@ class Band(_ImageBandBase):
         if isinstance(data, np.ndarray):
             if self._bounds is None:
                 raise ValueError("Must specify bounds when data is an array.")
+            if not (res is None or (callable(res) and res() is None)):
+                # if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
+                #     res
+                # ) != _get_res_from_bounds(self._bounds, data.shape):
+                raise ValueError(
+                    f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
+                )
             self._crs = crs
             self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
             self._from_array = True
             self.values = data

+            self._res = _get_res_from_bounds(self._bounds, self.values.shape)
+
         elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError(
                 "'data' must be string, Path-like or numpy.ndarray. "
@@ -757,8 +804,10 @@ class Band(_ImageBandBase):
             )
         else:
             self._path = _fix_path(str(data))
+            if callable(res) and res() is None:
+                res = None
+            self._res = res

-        self._res = res
         if cmap is not None:
             self.cmap = cmap
         self._name = name
@@ -786,7 +835,7 @@ class Band(_ImageBandBase):
                 else:
                     setattr(self, key, value)

-        elif self.metadata_attributes and self.path is not None
+        elif self.metadata_attributes and self.path is not None:
             if self._all_file_paths is None:
                 self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
             for key, value in self._get_metadata_attributes(
@@ -798,43 +847,28 @@ class Band(_ImageBandBase):
         """Makes Bands sortable by band_id."""
         return self.band_id < other.band_id

+    def value_counts(self) -> pd.Series:
+        """Value count of each value of the band's array."""
+        try:
+            values = self.values.data[self.values.mask == False]
+        except AttributeError:
+            values = self.values
+        unique_values, counts = np.unique(values, return_counts=True)
+        return pd.Series(counts, index=unique_values).sort_values(ascending=False)
+
     @property
     def values(self) -> np.ndarray:
         """The numpy array, if loaded."""
         if self._values is None:
-            raise
+            raise _ArrayNotLoadedError("array is not loaded.")
         return self._values

     @values.setter
     def values(self, new_val):
-        if
-            self._values = new_val
-            return
-        elif self.backend == "xarray" and isinstance(new_val, DataArray):
-            # attrs can dissappear, so doing a union
-            attrs = self._values.attrs | new_val.attrs
+        if isinstance(new_val, np.ndarray):
             self._values = new_val
-
-            return
-
-        if self.backend == "numpy":
+        else:
             self._values = self._to_numpy(new_val)
-        if self.backend == "xarray":
-            if not isinstance(self._values, DataArray):
-                self._values = self._to_xarray(
-                    new_val,
-                    transform=self.transform,
-                )
-
-            elif isinstance(new_val, np.ndarray):
-                self._values.values = new_val
-            else:
-                self._values = new_val
-
-    @property
-    def mask(self) -> "Band":
-        """Mask Band."""
-        return self._mask

     @property
     def band_id(self) -> str:
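The new `value_counts` counts only the unmasked values of the band array and returns them as a descending pandas Series. The same idea on a plain masked array, with made-up data:

    import numpy as np
    import pandas as pd

    arr = np.ma.array(
        [[1, 1, 2], [2, 2, 0]],
        mask=[[False, False, False], [False, False, True]],  # last cell is nodata
    )

    values = arr.data[~arr.mask]  # drop masked cells
    unique_values, counts = np.unique(values, return_counts=True)
    value_counts = pd.Series(counts, index=unique_values).sort_values(ascending=False)

    assert value_counts[2] == 3 and value_counts[1] == 2
    assert 0 not in value_counts.index  # the masked cell is excluded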
@@ -921,28 +955,39 @@ class Band(_ImageBandBase):
         return df

     def clip(
-        self,
+        self,
+        mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
     ) -> "Band":
-        """Clip band values to geometry mask."""
+        """Clip band values to geometry mask while preserving bounds."""
         if not self.height or not self.width:
             return self

-
-
-
-
-
-
-
-
-
+        fill: int = self.nodata or 0
+
+        mask_array: np.ndarray = Band.from_geopandas(
+            gdf=to_gdf(mask)[["geometry"]],
+            default_value=1,
+            fill=fill,
+            out_shape=self.values.shape,
+            bounds=mask,
+        ).values
+
+        is_not_polygon = mask_array == fill
+
+        if isinstance(self.values, np.ma.core.MaskedArray):
+            self._values.mask |= is_not_polygon
+        else:
+            self._values = np.ma.array(
+                self.values, mask=is_not_polygon, fill_value=self.nodata
+            )
+
         return self

     def load(
         self,
         bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
         indexes: int | tuple[int] | None = None,
-        masked: bool
+        masked: bool = True,
         file_system=None,
         **kwargs,
     ) -> "Band":
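`Band.clip` keeps the array bounds and instead masks out pixels that fall outside the geometry: the clip geometry is rasterised to the band's shape with value 1, and everything equal to the fill value becomes part of the numpy mask. A hedged sketch of that mask-instead-of-crop approach with a synthetic array and no file I/O:

    import numpy as np

    values = np.arange(16).reshape(4, 4)

    # Pretend this came from rasterising the clip geometry: 1 inside, 0 (fill) outside.
    rasterized_mask = np.zeros((4, 4), dtype="uint8")
    rasterized_mask[1:3, 1:3] = 1

    is_not_polygon = rasterized_mask == 0
    clipped = np.ma.array(values, mask=is_not_polygon, fill_value=0)

    assert clipped.shape == values.shape  # bounds/shape preserved
    assert clipped.count() == 4           # only the 2x2 window stays unmasked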
@@ -950,11 +995,10 @@ class Band(_ImageBandBase):

         The array is stored in the 'values' property.
         """
-        global
-
+        global _LOAD_COUNTER
+        _LOAD_COUNTER += 1

-
-        masked = True if self.mask is None else False
+        _masking = kwargs.pop("_masking", self.masking)

         bounds_was_none = bounds is None

@@ -963,12 +1007,9 @@ class Band(_ImageBandBase):
         should_return_empty: bool = bounds is not None and bounds.area == 0
         if should_return_empty:
             self._values = np.array([])
-            if self.mask is not None and not self.is_mask:
-                self._mask = self._mask.load(
-                    bounds=bounds, indexes=indexes, file_system=file_system
-                )
             self._bounds = None
             self.transform = None
+            # activate setter
             self.values = self._values

             return self
@@ -978,7 +1019,6 @@ class Band(_ImageBandBase):

         if bounds is not None:
             minx, miny, maxx, maxy = to_bbox(bounds)
-            ## round down/up to integer to avoid precision trouble
             # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
             bounds = minx, miny, maxx, maxy

@@ -992,20 +1032,19 @@ class Band(_ImageBandBase):
         out_shape = kwargs.pop("out_shape", None)

         if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
-            print(self)
-            print(self.mask)
-            print(self.values.shape)
-            print([int(x) for x in bounds], [int(x) for x in self.bounds])
             raise ValueError(
                 "Cannot re-load array with different bounds. "
                 "Use .copy() to read with different bounds. "
-                "Or .clip(mask) to clip."
+                "Or .clip(mask) to clip.",
+                self,
+                self.values.shape,
+                [int(x) for x in bounds],
+                [int(x) for x in self.bounds],
             )
-

         with opener(self.path, file_system=file_system) as f:
             with rasterio.open(f, nodata=self.nodata) as src:
-                self._res =
-
+                self._res = src.res if not self.res else self.res
                 if self.nodata is None or np.isnan(self.nodata):
                     self.nodata = src.nodata
                 else:
@@ -1018,7 +1057,7 @@ class Band(_ImageBandBase):
                     )

                 if bounds is None:
-                    if self._res !=
+                    if self._res != src.res:
                         if out_shape is None:
                             out_shape = _get_shape_from_bounds(
                                 to_bbox(src.bounds), self.res, indexes
@@ -1070,18 +1109,12 @@ class Band(_ImageBandBase):
                 else:
                     values[values == src.nodata] = self.nodata

-                if
-
-
-                elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
-
-                    if not self.mask.has_array:
-                        self._mask = self.mask.load(
-                            bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
-                        )
-                    mask_arr = self.mask.values
-
+                if _masking and not isinstance(values, np.ma.core.MaskedArray):
+                    mask_arr = _read_mask_array(self, bounds=bounds)
                     values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
+                elif _masking:
+                    mask_arr = _read_mask_array(self, bounds=bounds)
+                    values.mask |= mask_arr

         if bounds is not None:
             self._bounds = to_bbox(bounds)
@@ -1092,13 +1125,6 @@ class Band(_ImageBandBase):

         return self

-    @property
-    def is_mask(self) -> bool:
-        """True if the band_id is equal to the masking band_id."""
-        if self.masking is None:
-            return False
-        return self.band_id == self.masking["band_id"]
-
     @property
     def has_array(self) -> bool:
         """Whether the array is loaded."""
@@ -1106,7 +1132,7 @@ class Band(_ImageBandBase):
             if not isinstance(self.values, (np.ndarray | DataArray)):
                 raise ValueError()
             return True
-        except ValueError:  # also catches
+        except ValueError:  # also catches _ArrayNotLoadedError
            return False

     def write(
@@ -1126,10 +1152,17 @@ class Band(_ImageBandBase):
         if self.crs is None:
             raise ValueError("Cannot write None crs to image.")

+        if self.nodata:
+            # TODO take out .data if masked?
+            values_with_nodata = np.concatenate(
+                [self.values.flatten(), np.array([self.nodata])]
+            )
+        else:
+            values_with_nodata = self.values
         profile = {
             "driver": driver,
             "compress": compress,
-            "dtype": rasterio.dtypes.get_minimum_dtype(
+            "dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
             "crs": self.crs,
             "transform": self.transform,
             "nodata": self.nodata,
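When writing, the nodata value is now appended to the flattened values before asking rasterio for the minimum dtype, so a large nodata like 65535 or -9999 cannot be silently clipped by a dtype chosen only from the data. A short sketch of why that matters, with a toy array and the real `rasterio.dtypes` call:

    import numpy as np
    from rasterio import dtypes

    values = np.array([[0, 1], [2, 3]])
    nodata = 65535

    # From the data alone the minimum dtype is uint8, which cannot hold 65535.
    assert dtypes.get_minimum_dtype(values) == "uint8"

    values_with_nodata = np.concatenate([values.flatten(), np.array([nodata])])
    assert dtypes.get_minimum_dtype(values_with_nodata) == "uint16"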
@@ -1138,19 +1171,18 @@ class Band(_ImageBandBase):
             "width": self.width,
         } | kwargs

-        # with opener(path, "wb", file_system=self.file_system) as f:
         with opener(path, "wb", file_system=file_system) as f:
             with rasterio.open(f, "w", **profile) as dst:

                 if dst.nodata is None:
                     dst.nodata = _get_dtype_min(dst.dtypes[0])

-
-
-
-
-
-
+                if (
+                    isinstance(self.values, np.ma.core.MaskedArray)
+                    and dst.nodata is not None
+                ):
+                    self.values.data[np.isnan(self.values.data)] = dst.nodata
+                    self.values.data[self.values.mask] = dst.nodata

                 if len(self.values.shape) == 2:
                     dst.write(self.values, indexes=1)
@@ -1238,7 +1270,7 @@ class Band(_ImageBandBase):
         The gradient will be 1 (1 meter up for every meter forward).
         The calculation is by default done in place to save memory.

-        >>> band.gradient()
+        >>> band.gradient(copy=False)
         >>> band.values
         array([[0., 1., 1., 1., 0.],
                [1., 1., 1., 1., 1.],
@@ -1299,11 +1331,13 @@ class Band(_ImageBandBase):
             dropna=dropna,
         )

-    def to_geopandas(self, column: str = "value") -> GeoDataFrame:
+    def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
         """Create a GeoDataFrame from the image Band.

         Args:
             column: Name of resulting column that holds the raster values.
+            dropna: Whether to remove values that are NA or equal to the nodata
+                value.

         Returns:
             A GeoDataFrame with a geometry column and array values.
@@ -1311,24 +1345,28 @@ class Band(_ImageBandBase):
         if not hasattr(self, "_values"):
             raise ValueError("Array is not loaded.")

+        if isinstance(self.values, np.ma.core.MaskedArray):
+            self.values.data[self.values.mask] = self.nodata or 0
         if self.values.shape[0] == 0:
-
-
-
-
-
-
+            df = GeoDataFrame({"geometry": []}, crs=self.crs)
+        else:
+            df = GeoDataFrame(
+                pd.DataFrame(
+                    _array_to_geojson(
+                        self.values, self.transform, processes=self.processes
+                    ),
+                    columns=[column, "geometry"],
                 ),
-
-
-
-
-
+                geometry="geometry",
+                crs=self.crs,
+            )
+
+        if dropna:
+            return df[(df[column] != self.nodata) & (df[column].notna())]
+        return df

     def to_xarray(self) -> DataArray:
         """Convert the raster to an xarray.DataArray."""
-        if self.backend == "xarray":
-            return self.values
         return self._to_xarray(
             self.values,
             transform=self.transform,
@@ -1345,19 +1383,6 @@ class Band(_ImageBandBase):
         if not isinstance(arr, np.ndarray):
             mask_arr = None
             if masked:
-                # if self.mask is not None:
-                #     print(self.mask.values.shape, arr.shape)
-                # if self.mask is not None and self.mask.values.shape == arr.shape:
-                #     print("hei", self.mask.values.sum())
-                #     mask_arr = self.mask.values
-                # else:
-                #     mask_arr = np.full(arr.shape, False)
-                # try:
-                #     print("hei222", arr.isnull().values.sum())
-                #     mask_arr |= arr.isnull().values
-                # except AttributeError:
-                #     pass
-                # mask_arr = np.full(arr.shape, False)
                 try:
                     mask_arr = arr.isnull().values
                 except AttributeError:
@@ -1374,11 +1399,11 @@ class Band(_ImageBandBase):

         if (
             masked
-            and self.mask is not None
-            and not self.is_mask
             and not isinstance(arr, np.ma.core.MaskedArray)
+            and mask_arr is not None
         ):
             arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
         return arr

     def __repr__(self) -> str:
@@ -1401,10 +1426,6 @@ class NDVIBand(Band):

     cmap: str = "Greens"

-    # @staticmethod
-    # def get_cmap(arr: np.ndarray):
-    #     return get_cmap(arr)
-

 def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
     arr = np.median(arr, axis=0).astype(int)
@@ -1416,12 +1437,12 @@ class Image(_ImageBandBase):
     """Image consisting of one or more Bands."""

     band_class: ClassVar[Band] = Band
-    backend: str = "numpy"

     def __init__(
         self,
         data: str | Path | Sequence[Band] | None = None,
-        res: int |
+        res: int | None_ = None_,
+        mask: "Band | None" = None,
         processes: int = 1,
         df: pd.DataFrame | None = None,
         nodata: int | None = None,
@@ -1442,12 +1463,18 @@ class Image(_ImageBandBase):
         self.processes = processes
         self._crs = None
         self._bands = None
+        self._mask = mask
+
+        if isinstance(data, Band):
+            data = [data]

         if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
             self._construct_image_from_bands(data, res)
             return
         elif not isinstance(data, (str | Path | os.PathLike)):
-            raise TypeError(
+            raise TypeError(
+                f"'data' must be string, Path-like or a sequence of Band. Got {data}"
+            )

         self._res = res
         self._path = _fix_path(data)
@@ -1455,7 +1482,8 @@ class Image(_ImageBandBase):
         if all_file_paths is None and self.path:
             self._all_file_paths = _get_all_file_paths(self.path)
         elif self.path:
-
+            name = Path(self.path).name
+            all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
             self._all_file_paths = {x for x in all_file_paths if self.path in x}
         else:
             self._all_file_paths = None
@@ -1467,11 +1495,7 @@ class Image(_ImageBandBase):

         df["image_path"] = df["image_path"].astype(str)

-        cols_to_explode = [
-            "file_path",
-            "file_name",
-            *[x for x in df if FILENAME_COL_SUFFIX in x],
-        ]
+        cols_to_explode = ["file_path", "file_name"]
         try:
             df = df.explode(cols_to_explode, ignore_index=True)
         except ValueError:
@@ -1499,20 +1523,92 @@ class Image(_ImageBandBase):
             else:
                 setattr(self, key, value)

-
+        elif self.metadata_attributes and self.path is not None:
             for key, value in self._get_metadata_attributes(
                 self.metadata_attributes
             ).items():
                 setattr(self, key, value)

+    def clip(
+        self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
+    ) -> "Image":
+        """Clip band values to geometry mask while preserving bounds."""
+        copied = self.copy() if copy else self
+
+        fill: int = self.nodata or 0
+
+        mask_array: np.ndarray = Band.from_geopandas(
+            gdf=to_gdf(mask)[["geometry"]],
+            default_value=1,
+            fill=fill,
+            out_shape=next(iter(self)).values.shape,
+            bounds=self.bounds,
+        ).values
+
+        is_not_polygon = mask_array == fill
+
+        for band in copied:
+            if isinstance(band.values, np.ma.core.MaskedArray):
+                band._values.mask |= is_not_polygon
+            else:
+                band._values = np.ma.array(
+                    band.values, mask=is_not_polygon, fill_value=band.nodata
+                )
+
+        return copied
+
+    def load(
+        self,
+        bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
+        indexes: int | tuple[int] | None = None,
+        file_system=None,
+        **kwargs,
+    ) -> "ImageCollection":
+        """Load all image Bands with threading."""
+        if bounds is None and indexes is None and all(band.has_array for band in self):
+            return self
+
+        if self.masking:
+            mask_array: np.ndarray = _read_mask_array(
+                self,
+                bounds=bounds,
+                indexes=indexes,
+                file_system=file_system,
+                **kwargs,
+            )
+
+        with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
+            parallel(
+                joblib.delayed(_load_band)(
+                    band,
+                    bounds=bounds,
+                    indexes=indexes,
+                    file_system=file_system,
+                    _masking=None,
+                    **kwargs,
+                )
+                for band in self
+            )
+
+        if self.masking:
+            for band in self:
+                if isinstance(band.values, np.ma.core.MaskedArray):
+                    band.values.mask |= mask_array
+                else:
+                    band.values = np.ma.array(
+                        band.values, mask=mask_array, fill_value=self.nodata
+                    )
+
+        return self
+
     def _construct_image_from_bands(
         self, data: Sequence[Band], res: int | None
     ) -> None:
         self._bands = list(data)
         if res is None:
-            res =
+            res = {band.res for band in self.bands}
             if len(res) == 1:
-                self._res = res
+                self._res = next(iter(res))
             else:
                 raise ValueError(f"Different resolutions for the bands: {res}")
         else:
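The new `Image.load` fans the band reads out over a joblib threading backend and, when masking is configured, reads the mask once and ORs it into every band afterwards. The helpers `_load_band` and `_read_mask_array` are internal and not shown here, so this is only a stripped-down sketch of that fan-out/fan-in structure with a dummy loader:

    import joblib
    import numpy as np

    def _load_one(index: int) -> np.ndarray:
        # Stand-in for reading one band from disk.
        return np.full((2, 2), index)

    mask_array = np.array([[True, False], [False, False]])

    with joblib.Parallel(n_jobs=4, backend="threading") as parallel:
        bands = parallel(joblib.delayed(_load_one)(i) for i in range(3))

    # Apply the shared mask after all reads have finished.
    masked_bands = [np.ma.array(b, mask=mask_array, fill_value=0) for b in bands]
    assert all(b.count() == 3 for b in masked_bands)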
@@ -1558,8 +1654,7 @@ class Image(_ImageBandBase):
             arr,
             bounds=red.bounds,
             crs=red.crs,
-
-            **red._common_init_kwargs,
+            **{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
         )

     def get_brightness(
@@ -1590,81 +1685,16 @@ class Image(_ImageBandBase):
             brightness,
             bounds=red.bounds,
             crs=self.crs,
-
-            **self._common_init_kwargs,
+            **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
         )

     def to_xarray(self) -> DataArray:
         """Convert the raster to an xarray.DataArray."""
-        if self.backend == "xarray":
-            return self.values
-
         return self._to_xarray(
             np.array([band.values for band in self]),
             transform=self[0].transform,
         )

-    @property
-    def mask(self) -> Band | None:
-        """Mask Band."""
-        if self.masking is None:
-            return None
-
-        elif self._mask is not None:
-            return self._mask
-
-        elif self._bands is not None and all(band.mask is not None for band in self):
-            if len({id(band.mask) for band in self}) > 1:
-                raise ValueError(
-                    "Image bands must have same mask.",
-                    {id(band.mask) for band in self},
-                )  # TODO
-            self._mask = next(
-                iter([band.mask for band in self if band.mask is not None])
-            )
-            return self._mask
-
-        mask_band_id = self.masking["band_id"]
-        mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
-        if len(mask_paths) > 1:
-            raise ValueError(
-                f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
-            )
-        elif not mask_paths:
-            raise ValueError(
-                f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
-                + str([Path(x).name for x in _ls_func(self.path)])
-            )
-
-        self._mask = self.band_class(
-            mask_paths[0],
-            **self._common_init_kwargs,
-        )
-        if self._bands is not None:
-            for band in self:
-                band._mask = self._mask
-        return self._mask
-
-    @mask.setter
-    def mask(self, values: Band | None) -> None:
-        if values is None:
-            self._mask = None
-            for band in self:
-                band._mask = None
-            return
-        if not isinstance(values, Band):
-            raise TypeError(f"mask must be Band. Got {type(values)}")
-        self._mask = values
-        mask_arr = self._mask.values
-        for band in self:
-            band._mask = self._mask
-            try:
-                band.values = np.ma.array(
-                    band.values.data, mask=mask_arr, fill_value=band.nodata
-                )
-            except ArrayNotLoadedError:
-                pass
-
     @property
     def band_ids(self) -> list[str]:
         """The Band ids."""
@@ -1687,12 +1717,9 @@ class Image(_ImageBandBase):
         else:
             paths = self._df["file_path"]

-        mask = self.mask
-
         self._bands = [
             self.band_class(
                 path,
-                mask=mask,
                 all_file_paths=self._all_file_paths,
                 **self._common_init_kwargs,
             )
@@ -1901,13 +1928,12 @@ class ImageCollection(_ImageBase):
     image_class: ClassVar[Image] = Image
     band_class: ClassVar[Band] = Band
     _metadata_attribute_collection_type: ClassVar[type] = pd.Series
-    backend: str = "numpy"

     def __init__(
         self,
         data: str | Path | Sequence[Image] | Sequence[str | Path],
-        res: int,
-        level: str | None = None_,
+        res: int | None_ = None_,
+        level: str | None_ | None = None_,
         processes: int = 1,
         metadata: str | dict | pd.DataFrame | None = None,
         nodata: int | None = None,
@@ -1923,7 +1949,7 @@ class ImageCollection(_ImageBase):

         super().__init__(metadata=metadata, **kwargs)

-        if callable(level) and
+        if callable(level) and level() is None:
             level = None

         self.nodata = nodata
@@ -1944,13 +1970,19 @@ class ImageCollection(_ImageBase):
         elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
             # adding band paths (asuming 'data' is a sequence of image paths)
             try:
-                self._all_file_paths = _get_child_paths_threaded(data) |
+                self._all_file_paths = _get_child_paths_threaded(data) | {
+                    _fix_path(x) for x in data
+                }
             except FileNotFoundError as e:
                 if _from_root:
                     raise TypeError(
-                        "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
+                        "When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
                     ) from e
                 raise e
+            if self.level:
+                self._all_file_paths = [
+                    path for path in self._all_file_paths if self.level in path
+                ]
             self._df = self._create_metadata_df(self._all_file_paths)
             return

@@ -1968,7 +2000,9 @@ class ImageCollection(_ImageBase):

         self._df = self._create_metadata_df(self._all_file_paths)

-    def groupby(
+    def groupby(
+        self, by: str | list[str], copy: bool = True, **kwargs
+    ) -> ImageCollectionGroupBy:
         """Group the Collection by Image or Band attribute(s)."""
         df = pd.DataFrame(
             [(i, img) for i, img in enumerate(self) for _ in img],
@@ -1995,8 +2029,10 @@ class ImageCollection(_ImageBase):
         return ImageCollectionGroupBy(
             sorted(
                 parallel(
-                    joblib.delayed(_copy_and_add_df_parallel)(
-
+                    joblib.delayed(_copy_and_add_df_parallel)(
+                        group_values, group_df, self, copy
+                    )
+                    for group_values, group_df in df.groupby(by, **kwargs)
                 )
             ),
             by=by,
@@ -2037,6 +2073,51 @@ class ImageCollection(_ImageBase):

         return self

+    def pixelwise(
+        self,
+        func: Callable,
+        kwargs: dict | None = None,
+        index_aligned_kwargs: dict | None = None,
+        masked: bool = True,
+    ) -> np.ndarray | tuple[np.ndarray] | None:
+        """Run a function for each pixel.
+
+        The function should take a 1d array as first argument. This will be
+        the pixel values for all bands in all images in the collection.
+        """
+        values = np.array([band.values for img in self for band in img])
+
+        if (
+            masked
+            and self.nodata is not None
+            and hasattr(next(iter(next(iter(self)))).values, "mask")
+        ):
+            mask_array = np.array(
+                [
+                    (band.values.mask) | (band.values.data == self.nodata)
+                    for img in self
+                    for band in img
+                ]
+            )
+        elif masked and self.nodata is not None:
+            mask_array = np.array(
+                [band.values == self.nodata for img in self for band in img]
+            )
+        elif masked:
+            mask_array = np.array([band.values.mask for img in self for band in img])
+        else:
+            mask_array = None
+
+        return pixelwise(
+            func=func,
+            values=values,
+            mask_array=mask_array,
+            index_aligned_kwargs=index_aligned_kwargs,
+            kwargs=kwargs,
+            processes=self.processes,
+            nodata=self.nodata or np.nan,
+        )
+
     def get_unique_band_ids(self) -> list[str]:
         """Get a list of unique band_ids across all images."""
         return list({band.band_id for img in self for band in img})
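`ImageCollection.pixelwise` stacks every band of every image into a (n_bands, height, width) array and applies a function to the 1d vector of values at each pixel, skipping masked/nodata cells. The module-level `pixelwise` helper it delegates to is not part of this diff, so the core reduction is only approximated here with `np.apply_along_axis` on synthetic data:

    import numpy as np

    # Three "bands" of a 2x2 raster stacked along axis 0.
    values = np.array(
        [
            [[1.0, 2.0], [3.0, np.nan]],
            [[2.0, 2.0], [3.0, np.nan]],
            [[3.0, 2.0], [3.0, np.nan]],
        ]
    )

    def per_pixel_median(pixel_values: np.ndarray) -> float:
        # pixel_values is the 1d series of values for one pixel across all bands.
        valid = pixel_values[~np.isnan(pixel_values)]
        return float(np.median(valid)) if valid.size else np.nan

    result = np.apply_along_axis(per_pixel_median, 0, values)
    assert result.shape == (2, 2)
    assert result[0, 0] == 2.0 and np.isnan(result[1, 1])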
@@ -2142,8 +2223,7 @@ class ImageCollection(_ImageBase):
|
|
|
2142
2223
|
arr,
|
|
2143
2224
|
bounds=bounds,
|
|
2144
2225
|
crs=crs,
|
|
2145
|
-
|
|
2146
|
-
**self._common_init_kwargs,
|
|
2226
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2147
2227
|
)
|
|
2148
2228
|
|
|
2149
2229
|
band._merged = True
|
|
@@ -2216,7 +2296,7 @@ class ImageCollection(_ImageBase):
|
|
|
2216
2296
|
bounds=out_bounds,
|
|
2217
2297
|
crs=crs,
|
|
2218
2298
|
band_id=band_id,
|
|
2219
|
-
**self._common_init_kwargs,
|
|
2299
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2220
2300
|
)
|
|
2221
2301
|
)
|
|
2222
2302
|
|
|
@@ -2329,22 +2409,11 @@ class ImageCollection(_ImageBase):
|
|
|
2329
2409
|
):
|
|
2330
2410
|
return self
|
|
2331
2411
|
|
|
2332
|
-
# if self.processes == 1:
|
|
2333
|
-
# for img in self:
|
|
2334
|
-
# for band in img:
|
|
2335
|
-
# band.load(
|
|
2336
|
-
# bounds=bounds,
|
|
2337
|
-
# indexes=indexes,
|
|
2338
|
-
# file_system=file_system,
|
|
2339
|
-
# **kwargs,
|
|
2340
|
-
# )
|
|
2341
|
-
# return self
|
|
2342
|
-
|
|
2343
2412
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2344
2413
|
if self.masking:
|
|
2345
|
-
parallel(
|
|
2346
|
-
joblib.delayed(
|
|
2347
|
-
img
|
|
2414
|
+
masks: list[np.ndarray] = parallel(
|
|
2415
|
+
joblib.delayed(_read_mask_array)(
|
|
2416
|
+
img,
|
|
2348
2417
|
bounds=bounds,
|
|
2349
2418
|
indexes=indexes,
|
|
2350
2419
|
file_system=file_system,
|
|
@@ -2352,14 +2421,6 @@ class ImageCollection(_ImageBase):
|
|
|
2352
2421
|
)
|
|
2353
2422
|
for img in self
|
|
2354
2423
|
)
|
|
2355
|
-
for img in self:
|
|
2356
|
-
for band in img:
|
|
2357
|
-
band._mask = img.mask
|
|
2358
|
-
|
|
2359
|
-
# print({img.mask.has_array for img in self })
|
|
2360
|
-
# print({band.mask.has_array for img in self for band in img})
|
|
2361
|
-
|
|
2362
|
-
# with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2363
2424
|
|
|
2364
2425
|
parallel(
|
|
2365
2426
|
joblib.delayed(_load_band)(
|
|
@@ -2367,34 +2428,86 @@ class ImageCollection(_ImageBase):
|
|
|
2367
2428
|
bounds=bounds,
|
|
2368
2429
|
indexes=indexes,
|
|
2369
2430
|
file_system=file_system,
|
|
2431
|
+
_masking=None,
|
|
2370
2432
|
**kwargs,
|
|
2371
2433
|
)
|
|
2372
2434
|
for img in self
|
|
2373
2435
|
for band in img
|
|
2374
2436
|
)
|
|
2375
2437
|
|
|
2438
|
+
if self.masking:
|
|
2439
|
+
for img, mask_array in zip(self, masks, strict=True):
|
|
2440
|
+
for band in img:
|
|
2441
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
2442
|
+
band.values.mask |= mask_array
|
|
2443
|
+
else:
|
|
2444
|
+
band.values = np.ma.array(
|
|
2445
|
+
band.values, mask=mask_array, fill_value=self.nodata
|
|
2446
|
+
)
|
|
2447
|
+
|
|
2376
2448
|
return self
|
|
2377
2449
|
|
|
2378
2450
|
def clip(
|
|
2379
2451
|
self,
|
|
2380
2452
|
mask: Geometry | GeoDataFrame | GeoSeries,
|
|
2381
|
-
|
|
2453
|
+
dropna: bool = True,
|
|
2454
|
+
copy: bool = True,
|
|
2382
2455
|
) -> "ImageCollection":
|
|
2383
|
-
"""Clip all image Bands
|
|
2384
|
-
|
|
2385
|
-
for img in self:
|
|
2386
|
-
for band in img:
|
|
2387
|
-
band.clip(mask, **kwargs)
|
|
2388
|
-
return self
|
|
2456
|
+
"""Clip all image Bands while preserving bounds."""
|
|
2457
|
+
copied = self.copy() if copy else self
|
|
2389
2458
|
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2459
|
+
copied._images = [img for img in copied if img.union_all()]
|
|
2460
|
+
|
|
2461
|
+
fill: int = self.nodata or 0
|
|
2462
|
+
|
|
2463
|
+
common_band_from_geopandas_kwargs = dict(
|
|
2464
|
+
gdf=to_gdf(mask)[["geometry"]],
|
|
2465
|
+
default_value=1,
|
|
2466
|
+
fill=fill,
|
|
2467
|
+
)
|
|
2468
|
+
|
|
2469
|
+
for img in copied:
|
|
2470
|
+
img._rounded_bounds = tuple(int(x) for x in img.bounds)
|
|
2471
|
+
|
|
2472
|
+
for bounds in {img._rounded_bounds for img in copied}:
|
|
2473
|
+
shapes = {
|
|
2474
|
+
band.values.shape
|
|
2475
|
+
for img in copied
|
|
2394
2476
|
for band in img
|
|
2395
|
-
|
|
2477
|
+
if img._rounded_bounds == bounds
|
|
2478
|
+
}
|
|
2479
|
+
if len(shapes) != 1:
|
|
2480
|
+
raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")
|
|
2396
2481
|
|
|
2397
|
-
|
|
2482
|
+
mask_array: np.ndarray = Band.from_geopandas(
|
|
2483
|
+
**common_band_from_geopandas_kwargs,
|
|
2484
|
+
out_shape=next(iter(shapes)),
|
|
2485
|
+
bounds=bounds,
|
|
2486
|
+
).values
|
|
2487
|
+
|
|
2488
|
+
is_not_polygon = mask_array == fill
|
|
2489
|
+
|
|
2490
|
+
for img in copied:
|
|
2491
|
+
if img._rounded_bounds != bounds:
|
|
2492
|
+
continue
|
|
2493
|
+
|
|
2494
|
+
for band in img:
|
|
2495
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
2496
|
+
band._values.mask |= is_not_polygon
|
|
2497
|
+
else:
|
|
2498
|
+
band._values = np.ma.array(
|
|
2499
|
+
band.values, mask=is_not_polygon, fill_value=band.nodata
|
|
2500
|
+
)
|
|
2501
|
+
|
|
2502
|
+
for img in copied:
|
|
2503
|
+
del img._rounded_bounds
|
|
2504
|
+
|
|
2505
|
+
if dropna:
|
|
2506
|
+
copied.images = [
|
|
2507
|
+
img for img in copied if any(np.sum(band.values) for band in img)
|
|
2508
|
+
]
|
|
2509
|
+
|
|
2510
|
+
return copied
|
|
2398
2511
|
|
|
2399
2512
|
def _set_bbox(
|
|
2400
2513
|
self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
|
|
@@ -2405,17 +2518,12 @@ class ImageCollection(_ImageBase):
|
|
|
2405
2518
|
if self._images is not None:
|
|
2406
2519
|
for img in self._images:
|
|
2407
2520
|
img._bbox = self._bbox
|
|
2408
|
-
if img.mask is not None:
|
|
2409
|
-
img.mask._bbox = self._bbox
|
|
2410
2521
|
if img.bands is None:
|
|
2411
2522
|
continue
|
|
2412
2523
|
for band in img:
|
|
2413
2524
|
band._bbox = self._bbox
|
|
2414
2525
|
bounds = box(*band._bbox).intersection(box(*band.bounds))
|
|
2415
2526
|
band._bounds = to_bbox(bounds) if not bounds.is_empty else None
|
|
2416
|
-
if band.mask is not None:
|
|
2417
|
-
band.mask._bbox = self._bbox
|
|
2418
|
-
band.mask._bounds = band._bounds
|
|
2419
2527
|
|
|
2420
2528
|
return self
|
|
2421
2529
|
|
|
@@ -2521,7 +2629,7 @@ class ImageCollection(_ImageBase):
|
|
|
2521
2629
|
**kwargs,
|
|
2522
2630
|
)
|
|
2523
2631
|
|
|
2524
|
-
return
|
|
2632
|
+
return combine_by_coords(list(xarrs.values()))
|
|
2525
2633
|
# return Dataset(xarrs)
|
|
2526
2634
|
|
|
2527
2635
|
def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
|
|
@@ -2534,6 +2642,9 @@ class ImageCollection(_ImageBase):
|
|
|
2534
2642
|
try:
|
|
2535
2643
|
name = band.name
|
|
2536
2644
|
except AttributeError:
|
|
2645
|
+
name = None
|
|
2646
|
+
|
|
2647
|
+
if name is None:
|
|
2537
2648
|
name = f"{self.__class__.__name__}({i})"
|
|
2538
2649
|
|
|
2539
2650
|
if name not in out:
|
|
@@ -2594,10 +2705,6 @@ class ImageCollection(_ImageBase):
|
|
|
2594
2705
|
|
|
2595
2706
|
return copied
|
|
2596
2707
|
|
|
2597
|
-
def __or__(self, collection: "ImageCollection") -> "ImageCollection":
|
|
2598
|
-
"""Concatenate the collection with another collection."""
|
|
2599
|
-
return concat_image_collections([self, collection])
|
|
2600
|
-
|
|
2601
2708
|
def __iter__(self) -> Iterator[Image]:
|
|
2602
2709
|
"""Iterate over the images."""
|
|
2603
2710
|
return iter(self.images)
|
|
@@ -2607,14 +2714,16 @@ class ImageCollection(_ImageBase):
         return len(self.images)

     def __getattr__(self, attr: str) -> Any:
-        """Make iterable of
+        """Make iterable of metadata attribute."""
         if attr in (self.metadata_attributes or {}):
             return self._metadata_attribute_collection_type(
                 [getattr(img, attr) for img in self]
             )
         return super().__getattribute__(attr)

-    def __getitem__(
+    def __getitem__(
+        self, item: int | slice | Sequence[int | bool]
+    ) -> "Image | ImageCollection":
         """Select one Image by integer index, or multiple Images by slice, list of int."""
         if isinstance(item, int):
             return self.images[item]
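
Indexing sketch matching the new __getitem__ signature above (collection is a hypothetical ImageCollection):

    first_image = collection[0]      # int -> Image
    first_three = collection[:3]     # slice -> ImageCollection
    picked = collection[[0, 2, 5]]   # list of int -> ImageCollection
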
@@ -2653,14 +2762,14 @@ class ImageCollection(_ImageBase):
         return copied

     @property
-    def
+    def date(self) -> Any:
         """List of image dates."""
-        return [img.date for img in self]
+        return self._metadata_attribute_collection_type([img.date for img in self])

     @property
-    def image_paths(self) ->
+    def image_paths(self) -> Any:
         """List of image paths."""
-        return [img.path for img in self]
+        return self._metadata_attribute_collection_type([img.path for img in self])

     @property
     def images(self) -> list["Image"]:
@@ -2678,21 +2787,6 @@ class ImageCollection(_ImageBase):
                 **self._common_init_kwargs,
             )

-        if self.masking is not None:
-            images = []
-            for image in self._images:
-                # TODO why this loop?
-                try:
-                    if not isinstance(image.mask, Band):
-                        raise ValueError()
-                    images.append(image)
-                except ValueError as e:
-                    raise e
-                    continue
-            self._images = images
-            for image in self._images:
-                image._bands = [band for band in image if band.band_id is not None]
-
         self._images = [img for img in self if len(img)]

         if self._should_be_sorted:
@@ -2722,24 +2816,22 @@ class ImageCollection(_ImageBase):

     @images.setter
     def images(self, new_value: list["Image"]) -> list["Image"]:
-
-        if not
+        new_value = list(new_value)
+        if not new_value:
+            self._images = new_value
+            return
+        if all(isinstance(x, Band) for x in new_value):
+            if len(new_value) != len(self):
+                raise ValueError("'images' must have same length as number of images.")
+            new_images = []
+            for i, img in enumerate(self):
+                img._bands = [new_value[i]]
+                new_images.append(img)
+            self._images = new_images
+            return
+        if not all(isinstance(x, Image) for x in new_value):
             raise TypeError("images should be a sequence of Image.")
-
-    def __repr__(self) -> str:
-        """String representation."""
-        root = ""
-        if self.path is not None:
-            data = f"'{self.path}'"
-        elif all(img.path is not None for img in self):
-            data = [img.path for img in self]
-            parents = {str(Path(path).parent) for path in data}
-            if len(parents) == 1:
-                data = [Path(path).name for path in data]
-                root = f" root='{next(iter(parents))}',"
-        else:
-            data = [img for img in self]
-        return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
+        self._images = new_value

     def union_all(self) -> Polygon | MultiPolygon:
         """(Multi)Polygon representing the union of all image bounds."""
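
The rewritten setter above now also accepts one Band per Image, replacing each image's band list in place. A usage sketch under that assumption (collection is a hypothetical ImageCollection):

    one_band_per_image = [next(iter(img)) for img in collection]  # e.g. a derived band per image
    collection.images = one_band_per_image
    # every Image in the collection now holds exactly the single Band assigned to it
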
@@ -2796,7 +2888,6 @@ class ImageCollection(_ImageBase):
         if "date" in x_var and subcollection._should_be_sorted:
             subcollection._images = list(sorted(subcollection._images))

-        y = np.array([band.values for img in subcollection for band in img])
         if "date" in x_var and subcollection._should_be_sorted:
             x = np.array(
                 [
@@ -2813,120 +2904,35 @@ class ImageCollection(_ImageBase):
                    - pd.Timestamp(np.min(x))
                ).days
         else:
-            x = np.arange(0,
-
-
-
-
-
-
-
-
-
-
-
+            x = np.arange(0, sum(1 for img in subcollection for band in img))
+
+        subcollection.pixelwise(
+            _plot_pixels_1d,
+            kwargs=dict(
+                alpha=alpha,
+                x_var=x_var,
+                y_label=y_label,
+                rounding=rounding,
+                first_date=first_date,
+                figsize=figsize,
+            ),
+            index_aligned_kwargs=dict(x=x),
         )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                this_x = this_x[condition]
-
-                coef, intercept = np.linalg.lstsq(
-                    np.vstack([this_x, np.ones(this_x.shape[0])]).T,
-                    this_y,
-                    rcond=None,
-                )[0]
-                predicted = np.array([intercept + coef * x for x in this_x])
-
-                predicted_start = predicted[0]
-                predicted_end = predicted[-1]
-                predicted_change = predicted_end - predicted_start
-
-                # Degrees of freedom
-                dof = len(this_x) - 2
-
-                # 95% confidence interval
-                t_val = stats.t.ppf(1 - alpha / 2, dof)
-
-                # Mean squared error of the residuals
-                mse = np.sum((this_y - predicted) ** 2) / dof
-
-                # Calculate the standard error of predictions
-                pred_stderr = np.sqrt(
-                    mse
-                    * (
-                        1 / len(this_x)
-                        + (this_x - np.mean(this_x)) ** 2
-                        / np.sum((this_x - np.mean(this_x)) ** 2)
-                    )
-                )
-
-                # Calculate the confidence interval for predictions
-                ci_lower = predicted - t_val * pred_stderr
-                ci_upper = predicted + t_val * pred_stderr
-
-                fig = plt.figure(figsize=figsize)
-                ax = fig.add_subplot(1, 1, 1)
-
-                ax.scatter(this_x, this_y, color="#2c93db")
-                ax.plot(this_x, predicted, color="#e0436b")
-                ax.fill_between(
-                    this_x,
-                    ci_lower,
-                    ci_upper,
-                    color="#e0436b",
-                    alpha=0.2,
-                    label=f"{int(alpha*100)}% CI",
-                )
-                plt.title(
-                    f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
-                    f"pred change: {round(predicted_change, rounding)}, "
-                    f"pred start: {round(predicted_start, rounding)}, "
-                    f"pred end: {round(predicted_end, rounding)}"
-                )
-                plt.xlabel(x_var)
-                plt.ylabel(y_label)
-
-                if x_var == "date":
-                    date_labels = pd.to_datetime(
-                        [first_date + pd.Timedelta(days=int(day)) for day in this_x]
-                    )
-
-                    _, unique_indices = np.unique(
-                        date_labels.strftime("%Y-%m"), return_index=True
-                    )
-
-                    unique_x = np.array(this_x)[unique_indices]
-                    unique_labels = date_labels[unique_indices].strftime("%Y-%m")
-
-                    ax.set_xticks(unique_x)
-                    ax.set_xticklabels(unique_labels, rotation=45, ha="right")
-                    # ax.tick_params(axis="x", length=10, width=2)
-
-                plt.show()
-
-
-def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
-    for regex in regexes:
-        try:
-            return re.search(regex, xml_file)
-        except (TypeError, AttributeError):
-            continue
-    raise ValueError(
-        f"Could not find processing_baseline info from {regexes} in {xml_file}"
-    )
+    def __repr__(self) -> str:
+        """String representation."""
+        root = ""
+        if self.path is not None:
+            data = f"'{self.path}'"
+        elif all(img.path is not None for img in self):
+            data = [img.path for img in self]
+            parents = {str(Path(path).parent) for path in data}
+            if len(parents) == 1:
+                data = [Path(path).name for path in data]
+                root = f" root='{next(iter(parents))}',"
+        else:
+            data = [img for img in self]
+        return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"


 class Sentinel2Config:
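
The per-pixel regression plot is now driven by pixelwise(_plot_pixels_1d, ...) instead of the inline loop removed above; the statistics themselves are unchanged. A standalone sketch of the same least-squares fit and confidence band on synthetic data, without plotting:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    x = np.arange(10, dtype=float)
    y = 2.0 * x + 1.0 + rng.normal(0, 0.5, 10)

    coef, intercept = np.linalg.lstsq(
        np.vstack([x, np.ones(x.shape[0])]).T, y, rcond=None
    )[0]
    predicted = intercept + coef * x

    dof = len(x) - 2                          # degrees of freedom
    t_val = stats.t.ppf(1 - 0.05 / 2, dof)    # 95% two-sided t value
    mse = np.sum((y - predicted) ** 2) / dof  # residual mean squared error
    pred_stderr = np.sqrt(
        mse * (1 / len(x) + (x - x.mean()) ** 2 / np.sum((x - x.mean()) ** 2))
    )
    ci_lower, ci_upper = predicted - t_val * pred_stderr, predicted + t_val * pred_stderr
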
@@ -3040,9 +3046,6 @@ class Sentinel2Band(Sentinel2Config, Band):
     }

     def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
-        if self.is_mask:
-            return None
-
         pat = re.compile(
             r"""
             <BOA_ADD_OFFSET\s*
@@ -3058,7 +3061,7 @@ class Sentinel2Band(Sentinel2Config, Band):
         except (TypeError, AttributeError, KeyError) as e:
             raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
         if not matches:
-
+            return None

         dict_ = (
             pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
@@ -3121,7 +3124,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
     def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
         """ImageCollection with Sentinel2 specific name variables and path regexes."""
         level = kwargs.get("level", None_)
-        if callable(level) and
+        if callable(level) and level() is None:
             raise ValueError("Must specify level for Sentinel2Collection.")
         super().__init__(data=data, **kwargs)

@@ -3146,10 +3149,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):


 def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
-    """
-
-    Same as using the union operator |.
-    """
+    """Concatenate ImageCollections."""
     resolutions = {x.res for x in collections}
     if len(resolutions) > 1:
         raise ValueError(f"resoultion mismatch. {resolutions}")
@@ -3185,8 +3185,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
    raise ValueError("array must be 2 or 3 dimensional")


-def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
-
+def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
+    resx, resy = _res_as_tuple(res)
+
+    gradient_x, gradient_y = np.gradient(array, resx, resy)

     gradient = abs(gradient_x) + abs(gradient_y)

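
_slope_2d now accepts res as a single number or an (x, y) pair via _res_as_tuple, so np.gradient gets one spacing per axis. A sketch of the same call on a synthetic surface (not sgis data):

    import numpy as np

    dem = np.outer(np.arange(5, dtype=float), np.ones(5))  # tilted plane
    resx, resy = 10.0, 20.0                                 # non-square pixels
    gradient_x, gradient_y = np.gradient(dem, resx, resy)
    slope = np.abs(gradient_x) + np.abs(gradient_y)
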
@@ -3273,7 +3275,7 @@ def _get_images(
     return images


-class
+class _ArrayNotLoadedError(ValueError):
     """Arrays are not loaded."""


@@ -3351,18 +3353,22 @@ def _intesects(x, other) -> bool:


 def _copy_and_add_df_parallel(
-
+    group_values: tuple[Any, ...],
+    group_df: pd.DataFrame,
+    self: ImageCollection,
+    copy: bool,
 ) -> tuple[tuple[Any], ImageCollection]:
-    copied = self.copy()
+    copied = self.copy() if copy else self
     copied.images = [
-        img.copy()
+        img.copy() if copy else img
+        for img in group_df.drop_duplicates("_image_idx")["_image_instance"]
     ]
-    if "band_id" in
-        band_ids = set(
+    if "band_id" in group_df:
+        band_ids = set(group_df["band_id"].values)
         for img in copied.images:
             img._bands = [band for band in img if band.band_id in band_ids]

-    return (
+    return (group_values, copied)


 def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
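
The explicit group_values/group_df parameters suggest this helper is fed from a pandas groupby, roughly like the following (an assumption for illustration; df, its grouping columns and collection are hypothetical):

    results = [
        _copy_and_add_df_parallel(group_values, group_df, collection, copy=True)
        for group_values, group_df in df.groupby(["tile", "date"])
    ]
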
@@ -3388,15 +3394,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
         return rasterio.open(file)


-def
+def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
+    mask_band_id = self.masking["band_id"]
+    mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
+    if len(mask_paths) > 1:
+        raise ValueError(
+            f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
+        )
+    elif not mask_paths:
+        raise ValueError(
+            f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
+            + str([Path(x).name for x in _ls_func(self.path)])
+        )
+
+    band = Band(
+        next(iter(mask_paths)),
+        **{**self._common_init_kwargs, "metadata": None},
+    )
+    band.load(**kwargs)
+    boolean_mask = np.isin(band.values, list(self.masking["values"]))
+    return boolean_mask
+
+
+def _load_band(band: Band, **kwargs) -> Band:
     return band.load(**kwargs)


-def _band_apply(band: Band, func: Callable, **kwargs) ->
+def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
     return band.apply(func, **kwargs)


-def _clip_band(band: Band, mask, **kwargs) ->
+def _clip_band(band: Band, mask, **kwargs) -> Band:
     return band.clip(mask, **kwargs)


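
The core of the new _read_mask_array is the np.isin call: pixels whose mask-band value is listed in masking["values"] become True. A standalone sketch with made-up class codes (not real Sentinel-2 SCL values):

    import numpy as np

    mask_band_values = np.array([[0, 3, 8], [9, 4, 0]])
    masking_values = {3, 8, 9}
    boolean_mask = np.isin(mask_band_values, list(masking_values))
    # array([[False,  True,  True],
    #        [ True, False, False]])
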
@@ -3441,126 +3469,148 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     return binary_erosion(arr, structure=structure).astype(dtype)


-def
+def _plot_pixels_1d(
+    y: np.ndarray,
+    x: np.ndarray,
+    alpha: float,
+    x_var: str,
+    y_label: str,
+    rounding: int,
+    figsize: tuple,
+    first_date: pd.Timestamp,
+) -> None:
+    coef, intercept = np.linalg.lstsq(
+        np.vstack([x, np.ones(x.shape[0])]).T,
+        y,
+        rcond=None,
+    )[0]
+    predicted = np.array([intercept + coef * x for x in x])
+
+    predicted_start = predicted[0]
+    predicted_end = predicted[-1]
+    predicted_change = predicted_end - predicted_start
+
+    # Degrees of freedom
+    dof = len(x) - 2
+
+    # 95% confidence interval
+    t_val = stats.t.ppf(1 - alpha / 2, dof)
+
+    # Mean squared error of the residuals
+    mse = np.sum((y - predicted) ** 2) / dof
+
+    # Calculate the standard error of predictions
+    pred_stderr = np.sqrt(
+        mse * (1 / len(x) + (x - np.mean(x)) ** 2 / np.sum((x - np.mean(x)) ** 2))
+    )

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        [0.25, 0.0, 0.05],
-        [0.3, 0.1, 0.1],
-        [0.35, 0.2, 0.15],
-        [0.4, 0.3, 0.2],
-        [0.45, 0.4, 0.25],
-        [0.5, 0.5, 0.3],
-        [0.55, 0.6, 0.35],
-        [0.7, 0.9, 0.5],
-    ]
-    green = [
-        [0.6, 0.6, 0.6],
-        [0.4, 0.7, 0.4],
-        [0.3, 0.8, 0.3],
-        [0.25, 0.4, 0.25],
-        [0.2, 0.5, 0.2],
-        [0.10, 0.7, 0.10],
-        [0, 0.9, 0],
-    ]
+    # Calculate the confidence interval for predictions
+    ci_lower = predicted - t_val * pred_stderr
+    ci_upper = predicted + t_val * pred_stderr
+
+    fig = plt.figure(figsize=figsize)
+    ax = fig.add_subplot(1, 1, 1)
+
+    ax.scatter(x, y, color="#2c93db")
+    ax.plot(x, predicted, color="#e0436b")
+    ax.fill_between(
+        x,
+        ci_lower,
+        ci_upper,
+        color="#e0436b",
+        alpha=0.2,
+        label=f"{int(alpha*100)}% CI",
+    )
+    plt.title(
+        f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
+        f"pred change: {round(predicted_change, rounding)}, "
+        f"pred start: {round(predicted_start, rounding)}, "
+        f"pred end: {round(predicted_end, rounding)}"
+    )
+    plt.xlabel(x_var)
+    plt.ylabel(y_label)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if x_var == "date":
+        date_labels = pd.to_datetime(
+            [first_date + pd.Timedelta(days=int(day)) for day in x]
+        )
+
+        _, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)
+
+        unique_x = np.array(x)[unique_indices]
+        unique_labels = date_labels[unique_indices].strftime("%Y-%m")
+
+        ax.set_xticks(unique_x)
+        ax.set_xticklabels(unique_labels, rotation=45, ha="right")
+
+    plt.show()
+
+
+def pixelwise(
+    func: Callable,
+    values: np.ndarray,
+    mask_array: np.ndarray | None = None,
+    index_aligned_kwargs: dict | None = None,
+    kwargs: dict | None = None,
+    processes: int = 1,
+    nodata=np.nan,
+) -> Any:
+    """Run a function for each pixel of a 3d array."""
+    index_aligned_kwargs = index_aligned_kwargs or {}
+    kwargs = kwargs or {}
+
+    if mask_array is not None:
+        not_all_missing = np.all(mask_array, axis=0) == False
+
+    else:
+        mask_array = np.full(values.shape, False)
+        not_all_missing = np.full(values.shape[1:], True)
+
+    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
+
+    def select_pixel_values(row: int, col: int) -> np.ndarray:
+        return values[~mask_array[:, row, col], row, col]
+
+    with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
+        results: list[tuple[np.float64, np.float64]] = parallel(
+            joblib.delayed(func)(
+                select_pixel_values(row, col),
+                **kwargs,
+                **{
+                    key: value[~mask_array[:, row, col]]
+                    for key, value in index_aligned_kwargs.items()
+                },
+            )
+            for row, col in (
+                zip(nonmissing_row_indices, nonmissing_col_indices, strict=True)
+            )
+        )
+
+    if all(x is None for x in results):
+        return
+
+    try:
+        n_out_arrays = len(next(iter(results)))
+    except TypeError:
+        n_out_arrays = 1
+
+    out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))
+
+    counter = 0
+    for row, col in zip(nonmissing_row_indices, nonmissing_col_indices, strict=True):
+        these_results = results[counter]
+        if these_results is None:
+            counter += 1
+            continue
+        for i, arr in enumerate(out_arrays):
+            try:
+                arr[row, col] = these_results[i]
+            except TypeError:
+                arr[row, col] = these_results
+        counter += 1
+    assert counter == len(results), (counter, len(results))
+
+    if len(out_arrays) == 1:
+        return out_arrays[0]

-    return
+    return out_arrays
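
A usage sketch for the new module-level pixelwise above, assuming it is imported from sgis.raster.image_collection; the per-pixel function here returns two values, so two output arrays come back (synthetic data):

    import numpy as np

    from sgis.raster.image_collection import pixelwise

    values = np.random.default_rng(0).random((5, 4, 3))  # (n_images, rows, cols)
    mask_array = np.zeros(values.shape, dtype=bool)      # nothing masked out

    def mean_and_std(pixel_values: np.ndarray) -> tuple[float, float]:
        return float(pixel_values.mean()), float(pixel_values.std())

    means, stds = pixelwise(mean_and_std, values, mask_array=mask_array)
    print(means.shape, stds.shape)  # (4, 3) (4, 3)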
|