ssb-sgis 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/geopandas_tools/conversion.py +6 -5
- sgis/helpers.py +15 -5
- sgis/io/dapla_functions.py +2 -0
- sgis/io/opener.py +2 -0
- sgis/maps/explore.py +24 -14
- sgis/maps/legend.py +3 -1
- sgis/maps/map.py +4 -0
- sgis/maps/thematicmap.py +53 -26
- sgis/maps/tilesources.py +11 -29
- sgis/raster/base.py +60 -23
- sgis/raster/image_collection.py +750 -667
- sgis/raster/regex.py +2 -2
- sgis/raster/zonal.py +1 -58
- {ssb_sgis-1.0.7.dist-info → ssb_sgis-1.0.9.dist-info}/METADATA +1 -2
- {ssb_sgis-1.0.7.dist-info → ssb_sgis-1.0.9.dist-info}/RECORD +17 -17
- {ssb_sgis-1.0.7.dist-info → ssb_sgis-1.0.9.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.7.dist-info → ssb_sgis-1.0.9.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
@@ -6,6 +6,7 @@ import os
import random
import re
import time
+ from abc import abstractmethod
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Iterator

@@ -26,7 +27,6 @@ import rasterio
from affine import Affine
from geopandas import GeoDataFrame
from geopandas import GeoSeries
- from matplotlib.colors import LinearSegmentedColormap
from pandas.api.types import is_dict_like
from rasterio.enums import MergeAlg
from scipy import stats

@@ -41,11 +41,8 @@ from shapely.geometry import Polygon

try:
import dapla as dp
- from dapla.gcs import GCSFileSystem
except ImportError:
-
- class GCSFileSystem:
- """Placeholder."""
+ pass


try:

@@ -55,7 +52,7 @@ except ImportError:
class exceptions:
"""Placeholder."""

- class RefreshError:
+ class RefreshError(Exception):
"""Placeholder."""


@@ -74,9 +71,9 @@ try:
except ImportError:
pass
try:
- import xarray as xr
from xarray import DataArray
from xarray import Dataset
+ from xarray import combine_by_coords
except ImportError:

class DataArray:

@@ -85,6 +82,9 @@ except ImportError:
class Dataset:
"""Placeholder."""

+ def combine_by_coords(*args, **kwargs) -> None:
+ raise ImportError("xarray")
+

from ..geopandas_tools.bounds import get_total_bounds
from ..geopandas_tools.conversion import to_bbox

@@ -95,13 +95,17 @@ from ..geopandas_tools.general import get_common_crs
from ..helpers import _fix_path
from ..helpers import get_all_files
from ..helpers import get_numpy_func
+ from ..helpers import is_method
+ from ..helpers import is_property
from ..io._is_dapla import is_dapla
from ..io.opener import opener
from . import sentinel_config as config
from .base import _array_to_geojson
from .base import _gdf_to_arr
+ from .base import _get_res_from_bounds
from .base import _get_shape_from_bounds
from .base import _get_transform_from_bounds
+ from .base import _res_as_tuple
from .base import get_index_mapper
from .indices import ndvi
from .regex import _extract_regex_match_from_string

@@ -140,8 +144,6 @@ DATE_RANGES_TYPE = (
| tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
)

- FILENAME_COL_SUFFIX = "_filename"
-
DEFAULT_FILENAME_REGEX = r"""
.*?
(?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group

@@ -161,12 +163,12 @@ ALLOWED_INIT_KWARGS = [
"filename_regexes",
"all_bands",
"crs",
- "backend",
"masking",
"_merged",
+ "date",
]

-
+ _LOAD_COUNTER: int = 0


def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:

@@ -193,7 +195,7 @@ class ImageCollectionGroupBy:
Args:
data: Iterable of group values and ImageCollection groups.
by: list of group attributes.
- collection: ImageCollection
+ collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
"""
self.data = list(data)
self.by = by

@@ -288,7 +290,7 @@ class ImageCollectionGroupBy:

def __repr__(self) -> str:
"""String representation."""
- return f"{self.__class__.__name__}({len(self)})"
+ return f"{self.__class__.__name__}({len(self)}, by={self.by})"


@dataclass(frozen=True)

@@ -304,7 +306,11 @@ class BandMasking:


class None_:
- """Default
+ """Default None for args that are not allowed to be None."""
+
+ def __new__(cls) -> None:
+ """Always returns None."""
+ return None


class _ImageBase:
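Note: None_ acts as a sentinel for "argument not given". Instantiating the class returns plain None, and callers elsewhere in this file check callable(res) and res() is None to detect that the default was left untouched. A minimal standalone sketch of the same pattern (the resample function here is hypothetical, not part of sgis):

class _NotSet:
    """Stands in for 'argument not passed'; calling the class yields None."""

    def __new__(cls) -> None:
        return None


def resample(res: "int | type[_NotSet]" = _NotSet) -> int:
    # callable(res) is only True when the default class object was left in place,
    # so a passed value (even an explicit None) can be told apart from "not passed".
    if callable(res) and res() is None:
        raise TypeError("Must specify 'res'")
    return res


print(resample(10))  # -> 10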
@@ -315,58 +321,71 @@ class _ImageBase:

def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:

- self._mask = None
self._bounds = None
- self._merged = False
- self._from_array = False
- self._from_gdf = False
- self.metadata_attributes = self.metadata_attributes or {}
self._path = None
- self._metadata_from_xml = False
-
self._bbox = to_bbox(bbox) if bbox is not None else None

- self.
+ self.metadata_attributes = self.metadata_attributes or {}

- if
-
- self.filename_regexes = (self.filename_regexes,)
- self.filename_patterns = [
- re.compile(regexes, flags=re.VERBOSE)
- for regexes in self.filename_regexes
- ]
+ if metadata is not None:
+ self.metadata = self._metadata_to_nested_dict(metadata)
else:
- self.
+ self.metadata = {}

-
-
- self.image_regexes = (self.image_regexes,)
- self.image_patterns = [
- re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
- ]
- else:
- self.image_patterns = ()
+ self.image_patterns = self._compile_regexes("image_regexes")
+ self.filename_patterns = self._compile_regexes("filename_regexes")

for key, value in kwargs.items():
+ error_obj = ValueError(
+ f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
+ )
if key in ALLOWED_INIT_KWARGS and key in dir(self):
-
+ self._safe_setattr(key, value, error_obj)
else:
- raise
-
-
+ raise error_obj
+
+ # attributes for debugging
+ self._metadata_from_xml = False
+ self._merged = False
+ self._from_array = False
+ self._from_geopandas = False
+
+ def _safe_setattr(
+ self, key: str, value: Any, error_obj: Exception | None = None
+ ) -> None:
+ if is_property(self, key):
+ setattr(self, f"_{key}", value)
+ elif is_method(self, key):
+ if error_obj is None:
+ raise AttributeError(f"Cannot set method '{key}'.")
+ raise error_obj
+ else:
+ setattr(self, key, value)
+
+ def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
+ regexes: tuple[str] | str = getattr(self, regex_attr)
+ if not regexes:
+ return ()
+ if isinstance(regexes, str):
+ regexes = (regexes,)
+ return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)

@staticmethod
def _metadata_to_nested_dict(
metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
- ) -> dict[str, dict[str, Any]]
-
-
+ ) -> dict[str, dict[str, Any]]:
+ """Construct metadata dict from dictlike, DataFrame or file path.
+
+ Extract metadata value:
+ >>> self.metadata[self.path]['cloud_cover_percentage']
+ """
if isinstance(metadata, (str | Path | os.PathLike)):
metadata = _read_parquet_func(metadata)

if isinstance(metadata, pd.DataFrame):

def is_scalar(x) -> bool:
+ """Check if scalar because 'truth value of Series is ambigous'."""
return not hasattr(x, "__len__") or len(x) <= 1

def na_to_none(x) -> None:
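Note: _safe_setattr routes keyword arguments onto a private backing attribute when the name is a read-only property, rejects methods, and otherwise sets the attribute directly. The is_property and is_method helpers come from sgis.helpers and are not shown in this diff; the sketch below approximates the idea with generic stand-ins and a hypothetical Thing class:

import inspect


def is_property(obj, key: str) -> bool:
    # assumption: a property is detected on the class, not the instance
    return isinstance(getattr(type(obj), key, None), property)


def is_method(obj, key: str) -> bool:
    return inspect.isfunction(getattr(type(obj), key, None))


class Thing:
    @property
    def res(self) -> int:
        return self._res


def safe_setattr(obj, key: str, value) -> None:
    if is_property(obj, key):
        setattr(obj, f"_{key}", value)  # write to the backing attribute instead
    elif is_method(obj, key):
        raise AttributeError(f"Cannot set method '{key}'.")
    else:
        setattr(obj, key, value)


thing = Thing()
safe_setattr(thing, "res", 10)
print(thing.res)  # -> 10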
@@ -374,15 +393,16 @@ class _ImageBase:
return x if not (is_scalar(x) and pd.isna(x)) else None

# to nested dict because pandas indexing gives rare KeyError with long strings
-
+ return {
_fix_path(path): {
attr: na_to_none(value) for attr, value in row.items()
}
for path, row in metadata.iterrows()
}
elif is_dict_like(metadata):
-
+ return {_fix_path(path): value for path, value in metadata.items()}

+ # try to allow custom types with dict-like indexing
return metadata

@property

@@ -392,7 +412,6 @@ class _ImageBase:
"res": self.res,
"bbox": self._bbox,
"nodata": self.nodata,
- "backend": self.backend,
"metadata": self.metadata,
}


@@ -406,19 +425,22 @@ class _ImageBase:
@property
def res(self) -> int:
"""Pixel resolution."""
+ # if self._res is None:
+ # if self.has_array:
+ # self._res = _get_res_from_bounds(self.bounds, self.values.shape)
+ # else:
+ # with opener(self.path) as file:
+ # with rasterio.open(file) as src:
+ # self._res = src.res
return self._res

- @
- def
-
- return self.union_all().centroid
+ @abstractmethod
+ def union_all(self) -> Polygon | MultiPolygon:
+ pass

def assign(self, **kwargs) -> "_ImageBase":
for key, value in kwargs.items():
-
- setattr(self, key, value)
- except AttributeError:
- setattr(self, f"_{key}", value)
+ self._safe_setattr(key, value)
return self

def _name_regex_searcher(

@@ -449,7 +471,10 @@ class _ImageBase:
)

def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
- """Create a dataframe with file paths and image paths that match regexes.
+ """Create a dataframe with file paths and image paths that match regexes.
+
+ Used in __init__ to select relevant paths fast.
+ """
df = pd.DataFrame({"file_path": list(file_paths)})

df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)

@@ -516,12 +541,14 @@ class _ImageBase:
class _ImageBandBase(_ImageBase):
"""Common parent class of Image and Band."""

- def intersects(
-
-
+ def intersects(
+ self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
+ ) -> bool:
+ if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
+ pyproj.CRS(geometry.crs)
):
- raise ValueError(f"crs mismatch: {self.crs} and {
- return self.union_all().intersects(to_shapely(
+ raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
+ return self.union_all().intersects(to_shapely(geometry))

def union_all(self) -> Polygon:
try:

@@ -530,20 +557,21 @@ class _ImageBandBase(_ImageBase):
return Polygon()

@property
- def
-
+ def centroid(self) -> Point:
+ """Centerpoint of the object."""
+ return self.union_all().centroid

@property
def year(self) -> str:
if hasattr(self, "_year") and self._year:
return self._year
- return self.date[:4]
+ return str(self.date)[:4]

@property
def month(self) -> str:
if hasattr(self, "_month") and self._month:
return self._month
- return
+ return str(self.date).replace("-", "").replace("/", "")[4:6]

@property
def name(self) -> str | None:

@@ -570,24 +598,25 @@ class _ImageBandBase(_ImageBase):
return self._name_regex_searcher("level", self.image_patterns)

def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
-
+ """Search through xml files for missing metadata attributes."""
self._metadata_from_xml = True

missing_metadata_attributes = {
-
- for
- if not hasattr(self,
+ attr: constructor_func
+ for attr, constructor_func in metadata_attributes.items()
+ if not hasattr(self, attr) or getattr(self, attr) is None
}

nonmissing_metadata_attributes = {
-
- for
- if
+ attr: getattr(self, attr)
+ for attr in metadata_attributes
+ if attr not in missing_metadata_attributes
}

if not missing_metadata_attributes:
return nonmissing_metadata_attributes

+ # read all xml content once
file_contents: list[str] = []
for path in self._all_file_paths:
if ".xml" not in path:

@@ -595,48 +624,63 @@ class _ImageBandBase(_ImageBase):
with _open_func(path, "rb") as file:
file_contents.append(file.read().decode("utf-8"))

-
+ def is_last_xml(i: int) -> bool:
+ return i == len(file_contents) - 1
+
+ for attr, value in missing_metadata_attributes.items():
results = None
- for i,
+ for i, file_content in enumerate(file_contents):
if isinstance(value, str) and value in dir(self):
- method
+ # method or a hardcoded value
+ value: Callable | Any = getattr(self, value)
+
+ if callable(value):
try:
- results =
+ results = value(file_content)
except _RegexError as e:
- if i
- raise e
+ if is_last_xml(i):
+ raise e.__class__(self.path, e) from e
continue
if results is not None:
break
-
-
+ elif (
+ isinstance(value, str)
+ or hasattr(value, "__iter__")
+ and all(isinstance(x, str | re.Pattern) for x in value)
+ ):
try:
- results = value
+ results = _extract_regex_match_from_string(file_content, value)
except _RegexError as e:
- if i
+ if is_last_xml(i):
raise e
-
-
-
+ elif value is not None:
+ results = value
+ break

-
- results = _extract_regex_match_from_string(filetext, value)
- except _RegexError as e:
- if i == len(self._all_file_paths) - 1:
- raise e
-
- missing_metadata_attributes[key] = results
+ missing_metadata_attributes[attr] = results

return missing_metadata_attributes | nonmissing_metadata_attributes

def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
"""Convert the raster to an xarray.DataArray."""
+ attrs = {"crs": self.crs}
+ for attr in set(self.metadata_attributes).union({"date"}):
+ try:
+ attrs[attr] = getattr(self, attr)
+ except Exception:
+ pass
+
if len(array.shape) == 2:
height, width = array.shape
dims = ["y", "x"]
elif len(array.shape) == 3:
height, width = array.shape[1:]
dims = ["band", "y", "x"]
+ elif not any(dim for dim in array.shape):
+ DataArray(
+ name=self.name or self.__class__.__name__,
+ attrs=attrs,
+ )
else:
raise ValueError(
f"Array should be 2 or 3 dimensional. Got shape {array.shape}"

@@ -644,13 +688,6 @@ class _ImageBandBase(_ImageBase):

coords = _generate_spatial_coords(transform, width, height)

- attrs = {"crs": self.crs}
- for attr in set(self.metadata_attributes).union({"date"}):
- try:
- attrs[attr] = getattr(self, attr)
- except Exception:
- pass
-
return DataArray(
array,
coords=coords,

@@ -664,14 +701,15 @@ class Band(_ImageBandBase):
"""Band holding a single 2 dimensional array representing an image band."""

cmap: ClassVar[str | None] = None
- backend: str = "numpy"

@classmethod
- def
+ def from_geopandas(
cls,
gdf: GeoDataFrame | GeoSeries,
- res: int,
*,
+ res: int | None = None,
+ out_shape: tuple[int, int] | None = None,
+ bounds: Any | None = None,
fill: int = 0,
all_touched: bool = False,
merge_alg: Callable = MergeAlg.replace,

@@ -680,18 +718,28 @@ class Band(_ImageBandBase):
**kwargs,
) -> None:
"""Create Band from a GeoDataFrame."""
-
-
- res=res,
- fill=fill,
- all_touched=all_touched,
- merge_alg=merge_alg,
- default_value=default_value,
- dtype=dtype,
- )
+ if bounds is not None:
+ bounds = to_bbox(bounds)

-
-
+ if out_shape == (0,):
+ arr = np.array([])
+ else:
+ arr = _gdf_to_arr(
+ gdf,
+ res=res,
+ bounds=bounds,
+ fill=fill,
+ all_touched=all_touched,
+ merge_alg=merge_alg,
+ default_value=default_value,
+ dtype=dtype,
+ out_shape=out_shape,
+ )
+ if bounds is None:
+ bounds = gdf.total_bounds
+
+ obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
+ obj._from_geopandas = True
return obj

def __init__(
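Note: with the reworked classmethod, a Band can be rasterized from vector data either at a fixed resolution or onto a fixed out_shape and bounds, which is how the new clip methods in this diff build their polygon masks. A hedged usage sketch, assuming Band is exported at the top level of sgis and that default_value and fill keep the meaning implied by the signature above:

import geopandas as gpd
from shapely.geometry import box

import sgis as sg

gdf = gpd.GeoDataFrame({"geometry": [box(0, 0, 50, 50)]}, crs=25833)

# rasterize at a 10 m resolution ...
band = sg.Band.from_geopandas(gdf, res=10, default_value=1, fill=0)

# ... or onto an existing grid, as the clip() implementations here do
mask_band = sg.Band.from_geopandas(
    gdf, out_shape=band.values.shape, bounds=gdf.total_bounds, default_value=1
)
print(band.values.shape, mask_band.values.shape)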
@@ -710,9 +758,6 @@ class Band(_ImageBandBase):
**kwargs,
) -> None:
"""Band initialiser."""
- if callable(res) and isinstance(res(), None_):
- raise TypeError("Must specify 'res'")
-
if data is None:
# allowing 'path' to replace 'data' as argument
# to make the print repr. valid as initialiser

@@ -738,11 +783,20 @@ class Band(_ImageBandBase):
if isinstance(data, np.ndarray):
if self._bounds is None:
raise ValueError("Must specify bounds when data is an array.")
+ if not (res is None or (callable(res) and res() is None)):
+ # if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
+ # res
+ # ) != _get_res_from_bounds(self._bounds, data.shape):
+ raise ValueError(
+ f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
+ )
self._crs = crs
self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
self._from_array = True
self.values = data

+ self._res = _get_res_from_bounds(self._bounds, self.values.shape)
+
elif not isinstance(data, (str | Path | os.PathLike)):
raise TypeError(
"'data' must be string, Path-like or numpy.ndarray. "

@@ -750,8 +804,10 @@ class Band(_ImageBandBase):
)
else:
self._path = _fix_path(str(data))
+ if callable(res) and res() is None:
+ res = None
+ self._res = res

- self._res = res
if cmap is not None:
self.cmap = cmap
self._name = name

@@ -779,7 +835,7 @@ class Band(_ImageBandBase):
else:
setattr(self, key, value)

- elif self.metadata_attributes and self.path is not None
+ elif self.metadata_attributes and self.path is not None:
if self._all_file_paths is None:
self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
for key, value in self._get_metadata_attributes(

@@ -791,43 +847,28 @@ class Band(_ImageBandBase):
"""Makes Bands sortable by band_id."""
return self.band_id < other.band_id

+ def value_counts(self) -> pd.Series:
+ """Value count of each value of the band's array."""
+ try:
+ values = self.values.data[self.values.mask == False]
+ except AttributeError:
+ values = self.values
+ unique_values, counts = np.unique(values, return_counts=True)
+ return pd.Series(counts, index=unique_values).sort_values(ascending=False)
+
@property
def values(self) -> np.ndarray:
"""The numpy array, if loaded."""
if self._values is None:
- raise
+ raise _ArrayNotLoadedError("array is not loaded.")
return self._values

@values.setter
def values(self, new_val):
- if
- self._values = new_val
- return
- elif self.backend == "xarray" and isinstance(new_val, DataArray):
- # attrs can dissappear, so doing a union
- attrs = self._values.attrs | new_val.attrs
+ if isinstance(new_val, np.ndarray):
self._values = new_val
-
- return
-
- if self.backend == "numpy":
+ else:
self._values = self._to_numpy(new_val)
- if self.backend == "xarray":
- if not isinstance(self._values, DataArray):
- self._values = self._to_xarray(
- new_val,
- transform=self.transform,
- )
-
- elif isinstance(new_val, np.ndarray):
- self._values.values = new_val
- else:
- self._values = new_val
-
- @property
- def mask(self) -> "Band":
- """Mask Band."""
- return self._mask

@property
def band_id(self) -> str:
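Note: value_counts ignores masked cells before counting. The logic can be reproduced with plain numpy and pandas:

import numpy as np
import pandas as pd

# mirrors Band.value_counts: strip the mask if present, then count unique values
values = np.ma.array(
    [[0, 1, 1], [2, 2, 2], [0, 0, 2]],
    mask=[[True, False, False], [False, False, False], [False, False, True]],
)
try:
    data = values.data[values.mask == False]  # noqa: E712 (same comparison as the diff)
except AttributeError:
    data = values  # plain ndarray: nothing to strip
unique_values, counts = np.unique(data, return_counts=True)
print(pd.Series(counts, index=unique_values).sort_values(ascending=False))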
@@ -839,12 +880,18 @@ class Band(_ImageBandBase):
@property
def height(self) -> int:
"""Pixel heigth of the image band."""
-
+ try:
+ return self.values.shape[-2]
+ except IndexError:
+ return 0

@property
def width(self) -> int:
"""Pixel width of the image band."""
-
+ try:
+ return self.values.shape[-1]
+ except IndexError:
+ return 0

@property
def tile(self) -> str:

@@ -892,7 +939,7 @@ class Band(_ImageBandBase):
copied = self.copy()
value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
- df = copied.
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
df[column] = f"largest_{n}"
return df


@@ -903,30 +950,44 @@ class Band(_ImageBandBase):
copied = self.copy()
value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
- df = copied.
+ df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
df[column] = f"smallest_{n}"
return df

def clip(
- self,
+ self,
+ mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
) -> "Band":
- """Clip band values to geometry mask."""
-
- self
-
-
-
-
-
-
-
+ """Clip band values to geometry mask while preserving bounds."""
+ if not self.height or not self.width:
+ return self
+
+ fill: int = self.nodata or 0
+
+ mask_array: np.ndarray = Band.from_geopandas(
+ gdf=to_gdf(mask)[["geometry"]],
+ default_value=1,
+ fill=fill,
+ out_shape=self.values.shape,
+ bounds=mask,
+ ).values
+
+ is_not_polygon = mask_array == fill
+
+ if isinstance(self.values, np.ma.core.MaskedArray):
+ self._values.mask |= is_not_polygon
+ else:
+ self._values = np.ma.array(
+ self.values, mask=is_not_polygon, fill_value=self.nodata
+ )
+
return self

def load(
self,
bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
indexes: int | tuple[int] | None = None,
- masked: bool
+ masked: bool = True,
file_system=None,
**kwargs,
) -> "Band":
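Note: the rewritten clip keeps the band's bounds and shape and instead hides cells outside the geometry behind a numpy masked array. The underlying pattern, sketched with rasterio and numpy only (not sgis code):

import numpy as np
from rasterio import features, transform
from shapely.geometry import box

values = np.arange(16, dtype="float64").reshape(4, 4)
bounds = (0, 0, 4, 4)
polygon = box(1, 1, 3, 3)

# rasterize the mask polygon onto the band's grid: 1 inside, 0 (the fill) outside
grid_transform = transform.from_bounds(*bounds, width=4, height=4)
inside = features.rasterize(
    [polygon], out_shape=values.shape, transform=grid_transform, fill=0, default_value=1
)

# everything outside the polygon becomes masked; bounds and shape are unchanged
clipped = np.ma.array(values, mask=inside == 0, fill_value=-9999)
print(clipped)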
@@ -934,11 +995,10 @@ class Band(_ImageBandBase):

The array is stored in the 'values' property.
"""
- global
-
+ global _LOAD_COUNTER
+ _LOAD_COUNTER += 1

-
- masked = True if self.mask is None else False
+ _masking = kwargs.pop("_masking", self.masking)

bounds_was_none = bounds is None


@@ -947,12 +1007,9 @@ class Band(_ImageBandBase):
should_return_empty: bool = bounds is not None and bounds.area == 0
if should_return_empty:
self._values = np.array([])
- if self.mask is not None and not self.is_mask:
- self._mask = self._mask.load(
- bounds=bounds, indexes=indexes, file_system=file_system
- )
self._bounds = None
self.transform = None
+ # activate setter
self.values = self._values

return self

@@ -962,7 +1019,6 @@ class Band(_ImageBandBase):

if bounds is not None:
minx, miny, maxx, maxy = to_bbox(bounds)
- ## round down/up to integer to avoid precision trouble
# bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
bounds = minx, miny, maxx, maxy


@@ -976,21 +1032,19 @@ class Band(_ImageBandBase):
out_shape = kwargs.pop("out_shape", None)

if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
- print(self)
- print(self.mask)
- print(self.mask.values.shape)
- print(self.values.shape)
- print([int(x) for x in bounds], [int(x) for x in self.bounds])
raise ValueError(
"Cannot re-load array with different bounds. "
"Use .copy() to read with different bounds. "
- "Or .clip(mask) to clip."
+ "Or .clip(mask) to clip.",
+ self,
+ self.values.shape,
+ [int(x) for x in bounds],
+ [int(x) for x in self.bounds],
)
-
+
with opener(self.path, file_system=file_system) as f:
with rasterio.open(f, nodata=self.nodata) as src:
- self._res =
-
+ self._res = src.res if not self.res else self.res
if self.nodata is None or np.isnan(self.nodata):
self.nodata = src.nodata
else:

@@ -1003,7 +1057,7 @@ class Band(_ImageBandBase):
)

if bounds is None:
- if self._res !=
+ if self._res != src.res:
if out_shape is None:
out_shape = _get_shape_from_bounds(
to_bbox(src.bounds), self.res, indexes

@@ -1055,18 +1109,12 @@ class Band(_ImageBandBase):
else:
values[values == src.nodata] = self.nodata

- if
-
-
- elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
-
- if not self.mask.has_array:
- self._mask = self.mask.load(
- bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
- )
- mask_arr = self.mask.values
-
+ if _masking and not isinstance(values, np.ma.core.MaskedArray):
+ mask_arr = _read_mask_array(self, bounds=bounds)
values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
+ elif _masking:
+ mask_arr = _read_mask_array(self, bounds=bounds)
+ values.mask |= mask_arr

if bounds is not None:
self._bounds = to_bbox(bounds)

@@ -1077,13 +1125,6 @@ class Band(_ImageBandBase):

return self

- @property
- def is_mask(self) -> bool:
- """True if the band_id is equal to the masking band_id."""
- if self.masking is None:
- return False
- return self.band_id == self.masking["band_id"]
-
@property
def has_array(self) -> bool:
"""Whether the array is loaded."""

@@ -1091,7 +1132,7 @@ class Band(_ImageBandBase):
if not isinstance(self.values, (np.ndarray | DataArray)):
raise ValueError()
return True
- except ValueError: # also catches
+ except ValueError: # also catches _ArrayNotLoadedError
return False

def write(

@@ -1111,10 +1152,17 @@ class Band(_ImageBandBase):
if self.crs is None:
raise ValueError("Cannot write None crs to image.")

+ if self.nodata:
+ # TODO take out .data if masked?
+ values_with_nodata = np.concatenate(
+ [self.values.flatten(), np.array([self.nodata])]
+ )
+ else:
+ values_with_nodata = self.values
profile = {
"driver": driver,
"compress": compress,
- "dtype": rasterio.dtypes.get_minimum_dtype(
+ "dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
"crs": self.crs,
"transform": self.transform,
"nodata": self.nodata,
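Note: the nodata value is appended before choosing a dtype because the smallest dtype that fits the data alone may not be able to represent nodata. For example:

import numpy as np
from rasterio.dtypes import get_minimum_dtype

values = np.array([0, 1, 2, 3], dtype="int64")
nodata = -9999

print(get_minimum_dtype(values))  # 'uint8', which cannot hold -9999
print(get_minimum_dtype(np.concatenate([values.flatten(), np.array([nodata])])))  # 'int16'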
@@ -1123,19 +1171,18 @@ class Band(_ImageBandBase):
"width": self.width,
} | kwargs

- # with opener(path, "wb", file_system=self.file_system) as f:
with opener(path, "wb", file_system=file_system) as f:
with rasterio.open(f, "w", **profile) as dst:

if dst.nodata is None:
dst.nodata = _get_dtype_min(dst.dtypes[0])

-
-
-
-
-
-
+ if (
+ isinstance(self.values, np.ma.core.MaskedArray)
+ and dst.nodata is not None
+ ):
+ self.values.data[np.isnan(self.values.data)] = dst.nodata
+ self.values.data[self.values.mask] = dst.nodata

if len(self.values.shape) == 2:
dst.write(self.values, indexes=1)

@@ -1223,7 +1270,7 @@ class Band(_ImageBandBase):
The gradient will be 1 (1 meter up for every meter forward).
The calculation is by default done in place to save memory.

- >>> band.gradient()
+ >>> band.gradient(copy=False)
>>> band.values
array([[0., 1., 1., 1., 0.],
[1., 1., 1., 1., 1.],

@@ -1284,11 +1331,13 @@ class Band(_ImageBandBase):
dropna=dropna,
)

- def
+ def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
"""Create a GeoDataFrame from the image Band.

Args:
column: Name of resulting column that holds the raster values.
+ dropna: Whether to remove values that are NA or equal to the nodata
+ value.

Returns:
A GeoDataFrame with a geometry column and array values.

@@ -1296,24 +1345,28 @@ class Band(_ImageBandBase):
if not hasattr(self, "_values"):
raise ValueError("Array is not loaded.")

+ if isinstance(self.values, np.ma.core.MaskedArray):
+ self.values.data[self.values.mask] = self.nodata or 0
if self.values.shape[0] == 0:
-
-
-
-
-
-
+ df = GeoDataFrame({"geometry": []}, crs=self.crs)
+ else:
+ df = GeoDataFrame(
+ pd.DataFrame(
+ _array_to_geojson(
+ self.values, self.transform, processes=self.processes
+ ),
+ columns=[column, "geometry"],
),
-
-
-
-
-
+ geometry="geometry",
+ crs=self.crs,
+ )
+
+ if dropna:
+ return df[(df[column] != self.nodata) & (df[column].notna())]
+ return df

def to_xarray(self) -> DataArray:
"""Convert the raster to an xarray.DataArray."""
- if self.backend == "xarray":
- return self.values
return self._to_xarray(
self.values,
transform=self.transform,
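Note: to_geopandas now takes a dropna argument that filters out nodata and NA cells after converting the grid to polygons. A hedged usage sketch, assuming Band accepts an in-memory array together with bounds, crs and nodata as the rest of this diff suggests:

import numpy as np
import sgis as sg

arr = np.array([[1, 2], [0, 3]])
band = sg.Band(arr, bounds=(0, 0, 2, 2), crs=25833, nodata=0)

df = band.to_geopandas(column="value")                    # nodata cells dropped
df_all = band.to_geopandas(column="value", dropna=False)  # all grid cells kept
print(len(df), len(df_all))  # fewer rows when dropna=True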
@@ -1328,24 +1381,29 @@ class Band(_ImageBandBase):
self, arr: np.ndarray | DataArray, masked: bool = True
) -> np.ndarray | np.ma.core.MaskedArray:
if not isinstance(arr, np.ndarray):
+ mask_arr = None
if masked:
try:
mask_arr = arr.isnull().values
except AttributeError:
-
+ pass
try:
arr = arr.to_numpy()
except AttributeError:
arr = arr.values
+ if mask_arr is not None:
+ arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
if not isinstance(arr, np.ndarray):
arr = np.array(arr)
+
if (
masked
- and self.mask is not None
- and not self.is_mask
and not isinstance(arr, np.ma.core.MaskedArray)
+ and mask_arr is not None
):
arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
return arr

def __repr__(self) -> str:

@@ -1368,10 +1426,6 @@ class NDVIBand(Band):

cmap: str = "Greens"

- # @staticmethod
- # def get_cmap(arr: np.ndarray):
- # return get_cmap(arr)
-

def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
arr = np.median(arr, axis=0).astype(int)

@@ -1383,12 +1437,12 @@ class Image(_ImageBandBase):
"""Image consisting of one or more Bands."""

band_class: ClassVar[Band] = Band
- backend: str = "numpy"

def __init__(
self,
data: str | Path | Sequence[Band] | None = None,
- res: int |
+ res: int | None_ = None_,
+ mask: "Band | None" = None,
processes: int = 1,
df: pd.DataFrame | None = None,
nodata: int | None = None,

@@ -1409,12 +1463,18 @@ class Image(_ImageBandBase):
self.processes = processes
self._crs = None
self._bands = None
+ self._mask = mask
+
+ if isinstance(data, Band):
+ data = [data]

if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
self._construct_image_from_bands(data, res)
return
elif not isinstance(data, (str | Path | os.PathLike)):
- raise TypeError(
+ raise TypeError(
+ f"'data' must be string, Path-like or a sequence of Band. Got {data}"
+ )

self._res = res
self._path = _fix_path(data)

@@ -1422,7 +1482,8 @@ class Image(_ImageBandBase):
if all_file_paths is None and self.path:
self._all_file_paths = _get_all_file_paths(self.path)
elif self.path:
-
+ name = Path(self.path).name
+ all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
self._all_file_paths = {x for x in all_file_paths if self.path in x}
else:
self._all_file_paths = None

@@ -1434,11 +1495,7 @@ class Image(_ImageBandBase):

df["image_path"] = df["image_path"].astype(str)

- cols_to_explode = [
- "file_path",
- "file_name",
- *[x for x in df if FILENAME_COL_SUFFIX in x],
- ]
+ cols_to_explode = ["file_path", "file_name"]
try:
df = df.explode(cols_to_explode, ignore_index=True)
except ValueError:

@@ -1466,20 +1523,92 @@ class Image(_ImageBandBase):
else:
setattr(self, key, value)

-
+ elif self.metadata_attributes and self.path is not None:
for key, value in self._get_metadata_attributes(
self.metadata_attributes
).items():
setattr(self, key, value)

+ def clip(
+ self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
+ ) -> "Image":
+ """Clip band values to geometry mask while preserving bounds."""
+ copied = self.copy() if copy else self
+
+ fill: int = self.nodata or 0
+
+ mask_array: np.ndarray = Band.from_geopandas(
+ gdf=to_gdf(mask)[["geometry"]],
+ default_value=1,
+ fill=fill,
+ out_shape=next(iter(self)).values.shape,
+ bounds=self.bounds,
+ ).values
+
+ is_not_polygon = mask_array == fill
+
+ for band in copied:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band._values.mask |= is_not_polygon
+ else:
+ band._values = np.ma.array(
+ band.values, mask=is_not_polygon, fill_value=band.nodata
+ )
+
+ return copied
+
+ def load(
+ self,
+ bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
+ indexes: int | tuple[int] | None = None,
+ file_system=None,
+ **kwargs,
+ ) -> "ImageCollection":
+ """Load all image Bands with threading."""
+ if bounds is None and indexes is None and all(band.has_array for band in self):
+ return self
+
+ if self.masking:
+ mask_array: np.ndarray = _read_mask_array(
+ self,
+ bounds=bounds,
+ indexes=indexes,
+ file_system=file_system,
+ **kwargs,
+ )
+
+ with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
+ parallel(
+ joblib.delayed(_load_band)(
+ band,
+ bounds=bounds,
+ indexes=indexes,
+ file_system=file_system,
+ _masking=None,
+ **kwargs,
+ )
+ for band in self
+ )
+
+ if self.masking:
+ for band in self:
+ if isinstance(band.values, np.ma.core.MaskedArray):
+ band.values.mask |= mask_array
+ else:
+ band.values = np.ma.array(
+ band.values, mask=mask_array, fill_value=self.nodata
+ )
+
+ return self
+
def _construct_image_from_bands(
self, data: Sequence[Band], res: int | None
) -> None:
self._bands = list(data)
if res is None:
- res =
+ res = {band.res for band in self.bands}
if len(res) == 1:
- self._res = res
+ self._res = next(iter(res))
else:
raise ValueError(f"Different resolutions for the bands: {res}")
else:

@@ -1525,8 +1654,7 @@ class Image(_ImageBandBase):
arr,
bounds=red.bounds,
crs=red.crs,
-
- **red._common_init_kwargs,
+ **{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
)

def get_brightness(

@@ -1557,81 +1685,16 @@ class Image(_ImageBandBase):
brightness,
bounds=red.bounds,
crs=self.crs,
-
- **self._common_init_kwargs,
+ **{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
)

def to_xarray(self) -> DataArray:
"""Convert the raster to an xarray.DataArray."""
- if self.backend == "xarray":
- return self.values
-
return self._to_xarray(
np.array([band.values for band in self]),
transform=self[0].transform,
)

- @property
- def mask(self) -> Band | None:
- """Mask Band."""
- if self.masking is None:
- return None
-
- elif self._mask is not None:
- return self._mask
-
- elif self._bands is not None and all(band.mask is not None for band in self):
- if len({id(band.mask) for band in self}) > 1:
- raise ValueError(
- "Image bands must have same mask.",
- {id(band.mask) for band in self},
- ) # TODO
- self._mask = next(
- iter([band.mask for band in self if band.mask is not None])
- )
- return self._mask
-
- mask_band_id = self.masking["band_id"]
- mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
- if len(mask_paths) > 1:
- raise ValueError(
- f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
- )
- elif not mask_paths:
- raise ValueError(
- f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
- + str([Path(x).name for x in _ls_func(self.path)])
- )
-
- self._mask = self.band_class(
- mask_paths[0],
- **self._common_init_kwargs,
- )
- if self._bands is not None:
- for band in self:
- band._mask = self._mask
- return self._mask
-
- @mask.setter
- def mask(self, values: Band | None) -> None:
- if values is None:
- self._mask = None
- for band in self:
- band._mask = None
- return
- if not isinstance(values, Band):
- raise TypeError(f"mask must be Band. Got {type(values)}")
- self._mask = values
- mask_arr = self._mask.values
- for band in self:
- band._mask = self._mask
- try:
- band.values = np.ma.array(
- band.values.data, mask=mask_arr, fill_value=band.nodata
- )
- except ArrayNotLoadedError:
- pass
-
@property
def band_ids(self) -> list[str]:
"""The Band ids."""

@@ -1654,12 +1717,9 @@ class Image(_ImageBandBase):
else:
paths = self._df["file_path"]

- mask = self.mask
-
self._bands = [
self.band_class(
path,
- mask=mask,
all_file_paths=self._all_file_paths,
**self._common_init_kwargs,
)

@@ -1750,10 +1810,10 @@ class Image(_ImageBandBase):
bounds.append(band.bounds)
return get_total_bounds(bounds)

- def
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
"""Convert the array to a GeoDataFrame of grid polygons and values."""
return pd.concat(
- [band.
+ [band.to_geopandas(column=column) for band in self], ignore_index=True
)

def sample(

@@ -1868,13 +1928,12 @@ class ImageCollection(_ImageBase):
image_class: ClassVar[Image] = Image
band_class: ClassVar[Band] = Band
_metadata_attribute_collection_type: ClassVar[type] = pd.Series
- backend: str = "numpy"

def __init__(
self,
data: str | Path | Sequence[Image] | Sequence[str | Path],
- res: int,
- level: str | None = None_,
+ res: int | None_ = None_,
+ level: str | None_ | None = None_,
processes: int = 1,
metadata: str | dict | pd.DataFrame | None = None,
nodata: int | None = None,

@@ -1890,7 +1949,7 @@ class ImageCollection(_ImageBase):

super().__init__(metadata=metadata, **kwargs)

- if callable(level) and
+ if callable(level) and level() is None:
level = None

self.nodata = nodata

@@ -1911,13 +1970,19 @@ class ImageCollection(_ImageBase):
elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
# adding band paths (asuming 'data' is a sequence of image paths)
try:
- self._all_file_paths = _get_child_paths_threaded(data) |
+ self._all_file_paths = _get_child_paths_threaded(data) | {
+ _fix_path(x) for x in data
+ }
except FileNotFoundError as e:
if _from_root:
raise TypeError(
- "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
+ "When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
) from e
raise e
+ if self.level:
+ self._all_file_paths = [
+ path for path in self._all_file_paths if self.level in path
+ ]
self._df = self._create_metadata_df(self._all_file_paths)
return


@@ -1935,7 +2000,9 @@ class ImageCollection(_ImageBase):

self._df = self._create_metadata_df(self._all_file_paths)

- def groupby(
+ def groupby(
+ self, by: str | list[str], copy: bool = True, **kwargs
+ ) -> ImageCollectionGroupBy:
"""Group the Collection by Image or Band attribute(s)."""
df = pd.DataFrame(
[(i, img) for i, img in enumerate(self) for _ in img],

@@ -1962,8 +2029,10 @@ class ImageCollection(_ImageBase):
return ImageCollectionGroupBy(
sorted(
parallel(
- joblib.delayed(_copy_and_add_df_parallel)(
-
+ joblib.delayed(_copy_and_add_df_parallel)(
+ group_values, group_df, self, copy
+ )
+ for group_values, group_df in df.groupby(by, **kwargs)
)
),
by=by,

@@ -2004,6 +2073,51 @@ class ImageCollection(_ImageBase):

return self

+ def pixelwise(
+ self,
+ func: Callable,
+ kwargs: dict | None = None,
+ index_aligned_kwargs: dict | None = None,
+ masked: bool = True,
+ ) -> np.ndarray | tuple[np.ndarray] | None:
+ """Run a function for each pixel.
+
+ The function should take a 1d array as first argument. This will be
+ the pixel values for all bands in all images in the collection.
+ """
+ values = np.array([band.values for img in self for band in img])
+
+ if (
+ masked
+ and self.nodata is not None
+ and hasattr(next(iter(next(iter(self)))).values, "mask")
+ ):
+ mask_array = np.array(
+ [
+ (band.values.mask) | (band.values.data == self.nodata)
+ for img in self
+ for band in img
+ ]
+ )
+ elif masked and self.nodata is not None:
+ mask_array = np.array(
+ [band.values == self.nodata for img in self for band in img]
+ )
+ elif masked:
+ mask_array = np.array([band.values.mask for img in self for band in img])
+ else:
+ mask_array = None
+
+ return pixelwise(
+ func=func,
+ values=values,
+ mask_array=mask_array,
+ index_aligned_kwargs=index_aligned_kwargs,
+ kwargs=kwargs,
+ processes=self.processes,
+ nodata=self.nodata or np.nan,
+ )
+
def get_unique_band_ids(self) -> list[str]:
"""Get a list of unique band_ids across all images."""
return list({band.band_id for img in self for band in img})
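Note: pixelwise stacks every band of every image into one cube and hands each pixel's 1d series of values to func. The idea, illustrated with numpy only (the module-level pixelwise helper it delegates to is not shown in this diff):

import numpy as np

# three "bands" on a 2x2 grid -> shape (3, 2, 2)
cube = np.stack([np.full((2, 2), 1.0), np.full((2, 2), 3.0), np.full((2, 2), 5.0)])


def per_pixel_median(pixel_values: np.ndarray) -> float:
    # receives one 1d array per pixel, like the func passed to pixelwise()
    return float(np.median(pixel_values))


result = np.apply_along_axis(per_pixel_median, 0, cube)
print(result)  # [[3. 3.], [3. 3.]]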
@@ -2109,8 +2223,7 @@ class ImageCollection(_ImageBase):
|
|
|
2109
2223
|
arr,
|
|
2110
2224
|
bounds=bounds,
|
|
2111
2225
|
crs=crs,
|
|
2112
|
-
|
|
2113
|
-
**self._common_init_kwargs,
|
|
2226
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2114
2227
|
)
|
|
2115
2228
|
|
|
2116
2229
|
band._merged = True
|
|
@@ -2183,7 +2296,7 @@ class ImageCollection(_ImageBase):
|
|
|
2183
2296
|
bounds=out_bounds,
|
|
2184
2297
|
crs=crs,
|
|
2185
2298
|
band_id=band_id,
|
|
2186
|
-
**self._common_init_kwargs,
|
|
2299
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2187
2300
|
)
|
|
2188
2301
|
)
|
|
2189
2302
|
|
|
@@ -2296,22 +2409,11 @@ class ImageCollection(_ImageBase):
|
|
|
2296
2409
|
):
|
|
2297
2410
|
return self
|
|
2298
2411
|
|
|
2299
|
-
# if self.processes == 1:
|
|
2300
|
-
# for img in self:
|
|
2301
|
-
# for band in img:
|
|
2302
|
-
# band.load(
|
|
2303
|
-
# bounds=bounds,
|
|
2304
|
-
# indexes=indexes,
|
|
2305
|
-
# file_system=file_system,
|
|
2306
|
-
# **kwargs,
|
|
2307
|
-
# )
|
|
2308
|
-
# return self
|
|
2309
|
-
|
|
2310
2412
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2311
2413
|
if self.masking:
|
|
2312
|
-
parallel(
|
|
2313
|
-
joblib.delayed(
|
|
2314
|
-
img
|
|
2414
|
+
masks: list[np.ndarray] = parallel(
|
|
2415
|
+
joblib.delayed(_read_mask_array)(
|
|
2416
|
+
img,
|
|
2315
2417
|
bounds=bounds,
|
|
2316
2418
|
indexes=indexes,
|
|
2317
2419
|
file_system=file_system,
|
|
@@ -2319,14 +2421,6 @@ class ImageCollection(_ImageBase):
|
|
|
2319
2421
|
)
|
|
2320
2422
|
for img in self
|
|
2321
2423
|
)
|
|
2322
|
-
for img in self:
|
|
2323
|
-
for band in img:
|
|
2324
|
-
band._mask = img.mask
|
|
2325
|
-
|
|
2326
|
-
# print({img.mask.has_array for img in self })
|
|
2327
|
-
# print({band.mask.has_array for img in self for band in img})
|
|
2328
|
-
|
|
2329
|
-
# with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2330
2424
|
|
|
2331
2425
|
parallel(
|
|
2332
2426
|
joblib.delayed(_load_band)(
|
|
@@ -2334,34 +2428,86 @@ class ImageCollection(_ImageBase):
                     bounds=bounds,
                     indexes=indexes,
                     file_system=file_system,
+                    _masking=None,
                     **kwargs,
                 )
                 for img in self
                 for band in img
             )

+        if self.masking:
+            for img, mask_array in zip(self, masks, strict=True):
+                for band in img:
+                    if isinstance(band.values, np.ma.core.MaskedArray):
+                        band.values.mask |= mask_array
+                    else:
+                        band.values = np.ma.array(
+                            band.values, mask=mask_array, fill_value=self.nodata
+                        )
+
         return self

     def clip(
         self,
         mask: Geometry | GeoDataFrame | GeoSeries,
-
+        dropna: bool = True,
+        copy: bool = True,
     ) -> "ImageCollection":
-        """Clip all image Bands
-
-        for img in self:
-            for band in img:
-                band.clip(mask, **kwargs)
-        return self
+        """Clip all image Bands while preserving bounds."""
+        copied = self.copy() if copy else self

-
-
-
-
+        copied._images = [img for img in copied if img.union_all()]
+
+        fill: int = self.nodata or 0
+
+        common_band_from_geopandas_kwargs = dict(
+            gdf=to_gdf(mask)[["geometry"]],
+            default_value=1,
+            fill=fill,
+        )
+
+        for img in copied:
+            img._rounded_bounds = tuple(int(x) for x in img.bounds)
+
+        for bounds in {img._rounded_bounds for img in copied}:
+            shapes = {
+                band.values.shape
+                for img in copied
                 for band in img
-
+                if img._rounded_bounds == bounds
+            }
+            if len(shapes) != 1:
+                raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")

-
+            mask_array: np.ndarray = Band.from_geopandas(
+                **common_band_from_geopandas_kwargs,
+                out_shape=next(iter(shapes)),
+                bounds=bounds,
+            ).values
+
+            is_not_polygon = mask_array == fill
+
+            for img in copied:
+                if img._rounded_bounds != bounds:
+                    continue
+
+                for band in img:
+                    if isinstance(band.values, np.ma.core.MaskedArray):
+                        band._values.mask |= is_not_polygon
+                    else:
+                        band._values = np.ma.array(
+                            band.values, mask=is_not_polygon, fill_value=band.nodata
+                        )
+
+        for img in copied:
+            del img._rounded_bounds
+
+        if dropna:
+            copied.images = [
+                img for img in copied if any(np.sum(band.values) for band in img)
+            ]
+
+        return copied

     def _set_bbox(
         self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
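
load() now reads the mask band once per image (via _read_mask_array) and ORs it into each band's numpy masked array, and clip() applies the same pattern with a rasterized polygon mask. A standalone numpy sketch of that update step, with invented values and an invented nodata value:

import numpy as np

values = np.arange(6, dtype=float).reshape(2, 3)   # invented band values
scl = np.array([[4, 8, 3], [9, 4, 5]])             # invented classification band
masked_out = np.isin(scl, [3, 8, 9])               # assumed cloud/shadow classes

# Same pattern as in load()/clip(): extend an existing mask or create one.
if isinstance(values, np.ma.core.MaskedArray):
    values.mask |= masked_out
else:
    values = np.ma.array(values, mask=masked_out, fill_value=-9999)

print(values.filled())  # masked pixels replaced by the fill value
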
@@ -2372,17 +2518,12 @@ class ImageCollection(_ImageBase):
         if self._images is not None:
             for img in self._images:
                 img._bbox = self._bbox
-                if img.mask is not None:
-                    img.mask._bbox = self._bbox
                 if img.bands is None:
                     continue
                 for band in img:
                     band._bbox = self._bbox
                     bounds = box(*band._bbox).intersection(box(*band.bounds))
                     band._bounds = to_bbox(bounds) if not bounds.is_empty else None
-                    if band.mask is not None:
-                        band.mask._bbox = self._bbox
-                        band.mask._bounds = band._bounds

         return self

@@ -2488,10 +2629,10 @@ class ImageCollection(_ImageBase):
                 **kwargs,
             )

-        return
+        return combine_by_coords(list(xarrs.values()))
         # return Dataset(xarrs)

-    def
+    def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
         """Convert each band in each Image to a GeoDataFrame."""
         out = {}
         i = 0
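
to_xarray now stitches the per-band arrays together with xarray's combine_by_coords instead of returning None. A self-contained toy example of what that call does (coordinates and names are invented):

import numpy as np
import xarray as xr

left = xr.DataArray(
    np.zeros((2, 2)), coords={"y": [0, 1], "x": [0, 1]}, dims=("y", "x"), name="B02"
)
right = xr.DataArray(
    np.ones((2, 2)), coords={"y": [0, 1], "x": [2, 3]}, dims=("y", "x"), name="B02"
)

# Aligns the pieces on their coordinate values and concatenates where they abut.
combined = xr.combine_by_coords([left, right])
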
@@ -2501,10 +2642,13 @@ class ImageCollection(_ImageBase):
                 try:
                     name = band.name
                 except AttributeError:
+                    name = None
+
+                if name is None:
                     name = f"{self.__class__.__name__}({i})"

                 if name not in out:
-                    out[name] = band.
+                    out[name] = band.to_geopandas(column=column)
         return out

     def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
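
A short usage sketch of the completed to_geopandas; `collection` is assumed to be an existing ImageCollection with loaded values:

gdfs = collection.to_geopandas(column="value")
for name, gdf in gdfs.items():
    print(name, len(gdf), gdf["value"].mean())
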
@@ -2561,10 +2705,6 @@ class ImageCollection(_ImageBase):

         return copied

-    def __or__(self, collection: "ImageCollection") -> "ImageCollection":
-        """Concatenate the collection with another collection."""
-        return concat_image_collections([self, collection])
-
     def __iter__(self) -> Iterator[Image]:
         """Iterate over the images."""
         return iter(self.images)
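
With __or__ removed, concatenation goes through the module-level concat_image_collections helper defined further down instead of the | operator. A hedged sketch (collection_a and collection_b are assumed to exist and to share the same resolution, since the helper raises on a resolution mismatch):

# 1.0.7: combined = collection_a | collection_b
combined = concat_image_collections([collection_a, collection_b])
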
@@ -2574,14 +2714,16 @@ class ImageCollection(_ImageBase):
         return len(self.images)

     def __getattr__(self, attr: str) -> Any:
-        """Make iterable of
+        """Make iterable of metadata attribute."""
         if attr in (self.metadata_attributes or {}):
             return self._metadata_attribute_collection_type(
                 [getattr(img, attr) for img in self]
             )
         return super().__getattribute__(attr)

-    def __getitem__(
+    def __getitem__(
+        self, item: int | slice | Sequence[int | bool]
+    ) -> "Image | ImageCollection":
         """Select one Image by integer index, or multiple Images by slice, list of int."""
         if isinstance(item, int):
             return self.images[item]
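
The annotated __getitem__ takes an int, a slice, or a sequence of ints or bools and returns either a single Image or a filtered ImageCollection. A hedged usage sketch (`collection` is assumed to exist):

first_image = collection[0]        # a single Image
first_three = collection[:3]       # an ImageCollection of three Images
picked = collection[[0, 2, 5]]     # an ImageCollection picked by index
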
@@ -2620,14 +2762,14 @@ class ImageCollection(_ImageBase):
         return copied

     @property
-    def
+    def date(self) -> Any:
         """List of image dates."""
-        return [img.date for img in self]
+        return self._metadata_attribute_collection_type([img.date for img in self])

     @property
-    def image_paths(self) ->
+    def image_paths(self) -> Any:
         """List of image paths."""
-        return [img.path for img in self]
+        return self._metadata_attribute_collection_type([img.path for img in self])

     @property
     def images(self) -> list["Image"]:
@@ -2645,21 +2787,6 @@ class ImageCollection(_ImageBase):
                 **self._common_init_kwargs,
             )

-        if self.masking is not None:
-            images = []
-            for image in self._images:
-                # TODO why this loop?
-                try:
-                    if not isinstance(image.mask, Band):
-                        raise ValueError()
-                    images.append(image)
-                except ValueError as e:
-                    raise e
-                    continue
-            self._images = images
-            for image in self._images:
-                image._bands = [band for band in image if band.band_id is not None]
-
         self._images = [img for img in self if len(img)]

         if self._should_be_sorted:
@@ -2689,24 +2816,22 @@ class ImageCollection(_ImageBase):

     @images.setter
     def images(self, new_value: list["Image"]) -> list["Image"]:
-
-        if not
+        new_value = list(new_value)
+        if not new_value:
+            self._images = new_value
+            return
+        if all(isinstance(x, Band) for x in new_value):
+            if len(new_value) != len(self):
+                raise ValueError("'images' must have same length as number of images.")
+            new_images = []
+            for i, img in enumerate(self):
+                img._bands = [new_value[i]]
+                new_images.append(img)
+            self._images = new_images
+            return
+        if not all(isinstance(x, Image) for x in new_value):
             raise TypeError("images should be a sequence of Image.")
-
-    def __repr__(self) -> str:
-        """String representation."""
-        root = ""
-        if self.path is not None:
-            data = f"'{self.path}'"
-        elif all(img.path is not None for img in self):
-            data = [img.path for img in self]
-            parents = {str(Path(path).parent) for path in data}
-            if len(parents) == 1:
-                data = [Path(path).name for path in data]
-                root = f" root='{next(iter(parents))}',"
-        else:
-            data = [img for img in self]
-        return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
+        self._images = new_value

     def union_all(self) -> Polygon | MultiPolygon:
         """(Multi)Polygon representing the union of all image bounds."""
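
The rewritten setter accepts either Image objects or exactly one Band per Image; anything else raises. A hedged sketch (`collection` and `new_bands` are assumed to exist, with len(new_bands) == len(collection)):

collection.images = list(collection)   # a sequence of Image objects, as before
collection.images = new_bands          # one Band per Image: each Image keeps only that Band
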
@@ -2763,7 +2888,6 @@ class ImageCollection(_ImageBase):
         if "date" in x_var and subcollection._should_be_sorted:
             subcollection._images = list(sorted(subcollection._images))

-        y = np.array([band.values for img in subcollection for band in img])
         if "date" in x_var and subcollection._should_be_sorted:
             x = np.array(
                 [
@@ -2780,120 +2904,35 @@ class ImageCollection(_ImageBase):
                     - pd.Timestamp(np.min(x))
             ).days
         else:
-            x = np.arange(0,
-
-
-
-
-
-
-
-
-
-
-
+            x = np.arange(0, sum(1 for img in subcollection for band in img))
+
+        subcollection.pixelwise(
+            _plot_pixels_1d,
+            kwargs=dict(
+                alpha=alpha,
+                x_var=x_var,
+                y_label=y_label,
+                rounding=rounding,
+                first_date=first_date,
+                figsize=figsize,
+            ),
+            index_aligned_kwargs=dict(x=x),
         )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            this_x = this_x[condition]
-
-            coef, intercept = np.linalg.lstsq(
-                np.vstack([this_x, np.ones(this_x.shape[0])]).T,
-                this_y,
-                rcond=None,
-            )[0]
-            predicted = np.array([intercept + coef * x for x in this_x])
-
-            predicted_start = predicted[0]
-            predicted_end = predicted[-1]
-            predicted_change = predicted_end - predicted_start
-
-            # Degrees of freedom
-            dof = len(this_x) - 2
-
-            # 95% confidence interval
-            t_val = stats.t.ppf(1 - alpha / 2, dof)
-
-            # Mean squared error of the residuals
-            mse = np.sum((this_y - predicted) ** 2) / dof
-
-            # Calculate the standard error of predictions
-            pred_stderr = np.sqrt(
-                mse
-                * (
-                    1 / len(this_x)
-                    + (this_x - np.mean(this_x)) ** 2
-                    / np.sum((this_x - np.mean(this_x)) ** 2)
-                )
-            )
-
-            # Calculate the confidence interval for predictions
-            ci_lower = predicted - t_val * pred_stderr
-            ci_upper = predicted + t_val * pred_stderr
-
-            fig = plt.figure(figsize=figsize)
-            ax = fig.add_subplot(1, 1, 1)
-
-            ax.scatter(this_x, this_y, color="#2c93db")
-            ax.plot(this_x, predicted, color="#e0436b")
-            ax.fill_between(
-                this_x,
-                ci_lower,
-                ci_upper,
-                color="#e0436b",
-                alpha=0.2,
-                label=f"{int(alpha*100)}% CI",
-            )
-            plt.title(
-                f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
-                f"pred change: {round(predicted_change, rounding)}, "
-                f"pred start: {round(predicted_start, rounding)}, "
-                f"pred end: {round(predicted_end, rounding)}"
-            )
-            plt.xlabel(x_var)
-            plt.ylabel(y_label)
-
-            if x_var == "date":
-                date_labels = pd.to_datetime(
-                    [first_date + pd.Timedelta(days=int(day)) for day in this_x]
-                )
-
-                _, unique_indices = np.unique(
-                    date_labels.strftime("%Y-%m"), return_index=True
-                )
-
-                unique_x = np.array(this_x)[unique_indices]
-                unique_labels = date_labels[unique_indices].strftime("%Y-%m")
-
-                ax.set_xticks(unique_x)
-                ax.set_xticklabels(unique_labels, rotation=45, ha="right")
-                # ax.tick_params(axis="x", length=10, width=2)
-
-            plt.show()
-
-
-def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
-    for regex in regexes:
-        try:
-            return re.search(regex, xml_file)
-        except (TypeError, AttributeError):
-            continue
-    raise ValueError(
-        f"Could not find processing_baseline info from {regexes} in {xml_file}"
-    )
+    def __repr__(self) -> str:
+        """String representation."""
+        root = ""
+        if self.path is not None:
+            data = f"'{self.path}'"
+        elif all(img.path is not None for img in self):
+            data = [img.path for img in self]
+            parents = {str(Path(path).parent) for path in data}
+            if len(parents) == 1:
+                data = [Path(path).name for path in data]
+                root = f" root='{next(iter(parents))}',"
+        else:
+            data = [img for img in self]
+        return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"


 class Sentinel2Config:
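
plot_pixels now delegates the per-pixel work to pixelwise and _plot_pixels_1d, which fit an ordinary least-squares line and draw a t-based confidence band around the predictions. A standalone numpy/scipy sketch of that computation on invented data:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = np.arange(10, dtype=float)
y = 2.0 * x + rng.normal(scale=1.5, size=x.size)

# Slope and intercept via least squares, as in _plot_pixels_1d.
coef, intercept = np.linalg.lstsq(np.vstack([x, np.ones(x.size)]).T, y, rcond=None)[0]
predicted = intercept + coef * x

alpha = 0.05                       # 95% confidence band
dof = len(x) - 2
t_val = stats.t.ppf(1 - alpha / 2, dof)
mse = np.sum((y - predicted) ** 2) / dof
stderr = np.sqrt(mse * (1 / len(x) + (x - x.mean()) ** 2 / np.sum((x - x.mean()) ** 2)))
ci_lower, ci_upper = predicted - t_val * stderr, predicted + t_val * stderr
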
@@ -3007,9 +3046,6 @@ class Sentinel2Band(Sentinel2Config, Band):
     }

     def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
-        if self.is_mask:
-            return None
-
         pat = re.compile(
             r"""
             <BOA_ADD_OFFSET\s*
@@ -3025,7 +3061,7 @@ class Sentinel2Band(Sentinel2Config, Band):
         except (TypeError, AttributeError, KeyError) as e:
             raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
         if not matches:
-
+            return None

         dict_ = (
             pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
@@ -3088,7 +3124,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
     def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
         """ImageCollection with Sentinel2 specific name variables and path regexes."""
         level = kwargs.get("level", None_)
-        if callable(level) and
+        if callable(level) and level() is None:
             raise ValueError("Must specify level for Sentinel2Collection.")
         super().__init__(data=data, **kwargs)

@@ -3113,10 +3149,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):


 def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
-    """
-
-    Same as using the union operator |.
-    """
+    """Concatenate ImageCollections."""
     resolutions = {x.res for x in collections}
     if len(resolutions) > 1:
         raise ValueError(f"resoultion mismatch. {resolutions}")
@@ -3152,8 +3185,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
         raise ValueError("array must be 2 or 3 dimensional")


-def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
-
+def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
+    resx, resy = _res_as_tuple(res)
+
+    gradient_x, gradient_y = np.gradient(array, resx, resy)

     gradient = abs(gradient_x) + abs(gradient_y)

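
_slope_2d now accepts a per-axis resolution and passes one spacing value per axis to np.gradient, so the gradient is expressed in map units even with rectangular pixels. A small numpy illustration with an invented surface:

import numpy as np

dem = np.array([[0.0, 10.0, 20.0], [0.0, 10.0, 20.0], [0.0, 10.0, 20.0]])

# One spacing value per axis: 10 m along axis 0 (rows), 20 m along axis 1 (columns).
grad_rows, grad_cols = np.gradient(dem, 10.0, 20.0)
slope = abs(grad_rows) + abs(grad_cols)   # same combination as in _slope_2d
# every cell: 0.0 + 0.5 = 0.5
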
@@ -3240,7 +3275,7 @@ def _get_images(
     return images


-class
+class _ArrayNotLoadedError(ValueError):
     """Arrays are not loaded."""


@@ -3257,7 +3292,7 @@ class PathlessImageError(ValueError):
             what = "that have been merged"
         elif self.instance._from_array:
             what = "from arrays"
-        elif self.instance.
+        elif self.instance._from_geopandas:
             what = "from GeoDataFrames"
         else:
             raise ValueError(self.instance)
@@ -3318,18 +3353,22 @@ def _intesects(x, other) -> bool:


 def _copy_and_add_df_parallel(
-
+    group_values: tuple[Any, ...],
+    group_df: pd.DataFrame,
+    self: ImageCollection,
+    copy: bool,
 ) -> tuple[tuple[Any], ImageCollection]:
-    copied = self.copy()
+    copied = self.copy() if copy else self
     copied.images = [
-        img.copy()
+        img.copy() if copy else img
+        for img in group_df.drop_duplicates("_image_idx")["_image_instance"]
     ]
-    if "band_id" in
-    band_ids = set(
+    if "band_id" in group_df:
+        band_ids = set(group_df["band_id"].values)
         for img in copied.images:
             img._bands = [band for band in img if band.band_id in band_ids]

-    return (
+    return (group_values, copied)


 def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
@@ -3355,15 +3394,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
         return rasterio.open(file)


-def
+def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
+    mask_band_id = self.masking["band_id"]
+    mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
+    if len(mask_paths) > 1:
+        raise ValueError(
+            f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
+        )
+    elif not mask_paths:
+        raise ValueError(
+            f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
+            + str([Path(x).name for x in _ls_func(self.path)])
+        )
+
+    band = Band(
+        next(iter(mask_paths)),
+        **{**self._common_init_kwargs, "metadata": None},
+    )
+    band.load(**kwargs)
+    boolean_mask = np.isin(band.values, list(self.masking["values"]))
+    return boolean_mask
+
+
+def _load_band(band: Band, **kwargs) -> Band:
     return band.load(**kwargs)


-def _band_apply(band: Band, func: Callable, **kwargs) ->
+def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
     return band.apply(func, **kwargs)


-def _clip_band(band: Band, mask, **kwargs) ->
+def _clip_band(band: Band, mask, **kwargs) -> Band:
     return band.clip(mask, **kwargs)


@@ -3408,126 +3469,148 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     return binary_erosion(arr, structure=structure).astype(dtype)


-def
+def _plot_pixels_1d(
+    y: np.ndarray,
+    x: np.ndarray,
+    alpha: float,
+    x_var: str,
+    y_label: str,
+    rounding: int,
+    figsize: tuple,
+    first_date: pd.Timestamp,
+) -> None:
+    coef, intercept = np.linalg.lstsq(
+        np.vstack([x, np.ones(x.shape[0])]).T,
+        y,
+        rcond=None,
+    )[0]
+    predicted = np.array([intercept + coef * x for x in x])
+
+    predicted_start = predicted[0]
+    predicted_end = predicted[-1]
+    predicted_change = predicted_end - predicted_start
+
+    # Degrees of freedom
+    dof = len(x) - 2
+
+    # 95% confidence interval
+    t_val = stats.t.ppf(1 - alpha / 2, dof)
+
+    # Mean squared error of the residuals
+    mse = np.sum((y - predicted) ** 2) / dof
+
+    # Calculate the standard error of predictions
+    pred_stderr = np.sqrt(
+        mse * (1 / len(x) + (x - np.mean(x)) ** 2 / np.sum((x - np.mean(x)) ** 2))
+    )

-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    [0.25, 0.0, 0.05],
-    [0.3, 0.1, 0.1],
-    [0.35, 0.2, 0.15],
-    [0.4, 0.3, 0.2],
-    [0.45, 0.4, 0.25],
-    [0.5, 0.5, 0.3],
-    [0.55, 0.6, 0.35],
-    [0.7, 0.9, 0.5],
-]
-green = [
-    [0.6, 0.6, 0.6],
-    [0.4, 0.7, 0.4],
-    [0.3, 0.8, 0.3],
-    [0.25, 0.4, 0.25],
-    [0.2, 0.5, 0.2],
-    [0.10, 0.7, 0.10],
-    [0, 0.9, 0],
-]
+    # Calculate the confidence interval for predictions
+    ci_lower = predicted - t_val * pred_stderr
+    ci_upper = predicted + t_val * pred_stderr
+
+    fig = plt.figure(figsize=figsize)
+    ax = fig.add_subplot(1, 1, 1)
+
+    ax.scatter(x, y, color="#2c93db")
+    ax.plot(x, predicted, color="#e0436b")
+    ax.fill_between(
+        x,
+        ci_lower,
+        ci_upper,
+        color="#e0436b",
+        alpha=0.2,
+        label=f"{int(alpha*100)}% CI",
+    )
+    plt.title(
+        f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
+        f"pred change: {round(predicted_change, rounding)}, "
+        f"pred start: {round(predicted_start, rounding)}, "
+        f"pred end: {round(predicted_end, rounding)}"
+    )
+    plt.xlabel(x_var)
+    plt.ylabel(y_label)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if x_var == "date":
+        date_labels = pd.to_datetime(
+            [first_date + pd.Timedelta(days=int(day)) for day in x]
+        )
+
+        _, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)
+
+        unique_x = np.array(x)[unique_indices]
+        unique_labels = date_labels[unique_indices].strftime("%Y-%m")
+
+        ax.set_xticks(unique_x)
+        ax.set_xticklabels(unique_labels, rotation=45, ha="right")
+
+    plt.show()
+
+
+def pixelwise(
+    func: Callable,
+    values: np.ndarray,
+    mask_array: np.ndarray | None = None,
+    index_aligned_kwargs: dict | None = None,
+    kwargs: dict | None = None,
+    processes: int = 1,
+    nodata=np.nan,
+) -> Any:
+    """Run a function for each pixel of a 3d array."""
+    index_aligned_kwargs = index_aligned_kwargs or {}
+    kwargs = kwargs or {}
+
+    if mask_array is not None:
+        not_all_missing = np.all(mask_array, axis=0) == False
+
+    else:
+        mask_array = np.full(values.shape, False)
+        not_all_missing = np.full(values.shape[1:], True)
+
+    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
+
+    def select_pixel_values(row: int, col: int) -> np.ndarray:
+        return values[~mask_array[:, row, col], row, col]
+
+    with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
+        results: list[tuple[np.float64, np.float64]] = parallel(
+            joblib.delayed(func)(
+                select_pixel_values(row, col),
+                **kwargs,
+                **{
+                    key: value[~mask_array[:, row, col]]
+                    for key, value in index_aligned_kwargs.items()
+                },
+            )
+            for row, col in (
+                zip(nonmissing_row_indices, nonmissing_col_indices, strict=True)
+            )
+        )
+
+    if all(x is None for x in results):
+        return
+
+    try:
+        n_out_arrays = len(next(iter(results)))
+    except TypeError:
+        n_out_arrays = 1
+
+    out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))
+
+    counter = 0
+    for row, col in zip(nonmissing_row_indices, nonmissing_col_indices, strict=True):
+        these_results = results[counter]
+        if these_results is None:
+            counter += 1
+            continue
+        for i, arr in enumerate(out_arrays):
+            try:
+                arr[row, col] = these_results[i]
+            except TypeError:
+                arr[row, col] = these_results
+        counter += 1
+    assert counter == len(results), (counter, len(results))
+
+    if len(out_arrays) == 1:
+        return out_arrays[0]

-    return
+    return out_arrays