ssb-sgis 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/helpers.py +8 -2
- sgis/maps/explore.py +8 -29
- sgis/maps/map.py +5 -1
- sgis/raster/base.py +0 -54
- sgis/raster/image_collection.py +869 -546
- sgis/raster/indices.py +2 -5
- sgis/raster/regex.py +7 -2
- sgis/raster/sentinel_config.py +1 -71
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.7.dist-info}/METADATA +1 -1
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.7.dist-info}/RECORD +12 -12
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.7.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.7.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
@@ -2,7 +2,6 @@ import datetime
 import functools
 import glob
 import itertools
-import math
 import os
 import random
 import re
@@ -11,6 +10,7 @@ from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Iterator
 from collections.abc import Sequence
+from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import dataclass
 from pathlib import Path
@@ -27,6 +27,7 @@ from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from matplotlib.colors import LinearSegmentedColormap
+from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
 from scipy import stats
 from scipy.ndimage import binary_dilation
@@ -88,8 +89,10 @@ except ImportError:
 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
 from ..geopandas_tools.conversion import to_gdf
+from ..geopandas_tools.conversion import to_geoseries
 from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
+from ..helpers import _fix_path
 from ..helpers import get_all_files
 from ..helpers import get_numpy_func
 from ..io._is_dapla import is_dapla
@@ -101,7 +104,6 @@ from .base import _get_shape_from_bounds
 from .base import _get_transform_from_bounds
 from .base import get_index_mapper
 from .indices import ndvi
-from .regex import _any_regex_matches
 from .regex import _extract_regex_match_from_string
 from .regex import _get_first_group_match
 from .regex import _get_non_optional_groups
@@ -157,14 +159,21 @@ ALLOWED_INIT_KWARGS = [
     "band_class",
     "image_regexes",
     "filename_regexes",
-    "bounds_regexes",
     "all_bands",
     "crs",
+    "backend",
     "masking",
     "_merged",
-    "_add_metadata_attributes",
 ]
 
+_load_counter: int = 0
+
+
+def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
+    with ThreadPoolExecutor() as executor:
+        all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
+        return set(itertools.chain.from_iterable(all_paths))
+
 
 class ImageCollectionGroupBy:
     """Iterator and merger class returned from groupby.
@@ -216,7 +225,6 @@ class ImageCollectionGroupBy:
 
         collection = ImageCollection(
             images,
-            # TODO band_class?
             level=self.collection.level,
             **self.collection._common_init_kwargs,
         )
@@ -254,7 +262,6 @@ class ImageCollectionGroupBy:
 
         image = Image(
             bands,
-            # TODO band_class?
             **self.collection._common_init_kwargs,
         )
         image._merged = True
@@ -284,49 +291,20 @@ class ImageCollectionGroupBy:
         return f"{self.__class__.__name__}({len(self)})"
 
 
-def standardize_band_id(x: str) -> str:
-    return x.replace("B", "").replace("A", "").zfill(2)
-
-
-class BandIdDict(dict):
-    """Dict that tells the band initialiser to get the dict value of the band_id."""
-
-    def __init__(self, data: dict | None = None, **kwargs) -> None:
-        """Add dicts or kwargs."""
-        self._standardized_keys = {}
-        for key, value in ((data or {}) | kwargs).items():
-            setattr(self, key, value)
-            self._standardized_keys[standardize_band_id(key)] = value
-
-    def __len__(self) -> int:
-        """Number of items."""
-        return len({key for key in self.__dict__ if key != "_standardized_keys"})
-
-    def __getitem__(self, item: str) -> Any:
-        """Get dict value from key."""
-        try:
-            return getattr(self, item)
-        except AttributeError as e:
-            try:
-                return self._standardized_keys[standardize_band_id(item)]
-            except KeyError:
-                raise KeyError(item, self.__dict__) from e
-
-
 @dataclass(frozen=True)
 class BandMasking:
     """Frozen dict with forced keys."""
 
     band_id: str
-    values:
+    values: Sequence[int] | dict[int, Any]
 
     def __getitem__(self, item: str) -> Any:
         """Index into attributes to mimick dict."""
         return getattr(self, item)
 
 
-class
-    """
+class None_:
+    """Default value for keyword arguments that should not have a default."""
 
 
 class _ImageBase:
@@ -335,7 +313,7 @@ class _ImageBase:
     metadata_attributes: ClassVar[dict | None] = None
     masking: ClassVar[BandMasking | None] = None
 
-    def __init__(self, *, bbox=None, **kwargs) -> None:
+    def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
 
         self._mask = None
         self._bounds = None
@@ -344,9 +322,12 @@ class _ImageBase:
         self._from_gdf = False
         self.metadata_attributes = self.metadata_attributes or {}
         self._path = None
+        self._metadata_from_xml = False
 
         self._bbox = to_bbox(bbox) if bbox is not None else None
 
+        self.metadata = self._metadata_to_nested_dict(metadata)
+
         if self.filename_regexes:
             if isinstance(self.filename_regexes, str):
                 self.filename_regexes = (self.filename_regexes,)
@@ -374,14 +355,45 @@ class _ImageBase:
                 f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
             )
 
+    @staticmethod
+    def _metadata_to_nested_dict(
+        metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
+    ) -> dict[str, dict[str, Any]] | None:
+        if metadata is None:
+            return {}
+        if isinstance(metadata, (str | Path | os.PathLike)):
+            metadata = _read_parquet_func(metadata)
+
+        if isinstance(metadata, pd.DataFrame):
+
+            def is_scalar(x) -> bool:
+                return not hasattr(x, "__len__") or len(x) <= 1
+
+            def na_to_none(x) -> None:
+                """Convert to None rowwise because pandas doesn't always."""
+                return x if not (is_scalar(x) and pd.isna(x)) else None
+
+            # to nested dict because pandas indexing gives rare KeyError with long strings
+            metadata = {
+                _fix_path(path): {
+                    attr: na_to_none(value) for attr, value in row.items()
+                }
+                for path, row in metadata.iterrows()
+            }
+        elif is_dict_like(metadata):
+            metadata = {_fix_path(path): value for path, value in metadata.items()}
+
+        return metadata
+
     @property
     def _common_init_kwargs(self) -> dict:
         return {
-            "file_system": self.file_system,
             "processes": self.processes,
             "res": self.res,
             "bbox": self._bbox,
             "nodata": self.nodata,
+            "backend": self.backend,
+            "metadata": self.metadata,
         }
 
     @property
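Note: the new `_metadata_to_nested_dict` helper above accepts a parquet path, a DataFrame or a dict-like object and normalises it to one inner dict of attributes per (path-fixed) file path. A minimal sketch of the two accepted shapes, assuming hypothetical file paths; `cloud_cover_percentage` is the attribute that `ImageCollection.filter` uses further down in this file:

```python
import pandas as pd

# DataFrame form: the index holds file paths, the columns hold metadata attributes.
df = pd.DataFrame(
    {"cloud_cover_percentage": [12.5, 40.0]},
    index=["/data/S2_image_a/B02.tif", "/data/S2_image_b/B02.tif"],  # hypothetical paths
)

# Equivalent dict form: {file_path: {attribute: value}}.
as_dict = {path: dict(row) for path, row in df.iterrows()}
```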
@@ -401,6 +413,14 @@ class _ImageBase:
         """Centerpoint of the object."""
         return self.union_all().centroid
 
+    def assign(self, **kwargs) -> "_ImageBase":
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError:
+                setattr(self, f"_{key}", value)
+        return self
+
     def _name_regex_searcher(
         self, group: str, patterns: tuple[re.Pattern]
     ) -> str | None:
@@ -411,18 +431,28 @@ class _ImageBase:
                 return _get_first_group_match(pat, self.name)[group]
             except (TypeError, KeyError):
                 pass
+        if isinstance(self, Band):
+            for pat in patterns:
+                try:
+                    return _get_first_group_match(
+                        pat, str(Path(self.path).parent.name)
+                    )[group]
+                except (TypeError, KeyError):
+                    pass
         if not any(group in _get_non_optional_groups(pat) for pat in patterns):
             return None
+        band_text = (
+            f" or {Path(self.path).parent.name!s}" if isinstance(self, Band) else ""
+        )
         raise ValueError(
-            f"Couldn't find group '{group}' in name {self.name} with regex patterns {patterns}"
+            f"Couldn't find group '{group}' in name {self.name}{band_text} with regex patterns {patterns}"
         )
 
-    def _create_metadata_df(self, file_paths:
+    def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
         """Create a dataframe with file paths and image paths that match regexes."""
-        df = pd.DataFrame({"file_path": file_paths})
+        df = pd.DataFrame({"file_path": list(file_paths)})
 
-        df["
-        df["filename"] = df["file_path"].apply(lambda x: Path(x).name)
+        df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
 
         df["image_path"] = df["file_path"].apply(
             lambda x: _fix_path(str(Path(x).parent))
@@ -434,20 +464,20 @@ class _ImageBase:
         df = df[~df["file_path"].isin(df["image_path"])]
 
         if self.filename_patterns:
-            df = _get_regexes_matches_for_df(df, "
+            df = _get_regexes_matches_for_df(df, "file_name", self.filename_patterns)
 
             if not len(df):
                 return df
 
             grouped = df.drop_duplicates("image_path").set_index("image_path")
-            for col in ["file_path", "
+            for col in ["file_path", "file_name"]:
                 if col in df:
                     grouped[col] = df.groupby("image_path")[col].apply(tuple)
 
             grouped = grouped.reset_index()
         else:
             df["file_path"] = df.groupby("image_path")["file_path"].apply(tuple)
-            df["
+            df["file_name"] = df.groupby("image_path")["file_name"].apply(tuple)
             grouped = df.drop_duplicates("image_path")
 
         grouped["imagename"] = grouped["image_path"].apply(
@@ -521,7 +551,7 @@ class _ImageBandBase(_ImageBase):
             return self._name
         try:
             return Path(self.path).name
-        except (ValueError, AttributeError):
+        except (ValueError, AttributeError, TypeError):
             return None
 
     @name.setter
@@ -532,22 +562,31 @@ class _ImageBandBase(_ImageBase):
     def stem(self) -> str | None:
         try:
             return Path(self.path).stem
-        except (AttributeError, ValueError):
+        except (AttributeError, ValueError, TypeError):
             return None
 
     @property
     def level(self) -> str:
         return self._name_regex_searcher("level", self.image_patterns)
 
-    def
+    def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
 
-
-        for key, value in self.metadata_attributes.items():
-            if getattr(self, key) is None:
-                missing_attributes[key] = value
+        self._metadata_from_xml = True
 
-
-
+        missing_metadata_attributes = {
+            key: value
+            for key, value in metadata_attributes.items()
+            if not hasattr(self, key) or getattr(self, key) is None
+        }
+
+        nonmissing_metadata_attributes = {
+            key: getattr(self, key)
+            for key in metadata_attributes
+            if key not in missing_metadata_attributes
+        }
+
+        if not missing_metadata_attributes:
+            return nonmissing_metadata_attributes
 
         file_contents: list[str] = []
         for path in self._all_file_paths:
@@ -556,7 +595,7 @@ class _ImageBandBase(_ImageBase):
             with _open_func(path, "rb") as file:
                 file_contents.append(file.read().decode("utf-8"))
 
-        for key, value in
+        for key, value in missing_metadata_attributes.items():
             results = None
             for i, filetext in enumerate(file_contents):
                 if isinstance(value, str) and value in dir(self):
@@ -586,16 +625,46 @@ class _ImageBandBase(_ImageBase):
                 if i == len(self._all_file_paths) - 1:
                     raise e
 
-
-                results = results[self.band_id]
+            missing_metadata_attributes[key] = results
 
-
+        return missing_metadata_attributes | nonmissing_metadata_attributes
+
+    def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
+        """Convert the raster to an xarray.DataArray."""
+        if len(array.shape) == 2:
+            height, width = array.shape
+            dims = ["y", "x"]
+        elif len(array.shape) == 3:
+            height, width = array.shape[1:]
+            dims = ["band", "y", "x"]
+        else:
+            raise ValueError(
+                f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
+            )
+
+        coords = _generate_spatial_coords(transform, width, height)
+
+        attrs = {"crs": self.crs}
+        for attr in set(self.metadata_attributes).union({"date"}):
+            try:
+                attrs[attr] = getattr(self, attr)
+            except Exception:
+                pass
+
+        return DataArray(
+            array,
+            coords=coords,
+            dims=dims,
+            name=self.name or self.__class__.__name__,
+            attrs=attrs,
+        )
 
 
 class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""
 
     cmap: ClassVar[str | None] = None
+    backend: str = "numpy"
 
     @classmethod
     def from_gdf(
@@ -627,13 +696,12 @@ class Band(_ImageBandBase):
 
     def __init__(
         self,
-        data: str | np.ndarray,
-        res: int |
+        data: str | np.ndarray | None = None,
+        res: int | None_ = None_,
         crs: Any | None = None,
         bounds: tuple[float, float, float, float] | None = None,
         nodata: int | None = None,
         mask: "Band | None" = None,
-        file_system: GCSFileSystem | None = None,
         processes: int = 1,
         name: str | None = None,
         band_id: str | None = None,
@@ -642,6 +710,16 @@ class Band(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Band initialiser."""
+        if callable(res) and isinstance(res(), None_):
+            raise TypeError("Must specify 'res'")
+
+        if data is None:
+            # allowing 'path' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "path" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'path'.")
+            data = kwargs.pop("path")
+
         super().__init__(**kwargs)
 
         if isinstance(data, (str | Path | os.PathLike)) and any(
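Note: per the signature above, `res` is now effectively required (the `None_` sentinel raises a `TypeError`) and `data` may be replaced by a `path` keyword. A minimal construction sketch; the array, bounds and CRS values are made up:

```python
import numpy as np
from sgis.raster.image_collection import Band

# res is mandatory; bounds and crs are needed because data is an in-memory array.
band = Band(
    np.zeros((100, 100), dtype="uint8"),
    res=10,
    bounds=(0, 0, 1000, 1000),
    crs=25833,  # hypothetical EPSG code
)
```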
@@ -657,20 +735,13 @@ class Band(_ImageBandBase):
         self._bounds = bounds
         self._all_file_paths = all_file_paths
 
-        self._image = None
-
-        for key in self.metadata_attributes:
-            setattr(self, key, None)
-
         if isinstance(data, np.ndarray):
-            self.values = data
             if self._bounds is None:
                 raise ValueError("Must specify bounds when data is an array.")
             self._crs = crs
-            self.transform = _get_transform_from_bounds(
-                self._bounds, shape=self.values.shape
-            )
+            self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
             self._from_array = True
+            self.values = data
 
         elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError(
@@ -678,44 +749,48 @@ class Band(_ImageBandBase):
                 f"Got {type(data)}"
             )
         else:
-            self._path = str(data)
+            self._path = _fix_path(str(data))
 
         self._res = res
         if cmap is not None:
             self.cmap = cmap
-        self.file_system = file_system
         self._name = name
         self._band_id = band_id
         self.processes = processes
 
-        if
-
-
-
-
+        if self._all_file_paths:
+            self._all_file_paths = {_fix_path(path) for path in self._all_file_paths}
+            parent = _fix_path(Path(self.path).parent)
+            self._all_file_paths = {
+                path for path in self._all_file_paths if parent in path
+            }
+
+        if self.metadata:
+            if self.path is not None:
+                self.metadata = {
+                    key: value
+                    for key, value in self.metadata.items()
+                    if key == self.path
+                }
+            this_metadata = self.metadata[self.path]
+            for key, value in this_metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
+
+        elif self.metadata_attributes and self.path is not None and not self.is_mask:
             if self._all_file_paths is None:
                 self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
-            self.
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)
 
     def __lt__(self, other: "Band") -> bool:
         """Makes Bands sortable by band_id."""
         return self.band_id < other.band_id
 
-    # def __getattribute__(self, attr: str) -> Any:
-    #     # try:
-    #     # value =
-    #     # except AttributeError:
-    #     # value = None
-
-    #     if (
-    #         attr in (super().__getattribute__("metadata_attributes") or {})
-    #         and super().__getattribute__(attr) is None
-    #     ):
-    #         if self._all_file_paths is None:
-    #             self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
-    #         self._add_metadata_attributes()
-    #         return super().__getattribute__(attr)
-
     @property
     def values(self) -> np.ndarray:
         """The numpy array, if loaded."""
@@ -725,23 +800,35 @@ class Band(_ImageBandBase):
 
     @values.setter
     def values(self, new_val):
-        if
-
-
-
-
+        if self.backend == "numpy" and isinstance(new_val, np.ndarray):
+            self._values = new_val
+            return
+        elif self.backend == "xarray" and isinstance(new_val, DataArray):
+            # attrs can dissappear, so doing a union
+            attrs = self._values.attrs | new_val.attrs
+            self._values = new_val
+            self._values.attrs = attrs
+            return
+
+        if self.backend == "numpy":
+            self._values = self._to_numpy(new_val)
+        if self.backend == "xarray":
+            if not isinstance(self._values, DataArray):
+                self._values = self._to_xarray(
+                    new_val,
+                    transform=self.transform,
+                )
+
+            elif isinstance(new_val, np.ndarray):
+                self._values.values = new_val
+            else:
+                self._values = new_val
 
     @property
     def mask(self) -> "Band":
         """Mask Band."""
         return self._mask
 
-    @mask.setter
-    def mask(self, values: "Band") -> None:
-        if values is not None and not isinstance(values, Band):
-            raise TypeError(f"'mask' should be of type Band. Got {type(values)}")
-        self._mask = values
-
     @property
     def band_id(self) -> str:
         """Band id."""
@@ -779,11 +866,11 @@ class Band(_ImageBandBase):
         )
 
     @property
-    def crs(self) ->
+    def crs(self) -> pyproj.CRS | None:
         """Coordinate reference system."""
         if self._crs is None:
             self._add_crs_and_bounds()
-        return self._crs
+        return pyproj.CRS(self._crs)
 
     @property
     def bounds(self) -> tuple[int, int, int, int] | None:
@@ -793,7 +880,7 @@ class Band(_ImageBandBase):
         return self._bounds
 
     def _add_crs_and_bounds(self) -> None:
-        with opener(self.path
+        with opener(self.path) as file:
             with rasterio.open(file) as src:
                 self._bounds = to_bbox(src.bounds)
                 self._crs = src.crs
@@ -820,44 +907,64 @@ class Band(_ImageBandBase):
             df[column] = f"smallest_{n}"
         return df
 
+    def clip(
+        self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
+    ) -> "Band":
+        """Clip band values to geometry mask."""
+        values = _clip_xarray(
+            self.to_xarray(),
+            mask,
+            crs=self.crs,
+            **kwargs,
+        )
+        self._bounds = to_bbox(mask)
+        self.transform = _get_transform_from_bounds(self._bounds, values.shape)
+        self.values = values
+        return self
+
     def load(
         self,
         bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
         indexes: int | tuple[int] | None = None,
         masked: bool | None = None,
+        file_system=None,
         **kwargs,
     ) -> "Band":
         """Load and potentially clip the array.
 
         The array is stored in the 'values' property.
         """
+        global _load_counter
+        _load_counter += 1
+
         if masked is None:
             masked = True if self.mask is None else False
 
         bounds_was_none = bounds is None
 
-        bounds = _get_bounds(bounds, self._bbox)
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())
 
         should_return_empty: bool = bounds is not None and bounds.area == 0
         if should_return_empty:
             self._values = np.array([])
             if self.mask is not None and not self.is_mask:
-                self._mask = self._mask.load(
+                self._mask = self._mask.load(
+                    bounds=bounds, indexes=indexes, file_system=file_system
+                )
             self._bounds = None
             self.transform = None
-
-
-            except AttributeError:
-                pass
+            self.values = self._values
+
             return self
 
         if self.has_array and bounds_was_none:
             return self
 
-        # round down/up to integer to avoid precision trouble
         if bounds is not None:
             minx, miny, maxx, maxy = to_bbox(bounds)
-
+            ## round down/up to integer to avoid precision trouble
+            # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
+            bounds = minx, miny, maxx, maxy
 
         if indexes is None:
             indexes = 1
@@ -868,126 +975,132 @@ class Band(_ImageBandBase):
         # allow setting a fixed out_shape for the array, in order to make mask same shape as values
         out_shape = kwargs.pop("out_shape", None)
 
-        if self.has_array:
-            self
-
+        if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
+            print(self)
+            print(self.mask)
+            print(self.mask.values.shape)
+            print(self.values.shape)
+            print([int(x) for x in bounds], [int(x) for x in self.bounds])
+            raise ValueError(
+                "Cannot re-load array with different bounds. "
+                "Use .copy() to read with different bounds. "
+                "Or .clip(mask) to clip."
             )
-
-
-
-
-        with opener(self.path, file_system=self.file_system) as f:
-            with rasterio.open(f, nodata=self.nodata) as src:
-                self._res = int(src.res[0]) if not self.res else self.res
-
-                if self.nodata is None or np.isnan(self.nodata):
-                    self.nodata = src.nodata
-                else:
-                    dtype_min_value = _get_dtype_min(src.dtypes[0])
-                    dtype_max_value = _get_dtype_max(src.dtypes[0])
-                    if (
-                        self.nodata > dtype_max_value
-                        or self.nodata < dtype_min_value
-                    ):
-                        src._dtypes = tuple(
-                            rasterio.dtypes.get_minimum_dtype(self.nodata)
-                            for _ in range(len(_indexes))
-                        )
+        # with opener(self.path, file_system=self.file_system) as f:
+        with opener(self.path, file_system=file_system) as f:
+            with rasterio.open(f, nodata=self.nodata) as src:
+                self._res = int(src.res[0]) if not self.res else self.res
 
-
-
-
-
-
-
-
-
-                )
-                else:
-                    self.transform = src.transform
-
-                    self._values = src.read(
-                        indexes=indexes,
-                        out_shape=out_shape,
-                        masked=masked,
-                        **kwargs,
-                    )
-                else:
-                    window = rasterio.windows.from_bounds(
-                        *bounds, transform=src.transform
+                if self.nodata is None or np.isnan(self.nodata):
+                    self.nodata = src.nodata
+                else:
+                    dtype_min_value = _get_dtype_min(src.dtypes[0])
+                    dtype_max_value = _get_dtype_max(src.dtypes[0])
+                    if self.nodata > dtype_max_value or self.nodata < dtype_min_value:
+                        src._dtypes = tuple(
+                            rasterio.dtypes.get_minimum_dtype(self.nodata)
+                            for _ in range(len(_indexes))
                         )
 
+                if bounds is None:
+                    if self._res != int(src.res[0]):
                         if out_shape is None:
                             out_shape = _get_shape_from_bounds(
-                                bounds, self.res, indexes
+                                to_bbox(src.bounds), self.res, indexes
                             )
-
-
-                        indexes=indexes,
-                        window=window,
-                        boundless=False,
-                        out_shape=out_shape,
-                        masked=masked,
-                        **kwargs,
+                        self.transform = _get_transform_from_bounds(
+                            to_bbox(src.bounds), shape=out_shape
                         )
+                    else:
+                        self.transform = src.transform
 
-
-
-
-
+                    values = src.read(
+                        indexes=indexes,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+                else:
+                    window = rasterio.windows.from_bounds(
+                        *bounds, transform=src.transform
+                    )
+
+                    if out_shape is None:
+                        out_shape = _get_shape_from_bounds(bounds, self.res, indexes)
 
+                    values = src.read(
+                        indexes=indexes,
+                        window=window,
+                        boundless=False,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+
+                    assert out_shape == values.shape, (
+                        out_shape,
+                        values.shape,
+                    )
+
+                    width, height = values.shape[-2:]
+
+                    if width and height:
                         self.transform = rasterio.transform.from_bounds(
-                            *bounds,
+                            *bounds, width, height
                         )
-                    self._bounds = bounds
 
-
-
-
-
-
-                else:
-                    self.values[self.values == src.nodata] = self.nodata
+                if self.nodata is not None and not np.isnan(self.nodata):
+                    if isinstance(values, np.ma.core.MaskedArray):
+                        values.data[values.data == src.nodata] = self.nodata
+                    else:
+                        values[values == src.nodata] = self.nodata
 
         if self.masking and self.is_mask:
-
+            values = np.isin(values, list(self.masking["values"]))
 
-        elif self.mask is not None and not isinstance(
-
-
-
-
-
+        elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
+
+            if not self.mask.has_array:
+                self._mask = self.mask.load(
+                    bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
+                )
             mask_arr = self.mask.values
 
-
-                self._values, mask=mask_arr, fill_value=self.nodata
-            )
+            values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
 
-
-            self.
-
-
+        if bounds is not None:
+            self._bounds = to_bbox(bounds)
+
+        self._values = values
+        # trigger the setter
+        self.values = values
 
         return self
 
     @property
     def is_mask(self) -> bool:
         """True if the band_id is equal to the masking band_id."""
+        if self.masking is None:
+            return False
         return self.band_id == self.masking["band_id"]
 
     @property
     def has_array(self) -> bool:
         """Whether the array is loaded."""
         try:
-            if not isinstance(self.values, np.ndarray):
+            if not isinstance(self.values, (np.ndarray | DataArray)):
                 raise ValueError()
             return True
         except ValueError:  # also catches ArrayNotLoadedError
             return False
 
     def write(
-        self,
+        self,
+        path: str | Path,
+        driver: str = "GTiff",
+        compress: str = "LZW",
+        file_system=None,
+        **kwargs,
     ) -> None:
         """Write the array as an image file."""
         if not hasattr(self, "_values"):
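Note: with the reworked `load` above, a Band's array is read once; re-loading with different bounds raises, and the new `clip` method narrows an already loaded array instead. A hedged usage sketch with a hypothetical file path and a hypothetical `area` GeoDataFrame in the band's CRS:

```python
from sgis.raster.image_collection import Band

band = Band("/data/S2_image_a/B02.tif", res=10)  # hypothetical path
band.load()              # reads the array into band.values
band = band.clip(area)   # clips values to the geometry and updates bounds/transform
```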
@@ -1010,7 +1123,8 @@ class Band(_ImageBandBase):
             "width": self.width,
         } | kwargs
 
-        with opener(path, "wb", file_system=self.file_system) as f:
+        # with opener(path, "wb", file_system=self.file_system) as f:
+        with opener(path, "wb", file_system=file_system) as f:
             with rasterio.open(f, "w", **profile) as dst:
 
                 if dst.nodata is None:
@@ -1032,17 +1146,14 @@ class Band(_ImageBandBase):
                 if isinstance(self.values, np.ma.core.MaskedArray):
                     dst.write_mask(self.values.mask)
 
-        self._path = str(path)
+        self._path = _fix_path(str(path))
 
     def apply(self, func: Callable, **kwargs) -> "Band":
-        """Apply a function to the
-
-
-
-
-        """Normalize array values between 0 and 1."""
-        arr = self.values
-        self.values = (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
+        """Apply a function to the Band."""
+        results = func(self, **kwargs)
+        if isinstance(results, Band):
+            return results
+        self.values = results
         return self
 
     def sample(self, size: int = 1000, mask: Any = None, **kwargs) -> "Image":
@@ -1200,23 +1311,43 @@ class Band(_ImageBandBase):
         )
 
     def to_xarray(self) -> DataArray:
-        """Convert the raster to
-
-
-
-            dims = ["y", "x"]
-        elif len(self.values.shape) == 3:
-            dims = ["band", "y", "x"]
-        else:
-            raise ValueError("Array must be 2 or 3 dimensional.")
-        return xr.DataArray(
+        """Convert the raster to an xarray.DataArray."""
+        if self.backend == "xarray":
+            return self.values
+        return self._to_xarray(
             self.values,
-
-
-            name=name,
-            attrs={"crs": self.crs},
+            transform=self.transform,
+            # name=self.name or self.__class__.__name__.lower(),
         )
 
+    def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
+        """Convert the raster to a numpy.ndarray."""
+        return self._to_numpy(self.values).copy()
+
+    def _to_numpy(
+        self, arr: np.ndarray | DataArray, masked: bool = True
+    ) -> np.ndarray | np.ma.core.MaskedArray:
+        if not isinstance(arr, np.ndarray):
+            if masked:
+                try:
+                    mask_arr = arr.isnull().values
+                except AttributeError:
+                    mask_arr = np.full(arr.shape, False)
+            try:
+                arr = arr.to_numpy()
+            except AttributeError:
+                arr = arr.values
+            if not isinstance(arr, np.ndarray):
+                arr = np.array(arr)
+        if (
+            masked
+            and self.mask is not None
+            and not self.is_mask
+            and not isinstance(arr, np.ma.core.MaskedArray)
+        ):
+            arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+        return arr
+
     def __repr__(self) -> str:
         """String representation."""
         try:
@@ -1252,12 +1383,12 @@ class Image(_ImageBandBase):
     """Image consisting of one or more Bands."""
 
     band_class: ClassVar[Band] = Band
+    backend: str = "numpy"
 
     def __init__(
         self,
-        data: str | Path | Sequence[Band],
+        data: str | Path | Sequence[Band] | None = None,
         res: int | None = None,
-        file_system: GCSFileSystem | None = None,
         processes: int = 1,
         df: pd.DataFrame | None = None,
         nodata: int | None = None,
@@ -1265,44 +1396,38 @@ class Image(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Image initialiser."""
+        if data is None:
+            # allowing 'bands' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "bands" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'bands'.")
+            data = kwargs.pop("bands")
+
         super().__init__(**kwargs)
 
         self.nodata = nodata
-        self._res = res
-        self._crs = None
-        self.file_system = file_system
         self.processes = processes
+        self._crs = None
+        self._bands = None
 
         if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
-            self.
-            if res is None:
-                res = list({band.res for band in self.bands})
-                if len(res) == 1:
-                    self._res = res[0]
-                else:
-                    raise ValueError(f"Different resolutions for the bands: {res}")
-            else:
-                self._res = res
+            self._construct_image_from_bands(data, res)
             return
-
-        if not isinstance(data, (str | Path | os.PathLike)):
+        elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError("'data' must be string, Path-like or a sequence of Band.")
 
-        self.
-        self._path = _fix_path(data)
+        self._res = res
+        self._path = _fix_path(data)
 
         if all_file_paths is None and self.path:
             self._all_file_paths = _get_all_file_paths(self.path)
         elif self.path:
-
-
-            ]
+            all_file_paths = {_fix_path(x) for x in all_file_paths}
+            self._all_file_paths = {x for x in all_file_paths if self.path in x}
         else:
             self._all_file_paths = None
 
         if df is None:
-            # file_paths = _get_all_file_paths(self.path)
-
             if not self._all_file_paths:
                 self._all_file_paths = [self.path]
             df = self._create_metadata_df(self._all_file_paths)
@@ -1311,7 +1436,7 @@ class Image(_ImageBandBase):
 
         cols_to_explode = [
             "file_path",
-            "
+            "file_name",
             *[x for x in df if FILENAME_COL_SUFFIX in x],
         ]
         try:
@@ -1319,34 +1444,82 @@ class Image(_ImageBandBase):
         except ValueError:
             for col in cols_to_explode:
                 df = df.explode(col)
-            df = df.loc[lambda x: ~x["
+            df = df.loc[lambda x: ~x["file_name"].duplicated()].reset_index(drop=True)
 
-        df = df.loc[lambda x: x["image_path"] ==
+        df = df.loc[lambda x: x["image_path"] == self.path]
 
         self._df = df
 
+        if self.path is not None and self.metadata:
+            self.metadata = {
+                key: value for key, value in self.metadata.items() if self.path in key
+            }
+
+        if self.metadata:
+            try:
+                metadata = self.metadata[self.path]
+            except KeyError:
+                metadata = {}
+            for key, value in metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
+
+        else:
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)
+
+    def _construct_image_from_bands(
+        self, data: Sequence[Band], res: int | None
+    ) -> None:
+        self._bands = list(data)
+        if res is None:
+            res = list({band.res for band in self.bands})
+            if len(res) == 1:
+                self._res = res[0]
+            else:
+                raise ValueError(f"Different resolutions for the bands: {res}")
+        else:
+            self._res = res
         for key in self.metadata_attributes:
-
+            band_values = {getattr(band, key) for band in self if hasattr(band, key)}
+            band_values = {x for x in band_values if x is not None}
+            if len(band_values) > 1:
+                raise ValueError(f"Different {key} values in bands: {band_values}")
+            elif len(band_values):
+                try:
+                    setattr(self, key, next(iter(band_values)))
+                except AttributeError:
+                    setattr(self, f"_{key}", next(iter(band_values)))
 
-
-
+    def copy(self) -> "Image":
+        """Copy the instance and its attributes."""
+        copied = super().copy()
+        for band in copied:
+            band._mask = copied._mask
+        return copied
 
-
-
-        ""
-
-
-
-
-
-
-
-
+    def apply(self, func: Callable, **kwargs) -> "Image":
+        """Apply a function to each band of the Image."""
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(joblib.delayed(_band_apply)(band, func, **kwargs) for band in self)
+
+        return self
+
+    def ndvi(
+        self, red_band: str, nir_band: str, padding: int = 0, copy: bool = True
+    ) -> NDVIBand:
         """Calculate the NDVI for the Image."""
         copied = self.copy() if copy else self
         red = copied[red_band].load()
         nir = copied[nir_band].load()
 
-        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
+        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
+            red.values, nir.values, padding=padding
+        )
 
         return NDVIBand(
             arr,
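Note: `Band.apply` (shown earlier) passes the Band itself to the function and stores a non-Band result as the new values, while `Image.ndvi` above now takes the red and NIR band ids explicitly. A hedged sketch; the band ids are illustrative Sentinel-2 style ids and `band`/`image` are assumed to exist:

```python
# A non-Band result from the callable becomes the band's new values.
scaled = band.apply(lambda b: b.values * 0.0001)

# NDVI from explicitly named red/NIR bands; returns an NDVIBand.
ndvi_band = image.ndvi(red_band="B04", nir_band="B08")
```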
@@ -1390,56 +1563,61 @@ class Image(_ImageBandBase):
 
     def to_xarray(self) -> DataArray:
         """Convert the raster to an xarray.DataArray."""
-
-
-
-
-
-
-            self.values,
-            coords=coords,
-            dims=dims,
-            name=name,
-            attrs={"crs": self.crs},
+        if self.backend == "xarray":
+            return self.values
+
+        return self._to_xarray(
+            np.array([band.values for band in self]),
+            transform=self[0].transform,
         )
 
     @property
     def mask(self) -> Band | None:
         """Mask Band."""
-        if self._mask is not None:
-            # if not self._mask.has_array:
-            #     try:
-            #         self._mask.values = self[0]._mask.values
-            #     except Exception:
-            #         pass
-            return self._mask
         if self.masking is None:
             return None
 
+        elif self._mask is not None:
+            return self._mask
+
+        elif self._bands is not None and all(band.mask is not None for band in self):
+            if len({id(band.mask) for band in self}) > 1:
+                raise ValueError(
+                    "Image bands must have same mask.",
+                    {id(band.mask) for band in self},
+                )  # TODO
+            self._mask = next(
+                iter([band.mask for band in self if band.mask is not None])
+            )
+            return self._mask
+
         mask_band_id = self.masking["band_id"]
-        mask_paths = [path for path in self.
+        mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
         if len(mask_paths) > 1:
             raise ValueError(
                 f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
             )
         elif not mask_paths:
             raise ValueError(
-                f"No file_paths match mask band_id {mask_band_id} for {self.path}"
+                f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
+                + str([Path(x).name for x in _ls_func(self.path)])
             )
+
         self._mask = self.band_class(
             mask_paths[0],
-            _add_metadata_attributes=False,
             **self._common_init_kwargs,
         )
-
+        if self._bands is not None:
+            for band in self:
+                band._mask = self._mask
         return self._mask
 
     @mask.setter
-    def mask(self, values: Band) -> None:
+    def mask(self, values: Band | None) -> None:
         if values is None:
             self._mask = None
             for band in self:
-                band.
+                band._mask = None
             return
         if not isinstance(values, Band):
             raise TypeError(f"mask must be Band. Got {type(values)}")
@@ -1449,7 +1627,7 @@ class Image(_ImageBandBase):
             band._mask = self._mask
             try:
                 band.values = np.ma.array(
-                    band.values, mask=mask_arr, fill_value=band.nodata
+                    band.values.data, mask=mask_arr, fill_value=band.nodata
                 )
             except ArrayNotLoadedError:
                 pass
@@ -1470,22 +1648,24 @@ class Image(_ImageBandBase):
         if self._bands is not None:
             return self._bands
 
+        if self.masking:
+            mask_band_id = self.masking["band_id"]
+            paths = [path for path in self._df["file_path"] if mask_band_id not in path]
+        else:
+            paths = self._df["file_path"]
+
+        mask = self.mask
+
         self._bands = [
             self.band_class(
                 path,
-                mask=
-
+                mask=mask,
+                all_file_paths=self._all_file_paths,
                 **self._common_init_kwargs,
             )
-            for path in
+            for path in paths
         ]
 
-        if self.masking:
-            mask_band_id = self.masking["band_id"]
-            self._bands = [
-                band for band in self._bands if mask_band_id not in band.path
-            ]
-
         if (
             self.filename_patterns
             and any(_get_non_optional_groups(pat) for pat in self.filename_patterns)
@@ -1514,30 +1694,19 @@ class Image(_ImageBandBase):
         if self._should_be_sorted:
             self._bands = list(sorted(self._bands))
 
-        for key in self.metadata_attributes:
-            for band in self:
-                value = getattr(self, key)
-                if value is None:
-                    continue
-                if isinstance(value, BandIdDict):
-                    try:
-                        value = value[band.band_id]
-                    except KeyError:
-                        continue
-                setattr(band, key, value)
-
-        for band in self:
-            band._image = self
-
         return self._bands
 
     @property
     def _should_be_sorted(self) -> bool:
         sort_groups = ["band", "band_id"]
-        return
-
-
-
+        return (
+            self.filename_patterns
+            and any(
+                group in _get_non_optional_groups(pat)
+                for group in sort_groups
+                for pat in self.filename_patterns
+            )
+            or all(band.band_id is not None for band in self)
         )
 
     @property
@@ -1613,7 +1782,7 @@ class Image(_ImageBandBase):
         if isinstance(band, str):
             return self._get_band(band)
         if isinstance(band, int):
-            return self.bands[band]
+            return self.bands[band]
 
         copied = self.copy()
         try:
@@ -1639,10 +1808,7 @@ class Image(_ImageBandBase):
         try:
             return self.date < other.date
         except Exception as e:
-            print(self.path)
-            print(self.date)
-            print(other.path)
-            print(other.date)
+            print("", self.path, self.date, other.path, other.date, sep="\n")
             raise e
 
     def __iter__(self) -> Iterator[Band]:
@@ -1702,36 +1868,36 @@ class ImageCollection(_ImageBase):
     image_class: ClassVar[Image] = Image
     band_class: ClassVar[Band] = Band
     _metadata_attribute_collection_type: ClassVar[type] = pd.Series
+    backend: str = "numpy"
 
     def __init__(
         self,
         data: str | Path | Sequence[Image] | Sequence[str | Path],
         res: int,
-        level: str | None =
+        level: str | None = None_,
         processes: int = 1,
-        file_system: GCSFileSystem | None = None,
         metadata: str | dict | pd.DataFrame | None = None,
         nodata: int | None = None,
         **kwargs,
     ) -> None:
         """Initialiser."""
-
+        if data is not None and kwargs.get("root"):
+            root = _fix_path(kwargs.pop("root"))
+            data = [f"{root}/{name}" for name in data]
+            _from_root = True
+        else:
+            _from_root = False
+
+        super().__init__(metadata=metadata, **kwargs)
+
+        if callable(level) and isinstance(level(), None_):
+            level = None
 
         self.nodata = nodata
-        self.level = level
+        self.level = level
         self.processes = processes
-        self.file_system = file_system
         self._res = res
-        self.
-        self._crs = None  # crs
-
-        if metadata is not None:
-            if isinstance(metadata, (str | Path | os.PathLike)):
-                self.metadata = _read_parquet_func(metadata)
-            else:
-                self.metadata = metadata
-        else:
-            self.metadata = metadata
+        self._crs = None
 
         self._df = None
         self._all_file_paths = None
@@ -1743,18 +1909,22 @@ class ImageCollection(_ImageBase):
             self.images = [x.copy() for x in data]
             return
         elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
-
-
-
-
-
-
+            # adding band paths (asuming 'data' is a sequence of image paths)
+            try:
+                self._all_file_paths = _get_child_paths_threaded(data) | set(data)
+            except FileNotFoundError as e:
+                if _from_root:
+                    raise TypeError(
+                        "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
+                    ) from e
+                raise e
+            self._df = self._create_metadata_df(self._all_file_paths)
             return
 
         if not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError("'data' must be string, Path-like or a sequence of Image.")
 
-        self._path = str(data)
+        self._path = _fix_path(str(data))
 
         self._all_file_paths = _get_all_file_paths(self.path)
 
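Note: the constructor above now also accepts a sequence of image paths, optionally combined with a `root` keyword that is prefixed onto each name before the child file paths are listed with `_get_child_paths_threaded`. A hedged sketch with made-up folder names:

```python
from sgis.raster.image_collection import ImageCollection

collection = ImageCollection(
    ["S2A_T32VNM_20230601", "S2A_T32VNM_20230611"],  # hypothetical image folder names
    root="/data/sentinel2",                          # joined as /data/sentinel2/<name>
    res=10,
)
```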
@@ -1765,18 +1935,6 @@ class ImageCollection(_ImageBase):
 
         self._df = self._create_metadata_df(self._all_file_paths)
 
-    @property
-    def values(self) -> np.ndarray:
-        """4 dimensional numpy array."""
-        if isinstance(self[0].values, np.ma.core.MaskedArray):
-            return np.ma.array([img.values for img in self])
-        return np.array([img.values for img in self])
-
-    @property
-    def mask(self) -> np.ndarray:
-        """4 dimensional numpy array."""
-        return np.array([img.mask.values for img in self])
-
     def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
         """Group the Collection by Image or Band attribute(s)."""
         df = pd.DataFrame(
@@ -1830,15 +1988,20 @@ class ImageCollection(_ImageBase):
         for img in copied:
             assert len(img) == 1
             try:
-                img._path = img[0].path
+                img._path = _fix_path(img[0].path)
             except PathlessImageError:
                 pass
         return copied
 
     def apply(self, func: Callable, **kwargs) -> "ImageCollection":
         """Apply a function to all bands in each image of the collection."""
-
-
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(
+                joblib.delayed(_band_apply)(band, func, **kwargs)
+                for img in self
+                for band in img
+            )
+
         return self
 
     def get_unique_band_ids(self) -> list[str]:
@@ -1851,7 +2014,7 @@ class ImageCollection(_ImageBase):
         date_ranges: DATE_RANGES_TYPE = None,
         bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
         intersects: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
-
+        max_cloud_cover: int | None = None,
         copy: bool = True,
     ) -> "ImageCollection":
         """Filter images and bands in the collection."""
@@ -1860,11 +2023,11 @@ class ImageCollection(_ImageBase):
         if date_ranges:
             copied = copied._filter_dates(date_ranges)
 
-        if
+        if max_cloud_cover is not None:
             copied.images = [
                 image
                 for image in copied.images
-                if image.
+                if image.cloud_cover_percentage < max_cloud_cover
             ]
 
         if bbox is not None:
@@ -1878,7 +2041,6 @@ class ImageCollection(_ImageBase):
         if isinstance(bands, str):
             bands = [bands]
         bands = set(bands)
-        copied._band_ids = bands
         copied.images = [img[bands] for img in copied.images if bands in img]
 
         return copied
@@ -1892,7 +2054,7 @@ class ImageCollection(_ImageBase):
         **kwargs,
     ) -> Band:
         """Merge all areas and all bands to a single Band."""
-        bounds = _get_bounds(bounds, self._bbox)
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())
         if bounds is not None:
             bounds = to_bbox(bounds)
 
@@ -1930,14 +2092,14 @@ class ImageCollection(_ImageBase):
|
|
|
1930
2092
|
**kwargs,
|
|
1931
2093
|
)
|
|
1932
2094
|
|
|
1933
|
-
|
|
1934
|
-
|
|
2095
|
+
if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
|
|
2096
|
+
arr = arr[0]
|
|
1935
2097
|
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
2098
|
+
if method == "mean":
|
|
2099
|
+
if as_int:
|
|
2100
|
+
arr = arr // len(datasets)
|
|
2101
|
+
else:
|
|
2102
|
+
arr = arr / len(datasets)
|
|
1941
2103
|
|
|
1942
2104
|
if bounds is None:
|
|
1943
2105
|
bounds = self.bounds
|
|
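The new lines above turn the summed merge result into a mean: plain division for floats, floor division when `as_int` is set so the array keeps its integer dtype. A small numpy illustration of the difference (the summed array here is made up):

```python
import numpy as np

summed = np.array([[300, 601], [99, 0]], dtype=np.uint16)  # sum over 3 rasters
n_datasets = 3

float_mean = summed / n_datasets   # promotes to float64
int_mean = summed // n_datasets    # floor division, stays uint16

print(float_mean.dtype, int_mean.dtype)  # float64 uint16
print(int_mean)                          # [[100 200] [ 33   0]]
```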
@@ -1963,7 +2125,7 @@ class ImageCollection(_ImageBase):
|
|
|
1963
2125
|
**kwargs,
|
|
1964
2126
|
) -> Image:
|
|
1965
2127
|
"""Merge all areas to a single tile, one band per band_id."""
|
|
1966
|
-
bounds = _get_bounds(bounds, self._bbox)
|
|
2128
|
+
bounds = _get_bounds(bounds, self._bbox, self.union_all())
|
|
1967
2129
|
if bounds is not None:
|
|
1968
2130
|
bounds = to_bbox(bounds)
|
|
1969
2131
|
bounds = self.bounds if bounds is None else bounds
|
|
@@ -2021,7 +2183,6 @@ class ImageCollection(_ImageBase):
|
|
|
2021
2183
|
bounds=out_bounds,
|
|
2022
2184
|
crs=crs,
|
|
2023
2185
|
band_id=band_id,
|
|
2024
|
-
_add_metadata_attributes=False,
|
|
2025
2186
|
**self._common_init_kwargs,
|
|
2026
2187
|
)
|
|
2027
2188
|
)
|
|
@@ -2061,10 +2222,13 @@ class ImageCollection(_ImageBase):
|
|
|
2061
2222
|
arr = np.array(
|
|
2062
2223
|
[
|
|
2063
2224
|
(
|
|
2064
|
-
band.load(
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
)
|
|
2225
|
+
# band.load(
|
|
2226
|
+
# bounds=(_bounds if _bounds is not None else None),
|
|
2227
|
+
# **kwargs,
|
|
2228
|
+
# )
|
|
2229
|
+
# if not band.has_array
|
|
2230
|
+
# else
|
|
2231
|
+
band
|
|
2068
2232
|
).values
|
|
2069
2233
|
for img in collection
|
|
2070
2234
|
for band in img
|
|
@@ -2087,7 +2251,7 @@ class ImageCollection(_ImageBase):
|
|
|
2087
2251
|
coords = _generate_spatial_coords(transform, width, height)
|
|
2088
2252
|
|
|
2089
2253
|
arrs.append(
|
|
2090
|
-
|
|
2254
|
+
DataArray(
|
|
2091
2255
|
arr,
|
|
2092
2256
|
coords=coords,
|
|
2093
2257
|
dims=["y", "x"],
|
|
@@ -2104,7 +2268,7 @@ class ImageCollection(_ImageBase):
|
|
|
2104
2268
|
return merged.to_numpy()
|
|
2105
2269
|
|
|
2106
2270
|
def sort_images(self, ascending: bool = True) -> "ImageCollection":
|
|
2107
|
-
"""Sort Images by date."""
|
|
2271
|
+
"""Sort Images by date, then file path if date attribute is missing."""
|
|
2108
2272
|
self._images = (
|
|
2109
2273
|
list(sorted([img for img in self if img.date is not None]))
|
|
2110
2274
|
+ sorted(
|
|
@@ -2121,6 +2285,7 @@ class ImageCollection(_ImageBase):
|
|
|
2121
2285
|
self,
|
|
2122
2286
|
bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
|
|
2123
2287
|
indexes: int | tuple[int] | None = None,
|
|
2288
|
+
file_system=None,
|
|
2124
2289
|
**kwargs,
|
|
2125
2290
|
) -> "ImageCollection":
|
|
2126
2291
|
"""Load all image Bands with threading."""
|
|
@@ -2130,10 +2295,46 @@ class ImageCollection(_ImageBase):
|
|
|
2130
2295
|
and all(band.has_array for img in self for band in img)
|
|
2131
2296
|
):
|
|
2132
2297
|
return self
|
|
2298
|
+
|
|
2299
|
+
# if self.processes == 1:
|
|
2300
|
+
# for img in self:
|
|
2301
|
+
# for band in img:
|
|
2302
|
+
# band.load(
|
|
2303
|
+
# bounds=bounds,
|
|
2304
|
+
# indexes=indexes,
|
|
2305
|
+
# file_system=file_system,
|
|
2306
|
+
# **kwargs,
|
|
2307
|
+
# )
|
|
2308
|
+
# return self
|
|
2309
|
+
|
|
2133
2310
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2311
|
+
if self.masking:
|
|
2312
|
+
parallel(
|
|
2313
|
+
joblib.delayed(_load_band)(
|
|
2314
|
+
img.mask,
|
|
2315
|
+
bounds=bounds,
|
|
2316
|
+
indexes=indexes,
|
|
2317
|
+
file_system=file_system,
|
|
2318
|
+
**kwargs,
|
|
2319
|
+
)
|
|
2320
|
+
for img in self
|
|
2321
|
+
)
|
|
2322
|
+
for img in self:
|
|
2323
|
+
for band in img:
|
|
2324
|
+
band._mask = img.mask
|
|
2325
|
+
|
|
2326
|
+
# print({img.mask.has_array for img in self })
|
|
2327
|
+
# print({band.mask.has_array for img in self for band in img})
|
|
2328
|
+
|
|
2329
|
+
# with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2330
|
+
|
|
2134
2331
|
parallel(
|
|
2135
2332
|
joblib.delayed(_load_band)(
|
|
2136
|
-
band,
|
|
2333
|
+
band,
|
|
2334
|
+
bounds=bounds,
|
|
2335
|
+
indexes=indexes,
|
|
2336
|
+
file_system=file_system,
|
|
2337
|
+
**kwargs,
|
|
2137
2338
|
)
|
|
2138
2339
|
for img in self
|
|
2139
2340
|
for band in img
|
|
@@ -2141,6 +2342,27 @@ class ImageCollection(_ImageBase):
|
|
|
2141
2342
|
|
|
2142
2343
|
return self
|
|
2143
2344
|
|
|
2345
|
+
def clip(
|
|
2346
|
+
self,
|
|
2347
|
+
mask: Geometry | GeoDataFrame | GeoSeries,
|
|
2348
|
+
**kwargs,
|
|
2349
|
+
) -> "ImageCollection":
|
|
2350
|
+
"""Clip all image Bands with 'loky'."""
|
|
2351
|
+
if self.processes == 1:
|
|
2352
|
+
for img in self:
|
|
2353
|
+
for band in img:
|
|
2354
|
+
band.clip(mask, **kwargs)
|
|
2355
|
+
return self
|
|
2356
|
+
|
|
2357
|
+
with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
|
|
2358
|
+
parallel(
|
|
2359
|
+
joblib.delayed(_clip_band)(band, mask, **kwargs)
|
|
2360
|
+
for img in self
|
|
2361
|
+
for band in img
|
|
2362
|
+
)
|
|
2363
|
+
|
|
2364
|
+
return self
|
|
2365
|
+
|
|
2144
2366
|
def _set_bbox(
|
|
2145
2367
|
self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
|
|
2146
2368
|
) -> "ImageCollection":
|
|
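The new `clip` above follows the same shape as `load`: sequential when `processes == 1`, otherwise a joblib fan-out, with `load` using the "threading" backend (shared memory, suited to I/O-bound reads) and `clip`/`apply` using "loky" (separate processes). A hypothetical end-to-end call, with placeholder path, CRS and geometry:

```python
import sgis as sg
from shapely.geometry import box

collection = sg.Sentinel2Collection(
    "/path/to/sentinel2/images",  # placeholder
    level="L2A",                  # assumed level identifier
    res=10,
    processes=4,
)

# Placeholder area of interest in an assumed projected CRS.
aoi = sg.to_gdf(box(569_000, 7_032_000, 570_000, 7_033_000), crs=25833)

# Load arrays (threaded), then clip every band to the area of interest.
collection = collection.load(bounds=aoi).clip(aoi)
```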
@@ -2150,12 +2372,17 @@ class ImageCollection(_ImageBase):
|
|
|
2150
2372
|
if self._images is not None:
|
|
2151
2373
|
for img in self._images:
|
|
2152
2374
|
img._bbox = self._bbox
|
|
2375
|
+
if img.mask is not None:
|
|
2376
|
+
img.mask._bbox = self._bbox
|
|
2153
2377
|
if img.bands is None:
|
|
2154
2378
|
continue
|
|
2155
2379
|
for band in img:
|
|
2156
2380
|
band._bbox = self._bbox
|
|
2157
2381
|
bounds = box(*band._bbox).intersection(box(*band.bounds))
|
|
2158
2382
|
band._bounds = to_bbox(bounds) if not bounds.is_empty else None
|
|
2383
|
+
if band.mask is not None:
|
|
2384
|
+
band.mask._bbox = self._bbox
|
|
2385
|
+
band.mask._bounds = band._bounds
|
|
2159
2386
|
|
|
2160
2387
|
return self
|
|
2161
2388
|
|
|
@@ -2184,11 +2411,15 @@ class ImageCollection(_ImageBase):
|
|
|
2184
2411
|
|
|
2185
2412
|
other = to_shapely(other)
|
|
2186
2413
|
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2414
|
+
if self.processes == 1:
|
|
2415
|
+
intersects_list: pd.Series = GeoSeries(
|
|
2416
|
+
[img.union_all() for img in self]
|
|
2417
|
+
).intersects(other)
|
|
2418
|
+
else:
|
|
2419
|
+
with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
|
|
2420
|
+
intersects_list: list[bool] = parallel(
|
|
2421
|
+
joblib.delayed(_intesects)(image, other) for image in self
|
|
2422
|
+
)
|
|
2192
2423
|
|
|
2193
2424
|
self.images = [
|
|
2194
2425
|
image
|
|
@@ -2197,37 +2428,68 @@ class ImageCollection(_ImageBase):
|
|
|
2197
2428
|
]
|
|
2198
2429
|
return self
|
|
2199
2430
|
|
|
2200
|
-
def to_xarray(
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
# arr = band.load(**kwargs).values
|
|
2206
|
-
# arrs.append(arr)
|
|
2207
|
-
|
|
2208
|
-
# n_images = len(self)
|
|
2209
|
-
# n_bands = len(img)
|
|
2210
|
-
# height, width = arr.shape
|
|
2211
|
-
|
|
2212
|
-
# arr_4d = np.array(arrs).reshape(n_images, n_bands, height, width)
|
|
2431
|
+
def to_xarray(
|
|
2432
|
+
self,
|
|
2433
|
+
**kwargs,
|
|
2434
|
+
) -> Dataset:
|
|
2435
|
+
"""Convert the raster to an xarray.Dataset.
|
|
2213
2436
|
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2437
|
+
Images are converted to 2d arrays for each unique bounds.
|
|
2438
|
+
The spatial dimensions will be labeled "x" and "y". The third
|
|
2439
|
+
dimension defaults to "date" if all images have date attributes.
|
|
2440
|
+
Otherwise it defaults to the image name.
|
|
2441
|
+
"""
|
|
2442
|
+
if any(not band.has_array for img in self for band in img):
|
|
2443
|
+
raise ValueError("Arrays must be loaded.")
|
|
2444
|
+
|
|
2445
|
+
# if by is None:
|
|
2446
|
+
if all(img.date for img in self):
|
|
2447
|
+
by = ["date"]
|
|
2448
|
+
elif not pd.Index([img.name for img in self]).is_unique:
|
|
2449
|
+
raise ValueError("Images must have unique names.")
|
|
2450
|
+
else:
|
|
2451
|
+
by = ["name"]
|
|
2452
|
+
# elif isinstance(by, str):
|
|
2453
|
+
# by = [by]
|
|
2454
|
+
|
|
2455
|
+
xarrs: dict[str, DataArray] = {}
|
|
2456
|
+
for (bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
|
|
2457
|
+
name = f"{band_id}_{'-'.join(str(int(x)) for x in bounds)}"
|
|
2458
|
+
first_band = collection[0][0]
|
|
2459
|
+
coords = _generate_spatial_coords(
|
|
2460
|
+
first_band.transform, first_band.width, first_band.height
|
|
2461
|
+
)
|
|
2462
|
+
values = np.array([band.to_numpy() for img in collection for band in img])
|
|
2463
|
+
assert len(values) == len(collection)
|
|
2464
|
+
|
|
2465
|
+
# coords["band_id"] = [
|
|
2466
|
+
# band.band_id or i for i, band in enumerate(collection[0])
|
|
2467
|
+
# ]
|
|
2468
|
+
for attr in by:
|
|
2469
|
+
coords[attr] = [getattr(img, attr) for img in collection]
|
|
2470
|
+
# coords["band"] = band_id #
|
|
2471
|
+
|
|
2472
|
+
dims = [*by, "y", "x"]
|
|
2473
|
+
# dims = ["band", "y", "x"]
|
|
2474
|
+
# dims = {}
|
|
2475
|
+
# for attr in by:
|
|
2476
|
+
# dims[attr] = [getattr(img, attr) for img in collection]
|
|
2477
|
+
|
|
2478
|
+
xarrs[name] = DataArray(
|
|
2479
|
+
values,
|
|
2480
|
+
coords=coords,
|
|
2481
|
+
dims=dims,
|
|
2482
|
+
# name=name,
|
|
2483
|
+
name=band_id,
|
|
2484
|
+
attrs={
|
|
2485
|
+
"crs": collection.crs,
|
|
2486
|
+
"band_id": band_id,
|
|
2487
|
+
}, # , "bounds": bounds},
|
|
2488
|
+
**kwargs,
|
|
2489
|
+
)
|
|
2218
2490
|
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
first_band.transform, first_band.width, first_band.height
|
|
2222
|
-
)
|
|
2223
|
-
dims = ["image", "band", "y", "x"]
|
|
2224
|
-
return xr.DataArray(
|
|
2225
|
-
self.values,
|
|
2226
|
-
coords=coords,
|
|
2227
|
-
dims=dims,
|
|
2228
|
-
name=name,
|
|
2229
|
-
attrs={"crs": self.crs},
|
|
2230
|
-
)
|
|
2491
|
+
return xr.combine_by_coords(list(xarrs.values()))
|
|
2492
|
+
# return Dataset(xarrs)
|
|
2231
2493
|
|
|
2232
2494
|
def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
|
|
2233
2495
|
"""Convert each band in each Image to a GeoDataFrame."""
|
|
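The new `to_xarray` above builds one named DataArray per (bounds, band_id) group, with dims `("date", "y", "x")` (or `("name", "y", "x")` when dates are missing), and merges them with `xr.combine_by_coords`. A self-contained sketch of that final combine step, using made-up band names and coordinates:

```python
import numpy as np
import pandas as pd
import xarray as xr

dates = pd.to_datetime(["2023-06-01", "2023-07-01"])
y = np.arange(40.0, 0.0, -10.0)  # descending, like raster row coordinates
x = np.arange(0.0, 40.0, 10.0)


def fake_band(band_id: str) -> xr.DataArray:
    return xr.DataArray(
        np.random.rand(len(dates), len(y), len(x)),
        coords={"date": dates, "y": y, "x": x},
        dims=["date", "y", "x"],
        name=band_id,
    )


dataset = xr.combine_by_coords([fake_band("B04"), fake_band("B08")])
print(dataset)  # Dataset with data variables B04 and B08 on (date, y, x)
```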
@@ -2241,8 +2503,6 @@ class ImageCollection(_ImageBase):
|
|
|
2241
2503
|
except AttributeError:
|
|
2242
2504
|
name = f"{self.__class__.__name__}({i})"
|
|
2243
2505
|
|
|
2244
|
-
# band.load()
|
|
2245
|
-
|
|
2246
2506
|
if name not in out:
|
|
2247
2507
|
out[name] = band.to_gdf(column=column)
|
|
2248
2508
|
return out
|
|
@@ -2384,36 +2644,22 @@ class ImageCollection(_ImageBase):
|
|
|
2384
2644
|
masking=self.masking,
|
|
2385
2645
|
**self._common_init_kwargs,
|
|
2386
2646
|
)
|
|
2647
|
+
|
|
2387
2648
|
if self.masking is not None:
|
|
2388
2649
|
images = []
|
|
2389
2650
|
for image in self._images:
|
|
2651
|
+
# TODO why this loop?
|
|
2390
2652
|
try:
|
|
2391
2653
|
if not isinstance(image.mask, Band):
|
|
2392
2654
|
raise ValueError()
|
|
2393
2655
|
images.append(image)
|
|
2394
|
-
except ValueError:
|
|
2656
|
+
except ValueError as e:
|
|
2657
|
+
raise e
|
|
2395
2658
|
continue
|
|
2396
2659
|
self._images = images
|
|
2397
2660
|
for image in self._images:
|
|
2398
2661
|
image._bands = [band for band in image if band.band_id is not None]
|
|
2399
2662
|
|
|
2400
|
-
if self.metadata is not None:
|
|
2401
|
-
attributes_to_add = ["crs", "bounds"] + list(self.metadata_attributes)
|
|
2402
|
-
for img in self:
|
|
2403
|
-
for band in img:
|
|
2404
|
-
for key in attributes_to_add:
|
|
2405
|
-
try:
|
|
2406
|
-
value = self.metadata[band.path][key]
|
|
2407
|
-
except KeyError:
|
|
2408
|
-
try:
|
|
2409
|
-
value = self.metadata[key][band.path]
|
|
2410
|
-
except KeyError:
|
|
2411
|
-
continue
|
|
2412
|
-
try:
|
|
2413
|
-
setattr(band, key, value)
|
|
2414
|
-
except Exception:
|
|
2415
|
-
setattr(band, f"_{key}", value)
|
|
2416
|
-
|
|
2417
2663
|
self._images = [img for img in self if len(img)]
|
|
2418
2664
|
|
|
2419
2665
|
if self._should_be_sorted:
|
|
@@ -2438,7 +2684,7 @@ class ImageCollection(_ImageBase):
|
|
|
2438
2684
|
and sort_group in _get_non_optional_groups(pat)
|
|
2439
2685
|
for pat in self.image_patterns
|
|
2440
2686
|
)
|
|
2441
|
-
or all(img
|
|
2687
|
+
or all(getattr(img, sort_group) is not None for img in self)
|
|
2442
2688
|
)
|
|
2443
2689
|
|
|
2444
2690
|
@images.setter
|
|
@@ -2449,7 +2695,18 @@ class ImageCollection(_ImageBase):
|
|
|
2449
2695
|
|
|
2450
2696
|
def __repr__(self) -> str:
|
|
2451
2697
|
"""String representation."""
|
|
2452
|
-
|
|
2698
|
+
root = ""
|
|
2699
|
+
if self.path is not None:
|
|
2700
|
+
data = f"'{self.path}'"
|
|
2701
|
+
elif all(img.path is not None for img in self):
|
|
2702
|
+
data = [img.path for img in self]
|
|
2703
|
+
parents = {str(Path(path).parent) for path in data}
|
|
2704
|
+
if len(parents) == 1:
|
|
2705
|
+
data = [Path(path).name for path in data]
|
|
2706
|
+
root = f" root='{next(iter(parents))}',"
|
|
2707
|
+
else:
|
|
2708
|
+
data = [img for img in self]
|
|
2709
|
+
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
|
|
2453
2710
|
|
|
2454
2711
|
def union_all(self) -> Polygon | MultiPolygon:
|
|
2455
2712
|
"""(Multi)Polygon representing the union of all image bounds."""
|
|
@@ -2500,12 +2757,8 @@ class ImageCollection(_ImageBase):
|
|
|
2500
2757
|
|
|
2501
2758
|
alpha = 1 - p
|
|
2502
2759
|
|
|
2503
|
-
# for img in self:
|
|
2504
|
-
# for band in img:
|
|
2505
|
-
# band.load()
|
|
2506
|
-
|
|
2507
2760
|
for group_values, subcollection in self.groupby(by):
|
|
2508
|
-
print("
|
|
2761
|
+
print("subcollection group values:", group_values)
|
|
2509
2762
|
|
|
2510
2763
|
if "date" in x_var and subcollection._should_be_sorted:
|
|
2511
2764
|
subcollection._images = list(sorted(subcollection._images))
|
|
@@ -2519,6 +2772,7 @@ class ImageCollection(_ImageBase):
|
|
|
2519
2772
|
for band in img
|
|
2520
2773
|
]
|
|
2521
2774
|
)
|
|
2775
|
+
first_date = pd.Timestamp(x[0])
|
|
2522
2776
|
x = (
|
|
2523
2777
|
pd.to_datetime(
|
|
2524
2778
|
[band.date[:8] for img in subcollection for band in img]
|
|
@@ -2611,6 +2865,23 @@ class ImageCollection(_ImageBase):
|
|
|
2611
2865
|
)
|
|
2612
2866
|
plt.xlabel(x_var)
|
|
2613
2867
|
plt.ylabel(y_label)
|
|
2868
|
+
|
|
2869
|
+
if x_var == "date":
|
|
2870
|
+
date_labels = pd.to_datetime(
|
|
2871
|
+
[first_date + pd.Timedelta(days=int(day)) for day in this_x]
|
|
2872
|
+
)
|
|
2873
|
+
|
|
2874
|
+
_, unique_indices = np.unique(
|
|
2875
|
+
date_labels.strftime("%Y-%m"), return_index=True
|
|
2876
|
+
)
|
|
2877
|
+
|
|
2878
|
+
unique_x = np.array(this_x)[unique_indices]
|
|
2879
|
+
unique_labels = date_labels[unique_indices].strftime("%Y-%m")
|
|
2880
|
+
|
|
2881
|
+
ax.set_xticks(unique_x)
|
|
2882
|
+
ax.set_xticklabels(unique_labels, rotation=45, ha="right")
|
|
2883
|
+
# ax.tick_params(axis="x", length=10, width=2)
|
|
2884
|
+
|
|
2614
2885
|
plt.show()
|
|
2615
2886
|
|
|
2616
2887
|
|
|
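The block added above converts day-offsets on the x-axis back into one "YYYY-MM" tick per month. A stand-alone matplotlib sketch of the same idea, with synthetic day offsets and an assumed start date:

```python
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

first_date = pd.Timestamp("2023-06-01")  # assumed start of the series
day_offsets = np.arange(0, 120, 10)      # x values in days since start
values = np.random.rand(len(day_offsets))

fig, ax = plt.subplots()
ax.scatter(day_offsets, values)

date_labels = pd.to_datetime(
    [first_date + pd.Timedelta(days=int(d)) for d in day_offsets]
)
# One tick per unique year-month, placed at its first day offset.
_, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)

ax.set_xticks(day_offsets[unique_indices])
ax.set_xticklabels(
    date_labels[unique_indices].strftime("%Y-%m"), rotation=45, ha="right"
)
plt.show()
```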
@@ -2629,10 +2900,7 @@ class Sentinel2Config:
|
|
|
2629
2900
|
"""Holder of Sentinel 2 regexes, band_ids etc."""
|
|
2630
2901
|
|
|
2631
2902
|
image_regexes: ClassVar[str] = (config.SENTINEL2_IMAGE_REGEX,)
|
|
2632
|
-
filename_regexes: ClassVar[str] = (
|
|
2633
|
-
config.SENTINEL2_FILENAME_REGEX,
|
|
2634
|
-
config.SENTINEL2_CLOUD_FILENAME_REGEX,
|
|
2635
|
-
)
|
|
2903
|
+
filename_regexes: ClassVar[str] = (config.SENTINEL2_FILENAME_REGEX,)
|
|
2636
2904
|
metadata_attributes: ClassVar[
|
|
2637
2905
|
dict[str, Callable | functools.partial | tuple[str]]
|
|
2638
2906
|
] = {
|
|
@@ -2640,22 +2908,69 @@ class Sentinel2Config:
|
|
|
2640
2908
|
_extract_regex_match_from_string,
|
|
2641
2909
|
regexes=(r"<PROCESSING_BASELINE>(.*?)</PROCESSING_BASELINE>",),
|
|
2642
2910
|
),
|
|
2643
|
-
"
|
|
2644
|
-
"is_refined":
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2911
|
+
"cloud_cover_percentage": "_get_cloud_cover_percentage",
|
|
2912
|
+
"is_refined": "_get_image_refining_flag",
|
|
2913
|
+
"boa_quantification_value": "_get_boa_quantification_value",
|
|
2914
|
+
}
|
|
2915
|
+
l1c_bands: ClassVar[set[str]] = {
|
|
2916
|
+
"B01": 60,
|
|
2917
|
+
"B02": 10,
|
|
2918
|
+
"B03": 10,
|
|
2919
|
+
"B04": 10,
|
|
2920
|
+
"B05": 20,
|
|
2921
|
+
"B06": 20,
|
|
2922
|
+
"B07": 20,
|
|
2923
|
+
"B08": 10,
|
|
2924
|
+
"B8A": 20,
|
|
2925
|
+
"B09": 60,
|
|
2926
|
+
"B10": 60,
|
|
2927
|
+
"B11": 20,
|
|
2928
|
+
"B12": 20,
|
|
2648
2929
|
}
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2930
|
+
l2a_bands: ClassVar[set[str]] = {
|
|
2931
|
+
key: res for key, res in l1c_bands.items() if key != "B10"
|
|
2932
|
+
}
|
|
2933
|
+
all_bands: ClassVar[set[str]] = l1c_bands
|
|
2934
|
+
rbg_bands: ClassVar[tuple[str]] = ("B04", "B02", "B03")
|
|
2935
|
+
ndvi_bands: ClassVar[tuple[str]] = ("B04", "B08")
|
|
2654
2936
|
masking: ClassVar[BandMasking] = BandMasking(
|
|
2655
|
-
band_id="SCL",
|
|
2937
|
+
band_id="SCL",
|
|
2938
|
+
values={
|
|
2939
|
+
2: "Topographic casted shadows",
|
|
2940
|
+
3: "Cloud shadows",
|
|
2941
|
+
8: "Cloud medium probability",
|
|
2942
|
+
9: "Cloud high probability",
|
|
2943
|
+
10: "Thin cirrus",
|
|
2944
|
+
11: "Snow or ice",
|
|
2945
|
+
},
|
|
2656
2946
|
)
|
|
2657
2947
|
|
|
2658
|
-
def
|
|
2948
|
+
def _get_image_refining_flag(self, xml_file: str) -> bool:
|
|
2949
|
+
match_ = re.search(
|
|
2950
|
+
r'Image_Refining flag="(?:REFINED|NOT_REFINED)"',
|
|
2951
|
+
xml_file,
|
|
2952
|
+
)
|
|
2953
|
+
if match_ is None:
|
|
2954
|
+
raise _RegexError()
|
|
2955
|
+
|
|
2956
|
+
if "NOT_REFINED" in match_.group(0):
|
|
2957
|
+
return False
|
|
2958
|
+
elif "REFINED" in match_.group(0):
|
|
2959
|
+
return True
|
|
2960
|
+
else:
|
|
2961
|
+
raise _RegexError()
|
|
2962
|
+
|
|
2963
|
+
def _get_boa_quantification_value(self, xml_file: str) -> int:
|
|
2964
|
+
return int(
|
|
2965
|
+
_extract_regex_match_from_string(
|
|
2966
|
+
xml_file,
|
|
2967
|
+
(
|
|
2968
|
+
r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
|
|
2969
|
+
),
|
|
2970
|
+
)
|
|
2971
|
+
)
|
|
2972
|
+
|
|
2973
|
+
def _get_cloud_cover_percentage(self, xml_file: str) -> float:
|
|
2659
2974
|
return float(
|
|
2660
2975
|
_extract_regex_match_from_string(
|
|
2661
2976
|
xml_file,
|
|
@@ -2666,7 +2981,35 @@ class Sentinel2Config:
|
|
|
2666
2981
|
)
|
|
2667
2982
|
)
|
|
2668
2983
|
|
|
2669
|
-
|
|
2984
|
+
|
|
2985
|
+
class Sentinel2CloudlessConfig(Sentinel2Config):
|
|
2986
|
+
"""Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic."""
|
|
2987
|
+
|
|
2988
|
+
image_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
|
|
2989
|
+
filename_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_FILENAME_REGEX,)
|
|
2990
|
+
masking: ClassVar[None] = None
|
|
2991
|
+
all_bands: ClassVar[list[str]] = [
|
|
2992
|
+
x.replace("B0", "B") for x in Sentinel2Config.all_bands
|
|
2993
|
+
]
|
|
2994
|
+
rbg_bands: ClassVar[dict[str, str]] = {
|
|
2995
|
+
key.replace("B0", "B") for key in Sentinel2Config.rbg_bands
|
|
2996
|
+
}
|
|
2997
|
+
ndvi_bands: ClassVar[dict[str, str]] = {
|
|
2998
|
+
key.replace("B0", "B") for key in Sentinel2Config.ndvi_bands
|
|
2999
|
+
}
|
|
3000
|
+
|
|
3001
|
+
|
|
3002
|
+
class Sentinel2Band(Sentinel2Config, Band):
|
|
3003
|
+
"""Band with Sentinel2 specific name variables and regexes."""
|
|
3004
|
+
|
|
3005
|
+
metadata_attributes = Sentinel2Config.metadata_attributes | {
|
|
3006
|
+
"boa_add_offset": "_get_boa_add_offset_dict",
|
|
3007
|
+
}
|
|
3008
|
+
|
|
3009
|
+
def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
|
|
3010
|
+
if self.is_mask:
|
|
3011
|
+
return None
|
|
3012
|
+
|
|
2670
3013
|
pat = re.compile(
|
|
2671
3014
|
r"""
|
|
2672
3015
|
<BOA_ADD_OFFSET\s*
|
|
@@ -2683,30 +3026,39 @@ class Sentinel2Config:
|
|
|
2683
3026
|
raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
|
|
2684
3027
|
if not matches:
|
|
2685
3028
|
raise _RegexError(f"Could not find boa_add_offset info from {pat}")
|
|
2686
|
-
|
|
3029
|
+
|
|
3030
|
+
dict_ = (
|
|
2687
3031
|
pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
|
|
2688
3032
|
)
|
|
2689
3033
|
|
|
3034
|
+
# some xml files have band ids in range index form
|
|
3035
|
+
# converting these to actual band ids (B01 etc.)
|
|
3036
|
+
is_integer_coded = [int(i) for i in dict_] == list(range(len(dict_)))
|
|
2690
3037
|
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
ndvi_bands: ClassVar[list[str]] = [
|
|
2704
|
-
x.replace("B0", "B") for x in Sentinel2Config.ndvi_bands
|
|
2705
|
-
]
|
|
2706
|
-
|
|
3038
|
+
if is_integer_coded:
|
|
3039
|
+
# the xml files contain 13 bandIds for both L1C and L2A
|
|
3040
|
+
# even though L2A doesn't have band B10
|
|
3041
|
+
all_bands = list(self.l1c_bands)
|
|
3042
|
+
if len(all_bands) != len(dict_):
|
|
3043
|
+
raise ValueError(
|
|
3044
|
+
f"Different number of bands in xml file and config for {self.name}: {all_bands}, {list(dict_)}"
|
|
3045
|
+
)
|
|
3046
|
+
dict_ = {
|
|
3047
|
+
band_id: value
|
|
3048
|
+
for band_id, value in zip(all_bands, dict_.values(), strict=True)
|
|
3049
|
+
}
|
|
2707
3050
|
|
|
2708
|
-
|
|
2709
|
-
|
|
3051
|
+
try:
|
|
3052
|
+
return dict_[self.band_id]
|
|
3053
|
+
except KeyError as e:
|
|
3054
|
+
band_id = self.band_id.upper()
|
|
3055
|
+
for txt in ["B0", "B", "A"]:
|
|
3056
|
+
band_id = band_id.replace(txt, "")
|
|
3057
|
+
try:
|
|
3058
|
+
return dict_[band_id]
|
|
3059
|
+
except KeyError:
|
|
3060
|
+
continue
|
|
3061
|
+
raise KeyError(self.band_id, dict_) from e
|
|
2710
3062
|
|
|
2711
3063
|
|
|
2712
3064
|
class Sentinel2Image(Sentinel2Config, Image):
|
|
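The SCL classes listed in `BandMasking` above are the ones treated as invalid pixels. A small numpy sketch of how such a value list typically becomes a boolean mask (the arrays are made up, and the exact masking call inside sgis may differ):

```python
import numpy as np

MASK_CLASSES = (2, 3, 8, 9, 10, 11)  # the keys of the 'values' dict above

scl = np.array(
    [[4, 8, 4],
     [9, 4, 3],
     [4, 4, 11]]
)
reflectance = np.random.rand(3, 3)

invalid = np.isin(scl, MASK_CLASSES)          # True where the SCL class is masked
masked = np.ma.array(reflectance, mask=invalid)

print(invalid)
print(masked)
```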
@@ -2716,12 +3068,15 @@ class Sentinel2Image(Sentinel2Config, Image):
|
|
|
2716
3068
|
|
|
2717
3069
|
def ndvi(
|
|
2718
3070
|
self,
|
|
2719
|
-
red_band: str =
|
|
2720
|
-
nir_band: str =
|
|
3071
|
+
red_band: str = "B04",
|
|
3072
|
+
nir_band: str = "B08",
|
|
3073
|
+
padding: int = 0,
|
|
2721
3074
|
copy: bool = True,
|
|
2722
3075
|
) -> NDVIBand:
|
|
2723
3076
|
"""Calculate the NDVI for the Image."""
|
|
2724
|
-
return super().ndvi(
|
|
3077
|
+
return super().ndvi(
|
|
3078
|
+
red_band=red_band, nir_band=nir_band, padding=padding, copy=copy
|
|
3079
|
+
)
|
|
2725
3080
|
|
|
2726
3081
|
|
|
2727
3082
|
class Sentinel2Collection(Sentinel2Config, ImageCollection):
|
|
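`Sentinel2Image.ndvi` above now spells out its band defaults ("B04" red, "B08" near-infrared) and forwards a `padding` argument. The underlying index is the standard NDVI ratio; a minimal numpy illustration with made-up reflectance values:

```python
import numpy as np

red = np.array([[0.10, 0.20], [0.30, 0.40]])
nir = np.array([[0.50, 0.60], [0.70, 0.80]])

# NDVI = (NIR - red) / (NIR + red)
ndvi = (nir - red) / (nir + red)
print(ndvi.round(3))
```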
@@ -2732,8 +3087,8 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
|
|
|
2732
3087
|
|
|
2733
3088
|
def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
|
|
2734
3089
|
"""ImageCollection with Sentinel2 specific name variables and path regexes."""
|
|
2735
|
-
level = kwargs.get("level",
|
|
2736
|
-
if
|
|
3090
|
+
level = kwargs.get("level", None_)
|
|
3091
|
+
if callable(level) and isinstance(level(), None_):
|
|
2737
3092
|
raise ValueError("Must specify level for Sentinel2Collection.")
|
|
2738
3093
|
super().__init__(data=data, **kwargs)
|
|
2739
3094
|
|
|
@@ -2797,29 +3152,6 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
|
|
|
2797
3152
|
raise ValueError("array must be 2 or 3 dimensional")
|
|
2798
3153
|
|
|
2799
3154
|
|
|
2800
|
-
def to_xarray(
|
|
2801
|
-
array: np.ndarray, transform: Affine, crs: Any, name: str | None = None
|
|
2802
|
-
) -> DataArray:
|
|
2803
|
-
"""Convert the raster to an xarray.DataArray."""
|
|
2804
|
-
if len(array.shape) == 2:
|
|
2805
|
-
height, width = array.shape
|
|
2806
|
-
dims = ["y", "x"]
|
|
2807
|
-
elif len(array.shape) == 3:
|
|
2808
|
-
height, width = array.shape[1:]
|
|
2809
|
-
dims = ["band", "y", "x"]
|
|
2810
|
-
else:
|
|
2811
|
-
raise ValueError(f"Array should be 2 or 3 dimensional. Got shape {array.shape}")
|
|
2812
|
-
|
|
2813
|
-
coords = _generate_spatial_coords(transform, width, height)
|
|
2814
|
-
return xr.DataArray(
|
|
2815
|
-
array,
|
|
2816
|
-
coords=coords,
|
|
2817
|
-
dims=dims,
|
|
2818
|
-
name=name,
|
|
2819
|
-
attrs={"crs": crs},
|
|
2820
|
-
)
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
3155
|
def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
|
|
2824
3156
|
gradient_x, gradient_y = np.gradient(array, res, res)
|
|
2825
3157
|
|
|
@@ -2836,47 +3168,31 @@ def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
|
|
|
2836
3168
|
return degrees
|
|
2837
3169
|
|
|
2838
3170
|
|
|
2839
|
-
def
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
transform: Affine,
|
|
3171
|
+
def _clip_xarray(
|
|
3172
|
+
xarr: DataArray,
|
|
3173
|
+
mask: tuple[int, int, int, int],
|
|
2843
3174
|
crs: Any,
|
|
2844
|
-
out_shape: tuple[int, int],
|
|
2845
3175
|
**kwargs,
|
|
2846
|
-
) ->
|
|
3176
|
+
) -> DataArray:
|
|
2847
3177
|
# xarray needs a numpy array of polygons
|
|
2848
|
-
|
|
3178
|
+
mask_arr: np.ndarray = to_geoseries(mask).values
|
|
2849
3179
|
try:
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
transform=transform,
|
|
2856
|
-
crs=crs,
|
|
2857
|
-
)
|
|
2858
|
-
.rio.clip(bounds_arr, crs=crs, **kwargs)
|
|
2859
|
-
.to_numpy()
|
|
2860
|
-
)
|
|
2861
|
-
# bounds_arr = bounds_arr.buffer(0.0000001)
|
|
2862
|
-
return arr
|
|
2863
|
-
|
|
3180
|
+
return xarr.rio.clip(
|
|
3181
|
+
mask_arr,
|
|
3182
|
+
crs=crs,
|
|
3183
|
+
**kwargs,
|
|
3184
|
+
)
|
|
2864
3185
|
except NoDataInBounds:
|
|
2865
3186
|
return np.array([])
|
|
2866
3187
|
|
|
2867
3188
|
|
|
2868
|
-
def
|
|
2869
|
-
return (
|
|
2870
|
-
str(path).replace("\\", "/").replace(r"\"", "/").replace("//", "/").rstrip("/")
|
|
2871
|
-
)
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
def _get_all_file_paths(path: str) -> list[str]:
|
|
3189
|
+
def _get_all_file_paths(path: str) -> set[str]:
|
|
2875
3190
|
if is_dapla():
|
|
2876
|
-
return
|
|
3191
|
+
return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
|
|
2877
3192
|
else:
|
|
2878
|
-
return
|
|
2879
|
-
|
|
3193
|
+
return {
|
|
3194
|
+
_fix_path(x)
|
|
3195
|
+
for x in sorted(
|
|
2880
3196
|
set(
|
|
2881
3197
|
_glob_func(path + "/**")
|
|
2882
3198
|
+ _glob_func(path + "/**/**")
|
|
@@ -2885,7 +3201,7 @@ def _get_all_file_paths(path: str) -> list[str]:
|
|
|
2885
3201
|
+ _glob_func(path + "/**/**/**/**/**")
|
|
2886
3202
|
)
|
|
2887
3203
|
)
|
|
2888
|
-
|
|
3204
|
+
}
|
|
2889
3205
|
|
|
2890
3206
|
|
|
2891
3207
|
def _get_images(
|
|
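The helper deleted above normalized paths to forward slashes with no trailing slash, and the `_fix_path` calls added earlier in this file keep relying on that behaviour. A stand-alone copy of the deleted body for reference (the function name here is hypothetical, since the original signature line is truncated in this diff view):

```python
def fix_path(path) -> str:
    # Mirrors the deleted body above: forward slashes only, no trailing slash.
    return (
        str(path).replace("\\", "/").replace(r"\"", "/").replace("//", "/").rstrip("/")
    )


print(fix_path("data\\raster\\sentinel2//"))  # data/raster/sentinel2
```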
@@ -2900,9 +3216,8 @@ def _get_images(
|
|
|
2900
3216
|
masking: BandMasking | None,
|
|
2901
3217
|
**kwargs,
|
|
2902
3218
|
) -> list[Image]:
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
images = parallel(
|
|
3219
|
+
with joblib.Parallel(n_jobs=processes, backend="threading") as parallel:
|
|
3220
|
+
images: list[Image] = parallel(
|
|
2906
3221
|
joblib.delayed(image_class)(
|
|
2907
3222
|
path,
|
|
2908
3223
|
df=df,
|
|
@@ -3017,13 +3332,13 @@ def _copy_and_add_df_parallel(
|
|
|
3017
3332
|
return (i, copied)
|
|
3018
3333
|
|
|
3019
3334
|
|
|
3020
|
-
def _get_bounds(bounds, bbox) -> None | Polygon:
|
|
3335
|
+
def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
|
|
3021
3336
|
if bounds is None and bbox is None:
|
|
3022
3337
|
return None
|
|
3023
3338
|
elif bounds is not None and bbox is None:
|
|
3024
|
-
return to_shapely(bounds)
|
|
3339
|
+
return to_shapely(bounds).intersection(band_bounds)
|
|
3025
3340
|
elif bounds is None and bbox is not None:
|
|
3026
|
-
return to_shapely(bbox)
|
|
3341
|
+
return to_shapely(bbox).intersection(band_bounds)
|
|
3027
3342
|
else:
|
|
3028
3343
|
return to_shapely(bounds).intersection(to_shapely(bbox))
|
|
3029
3344
|
|
|
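`_get_bounds` above now intersects a requested `bounds`/`bbox` with the collection's own union of band bounds before merging. A tiny shapely illustration of that intersection (coordinates made up):

```python
from shapely.geometry import box

requested = box(0, 0, 100, 100)           # bounds asked for by the caller
collection_union = box(50, 50, 200, 200)  # union_all() of the collection

effective = requested.intersection(collection_union)
print(effective.bounds)  # (50.0, 50.0, 100.0, 100.0)
```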
@@ -3041,7 +3356,15 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
|
|
|
3041
3356
|
|
|
3042
3357
|
|
|
3043
3358
|
def _load_band(band: Band, **kwargs) -> None:
|
|
3044
|
-
band.load(**kwargs)
|
|
3359
|
+
return band.load(**kwargs)
|
|
3360
|
+
|
|
3361
|
+
|
|
3362
|
+
def _band_apply(band: Band, func: Callable, **kwargs) -> None:
|
|
3363
|
+
return band.apply(func, **kwargs)
|
|
3364
|
+
|
|
3365
|
+
|
|
3366
|
+
def _clip_band(band: Band, mask, **kwargs) -> None:
|
|
3367
|
+
return band.clip(mask, **kwargs)
|
|
3045
3368
|
|
|
3046
3369
|
|
|
3047
3370
|
def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
|
|
@@ -3053,7 +3376,7 @@ def _merge(collection: ImageCollection, **kwargs) -> Band:
|
|
|
3053
3376
|
|
|
3054
3377
|
|
|
3055
3378
|
def _zonal_one_pair(i: int, poly: Polygon, band: Band, aggfunc, array_func, func_names):
|
|
3056
|
-
clipped = band.copy().
|
|
3379
|
+
clipped = band.copy().clip(poly)
|
|
3057
3380
|
if not np.size(clipped.values):
|
|
3058
3381
|
return _no_overlap_df(func_names, i, date=band.date)
|
|
3059
3382
|
return _aggregate(clipped.values, array_func, aggfunc, func_names, band.date, i)
|