ssb-sgis 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/helpers.py +23 -7
- sgis/maps/explore.py +14 -35
- sgis/maps/map.py +5 -1
- sgis/maps/tilesources.py +11 -29
- sgis/raster/base.py +0 -54
- sgis/raster/image_collection.py +936 -580
- sgis/raster/indices.py +2 -5
- sgis/raster/regex.py +7 -2
- sgis/raster/sentinel_config.py +1 -71
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.8.dist-info}/METADATA +1 -1
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.8.dist-info}/RECORD +13 -13
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.8.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.6.dist-info → ssb_sgis-1.0.8.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
@@ -2,7 +2,6 @@ import datetime
 import functools
 import glob
 import itertools
-import math
 import os
 import random
 import re
@@ -11,6 +10,7 @@ from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Iterator
 from collections.abc import Sequence
+from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import dataclass
 from pathlib import Path
@@ -27,6 +27,7 @@ from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from matplotlib.colors import LinearSegmentedColormap
+from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
 from scipy import stats
 from scipy.ndimage import binary_dilation
@@ -88,10 +89,14 @@ except ImportError:
 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
 from ..geopandas_tools.conversion import to_gdf
+from ..geopandas_tools.conversion import to_geoseries
 from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
+from ..helpers import _fix_path
 from ..helpers import get_all_files
 from ..helpers import get_numpy_func
+from ..helpers import is_method
+from ..helpers import is_property
 from ..io._is_dapla import is_dapla
 from ..io.opener import opener
 from . import sentinel_config as config
@@ -101,7 +106,6 @@ from .base import _get_shape_from_bounds
 from .base import _get_transform_from_bounds
 from .base import get_index_mapper
 from .indices import ndvi
-from .regex import _any_regex_matches
 from .regex import _extract_regex_match_from_string
 from .regex import _get_first_group_match
 from .regex import _get_non_optional_groups
@@ -157,14 +161,22 @@ ALLOWED_INIT_KWARGS = [
     "band_class",
     "image_regexes",
     "filename_regexes",
-    "bounds_regexes",
     "all_bands",
     "crs",
+    "backend",
     "masking",
     "_merged",
-    "
+    "date",
 ]

+_load_counter: int = 0
+
+
+def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
+    with ThreadPoolExecutor() as executor:
+        all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
+        return set(itertools.chain.from_iterable(all_paths))
+


 class ImageCollectionGroupBy:
     """Iterator and merger class returned from groupby.
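Note: the new module-level helper `_get_child_paths_threaded` fans directory listings out over a thread pool and flattens the results. A minimal sketch of the same pattern, with a hypothetical `list_dir` standing in for the module's `_ls_func`:

import itertools
from collections.abc import Iterator, Sequence
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path


def list_dir(path: str) -> set[str]:
    # hypothetical stand-in for the module's _ls_func
    return {str(child) for child in Path(path).iterdir()}


def get_child_paths_threaded(parents: Sequence[str]) -> set[str]:
    # one listing per parent directory, run concurrently since this is I/O-bound work
    with ThreadPoolExecutor() as executor:
        all_paths: Iterator[set[str]] = executor.map(list_dir, parents)
        return set(itertools.chain.from_iterable(all_paths))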
@@ -216,7 +228,6 @@ class ImageCollectionGroupBy:

         collection = ImageCollection(
             images,
-            # TODO band_class?
             level=self.collection.level,
             **self.collection._common_init_kwargs,
         )
@@ -254,7 +265,6 @@ class ImageCollectionGroupBy:

         image = Image(
             bands,
-            # TODO band_class?
             **self.collection._common_init_kwargs,
         )
         image._merged = True
@@ -284,49 +294,20 @@ class ImageCollectionGroupBy:
         return f"{self.__class__.__name__}({len(self)})"


-def standardize_band_id(x: str) -> str:
-    return x.replace("B", "").replace("A", "").zfill(2)
-
-
-class BandIdDict(dict):
-    """Dict that tells the band initialiser to get the dict value of the band_id."""
-
-    def __init__(self, data: dict | None = None, **kwargs) -> None:
-        """Add dicts or kwargs."""
-        self._standardized_keys = {}
-        for key, value in ((data or {}) | kwargs).items():
-            setattr(self, key, value)
-            self._standardized_keys[standardize_band_id(key)] = value
-
-    def __len__(self) -> int:
-        """Number of items."""
-        return len({key for key in self.__dict__ if key != "_standardized_keys"})
-
-    def __getitem__(self, item: str) -> Any:
-        """Get dict value from key."""
-        try:
-            return getattr(self, item)
-        except AttributeError as e:
-            try:
-                return self._standardized_keys[standardize_band_id(item)]
-            except KeyError:
-                raise KeyError(item, self.__dict__) from e
-
-
 @dataclass(frozen=True)
 class BandMasking:
     """Frozen dict with forced keys."""

     band_id: str
-    values:
+    values: Sequence[int] | dict[int, Any]

     def __getitem__(self, item: str) -> Any:
         """Index into attributes to mimick dict."""
         return getattr(self, item)


-class
-"""
+class None_:
+    """Default value for keyword arguments that should not have a default."""


 class _ImageBase:
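Note: `None_` is used further down in this diff as a required-argument sentinel (for example `res: int | None_ = None_` in `Band.__init__`), detected with `callable(res) and isinstance(res(), None_)`. A minimal, self-contained sketch of that sentinel pattern (names here are illustrative, not sgis'):

class NoDefault:
    """Sentinel: the class object is the default, so 'nothing passed' is detectable."""


def load(res: int | type[NoDefault] = NoDefault) -> int:
    # the default is the class itself; calling it gives a NoDefault instance,
    # so this branch only triggers when the caller did not supply 'res'
    if callable(res) and isinstance(res(), NoDefault):
        raise TypeError("Must specify 'res'")
    return int(res)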
@@ -335,53 +316,86 @@ class _ImageBase:
     metadata_attributes: ClassVar[dict | None] = None
     masking: ClassVar[BandMasking | None] = None

-    def __init__(self, *, bbox=None, **kwargs) -> None:
+    def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:

         self._mask = None
         self._bounds = None
         self._merged = False
         self._from_array = False
-        self.
+        self._from_geopandas = False
         self.metadata_attributes = self.metadata_attributes or {}
         self._path = None
+        self._metadata_from_xml = False

         self._bbox = to_bbox(bbox) if bbox is not None else None

-
-        if isinstance(self.filename_regexes, str):
-            self.filename_regexes = (self.filename_regexes,)
-            self.filename_patterns = [
-                re.compile(regexes, flags=re.VERBOSE)
-                for regexes in self.filename_regexes
-            ]
-        else:
-            self.filename_patterns = ()
+        self.metadata = self._metadata_to_nested_dict(metadata)

-
-
-            self.image_regexes = (self.image_regexes,)
-            self.image_patterns = [
-                re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
-            ]
-        else:
-            self.image_patterns = ()
+        self.image_patterns = self._compile_regexes("image_regexes")
+        self.filename_patterns = self._compile_regexes("filename_regexes")

         for key, value in kwargs.items():
+            error_obj = ValueError(
+                f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
+            )
             if key in ALLOWED_INIT_KWARGS and key in dir(self):
-
+                if is_property(self, key):
+                    setattr(self, f"_{key}", value)
+                elif is_method(self, key):
+                    raise error_obj
+                else:
+                    setattr(self, key, value)
             else:
-                raise
-
-
+                raise error_obj
+
+    def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
+        regexes = getattr(self, regex_attr)
+        if regexes:
+            if isinstance(regexes, str):
+                regexes = (regexes,)
+            return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
+        return ()
+
+    @staticmethod
+    def _metadata_to_nested_dict(
+        metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
+    ) -> dict[str, dict[str, Any]] | None:
+        if metadata is None:
+            return {}
+        if isinstance(metadata, (str | Path | os.PathLike)):
+            metadata = _read_parquet_func(metadata)
+
+        if isinstance(metadata, pd.DataFrame):
+
+            def is_scalar(x) -> bool:
+                """Check if scalar because 'truth value of Series is ambigous'."""
+                return not hasattr(x, "__len__") or len(x) <= 1
+
+            def na_to_none(x) -> None:
+                """Convert to None rowwise because pandas doesn't always."""
+                return x if not (is_scalar(x) and pd.isna(x)) else None
+
+            # to nested dict because pandas indexing gives rare KeyError with long strings
+            metadata = {
+                _fix_path(path): {
+                    attr: na_to_none(value) for attr, value in row.items()
+                }
+                for path, row in metadata.iterrows()
+            }
+        elif is_dict_like(metadata):
+            metadata = {_fix_path(path): value for path, value in metadata.items()}
+
+        return metadata

     @property
     def _common_init_kwargs(self) -> dict:
         return {
-            "file_system": self.file_system,
             "processes": self.processes,
             "res": self.res,
             "bbox": self._bbox,
             "nodata": self.nodata,
+            "backend": self.backend,
+            "metadata": self.metadata,
         }

     @property
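Note: `_metadata_to_nested_dict` normalises the new `metadata` argument (parquet path, DataFrame or dict-like) into a `{file_path: {attribute: value}}` mapping. A rough sketch of the DataFrame branch, under the assumption that the index holds file paths, with a plain normaliser in place of sgis' `_fix_path`:

import pandas as pd


def fix_path(path: str) -> str:
    # simple stand-in for sgis' _fix_path
    return str(path).replace("\\", "/").rstrip("/")


df = pd.DataFrame(
    {"cloud_cover_percentage": [1.5, None], "orbit_number": [8, 8]},
    index=["gs://bucket/image1/B02.tif", "gs://bucket/image1/B03.tif"],
)

metadata = {
    fix_path(path): {
        attr: (None if pd.isna(value) else value) for attr, value in row.items()
    }
    for path, row in df.iterrows()
}
# -> {'gs://bucket/image1/B02.tif': {'cloud_cover_percentage': 1.5, 'orbit_number': 8}, ...}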
@@ -401,6 +415,14 @@ class _ImageBase:
         """Centerpoint of the object."""
         return self.union_all().centroid

+    def assign(self, **kwargs) -> "_ImageBase":
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError:
+                setattr(self, f"_{key}", value)
+        return self
+
     def _name_regex_searcher(
         self, group: str, patterns: tuple[re.Pattern]
     ) -> str | None:
@@ -411,18 +433,28 @@ class _ImageBase:
                 return _get_first_group_match(pat, self.name)[group]
             except (TypeError, KeyError):
                 pass
+        if isinstance(self, Band):
+            for pat in patterns:
+                try:
+                    return _get_first_group_match(
+                        pat, str(Path(self.path).parent.name)
+                    )[group]
+                except (TypeError, KeyError):
+                    pass
         if not any(group in _get_non_optional_groups(pat) for pat in patterns):
             return None
+        band_text = (
+            f" or {Path(self.path).parent.name!s}" if isinstance(self, Band) else ""
+        )
         raise ValueError(
-            f"Couldn't find group '{group}' in name {self.name} with regex patterns {patterns}"
+            f"Couldn't find group '{group}' in name {self.name}{band_text} with regex patterns {patterns}"
         )

-    def _create_metadata_df(self, file_paths:
+    def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
         """Create a dataframe with file paths and image paths that match regexes."""
-        df = pd.DataFrame({"file_path": file_paths})
+        df = pd.DataFrame({"file_path": list(file_paths)})

-        df["
-        df["filename"] = df["file_path"].apply(lambda x: Path(x).name)
+        df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)

         df["image_path"] = df["file_path"].apply(
             lambda x: _fix_path(str(Path(x).parent))
@@ -434,20 +466,20 @@ class _ImageBase:
         df = df[~df["file_path"].isin(df["image_path"])]

         if self.filename_patterns:
-            df = _get_regexes_matches_for_df(df, "
+            df = _get_regexes_matches_for_df(df, "file_name", self.filename_patterns)

             if not len(df):
                 return df

             grouped = df.drop_duplicates("image_path").set_index("image_path")
-            for col in ["file_path", "
+            for col in ["file_path", "file_name"]:
                 if col in df:
                     grouped[col] = df.groupby("image_path")[col].apply(tuple)

             grouped = grouped.reset_index()
         else:
             df["file_path"] = df.groupby("image_path")["file_path"].apply(tuple)
-            df["
+            df["file_name"] = df.groupby("image_path")["file_name"].apply(tuple)
             grouped = df.drop_duplicates("image_path")

         grouped["imagename"] = grouped["image_path"].apply(
@@ -521,7 +553,7 @@ class _ImageBandBase(_ImageBase):
             return self._name
         try:
             return Path(self.path).name
-        except (ValueError, AttributeError):
+        except (ValueError, AttributeError, TypeError):
             return None

     @name.setter
@@ -532,22 +564,31 @@ class _ImageBandBase(_ImageBase):
     def stem(self) -> str | None:
         try:
             return Path(self.path).stem
-        except (AttributeError, ValueError):
+        except (AttributeError, ValueError, TypeError):
             return None

     @property
     def level(self) -> str:
         return self._name_regex_searcher("level", self.image_patterns)

-    def
+    def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:

-
-        for key, value in self.metadata_attributes.items():
-            if getattr(self, key) is None:
-                missing_attributes[key] = value
+        self._metadata_from_xml = True

-
-
+        missing_metadata_attributes = {
+            key: value
+            for key, value in metadata_attributes.items()
+            if not hasattr(self, key) or getattr(self, key) is None
+        }
+
+        nonmissing_metadata_attributes = {
+            key: getattr(self, key)
+            for key in metadata_attributes
+            if key not in missing_metadata_attributes
+        }
+
+        if not missing_metadata_attributes:
+            return nonmissing_metadata_attributes

         file_contents: list[str] = []
         for path in self._all_file_paths:
@@ -556,7 +597,7 @@ class _ImageBandBase(_ImageBase):
             with _open_func(path, "rb") as file:
                 file_contents.append(file.read().decode("utf-8"))

-        for key, value in
+        for key, value in missing_metadata_attributes.items():
             results = None
             for i, filetext in enumerate(file_contents):
                 if isinstance(value, str) and value in dir(self):
@@ -586,19 +627,54 @@ class _ImageBandBase(_ImageBase):
                     if i == len(self._all_file_paths) - 1:
                         raise e

-
-                results = results[self.band_id]
+            missing_metadata_attributes[key] = results

-
+        return missing_metadata_attributes | nonmissing_metadata_attributes
+
+    def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
+        """Convert the raster to an xarray.DataArray."""
+        attrs = {"crs": self.crs}
+        for attr in set(self.metadata_attributes).union({"date"}):
+            try:
+                attrs[attr] = getattr(self, attr)
+            except Exception:
+                pass
+
+        if len(array.shape) == 2:
+            height, width = array.shape
+            dims = ["y", "x"]
+        elif len(array.shape) == 3:
+            height, width = array.shape[1:]
+            dims = ["band", "y", "x"]
+        elif not any(dim for dim in array.shape):
+            DataArray(
+                name=self.name or self.__class__.__name__,
+                attrs=attrs,
+            )
+        else:
+            raise ValueError(
+                f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
+            )
+
+        coords = _generate_spatial_coords(transform, width, height)
+
+        return DataArray(
+            array,
+            coords=coords,
+            dims=dims,
+            name=self.name or self.__class__.__name__,
+            attrs=attrs,
+        )


 class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""

     cmap: ClassVar[str | None] = None
+    backend: str = "numpy"

     @classmethod
-    def
+    def from_geopandas(
         cls,
         gdf: GeoDataFrame | GeoSeries,
         res: int,
@@ -622,18 +698,17 @@ class Band(_ImageBandBase):
         )

         obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
-        obj.
+        obj._from_geopandas = True
         return obj

     def __init__(
         self,
-        data: str | np.ndarray,
-        res: int |
+        data: str | np.ndarray | None = None,
+        res: int | None_ = None_,
         crs: Any | None = None,
         bounds: tuple[float, float, float, float] | None = None,
         nodata: int | None = None,
         mask: "Band | None" = None,
-        file_system: GCSFileSystem | None = None,
         processes: int = 1,
         name: str | None = None,
         band_id: str | None = None,
@@ -642,6 +717,16 @@ class Band(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Band initialiser."""
+        if callable(res) and isinstance(res(), None_):
+            raise TypeError("Must specify 'res'")
+
+        if data is None:
+            # allowing 'path' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "path" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'path'.")
+            data = kwargs.pop("path")
+
         super().__init__(**kwargs)

         if isinstance(data, (str | Path | os.PathLike)) and any(
@@ -657,20 +742,13 @@ class Band(_ImageBandBase):
         self._bounds = bounds
         self._all_file_paths = all_file_paths

-        self._image = None
-
-        for key in self.metadata_attributes:
-            setattr(self, key, None)
-
         if isinstance(data, np.ndarray):
-            self.values = data
             if self._bounds is None:
                 raise ValueError("Must specify bounds when data is an array.")
             self._crs = crs
-            self.transform = _get_transform_from_bounds(
-                self._bounds, shape=self.values.shape
-            )
+            self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
             self._from_array = True
+            self.values = data

         elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError(
@@ -678,44 +756,48 @@ class Band(_ImageBandBase):
                 f"Got {type(data)}"
             )
         else:
-            self._path = str(data)
+            self._path = _fix_path(str(data))

         self._res = res
         if cmap is not None:
             self.cmap = cmap
-        self.file_system = file_system
         self._name = name
         self._band_id = band_id
         self.processes = processes

-        if
-
-
-
-
+        if self._all_file_paths:
+            self._all_file_paths = {_fix_path(path) for path in self._all_file_paths}
+            parent = _fix_path(Path(self.path).parent)
+            self._all_file_paths = {
+                path for path in self._all_file_paths if parent in path
+            }
+
+        if self.metadata:
+            if self.path is not None:
+                self.metadata = {
+                    key: value
+                    for key, value in self.metadata.items()
+                    if key == self.path
+                }
+            this_metadata = self.metadata[self.path]
+            for key, value in this_metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
+
+        elif self.metadata_attributes and self.path is not None and not self.is_mask:
             if self._all_file_paths is None:
                 self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
-            self.
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)

     def __lt__(self, other: "Band") -> bool:
         """Makes Bands sortable by band_id."""
         return self.band_id < other.band_id

-    # def __getattribute__(self, attr: str) -> Any:
-    #     # try:
-    #     # value =
-    #     # except AttributeError:
-    #     # value = None
-
-    #     if (
-    #         attr in (super().__getattribute__("metadata_attributes") or {})
-    #         and super().__getattribute__(attr) is None
-    #     ):
-    #         if self._all_file_paths is None:
-    #             self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
-    #         self._add_metadata_attributes()
-    #     return super().__getattribute__(attr)
-
     @property
     def values(self) -> np.ndarray:
         """The numpy array, if loaded."""
@@ -725,23 +807,35 @@ class Band(_ImageBandBase):

     @values.setter
     def values(self, new_val):
-        if
-
-
-
-
+        if self.backend == "numpy" and isinstance(new_val, np.ndarray):
+            self._values = new_val
+            return
+        elif self.backend == "xarray" and isinstance(new_val, DataArray):
+            # attrs can dissappear, so doing a union
+            attrs = self._values.attrs | new_val.attrs
+            self._values = new_val
+            self._values.attrs = attrs
+            return
+
+        if self.backend == "numpy":
+            self._values = self._to_numpy(new_val)
+        if self.backend == "xarray":
+            if not isinstance(self._values, DataArray):
+                self._values = self._to_xarray(
+                    new_val,
+                    transform=self.transform,
+                )
+
+            elif isinstance(new_val, np.ndarray):
+                self._values.values = new_val
+            else:
+                self._values = new_val

     @property
     def mask(self) -> "Band":
         """Mask Band."""
         return self._mask

-    @mask.setter
-    def mask(self, values: "Band") -> None:
-        if values is not None and not isinstance(values, Band):
-            raise TypeError(f"'mask' should be of type Band. Got {type(values)}")
-        self._mask = values
-
     @property
     def band_id(self) -> str:
         """Band id."""
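Note: the new `values` setter dispatches on the `backend` attribute so a `Band` can hold either a numpy array or an xarray.DataArray. A rough, self-contained sketch of the same dispatch idea (not sgis' actual class):

import numpy as np
import xarray as xr


class ValuesHolder:
    """Store the pixel array as numpy or xarray depending on a 'backend' flag."""

    def __init__(self, backend: str = "numpy") -> None:
        self.backend = backend
        self._values = None

    @property
    def values(self):
        return self._values

    @values.setter
    def values(self, new_val) -> None:
        if self.backend == "numpy":
            # coerce DataArray (or any array-like) down to a plain ndarray
            self._values = np.asarray(new_val)
        elif self.backend == "xarray":
            self._values = (
                new_val
                if isinstance(new_val, xr.DataArray)
                else xr.DataArray(np.asarray(new_val))
            )
        else:
            raise ValueError(f"Unknown backend: {self.backend}")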
@@ -752,12 +846,18 @@ class Band(_ImageBandBase):
     @property
     def height(self) -> int:
         """Pixel heigth of the image band."""
-
+        try:
+            return self.values.shape[-2]
+        except IndexError:
+            return 0

     @property
     def width(self) -> int:
         """Pixel width of the image band."""
-
+        try:
+            return self.values.shape[-1]
+        except IndexError:
+            return 0

     @property
     def tile(self) -> str:
@@ -779,11 +879,11 @@ class Band(_ImageBandBase):
         )

     @property
-    def crs(self) ->
+    def crs(self) -> pyproj.CRS | None:
         """Coordinate reference system."""
         if self._crs is None:
             self._add_crs_and_bounds()
-        return self._crs
+        return pyproj.CRS(self._crs)

     @property
     def bounds(self) -> tuple[int, int, int, int] | None:
@@ -793,7 +893,7 @@ class Band(_ImageBandBase):
         return self._bounds

     def _add_crs_and_bounds(self) -> None:
-        with opener(self.path
+        with opener(self.path) as file:
             with rasterio.open(file) as src:
                 self._bounds = to_bbox(src.bounds)
                 self._crs = src.crs
@@ -805,7 +905,7 @@ class Band(_ImageBandBase):
         copied = self.copy()
         value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
         copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
-        df = copied.
+        df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
         df[column] = f"largest_{n}"
         return df
@@ -816,48 +916,71 @@ class Band(_ImageBandBase):
         copied = self.copy()
         value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
         copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
-        df = copied.
+        df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
         df[column] = f"smallest_{n}"
         return df

+    def clip(
+        self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
+    ) -> "Band":
+        """Clip band values to geometry mask."""
+        if not self.height or not self.width:
+            return self
+
+        values = _clip_xarray(
+            self.to_xarray(),
+            mask,
+            crs=self.crs,
+            **kwargs,
+        )
+        self._bounds = to_bbox(mask)
+        self.transform = _get_transform_from_bounds(self._bounds, values.shape)
+        self.values = values
+        return self
+
     def load(
         self,
         bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
         indexes: int | tuple[int] | None = None,
         masked: bool | None = None,
+        file_system=None,
         **kwargs,
     ) -> "Band":
         """Load and potentially clip the array.

         The array is stored in the 'values' property.
         """
+        global _load_counter
+        _load_counter += 1
+
         if masked is None:
             masked = True if self.mask is None else False

         bounds_was_none = bounds is None

-        bounds = _get_bounds(bounds, self._bbox)
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())

         should_return_empty: bool = bounds is not None and bounds.area == 0
         if should_return_empty:
             self._values = np.array([])
             if self.mask is not None and not self.is_mask:
-                self._mask = self._mask.load(
+                self._mask = self._mask.load(
+                    bounds=bounds, indexes=indexes, file_system=file_system
+                )
             self._bounds = None
             self.transform = None
-
-
-            except AttributeError:
-                pass
+            self.values = self._values
+
             return self

         if self.has_array and bounds_was_none:
             return self

-        # round down/up to integer to avoid precision trouble
         if bounds is not None:
             minx, miny, maxx, maxy = to_bbox(bounds)
-
+            ## round down/up to integer to avoid precision trouble
+            # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
+            bounds = minx, miny, maxx, maxy

         if indexes is None:
             indexes = 1
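Note: a hypothetical usage sketch of the new `Band.clip`, with made-up file paths; the call signature follows the method added above:

import geopandas as gpd
from sgis.raster.image_collection import Band

band = Band("data/example/B04.tif", res=10)  # hypothetical file
band.load()

aoi = gpd.read_file("data/example/area_of_interest.gpkg")  # hypothetical file
band = band.clip(aoi)  # values and bounds now follow the mask geometry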
@@ -868,126 +991,131 @@ class Band(_ImageBandBase):
         # allow setting a fixed out_shape for the array, in order to make mask same shape as values
         out_shape = kwargs.pop("out_shape", None)

-        if self.has_array:
-            self
-
+        if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
+            print(self)
+            print(self.mask)
+            print(self.values.shape)
+            print([int(x) for x in bounds], [int(x) for x in self.bounds])
+            raise ValueError(
+                "Cannot re-load array with different bounds. "
+                "Use .copy() to read with different bounds. "
+                "Or .clip(mask) to clip."
             )
-
-
-
-
-        with opener(self.path, file_system=self.file_system) as f:
-            with rasterio.open(f, nodata=self.nodata) as src:
-                self._res = int(src.res[0]) if not self.res else self.res
+        # with opener(self.path, file_system=self.file_system) as f:
+        with opener(self.path, file_system=file_system) as f:
+            with rasterio.open(f, nodata=self.nodata) as src:
+                self._res = int(src.res[0]) if not self.res else self.res

-
-
-
-
-
-
-
-
-
-                src._dtypes = tuple(
-                    rasterio.dtypes.get_minimum_dtype(self.nodata)
-                    for _ in range(len(_indexes))
-                )
-
-                if bounds is None:
-                    if self._res != int(src.res[0]):
-                        if out_shape is None:
-                            out_shape = _get_shape_from_bounds(
-                                to_bbox(src.bounds), self.res, indexes
-                            )
-                        self.transform = _get_transform_from_bounds(
-                            to_bbox(src.bounds), shape=out_shape
-                        )
-                    else:
-                        self.transform = src.transform
-
-                self._values = src.read(
-                    indexes=indexes,
-                    out_shape=out_shape,
-                    masked=masked,
-                    **kwargs,
-                )
-            else:
-                window = rasterio.windows.from_bounds(
-                    *bounds, transform=src.transform
+                if self.nodata is None or np.isnan(self.nodata):
+                    self.nodata = src.nodata
+                else:
+                    dtype_min_value = _get_dtype_min(src.dtypes[0])
+                    dtype_max_value = _get_dtype_max(src.dtypes[0])
+                    if self.nodata > dtype_max_value or self.nodata < dtype_min_value:
+                        src._dtypes = tuple(
+                            rasterio.dtypes.get_minimum_dtype(self.nodata)
+                            for _ in range(len(_indexes))
                         )

+                if bounds is None:
+                    if self._res != int(src.res[0]):
                         if out_shape is None:
                             out_shape = _get_shape_from_bounds(
-                                bounds, self.res, indexes
+                                to_bbox(src.bounds), self.res, indexes
                             )
-
-
-                            indexes=indexes,
-                            window=window,
-                            boundless=False,
-                            out_shape=out_shape,
-                            masked=masked,
-                            **kwargs,
+                        self.transform = _get_transform_from_bounds(
+                            to_bbox(src.bounds), shape=out_shape
                         )
+                    else:
+                        self.transform = src.transform

-
-
-
-
+                    values = src.read(
+                        indexes=indexes,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+                else:
+                    window = rasterio.windows.from_bounds(
+                        *bounds, transform=src.transform
+                    )

+                    if out_shape is None:
+                        out_shape = _get_shape_from_bounds(bounds, self.res, indexes)
+
+                    values = src.read(
+                        indexes=indexes,
+                        window=window,
+                        boundless=False,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+
+                    assert out_shape == values.shape, (
+                        out_shape,
+                        values.shape,
+                    )
+
+                    width, height = values.shape[-2:]
+
+                    if width and height:
                         self.transform = rasterio.transform.from_bounds(
-                            *bounds,
+                            *bounds, width, height
                         )
-                        self._bounds = bounds

-
-
-
-
-
-                    else:
-                        self.values[self.values == src.nodata] = self.nodata
+                if self.nodata is not None and not np.isnan(self.nodata):
+                    if isinstance(values, np.ma.core.MaskedArray):
+                        values.data[values.data == src.nodata] = self.nodata
+                    else:
+                        values[values == src.nodata] = self.nodata

         if self.masking and self.is_mask:
-
+            values = np.isin(values, list(self.masking["values"]))

-        elif self.mask is not None and not isinstance(
-
-
-
-
-
+        elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
+
+            if not self.mask.has_array:
+                self._mask = self.mask.load(
+                    bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
+                )
             mask_arr = self.mask.values

-
-                self._values, mask=mask_arr, fill_value=self.nodata
-            )
+            values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)

-
-            self.
-
-
+        if bounds is not None:
+            self._bounds = to_bbox(bounds)
+
+        self._values = values
+        # trigger the setter
+        self.values = values

         return self

     @property
     def is_mask(self) -> bool:
         """True if the band_id is equal to the masking band_id."""
+        if self.masking is None:
+            return False
         return self.band_id == self.masking["band_id"]

     @property
     def has_array(self) -> bool:
         """Whether the array is loaded."""
         try:
-            if not isinstance(self.values, np.ndarray):
+            if not isinstance(self.values, (np.ndarray | DataArray)):
                 raise ValueError()
             return True
         except ValueError:  # also catches ArrayNotLoadedError
             return False

     def write(
-        self,
+        self,
+        path: str | Path,
+        driver: str = "GTiff",
+        compress: str = "LZW",
+        file_system=None,
+        **kwargs,
     ) -> None:
         """Write the array as an image file."""
         if not hasattr(self, "_values"):
@@ -1010,7 +1138,8 @@ class Band(_ImageBandBase):
             "width": self.width,
         } | kwargs

-        with opener(path, "wb", file_system=self.file_system) as f:
+        # with opener(path, "wb", file_system=self.file_system) as f:
+        with opener(path, "wb", file_system=file_system) as f:
             with rasterio.open(f, "w", **profile) as dst:

                 if dst.nodata is None:
@@ -1032,17 +1161,14 @@ class Band(_ImageBandBase):
                 if isinstance(self.values, np.ma.core.MaskedArray):
                     dst.write_mask(self.values.mask)

-        self._path = str(path)
+        self._path = _fix_path(str(path))

     def apply(self, func: Callable, **kwargs) -> "Band":
-        """Apply a function to the
-
-
-
-
-        """Normalize array values between 0 and 1."""
-        arr = self.values
-        self.values = (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
+        """Apply a function to the Band."""
+        results = func(self, **kwargs)
+        if isinstance(results, Band):
+            return results
+        self.values = results
         return self

     def sample(self, size: int = 1000, mask: Any = None, **kwargs) -> "Image":
@@ -1173,7 +1299,7 @@ class Band(_ImageBandBase):
             dropna=dropna,
         )

-    def
+    def to_geopandas(self, column: str = "value") -> GeoDataFrame:
         """Create a GeoDataFrame from the image Band.

         Args:
@@ -1200,23 +1326,61 @@ class Band(_ImageBandBase):
         )

     def to_xarray(self) -> DataArray:
-        """Convert the raster to
-
-
-
-            dims = ["y", "x"]
-        elif len(self.values.shape) == 3:
-            dims = ["band", "y", "x"]
-        else:
-            raise ValueError("Array must be 2 or 3 dimensional.")
-        return xr.DataArray(
+        """Convert the raster to an xarray.DataArray."""
+        if self.backend == "xarray":
+            return self.values
+        return self._to_xarray(
             self.values,
-
-
-            name=name,
-            attrs={"crs": self.crs},
+            transform=self.transform,
+            # name=self.name or self.__class__.__name__.lower(),
         )

+    def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
+        """Convert the raster to a numpy.ndarray."""
+        return self._to_numpy(self.values).copy()
+
+    def _to_numpy(
+        self, arr: np.ndarray | DataArray, masked: bool = True
+    ) -> np.ndarray | np.ma.core.MaskedArray:
+        if not isinstance(arr, np.ndarray):
+            mask_arr = None
+            if masked:
+                # if self.mask is not None:
+                #     print(self.mask.values.shape, arr.shape)
+                # if self.mask is not None and self.mask.values.shape == arr.shape:
+                #     print("hei", self.mask.values.sum())
+                #     mask_arr = self.mask.values
+                # else:
+                #     mask_arr = np.full(arr.shape, False)
+                # try:
+                #     print("hei222", arr.isnull().values.sum())
+                #     mask_arr |= arr.isnull().values
+                # except AttributeError:
+                #     pass
+                # mask_arr = np.full(arr.shape, False)
+                try:
+                    mask_arr = arr.isnull().values
+                except AttributeError:
+                    pass
+            try:
+                arr = arr.to_numpy()
+            except AttributeError:
+                arr = arr.values
+            if mask_arr is not None:
+                arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
+        if not isinstance(arr, np.ndarray):
+            arr = np.array(arr)
+
+        if (
+            masked
+            and self.mask is not None
+            and not self.is_mask
+            and not isinstance(arr, np.ma.core.MaskedArray)
+        ):
+            arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+        return arr
+
     def __repr__(self) -> str:
         """String representation."""
         try:
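Note: a hypothetical round-trip through the new `to_xarray`/`to_numpy` pair on an in-memory `Band` (array data requires `bounds`, per the initialiser above):

import numpy as np
from sgis.raster.image_collection import Band

# 100x100 array covering a 1000m x 1000m box at 10m resolution (made-up numbers)
arr = np.random.randint(0, 255, size=(100, 100)).astype("uint8")
band = Band(arr, res=10, crs=25833, bounds=(0, 0, 1000, 1000))

da = band.to_xarray()    # xarray.DataArray with y/x coords and a 'crs' attribute
arr2 = band.to_numpy()   # back to a plain (possibly masked) numpy array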
@@ -1252,12 +1416,12 @@ class Image(_ImageBandBase):
     """Image consisting of one or more Bands."""

     band_class: ClassVar[Band] = Band
+    backend: str = "numpy"

     def __init__(
         self,
-        data: str | Path | Sequence[Band],
+        data: str | Path | Sequence[Band] | None = None,
         res: int | None = None,
-        file_system: GCSFileSystem | None = None,
         processes: int = 1,
         df: pd.DataFrame | None = None,
         nodata: int | None = None,
@@ -1265,44 +1429,38 @@ class Image(_ImageBandBase):
         **kwargs,
     ) -> None:
         """Image initialiser."""
+        if data is None:
+            # allowing 'bands' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "bands" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'bands'.")
+            data = kwargs.pop("bands")
+
         super().__init__(**kwargs)

         self.nodata = nodata
-        self._res = res
-        self._crs = None
-        self.file_system = file_system
         self.processes = processes
+        self._crs = None
+        self._bands = None

         if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
-            self.
-            if res is None:
-                res = list({band.res for band in self.bands})
-                if len(res) == 1:
-                    self._res = res[0]
-                else:
-                    raise ValueError(f"Different resolutions for the bands: {res}")
-            else:
-                self._res = res
+            self._construct_image_from_bands(data, res)
             return
-
-        if not isinstance(data, (str | Path | os.PathLike)):
+        elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError("'data' must be string, Path-like or a sequence of Band.")

-        self.
-        self._path = _fix_path(data)
+        self._res = res
+        self._path = _fix_path(data)

         if all_file_paths is None and self.path:
             self._all_file_paths = _get_all_file_paths(self.path)
         elif self.path:
-
-
-            ]
+            all_file_paths = {_fix_path(x) for x in all_file_paths}
+            self._all_file_paths = {x for x in all_file_paths if self.path in x}
         else:
             self._all_file_paths = None

         if df is None:
-            # file_paths = _get_all_file_paths(self.path)
-
             if not self._all_file_paths:
                 self._all_file_paths = [self.path]
             df = self._create_metadata_df(self._all_file_paths)
@@ -1311,7 +1469,7 @@ class Image(_ImageBandBase):

         cols_to_explode = [
             "file_path",
-            "
+            "file_name",
             *[x for x in df if FILENAME_COL_SUFFIX in x],
         ]
         try:
@@ -1319,34 +1477,82 @@ class Image(_ImageBandBase):
         except ValueError:
             for col in cols_to_explode:
                 df = df.explode(col)
-            df = df.loc[lambda x: ~x["
+            df = df.loc[lambda x: ~x["file_name"].duplicated()].reset_index(drop=True)

-        df = df.loc[lambda x: x["image_path"] ==
+        df = df.loc[lambda x: x["image_path"] == self.path]

         self._df = df

+        if self.path is not None and self.metadata:
+            self.metadata = {
+                key: value for key, value in self.metadata.items() if self.path in key
+            }
+
+        if self.metadata:
+            try:
+                metadata = self.metadata[self.path]
+            except KeyError:
+                metadata = {}
+            for key, value in metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
+
+        else:
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)
+
+    def _construct_image_from_bands(
+        self, data: Sequence[Band], res: int | None
+    ) -> None:
+        self._bands = list(data)
+        if res is None:
+            res = list({band.res for band in self.bands})
+            if len(res) == 1:
+                self._res = res[0]
+            else:
+                raise ValueError(f"Different resolutions for the bands: {res}")
+        else:
+            self._res = res
         for key in self.metadata_attributes:
-
+            band_values = {getattr(band, key) for band in self if hasattr(band, key)}
+            band_values = {x for x in band_values if x is not None}
+            if len(band_values) > 1:
+                raise ValueError(f"Different {key} values in bands: {band_values}")
+            elif len(band_values):
+                try:
+                    setattr(self, key, next(iter(band_values)))
+                except AttributeError:
+                    setattr(self, f"_{key}", next(iter(band_values)))

-
-
+    def copy(self) -> "Image":
+        """Copy the instance and its attributes."""
+        copied = super().copy()
+        for band in copied:
+            band._mask = copied._mask
+        return copied

-
-
-    ""
-
-
-
-
-
-
-
+    def apply(self, func: Callable, **kwargs) -> "Image":
+        """Apply a function to each band of the Image."""
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(joblib.delayed(_band_apply)(band, func, **kwargs) for band in self)
+
+        return self
+
+    def ndvi(
+        self, red_band: str, nir_band: str, padding: int = 0, copy: bool = True
+    ) -> NDVIBand:
         """Calculate the NDVI for the Image."""
         copied = self.copy() if copy else self
         red = copied[red_band].load()
         nir = copied[nir_band].load()

-        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
+        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
+            red.values, nir.values, padding=padding
+        )

         return NDVIBand(
             arr,
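Note: a hypothetical usage sketch of the reworked `Image.apply` and `Image.ndvi`; the directory path and band ids are examples only:

import numpy as np
from sgis.raster.image_collection import Image

image = Image("data/example_image_dir", res=10)  # hypothetical directory


def scale_reflectance(band):
    # apply() hands each Band to the function; returning an array replaces band.values
    return band.load().values * 0.0001


image = image.apply(scale_reflectance)

# NDVI from two band ids; the ids depend on the sensor's file naming
ndvi_band = image.ndvi(red_band="B04", nir_band="B08")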
@@ -1390,56 +1596,61 @@ class Image(_ImageBandBase):

     def to_xarray(self) -> DataArray:
         """Convert the raster to an xarray.DataArray."""
-
-
-
-
-
-
-            self.values,
-            coords=coords,
-            dims=dims,
-            name=name,
-            attrs={"crs": self.crs},
+        if self.backend == "xarray":
+            return self.values
+
+        return self._to_xarray(
+            np.array([band.values for band in self]),
+            transform=self[0].transform,
         )

     @property
     def mask(self) -> Band | None:
         """Mask Band."""
-        if self._mask is not None:
-            # if not self._mask.has_array:
-            #     try:
-            #         self._mask.values = self[0]._mask.values
-            #     except Exception:
-            #         pass
-            return self._mask
         if self.masking is None:
             return None

+        elif self._mask is not None:
+            return self._mask
+
+        elif self._bands is not None and all(band.mask is not None for band in self):
+            if len({id(band.mask) for band in self}) > 1:
+                raise ValueError(
+                    "Image bands must have same mask.",
+                    {id(band.mask) for band in self},
+                )  # TODO
+            self._mask = next(
+                iter([band.mask for band in self if band.mask is not None])
+            )
+            return self._mask
+
         mask_band_id = self.masking["band_id"]
-        mask_paths = [path for path in self.
+        mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
         if len(mask_paths) > 1:
             raise ValueError(
                 f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
             )
         elif not mask_paths:
             raise ValueError(
-                f"No file_paths match mask band_id {mask_band_id} for {self.path}"
+                f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
+                + str([Path(x).name for x in _ls_func(self.path)])
             )
+
         self._mask = self.band_class(
             mask_paths[0],
-            _add_metadata_attributes=False,
             **self._common_init_kwargs,
         )
-
+        if self._bands is not None:
+            for band in self:
+                band._mask = self._mask
         return self._mask

     @mask.setter
-    def mask(self, values: Band) -> None:
+    def mask(self, values: Band | None) -> None:
         if values is None:
             self._mask = None
             for band in self:
-                band.
+                band._mask = None
             return
         if not isinstance(values, Band):
             raise TypeError(f"mask must be Band. Got {type(values)}")
@@ -1449,7 +1660,7 @@ class Image(_ImageBandBase):
|
|
|
1449
1660
|
band._mask = self._mask
|
|
1450
1661
|
try:
|
|
1451
1662
|
band.values = np.ma.array(
|
|
1452
|
-
band.values, mask=mask_arr, fill_value=band.nodata
|
|
1663
|
+
band.values.data, mask=mask_arr, fill_value=band.nodata
|
|
1453
1664
|
)
|
|
1454
1665
|
except ArrayNotLoadedError:
|
|
1455
1666
|
pass
|
|
@@ -1470,22 +1681,24 @@ class Image(_ImageBandBase):
|
|
|
1470
1681
|
if self._bands is not None:
|
|
1471
1682
|
return self._bands
|
|
1472
1683
|
|
|
1684
|
+
if self.masking:
|
|
1685
|
+
mask_band_id = self.masking["band_id"]
|
|
1686
|
+
paths = [path for path in self._df["file_path"] if mask_band_id not in path]
|
|
1687
|
+
else:
|
|
1688
|
+
paths = self._df["file_path"]
|
|
1689
|
+
|
|
1690
|
+
mask = self.mask
|
|
1691
|
+
|
|
1473
1692
|
self._bands = [
|
|
1474
1693
|
self.band_class(
|
|
1475
1694
|
path,
|
|
1476
|
-
mask=
|
|
1477
|
-
|
|
1695
|
+
mask=mask,
|
|
1696
|
+
all_file_paths=self._all_file_paths,
|
|
1478
1697
|
**self._common_init_kwargs,
|
|
1479
1698
|
)
|
|
1480
|
-
for path in
|
|
1699
|
+
for path in paths
|
|
1481
1700
|
]
|
|
1482
1701
|
|
|
1483
|
-
if self.masking:
|
|
1484
|
-
mask_band_id = self.masking["band_id"]
|
|
1485
|
-
self._bands = [
|
|
1486
|
-
band for band in self._bands if mask_band_id not in band.path
|
|
1487
|
-
]
|
|
1488
|
-
|
|
1489
1702
|
if (
|
|
1490
1703
|
self.filename_patterns
|
|
1491
1704
|
and any(_get_non_optional_groups(pat) for pat in self.filename_patterns)
|
|
@@ -1514,30 +1727,19 @@ class Image(_ImageBandBase):
|
|
|
1514
1727
|
if self._should_be_sorted:
|
|
1515
1728
|
self._bands = list(sorted(self._bands))
|
|
1516
1729
|
|
|
1517
|
-
for key in self.metadata_attributes:
|
|
1518
|
-
for band in self:
|
|
1519
|
-
value = getattr(self, key)
|
|
1520
|
-
if value is None:
|
|
1521
|
-
continue
|
|
1522
|
-
if isinstance(value, BandIdDict):
|
|
1523
|
-
try:
|
|
1524
|
-
value = value[band.band_id]
|
|
1525
|
-
except KeyError:
|
|
1526
|
-
continue
|
|
1527
|
-
setattr(band, key, value)
|
|
1528
|
-
|
|
1529
|
-
for band in self:
|
|
1530
|
-
band._image = self
|
|
1531
|
-
|
|
1532
1730
|
return self._bands
|
|
1533
1731
|
|
|
1534
1732
|
@property
|
|
1535
1733
|
def _should_be_sorted(self) -> bool:
|
|
1536
1734
|
sort_groups = ["band", "band_id"]
|
|
1537
|
-
return
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1735
|
+
return (
|
|
1736
|
+
self.filename_patterns
|
|
1737
|
+
and any(
|
|
1738
|
+
group in _get_non_optional_groups(pat)
|
|
1739
|
+
for group in sort_groups
|
|
1740
|
+
for pat in self.filename_patterns
|
|
1741
|
+
)
|
|
1742
|
+
or all(band.band_id is not None for band in self)
|
|
1541
1743
|
)
|
|
1542
1744
|
|
|
1543
1745
|
@property
|
|
@@ -1581,10 +1783,10 @@ class Image(_ImageBandBase):
|
|
|
1581
1783
|
bounds.append(band.bounds)
|
|
1582
1784
|
return get_total_bounds(bounds)
|
|
1583
1785
|
|
|
1584
|
-
def
|
|
1786
|
+
def to_geopandas(self, column: str = "value") -> GeoDataFrame:
|
|
1585
1787
|
"""Convert the array to a GeoDataFrame of grid polygons and values."""
|
|
1586
1788
|
return pd.concat(
|
|
1587
|
-
[band.
|
|
1789
|
+
[band.to_geopandas(column=column) for band in self], ignore_index=True
|
|
1588
1790
|
)
|
|
1589
1791
|
|
|
1590
1792
|
def sample(
|
|
@@ -1613,7 +1815,7 @@ class Image(_ImageBandBase):
|
|
|
1613
1815
|
if isinstance(band, str):
|
|
1614
1816
|
return self._get_band(band)
|
|
1615
1817
|
if isinstance(band, int):
|
|
1616
|
-
return self.bands[band]
|
|
1818
|
+
return self.bands[band]
|
|
1617
1819
|
|
|
1618
1820
|
copied = self.copy()
|
|
1619
1821
|
try:
|
|
@@ -1639,10 +1841,7 @@ class Image(_ImageBandBase):
|
|
|
1639
1841
|
try:
|
|
1640
1842
|
return self.date < other.date
|
|
1641
1843
|
except Exception as e:
|
|
1642
|
-
print(self.path)
|
|
1643
|
-
print(self.date)
|
|
1644
|
-
print(other.path)
|
|
1645
|
-
print(other.date)
|
|
1844
|
+
print("", self.path, self.date, other.path, other.date, sep="\n")
|
|
1646
1845
|
raise e
|
|
1647
1846
|
|
|
1648
1847
|
def __iter__(self) -> Iterator[Band]:
|
|
@@ -1702,36 +1901,36 @@ class ImageCollection(_ImageBase):
|
|
|
1702
1901
|
image_class: ClassVar[Image] = Image
|
|
1703
1902
|
band_class: ClassVar[Band] = Band
|
|
1704
1903
|
_metadata_attribute_collection_type: ClassVar[type] = pd.Series
|
|
1904
|
+
backend: str = "numpy"
|
|
1705
1905
|
|
|
1706
1906
|
def __init__(
|
|
1707
1907
|
self,
|
|
1708
1908
|
data: str | Path | Sequence[Image] | Sequence[str | Path],
|
|
1709
1909
|
res: int,
|
|
1710
|
-
level: str | None =
|
|
1910
|
+
level: str | None = None_,
|
|
1711
1911
|
processes: int = 1,
|
|
1712
|
-
file_system: GCSFileSystem | None = None,
|
|
1713
1912
|
metadata: str | dict | pd.DataFrame | None = None,
|
|
1714
1913
|
nodata: int | None = None,
|
|
1715
1914
|
**kwargs,
|
|
1716
1915
|
) -> None:
|
|
1717
1916
|
"""Initialiser."""
|
|
1718
|
-
|
|
1917
|
+
if data is not None and kwargs.get("root"):
|
|
1918
|
+
root = _fix_path(kwargs.pop("root"))
|
|
1919
|
+
data = [f"{root}/{name}" for name in data]
|
|
1920
|
+
_from_root = True
|
|
1921
|
+
else:
|
|
1922
|
+
_from_root = False
|
|
1923
|
+
|
|
1924
|
+
super().__init__(metadata=metadata, **kwargs)
|
|
1925
|
+
|
|
1926
|
+
if callable(level) and isinstance(level(), None_):
|
|
1927
|
+
level = None
|
|
1719
1928
|
|
|
1720
1929
|
self.nodata = nodata
|
|
1721
|
-
self.level = level
|
|
1930
|
+
self.level = level
|
|
1722
1931
|
self.processes = processes
|
|
1723
|
-
self.file_system = file_system
|
|
1724
1932
|
self._res = res
|
|
1725
|
-
self.
|
|
1726
|
-
self._crs = None # crs
|
|
1727
|
-
|
|
1728
|
-
if metadata is not None:
|
|
1729
|
-
if isinstance(metadata, (str | Path | os.PathLike)):
|
|
1730
|
-
self.metadata = _read_parquet_func(metadata)
|
|
1731
|
-
else:
|
|
1732
|
-
self.metadata = metadata
|
|
1733
|
-
else:
|
|
1734
|
-
self.metadata = metadata
|
|
1933
|
+
self._crs = None
|
|
1735
1934
|
|
|
1736
1935
|
self._df = None
|
|
1737
1936
|
self._all_file_paths = None
|
|
@@ -1743,18 +1942,22 @@ class ImageCollection(_ImageBase):
|
|
|
1743
1942
|
self.images = [x.copy() for x in data]
|
|
1744
1943
|
return
|
|
1745
1944
|
elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1945
|
+
# adding band paths (assuming 'data' is a sequence of image paths)
|
|
1946
|
+
try:
|
|
1947
|
+
self._all_file_paths = _get_child_paths_threaded(data) | set(data)
|
|
1948
|
+
except FileNotFoundError as e:
|
|
1949
|
+
if _from_root:
|
|
1950
|
+
raise TypeError(
|
|
1951
|
+
"When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
|
|
1952
|
+
) from e
|
|
1953
|
+
raise e
|
|
1954
|
+
self._df = self._create_metadata_df(self._all_file_paths)
|
|
1752
1955
|
return
|
|
1753
1956
|
|
|
1754
1957
|
if not isinstance(data, (str | Path | os.PathLike)):
|
|
1755
1958
|
raise TypeError("'data' must be string, Path-like or a sequence of Image.")
|
|
1756
1959
|
|
|
1757
|
-
self._path = str(data)
|
|
1960
|
+
self._path = _fix_path(str(data))
|
|
1758
1961
|
|
|
1759
1962
|
self._all_file_paths = _get_all_file_paths(self.path)
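A minimal sketch of the new 'root' keyword handled above, assuming ImageCollection is exposed at the package top level; the directory and image names are hypothetical:

    import sgis as sg

    # 'data' holds image names only; each is joined as f"{root}/{name}"
    collection = sg.ImageCollection(
        ["S2A_MSIL2A_20230601_T32VNM", "S2B_MSIL2A_20230711_T32VNM"],
        root="/data/sentinel2",
        res=10,
    )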
|
|
1760
1963
|
|
|
@@ -1765,18 +1968,6 @@ class ImageCollection(_ImageBase):
|
|
|
1765
1968
|
|
|
1766
1969
|
self._df = self._create_metadata_df(self._all_file_paths)
|
|
1767
1970
|
|
|
1768
|
-
@property
|
|
1769
|
-
def values(self) -> np.ndarray:
|
|
1770
|
-
"""4 dimensional numpy array."""
|
|
1771
|
-
if isinstance(self[0].values, np.ma.core.MaskedArray):
|
|
1772
|
-
return np.ma.array([img.values for img in self])
|
|
1773
|
-
return np.array([img.values for img in self])
|
|
1774
|
-
|
|
1775
|
-
@property
|
|
1776
|
-
def mask(self) -> np.ndarray:
|
|
1777
|
-
"""4 dimensional numpy array."""
|
|
1778
|
-
return np.array([img.mask.values for img in self])
|
|
1779
|
-
|
|
1780
1971
|
def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
|
|
1781
1972
|
"""Group the Collection by Image or Band attribute(s)."""
|
|
1782
1973
|
df = pd.DataFrame(
|
|
@@ -1830,15 +2021,20 @@ class ImageCollection(_ImageBase):
|
|
|
1830
2021
|
for img in copied:
|
|
1831
2022
|
assert len(img) == 1
|
|
1832
2023
|
try:
|
|
1833
|
-
img._path = img[0].path
|
|
2024
|
+
img._path = _fix_path(img[0].path)
|
|
1834
2025
|
except PathlessImageError:
|
|
1835
2026
|
pass
|
|
1836
2027
|
return copied
|
|
1837
2028
|
|
|
1838
2029
|
def apply(self, func: Callable, **kwargs) -> "ImageCollection":
|
|
1839
2030
|
"""Apply a function to all bands in each image of the collection."""
|
|
1840
|
-
|
|
1841
|
-
|
|
2031
|
+
with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
|
|
2032
|
+
parallel(
|
|
2033
|
+
joblib.delayed(_band_apply)(band, func, **kwargs)
|
|
2034
|
+
for img in self
|
|
2035
|
+
for band in img
|
|
2036
|
+
)
|
|
2037
|
+
|
|
1842
2038
|
return self
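A sketch of the parallelised apply shown above; the scaling function is made up, and it is assumed that Band.apply passes the band array to the function and stores the returned array:

    import numpy as np

    def scale_to_reflectance(values: np.ndarray) -> np.ndarray:
        # assumed signature: receives and returns the band's array
        return values.astype("float32") / 10_000

    collection = collection.apply(scale_to_reflectance)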
|
|
1843
2039
|
|
|
1844
2040
|
def get_unique_band_ids(self) -> list[str]:
|
|
@@ -1851,7 +2047,7 @@ class ImageCollection(_ImageBase):
|
|
|
1851
2047
|
date_ranges: DATE_RANGES_TYPE = None,
|
|
1852
2048
|
bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
|
|
1853
2049
|
intersects: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
|
|
1854
|
-
|
|
2050
|
+
max_cloud_cover: int | None = None,
|
|
1855
2051
|
copy: bool = True,
|
|
1856
2052
|
) -> "ImageCollection":
|
|
1857
2053
|
"""Filter images and bands in the collection."""
|
|
@@ -1860,11 +2056,11 @@ class ImageCollection(_ImageBase):
|
|
|
1860
2056
|
if date_ranges:
|
|
1861
2057
|
copied = copied._filter_dates(date_ranges)
|
|
1862
2058
|
|
|
1863
|
-
if
|
|
2059
|
+
if max_cloud_cover is not None:
|
|
1864
2060
|
copied.images = [
|
|
1865
2061
|
image
|
|
1866
2062
|
for image in copied.images
|
|
1867
|
-
if image.
|
|
2063
|
+
if image.cloud_cover_percentage < max_cloud_cover
|
|
1868
2064
|
]
|
|
1869
2065
|
|
|
1870
2066
|
if bbox is not None:
|
|
@@ -1878,7 +2074,6 @@ class ImageCollection(_ImageBase):
|
|
|
1878
2074
|
if isinstance(bands, str):
|
|
1879
2075
|
bands = [bands]
|
|
1880
2076
|
bands = set(bands)
|
|
1881
|
-
copied._band_ids = bands
|
|
1882
2077
|
copied.images = [img[bands] for img in copied.images if bands in img]
|
|
1883
2078
|
|
|
1884
2079
|
return copied
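A hedged example of the filter method with the renamed max_cloud_cover keyword; the band ids and threshold are illustrative only:

    filtered = collection.filter(
        bands=["B02", "B03", "B04"],
        max_cloud_cover=30,  # keeps images with cloud_cover_percentage < 30
    )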
|
|
@@ -1892,7 +2087,7 @@ class ImageCollection(_ImageBase):
|
|
|
1892
2087
|
**kwargs,
|
|
1893
2088
|
) -> Band:
|
|
1894
2089
|
"""Merge all areas and all bands to a single Band."""
|
|
1895
|
-
bounds = _get_bounds(bounds, self._bbox)
|
|
2090
|
+
bounds = _get_bounds(bounds, self._bbox, self.union_all())
|
|
1896
2091
|
if bounds is not None:
|
|
1897
2092
|
bounds = to_bbox(bounds)
|
|
1898
2093
|
|
|
@@ -1930,14 +2125,14 @@ class ImageCollection(_ImageBase):
|
|
|
1930
2125
|
**kwargs,
|
|
1931
2126
|
)
|
|
1932
2127
|
|
|
1933
|
-
|
|
1934
|
-
|
|
2128
|
+
if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
|
|
2129
|
+
arr = arr[0]
|
|
1935
2130
|
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
2131
|
+
if method == "mean":
|
|
2132
|
+
if as_int:
|
|
2133
|
+
arr = arr // len(datasets)
|
|
2134
|
+
else:
|
|
2135
|
+
arr = arr / len(datasets)
|
|
1941
2136
|
|
|
1942
2137
|
if bounds is None:
|
|
1943
2138
|
bounds = self.bounds
|
|
@@ -1963,7 +2158,7 @@ class ImageCollection(_ImageBase):
|
|
|
1963
2158
|
**kwargs,
|
|
1964
2159
|
) -> Image:
|
|
1965
2160
|
"""Merge all areas to a single tile, one band per band_id."""
|
|
1966
|
-
bounds = _get_bounds(bounds, self._bbox)
|
|
2161
|
+
bounds = _get_bounds(bounds, self._bbox, self.union_all())
|
|
1967
2162
|
if bounds is not None:
|
|
1968
2163
|
bounds = to_bbox(bounds)
|
|
1969
2164
|
bounds = self.bounds if bounds is None else bounds
|
|
@@ -2021,7 +2216,6 @@ class ImageCollection(_ImageBase):
|
|
|
2021
2216
|
bounds=out_bounds,
|
|
2022
2217
|
crs=crs,
|
|
2023
2218
|
band_id=band_id,
|
|
2024
|
-
_add_metadata_attributes=False,
|
|
2025
2219
|
**self._common_init_kwargs,
|
|
2026
2220
|
)
|
|
2027
2221
|
)
|
|
@@ -2061,10 +2255,13 @@ class ImageCollection(_ImageBase):
|
|
|
2061
2255
|
arr = np.array(
|
|
2062
2256
|
[
|
|
2063
2257
|
(
|
|
2064
|
-
band.load(
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
)
|
|
2258
|
+
# band.load(
|
|
2259
|
+
# bounds=(_bounds if _bounds is not None else None),
|
|
2260
|
+
# **kwargs,
|
|
2261
|
+
# )
|
|
2262
|
+
# if not band.has_array
|
|
2263
|
+
# else
|
|
2264
|
+
band
|
|
2068
2265
|
).values
|
|
2069
2266
|
for img in collection
|
|
2070
2267
|
for band in img
|
|
@@ -2087,7 +2284,7 @@ class ImageCollection(_ImageBase):
|
|
|
2087
2284
|
coords = _generate_spatial_coords(transform, width, height)
|
|
2088
2285
|
|
|
2089
2286
|
arrs.append(
|
|
2090
|
-
|
|
2287
|
+
DataArray(
|
|
2091
2288
|
arr,
|
|
2092
2289
|
coords=coords,
|
|
2093
2290
|
dims=["y", "x"],
|
|
@@ -2104,7 +2301,7 @@ class ImageCollection(_ImageBase):
|
|
|
2104
2301
|
return merged.to_numpy()
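Sketch of merging within explicit bounds, which are now intersected with the collection's union_all(); the coordinates and the "method" keyword are assumptions, not confirmed by this diff:

    bbox = (569000, 7032000, 579000, 7042000)  # minx, miny, maxx, maxy (hypothetical)
    mean_band = collection.merge(bounds=bbox, method="mean")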
|
|
2105
2302
|
|
|
2106
2303
|
def sort_images(self, ascending: bool = True) -> "ImageCollection":
|
|
2107
|
-
"""Sort Images by date."""
|
|
2304
|
+
"""Sort Images by date, then file path if date attribute is missing."""
|
|
2108
2305
|
self._images = (
|
|
2109
2306
|
list(sorted([img for img in self if img.date is not None]))
|
|
2110
2307
|
+ sorted(
|
|
@@ -2121,6 +2318,7 @@ class ImageCollection(_ImageBase):
|
|
|
2121
2318
|
self,
|
|
2122
2319
|
bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
|
|
2123
2320
|
indexes: int | tuple[int] | None = None,
|
|
2321
|
+
file_system=None,
|
|
2124
2322
|
**kwargs,
|
|
2125
2323
|
) -> "ImageCollection":
|
|
2126
2324
|
"""Load all image Bands with threading."""
|
|
@@ -2130,10 +2328,46 @@ class ImageCollection(_ImageBase):
|
|
|
2130
2328
|
and all(band.has_array for img in self for band in img)
|
|
2131
2329
|
):
|
|
2132
2330
|
return self
|
|
2331
|
+
|
|
2332
|
+
# if self.processes == 1:
|
|
2333
|
+
# for img in self:
|
|
2334
|
+
# for band in img:
|
|
2335
|
+
# band.load(
|
|
2336
|
+
# bounds=bounds,
|
|
2337
|
+
# indexes=indexes,
|
|
2338
|
+
# file_system=file_system,
|
|
2339
|
+
# **kwargs,
|
|
2340
|
+
# )
|
|
2341
|
+
# return self
|
|
2342
|
+
|
|
2133
2343
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2344
|
+
if self.masking:
|
|
2345
|
+
parallel(
|
|
2346
|
+
joblib.delayed(_load_band)(
|
|
2347
|
+
img.mask,
|
|
2348
|
+
bounds=bounds,
|
|
2349
|
+
indexes=indexes,
|
|
2350
|
+
file_system=file_system,
|
|
2351
|
+
**kwargs,
|
|
2352
|
+
)
|
|
2353
|
+
for img in self
|
|
2354
|
+
)
|
|
2355
|
+
for img in self:
|
|
2356
|
+
for band in img:
|
|
2357
|
+
band._mask = img.mask
|
|
2358
|
+
|
|
2359
|
+
# print({img.mask.has_array for img in self })
|
|
2360
|
+
# print({band.mask.has_array for img in self for band in img})
|
|
2361
|
+
|
|
2362
|
+
# with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2363
|
+
|
|
2134
2364
|
parallel(
|
|
2135
2365
|
joblib.delayed(_load_band)(
|
|
2136
|
-
band,
|
|
2366
|
+
band,
|
|
2367
|
+
bounds=bounds,
|
|
2368
|
+
indexes=indexes,
|
|
2369
|
+
file_system=file_system,
|
|
2370
|
+
**kwargs,
|
|
2137
2371
|
)
|
|
2138
2372
|
for img in self
|
|
2139
2373
|
for band in img
|
|
@@ -2141,6 +2375,27 @@ class ImageCollection(_ImageBase):
|
|
|
2141
2375
|
|
|
2142
2376
|
return self
|
|
2143
2377
|
|
|
2378
|
+
def clip(
|
|
2379
|
+
self,
|
|
2380
|
+
mask: Geometry | GeoDataFrame | GeoSeries,
|
|
2381
|
+
**kwargs,
|
|
2382
|
+
) -> "ImageCollection":
|
|
2383
|
+
"""Clip all image Bands with 'loky'."""
|
|
2384
|
+
if self.processes == 1:
|
|
2385
|
+
for img in self:
|
|
2386
|
+
for band in img:
|
|
2387
|
+
band.clip(mask, **kwargs)
|
|
2388
|
+
return self
|
|
2389
|
+
|
|
2390
|
+
with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
|
|
2391
|
+
parallel(
|
|
2392
|
+
joblib.delayed(_clip_band)(band, mask, **kwargs)
|
|
2393
|
+
for img in self
|
|
2394
|
+
for band in img
|
|
2395
|
+
)
|
|
2396
|
+
|
|
2397
|
+
return self
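A minimal sketch of the new clip method, assuming the arrays are loaded first; the mask geometry and CRS are hypothetical:

    import geopandas as gpd
    from shapely.geometry import box

    aoi = gpd.GeoDataFrame(geometry=[box(569000, 7032000, 570000, 7033000)], crs=25833)
    collection = collection.load(bounds=aoi).clip(aoi)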
|
|
2398
|
+
|
|
2144
2399
|
def _set_bbox(
|
|
2145
2400
|
self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
|
|
2146
2401
|
) -> "ImageCollection":
|
|
@@ -2150,12 +2405,17 @@ class ImageCollection(_ImageBase):
|
|
|
2150
2405
|
if self._images is not None:
|
|
2151
2406
|
for img in self._images:
|
|
2152
2407
|
img._bbox = self._bbox
|
|
2408
|
+
if img.mask is not None:
|
|
2409
|
+
img.mask._bbox = self._bbox
|
|
2153
2410
|
if img.bands is None:
|
|
2154
2411
|
continue
|
|
2155
2412
|
for band in img:
|
|
2156
2413
|
band._bbox = self._bbox
|
|
2157
2414
|
bounds = box(*band._bbox).intersection(box(*band.bounds))
|
|
2158
2415
|
band._bounds = to_bbox(bounds) if not bounds.is_empty else None
|
|
2416
|
+
if band.mask is not None:
|
|
2417
|
+
band.mask._bbox = self._bbox
|
|
2418
|
+
band.mask._bounds = band._bounds
|
|
2159
2419
|
|
|
2160
2420
|
return self
|
|
2161
2421
|
|
|
@@ -2184,11 +2444,15 @@ class ImageCollection(_ImageBase):
|
|
|
2184
2444
|
|
|
2185
2445
|
other = to_shapely(other)
|
|
2186
2446
|
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2447
|
+
if self.processes == 1:
|
|
2448
|
+
intersects_list: pd.Series = GeoSeries(
|
|
2449
|
+
[img.union_all() for img in self]
|
|
2450
|
+
).intersects(other)
|
|
2451
|
+
else:
|
|
2452
|
+
with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
|
|
2453
|
+
intersects_list: list[bool] = parallel(
|
|
2454
|
+
joblib.delayed(_intesects)(image, other) for image in self
|
|
2455
|
+
)
|
|
2192
2456
|
|
|
2193
2457
|
self.images = [
|
|
2194
2458
|
image
|
|
@@ -2197,39 +2461,70 @@ class ImageCollection(_ImageBase):
|
|
|
2197
2461
|
]
|
|
2198
2462
|
return self
|
|
2199
2463
|
|
|
2200
|
-
def to_xarray(
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
# arr = band.load(**kwargs).values
|
|
2206
|
-
# arrs.append(arr)
|
|
2207
|
-
|
|
2208
|
-
# n_images = len(self)
|
|
2209
|
-
# n_bands = len(img)
|
|
2210
|
-
# height, width = arr.shape
|
|
2211
|
-
|
|
2212
|
-
# arr_4d = np.array(arrs).reshape(n_images, n_bands, height, width)
|
|
2464
|
+
def to_xarray(
|
|
2465
|
+
self,
|
|
2466
|
+
**kwargs,
|
|
2467
|
+
) -> Dataset:
|
|
2468
|
+
"""Convert the raster to an xarray.Dataset.
|
|
2213
2469
|
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2470
|
+
Images are converted to 2d arrays for each unique set of bounds.
|
|
2471
|
+
The spatial dimensions will be labeled "x" and "y". The third
|
|
2472
|
+
dimension defaults to "date" if all images have date attributes.
|
|
2473
|
+
Otherwise, it defaults to the image name.
|
|
2474
|
+
"""
|
|
2475
|
+
if any(not band.has_array for img in self for band in img):
|
|
2476
|
+
raise ValueError("Arrays must be loaded.")
|
|
2477
|
+
|
|
2478
|
+
# if by is None:
|
|
2479
|
+
if all(img.date for img in self):
|
|
2480
|
+
by = ["date"]
|
|
2481
|
+
elif not pd.Index([img.name for img in self]).is_unique:
|
|
2482
|
+
raise ValueError("Images must have unique names.")
|
|
2483
|
+
else:
|
|
2484
|
+
by = ["name"]
|
|
2485
|
+
# elif isinstance(by, str):
|
|
2486
|
+
# by = [by]
|
|
2487
|
+
|
|
2488
|
+
xarrs: dict[str, DataArray] = {}
|
|
2489
|
+
for (bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
|
|
2490
|
+
name = f"{band_id}_{'-'.join(str(int(x)) for x in bounds)}"
|
|
2491
|
+
first_band = collection[0][0]
|
|
2492
|
+
coords = _generate_spatial_coords(
|
|
2493
|
+
first_band.transform, first_band.width, first_band.height
|
|
2494
|
+
)
|
|
2495
|
+
values = np.array([band.to_numpy() for img in collection for band in img])
|
|
2496
|
+
assert len(values) == len(collection)
|
|
2497
|
+
|
|
2498
|
+
# coords["band_id"] = [
|
|
2499
|
+
# band.band_id or i for i, band in enumerate(collection[0])
|
|
2500
|
+
# ]
|
|
2501
|
+
for attr in by:
|
|
2502
|
+
coords[attr] = [getattr(img, attr) for img in collection]
|
|
2503
|
+
# coords["band"] = band_id #
|
|
2504
|
+
|
|
2505
|
+
dims = [*by, "y", "x"]
|
|
2506
|
+
# dims = ["band", "y", "x"]
|
|
2507
|
+
# dims = {}
|
|
2508
|
+
# for attr in by:
|
|
2509
|
+
# dims[attr] = [getattr(img, attr) for img in collection]
|
|
2510
|
+
|
|
2511
|
+
xarrs[name] = DataArray(
|
|
2512
|
+
values,
|
|
2513
|
+
coords=coords,
|
|
2514
|
+
dims=dims,
|
|
2515
|
+
# name=name,
|
|
2516
|
+
name=band_id,
|
|
2517
|
+
attrs={
|
|
2518
|
+
"crs": collection.crs,
|
|
2519
|
+
"band_id": band_id,
|
|
2520
|
+
}, # , "bounds": bounds},
|
|
2521
|
+
**kwargs,
|
|
2522
|
+
)
|
|
2218
2523
|
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
first_band.transform, first_band.width, first_band.height
|
|
2222
|
-
)
|
|
2223
|
-
dims = ["image", "band", "y", "x"]
|
|
2224
|
-
return xr.DataArray(
|
|
2225
|
-
self.values,
|
|
2226
|
-
coords=coords,
|
|
2227
|
-
dims=dims,
|
|
2228
|
-
name=name,
|
|
2229
|
-
attrs={"crs": self.crs},
|
|
2230
|
-
)
|
|
2524
|
+
return xr.combine_by_coords(list(xarrs.values()))
|
|
2525
|
+
# return Dataset(xarrs)
|
|
2231
2526
|
|
|
2232
|
-
def
|
|
2527
|
+
def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
|
|
2233
2528
|
"""Convert each band in each Image to a GeoDataFrame."""
|
|
2234
2529
|
out = {}
|
|
2235
2530
|
i = 0
|
|
@@ -2241,10 +2536,8 @@ class ImageCollection(_ImageBase):
|
|
|
2241
2536
|
except AttributeError:
|
|
2242
2537
|
name = f"{self.__class__.__name__}({i})"
|
|
2243
2538
|
|
|
2244
|
-
# band.load()
|
|
2245
|
-
|
|
2246
2539
|
if name not in out:
|
|
2247
|
-
out[name] = band.
|
|
2540
|
+
out[name] = band.to_geopandas(column=column)
|
|
2248
2541
|
return out
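Sketch of the two collection-level exports above, assuming the arrays have been loaded:

    loaded = collection.load()
    ds = loaded.to_xarray()        # xarray Dataset with "y"/"x" plus a "date" or "name" dimension
    gdfs = loaded.to_geopandas()   # dict: image name -> GeoDataFrame of pixel polygons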
|
|
2249
2542
|
|
|
2250
2543
|
def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
|
|
@@ -2384,36 +2677,22 @@ class ImageCollection(_ImageBase):
|
|
|
2384
2677
|
masking=self.masking,
|
|
2385
2678
|
**self._common_init_kwargs,
|
|
2386
2679
|
)
|
|
2680
|
+
|
|
2387
2681
|
if self.masking is not None:
|
|
2388
2682
|
images = []
|
|
2389
2683
|
for image in self._images:
|
|
2684
|
+
# TODO why this loop?
|
|
2390
2685
|
try:
|
|
2391
2686
|
if not isinstance(image.mask, Band):
|
|
2392
2687
|
raise ValueError()
|
|
2393
2688
|
images.append(image)
|
|
2394
|
-
except ValueError:
|
|
2689
|
+
except ValueError as e:
|
|
2690
|
+
raise e
|
|
2395
2691
|
continue
|
|
2396
2692
|
self._images = images
|
|
2397
2693
|
for image in self._images:
|
|
2398
2694
|
image._bands = [band for band in image if band.band_id is not None]
|
|
2399
2695
|
|
|
2400
|
-
if self.metadata is not None:
|
|
2401
|
-
attributes_to_add = ["crs", "bounds"] + list(self.metadata_attributes)
|
|
2402
|
-
for img in self:
|
|
2403
|
-
for band in img:
|
|
2404
|
-
for key in attributes_to_add:
|
|
2405
|
-
try:
|
|
2406
|
-
value = self.metadata[band.path][key]
|
|
2407
|
-
except KeyError:
|
|
2408
|
-
try:
|
|
2409
|
-
value = self.metadata[key][band.path]
|
|
2410
|
-
except KeyError:
|
|
2411
|
-
continue
|
|
2412
|
-
try:
|
|
2413
|
-
setattr(band, key, value)
|
|
2414
|
-
except Exception:
|
|
2415
|
-
setattr(band, f"_{key}", value)
|
|
2416
|
-
|
|
2417
2696
|
self._images = [img for img in self if len(img)]
|
|
2418
2697
|
|
|
2419
2698
|
if self._should_be_sorted:
|
|
@@ -2438,7 +2717,7 @@ class ImageCollection(_ImageBase):
|
|
|
2438
2717
|
and sort_group in _get_non_optional_groups(pat)
|
|
2439
2718
|
for pat in self.image_patterns
|
|
2440
2719
|
)
|
|
2441
|
-
or all(img
|
|
2720
|
+
or all(getattr(img, sort_group) is not None for img in self)
|
|
2442
2721
|
)
|
|
2443
2722
|
|
|
2444
2723
|
@images.setter
|
|
@@ -2449,7 +2728,18 @@ class ImageCollection(_ImageBase):
|
|
|
2449
2728
|
|
|
2450
2729
|
def __repr__(self) -> str:
|
|
2451
2730
|
"""String representation."""
|
|
2452
|
-
|
|
2731
|
+
root = ""
|
|
2732
|
+
if self.path is not None:
|
|
2733
|
+
data = f"'{self.path}'"
|
|
2734
|
+
elif all(img.path is not None for img in self):
|
|
2735
|
+
data = [img.path for img in self]
|
|
2736
|
+
parents = {str(Path(path).parent) for path in data}
|
|
2737
|
+
if len(parents) == 1:
|
|
2738
|
+
data = [Path(path).name for path in data]
|
|
2739
|
+
root = f" root='{next(iter(parents))}',"
|
|
2740
|
+
else:
|
|
2741
|
+
data = [img for img in self]
|
|
2742
|
+
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
|
|
2453
2743
|
|
|
2454
2744
|
def union_all(self) -> Polygon | MultiPolygon:
|
|
2455
2745
|
"""(Multi)Polygon representing the union of all image bounds."""
|
|
@@ -2500,12 +2790,8 @@ class ImageCollection(_ImageBase):
|
|
|
2500
2790
|
|
|
2501
2791
|
alpha = 1 - p
|
|
2502
2792
|
|
|
2503
|
-
# for img in self:
|
|
2504
|
-
# for band in img:
|
|
2505
|
-
# band.load()
|
|
2506
|
-
|
|
2507
2793
|
for group_values, subcollection in self.groupby(by):
|
|
2508
|
-
print("
|
|
2794
|
+
print("subcollection group values:", group_values)
|
|
2509
2795
|
|
|
2510
2796
|
if "date" in x_var and subcollection._should_be_sorted:
|
|
2511
2797
|
subcollection._images = list(sorted(subcollection._images))
|
|
@@ -2519,6 +2805,7 @@ class ImageCollection(_ImageBase):
|
|
|
2519
2805
|
for band in img
|
|
2520
2806
|
]
|
|
2521
2807
|
)
|
|
2808
|
+
first_date = pd.Timestamp(x[0])
|
|
2522
2809
|
x = (
|
|
2523
2810
|
pd.to_datetime(
|
|
2524
2811
|
[band.date[:8] for img in subcollection for band in img]
|
|
@@ -2611,6 +2898,23 @@ class ImageCollection(_ImageBase):
|
|
|
2611
2898
|
)
|
|
2612
2899
|
plt.xlabel(x_var)
|
|
2613
2900
|
plt.ylabel(y_label)
|
|
2901
|
+
|
|
2902
|
+
if x_var == "date":
|
|
2903
|
+
date_labels = pd.to_datetime(
|
|
2904
|
+
[first_date + pd.Timedelta(days=int(day)) for day in this_x]
|
|
2905
|
+
)
|
|
2906
|
+
|
|
2907
|
+
_, unique_indices = np.unique(
|
|
2908
|
+
date_labels.strftime("%Y-%m"), return_index=True
|
|
2909
|
+
)
|
|
2910
|
+
|
|
2911
|
+
unique_x = np.array(this_x)[unique_indices]
|
|
2912
|
+
unique_labels = date_labels[unique_indices].strftime("%Y-%m")
|
|
2913
|
+
|
|
2914
|
+
ax.set_xticks(unique_x)
|
|
2915
|
+
ax.set_xticklabels(unique_labels, rotation=45, ha="right")
|
|
2916
|
+
# ax.tick_params(axis="x", length=10, width=2)
|
|
2917
|
+
|
|
2614
2918
|
plt.show()
|
|
2615
2919
|
|
|
2616
2920
|
|
|
@@ -2629,10 +2933,7 @@ class Sentinel2Config:
|
|
|
2629
2933
|
"""Holder of Sentinel 2 regexes, band_ids etc."""
|
|
2630
2934
|
|
|
2631
2935
|
image_regexes: ClassVar[str] = (config.SENTINEL2_IMAGE_REGEX,)
|
|
2632
|
-
filename_regexes: ClassVar[str] = (
|
|
2633
|
-
config.SENTINEL2_FILENAME_REGEX,
|
|
2634
|
-
config.SENTINEL2_CLOUD_FILENAME_REGEX,
|
|
2635
|
-
)
|
|
2936
|
+
filename_regexes: ClassVar[str] = (config.SENTINEL2_FILENAME_REGEX,)
|
|
2636
2937
|
metadata_attributes: ClassVar[
|
|
2637
2938
|
dict[str, Callable | functools.partial | tuple[str]]
|
|
2638
2939
|
] = {
|
|
@@ -2640,22 +2941,69 @@ class Sentinel2Config:
|
|
|
2640
2941
|
_extract_regex_match_from_string,
|
|
2641
2942
|
regexes=(r"<PROCESSING_BASELINE>(.*?)</PROCESSING_BASELINE>",),
|
|
2642
2943
|
),
|
|
2643
|
-
"
|
|
2644
|
-
"is_refined":
|
|
2645
|
-
|
|
2646
|
-
),
|
|
2647
|
-
"boa_add_offset": "_get_boa_add_offset_dict",
|
|
2944
|
+
"cloud_cover_percentage": "_get_cloud_cover_percentage",
|
|
2945
|
+
"is_refined": "_get_image_refining_flag",
|
|
2946
|
+
"boa_quantification_value": "_get_boa_quantification_value",
|
|
2648
2947
|
}
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2948
|
+
l1c_bands: ClassVar[dict[str, int]] = {
|
|
2949
|
+
"B01": 60,
|
|
2950
|
+
"B02": 10,
|
|
2951
|
+
"B03": 10,
|
|
2952
|
+
"B04": 10,
|
|
2953
|
+
"B05": 20,
|
|
2954
|
+
"B06": 20,
|
|
2955
|
+
"B07": 20,
|
|
2956
|
+
"B08": 10,
|
|
2957
|
+
"B8A": 20,
|
|
2958
|
+
"B09": 60,
|
|
2959
|
+
"B10": 60,
|
|
2960
|
+
"B11": 20,
|
|
2961
|
+
"B12": 20,
|
|
2962
|
+
}
|
|
2963
|
+
l2a_bands: ClassVar[dict[str, int]] = {
|
|
2964
|
+
key: res for key, res in l1c_bands.items() if key != "B10"
|
|
2965
|
+
}
|
|
2966
|
+
all_bands: ClassVar[dict[str, int]] = l1c_bands
|
|
2967
|
+
rbg_bands: ClassVar[tuple[str]] = ("B04", "B02", "B03")
|
|
2968
|
+
ndvi_bands: ClassVar[tuple[str]] = ("B04", "B08")
|
|
2654
2969
|
masking: ClassVar[BandMasking] = BandMasking(
|
|
2655
|
-
band_id="SCL",
|
|
2970
|
+
band_id="SCL",
|
|
2971
|
+
values={
|
|
2972
|
+
2: "Topographic casted shadows",
|
|
2973
|
+
3: "Cloud shadows",
|
|
2974
|
+
8: "Cloud medium probability",
|
|
2975
|
+
9: "Cloud high probability",
|
|
2976
|
+
10: "Thin cirrus",
|
|
2977
|
+
11: "Snow or ice",
|
|
2978
|
+
},
|
|
2656
2979
|
)
|
|
2657
2980
|
|
|
2658
|
-
def
|
|
2981
|
+
def _get_image_refining_flag(self, xml_file: str) -> bool:
|
|
2982
|
+
match_ = re.search(
|
|
2983
|
+
r'Image_Refining flag="(?:REFINED|NOT_REFINED)"',
|
|
2984
|
+
xml_file,
|
|
2985
|
+
)
|
|
2986
|
+
if match_ is None:
|
|
2987
|
+
raise _RegexError()
|
|
2988
|
+
|
|
2989
|
+
if "NOT_REFINED" in match_.group(0):
|
|
2990
|
+
return False
|
|
2991
|
+
elif "REFINED" in match_.group(0):
|
|
2992
|
+
return True
|
|
2993
|
+
else:
|
|
2994
|
+
raise _RegexError()
|
|
2995
|
+
|
|
2996
|
+
def _get_boa_quantification_value(self, xml_file: str) -> int:
|
|
2997
|
+
return int(
|
|
2998
|
+
_extract_regex_match_from_string(
|
|
2999
|
+
xml_file,
|
|
3000
|
+
(
|
|
3001
|
+
r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
|
|
3002
|
+
),
|
|
3003
|
+
)
|
|
3004
|
+
)
|
|
3005
|
+
|
|
3006
|
+
def _get_cloud_cover_percentage(self, xml_file: str) -> float:
|
|
2659
3007
|
return float(
|
|
2660
3008
|
_extract_regex_match_from_string(
|
|
2661
3009
|
xml_file,
|
|
@@ -2666,7 +3014,35 @@ class Sentinel2Config:
|
|
|
2666
3014
|
)
|
|
2667
3015
|
)
|
|
2668
3016
|
|
|
2669
|
-
|
|
3017
|
+
|
|
3018
|
+
class Sentinel2CloudlessConfig(Sentinel2Config):
|
|
3019
|
+
"""Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic."""
|
|
3020
|
+
|
|
3021
|
+
image_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
|
|
3022
|
+
filename_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_FILENAME_REGEX,)
|
|
3023
|
+
masking: ClassVar[None] = None
|
|
3024
|
+
all_bands: ClassVar[list[str]] = [
|
|
3025
|
+
x.replace("B0", "B") for x in Sentinel2Config.all_bands
|
|
3026
|
+
]
|
|
3027
|
+
rbg_bands: ClassVar[set[str]] = {
|
|
3028
|
+
key.replace("B0", "B") for key in Sentinel2Config.rbg_bands
|
|
3029
|
+
}
|
|
3030
|
+
ndvi_bands: ClassVar[set[str]] = {
|
|
3031
|
+
key.replace("B0", "B") for key in Sentinel2Config.ndvi_bands
|
|
3032
|
+
}
|
|
3033
|
+
|
|
3034
|
+
|
|
3035
|
+
class Sentinel2Band(Sentinel2Config, Band):
|
|
3036
|
+
"""Band with Sentinel2 specific name variables and regexes."""
|
|
3037
|
+
|
|
3038
|
+
metadata_attributes = Sentinel2Config.metadata_attributes | {
|
|
3039
|
+
"boa_add_offset": "_get_boa_add_offset_dict",
|
|
3040
|
+
}
|
|
3041
|
+
|
|
3042
|
+
def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
|
|
3043
|
+
if self.is_mask:
|
|
3044
|
+
return None
|
|
3045
|
+
|
|
2670
3046
|
pat = re.compile(
|
|
2671
3047
|
r"""
|
|
2672
3048
|
<BOA_ADD_OFFSET\s*
|
|
@@ -2683,30 +3059,39 @@ class Sentinel2Config:
|
|
|
2683
3059
|
raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
|
|
2684
3060
|
if not matches:
|
|
2685
3061
|
raise _RegexError(f"Could not find boa_add_offset info from {pat}")
|
|
2686
|
-
|
|
3062
|
+
|
|
3063
|
+
dict_ = (
|
|
2687
3064
|
pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
|
|
2688
3065
|
)
|
|
2689
3066
|
|
|
3067
|
+
# some xml files have band ids in range index form
|
|
3068
|
+
# converting these to actual band ids (B01 etc.)
|
|
3069
|
+
is_integer_coded = [int(i) for i in dict_] == list(range(len(dict_)))
|
|
2690
3070
|
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
ndvi_bands: ClassVar[list[str]] = [
|
|
2704
|
-
x.replace("B0", "B") for x in Sentinel2Config.ndvi_bands
|
|
2705
|
-
]
|
|
2706
|
-
|
|
3071
|
+
if is_integer_coded:
|
|
3072
|
+
# the xml files contain 13 bandIds for both L1C and L2A
|
|
3073
|
+
# even though L2A doesn't have band B10
|
|
3074
|
+
all_bands = list(self.l1c_bands)
|
|
3075
|
+
if len(all_bands) != len(dict_):
|
|
3076
|
+
raise ValueError(
|
|
3077
|
+
f"Different number of bands in xml file and config for {self.name}: {all_bands}, {list(dict_)}"
|
|
3078
|
+
)
|
|
3079
|
+
dict_ = {
|
|
3080
|
+
band_id: value
|
|
3081
|
+
for band_id, value in zip(all_bands, dict_.values(), strict=True)
|
|
3082
|
+
}
|
|
2707
3083
|
|
|
2708
|
-
|
|
2709
|
-
|
|
3084
|
+
try:
|
|
3085
|
+
return dict_[self.band_id]
|
|
3086
|
+
except KeyError as e:
|
|
3087
|
+
band_id = self.band_id.upper()
|
|
3088
|
+
for txt in ["B0", "B", "A"]:
|
|
3089
|
+
band_id = band_id.replace(txt, "")
|
|
3090
|
+
try:
|
|
3091
|
+
return dict_[band_id]
|
|
3092
|
+
except KeyError:
|
|
3093
|
+
continue
|
|
3094
|
+
raise KeyError(self.band_id, dict_) from e
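The integer-coded fallback above pairs the 13 L1C band ids with the parsed values by position; a standalone illustration with made-up offsets:

    l1c_band_ids = ["B01", "B02", "B03", "B04", "B05", "B06", "B07",
                    "B08", "B8A", "B09", "B10", "B11", "B12"]
    parsed = {str(i): -1000 for i in range(13)}  # keys "0".."12" as read from the xml
    remapped = dict(zip(l1c_band_ids, parsed.values(), strict=True))
    assert remapped["B8A"] == -1000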
|
|
2710
3095
|
|
|
2711
3096
|
|
|
2712
3097
|
class Sentinel2Image(Sentinel2Config, Image):
|
|
@@ -2716,12 +3101,15 @@ class Sentinel2Image(Sentinel2Config, Image):
|
|
|
2716
3101
|
|
|
2717
3102
|
def ndvi(
|
|
2718
3103
|
self,
|
|
2719
|
-
red_band: str =
|
|
2720
|
-
nir_band: str =
|
|
3104
|
+
red_band: str = "B04",
|
|
3105
|
+
nir_band: str = "B08",
|
|
3106
|
+
padding: int = 0,
|
|
2721
3107
|
copy: bool = True,
|
|
2722
3108
|
) -> NDVIBand:
|
|
2723
3109
|
"""Calculate the NDVI for the Image."""
|
|
2724
|
-
return super().ndvi(
|
|
3110
|
+
return super().ndvi(
|
|
3111
|
+
red_band=red_band, nir_band=nir_band, padding=padding, copy=copy
|
|
3112
|
+
)
|
|
2725
3113
|
|
|
2726
3114
|
|
|
2727
3115
|
class Sentinel2Collection(Sentinel2Config, ImageCollection):
|
|
@@ -2732,8 +3120,8 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
|
|
|
2732
3120
|
|
|
2733
3121
|
def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
|
|
2734
3122
|
"""ImageCollection with Sentinel2 specific name variables and path regexes."""
|
|
2735
|
-
level = kwargs.get("level",
|
|
2736
|
-
if
|
|
3123
|
+
level = kwargs.get("level", None_)
|
|
3124
|
+
if callable(level) and isinstance(level(), None_):
|
|
2737
3125
|
raise ValueError("Must specify level for Sentinel2Collection.")
|
|
2738
3126
|
super().__init__(data=data, **kwargs)
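A usage sketch tying the Sentinel-2 classes together; the path is hypothetical, and it is assumed that Sentinel2Collection is exposed at the package top level and that integer indexing returns an Image:

    import sgis as sg

    s2 = sg.Sentinel2Collection("/data/sentinel2", level="L2A", res=10, processes=4)
    first_image = s2[0]            # assumed: indexing yields a Sentinel2Image
    ndvi = first_image.ndvi()      # uses the new B04/B08 defaults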
|
|
2739
3127
|
|
|
@@ -2797,29 +3185,6 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
|
|
|
2797
3185
|
raise ValueError("array must be 2 or 3 dimensional")
|
|
2798
3186
|
|
|
2799
3187
|
|
|
2800
|
-
def to_xarray(
|
|
2801
|
-
array: np.ndarray, transform: Affine, crs: Any, name: str | None = None
|
|
2802
|
-
) -> DataArray:
|
|
2803
|
-
"""Convert the raster to an xarray.DataArray."""
|
|
2804
|
-
if len(array.shape) == 2:
|
|
2805
|
-
height, width = array.shape
|
|
2806
|
-
dims = ["y", "x"]
|
|
2807
|
-
elif len(array.shape) == 3:
|
|
2808
|
-
height, width = array.shape[1:]
|
|
2809
|
-
dims = ["band", "y", "x"]
|
|
2810
|
-
else:
|
|
2811
|
-
raise ValueError(f"Array should be 2 or 3 dimensional. Got shape {array.shape}")
|
|
2812
|
-
|
|
2813
|
-
coords = _generate_spatial_coords(transform, width, height)
|
|
2814
|
-
return xr.DataArray(
|
|
2815
|
-
array,
|
|
2816
|
-
coords=coords,
|
|
2817
|
-
dims=dims,
|
|
2818
|
-
name=name,
|
|
2819
|
-
attrs={"crs": crs},
|
|
2820
|
-
)
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
3188
|
def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
|
|
2824
3189
|
gradient_x, gradient_y = np.gradient(array, res, res)
|
|
2825
3190
|
|
|
@@ -2836,47 +3201,31 @@ def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
|
|
|
2836
3201
|
return degrees
|
|
2837
3202
|
|
|
2838
3203
|
|
|
2839
|
-
def
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
transform: Affine,
|
|
3204
|
+
def _clip_xarray(
|
|
3205
|
+
xarr: DataArray,
|
|
3206
|
+
mask: tuple[int, int, int, int],
|
|
2843
3207
|
crs: Any,
|
|
2844
|
-
out_shape: tuple[int, int],
|
|
2845
3208
|
**kwargs,
|
|
2846
|
-
) ->
|
|
3209
|
+
) -> DataArray:
|
|
2847
3210
|
# xarray needs a numpy array of polygons
|
|
2848
|
-
|
|
3211
|
+
mask_arr: np.ndarray = to_geoseries(mask).values
|
|
2849
3212
|
try:
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
transform=transform,
|
|
2856
|
-
crs=crs,
|
|
2857
|
-
)
|
|
2858
|
-
.rio.clip(bounds_arr, crs=crs, **kwargs)
|
|
2859
|
-
.to_numpy()
|
|
2860
|
-
)
|
|
2861
|
-
# bounds_arr = bounds_arr.buffer(0.0000001)
|
|
2862
|
-
return arr
|
|
2863
|
-
|
|
3213
|
+
return xarr.rio.clip(
|
|
3214
|
+
mask_arr,
|
|
3215
|
+
crs=crs,
|
|
3216
|
+
**kwargs,
|
|
3217
|
+
)
|
|
2864
3218
|
except NoDataInBounds:
|
|
2865
3219
|
return np.array([])
|
|
2866
3220
|
|
|
2867
3221
|
|
|
2868
|
-
def
|
|
2869
|
-
return (
|
|
2870
|
-
str(path).replace("\\", "/").replace(r"\"", "/").replace("//", "/").rstrip("/")
|
|
2871
|
-
)
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
def _get_all_file_paths(path: str) -> list[str]:
|
|
3222
|
+
def _get_all_file_paths(path: str) -> set[str]:
|
|
2875
3223
|
if is_dapla():
|
|
2876
|
-
return
|
|
3224
|
+
return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
|
|
2877
3225
|
else:
|
|
2878
|
-
return
|
|
2879
|
-
|
|
3226
|
+
return {
|
|
3227
|
+
_fix_path(x)
|
|
3228
|
+
for x in sorted(
|
|
2880
3229
|
set(
|
|
2881
3230
|
_glob_func(path + "/**")
|
|
2882
3231
|
+ _glob_func(path + "/**/**")
|
|
@@ -2885,7 +3234,7 @@ def _get_all_file_paths(path: str) -> list[str]:
|
|
|
2885
3234
|
+ _glob_func(path + "/**/**/**/**/**")
|
|
2886
3235
|
)
|
|
2887
3236
|
)
|
|
2888
|
-
|
|
3237
|
+
}
|
|
2889
3238
|
|
|
2890
3239
|
|
|
2891
3240
|
def _get_images(
|
|
@@ -2900,9 +3249,8 @@ def _get_images(
|
|
|
2900
3249
|
masking: BandMasking | None,
|
|
2901
3250
|
**kwargs,
|
|
2902
3251
|
) -> list[Image]:
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
images = parallel(
|
|
3252
|
+
with joblib.Parallel(n_jobs=processes, backend="threading") as parallel:
|
|
3253
|
+
images: list[Image] = parallel(
|
|
2906
3254
|
joblib.delayed(image_class)(
|
|
2907
3255
|
path,
|
|
2908
3256
|
df=df,
|
|
@@ -2942,7 +3290,7 @@ class PathlessImageError(ValueError):
|
|
|
2942
3290
|
what = "that have been merged"
|
|
2943
3291
|
elif self.instance._from_array:
|
|
2944
3292
|
what = "from arrays"
|
|
2945
|
-
elif self.instance.
|
|
3293
|
+
elif self.instance._from_geopandas:
|
|
2946
3294
|
what = "from GeoDataFrames"
|
|
2947
3295
|
else:
|
|
2948
3296
|
raise ValueError(self.instance)
|
|
@@ -3017,13 +3365,13 @@ def _copy_and_add_df_parallel(
|
|
|
3017
3365
|
return (i, copied)
|
|
3018
3366
|
|
|
3019
3367
|
|
|
3020
|
-
def _get_bounds(bounds, bbox) -> None | Polygon:
|
|
3368
|
+
def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
|
|
3021
3369
|
if bounds is None and bbox is None:
|
|
3022
3370
|
return None
|
|
3023
3371
|
elif bounds is not None and bbox is None:
|
|
3024
|
-
return to_shapely(bounds)
|
|
3372
|
+
return to_shapely(bounds).intersection(band_bounds)
|
|
3025
3373
|
elif bounds is None and bbox is not None:
|
|
3026
|
-
return to_shapely(bbox)
|
|
3374
|
+
return to_shapely(bbox).intersection(band_bounds)
|
|
3027
3375
|
else:
|
|
3028
3376
|
return to_shapely(bounds).intersection(to_shapely(bbox))
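Standalone illustration of the intersection behaviour added to _get_bounds; the geometries are made up:

    from shapely.geometry import box

    band_bounds = box(0, 0, 10, 10)
    requested = box(5, 5, 20, 20)
    clipped = requested.intersection(band_bounds)
    assert clipped.bounds == (5.0, 5.0, 10.0, 10.0)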
|
|
3029
3377
|
|
|
@@ -3041,7 +3389,15 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
|
|
|
3041
3389
|
|
|
3042
3390
|
|
|
3043
3391
|
def _load_band(band: Band, **kwargs) -> None:
|
|
3044
|
-
band.load(**kwargs)
|
|
3392
|
+
return band.load(**kwargs)
|
|
3393
|
+
|
|
3394
|
+
|
|
3395
|
+
def _band_apply(band: Band, func: Callable, **kwargs) -> None:
|
|
3396
|
+
return band.apply(func, **kwargs)
|
|
3397
|
+
|
|
3398
|
+
|
|
3399
|
+
def _clip_band(band: Band, mask, **kwargs) -> None:
|
|
3400
|
+
return band.clip(mask, **kwargs)
|
|
3045
3401
|
|
|
3046
3402
|
|
|
3047
3403
|
def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
|
|
@@ -3053,7 +3409,7 @@ def _merge(collection: ImageCollection, **kwargs) -> Band:
|
|
|
3053
3409
|
|
|
3054
3410
|
|
|
3055
3411
|
def _zonal_one_pair(i: int, poly: Polygon, band: Band, aggfunc, array_func, func_names):
|
|
3056
|
-
clipped = band.copy().
|
|
3412
|
+
clipped = band.copy().clip(poly)
|
|
3057
3413
|
if not np.size(clipped.values):
|
|
3058
3414
|
return _no_overlap_df(func_names, i, date=band.date)
|
|
3059
3415
|
return _aggregate(clipped.values, array_func, aggfunc, func_names, band.date, i)
|