ssb-sgis 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/geopandas_tools/cleaning.py +3 -3
- sgis/geopandas_tools/conversion.py +6 -5
- sgis/io/_is_dapla.py +2 -5
- sgis/io/dapla_functions.py +2 -0
- sgis/io/opener.py +2 -0
- sgis/maps/explore.py +18 -8
- sgis/maps/legend.py +3 -1
- sgis/maps/map.py +4 -0
- sgis/maps/thematicmap.py +53 -26
- sgis/raster/base.py +60 -23
- sgis/raster/image_collection.py +773 -658
- sgis/raster/regex.py +2 -2
- sgis/raster/zonal.py +1 -58
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.10.dist-info}/METADATA +1 -2
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.10.dist-info}/RECORD +17 -17
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.10.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.8.dist-info → ssb_sgis-1.0.10.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
|
@@ -6,6 +6,7 @@ import os
|
|
|
6
6
|
import random
|
|
7
7
|
import re
|
|
8
8
|
import time
|
|
9
|
+
from abc import abstractmethod
|
|
9
10
|
from collections.abc import Callable
|
|
10
11
|
from collections.abc import Iterable
|
|
11
12
|
from collections.abc import Iterator
|
|
@@ -26,7 +27,6 @@ import rasterio
|
|
|
26
27
|
from affine import Affine
|
|
27
28
|
from geopandas import GeoDataFrame
|
|
28
29
|
from geopandas import GeoSeries
|
|
29
|
-
from matplotlib.colors import LinearSegmentedColormap
|
|
30
30
|
from pandas.api.types import is_dict_like
|
|
31
31
|
from rasterio.enums import MergeAlg
|
|
32
32
|
from scipy import stats
|
|
@@ -41,11 +41,8 @@ from shapely.geometry import Polygon
|
|
|
41
41
|
|
|
42
42
|
try:
|
|
43
43
|
import dapla as dp
|
|
44
|
-
from dapla.gcs import GCSFileSystem
|
|
45
44
|
except ImportError:
|
|
46
|
-
|
|
47
|
-
class GCSFileSystem:
|
|
48
|
-
"""Placeholder."""
|
|
45
|
+
pass
|
|
49
46
|
|
|
50
47
|
|
|
51
48
|
try:
|
|
@@ -55,7 +52,7 @@ except ImportError:
|
|
|
55
52
|
class exceptions:
|
|
56
53
|
"""Placeholder."""
|
|
57
54
|
|
|
58
|
-
class RefreshError:
|
|
55
|
+
class RefreshError(Exception):
|
|
59
56
|
"""Placeholder."""
|
|
60
57
|
|
|
61
58
|
|
|
@@ -74,9 +71,9 @@ try:
|
|
|
74
71
|
except ImportError:
|
|
75
72
|
pass
|
|
76
73
|
try:
|
|
77
|
-
import xarray as xr
|
|
78
74
|
from xarray import DataArray
|
|
79
75
|
from xarray import Dataset
|
|
76
|
+
from xarray import combine_by_coords
|
|
80
77
|
except ImportError:
|
|
81
78
|
|
|
82
79
|
class DataArray:
|
|
@@ -85,6 +82,9 @@ except ImportError:
|
|
|
85
82
|
class Dataset:
|
|
86
83
|
"""Placeholder."""
|
|
87
84
|
|
|
85
|
+
def combine_by_coords(*args, **kwargs) -> None:
|
|
86
|
+
raise ImportError("xarray")
|
|
87
|
+
|
|
88
88
|
|
|
89
89
|
from ..geopandas_tools.bounds import get_total_bounds
|
|
90
90
|
from ..geopandas_tools.conversion import to_bbox
|
|
@@ -102,8 +102,10 @@ from ..io.opener import opener
|
|
|
102
102
|
from . import sentinel_config as config
|
|
103
103
|
from .base import _array_to_geojson
|
|
104
104
|
from .base import _gdf_to_arr
|
|
105
|
+
from .base import _get_res_from_bounds
|
|
105
106
|
from .base import _get_shape_from_bounds
|
|
106
107
|
from .base import _get_transform_from_bounds
|
|
108
|
+
from .base import _res_as_tuple
|
|
107
109
|
from .base import get_index_mapper
|
|
108
110
|
from .indices import ndvi
|
|
109
111
|
from .regex import _extract_regex_match_from_string
|
|
@@ -142,8 +144,6 @@ DATE_RANGES_TYPE = (
|
|
|
142
144
|
| tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
|
|
143
145
|
)
|
|
144
146
|
|
|
145
|
-
FILENAME_COL_SUFFIX = "_filename"
|
|
146
|
-
|
|
147
147
|
DEFAULT_FILENAME_REGEX = r"""
|
|
148
148
|
.*?
|
|
149
149
|
(?:_?(?P<date>\d{8}(?:T\d{6})?))? # Optional underscore and date group
|
|
@@ -163,13 +163,12 @@ ALLOWED_INIT_KWARGS = [
|
|
|
163
163
|
"filename_regexes",
|
|
164
164
|
"all_bands",
|
|
165
165
|
"crs",
|
|
166
|
-
"backend",
|
|
167
166
|
"masking",
|
|
168
167
|
"_merged",
|
|
169
168
|
"date",
|
|
170
169
|
]
|
|
171
170
|
|
|
172
|
-
|
|
171
|
+
_LOAD_COUNTER: int = 0
|
|
173
172
|
|
|
174
173
|
|
|
175
174
|
def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
|
|
@@ -178,6 +177,90 @@ def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
|
|
|
178
177
|
return set(itertools.chain.from_iterable(all_paths))
|
|
179
178
|
|
|
180
179
|
|
|
180
|
+
@dataclass
|
|
181
|
+
class PixelwiseResults:
|
|
182
|
+
"""Container of results from pixelwise operation to be converted."""
|
|
183
|
+
|
|
184
|
+
row_indices: np.ndarray
|
|
185
|
+
col_indices: np.ndarray
|
|
186
|
+
results: list[Any]
|
|
187
|
+
res: int | tuple[int, int]
|
|
188
|
+
bounds: tuple[float, float, float, float]
|
|
189
|
+
shape: tuple[int, int]
|
|
190
|
+
crs: Any
|
|
191
|
+
nodata: int | float | None
|
|
192
|
+
|
|
193
|
+
def to_tuple(self) -> tuple[int, int, Any]:
|
|
194
|
+
"""Return 3-length tuple of row indices, column indices and pixelwise results."""
|
|
195
|
+
return self.row_indices, self.col_indices, self.results
|
|
196
|
+
|
|
197
|
+
def to_dict(self) -> dict[tuple[int, int], Any]:
|
|
198
|
+
"""Return dictionary with row and column indices as keys and pixelwise results as values."""
|
|
199
|
+
return {
|
|
200
|
+
(int(row), int(col)): value
|
|
201
|
+
for row, col, value in zip(
|
|
202
|
+
self.row_indices, self.col_indices, self.results, strict=True
|
|
203
|
+
)
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
def to_geopandas(self, column: str = "value") -> GeoDataFrame:
|
|
207
|
+
"""Return GeoDataFrame with pixel geometries and values from the pixelwise operation."""
|
|
208
|
+
minx, miny = self.bounds[:2]
|
|
209
|
+
resx, resy = _res_as_tuple(self.res)
|
|
210
|
+
|
|
211
|
+
minxs = np.full(self.row_indices.shape, minx) + (minx * self.row_indices * resx)
|
|
212
|
+
minys = np.full(self.col_indices.shape, miny) + (miny * self.col_indices * resy)
|
|
213
|
+
maxxs = minxs + resx
|
|
214
|
+
maxys = minys + resy
|
|
215
|
+
|
|
216
|
+
return GeoDataFrame(
|
|
217
|
+
{
|
|
218
|
+
column: self.results,
|
|
219
|
+
"geometry": [
|
|
220
|
+
box(minx, miny, maxx, maxy)
|
|
221
|
+
for minx, miny, maxx, maxy in zip(
|
|
222
|
+
minxs, minys, maxxs, maxys, strict=True
|
|
223
|
+
)
|
|
224
|
+
],
|
|
225
|
+
},
|
|
226
|
+
index=[self.row_indices, self.col_indices],
|
|
227
|
+
crs=self.crs,
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
def to_numpy(self) -> np.ndarray | tuple[np.ndarray, ...]:
|
|
231
|
+
"""Reshape pixelwise results to 2d numpy arrays in the shape of the full arrays of the image bands."""
|
|
232
|
+
try:
|
|
233
|
+
n_out_arrays = len(next(iter(self.results)))
|
|
234
|
+
except TypeError:
|
|
235
|
+
n_out_arrays = 1
|
|
236
|
+
|
|
237
|
+
out_arrays = [
|
|
238
|
+
np.full(self.shape, self.nodata).astype(np.float64)
|
|
239
|
+
for _ in range(n_out_arrays)
|
|
240
|
+
]
|
|
241
|
+
|
|
242
|
+
for row, col, these_results in zip(
|
|
243
|
+
self.row_indices, self.col_indices, self.results, strict=True
|
|
244
|
+
):
|
|
245
|
+
if these_results is None:
|
|
246
|
+
continue
|
|
247
|
+
for i, arr in enumerate(out_arrays):
|
|
248
|
+
try:
|
|
249
|
+
arr[row, col] = these_results[i]
|
|
250
|
+
except TypeError:
|
|
251
|
+
arr[row, col] = these_results
|
|
252
|
+
|
|
253
|
+
for i, array in enumerate(out_arrays):
|
|
254
|
+
all_are_integers = np.all(np.mod(array, 1) == 0)
|
|
255
|
+
if all_are_integers:
|
|
256
|
+
out_arrays[i] = array.astype(int)
|
|
257
|
+
|
|
258
|
+
if len(out_arrays) == 1:
|
|
259
|
+
return out_arrays[0]
|
|
260
|
+
|
|
261
|
+
return tuple(out_arrays)
|
|
262
|
+
|
|
263
|
+
|
|
181
264
|
class ImageCollectionGroupBy:
|
|
182
265
|
"""Iterator and merger class returned from groupby.
|
|
183
266
|
|
|
@@ -196,7 +279,7 @@ class ImageCollectionGroupBy:
|
|
|
196
279
|
Args:
|
|
197
280
|
data: Iterable of group values and ImageCollection groups.
|
|
198
281
|
by: list of group attributes.
|
|
199
|
-
collection: ImageCollection
|
|
282
|
+
collection: Ungrouped ImageCollection. Used to pass attributes to outputs.
|
|
200
283
|
"""
|
|
201
284
|
self.data = list(data)
|
|
202
285
|
self.by = by
|
|
@@ -291,7 +374,7 @@ class ImageCollectionGroupBy:
|
|
|
291
374
|
|
|
292
375
|
def __repr__(self) -> str:
|
|
293
376
|
"""String representation."""
|
|
294
|
-
return f"{self.__class__.__name__}({len(self)})"
|
|
377
|
+
return f"{self.__class__.__name__}({len(self)}, by={self.by})"
|
|
295
378
|
|
|
296
379
|
|
|
297
380
|
@dataclass(frozen=True)
|
|
@@ -307,7 +390,11 @@ class BandMasking:
|
|
|
307
390
|
|
|
308
391
|
|
|
309
392
|
class None_:
|
|
310
|
-
"""Default
|
|
393
|
+
"""Default None for args that are not allowed to be None."""
|
|
394
|
+
|
|
395
|
+
def __new__(cls) -> None:
|
|
396
|
+
"""Always returns None."""
|
|
397
|
+
return None
|
|
311
398
|
|
|
312
399
|
|
|
313
400
|
class _ImageBase:
|
|
@@ -318,18 +405,16 @@ class _ImageBase:
|
|
|
318
405
|
|
|
319
406
|
def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
|
|
320
407
|
|
|
321
|
-
self._mask = None
|
|
322
408
|
self._bounds = None
|
|
323
|
-
self._merged = False
|
|
324
|
-
self._from_array = False
|
|
325
|
-
self._from_geopandas = False
|
|
326
|
-
self.metadata_attributes = self.metadata_attributes or {}
|
|
327
409
|
self._path = None
|
|
328
|
-
self._metadata_from_xml = False
|
|
329
|
-
|
|
330
410
|
self._bbox = to_bbox(bbox) if bbox is not None else None
|
|
331
411
|
|
|
332
|
-
self.
|
|
412
|
+
self.metadata_attributes = self.metadata_attributes or {}
|
|
413
|
+
|
|
414
|
+
if metadata is not None:
|
|
415
|
+
self.metadata = self._metadata_to_nested_dict(metadata)
|
|
416
|
+
else:
|
|
417
|
+
self.metadata = {}
|
|
333
418
|
|
|
334
419
|
self.image_patterns = self._compile_regexes("image_regexes")
|
|
335
420
|
self.filename_patterns = self._compile_regexes("filename_regexes")
|
|
@@ -339,29 +424,45 @@ class _ImageBase:
|
|
|
339
424
|
f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
|
|
340
425
|
)
|
|
341
426
|
if key in ALLOWED_INIT_KWARGS and key in dir(self):
|
|
342
|
-
|
|
343
|
-
setattr(self, f"_{key}", value)
|
|
344
|
-
elif is_method(self, key):
|
|
345
|
-
raise error_obj
|
|
346
|
-
else:
|
|
347
|
-
setattr(self, key, value)
|
|
427
|
+
self._safe_setattr(key, value, error_obj)
|
|
348
428
|
else:
|
|
349
429
|
raise error_obj
|
|
350
430
|
|
|
431
|
+
# attributes for debugging
|
|
432
|
+
self._metadata_from_xml = False
|
|
433
|
+
self._merged = False
|
|
434
|
+
self._from_array = False
|
|
435
|
+
self._from_geopandas = False
|
|
436
|
+
|
|
437
|
+
def _safe_setattr(
|
|
438
|
+
self, key: str, value: Any, error_obj: Exception | None = None
|
|
439
|
+
) -> None:
|
|
440
|
+
if is_property(self, key):
|
|
441
|
+
setattr(self, f"_{key}", value)
|
|
442
|
+
elif is_method(self, key):
|
|
443
|
+
if error_obj is None:
|
|
444
|
+
raise AttributeError(f"Cannot set method '{key}'.")
|
|
445
|
+
raise error_obj
|
|
446
|
+
else:
|
|
447
|
+
setattr(self, key, value)
|
|
448
|
+
|
|
351
449
|
def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
|
|
352
|
-
regexes = getattr(self, regex_attr)
|
|
353
|
-
if regexes:
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
return ()
|
|
450
|
+
regexes: tuple[str] | str = getattr(self, regex_attr)
|
|
451
|
+
if not regexes:
|
|
452
|
+
return ()
|
|
453
|
+
if isinstance(regexes, str):
|
|
454
|
+
regexes = (regexes,)
|
|
455
|
+
return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
|
|
358
456
|
|
|
359
457
|
@staticmethod
|
|
360
458
|
def _metadata_to_nested_dict(
|
|
361
459
|
metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
|
|
362
|
-
) -> dict[str, dict[str, Any]]
|
|
363
|
-
|
|
364
|
-
|
|
460
|
+
) -> dict[str, dict[str, Any]]:
|
|
461
|
+
"""Construct metadata dict from dictlike, DataFrame or file path.
|
|
462
|
+
|
|
463
|
+
Extract metadata value:
|
|
464
|
+
>>> self.metadata[self.path]['cloud_cover_percentage']
|
|
465
|
+
"""
|
|
365
466
|
if isinstance(metadata, (str | Path | os.PathLike)):
|
|
366
467
|
metadata = _read_parquet_func(metadata)
|
|
367
468
|
|
|
@@ -376,15 +477,16 @@ class _ImageBase:
|
|
|
376
477
|
return x if not (is_scalar(x) and pd.isna(x)) else None
|
|
377
478
|
|
|
378
479
|
# to nested dict because pandas indexing gives rare KeyError with long strings
|
|
379
|
-
|
|
480
|
+
return {
|
|
380
481
|
_fix_path(path): {
|
|
381
482
|
attr: na_to_none(value) for attr, value in row.items()
|
|
382
483
|
}
|
|
383
484
|
for path, row in metadata.iterrows()
|
|
384
485
|
}
|
|
385
486
|
elif is_dict_like(metadata):
|
|
386
|
-
|
|
487
|
+
return {_fix_path(path): value for path, value in metadata.items()}
|
|
387
488
|
|
|
489
|
+
# try to allow custom types with dict-like indexing
|
|
388
490
|
return metadata
|
|
389
491
|
|
|
390
492
|
@property
|
|
@@ -394,7 +496,6 @@ class _ImageBase:
|
|
|
394
496
|
"res": self.res,
|
|
395
497
|
"bbox": self._bbox,
|
|
396
498
|
"nodata": self.nodata,
|
|
397
|
-
"backend": self.backend,
|
|
398
499
|
"metadata": self.metadata,
|
|
399
500
|
}
|
|
400
501
|
|
|
@@ -408,19 +509,22 @@ class _ImageBase:
|
|
|
408
509
|
@property
|
|
409
510
|
def res(self) -> int:
|
|
410
511
|
"""Pixel resolution."""
|
|
512
|
+
# if self._res is None:
|
|
513
|
+
# if self.has_array:
|
|
514
|
+
# self._res = _get_res_from_bounds(self.bounds, self.values.shape)
|
|
515
|
+
# else:
|
|
516
|
+
# with opener(self.path) as file:
|
|
517
|
+
# with rasterio.open(file) as src:
|
|
518
|
+
# self._res = src.res
|
|
411
519
|
return self._res
|
|
412
520
|
|
|
413
|
-
@
|
|
414
|
-
def
|
|
415
|
-
|
|
416
|
-
return self.union_all().centroid
|
|
521
|
+
@abstractmethod
|
|
522
|
+
def union_all(self) -> Polygon | MultiPolygon:
|
|
523
|
+
pass
|
|
417
524
|
|
|
418
525
|
def assign(self, **kwargs) -> "_ImageBase":
|
|
419
526
|
for key, value in kwargs.items():
|
|
420
|
-
|
|
421
|
-
setattr(self, key, value)
|
|
422
|
-
except AttributeError:
|
|
423
|
-
setattr(self, f"_{key}", value)
|
|
527
|
+
self._safe_setattr(key, value)
|
|
424
528
|
return self
|
|
425
529
|
|
|
426
530
|
def _name_regex_searcher(
|
|
@@ -451,7 +555,10 @@ class _ImageBase:
|
|
|
451
555
|
)
|
|
452
556
|
|
|
453
557
|
def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
|
|
454
|
-
"""Create a dataframe with file paths and image paths that match regexes.
|
|
558
|
+
"""Create a dataframe with file paths and image paths that match regexes.
|
|
559
|
+
|
|
560
|
+
Used in __init__ to select relevant paths fast.
|
|
561
|
+
"""
|
|
455
562
|
df = pd.DataFrame({"file_path": list(file_paths)})
|
|
456
563
|
|
|
457
564
|
df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
|
|
@@ -518,12 +625,14 @@ class _ImageBase:
|
|
|
518
625
|
class _ImageBandBase(_ImageBase):
|
|
519
626
|
"""Common parent class of Image and Band."""
|
|
520
627
|
|
|
521
|
-
def intersects(
|
|
522
|
-
|
|
523
|
-
|
|
628
|
+
def intersects(
|
|
629
|
+
self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
|
|
630
|
+
) -> bool:
|
|
631
|
+
if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
|
|
632
|
+
pyproj.CRS(geometry.crs)
|
|
524
633
|
):
|
|
525
|
-
raise ValueError(f"crs mismatch: {self.crs} and {
|
|
526
|
-
return self.union_all().intersects(to_shapely(
|
|
634
|
+
raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
|
|
635
|
+
return self.union_all().intersects(to_shapely(geometry))
|
|
527
636
|
|
|
528
637
|
def union_all(self) -> Polygon:
|
|
529
638
|
try:
|
|
@@ -532,20 +641,21 @@ class _ImageBandBase(_ImageBase):
|
|
|
532
641
|
return Polygon()
|
|
533
642
|
|
|
534
643
|
@property
|
|
535
|
-
def
|
|
536
|
-
|
|
644
|
+
def centroid(self) -> Point:
|
|
645
|
+
"""Centerpoint of the object."""
|
|
646
|
+
return self.union_all().centroid
|
|
537
647
|
|
|
538
648
|
@property
|
|
539
649
|
def year(self) -> str:
|
|
540
650
|
if hasattr(self, "_year") and self._year:
|
|
541
651
|
return self._year
|
|
542
|
-
return self.date[:4]
|
|
652
|
+
return str(self.date)[:4]
|
|
543
653
|
|
|
544
654
|
@property
|
|
545
655
|
def month(self) -> str:
|
|
546
656
|
if hasattr(self, "_month") and self._month:
|
|
547
657
|
return self._month
|
|
548
|
-
return
|
|
658
|
+
return str(self.date).replace("-", "").replace("/", "")[4:6]
|
|
549
659
|
|
|
550
660
|
@property
|
|
551
661
|
def name(self) -> str | None:
|
|
@@ -572,62 +682,66 @@ class _ImageBandBase(_ImageBase):
|
|
|
572
682
|
return self._name_regex_searcher("level", self.image_patterns)
|
|
573
683
|
|
|
574
684
|
def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
|
|
575
|
-
|
|
685
|
+
"""Search through xml files for missing metadata attributes."""
|
|
576
686
|
self._metadata_from_xml = True
|
|
577
687
|
|
|
578
688
|
missing_metadata_attributes = {
|
|
579
|
-
|
|
580
|
-
for
|
|
581
|
-
if not hasattr(self,
|
|
689
|
+
attr: constructor_func
|
|
690
|
+
for attr, constructor_func in metadata_attributes.items()
|
|
691
|
+
if not hasattr(self, attr) or getattr(self, attr) is None
|
|
582
692
|
}
|
|
583
693
|
|
|
584
694
|
nonmissing_metadata_attributes = {
|
|
585
|
-
|
|
586
|
-
for
|
|
587
|
-
if
|
|
695
|
+
attr: getattr(self, attr)
|
|
696
|
+
for attr in metadata_attributes
|
|
697
|
+
if attr not in missing_metadata_attributes
|
|
588
698
|
}
|
|
589
699
|
|
|
590
700
|
if not missing_metadata_attributes:
|
|
591
701
|
return nonmissing_metadata_attributes
|
|
592
702
|
|
|
593
|
-
|
|
703
|
+
# read all xml content once
|
|
704
|
+
file_contents: dict[str, str] = {}
|
|
594
705
|
for path in self._all_file_paths:
|
|
595
706
|
if ".xml" not in path:
|
|
596
707
|
continue
|
|
597
708
|
with _open_func(path, "rb") as file:
|
|
598
|
-
file_contents
|
|
709
|
+
file_contents[path] = file.read().decode("utf-8")
|
|
599
710
|
|
|
600
|
-
|
|
711
|
+
def is_last_xml(i: int) -> bool:
|
|
712
|
+
return i == len(file_contents) - 1
|
|
713
|
+
|
|
714
|
+
for attr, value in missing_metadata_attributes.items():
|
|
601
715
|
results = None
|
|
602
|
-
for i,
|
|
716
|
+
for i, file_content in enumerate(file_contents.values()):
|
|
603
717
|
if isinstance(value, str) and value in dir(self):
|
|
604
|
-
method
|
|
718
|
+
# method or a hardcoded value
|
|
719
|
+
value: Callable | Any = getattr(self, value)
|
|
720
|
+
|
|
721
|
+
if callable(value):
|
|
605
722
|
try:
|
|
606
|
-
results =
|
|
723
|
+
results = value(file_content)
|
|
607
724
|
except _RegexError as e:
|
|
608
|
-
if i
|
|
609
|
-
raise e
|
|
725
|
+
if is_last_xml(i):
|
|
726
|
+
raise e.__class__(self.path, list(file_contents), e) from e
|
|
610
727
|
continue
|
|
611
728
|
if results is not None:
|
|
612
729
|
break
|
|
613
|
-
|
|
614
|
-
|
|
730
|
+
elif (
|
|
731
|
+
isinstance(value, str)
|
|
732
|
+
or hasattr(value, "__iter__")
|
|
733
|
+
and all(isinstance(x, str | re.Pattern) for x in value)
|
|
734
|
+
):
|
|
615
735
|
try:
|
|
616
|
-
results = value
|
|
736
|
+
results = _extract_regex_match_from_string(file_content, value)
|
|
617
737
|
except _RegexError as e:
|
|
618
|
-
if i
|
|
738
|
+
if is_last_xml(i):
|
|
619
739
|
raise e
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
740
|
+
elif value is not None:
|
|
741
|
+
results = value
|
|
742
|
+
break
|
|
623
743
|
|
|
624
|
-
|
|
625
|
-
results = _extract_regex_match_from_string(filetext, value)
|
|
626
|
-
except _RegexError as e:
|
|
627
|
-
if i == len(self._all_file_paths) - 1:
|
|
628
|
-
raise e
|
|
629
|
-
|
|
630
|
-
missing_metadata_attributes[key] = results
|
|
744
|
+
missing_metadata_attributes[attr] = results
|
|
631
745
|
|
|
632
746
|
return missing_metadata_attributes | nonmissing_metadata_attributes
|
|
633
747
|
|
|
@@ -671,14 +785,15 @@ class Band(_ImageBandBase):
|
|
|
671
785
|
"""Band holding a single 2 dimensional array representing an image band."""
|
|
672
786
|
|
|
673
787
|
cmap: ClassVar[str | None] = None
|
|
674
|
-
backend: str = "numpy"
|
|
675
788
|
|
|
676
789
|
@classmethod
|
|
677
790
|
def from_geopandas(
|
|
678
791
|
cls,
|
|
679
792
|
gdf: GeoDataFrame | GeoSeries,
|
|
680
|
-
res: int,
|
|
681
793
|
*,
|
|
794
|
+
res: int | None = None,
|
|
795
|
+
out_shape: tuple[int, int] | None = None,
|
|
796
|
+
bounds: Any | None = None,
|
|
682
797
|
fill: int = 0,
|
|
683
798
|
all_touched: bool = False,
|
|
684
799
|
merge_alg: Callable = MergeAlg.replace,
|
|
@@ -687,17 +802,27 @@ class Band(_ImageBandBase):
|
|
|
687
802
|
**kwargs,
|
|
688
803
|
) -> None:
|
|
689
804
|
"""Create Band from a GeoDataFrame."""
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
805
|
+
if bounds is not None:
|
|
806
|
+
bounds = to_bbox(bounds)
|
|
807
|
+
|
|
808
|
+
if out_shape == (0,):
|
|
809
|
+
arr = np.array([])
|
|
810
|
+
else:
|
|
811
|
+
arr = _gdf_to_arr(
|
|
812
|
+
gdf,
|
|
813
|
+
res=res,
|
|
814
|
+
bounds=bounds,
|
|
815
|
+
fill=fill,
|
|
816
|
+
all_touched=all_touched,
|
|
817
|
+
merge_alg=merge_alg,
|
|
818
|
+
default_value=default_value,
|
|
819
|
+
dtype=dtype,
|
|
820
|
+
out_shape=out_shape,
|
|
821
|
+
)
|
|
822
|
+
if bounds is None:
|
|
823
|
+
bounds = gdf.total_bounds
|
|
699
824
|
|
|
700
|
-
obj = cls(arr,
|
|
825
|
+
obj = cls(arr, crs=gdf.crs, bounds=bounds, **kwargs)
|
|
701
826
|
obj._from_geopandas = True
|
|
702
827
|
return obj
|
|
703
828
|
|
|
@@ -717,9 +842,6 @@ class Band(_ImageBandBase):
|
|
|
717
842
|
**kwargs,
|
|
718
843
|
) -> None:
|
|
719
844
|
"""Band initialiser."""
|
|
720
|
-
if callable(res) and isinstance(res(), None_):
|
|
721
|
-
raise TypeError("Must specify 'res'")
|
|
722
|
-
|
|
723
845
|
if data is None:
|
|
724
846
|
# allowing 'path' to replace 'data' as argument
|
|
725
847
|
# to make the print repr. valid as initialiser
|
|
@@ -745,11 +867,20 @@ class Band(_ImageBandBase):
|
|
|
745
867
|
if isinstance(data, np.ndarray):
|
|
746
868
|
if self._bounds is None:
|
|
747
869
|
raise ValueError("Must specify bounds when data is an array.")
|
|
870
|
+
if not (res is None or (callable(res) and res() is None)):
|
|
871
|
+
# if not (res is None or (callable(res) and res() is None)) and _res_as_tuple(
|
|
872
|
+
# res
|
|
873
|
+
# ) != _get_res_from_bounds(self._bounds, data.shape):
|
|
874
|
+
raise ValueError(
|
|
875
|
+
f"Cannot specify 'res' when data is an array. {res} and {_get_res_from_bounds(self._bounds, data.shape)}"
|
|
876
|
+
)
|
|
748
877
|
self._crs = crs
|
|
749
878
|
self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
|
|
750
879
|
self._from_array = True
|
|
751
880
|
self.values = data
|
|
752
881
|
|
|
882
|
+
self._res = _get_res_from_bounds(self._bounds, self.values.shape)
|
|
883
|
+
|
|
753
884
|
elif not isinstance(data, (str | Path | os.PathLike)):
|
|
754
885
|
raise TypeError(
|
|
755
886
|
"'data' must be string, Path-like or numpy.ndarray. "
|
|
@@ -757,8 +888,8 @@ class Band(_ImageBandBase):
|
|
|
757
888
|
)
|
|
758
889
|
else:
|
|
759
890
|
self._path = _fix_path(str(data))
|
|
891
|
+
self._res = res if not (callable(res) and res() is None) else None
|
|
760
892
|
|
|
761
|
-
self._res = res
|
|
762
893
|
if cmap is not None:
|
|
763
894
|
self.cmap = cmap
|
|
764
895
|
self._name = name
|
|
@@ -786,7 +917,7 @@ class Band(_ImageBandBase):
|
|
|
786
917
|
else:
|
|
787
918
|
setattr(self, key, value)
|
|
788
919
|
|
|
789
|
-
elif self.metadata_attributes and self.path is not None
|
|
920
|
+
elif self.metadata_attributes and self.path is not None:
|
|
790
921
|
if self._all_file_paths is None:
|
|
791
922
|
self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
|
|
792
923
|
for key, value in self._get_metadata_attributes(
|
|
@@ -798,43 +929,28 @@ class Band(_ImageBandBase):
|
|
|
798
929
|
"""Makes Bands sortable by band_id."""
|
|
799
930
|
return self.band_id < other.band_id
|
|
800
931
|
|
|
932
|
+
def value_counts(self) -> pd.Series:
|
|
933
|
+
"""Value count of each value of the band's array."""
|
|
934
|
+
try:
|
|
935
|
+
values = self.values.data[self.values.mask == False]
|
|
936
|
+
except AttributeError:
|
|
937
|
+
values = self.values
|
|
938
|
+
unique_values, counts = np.unique(values, return_counts=True)
|
|
939
|
+
return pd.Series(counts, index=unique_values).sort_values(ascending=False)
|
|
940
|
+
|
|
801
941
|
@property
|
|
802
942
|
def values(self) -> np.ndarray:
|
|
803
943
|
"""The numpy array, if loaded."""
|
|
804
944
|
if self._values is None:
|
|
805
|
-
raise
|
|
945
|
+
raise _ArrayNotLoadedError("array is not loaded.")
|
|
806
946
|
return self._values
|
|
807
947
|
|
|
808
948
|
@values.setter
|
|
809
949
|
def values(self, new_val):
|
|
810
|
-
if
|
|
950
|
+
if isinstance(new_val, np.ndarray):
|
|
811
951
|
self._values = new_val
|
|
812
|
-
|
|
813
|
-
elif self.backend == "xarray" and isinstance(new_val, DataArray):
|
|
814
|
-
# attrs can dissappear, so doing a union
|
|
815
|
-
attrs = self._values.attrs | new_val.attrs
|
|
816
|
-
self._values = new_val
|
|
817
|
-
self._values.attrs = attrs
|
|
818
|
-
return
|
|
819
|
-
|
|
820
|
-
if self.backend == "numpy":
|
|
952
|
+
else:
|
|
821
953
|
self._values = self._to_numpy(new_val)
|
|
822
|
-
if self.backend == "xarray":
|
|
823
|
-
if not isinstance(self._values, DataArray):
|
|
824
|
-
self._values = self._to_xarray(
|
|
825
|
-
new_val,
|
|
826
|
-
transform=self.transform,
|
|
827
|
-
)
|
|
828
|
-
|
|
829
|
-
elif isinstance(new_val, np.ndarray):
|
|
830
|
-
self._values.values = new_val
|
|
831
|
-
else:
|
|
832
|
-
self._values = new_val
|
|
833
|
-
|
|
834
|
-
@property
|
|
835
|
-
def mask(self) -> "Band":
|
|
836
|
-
"""Mask Band."""
|
|
837
|
-
return self._mask
|
|
838
954
|
|
|
839
955
|
@property
|
|
840
956
|
def band_id(self) -> str:
|
|
@@ -921,28 +1037,39 @@ class Band(_ImageBandBase):
|
|
|
921
1037
|
return df
|
|
922
1038
|
|
|
923
1039
|
def clip(
|
|
924
|
-
self,
|
|
1040
|
+
self,
|
|
1041
|
+
mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon,
|
|
925
1042
|
) -> "Band":
|
|
926
|
-
"""Clip band values to geometry mask."""
|
|
1043
|
+
"""Clip band values to geometry mask while preserving bounds."""
|
|
927
1044
|
if not self.height or not self.width:
|
|
928
1045
|
return self
|
|
929
1046
|
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
1047
|
+
fill: int = self.nodata or 0
|
|
1048
|
+
|
|
1049
|
+
mask_array: np.ndarray = Band.from_geopandas(
|
|
1050
|
+
gdf=to_gdf(mask)[["geometry"]],
|
|
1051
|
+
default_value=1,
|
|
1052
|
+
fill=fill,
|
|
1053
|
+
out_shape=self.values.shape,
|
|
1054
|
+
bounds=mask,
|
|
1055
|
+
).values
|
|
1056
|
+
|
|
1057
|
+
is_not_polygon = mask_array == fill
|
|
1058
|
+
|
|
1059
|
+
if isinstance(self.values, np.ma.core.MaskedArray):
|
|
1060
|
+
self._values.mask |= is_not_polygon
|
|
1061
|
+
else:
|
|
1062
|
+
self._values = np.ma.array(
|
|
1063
|
+
self.values, mask=is_not_polygon, fill_value=self.nodata
|
|
1064
|
+
)
|
|
1065
|
+
|
|
939
1066
|
return self
|
|
940
1067
|
|
|
941
1068
|
def load(
|
|
942
1069
|
self,
|
|
943
1070
|
bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
|
|
944
1071
|
indexes: int | tuple[int] | None = None,
|
|
945
|
-
masked: bool
|
|
1072
|
+
masked: bool = True,
|
|
946
1073
|
file_system=None,
|
|
947
1074
|
**kwargs,
|
|
948
1075
|
) -> "Band":
|
|
@@ -950,11 +1077,10 @@ class Band(_ImageBandBase):
|
|
|
950
1077
|
|
|
951
1078
|
The array is stored in the 'values' property.
|
|
952
1079
|
"""
|
|
953
|
-
global
|
|
954
|
-
|
|
1080
|
+
global _LOAD_COUNTER
|
|
1081
|
+
_LOAD_COUNTER += 1
|
|
955
1082
|
|
|
956
|
-
|
|
957
|
-
masked = True if self.mask is None else False
|
|
1083
|
+
_masking = kwargs.pop("_masking", self.masking)
|
|
958
1084
|
|
|
959
1085
|
bounds_was_none = bounds is None
|
|
960
1086
|
|
|
@@ -963,12 +1089,9 @@ class Band(_ImageBandBase):
|
|
|
963
1089
|
should_return_empty: bool = bounds is not None and bounds.area == 0
|
|
964
1090
|
if should_return_empty:
|
|
965
1091
|
self._values = np.array([])
|
|
966
|
-
if self.mask is not None and not self.is_mask:
|
|
967
|
-
self._mask = self._mask.load(
|
|
968
|
-
bounds=bounds, indexes=indexes, file_system=file_system
|
|
969
|
-
)
|
|
970
1092
|
self._bounds = None
|
|
971
1093
|
self.transform = None
|
|
1094
|
+
# activate setter
|
|
972
1095
|
self.values = self._values
|
|
973
1096
|
|
|
974
1097
|
return self
|
|
@@ -978,7 +1101,6 @@ class Band(_ImageBandBase):
|
|
|
978
1101
|
|
|
979
1102
|
if bounds is not None:
|
|
980
1103
|
minx, miny, maxx, maxy = to_bbox(bounds)
|
|
981
|
-
## round down/up to integer to avoid precision trouble
|
|
982
1104
|
# bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
|
|
983
1105
|
bounds = minx, miny, maxx, maxy
|
|
984
1106
|
|
|
@@ -992,20 +1114,19 @@ class Band(_ImageBandBase):
|
|
|
992
1114
|
out_shape = kwargs.pop("out_shape", None)
|
|
993
1115
|
|
|
994
1116
|
if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
|
|
995
|
-
print(self)
|
|
996
|
-
print(self.mask)
|
|
997
|
-
print(self.values.shape)
|
|
998
|
-
print([int(x) for x in bounds], [int(x) for x in self.bounds])
|
|
999
1117
|
raise ValueError(
|
|
1000
1118
|
"Cannot re-load array with different bounds. "
|
|
1001
1119
|
"Use .copy() to read with different bounds. "
|
|
1002
|
-
"Or .clip(mask) to clip."
|
|
1120
|
+
"Or .clip(mask) to clip.",
|
|
1121
|
+
self,
|
|
1122
|
+
self.values.shape,
|
|
1123
|
+
[int(x) for x in bounds],
|
|
1124
|
+
[int(x) for x in self.bounds],
|
|
1003
1125
|
)
|
|
1004
|
-
|
|
1126
|
+
|
|
1005
1127
|
with opener(self.path, file_system=file_system) as f:
|
|
1006
1128
|
with rasterio.open(f, nodata=self.nodata) as src:
|
|
1007
|
-
self._res =
|
|
1008
|
-
|
|
1129
|
+
self._res = src.res if not self.res else self.res
|
|
1009
1130
|
if self.nodata is None or np.isnan(self.nodata):
|
|
1010
1131
|
self.nodata = src.nodata
|
|
1011
1132
|
else:
|
|
@@ -1018,7 +1139,7 @@ class Band(_ImageBandBase):
|
|
|
1018
1139
|
)
|
|
1019
1140
|
|
|
1020
1141
|
if bounds is None:
|
|
1021
|
-
if self._res !=
|
|
1142
|
+
if self._res != src.res:
|
|
1022
1143
|
if out_shape is None:
|
|
1023
1144
|
out_shape = _get_shape_from_bounds(
|
|
1024
1145
|
to_bbox(src.bounds), self.res, indexes
|
|
@@ -1070,18 +1191,12 @@ class Band(_ImageBandBase):
|
|
|
1070
1191
|
else:
|
|
1071
1192
|
values[values == src.nodata] = self.nodata
|
|
1072
1193
|
|
|
1073
|
-
if
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
|
|
1077
|
-
|
|
1078
|
-
if not self.mask.has_array:
|
|
1079
|
-
self._mask = self.mask.load(
|
|
1080
|
-
bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
|
|
1081
|
-
)
|
|
1082
|
-
mask_arr = self.mask.values
|
|
1083
|
-
|
|
1194
|
+
if _masking and not isinstance(values, np.ma.core.MaskedArray):
|
|
1195
|
+
mask_arr = _read_mask_array(self, bounds=bounds)
|
|
1084
1196
|
values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
|
|
1197
|
+
elif _masking:
|
|
1198
|
+
mask_arr = _read_mask_array(self, bounds=bounds)
|
|
1199
|
+
values.mask |= mask_arr
|
|
1085
1200
|
|
|
1086
1201
|
if bounds is not None:
|
|
1087
1202
|
self._bounds = to_bbox(bounds)
|
|
@@ -1092,13 +1207,6 @@ class Band(_ImageBandBase):
|
|
|
1092
1207
|
|
|
1093
1208
|
return self
|
|
1094
1209
|
|
|
1095
|
-
@property
|
|
1096
|
-
def is_mask(self) -> bool:
|
|
1097
|
-
"""True if the band_id is equal to the masking band_id."""
|
|
1098
|
-
if self.masking is None:
|
|
1099
|
-
return False
|
|
1100
|
-
return self.band_id == self.masking["band_id"]
|
|
1101
|
-
|
|
1102
1210
|
@property
|
|
1103
1211
|
def has_array(self) -> bool:
|
|
1104
1212
|
"""Whether the array is loaded."""
|
|
@@ -1106,7 +1214,7 @@ class Band(_ImageBandBase):
|
|
|
1106
1214
|
if not isinstance(self.values, (np.ndarray | DataArray)):
|
|
1107
1215
|
raise ValueError()
|
|
1108
1216
|
return True
|
|
1109
|
-
except ValueError: # also catches
|
|
1217
|
+
except ValueError: # also catches _ArrayNotLoadedError
|
|
1110
1218
|
return False
|
|
1111
1219
|
|
|
1112
1220
|
def write(
|
|
@@ -1126,10 +1234,17 @@ class Band(_ImageBandBase):
|
|
|
1126
1234
|
if self.crs is None:
|
|
1127
1235
|
raise ValueError("Cannot write None crs to image.")
|
|
1128
1236
|
|
|
1237
|
+
if self.nodata:
|
|
1238
|
+
# TODO take out .data if masked?
|
|
1239
|
+
values_with_nodata = np.concatenate(
|
|
1240
|
+
[self.values.flatten(), np.array([self.nodata])]
|
|
1241
|
+
)
|
|
1242
|
+
else:
|
|
1243
|
+
values_with_nodata = self.values
|
|
1129
1244
|
profile = {
|
|
1130
1245
|
"driver": driver,
|
|
1131
1246
|
"compress": compress,
|
|
1132
|
-
"dtype": rasterio.dtypes.get_minimum_dtype(
|
|
1247
|
+
"dtype": rasterio.dtypes.get_minimum_dtype(values_with_nodata),
|
|
1133
1248
|
"crs": self.crs,
|
|
1134
1249
|
"transform": self.transform,
|
|
1135
1250
|
"nodata": self.nodata,
|
|
@@ -1138,19 +1253,18 @@ class Band(_ImageBandBase):
|
|
|
1138
1253
|
"width": self.width,
|
|
1139
1254
|
} | kwargs
|
|
1140
1255
|
|
|
1141
|
-
# with opener(path, "wb", file_system=self.file_system) as f:
|
|
1142
1256
|
with opener(path, "wb", file_system=file_system) as f:
|
|
1143
1257
|
with rasterio.open(f, "w", **profile) as dst:
|
|
1144
1258
|
|
|
1145
1259
|
if dst.nodata is None:
|
|
1146
1260
|
dst.nodata = _get_dtype_min(dst.dtypes[0])
|
|
1147
1261
|
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1262
|
+
if (
|
|
1263
|
+
isinstance(self.values, np.ma.core.MaskedArray)
|
|
1264
|
+
and dst.nodata is not None
|
|
1265
|
+
):
|
|
1266
|
+
self.values.data[np.isnan(self.values.data)] = dst.nodata
|
|
1267
|
+
self.values.data[self.values.mask] = dst.nodata
|
|
1154
1268
|
|
|
1155
1269
|
if len(self.values.shape) == 2:
|
|
1156
1270
|
dst.write(self.values, indexes=1)
|
|
@@ -1238,7 +1352,7 @@ class Band(_ImageBandBase):
|
|
|
1238
1352
|
The gradient will be 1 (1 meter up for every meter forward).
|
|
1239
1353
|
The calculation is by default done in place to save memory.
|
|
1240
1354
|
|
|
1241
|
-
>>> band.gradient()
|
|
1355
|
+
>>> band.gradient(copy=False)
|
|
1242
1356
|
>>> band.values
|
|
1243
1357
|
array([[0., 1., 1., 1., 0.],
|
|
1244
1358
|
[1., 1., 1., 1., 1.],
|
|
@@ -1299,11 +1413,13 @@ class Band(_ImageBandBase):
|
|
|
1299
1413
|
dropna=dropna,
|
|
1300
1414
|
)
|
|
1301
1415
|
|
|
1302
|
-
def to_geopandas(self, column: str = "value") -> GeoDataFrame:
|
|
1416
|
+
def to_geopandas(self, column: str = "value", dropna: bool = True) -> GeoDataFrame:
|
|
1303
1417
|
"""Create a GeoDataFrame from the image Band.
|
|
1304
1418
|
|
|
1305
1419
|
Args:
|
|
1306
1420
|
column: Name of resulting column that holds the raster values.
|
|
1421
|
+
dropna: Whether to remove values that are NA or equal to the nodata
|
|
1422
|
+
value.
|
|
1307
1423
|
|
|
1308
1424
|
Returns:
|
|
1309
1425
|
A GeoDataFrame with a geometry column and array values.
|
|
@@ -1311,24 +1427,28 @@ class Band(_ImageBandBase):
|
|
|
1311
1427
|
if not hasattr(self, "_values"):
|
|
1312
1428
|
raise ValueError("Array is not loaded.")
|
|
1313
1429
|
|
|
1430
|
+
if isinstance(self.values, np.ma.core.MaskedArray):
|
|
1431
|
+
self.values.data[self.values.mask] = self.nodata or 0
|
|
1314
1432
|
if self.values.shape[0] == 0:
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1433
|
+
df = GeoDataFrame({"geometry": []}, crs=self.crs)
|
|
1434
|
+
else:
|
|
1435
|
+
df = GeoDataFrame(
|
|
1436
|
+
pd.DataFrame(
|
|
1437
|
+
_array_to_geojson(
|
|
1438
|
+
self.values, self.transform, processes=self.processes
|
|
1439
|
+
),
|
|
1440
|
+
columns=[column, "geometry"],
|
|
1321
1441
|
),
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1442
|
+
geometry="geometry",
|
|
1443
|
+
crs=self.crs,
|
|
1444
|
+
)
|
|
1445
|
+
|
|
1446
|
+
if dropna:
|
|
1447
|
+
return df[(df[column] != self.nodata) & (df[column].notna())]
|
|
1448
|
+
return df
|
|
1327
1449
|
|
|
1328
1450
|
def to_xarray(self) -> DataArray:
|
|
1329
1451
|
"""Convert the raster to an xarray.DataArray."""
|
|
1330
|
-
if self.backend == "xarray":
|
|
1331
|
-
return self.values
|
|
1332
1452
|
return self._to_xarray(
|
|
1333
1453
|
self.values,
|
|
1334
1454
|
transform=self.transform,
|
|
@@ -1345,19 +1465,6 @@ class Band(_ImageBandBase):
|
|
|
1345
1465
|
if not isinstance(arr, np.ndarray):
|
|
1346
1466
|
mask_arr = None
|
|
1347
1467
|
if masked:
|
|
1348
|
-
# if self.mask is not None:
|
|
1349
|
-
# print(self.mask.values.shape, arr.shape)
|
|
1350
|
-
# if self.mask is not None and self.mask.values.shape == arr.shape:
|
|
1351
|
-
# print("hei", self.mask.values.sum())
|
|
1352
|
-
# mask_arr = self.mask.values
|
|
1353
|
-
# else:
|
|
1354
|
-
# mask_arr = np.full(arr.shape, False)
|
|
1355
|
-
# try:
|
|
1356
|
-
# print("hei222", arr.isnull().values.sum())
|
|
1357
|
-
# mask_arr |= arr.isnull().values
|
|
1358
|
-
# except AttributeError:
|
|
1359
|
-
# pass
|
|
1360
|
-
# mask_arr = np.full(arr.shape, False)
|
|
1361
1468
|
try:
|
|
1362
1469
|
mask_arr = arr.isnull().values
|
|
1363
1470
|
except AttributeError:
|
|
@@ -1374,11 +1481,11 @@ class Band(_ImageBandBase):
|
|
|
1374
1481
|
|
|
1375
1482
|
if (
|
|
1376
1483
|
masked
|
|
1377
|
-
and self.mask is not None
|
|
1378
|
-
and not self.is_mask
|
|
1379
1484
|
and not isinstance(arr, np.ma.core.MaskedArray)
|
|
1485
|
+
and mask_arr is not None
|
|
1380
1486
|
):
|
|
1381
1487
|
arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
|
|
1488
|
+
|
|
1382
1489
|
return arr
|
|
1383
1490
|
|
|
1384
1491
|
def __repr__(self) -> str:
|
|
@@ -1401,10 +1508,6 @@ class NDVIBand(Band):
|
|
|
1401
1508
|
|
|
1402
1509
|
cmap: str = "Greens"
|
|
1403
1510
|
|
|
1404
|
-
# @staticmethod
|
|
1405
|
-
# def get_cmap(arr: np.ndarray):
|
|
1406
|
-
# return get_cmap(arr)
|
|
1407
|
-
|
|
1408
1511
|
|
|
1409
1512
|
def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
|
|
1410
1513
|
arr = np.median(arr, axis=0).astype(int)
|
|
@@ -1416,12 +1519,12 @@ class Image(_ImageBandBase):
|
|
|
1416
1519
|
"""Image consisting of one or more Bands."""
|
|
1417
1520
|
|
|
1418
1521
|
band_class: ClassVar[Band] = Band
|
|
1419
|
-
backend: str = "numpy"
|
|
1420
1522
|
|
|
1421
1523
|
def __init__(
|
|
1422
1524
|
self,
|
|
1423
1525
|
data: str | Path | Sequence[Band] | None = None,
|
|
1424
|
-
res: int |
|
|
1526
|
+
res: int | None_ = None_,
|
|
1527
|
+
mask: "Band | None" = None,
|
|
1425
1528
|
processes: int = 1,
|
|
1426
1529
|
df: pd.DataFrame | None = None,
|
|
1427
1530
|
nodata: int | None = None,
|
|
@@ -1442,20 +1545,27 @@ class Image(_ImageBandBase):
|
|
|
1442
1545
|
self.processes = processes
|
|
1443
1546
|
self._crs = None
|
|
1444
1547
|
self._bands = None
|
|
1548
|
+
self._mask = mask
|
|
1549
|
+
|
|
1550
|
+
if isinstance(data, Band):
|
|
1551
|
+
data = [data]
|
|
1445
1552
|
|
|
1446
1553
|
if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
|
|
1447
1554
|
self._construct_image_from_bands(data, res)
|
|
1448
1555
|
return
|
|
1449
1556
|
elif not isinstance(data, (str | Path | os.PathLike)):
|
|
1450
|
-
raise TypeError(
|
|
1557
|
+
raise TypeError(
|
|
1558
|
+
f"'data' must be string, Path-like or a sequence of Band. Got {data}"
|
|
1559
|
+
)
|
|
1451
1560
|
|
|
1452
|
-
self._res = res
|
|
1561
|
+
self._res = res if not (callable(res) and res() is None) else None
|
|
1453
1562
|
self._path = _fix_path(data)
|
|
1454
1563
|
|
|
1455
1564
|
if all_file_paths is None and self.path:
|
|
1456
1565
|
self._all_file_paths = _get_all_file_paths(self.path)
|
|
1457
1566
|
elif self.path:
|
|
1458
|
-
|
|
1567
|
+
name = Path(self.path).name
|
|
1568
|
+
all_file_paths = {_fix_path(x) for x in all_file_paths if name in x}
|
|
1459
1569
|
self._all_file_paths = {x for x in all_file_paths if self.path in x}
|
|
1460
1570
|
else:
|
|
1461
1571
|
self._all_file_paths = None
|
|
@@ -1467,11 +1577,7 @@ class Image(_ImageBandBase):
|
|
|
1467
1577
|
|
|
1468
1578
|
df["image_path"] = df["image_path"].astype(str)
|
|
1469
1579
|
|
|
1470
|
-
cols_to_explode = [
|
|
1471
|
-
"file_path",
|
|
1472
|
-
"file_name",
|
|
1473
|
-
*[x for x in df if FILENAME_COL_SUFFIX in x],
|
|
1474
|
-
]
|
|
1580
|
+
cols_to_explode = ["file_path", "file_name"]
|
|
1475
1581
|
try:
|
|
1476
1582
|
df = df.explode(cols_to_explode, ignore_index=True)
|
|
1477
1583
|
except ValueError:
|
|
@@ -1499,20 +1605,92 @@ class Image(_ImageBandBase):
|
|
|
1499
1605
|
else:
|
|
1500
1606
|
setattr(self, key, value)
|
|
1501
1607
|
|
|
1502
|
-
|
|
1608
|
+
elif self.metadata_attributes and self.path is not None:
|
|
1503
1609
|
for key, value in self._get_metadata_attributes(
|
|
1504
1610
|
self.metadata_attributes
|
|
1505
1611
|
).items():
|
|
1506
1612
|
setattr(self, key, value)
|
|
1507
1613
|
|
|
1614
|
+
def clip(
|
|
1615
|
+
self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, copy: bool = True
|
|
1616
|
+
) -> "Image":
|
|
1617
|
+
"""Clip band values to geometry mask while preserving bounds."""
|
|
1618
|
+
copied = self.copy() if copy else self
|
|
1619
|
+
|
|
1620
|
+
fill: int = self.nodata or 0
|
|
1621
|
+
|
|
1622
|
+
mask_array: np.ndarray = Band.from_geopandas(
|
|
1623
|
+
gdf=to_gdf(mask)[["geometry"]],
|
|
1624
|
+
default_value=1,
|
|
1625
|
+
fill=fill,
|
|
1626
|
+
out_shape=next(iter(self)).values.shape,
|
|
1627
|
+
bounds=self.bounds,
|
|
1628
|
+
).values
|
|
1629
|
+
|
|
1630
|
+
is_not_polygon = mask_array == fill
|
|
1631
|
+
|
|
1632
|
+
for band in copied:
|
|
1633
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
1634
|
+
band._values.mask |= is_not_polygon
|
|
1635
|
+
else:
|
|
1636
|
+
band._values = np.ma.array(
|
|
1637
|
+
band.values, mask=is_not_polygon, fill_value=band.nodata
|
|
1638
|
+
)
|
|
1639
|
+
|
|
1640
|
+
return copied
|
|
1641
|
+
|
|
1642
|
+
def load(
|
|
1643
|
+
self,
|
|
1644
|
+
bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
|
|
1645
|
+
indexes: int | tuple[int] | None = None,
|
|
1646
|
+
file_system=None,
|
|
1647
|
+
**kwargs,
|
|
1648
|
+
) -> "ImageCollection":
|
|
1649
|
+
"""Load all image Bands with threading."""
|
|
1650
|
+
if bounds is None and indexes is None and all(band.has_array for band in self):
|
|
1651
|
+
return self
|
|
1652
|
+
|
|
1653
|
+
if self.masking:
|
|
1654
|
+
mask_array: np.ndarray = _read_mask_array(
|
|
1655
|
+
self,
|
|
1656
|
+
bounds=bounds,
|
|
1657
|
+
indexes=indexes,
|
|
1658
|
+
file_system=file_system,
|
|
1659
|
+
**kwargs,
|
|
1660
|
+
)
|
|
1661
|
+
|
|
1662
|
+
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
1663
|
+
parallel(
|
|
1664
|
+
joblib.delayed(_load_band)(
|
|
1665
|
+
band,
|
|
1666
|
+
bounds=bounds,
|
|
1667
|
+
indexes=indexes,
|
|
1668
|
+
file_system=file_system,
|
|
1669
|
+
_masking=None,
|
|
1670
|
+
**kwargs,
|
|
1671
|
+
)
|
|
1672
|
+
for band in self
|
|
1673
|
+
)
|
|
1674
|
+
|
|
1675
|
+
if self.masking:
|
|
1676
|
+
for band in self:
|
|
1677
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
1678
|
+
band.values.mask |= mask_array
|
|
1679
|
+
else:
|
|
1680
|
+
band.values = np.ma.array(
|
|
1681
|
+
band.values, mask=mask_array, fill_value=self.nodata
|
|
1682
|
+
)
|
|
1683
|
+
|
|
1684
|
+
return self
|
|
1685
|
+
|
|
1508
1686
|
def _construct_image_from_bands(
|
|
1509
1687
|
self, data: Sequence[Band], res: int | None
|
|
1510
1688
|
) -> None:
|
|
1511
1689
|
self._bands = list(data)
|
|
1512
1690
|
if res is None:
|
|
1513
|
-
res =
|
|
1691
|
+
res = {band.res for band in self.bands}
|
|
1514
1692
|
if len(res) == 1:
|
|
1515
|
-
self._res = res
|
|
1693
|
+
self._res = next(iter(res))
|
|
1516
1694
|
else:
|
|
1517
1695
|
raise ValueError(f"Different resolutions for the bands: {res}")
|
|
1518
1696
|
else:
|
|
@@ -1558,8 +1736,7 @@ class Image(_ImageBandBase):
|
|
|
1558
1736
|
arr,
|
|
1559
1737
|
bounds=red.bounds,
|
|
1560
1738
|
crs=red.crs,
|
|
1561
|
-
|
|
1562
|
-
**red._common_init_kwargs,
|
|
1739
|
+
**{k: v for k, v in red._common_init_kwargs.items() if k != "res"},
|
|
1563
1740
|
)
|
|
1564
1741
|
|
|
1565
1742
|
def get_brightness(
|
|
@@ -1590,81 +1767,16 @@ class Image(_ImageBandBase):
|
|
|
1590
1767
|
brightness,
|
|
1591
1768
|
bounds=red.bounds,
|
|
1592
1769
|
crs=self.crs,
|
|
1593
|
-
|
|
1594
|
-
**self._common_init_kwargs,
|
|
1770
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
1595
1771
|
)
|
|
1596
1772
|
|
|
1597
1773
|
def to_xarray(self) -> DataArray:
|
|
1598
1774
|
"""Convert the raster to an xarray.DataArray."""
|
|
1599
|
-
if self.backend == "xarray":
|
|
1600
|
-
return self.values
|
|
1601
|
-
|
|
1602
1775
|
return self._to_xarray(
|
|
1603
1776
|
np.array([band.values for band in self]),
|
|
1604
1777
|
transform=self[0].transform,
|
|
1605
1778
|
)
|
|
1606
1779
|
|
|
1607
|
-
@property
|
|
1608
|
-
def mask(self) -> Band | None:
|
|
1609
|
-
"""Mask Band."""
|
|
1610
|
-
if self.masking is None:
|
|
1611
|
-
return None
|
|
1612
|
-
|
|
1613
|
-
elif self._mask is not None:
|
|
1614
|
-
return self._mask
|
|
1615
|
-
|
|
1616
|
-
elif self._bands is not None and all(band.mask is not None for band in self):
|
|
1617
|
-
if len({id(band.mask) for band in self}) > 1:
|
|
1618
|
-
raise ValueError(
|
|
1619
|
-
"Image bands must have same mask.",
|
|
1620
|
-
{id(band.mask) for band in self},
|
|
1621
|
-
) # TODO
|
|
1622
|
-
self._mask = next(
|
|
1623
|
-
iter([band.mask for band in self if band.mask is not None])
|
|
1624
|
-
)
|
|
1625
|
-
return self._mask
|
|
1626
|
-
|
|
1627
|
-
mask_band_id = self.masking["band_id"]
|
|
1628
|
-
mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
|
|
1629
|
-
if len(mask_paths) > 1:
|
|
1630
|
-
raise ValueError(
|
|
1631
|
-
f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
|
|
1632
|
-
)
|
|
1633
|
-
elif not mask_paths:
|
|
1634
|
-
raise ValueError(
|
|
1635
|
-
f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
|
|
1636
|
-
+ str([Path(x).name for x in _ls_func(self.path)])
|
|
1637
|
-
)
|
|
1638
|
-
|
|
1639
|
-
self._mask = self.band_class(
|
|
1640
|
-
mask_paths[0],
|
|
1641
|
-
**self._common_init_kwargs,
|
|
1642
|
-
)
|
|
1643
|
-
if self._bands is not None:
|
|
1644
|
-
for band in self:
|
|
1645
|
-
band._mask = self._mask
|
|
1646
|
-
return self._mask
|
|
1647
|
-
|
|
1648
|
-
@mask.setter
|
|
1649
|
-
def mask(self, values: Band | None) -> None:
|
|
1650
|
-
if values is None:
|
|
1651
|
-
self._mask = None
|
|
1652
|
-
for band in self:
|
|
1653
|
-
band._mask = None
|
|
1654
|
-
return
|
|
1655
|
-
if not isinstance(values, Band):
|
|
1656
|
-
raise TypeError(f"mask must be Band. Got {type(values)}")
|
|
1657
|
-
self._mask = values
|
|
1658
|
-
mask_arr = self._mask.values
|
|
1659
|
-
for band in self:
|
|
1660
|
-
band._mask = self._mask
|
|
1661
|
-
try:
|
|
1662
|
-
band.values = np.ma.array(
|
|
1663
|
-
band.values.data, mask=mask_arr, fill_value=band.nodata
|
|
1664
|
-
)
|
|
1665
|
-
except ArrayNotLoadedError:
|
|
1666
|
-
pass
|
|
1667
|
-
|
|
1668
1780
|
@property
|
|
1669
1781
|
def band_ids(self) -> list[str]:
|
|
1670
1782
|
"""The Band ids."""
|
|
@@ -1687,12 +1799,9 @@ class Image(_ImageBandBase):
|
|
|
1687
1799
|
else:
|
|
1688
1800
|
paths = self._df["file_path"]
|
|
1689
1801
|
|
|
1690
|
-
mask = self.mask
|
|
1691
|
-
|
|
1692
1802
|
self._bands = [
|
|
1693
1803
|
self.band_class(
|
|
1694
1804
|
path,
|
|
1695
|
-
mask=mask,
|
|
1696
1805
|
all_file_paths=self._all_file_paths,
|
|
1697
1806
|
**self._common_init_kwargs,
|
|
1698
1807
|
)
|
|
@@ -1901,13 +2010,12 @@ class ImageCollection(_ImageBase):
|
|
|
1901
2010
|
image_class: ClassVar[Image] = Image
|
|
1902
2011
|
band_class: ClassVar[Band] = Band
|
|
1903
2012
|
_metadata_attribute_collection_type: ClassVar[type] = pd.Series
|
|
1904
|
-
backend: str = "numpy"
|
|
1905
2013
|
|
|
1906
2014
|
def __init__(
|
|
1907
2015
|
self,
|
|
1908
2016
|
data: str | Path | Sequence[Image] | Sequence[str | Path],
|
|
1909
|
-
res: int,
|
|
1910
|
-
level: str | None = None_,
|
|
2017
|
+
res: int | None_ = None_,
|
|
2018
|
+
level: str | None_ | None = None_,
|
|
1911
2019
|
processes: int = 1,
|
|
1912
2020
|
metadata: str | dict | pd.DataFrame | None = None,
|
|
1913
2021
|
nodata: int | None = None,
|
|
@@ -1923,13 +2031,13 @@ class ImageCollection(_ImageBase):
|
|
|
1923
2031
|
|
|
1924
2032
|
super().__init__(metadata=metadata, **kwargs)
|
|
1925
2033
|
|
|
1926
|
-
if callable(level) and
|
|
2034
|
+
if callable(level) and level() is None:
|
|
1927
2035
|
level = None
|
|
1928
2036
|
|
|
1929
2037
|
self.nodata = nodata
|
|
1930
2038
|
self.level = level
|
|
1931
2039
|
self.processes = processes
|
|
1932
|
-
self._res = res
|
|
2040
|
+
self._res = res if not (callable(res) and res() is None) else None
|
|
1933
2041
|
self._crs = None
|
|
1934
2042
|
|
|
1935
2043
|
self._df = None
|
|
@@ -1944,13 +2052,19 @@ class ImageCollection(_ImageBase):
|
|
|
1944
2052
|
elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
|
|
1945
2053
|
# adding band paths (asuming 'data' is a sequence of image paths)
|
|
1946
2054
|
try:
|
|
1947
|
-
self._all_file_paths = _get_child_paths_threaded(data) |
|
|
2055
|
+
self._all_file_paths = _get_child_paths_threaded(data) | {
|
|
2056
|
+
_fix_path(x) for x in data
|
|
2057
|
+
}
|
|
1948
2058
|
except FileNotFoundError as e:
|
|
1949
2059
|
if _from_root:
|
|
1950
2060
|
raise TypeError(
|
|
1951
|
-
"When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
|
|
2061
|
+
"When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
|
|
1952
2062
|
) from e
|
|
1953
2063
|
raise e
|
|
2064
|
+
if self.level:
|
|
2065
|
+
self._all_file_paths = [
|
|
2066
|
+
path for path in self._all_file_paths if self.level in path
|
|
2067
|
+
]
|
|
1954
2068
|
self._df = self._create_metadata_df(self._all_file_paths)
|
|
1955
2069
|
return
|
|
1956
2070
|
|
|
@@ -1968,7 +2082,9 @@ class ImageCollection(_ImageBase):
|
|
|
1968
2082
|
|
|
1969
2083
|
self._df = self._create_metadata_df(self._all_file_paths)
|
|
1970
2084
|
|
|
1971
|
-
def groupby(
|
|
2085
|
+
def groupby(
|
|
2086
|
+
self, by: str | list[str], copy: bool = True, **kwargs
|
|
2087
|
+
) -> ImageCollectionGroupBy:
|
|
1972
2088
|
"""Group the Collection by Image or Band attribute(s)."""
|
|
1973
2089
|
df = pd.DataFrame(
|
|
1974
2090
|
[(i, img) for i, img in enumerate(self) for _ in img],
|
|
@@ -1995,8 +2111,10 @@ class ImageCollection(_ImageBase):
|
|
|
1995
2111
|
return ImageCollectionGroupBy(
|
|
1996
2112
|
sorted(
|
|
1997
2113
|
parallel(
|
|
1998
|
-
joblib.delayed(_copy_and_add_df_parallel)(
|
|
1999
|
-
|
|
2114
|
+
joblib.delayed(_copy_and_add_df_parallel)(
|
|
2115
|
+
group_values, group_df, self, copy
|
|
2116
|
+
)
|
|
2117
|
+
for group_values, group_df in df.groupby(by, **kwargs)
|
|
2000
2118
|
)
|
|
2001
2119
|
),
|
|
2002
2120
|
by=by,
|
|
@@ -2037,6 +2155,62 @@ class ImageCollection(_ImageBase):
|
|
|
2037
2155
|
|
|
2038
2156
|
return self
|
|
2039
2157
|
|
|
2158
|
+
def pixelwise(
|
|
2159
|
+
self,
|
|
2160
|
+
func: Callable,
|
|
2161
|
+
kwargs: dict | None = None,
|
|
2162
|
+
index_aligned_kwargs: dict | None = None,
|
|
2163
|
+
masked: bool = True,
|
|
2164
|
+
processes: int | None = None,
|
|
2165
|
+
) -> np.ndarray | tuple[np.ndarray] | None:
|
|
2166
|
+
"""Run a function for each pixel.
|
|
2167
|
+
|
|
2168
|
+
The function should take a 1d array as first argument. This will be
|
|
2169
|
+
the pixel values for all bands in all images in the collection.
|
|
2170
|
+
"""
|
|
2171
|
+
values = np.array([band.values for img in self for band in img])
|
|
2172
|
+
|
|
2173
|
+
if (
|
|
2174
|
+
masked
|
|
2175
|
+
and self.nodata is not None
|
|
2176
|
+
and hasattr(next(iter(next(iter(self)))).values, "mask")
|
|
2177
|
+
):
|
|
2178
|
+
mask_array = np.array(
|
|
2179
|
+
[
|
|
2180
|
+
(band.values.mask) | (band.values.data == self.nodata)
|
|
2181
|
+
for img in self
|
|
2182
|
+
for band in img
|
|
2183
|
+
]
|
|
2184
|
+
)
|
|
2185
|
+
elif masked and self.nodata is not None:
|
|
2186
|
+
mask_array = np.array(
|
|
2187
|
+
[band.values == self.nodata for img in self for band in img]
|
|
2188
|
+
)
|
|
2189
|
+
elif masked:
|
|
2190
|
+
mask_array = np.array([band.values.mask for img in self for band in img])
|
|
2191
|
+
else:
|
|
2192
|
+
mask_array = None
|
|
2193
|
+
|
|
2194
|
+
nonmissing_row_indices, nonmissing_col_indices, results = pixelwise(
|
|
2195
|
+
func=func,
|
|
2196
|
+
values=values,
|
|
2197
|
+
mask_array=mask_array,
|
|
2198
|
+
index_aligned_kwargs=index_aligned_kwargs,
|
|
2199
|
+
kwargs=kwargs,
|
|
2200
|
+
processes=processes or self.processes,
|
|
2201
|
+
)
|
|
2202
|
+
|
|
2203
|
+
return PixelwiseResults(
|
|
2204
|
+
nonmissing_row_indices,
|
|
2205
|
+
nonmissing_col_indices,
|
|
2206
|
+
results,
|
|
2207
|
+
shape=values.shape[1:],
|
|
2208
|
+
res=self.res,
|
|
2209
|
+
bounds=self.bounds,
|
|
2210
|
+
crs=self.crs,
|
|
2211
|
+
nodata=self.nodata or np.nan,
|
|
2212
|
+
)
|
|
2213
|
+
|
|
2040
2214
|
def get_unique_band_ids(self) -> list[str]:
|
|
2041
2215
|
"""Get a list of unique band_ids across all images."""
|
|
2042
2216
|
return list({band.band_id for img in self for band in img})
|
|
@@ -2142,8 +2316,7 @@ class ImageCollection(_ImageBase):
|
|
|
2142
2316
|
arr,
|
|
2143
2317
|
bounds=bounds,
|
|
2144
2318
|
crs=crs,
|
|
2145
|
-
|
|
2146
|
-
**self._common_init_kwargs,
|
|
2319
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2147
2320
|
)
|
|
2148
2321
|
|
|
2149
2322
|
band._merged = True
|
|
@@ -2216,7 +2389,7 @@ class ImageCollection(_ImageBase):
|
|
|
2216
2389
|
bounds=out_bounds,
|
|
2217
2390
|
crs=crs,
|
|
2218
2391
|
band_id=band_id,
|
|
2219
|
-
**self._common_init_kwargs,
|
|
2392
|
+
**{k: v for k, v in self._common_init_kwargs.items() if k != "res"},
|
|
2220
2393
|
)
|
|
2221
2394
|
)
|
|
2222
2395
|
|
|
@@ -2329,22 +2502,11 @@ class ImageCollection(_ImageBase):
|
|
|
2329
2502
|
):
|
|
2330
2503
|
return self
|
|
2331
2504
|
|
|
2332
|
-
# if self.processes == 1:
|
|
2333
|
-
# for img in self:
|
|
2334
|
-
# for band in img:
|
|
2335
|
-
# band.load(
|
|
2336
|
-
# bounds=bounds,
|
|
2337
|
-
# indexes=indexes,
|
|
2338
|
-
# file_system=file_system,
|
|
2339
|
-
# **kwargs,
|
|
2340
|
-
# )
|
|
2341
|
-
# return self
|
|
2342
|
-
|
|
2343
2505
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2344
2506
|
if self.masking:
|
|
2345
|
-
parallel(
|
|
2346
|
-
joblib.delayed(
|
|
2347
|
-
img
|
|
2507
|
+
masks: list[np.ndarray] = parallel(
|
|
2508
|
+
joblib.delayed(_read_mask_array)(
|
|
2509
|
+
img,
|
|
2348
2510
|
bounds=bounds,
|
|
2349
2511
|
indexes=indexes,
|
|
2350
2512
|
file_system=file_system,
|
|
@@ -2352,14 +2514,6 @@ class ImageCollection(_ImageBase):
|
|
|
2352
2514
|
)
|
|
2353
2515
|
for img in self
|
|
2354
2516
|
)
|
|
2355
|
-
for img in self:
|
|
2356
|
-
for band in img:
|
|
2357
|
-
band._mask = img.mask
|
|
2358
|
-
|
|
2359
|
-
# print({img.mask.has_array for img in self })
|
|
2360
|
-
# print({band.mask.has_array for img in self for band in img})
|
|
2361
|
-
|
|
2362
|
-
# with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
2363
2517
|
|
|
2364
2518
|
parallel(
|
|
2365
2519
|
joblib.delayed(_load_band)(
|
|
@@ -2367,34 +2521,86 @@ class ImageCollection(_ImageBase):
|
|
|
2367
2521
|
bounds=bounds,
|
|
2368
2522
|
indexes=indexes,
|
|
2369
2523
|
file_system=file_system,
|
|
2524
|
+
_masking=None,
|
|
2370
2525
|
**kwargs,
|
|
2371
2526
|
)
|
|
2372
2527
|
for img in self
|
|
2373
2528
|
for band in img
|
|
2374
2529
|
)
|
|
2375
2530
|
|
|
2531
|
+
if self.masking:
|
|
2532
|
+
for img, mask_array in zip(self, masks, strict=True):
|
|
2533
|
+
for band in img:
|
|
2534
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
2535
|
+
band.values.mask |= mask_array
|
|
2536
|
+
else:
|
|
2537
|
+
band.values = np.ma.array(
|
|
2538
|
+
band.values, mask=mask_array, fill_value=self.nodata
|
|
2539
|
+
)
|
|
2540
|
+
|
|
2376
2541
|
return self
|
|
2377
2542
|
|
|
2378
2543
|
def clip(
|
|
2379
2544
|
self,
|
|
2380
2545
|
mask: Geometry | GeoDataFrame | GeoSeries,
|
|
2381
|
-
|
|
2546
|
+
dropna: bool = True,
|
|
2547
|
+
copy: bool = True,
|
|
2382
2548
|
) -> "ImageCollection":
|
|
2383
|
-
"""Clip all image Bands
|
|
2384
|
-
|
|
2385
|
-
for img in self:
|
|
2386
|
-
for band in img:
|
|
2387
|
-
band.clip(mask, **kwargs)
|
|
2388
|
-
return self
|
|
2549
|
+
"""Clip all image Bands while preserving bounds."""
|
|
2550
|
+
copied = self.copy() if copy else self
|
|
2389
2551
|
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2552
|
+
copied._images = [img for img in copied if img.union_all()]
|
|
2553
|
+
|
|
2554
|
+
fill: int = self.nodata or 0
|
|
2555
|
+
|
|
2556
|
+
common_band_from_geopandas_kwargs = dict(
|
|
2557
|
+
gdf=to_gdf(mask)[["geometry"]],
|
|
2558
|
+
default_value=1,
|
|
2559
|
+
fill=fill,
|
|
2560
|
+
)
|
|
2561
|
+
|
|
2562
|
+
for img in copied:
|
|
2563
|
+
img._rounded_bounds = tuple(int(x) for x in img.bounds)
|
|
2564
|
+
|
|
2565
|
+
for bounds in {img._rounded_bounds for img in copied}:
|
|
2566
|
+
shapes = {
|
|
2567
|
+
band.values.shape
|
|
2568
|
+
for img in copied
|
|
2394
2569
|
for band in img
|
|
2395
|
-
|
|
2570
|
+
if img._rounded_bounds == bounds
|
|
2571
|
+
}
|
|
2572
|
+
if len(shapes) != 1:
|
|
2573
|
+
raise ValueError(f"Different shapes: {shapes}. For bounds {bounds}")
|
|
2396
2574
|
|
|
2397
|
-
|
|
2575
|
+
mask_array: np.ndarray = Band.from_geopandas(
|
|
2576
|
+
**common_band_from_geopandas_kwargs,
|
|
2577
|
+
out_shape=next(iter(shapes)),
|
|
2578
|
+
bounds=bounds,
|
|
2579
|
+
).values
|
|
2580
|
+
|
|
2581
|
+
is_not_polygon = mask_array == fill
|
|
2582
|
+
|
|
2583
|
+
for img in copied:
|
|
2584
|
+
if img._rounded_bounds != bounds:
|
|
2585
|
+
continue
|
|
2586
|
+
|
|
2587
|
+
for band in img:
|
|
2588
|
+
if isinstance(band.values, np.ma.core.MaskedArray):
|
|
2589
|
+
band._values.mask |= is_not_polygon
|
|
2590
|
+
else:
|
|
2591
|
+
band._values = np.ma.array(
|
|
2592
|
+
band.values, mask=is_not_polygon, fill_value=band.nodata
|
|
2593
|
+
)
|
|
2594
|
+
|
|
2595
|
+
for img in copied:
|
|
2596
|
+
del img._rounded_bounds
|
|
2597
|
+
|
|
2598
|
+
if dropna:
|
|
2599
|
+
copied.images = [
|
|
2600
|
+
img for img in copied if any(np.sum(band.values) for band in img)
|
|
2601
|
+
]
|
|
2602
|
+
|
|
2603
|
+
return copied
|
|
2398
2604
|
|
|
2399
2605
|
def _set_bbox(
|
|
2400
2606
|
self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
|
|
@@ -2405,17 +2611,12 @@ class ImageCollection(_ImageBase):
|
|
|
2405
2611
|
if self._images is not None:
|
|
2406
2612
|
for img in self._images:
|
|
2407
2613
|
img._bbox = self._bbox
|
|
2408
|
-
if img.mask is not None:
|
|
2409
|
-
img.mask._bbox = self._bbox
|
|
2410
2614
|
if img.bands is None:
|
|
2411
2615
|
continue
|
|
2412
2616
|
for band in img:
|
|
2413
2617
|
band._bbox = self._bbox
|
|
2414
2618
|
bounds = box(*band._bbox).intersection(box(*band.bounds))
|
|
2415
2619
|
band._bounds = to_bbox(bounds) if not bounds.is_empty else None
|
|
2416
|
-
if band.mask is not None:
|
|
2417
|
-
band.mask._bbox = self._bbox
|
|
2418
|
-
band.mask._bounds = band._bounds
|
|
2419
2620
|
|
|
2420
2621
|
return self
|
|
2421
2622
|
|
|
@@ -2521,7 +2722,7 @@ class ImageCollection(_ImageBase):
|
|
|
2521
2722
|
**kwargs,
|
|
2522
2723
|
)
|
|
2523
2724
|
|
|
2524
|
-
return
|
|
2725
|
+
return combine_by_coords(list(xarrs.values()))
|
|
2525
2726
|
# return Dataset(xarrs)
|
|
2526
2727
|
|
|
2527
2728
|
def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
|
|
@@ -2534,6 +2735,9 @@ class ImageCollection(_ImageBase):
|
|
|
2534
2735
|
try:
|
|
2535
2736
|
name = band.name
|
|
2536
2737
|
except AttributeError:
|
|
2738
|
+
name = None
|
|
2739
|
+
|
|
2740
|
+
if name is None:
|
|
2537
2741
|
name = f"{self.__class__.__name__}({i})"
|
|
2538
2742
|
|
|
2539
2743
|
if name not in out:
|
|
@@ -2594,10 +2798,6 @@ class ImageCollection(_ImageBase):
|
|
|
2594
2798
|
|
|
2595
2799
|
return copied
|
|
2596
2800
|
|
|
2597
|
-
def __or__(self, collection: "ImageCollection") -> "ImageCollection":
|
|
2598
|
-
"""Concatenate the collection with another collection."""
|
|
2599
|
-
return concat_image_collections([self, collection])
|
|
2600
|
-
|
|
2601
2801
|
def __iter__(self) -> Iterator[Image]:
|
|
2602
2802
|
"""Iterate over the images."""
|
|
2603
2803
|
return iter(self.images)
|
|
@@ -2607,14 +2807,16 @@ class ImageCollection(_ImageBase):
|
|
|
2607
2807
|
return len(self.images)
|
|
2608
2808
|
|
|
2609
2809
|
def __getattr__(self, attr: str) -> Any:
|
|
2610
|
-
"""Make iterable of
|
|
2810
|
+
"""Make iterable of metadata attribute."""
|
|
2611
2811
|
if attr in (self.metadata_attributes or {}):
|
|
2612
2812
|
return self._metadata_attribute_collection_type(
|
|
2613
2813
|
[getattr(img, attr) for img in self]
|
|
2614
2814
|
)
|
|
2615
2815
|
return super().__getattribute__(attr)
|
|
2616
2816
|
|
|
2617
|
-
def __getitem__(
|
|
2817
|
+
def __getitem__(
|
|
2818
|
+
self, item: int | slice | Sequence[int | bool]
|
|
2819
|
+
) -> "Image | ImageCollection":
|
|
2618
2820
|
"""Select one Image by integer index, or multiple Images by slice, list of int."""
|
|
2619
2821
|
if isinstance(item, int):
|
|
2620
2822
|
return self.images[item]
|
|
@@ -2653,14 +2855,14 @@ class ImageCollection(_ImageBase):
|
|
|
2653
2855
|
return copied
|
|
2654
2856
|
|
|
2655
2857
|
@property
|
|
2656
|
-
def
|
|
2858
|
+
def date(self) -> Any:
|
|
2657
2859
|
"""List of image dates."""
|
|
2658
|
-
return [img.date for img in self]
|
|
2860
|
+
return self._metadata_attribute_collection_type([img.date for img in self])
|
|
2659
2861
|
|
|
2660
2862
|
@property
|
|
2661
|
-
def image_paths(self) ->
|
|
2863
|
+
def image_paths(self) -> Any:
|
|
2662
2864
|
"""List of image paths."""
|
|
2663
|
-
return [img.path for img in self]
|
|
2865
|
+
return self._metadata_attribute_collection_type([img.path for img in self])
|
|
2664
2866
|
|
|
2665
2867
|
@property
|
|
2666
2868
|
def images(self) -> list["Image"]:
|
|
@@ -2678,21 +2880,6 @@ class ImageCollection(_ImageBase):
|
|
|
2678
2880
|
**self._common_init_kwargs,
|
|
2679
2881
|
)
|
|
2680
2882
|
|
|
2681
|
-
if self.masking is not None:
|
|
2682
|
-
images = []
|
|
2683
|
-
for image in self._images:
|
|
2684
|
-
# TODO why this loop?
|
|
2685
|
-
try:
|
|
2686
|
-
if not isinstance(image.mask, Band):
|
|
2687
|
-
raise ValueError()
|
|
2688
|
-
images.append(image)
|
|
2689
|
-
except ValueError as e:
|
|
2690
|
-
raise e
|
|
2691
|
-
continue
|
|
2692
|
-
self._images = images
|
|
2693
|
-
for image in self._images:
|
|
2694
|
-
image._bands = [band for band in image if band.band_id is not None]
|
|
2695
|
-
|
|
2696
2883
|
self._images = [img for img in self if len(img)]
|
|
2697
2884
|
|
|
2698
2885
|
if self._should_be_sorted:
|
|
@@ -2722,24 +2909,22 @@ class ImageCollection(_ImageBase):
|
|
|
2722
2909
|
|
|
2723
2910
|
@images.setter
|
|
2724
2911
|
def images(self, new_value: list["Image"]) -> list["Image"]:
|
|
2725
|
-
|
|
2726
|
-
if not
|
|
2912
|
+
new_value = list(new_value)
|
|
2913
|
+
if not new_value:
|
|
2914
|
+
self._images = new_value
|
|
2915
|
+
return
|
|
2916
|
+
if all(isinstance(x, Band) for x in new_value):
|
|
2917
|
+
if len(new_value) != len(self):
|
|
2918
|
+
raise ValueError("'images' must have same length as number of images.")
|
|
2919
|
+
new_images = []
|
|
2920
|
+
for i, img in enumerate(self):
|
|
2921
|
+
img._bands = [new_value[i]]
|
|
2922
|
+
new_images.append(img)
|
|
2923
|
+
self._images = new_images
|
|
2924
|
+
return
|
|
2925
|
+
if not all(isinstance(x, Image) for x in new_value):
|
|
2727
2926
|
raise TypeError("images should be a sequence of Image.")
|
|
2728
|
-
|
|
2729
|
-
def __repr__(self) -> str:
|
|
2730
|
-
"""String representation."""
|
|
2731
|
-
root = ""
|
|
2732
|
-
if self.path is not None:
|
|
2733
|
-
data = f"'{self.path}'"
|
|
2734
|
-
elif all(img.path is not None for img in self):
|
|
2735
|
-
data = [img.path for img in self]
|
|
2736
|
-
parents = {str(Path(path).parent) for path in data}
|
|
2737
|
-
if len(parents) == 1:
|
|
2738
|
-
data = [Path(path).name for path in data]
|
|
2739
|
-
root = f" root='{next(iter(parents))}',"
|
|
2740
|
-
else:
|
|
2741
|
-
data = [img for img in self]
|
|
2742
|
-
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
|
|
2927
|
+
self._images = new_value
|
|
2743
2928
|
|
|
2744
2929
|
def union_all(self) -> Polygon | MultiPolygon:
|
|
2745
2930
|
"""(Multi)Polygon representing the union of all image bounds."""
|
|
@@ -2796,7 +2981,6 @@ class ImageCollection(_ImageBase):
|
|
|
2796
2981
|
if "date" in x_var and subcollection._should_be_sorted:
|
|
2797
2982
|
subcollection._images = list(sorted(subcollection._images))
|
|
2798
2983
|
|
|
2799
|
-
y = np.array([band.values for img in subcollection for band in img])
|
|
2800
2984
|
if "date" in x_var and subcollection._should_be_sorted:
|
|
2801
2985
|
x = np.array(
|
|
2802
2986
|
[
|
|
@@ -2813,120 +2997,35 @@ class ImageCollection(_ImageBase):
|
|
|
2813
2997
|
- pd.Timestamp(np.min(x))
|
|
2814
2998
|
).days
|
|
2815
2999
|
else:
|
|
2816
|
-
x = np.arange(0,
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
3000
|
+
x = np.arange(0, sum(1 for img in subcollection for band in img))
|
|
3001
|
+
|
|
3002
|
+
subcollection.pixelwise(
|
|
3003
|
+
_plot_pixels_1d,
|
|
3004
|
+
kwargs=dict(
|
|
3005
|
+
alpha=alpha,
|
|
3006
|
+
x_var=x_var,
|
|
3007
|
+
y_label=y_label,
|
|
3008
|
+
rounding=rounding,
|
|
3009
|
+
first_date=first_date,
|
|
3010
|
+
figsize=figsize,
|
|
3011
|
+
),
|
|
3012
|
+
index_aligned_kwargs=dict(x=x),
|
|
2828
3013
|
)
|
|
2829
3014
|
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
this_x = this_x[condition]
|
|
2845
|
-
|
|
2846
|
-
coef, intercept = np.linalg.lstsq(
|
|
2847
|
-
np.vstack([this_x, np.ones(this_x.shape[0])]).T,
|
|
2848
|
-
this_y,
|
|
2849
|
-
rcond=None,
|
|
2850
|
-
)[0]
|
|
2851
|
-
predicted = np.array([intercept + coef * x for x in this_x])
|
|
2852
|
-
|
|
2853
|
-
predicted_start = predicted[0]
|
|
2854
|
-
predicted_end = predicted[-1]
|
|
2855
|
-
predicted_change = predicted_end - predicted_start
|
|
2856
|
-
|
|
2857
|
-
# Degrees of freedom
|
|
2858
|
-
dof = len(this_x) - 2
|
|
2859
|
-
|
|
2860
|
-
# 95% confidence interval
|
|
2861
|
-
t_val = stats.t.ppf(1 - alpha / 2, dof)
|
|
2862
|
-
|
|
2863
|
-
# Mean squared error of the residuals
|
|
2864
|
-
mse = np.sum((this_y - predicted) ** 2) / dof
|
|
2865
|
-
|
|
2866
|
-
# Calculate the standard error of predictions
|
|
2867
|
-
pred_stderr = np.sqrt(
|
|
2868
|
-
mse
|
|
2869
|
-
* (
|
|
2870
|
-
1 / len(this_x)
|
|
2871
|
-
+ (this_x - np.mean(this_x)) ** 2
|
|
2872
|
-
/ np.sum((this_x - np.mean(this_x)) ** 2)
|
|
2873
|
-
)
|
|
2874
|
-
)
|
|
2875
|
-
|
|
2876
|
-
# Calculate the confidence interval for predictions
|
|
2877
|
-
ci_lower = predicted - t_val * pred_stderr
|
|
2878
|
-
ci_upper = predicted + t_val * pred_stderr
|
|
2879
|
-
|
|
2880
|
-
fig = plt.figure(figsize=figsize)
|
|
2881
|
-
ax = fig.add_subplot(1, 1, 1)
|
|
2882
|
-
|
|
2883
|
-
ax.scatter(this_x, this_y, color="#2c93db")
|
|
2884
|
-
ax.plot(this_x, predicted, color="#e0436b")
|
|
2885
|
-
ax.fill_between(
|
|
2886
|
-
this_x,
|
|
2887
|
-
ci_lower,
|
|
2888
|
-
ci_upper,
|
|
2889
|
-
color="#e0436b",
|
|
2890
|
-
alpha=0.2,
|
|
2891
|
-
label=f"{int(alpha*100)}% CI",
|
|
2892
|
-
)
|
|
2893
|
-
plt.title(
|
|
2894
|
-
f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
|
|
2895
|
-
f"pred change: {round(predicted_change, rounding)}, "
|
|
2896
|
-
f"pred start: {round(predicted_start, rounding)}, "
|
|
2897
|
-
f"pred end: {round(predicted_end, rounding)}"
|
|
2898
|
-
)
|
|
2899
|
-
plt.xlabel(x_var)
|
|
2900
|
-
plt.ylabel(y_label)
|
|
2901
|
-
|
|
2902
|
-
if x_var == "date":
|
|
2903
|
-
date_labels = pd.to_datetime(
|
|
2904
|
-
[first_date + pd.Timedelta(days=int(day)) for day in this_x]
|
|
2905
|
-
)
|
|
2906
|
-
|
|
2907
|
-
_, unique_indices = np.unique(
|
|
2908
|
-
date_labels.strftime("%Y-%m"), return_index=True
|
|
2909
|
-
)
|
|
2910
|
-
|
|
2911
|
-
unique_x = np.array(this_x)[unique_indices]
|
|
2912
|
-
unique_labels = date_labels[unique_indices].strftime("%Y-%m")
|
|
2913
|
-
|
|
2914
|
-
ax.set_xticks(unique_x)
|
|
2915
|
-
ax.set_xticklabels(unique_labels, rotation=45, ha="right")
|
|
2916
|
-
# ax.tick_params(axis="x", length=10, width=2)
|
|
2917
|
-
|
|
2918
|
-
plt.show()
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> tuple[str]:
|
|
2922
|
-
for regex in regexes:
|
|
2923
|
-
try:
|
|
2924
|
-
return re.search(regex, xml_file)
|
|
2925
|
-
except (TypeError, AttributeError):
|
|
2926
|
-
continue
|
|
2927
|
-
raise ValueError(
|
|
2928
|
-
f"Could not find processing_baseline info from {regexes} in {xml_file}"
|
|
2929
|
-
)
|
|
3015
|
+
def __repr__(self) -> str:
|
|
3016
|
+
"""String representation."""
|
|
3017
|
+
root = ""
|
|
3018
|
+
if self.path is not None:
|
|
3019
|
+
data = f"'{self.path}'"
|
|
3020
|
+
elif all(img.path is not None for img in self):
|
|
3021
|
+
data = [img.path for img in self]
|
|
3022
|
+
parents = {str(Path(path).parent) for path in data}
|
|
3023
|
+
if len(parents) == 1:
|
|
3024
|
+
data = [Path(path).name for path in data]
|
|
3025
|
+
root = f" root='{next(iter(parents))}',"
|
|
3026
|
+
else:
|
|
3027
|
+
data = [img for img in self]
|
|
3028
|
+
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
|
|
2930
3029
|
|
|
2931
3030
|
|
|
2932
3031
|
class Sentinel2Config:
|
|
@@ -2984,14 +3083,14 @@ class Sentinel2Config:
|
|
|
2984
3083
|
xml_file,
|
|
2985
3084
|
)
|
|
2986
3085
|
if match_ is None:
|
|
2987
|
-
|
|
3086
|
+
return None
|
|
2988
3087
|
|
|
2989
3088
|
if "NOT_REFINED" in match_.group(0):
|
|
2990
3089
|
return False
|
|
2991
3090
|
elif "REFINED" in match_.group(0):
|
|
2992
3091
|
return True
|
|
2993
3092
|
else:
|
|
2994
|
-
raise _RegexError()
|
|
3093
|
+
raise _RegexError(xml_file)
|
|
2995
3094
|
|
|
2996
3095
|
def _get_boa_quantification_value(self, xml_file: str) -> int:
|
|
2997
3096
|
return int(
|
|
@@ -3040,9 +3139,6 @@ class Sentinel2Band(Sentinel2Config, Band):
|
|
|
3040
3139
|
}
|
|
3041
3140
|
|
|
3042
3141
|
def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
|
|
3043
|
-
if self.is_mask:
|
|
3044
|
-
return None
|
|
3045
|
-
|
|
3046
3142
|
pat = re.compile(
|
|
3047
3143
|
r"""
|
|
3048
3144
|
<BOA_ADD_OFFSET\s*
|
|
@@ -3058,7 +3154,7 @@ class Sentinel2Band(Sentinel2Config, Band):
|
|
|
3058
3154
|
except (TypeError, AttributeError, KeyError) as e:
|
|
3059
3155
|
raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
|
|
3060
3156
|
if not matches:
|
|
3061
|
-
|
|
3157
|
+
return None
|
|
3062
3158
|
|
|
3063
3159
|
dict_ = (
|
|
3064
3160
|
pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
|
|
@@ -3121,7 +3217,7 @@ class Sentinel2Collection(Sentinel2Config, ImageCollection):
|
|
|
3121
3217
|
def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
|
|
3122
3218
|
"""ImageCollection with Sentinel2 specific name variables and path regexes."""
|
|
3123
3219
|
level = kwargs.get("level", None_)
|
|
3124
|
-
if callable(level) and
|
|
3220
|
+
if callable(level) and level() is None:
|
|
3125
3221
|
raise ValueError("Must specify level for Sentinel2Collection.")
|
|
3126
3222
|
super().__init__(data=data, **kwargs)
|
|
3127
3223
|
|
|
@@ -3146,10 +3242,7 @@ class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
|
|
|
3146
3242
|
|
|
3147
3243
|
|
|
3148
3244
|
def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
|
|
3149
|
-
"""
|
|
3150
|
-
|
|
3151
|
-
Same as using the union operator |.
|
|
3152
|
-
"""
|
|
3245
|
+
"""Concatenate ImageCollections."""
|
|
3153
3246
|
resolutions = {x.res for x in collections}
|
|
3154
3247
|
if len(resolutions) > 1:
|
|
3155
3248
|
raise ValueError(f"resoultion mismatch. {resolutions}")
|
|
@@ -3185,8 +3278,10 @@ def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
|
|
|
3185
3278
|
raise ValueError("array must be 2 or 3 dimensional")
|
|
3186
3279
|
|
|
3187
3280
|
|
|
3188
|
-
def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
|
|
3189
|
-
|
|
3281
|
+
def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndarray:
|
|
3282
|
+
resx, resy = _res_as_tuple(res)
|
|
3283
|
+
|
|
3284
|
+
gradient_x, gradient_y = np.gradient(array, resx, resy)
|
|
3190
3285
|
|
|
3191
3286
|
gradient = abs(gradient_x) + abs(gradient_y)
|
|
3192
3287
|
|
|
@@ -3273,7 +3368,7 @@ def _get_images(
|
|
|
3273
3368
|
return images
|
|
3274
3369
|
|
|
3275
3370
|
|
|
3276
|
-
class
|
|
3371
|
+
class _ArrayNotLoadedError(ValueError):
|
|
3277
3372
|
"""Arrays are not loaded."""
|
|
3278
3373
|
|
|
3279
3374
|
|
|
@@ -3351,18 +3446,22 @@ def _intesects(x, other) -> bool:
|
|
|
3351
3446
|
|
|
3352
3447
|
|
|
3353
3448
|
def _copy_and_add_df_parallel(
|
|
3354
|
-
|
|
3449
|
+
group_values: tuple[Any, ...],
|
|
3450
|
+
group_df: pd.DataFrame,
|
|
3451
|
+
self: ImageCollection,
|
|
3452
|
+
copy: bool,
|
|
3355
3453
|
) -> tuple[tuple[Any], ImageCollection]:
|
|
3356
|
-
copied = self.copy()
|
|
3454
|
+
copied = self.copy() if copy else self
|
|
3357
3455
|
copied.images = [
|
|
3358
|
-
img.copy()
|
|
3456
|
+
img.copy() if copy else img
|
|
3457
|
+
for img in group_df.drop_duplicates("_image_idx")["_image_instance"]
|
|
3359
3458
|
]
|
|
3360
|
-
if "band_id" in
|
|
3361
|
-
band_ids = set(
|
|
3459
|
+
if "band_id" in group_df:
|
|
3460
|
+
band_ids = set(group_df["band_id"].values)
|
|
3362
3461
|
for img in copied.images:
|
|
3363
3462
|
img._bands = [band for band in img if band.band_id in band_ids]
|
|
3364
3463
|
|
|
3365
|
-
return (
|
|
3464
|
+
return (group_values, copied)
|
|
3366
3465
|
|
|
3367
3466
|
|
|
3368
3467
|
def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
|
|
@@ -3388,15 +3487,37 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
|
|
|
3388
3487
|
return rasterio.open(file)
|
|
3389
3488
|
|
|
3390
3489
|
|
|
3391
|
-
def
|
|
3490
|
+
def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
|
|
3491
|
+
mask_band_id = self.masking["band_id"]
|
|
3492
|
+
mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
|
|
3493
|
+
if len(mask_paths) > 1:
|
|
3494
|
+
raise ValueError(
|
|
3495
|
+
f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
|
|
3496
|
+
)
|
|
3497
|
+
elif not mask_paths:
|
|
3498
|
+
raise ValueError(
|
|
3499
|
+
f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
|
|
3500
|
+
+ str([Path(x).name for x in _ls_func(self.path)])
|
|
3501
|
+
)
|
|
3502
|
+
|
|
3503
|
+
band = Band(
|
|
3504
|
+
next(iter(mask_paths)),
|
|
3505
|
+
**{**self._common_init_kwargs, "metadata": None},
|
|
3506
|
+
)
|
|
3507
|
+
band.load(**kwargs)
|
|
3508
|
+
boolean_mask = np.isin(band.values, list(self.masking["values"]))
|
|
3509
|
+
return boolean_mask
|
|
3510
|
+
|
|
3511
|
+
|
|
3512
|
+
def _load_band(band: Band, **kwargs) -> Band:
|
|
3392
3513
|
return band.load(**kwargs)
|
|
3393
3514
|
|
|
3394
3515
|
|
|
3395
|
-
def _band_apply(band: Band, func: Callable, **kwargs) ->
|
|
3516
|
+
def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
|
|
3396
3517
|
return band.apply(func, **kwargs)
|
|
3397
3518
|
|
|
3398
3519
|
|
|
3399
|
-
def _clip_band(band: Band, mask, **kwargs) ->
|
|
3520
|
+
def _clip_band(band: Band, mask, **kwargs) -> Band:
|
|
3400
3521
|
return band.clip(mask, **kwargs)
|
|
3401
3522
|
|
|
3402
3523
|
|
|
@@ -3441,126 +3562,120 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
|
|
|
3441
3562
|
return binary_erosion(arr, structure=structure).astype(dtype)
|
|
3442
3563
|
|
|
3443
3564
|
|
|
3444
|
-
def
|
|
3565
|
+
def _plot_pixels_1d(
|
|
3566
|
+
y: np.ndarray,
|
|
3567
|
+
x: np.ndarray,
|
|
3568
|
+
alpha: float,
|
|
3569
|
+
x_var: str,
|
|
3570
|
+
y_label: str,
|
|
3571
|
+
rounding: int,
|
|
3572
|
+
figsize: tuple,
|
|
3573
|
+
first_date: pd.Timestamp,
|
|
3574
|
+
) -> None:
|
|
3575
|
+
coef, intercept = np.linalg.lstsq(
|
|
3576
|
+
np.vstack([x, np.ones(x.shape[0])]).T,
|
|
3577
|
+
y,
|
|
3578
|
+
rcond=None,
|
|
3579
|
+
)[0]
|
|
3580
|
+
predicted = np.array([intercept + coef * x for x in x])
|
|
3581
|
+
|
|
3582
|
+
predicted_start = predicted[0]
|
|
3583
|
+
predicted_end = predicted[-1]
|
|
3584
|
+
predicted_change = predicted_end - predicted_start
|
|
3585
|
+
|
|
3586
|
+
# Degrees of freedom
|
|
3587
|
+
dof = len(x) - 2
|
|
3588
|
+
|
|
3589
|
+
# 95% confidence interval
|
|
3590
|
+
t_val = stats.t.ppf(1 - alpha / 2, dof)
|
|
3591
|
+
|
|
3592
|
+
# Mean squared error of the residuals
|
|
3593
|
+
mse = np.sum((y - predicted) ** 2) / dof
|
|
3594
|
+
|
|
3595
|
+
# Calculate the standard error of predictions
|
|
3596
|
+
pred_stderr = np.sqrt(
|
|
3597
|
+
mse * (1 / len(x) + (x - np.mean(x)) ** 2 / np.sum((x - np.mean(x)) ** 2))
|
|
3598
|
+
)
|
|
3445
3599
|
|
|
3446
|
-
#
|
|
3447
|
-
|
|
3448
|
-
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3454
|
-
|
|
3455
|
-
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
[0.25, 0.0, 0.05],
|
|
3472
|
-
[0.3, 0.1, 0.1],
|
|
3473
|
-
[0.35, 0.2, 0.15],
|
|
3474
|
-
[0.4, 0.3, 0.2],
|
|
3475
|
-
[0.45, 0.4, 0.25],
|
|
3476
|
-
[0.5, 0.5, 0.3],
|
|
3477
|
-
[0.55, 0.6, 0.35],
|
|
3478
|
-
[0.7, 0.9, 0.5],
|
|
3479
|
-
]
|
|
3480
|
-
green = [
|
|
3481
|
-
[0.6, 0.6, 0.6],
|
|
3482
|
-
[0.4, 0.7, 0.4],
|
|
3483
|
-
[0.3, 0.8, 0.3],
|
|
3484
|
-
[0.25, 0.4, 0.25],
|
|
3485
|
-
[0.2, 0.5, 0.2],
|
|
3486
|
-
[0.10, 0.7, 0.10],
|
|
3487
|
-
[0, 0.9, 0],
|
|
3488
|
-
]
|
|
3600
|
+
# Calculate the confidence interval for predictions
|
|
3601
|
+
ci_lower = predicted - t_val * pred_stderr
|
|
3602
|
+
ci_upper = predicted + t_val * pred_stderr
|
|
3603
|
+
|
|
3604
|
+
fig = plt.figure(figsize=figsize)
|
|
3605
|
+
ax = fig.add_subplot(1, 1, 1)
|
|
3606
|
+
|
|
3607
|
+
ax.scatter(x, y, color="#2c93db")
|
|
3608
|
+
ax.plot(x, predicted, color="#e0436b")
|
|
3609
|
+
ax.fill_between(
|
|
3610
|
+
x,
|
|
3611
|
+
ci_lower,
|
|
3612
|
+
ci_upper,
|
|
3613
|
+
color="#e0436b",
|
|
3614
|
+
alpha=0.2,
|
|
3615
|
+
label=f"{int(alpha*100)}% CI",
|
|
3616
|
+
)
|
|
3617
|
+
plt.title(
|
|
3618
|
+
f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
|
|
3619
|
+
f"pred change: {round(predicted_change, rounding)}, "
|
|
3620
|
+
f"pred start: {round(predicted_start, rounding)}, "
|
|
3621
|
+
f"pred end: {round(predicted_end, rounding)}"
|
|
3622
|
+
)
|
|
3623
|
+
plt.xlabel(x_var)
|
|
3624
|
+
plt.ylabel(y_label)
|
|
3489
3625
|
|
|
3490
|
-
|
|
3491
|
-
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3495
|
-
|
|
3496
|
-
|
|
3497
|
-
|
|
3498
|
-
|
|
3499
|
-
|
|
3500
|
-
|
|
3501
|
-
|
|
3502
|
-
|
|
3503
|
-
|
|
3504
|
-
|
|
3505
|
-
|
|
3506
|
-
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
|
|
3512
|
-
|
|
3513
|
-
|
|
3514
|
-
|
|
3515
|
-
|
|
3516
|
-
|
|
3517
|
-
|
|
3518
|
-
|
|
3519
|
-
|
|
3520
|
-
|
|
3521
|
-
|
|
3522
|
-
|
|
3523
|
-
|
|
3524
|
-
|
|
3525
|
-
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
|
|
3529
|
-
|
|
3530
|
-
|
|
3531
|
-
|
|
3532
|
-
|
|
3533
|
-
|
|
3534
|
-
|
|
3535
|
-
|
|
3536
|
-
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3540
|
-
|
|
3541
|
-
|
|
3542
|
-
|
|
3543
|
-
|
|
3544
|
-
# Define the segments of the colormap
|
|
3545
|
-
cdict = {
|
|
3546
|
-
"red": [
|
|
3547
|
-
(0.0, blue[0], blue[0]),
|
|
3548
|
-
(0.3, gray[0], gray[0]),
|
|
3549
|
-
(0.7, gray[0], gray[0]),
|
|
3550
|
-
(1.0, green[0], green[0]),
|
|
3551
|
-
],
|
|
3552
|
-
"green": [
|
|
3553
|
-
(0.0, blue[1], blue[1]),
|
|
3554
|
-
(0.3, gray[1], gray[1]),
|
|
3555
|
-
(0.7, gray[1], gray[1]),
|
|
3556
|
-
(1.0, green[1], green[1]),
|
|
3557
|
-
],
|
|
3558
|
-
"blue": [
|
|
3559
|
-
(0.0, blue[2], blue[2]),
|
|
3560
|
-
(0.3, gray[2], gray[2]),
|
|
3561
|
-
(0.7, gray[2], gray[2]),
|
|
3562
|
-
(1.0, green[2], green[2]),
|
|
3563
|
-
],
|
|
3564
|
-
}
|
|
3626
|
+
if x_var == "date":
|
|
3627
|
+
date_labels = pd.to_datetime(
|
|
3628
|
+
[first_date + pd.Timedelta(days=int(day)) for day in x]
|
|
3629
|
+
)
|
|
3630
|
+
|
|
3631
|
+
_, unique_indices = np.unique(date_labels.strftime("%Y-%m"), return_index=True)
|
|
3632
|
+
|
|
3633
|
+
unique_x = np.array(x)[unique_indices]
|
|
3634
|
+
unique_labels = date_labels[unique_indices].strftime("%Y-%m")
|
|
3635
|
+
|
|
3636
|
+
ax.set_xticks(unique_x)
|
|
3637
|
+
ax.set_xticklabels(unique_labels, rotation=45, ha="right")
|
|
3638
|
+
|
|
3639
|
+
plt.show()
|
|
3640
|
+
|
|
3641
|
+
|
|
3642
|
+
def pixelwise(
|
|
3643
|
+
func: Callable,
|
|
3644
|
+
values: np.ndarray,
|
|
3645
|
+
mask_array: np.ndarray | None = None,
|
|
3646
|
+
index_aligned_kwargs: dict | None = None,
|
|
3647
|
+
kwargs: dict | None = None,
|
|
3648
|
+
processes: int = 1,
|
|
3649
|
+
) -> tuple[np.ndarray, np.ndarray, list[Any]]:
|
|
3650
|
+
"""Run a function for each pixel of a 3d array."""
|
|
3651
|
+
index_aligned_kwargs = index_aligned_kwargs or {}
|
|
3652
|
+
kwargs = kwargs or {}
|
|
3653
|
+
|
|
3654
|
+
if mask_array is not None:
|
|
3655
|
+
# skip pixels where all values are masked
|
|
3656
|
+
not_all_missing = np.all(mask_array, axis=0) == False
|
|
3657
|
+
else:
|
|
3658
|
+
mask_array = np.full(values.shape, False)
|
|
3659
|
+
not_all_missing = np.full(values.shape[1:], True)
|
|
3660
|
+
|
|
3661
|
+
def select_pixel_values(row: int, col: int) -> np.ndarray:
|
|
3662
|
+
return values[~mask_array[:, row, col], row, col]
|
|
3663
|
+
|
|
3664
|
+
# loop through long 1d arrays of aligned row and col indices
|
|
3665
|
+
nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
|
|
3666
|
+
with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
|
|
3667
|
+
results: list[Any] = parallel(
|
|
3668
|
+
joblib.delayed(func)(
|
|
3669
|
+
select_pixel_values(row, col),
|
|
3670
|
+
**kwargs,
|
|
3671
|
+
**{
|
|
3672
|
+
key: value[~mask_array[:, row, col]]
|
|
3673
|
+
for key, value in index_aligned_kwargs.items()
|
|
3674
|
+
},
|
|
3675
|
+
)
|
|
3676
|
+
for row, col in (
|
|
3677
|
+
zip(nonmissing_row_indices, nonmissing_col_indices, strict=True)
|
|
3678
|
+
)
|
|
3679
|
+
)
|
|
3565
3680
|
|
|
3566
|
-
return
|
|
3681
|
+
return nonmissing_row_indices, nonmissing_col_indices, results
|