ssb-sgis 1.1.17__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +5 -0
- sgis/conf.py +18 -0
- sgis/geopandas_tools/buffer_dissolve_explode.py +25 -47
- sgis/geopandas_tools/conversion.py +18 -25
- sgis/geopandas_tools/duplicates.py +45 -60
- sgis/geopandas_tools/general.py +69 -114
- sgis/geopandas_tools/neighbors.py +25 -4
- sgis/geopandas_tools/overlay.py +178 -256
- sgis/geopandas_tools/polygon_operations.py +68 -88
- sgis/geopandas_tools/runners.py +326 -0
- sgis/geopandas_tools/sfilter.py +42 -24
- sgis/geopandas_tools/utils.py +37 -0
- sgis/helpers.py +1 -1
- sgis/io/dapla_functions.py +96 -107
- sgis/maps/map.py +3 -1
- sgis/parallel/parallel.py +32 -24
- sgis/raster/image_collection.py +184 -162
- sgis/raster/indices.py +0 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/METADATA +1 -1
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/RECORD +22 -20
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.1.17.dist-info → ssb_sgis-1.2.1.dist-info}/WHEEL +0 -0
sgis/raster/image_collection.py
CHANGED
|
@@ -41,12 +41,6 @@ from shapely.geometry import MultiPolygon
|
|
|
41
41
|
from shapely.geometry import Point
|
|
42
42
|
from shapely.geometry import Polygon
|
|
43
43
|
|
|
44
|
-
try:
|
|
45
|
-
import dapla as dp
|
|
46
|
-
except ImportError:
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
|
-
|
|
50
44
|
try:
|
|
51
45
|
from google.auth import exceptions
|
|
52
46
|
except ImportError:
|
|
@@ -59,12 +53,16 @@ except ImportError:
|
|
|
59
53
|
|
|
60
54
|
|
|
61
55
|
try:
|
|
56
|
+
from gcsfs import GCSFileSystem
|
|
62
57
|
from gcsfs.core import GCSFile
|
|
63
58
|
except ImportError:
|
|
64
59
|
|
|
65
60
|
class GCSFile:
|
|
66
61
|
"""Placeholder."""
|
|
67
62
|
|
|
63
|
+
class GCSFileSystem:
|
|
64
|
+
"""Placeholder."""
|
|
65
|
+
|
|
68
66
|
|
|
69
67
|
try:
|
|
70
68
|
from rioxarray.exceptions import NoDataInBounds
|
|
@@ -103,7 +101,6 @@ from ..geopandas_tools.conversion import to_geoseries
|
|
|
103
101
|
from ..geopandas_tools.conversion import to_shapely
|
|
104
102
|
from ..geopandas_tools.general import get_common_crs
|
|
105
103
|
from ..helpers import _fix_path
|
|
106
|
-
from ..helpers import get_all_files
|
|
107
104
|
from ..helpers import get_numpy_func
|
|
108
105
|
from ..helpers import is_method
|
|
109
106
|
from ..helpers import is_property
|
|
@@ -132,22 +129,23 @@ from .zonal import _zonal_post
|
|
|
132
129
|
if is_dapla():
|
|
133
130
|
|
|
134
131
|
def _ls_func(*args, **kwargs) -> list[str]:
|
|
135
|
-
return
|
|
132
|
+
return GCSFileSystem().ls(*args, **kwargs)
|
|
136
133
|
|
|
137
134
|
def _glob_func(*args, **kwargs) -> list[str]:
|
|
138
|
-
return
|
|
135
|
+
return GCSFileSystem().glob(*args, **kwargs)
|
|
139
136
|
|
|
140
137
|
def _open_func(*args, **kwargs) -> GCSFile:
|
|
141
|
-
return
|
|
142
|
-
|
|
143
|
-
def _read_parquet_func(*args, **kwargs) -> list[str]:
|
|
144
|
-
return dp.read_pandas(*args, **kwargs)
|
|
138
|
+
return GCSFileSystem().open(*args, **kwargs)
|
|
145
139
|
|
|
146
140
|
else:
|
|
147
|
-
|
|
141
|
+
|
|
142
|
+
def _ls_func(path):
|
|
143
|
+
return glob.glob(str(Path(path) / "**"), recursive=False)
|
|
144
|
+
|
|
145
|
+
def _glob_func(path, **kwargs):
|
|
146
|
+
return glob.glob(path, recursive=True, **kwargs)
|
|
147
|
+
|
|
148
148
|
_open_func = open
|
|
149
|
-
_glob_func = glob.glob
|
|
150
|
-
_read_parquet_func = pd.read_parquet
|
|
151
149
|
|
|
152
150
|
DATE_RANGES_TYPE = (
|
|
153
151
|
tuple[str | pd.Timestamp | None, str | pd.Timestamp | None]
|
|
@@ -181,15 +179,12 @@ ALLOWED_INIT_KWARGS = [
|
|
|
181
179
|
_LOAD_COUNTER: int = 0
|
|
182
180
|
|
|
183
181
|
|
|
184
|
-
def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
|
|
185
|
-
with ThreadPoolExecutor() as executor:
|
|
186
|
-
all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
|
|
187
|
-
return set(itertools.chain.from_iterable(all_paths))
|
|
188
|
-
|
|
189
|
-
|
|
190
182
|
@dataclass
|
|
191
183
|
class PixelwiseResults:
|
|
192
|
-
"""Container of pixelwise results to be converted to numpy/geopandas.
|
|
184
|
+
"""Container of pixelwise results to be converted to dict/tuple/numpy/geopandas.
|
|
185
|
+
|
|
186
|
+
Not to be initialised by user.
|
|
187
|
+
"""
|
|
193
188
|
|
|
194
189
|
row_indices: np.ndarray
|
|
195
190
|
col_indices: np.ndarray
|
|
@@ -213,8 +208,34 @@ class PixelwiseResults:
|
|
|
213
208
|
)
|
|
214
209
|
}
|
|
215
210
|
|
|
211
|
+
def to_pandas(self, column: str | list[str] = "value") -> GeoDataFrame:
|
|
212
|
+
"""Return DataFrame with 2 dim index and values from the pixelwise operation."""
|
|
213
|
+
return pd.DataFrame(
|
|
214
|
+
{
|
|
215
|
+
**(
|
|
216
|
+
{col: [x[i] for x in self.results] for i, col in enumerate(column)}
|
|
217
|
+
if not isinstance(column, str)
|
|
218
|
+
else {column: self.results}
|
|
219
|
+
)
|
|
220
|
+
},
|
|
221
|
+
index=[self.row_indices, self.col_indices],
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def is_empty(self) -> bool:
|
|
226
|
+
"""Returns True if all band arrays in all images have shape (0,)."""
|
|
227
|
+
return not any(x for x in self.row_indices.shape) and not any(
|
|
228
|
+
x for x in self.col_indices.shape
|
|
229
|
+
)
|
|
230
|
+
|
|
216
231
|
def to_geopandas(self, column: str | list[str] = "value") -> GeoDataFrame:
|
|
217
232
|
"""Return GeoDataFrame with pixel geometries and values from the pixelwise operation."""
|
|
233
|
+
if self.is_empty:
|
|
234
|
+
if isinstance(column, str):
|
|
235
|
+
return GeoDataFrame({"geometry": [], column: []}, crs=self.crs)
|
|
236
|
+
return GeoDataFrame(
|
|
237
|
+
{"geometry": [], **{col: [] for col in column}}, crs=self.crs
|
|
238
|
+
)
|
|
218
239
|
resx, resy = _res_as_tuple(self.res)
|
|
219
240
|
|
|
220
241
|
# work ourselves inwards from the bottom left and top right corners
|
|
@@ -223,7 +244,6 @@ class PixelwiseResults:
|
|
|
223
244
|
maxys = np.full(self.row_indices.shape, maxy) - (self.row_indices * resy)
|
|
224
245
|
maxxs = minxs + resx
|
|
225
246
|
minys = maxys - resy
|
|
226
|
-
|
|
227
247
|
return GeoDataFrame(
|
|
228
248
|
{
|
|
229
249
|
**(
|
|
@@ -244,6 +264,8 @@ class PixelwiseResults:
|
|
|
244
264
|
|
|
245
265
|
def to_numpy(self) -> np.ndarray | tuple[np.ndarray, ...]:
|
|
246
266
|
"""Reshape pixelwise results to 2d numpy arrays in the shape of the full arrays of the image bands."""
|
|
267
|
+
if self.is_empty:
|
|
268
|
+
return tuple(np.array([]) for _ in range(len(self.results)))
|
|
247
269
|
try:
|
|
248
270
|
n_out_arrays = len(next(iter(self.results)))
|
|
249
271
|
except TypeError:
|
|
@@ -310,7 +332,7 @@ class ImageCollectionGroupBy:
|
|
|
310
332
|
) -> "ImageCollection":
|
|
311
333
|
"""Merge each group into separate Bands per band_id, returned as an ImageCollection."""
|
|
312
334
|
images = self._run_func_for_collection_groups(
|
|
313
|
-
|
|
335
|
+
_merge_by_band_as_func,
|
|
314
336
|
method=method,
|
|
315
337
|
bounds=bounds,
|
|
316
338
|
as_int=as_int,
|
|
@@ -342,7 +364,7 @@ class ImageCollectionGroupBy:
|
|
|
342
364
|
) -> "Image":
|
|
343
365
|
"""Merge each group into a single Band, returned as combined Image."""
|
|
344
366
|
bands: list[Band] = self._run_func_for_collection_groups(
|
|
345
|
-
|
|
367
|
+
_merge_as_func,
|
|
346
368
|
method=method,
|
|
347
369
|
bounds=bounds,
|
|
348
370
|
as_int=as_int,
|
|
@@ -405,7 +427,10 @@ class BandMasking:
|
|
|
405
427
|
|
|
406
428
|
|
|
407
429
|
class None_:
|
|
408
|
-
"""Default None for args that
|
|
430
|
+
"""Default None for args that should not be None.
|
|
431
|
+
|
|
432
|
+
In order to raise error only in some cases.
|
|
433
|
+
"""
|
|
409
434
|
|
|
410
435
|
def __new__(cls) -> None:
|
|
411
436
|
"""Always returns None."""
|
|
@@ -449,6 +474,27 @@ class _ImageBase:
|
|
|
449
474
|
self._from_array = False
|
|
450
475
|
self._from_geopandas = False
|
|
451
476
|
|
|
477
|
+
@property
|
|
478
|
+
def path(self) -> str:
|
|
479
|
+
try:
|
|
480
|
+
return self._path
|
|
481
|
+
except AttributeError as e:
|
|
482
|
+
raise PathlessImageError(self) from e
|
|
483
|
+
|
|
484
|
+
@property
|
|
485
|
+
def res(self) -> int:
|
|
486
|
+
"""Pixel resolution."""
|
|
487
|
+
return self._res
|
|
488
|
+
|
|
489
|
+
@abstractmethod
|
|
490
|
+
def union_all(self) -> Polygon | MultiPolygon:
|
|
491
|
+
pass
|
|
492
|
+
|
|
493
|
+
def assign(self, **kwargs) -> "_ImageBase":
|
|
494
|
+
for key, value in kwargs.items():
|
|
495
|
+
self._safe_setattr(key, value)
|
|
496
|
+
return self
|
|
497
|
+
|
|
452
498
|
def _safe_setattr(
|
|
453
499
|
self, key: str, value: Any, error_obj: Exception | None = None
|
|
454
500
|
) -> None:
|
|
@@ -471,16 +517,15 @@ class _ImageBase:
|
|
|
471
517
|
|
|
472
518
|
def _metadata_to_nested_dict(
|
|
473
519
|
self,
|
|
474
|
-
metadata:
|
|
520
|
+
metadata: dict | pd.DataFrame | None,
|
|
475
521
|
) -> dict[str, dict[str, Any]]:
|
|
476
|
-
"""Construct metadata dict from dictlike
|
|
522
|
+
"""Construct metadata dict from dictlike or DataFrame.
|
|
523
|
+
|
|
524
|
+
First level keys are file paths, second level keys are attributes.
|
|
477
525
|
|
|
478
526
|
Extract metadata value:
|
|
479
527
|
>>> self.metadata[self.path]['cloud_cover_percentage']
|
|
480
528
|
"""
|
|
481
|
-
if isinstance(metadata, (str | Path | os.PathLike)):
|
|
482
|
-
metadata = _read_parquet_func(metadata)
|
|
483
|
-
|
|
484
529
|
if isinstance(metadata, pd.DataFrame):
|
|
485
530
|
|
|
486
531
|
def is_scalar(x) -> bool:
|
|
@@ -491,7 +536,7 @@ class _ImageBase:
|
|
|
491
536
|
"""Convert to None rowwise because pandas doesn't always."""
|
|
492
537
|
return x if not (is_scalar(x) and pd.isna(x)) else None
|
|
493
538
|
|
|
494
|
-
# to nested dict because pandas indexing gives rare KeyError with long strings
|
|
539
|
+
# to nested dict instead of pandas because pandas indexing gives rare KeyError with long strings
|
|
495
540
|
return {
|
|
496
541
|
_fix_path(path): {
|
|
497
542
|
attr: na_to_none(value) for attr, value in row.items()
|
|
@@ -516,40 +561,13 @@ class _ImageBase:
|
|
|
516
561
|
|
|
517
562
|
@property
|
|
518
563
|
def _common_init_kwargs_after_load(self) -> dict:
|
|
564
|
+
"""Some attributes can be wrong after loading the image."""
|
|
519
565
|
return {
|
|
520
566
|
k: v
|
|
521
567
|
for k, v in self._common_init_kwargs.items()
|
|
522
568
|
if k not in {"res", "metadata"}
|
|
523
569
|
}
|
|
524
570
|
|
|
525
|
-
@property
|
|
526
|
-
def path(self) -> str:
|
|
527
|
-
try:
|
|
528
|
-
return self._path
|
|
529
|
-
except AttributeError as e:
|
|
530
|
-
raise PathlessImageError(self) from e
|
|
531
|
-
|
|
532
|
-
@property
|
|
533
|
-
def res(self) -> int:
|
|
534
|
-
"""Pixel resolution."""
|
|
535
|
-
# if self._res is None:
|
|
536
|
-
# if self.has_array:
|
|
537
|
-
# self._res = _get_res_from_bounds(self.bounds, self.values.shape)
|
|
538
|
-
# else:
|
|
539
|
-
# with opener(self.path) as file:
|
|
540
|
-
# with rasterio.open(file) as src:
|
|
541
|
-
# self._res = src.res
|
|
542
|
-
return self._res
|
|
543
|
-
|
|
544
|
-
@abstractmethod
|
|
545
|
-
def union_all(self) -> Polygon | MultiPolygon:
|
|
546
|
-
pass
|
|
547
|
-
|
|
548
|
-
def assign(self, **kwargs) -> "_ImageBase":
|
|
549
|
-
for key, value in kwargs.items():
|
|
550
|
-
self._safe_setattr(key, value)
|
|
551
|
-
return self
|
|
552
|
-
|
|
553
571
|
def _name_regex_searcher(
|
|
554
572
|
self, group: str, patterns: tuple[re.Pattern]
|
|
555
573
|
) -> str | None:
|
|
@@ -648,21 +666,22 @@ class _ImageBase:
|
|
|
648
666
|
class _ImageBandBase(_ImageBase):
|
|
649
667
|
"""Common parent class of Image and Band."""
|
|
650
668
|
|
|
651
|
-
def intersects(
|
|
652
|
-
self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
|
|
653
|
-
) -> bool:
|
|
654
|
-
if hasattr(geometry, "crs") and not pyproj.CRS(self.crs).equals(
|
|
655
|
-
pyproj.CRS(geometry.crs)
|
|
656
|
-
):
|
|
657
|
-
raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
|
|
658
|
-
return self.union_all().intersects(to_shapely(geometry))
|
|
659
|
-
|
|
660
669
|
def union_all(self) -> Polygon:
|
|
661
670
|
try:
|
|
662
671
|
return box(*self.bounds)
|
|
663
672
|
except TypeError:
|
|
664
673
|
return Polygon()
|
|
665
674
|
|
|
675
|
+
def intersects(
|
|
676
|
+
self, geometry: GeoDataFrame | GeoSeries | Geometry | tuple | _ImageBase
|
|
677
|
+
) -> bool:
|
|
678
|
+
crs_mismatch: bool = hasattr(geometry, "crs") and not pyproj.CRS(
|
|
679
|
+
self.crs
|
|
680
|
+
).equals(pyproj.CRS(geometry.crs))
|
|
681
|
+
if crs_mismatch:
|
|
682
|
+
raise ValueError(f"crs mismatch: {self.crs} and {geometry.crs}")
|
|
683
|
+
return self.union_all().intersects(to_shapely(geometry))
|
|
684
|
+
|
|
666
685
|
@property
|
|
667
686
|
def centroid(self) -> Point:
|
|
668
687
|
"""Centerpoint of the object."""
|
|
@@ -672,19 +691,19 @@ class _ImageBandBase(_ImageBase):
|
|
|
672
691
|
def year(self) -> str:
|
|
673
692
|
if hasattr(self, "_year") and self._year:
|
|
674
693
|
return self._year
|
|
675
|
-
return
|
|
694
|
+
return pd.to_datetime(self.date).year
|
|
676
695
|
|
|
677
696
|
@property
|
|
678
697
|
def month(self) -> str:
|
|
679
698
|
if hasattr(self, "_month") and self._month:
|
|
680
699
|
return self._month
|
|
681
|
-
return
|
|
700
|
+
return pd.to_datetime(self.date).month
|
|
682
701
|
|
|
683
702
|
@property
|
|
684
703
|
def day(self) -> str:
|
|
685
704
|
if hasattr(self, "_day") and self._day:
|
|
686
705
|
return self._day
|
|
687
|
-
return
|
|
706
|
+
return pd.to_datetime(self.date).day
|
|
688
707
|
|
|
689
708
|
@property
|
|
690
709
|
def name(self) -> str | None:
|
|
@@ -744,7 +763,7 @@ class _ImageBandBase(_ImageBase):
|
|
|
744
763
|
results = None
|
|
745
764
|
for i, file_content in enumerate(file_contents.values()):
|
|
746
765
|
if isinstance(value, str) and value in dir(self):
|
|
747
|
-
# method or a hardcoded value
|
|
766
|
+
# is method or a hardcoded value
|
|
748
767
|
value: Callable | Any = getattr(self, value)
|
|
749
768
|
|
|
750
769
|
if callable(value):
|
|
@@ -1305,13 +1324,14 @@ class Band(_ImageBandBase):
|
|
|
1305
1324
|
|
|
1306
1325
|
self._path = _fix_path(str(path))
|
|
1307
1326
|
|
|
1308
|
-
def apply(self,
|
|
1309
|
-
"""Apply a function to the
|
|
1310
|
-
|
|
1327
|
+
def apply(self, func_: Callable, copy: bool = True, **kwargs) -> "Band":
|
|
1328
|
+
"""Apply a function to the array."""
|
|
1329
|
+
copied = self.copy() if copy else self
|
|
1330
|
+
results = func_(copied, **kwargs)
|
|
1311
1331
|
if isinstance(results, Band):
|
|
1312
1332
|
return results
|
|
1313
|
-
|
|
1314
|
-
return
|
|
1333
|
+
copied.values = results
|
|
1334
|
+
return copied
|
|
1315
1335
|
|
|
1316
1336
|
def sample(self, size: int = 1000, mask: Any = None, **kwargs) -> "Image":
|
|
1317
1337
|
"""Take a random spatial sample area of the Band."""
|
|
@@ -1580,8 +1600,8 @@ class Image(_ImageBandBase):
|
|
|
1580
1600
|
f"'data' must be string, Path-like or a sequence of Band. Got {data}"
|
|
1581
1601
|
)
|
|
1582
1602
|
|
|
1583
|
-
self._res = res if not (callable(res) and res() is None) else None
|
|
1584
1603
|
self._path = _fix_path(data)
|
|
1604
|
+
self._res = res if not (callable(res) and res() is None) else None
|
|
1585
1605
|
|
|
1586
1606
|
if all_file_paths is None and self.path:
|
|
1587
1607
|
self._all_file_paths = _get_all_file_paths(self.path)
|
|
@@ -1592,9 +1612,11 @@ class Image(_ImageBandBase):
|
|
|
1592
1612
|
else:
|
|
1593
1613
|
self._all_file_paths = None
|
|
1594
1614
|
|
|
1595
|
-
if
|
|
1596
|
-
|
|
1597
|
-
|
|
1615
|
+
if (
|
|
1616
|
+
self.metadata is None
|
|
1617
|
+
or not len(self.metadata)
|
|
1618
|
+
and "metadata.json" in {Path(x).name for x in self._all_file_paths}
|
|
1619
|
+
):
|
|
1598
1620
|
with _open_func(
|
|
1599
1621
|
next(
|
|
1600
1622
|
iter(
|
|
@@ -1697,7 +1719,7 @@ class Image(_ImageBandBase):
|
|
|
1697
1719
|
|
|
1698
1720
|
with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
|
|
1699
1721
|
parallel(
|
|
1700
|
-
joblib.delayed(
|
|
1722
|
+
joblib.delayed(_load_as_func)(
|
|
1701
1723
|
band,
|
|
1702
1724
|
bounds=bounds,
|
|
1703
1725
|
indexes=indexes,
|
|
@@ -1787,10 +1809,11 @@ class Image(_ImageBandBase):
|
|
|
1787
1809
|
if len(band_values) > 1:
|
|
1788
1810
|
raise ValueError(f"Different {key} values in bands: {band_values}")
|
|
1789
1811
|
elif len(band_values):
|
|
1812
|
+
value = next(iter(band_values))
|
|
1790
1813
|
try:
|
|
1791
|
-
setattr(self, key,
|
|
1814
|
+
setattr(self, key, value)
|
|
1792
1815
|
except AttributeError:
|
|
1793
|
-
setattr(self, f"_{key}",
|
|
1816
|
+
setattr(self, f"_{key}", value)
|
|
1794
1817
|
|
|
1795
1818
|
def copy(self) -> "Image":
|
|
1796
1819
|
"""Copy the instance and its attributes."""
|
|
@@ -1799,12 +1822,18 @@ class Image(_ImageBandBase):
|
|
|
1799
1822
|
band._mask = copied._mask
|
|
1800
1823
|
return copied
|
|
1801
1824
|
|
|
1802
|
-
def apply(self,
|
|
1825
|
+
def apply(self, func_: Callable, copy: bool = True, **kwargs) -> "Image":
|
|
1803
1826
|
"""Apply a function to each band of the Image."""
|
|
1804
|
-
|
|
1805
|
-
|
|
1827
|
+
copied = self.copy() if copy else self
|
|
1828
|
+
with joblib.Parallel(n_jobs=copied.processes, backend="loky") as parallel:
|
|
1829
|
+
results = parallel(joblib.delayed(func_)(band, **kwargs) for band in copied)
|
|
1830
|
+
if all(isinstance(x, Band) for x in results):
|
|
1831
|
+
copied._bands = results
|
|
1832
|
+
elif all(hasattr(x, "shape") for x in results):
|
|
1833
|
+
for band, arr in zip(copied.bands, results, strict=True):
|
|
1834
|
+
band.values = arr
|
|
1806
1835
|
|
|
1807
|
-
return
|
|
1836
|
+
return copied
|
|
1808
1837
|
|
|
1809
1838
|
def ndvi(
|
|
1810
1839
|
self, red_band: str, nir_band: str, padding: int = 0, copy: bool = True
|
|
@@ -2132,40 +2161,36 @@ class ImageCollection(_ImageBase):
|
|
|
2132
2161
|
|
|
2133
2162
|
if hasattr(data, "__iter__") and not isinstance(data, str):
|
|
2134
2163
|
self._path = None
|
|
2135
|
-
|
|
2164
|
+
data_is_images: bool = all(isinstance(x, Image) for x in data)
|
|
2165
|
+
if data_is_images:
|
|
2136
2166
|
self.images = [x.copy() for x in data]
|
|
2137
2167
|
return
|
|
2138
|
-
|
|
2139
|
-
|
|
2168
|
+
data_is_paths: bool = all(
|
|
2169
|
+
isinstance(x, (str | Path | os.PathLike)) for x in data
|
|
2170
|
+
)
|
|
2171
|
+
if data_is_paths:
|
|
2140
2172
|
try:
|
|
2173
|
+
# adding band paths (assuming 'data' is a sequence of image paths)
|
|
2141
2174
|
self._all_file_paths = _get_child_paths_threaded(data) | {
|
|
2142
2175
|
_fix_path(x) for x in data
|
|
2143
2176
|
}
|
|
2144
2177
|
except FileNotFoundError as e:
|
|
2145
2178
|
if _from_root:
|
|
2146
|
-
raise
|
|
2179
|
+
raise ValueError(
|
|
2147
2180
|
"When passing 'root', 'data' must be a sequence of image file names that have 'root' as parent path."
|
|
2148
2181
|
) from e
|
|
2149
2182
|
raise e
|
|
2150
|
-
if self.level:
|
|
2151
|
-
self._all_file_paths = {
|
|
2152
|
-
path for path in self._all_file_paths if self.level in path
|
|
2153
|
-
}
|
|
2154
|
-
self._df = self._create_metadata_df(self._all_file_paths)
|
|
2155
|
-
return
|
|
2156
2183
|
|
|
2157
|
-
|
|
2184
|
+
elif isinstance(data, (str | Path | os.PathLike)):
|
|
2185
|
+
self._path = _fix_path(str(data))
|
|
2186
|
+
self._all_file_paths = _get_all_file_paths(self.path)
|
|
2187
|
+
else:
|
|
2158
2188
|
raise TypeError("'data' must be string, Path-like or a sequence of Image.")
|
|
2159
2189
|
|
|
2160
|
-
self._path = _fix_path(str(data))
|
|
2161
|
-
|
|
2162
|
-
self._all_file_paths = _get_all_file_paths(self.path)
|
|
2163
|
-
|
|
2164
2190
|
if self.level:
|
|
2165
2191
|
self._all_file_paths = {
|
|
2166
2192
|
path for path in self._all_file_paths if self.level in path
|
|
2167
2193
|
}
|
|
2168
|
-
|
|
2169
2194
|
self._df = self._create_metadata_df(self._all_file_paths)
|
|
2170
2195
|
|
|
2171
2196
|
def groupby(
|
|
@@ -2230,16 +2255,15 @@ class ImageCollection(_ImageBase):
|
|
|
2230
2255
|
pass
|
|
2231
2256
|
return copied
|
|
2232
2257
|
|
|
2233
|
-
def apply(self,
|
|
2234
|
-
"""Apply a function to
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
for img in
|
|
2239
|
-
for band in img
|
|
2258
|
+
def apply(self, func_: Callable, copy: bool = True, **kwargs) -> "ImageCollection":
|
|
2259
|
+
"""Apply a function to each image of the collection."""
|
|
2260
|
+
copied = self.copy() if copy else self
|
|
2261
|
+
with joblib.Parallel(n_jobs=copied.processes, backend="loky") as parallel:
|
|
2262
|
+
copied.images = parallel(
|
|
2263
|
+
joblib.delayed(func_)(img, **kwargs) for img in copied
|
|
2240
2264
|
)
|
|
2241
2265
|
|
|
2242
|
-
return
|
|
2266
|
+
return copied
|
|
2243
2267
|
|
|
2244
2268
|
def pixelwise(
|
|
2245
2269
|
self,
|
|
@@ -2254,6 +2278,18 @@ class ImageCollection(_ImageBase):
|
|
|
2254
2278
|
The function should take a 1d array as first argument. This will be
|
|
2255
2279
|
the pixel values for all bands in all images in the collection.
|
|
2256
2280
|
"""
|
|
2281
|
+
if not len(self):
|
|
2282
|
+
return PixelwiseResults(
|
|
2283
|
+
np.array([]),
|
|
2284
|
+
np.array([]),
|
|
2285
|
+
np.array([]),
|
|
2286
|
+
shape=(0,),
|
|
2287
|
+
res=self.res,
|
|
2288
|
+
bounds=None,
|
|
2289
|
+
crs=None,
|
|
2290
|
+
nodata=self.nodata or np.nan,
|
|
2291
|
+
)
|
|
2292
|
+
|
|
2257
2293
|
values = np.array([band.values for img in self for band in img])
|
|
2258
2294
|
|
|
2259
2295
|
if (
|
|
@@ -2594,7 +2630,7 @@ class ImageCollection(_ImageBase):
|
|
|
2594
2630
|
)
|
|
2595
2631
|
|
|
2596
2632
|
parallel(
|
|
2597
|
-
joblib.delayed(
|
|
2633
|
+
joblib.delayed(_load_as_func)(
|
|
2598
2634
|
band,
|
|
2599
2635
|
bounds=bounds,
|
|
2600
2636
|
indexes=indexes,
|
|
@@ -2798,25 +2834,6 @@ class ImageCollection(_ImageBase):
|
|
|
2798
2834
|
return combine_by_coords(list(xarrs.values()))
|
|
2799
2835
|
# return Dataset(xarrs)
|
|
2800
2836
|
|
|
2801
|
-
def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
|
|
2802
|
-
"""Convert each band in each Image to a GeoDataFrame."""
|
|
2803
|
-
out = {}
|
|
2804
|
-
i = 0
|
|
2805
|
-
for img in self:
|
|
2806
|
-
for band in img:
|
|
2807
|
-
i += 1
|
|
2808
|
-
try:
|
|
2809
|
-
name = band.name
|
|
2810
|
-
except AttributeError:
|
|
2811
|
-
name = None
|
|
2812
|
-
|
|
2813
|
-
if name is None:
|
|
2814
|
-
name = f"{self.__class__.__name__}({i})"
|
|
2815
|
-
|
|
2816
|
-
if name not in out:
|
|
2817
|
-
out[name] = band.to_geopandas(column=column)
|
|
2818
|
-
return out
|
|
2819
|
-
|
|
2820
2837
|
def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
|
|
2821
2838
|
"""Sample one or more areas of a given size and set this as mask for the images."""
|
|
2822
2839
|
unioned = self.union_all()
|
|
@@ -2894,6 +2911,11 @@ class ImageCollection(_ImageBase):
|
|
|
2894
2911
|
if isinstance(item, int):
|
|
2895
2912
|
return self.images[item]
|
|
2896
2913
|
|
|
2914
|
+
if isinstance(item, str):
|
|
2915
|
+
return self._metadata_attribute_collection_type(
|
|
2916
|
+
[getattr(img, item) for img in self]
|
|
2917
|
+
)
|
|
2918
|
+
|
|
2897
2919
|
if isinstance(item, slice):
|
|
2898
2920
|
copied = self.copy()
|
|
2899
2921
|
copied.images = copied.images[item]
|
|
@@ -2922,7 +2944,6 @@ class ImageCollection(_ImageBase):
|
|
|
2922
2944
|
# check for base bool and numpy bool
|
|
2923
2945
|
if all("bool" in str(type(x)) for x in item):
|
|
2924
2946
|
copied.images = [img for x, img in zip(item, copied, strict=True) if x]
|
|
2925
|
-
|
|
2926
2947
|
else:
|
|
2927
2948
|
copied.images = [copied.images[i] for i in item]
|
|
2928
2949
|
return copied
|
|
@@ -2981,7 +3002,7 @@ class ImageCollection(_ImageBase):
|
|
|
2981
3002
|
)
|
|
2982
3003
|
|
|
2983
3004
|
@images.setter
|
|
2984
|
-
def images(self, new_value: list["Image"]) ->
|
|
3005
|
+
def images(self, new_value: list["Image"]) -> None:
|
|
2985
3006
|
new_value = list(new_value)
|
|
2986
3007
|
if not new_value:
|
|
2987
3008
|
self._images = new_value
|
|
@@ -2994,10 +3015,20 @@ class ImageCollection(_ImageBase):
|
|
|
2994
3015
|
img._bands = [new_value[i]]
|
|
2995
3016
|
new_images.append(img)
|
|
2996
3017
|
self._images = new_images
|
|
2997
|
-
|
|
2998
|
-
|
|
3018
|
+
elif all(hasattr(x, "shape") for x in new_value):
|
|
3019
|
+
for img, arr in zip(self._images, new_value, strict=True):
|
|
3020
|
+
img._bands = [
|
|
3021
|
+
Band(
|
|
3022
|
+
arr,
|
|
3023
|
+
bounds=img.bounds,
|
|
3024
|
+
crs=img.crs,
|
|
3025
|
+
**img._common_init_kwargs_after_load,
|
|
3026
|
+
)
|
|
3027
|
+
]
|
|
3028
|
+
elif not all(isinstance(x, Image) for x in new_value):
|
|
2999
3029
|
raise TypeError("images should be a sequence of Image.")
|
|
3000
|
-
|
|
3030
|
+
else:
|
|
3031
|
+
self._images = new_value
|
|
3001
3032
|
|
|
3002
3033
|
def union_all(self) -> Polygon | MultiPolygon:
|
|
3003
3034
|
"""(Multi)Polygon representing the union of all image bounds."""
|
|
@@ -3098,7 +3129,7 @@ class ImageCollection(_ImageBase):
|
|
|
3098
3129
|
root = f" root='{next(iter(parents))}',"
|
|
3099
3130
|
else:
|
|
3100
3131
|
data = [img for img in self]
|
|
3101
|
-
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
|
|
3132
|
+
return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}', processes={self.processes})"
|
|
3102
3133
|
|
|
3103
3134
|
|
|
3104
3135
|
class Sentinel2Config:
|
|
@@ -3171,7 +3202,6 @@ class Sentinel2Config:
|
|
|
3171
3202
|
xml_file,
|
|
3172
3203
|
(
|
|
3173
3204
|
r'<BOA_QUANTIFICATION_VALUE unit="none">(\d+)</BOA_QUANTIFICATION_VALUE>',
|
|
3174
|
-
# r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
|
|
3175
3205
|
r'<QUANTIFICATION_VALUE unit="none">?(\d+)</QUANTIFICATION_VALUE>',
|
|
3176
3206
|
),
|
|
3177
3207
|
)
|
|
@@ -3390,21 +3420,13 @@ def _clip_xarray(
|
|
|
3390
3420
|
|
|
3391
3421
|
|
|
3392
3422
|
def _get_all_file_paths(path: str) -> set[str]:
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
_glob_func(path + "/**")
|
|
3401
|
-
+ _glob_func(path + "/**/**")
|
|
3402
|
-
+ _glob_func(path + "/**/**/**")
|
|
3403
|
-
+ _glob_func(path + "/**/**/**/**")
|
|
3404
|
-
+ _glob_func(path + "/**/**/**/**/**")
|
|
3405
|
-
)
|
|
3406
|
-
)
|
|
3407
|
-
}
|
|
3423
|
+
return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
|
|
3424
|
+
|
|
3425
|
+
|
|
3426
|
+
def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
|
|
3427
|
+
with ThreadPoolExecutor() as executor:
|
|
3428
|
+
all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
|
|
3429
|
+
return set(itertools.chain.from_iterable(all_paths))
|
|
3408
3430
|
|
|
3409
3431
|
|
|
3410
3432
|
def _get_images(
|
|
@@ -3584,19 +3606,19 @@ def _read_mask_array(self: Band | Image, **kwargs) -> np.ndarray:
|
|
|
3584
3606
|
return boolean_mask
|
|
3585
3607
|
|
|
3586
3608
|
|
|
3587
|
-
def
|
|
3609
|
+
def _load_as_func(band: Band, **kwargs) -> Band:
|
|
3588
3610
|
return band.load(**kwargs)
|
|
3589
3611
|
|
|
3590
3612
|
|
|
3591
|
-
def
|
|
3592
|
-
return band.apply(
|
|
3613
|
+
def _apply_as_func(band: Band | Image, func_: Callable, **kwargs) -> Band:
|
|
3614
|
+
return band.apply(func_, **kwargs)
|
|
3593
3615
|
|
|
3594
3616
|
|
|
3595
|
-
def
|
|
3617
|
+
def _merge_by_band_as_func(collection: ImageCollection, **kwargs) -> Image:
|
|
3596
3618
|
return collection.merge_by_band(**kwargs)
|
|
3597
3619
|
|
|
3598
3620
|
|
|
3599
|
-
def
|
|
3621
|
+
def _merge_as_func(collection: ImageCollection, **kwargs) -> Band:
|
|
3600
3622
|
return collection.merge(**kwargs)
|
|
3601
3623
|
|
|
3602
3624
|
|
|
@@ -3749,4 +3771,4 @@ def pixelwise(
|
|
|
3749
3771
|
)
|
|
3750
3772
|
)
|
|
3751
3773
|
|
|
3752
|
-
return nonmissing_row_indices, nonmissing_col_indices, results
|
|
3774
|
+
return (nonmissing_row_indices, nonmissing_col_indices, results)
|