PyPI - ssb-sgis - Versions diffs - 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl - Mend

ssb-sgis 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

sgis/__init__.py +5 -5
sgis/debug_config.py +1 -0
sgis/geopandas_tools/buffer_dissolve_explode.py +3 -40
sgis/geopandas_tools/conversion.py +37 -9
sgis/geopandas_tools/general.py +330 -106
sgis/geopandas_tools/geometry_types.py +38 -33
sgis/helpers.py +8 -2
sgis/io/dapla_functions.py +33 -17
sgis/maps/explore.py +24 -34
sgis/maps/map.py +8 -1
sgis/maps/maps.py +0 -1
sgis/networkanalysis/closing_network_holes.py +100 -22
sgis/networkanalysis/cutting_lines.py +4 -147
sgis/networkanalysis/finding_isolated_networks.py +6 -0
sgis/networkanalysis/nodes.py +4 -110
sgis/parallel/parallel.py +267 -182
sgis/raster/base.py +0 -54
sgis/raster/image_collection.py +1425 -1149
sgis/raster/indices.py +2 -95
sgis/raster/regex.py +151 -0
sgis/raster/sentinel_config.py +1 -62
{ssb_sgis-1.0.5.dist-info → ssb_sgis-1.0.7.dist-info}/METADATA +1 -1
{ssb_sgis-1.0.5.dist-info → ssb_sgis-1.0.7.dist-info}/RECORD +25 -27
sgis/raster/cube.py +0 -1274
sgis/raster/cubebase.py +0 -25
sgis/raster/raster.py +0 -1475
{ssb_sgis-1.0.5.dist-info → ssb_sgis-1.0.7.dist-info}/LICENSE +0 -0
{ssb_sgis-1.0.5.dist-info → ssb_sgis-1.0.7.dist-info}/WHEEL +0 -0

sgis/raster/image_collection.py CHANGED Viewed

@@ -2,14 +2,15 @@ import datetime
 import functools
 import glob
 import itertools
-import math
 import os
 import random
 import re
+import time
 from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Iterator
 from collections.abc import Sequence
+from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import dataclass
 from pathlib import Path
@@ -26,9 +27,8 @@ from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from matplotlib.colors import LinearSegmentedColormap
+from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
-from rtree.index import Index
-from rtree.index import Property
 from scipy import stats
 from scipy.ndimage import binary_dilation
 from scipy.ndimage import binary_erosion
@@ -49,24 +49,15 @@ except ImportError:
 try:
-    from rioxarray.exceptions import NoDataInBounds
-    from rioxarray.merge import merge_arrays
-    from rioxarray.rioxarray import _generate_spatial_coords
-except ImportError:
-    pass
-try:
-    import xarray as xr
-    from xarray import DataArray
+    from google.auth import exceptions
 except ImportError:
-    class DataArray:
+    class exceptions:
         """Placeholder."""
+        class RefreshError:
+            """Placeholder."""
-try:
-    import torch
-except ImportError:
-    pass
 try:
     from gcsfs.core import GCSFile
@@ -77,33 +68,31 @@ except ImportError:
 try:
-    from torchgeo.datasets.utils import disambiguate_timestamp
+    from rioxarray.exceptions import NoDataInBounds
+    from rioxarray.merge import merge_arrays
+    from rioxarray.rioxarray import _generate_spatial_coords
 except ImportError:
-    class torch:
-        """Placeholder."""
-        class Tensor:
-            """Placeholder to reference torch.Tensor."""
+    pass
 try:
-    from torchgeo.datasets.utils import BoundingBox
+    import xarray as xr
+    from xarray import DataArray
+    from xarray import Dataset
 except ImportError:
-    class BoundingBox:
+    class DataArray:
         """Placeholder."""
-        def __init__(self, *args, **kwargs) -> None:
-            """Placeholder."""
-            raise ImportError("missing optional dependency 'torchgeo'")
+    class Dataset:
+        """Placeholder."""
 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
 from ..geopandas_tools.conversion import to_gdf
+from ..geopandas_tools.conversion import to_geoseries
 from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
+from ..helpers import _fix_path
 from ..helpers import get_all_files
 from ..helpers import get_numpy_func
 from ..io._is_dapla import is_dapla
@@ -115,6 +104,11 @@ from .base import _get_shape_from_bounds
 from .base import _get_transform_from_bounds
 from .base import get_index_mapper
 from .indices import ndvi
+from .regex import _extract_regex_match_from_string
+from .regex import _get_first_group_match
+from .regex import _get_non_optional_groups
+from .regex import _get_regexes_matches_for_df
+from .regex import _RegexError
 from .zonal import _aggregate
 from .zonal import _make_geometry_iterrows
 from .zonal import _no_overlap_df
@@ -132,9 +126,6 @@ if is_dapla():
     def _open_func(*args, **kwargs) -> GCSFile:
         return dp.FileClient.get_gcs_file_system().open(*args, **kwargs)
-    def _rm_file_func(*args, **kwargs) -> None:
-        return dp.FileClient.get_gcs_file_system().rm_file(*args, **kwargs)
     def _read_parquet_func(*args, **kwargs) -> list[str]:
         return dp.read_pandas(*args, **kwargs)
@@ -142,22 +133,25 @@ else:
     _ls_func = functools.partial(get_all_files, recursive=False)
     _open_func = open
     _glob_func = glob.glob
-    _rm_file_func = os.remove
     _read_parquet_func = pd.read_parquet
-TORCHGEO_RETURN_TYPE = dict[str, torch.Tensor | pyproj.CRS | BoundingBox]
+DATE_RANGES_TYPE = (
+    tuple[str | pd.Timestamp | None, str | pd.Timestamp | None]
+    | tuple[tuple[str | pd.Timestamp | None, str | pd.Timestamp | None], ...]
+)
 FILENAME_COL_SUFFIX = "_filename"
 DEFAULT_FILENAME_REGEX = r"""
     .*?
-    (?:_(?P<date>\d{8}(?:T\d{6})?))?  # Optional date group
+    (?:_?(?P<date>\d{8}(?:T\d{6})?))?  # Optional underscore and date group
     .*?
-    (?:_(?P<band>B\d{1,2}A|B\d{1,2}))?  # Optional band group
+    (?:_?(?P<band>B\d{1,2}A|B\d{1,2}))?  # Optional underscore and band group
     \.(?:tif|tiff|jp2)$  # End with .tif, .tiff, or .jp2
 """
 DEFAULT_IMAGE_REGEX = r"""
     .*?
-    (?:_(?P<date>\d{8}(?:T\d{6})?))?  # Optional date group
-    (?:_(?P<band>B\d{1,2}A|B\d{1,2}))?  # Optional band group
+    (?:_?(?P<date>\d{8}(?:T\d{6})?))?  # Optional underscore and date group
 """
 ALLOWED_INIT_KWARGS = [
@@ -165,15 +159,21 @@ ALLOWED_INIT_KWARGS = [
     "band_class",
     "image_regexes",
     "filename_regexes",
-    "date_format",
-    "cloud_cover_regexes",
-    "bounds_regexes",
     "all_bands",
     "crs",
+    "backend",
     "masking",
     "_merged",
 ]
+_load_counter: int = 0
+def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
+    with ThreadPoolExecutor() as executor:
+        all_paths: Iterator[set[str]] = executor.map(_ls_func, data)
+    return set(itertools.chain.from_iterable(all_paths))
 class ImageCollectionGroupBy:
     """Iterator and merger class returned from groupby.
@@ -225,7 +225,6 @@ class ImageCollectionGroupBy:
         collection = ImageCollection(
             images,
-            # TODO band_class?
             level=self.collection.level,
             **self.collection._common_init_kwargs,
         )
@@ -263,7 +262,6 @@ class ImageCollectionGroupBy:
         image = Image(
             bands,
-            # TODO band_class?
             **self.collection._common_init_kwargs,
         )
         image._merged = True
@@ -295,29 +293,40 @@ class ImageCollectionGroupBy:
 @dataclass(frozen=True)
 class BandMasking:
-    """Basically a frozen dict with forced keys."""
+    """Frozen dict with forced keys."""
     band_id: str
-    values: tuple[int]
+    values: Sequence[int] | dict[int, Any]
     def __getitem__(self, item: str) -> Any:
         """Index into attributes to mimick dict."""
         return getattr(self, item)
+class None_:
+    """Default value for keyword arguments that should not have a default."""
 class _ImageBase:
     image_regexes: ClassVar[str | None] = (DEFAULT_IMAGE_REGEX,)
     filename_regexes: ClassVar[str | tuple[str]] = (DEFAULT_FILENAME_REGEX,)
-    date_format: ClassVar[str] = "%Y%m%d"  # T%H%M%S"
+    metadata_attributes: ClassVar[dict | None] = None
     masking: ClassVar[BandMasking | None] = None
-    def __init__(self, **kwargs) -> None:
+    def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
         self._mask = None
         self._bounds = None
         self._merged = False
         self._from_array = False
         self._from_gdf = False
+        self.metadata_attributes = self.metadata_attributes or {}
+        self._path = None
+        self._metadata_from_xml = False
+        self._bbox = to_bbox(bbox) if bbox is not None else None
+        self.metadata = self._metadata_to_nested_dict(metadata)
         if self.filename_regexes:
             if isinstance(self.filename_regexes, str):
@@ -346,14 +355,45 @@ class _ImageBase:
                     f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
                 )
+    @staticmethod
+    def _metadata_to_nested_dict(
+        metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
+    ) -> dict[str, dict[str, Any]] | None:
+        if metadata is None:
+            return {}
+        if isinstance(metadata, (str | Path | os.PathLike)):
+            metadata = _read_parquet_func(metadata)
+        if isinstance(metadata, pd.DataFrame):
+            def is_scalar(x) -> bool:
+                return not hasattr(x, "__len__") or len(x) <= 1
+            def na_to_none(x) -> None:
+                """Convert to None rowwise because pandas doesn't always."""
+                return x if not (is_scalar(x) and pd.isna(x)) else None
+            # to nested dict because pandas indexing gives rare KeyError with long strings
+            metadata = {
+                _fix_path(path): {
+                    attr: na_to_none(value) for attr, value in row.items()
+                }
+                for path, row in metadata.iterrows()
+            }
+        elif is_dict_like(metadata):
+            metadata = {_fix_path(path): value for path, value in metadata.items()}
+        return metadata
     @property
     def _common_init_kwargs(self) -> dict:
         return {
-            "file_system": self.file_system,
             "processes": self.processes,
             "res": self.res,
             "bbox": self._bbox,
             "nodata": self.nodata,
+            "backend": self.backend,
+            "metadata": self.metadata,
         }
     @property
@@ -373,6 +413,14 @@ class _ImageBase:
         """Centerpoint of the object."""
         return self.union_all().centroid
+    def assign(self, **kwargs) -> "_ImageBase":
+        for key, value in kwargs.items():
+            try:
+                setattr(self, key, value)
+            except AttributeError:
+                setattr(self, f"_{key}", value)
+        return self
     def _name_regex_searcher(
         self, group: str, patterns: tuple[re.Pattern]
     ) -> str | None:
@@ -381,46 +429,55 @@ class _ImageBase:
         for pat in patterns:
             try:
                 return _get_first_group_match(pat, self.name)[group]
-                return re.match(pat, self.name).group(group)
             except (TypeError, KeyError):
                 pass
+        if isinstance(self, Band):
+            for pat in patterns:
+                try:
+                    return _get_first_group_match(
+                        pat, str(Path(self.path).parent.name)
+                    )[group]
+                except (TypeError, KeyError):
+                    pass
         if not any(group in _get_non_optional_groups(pat) for pat in patterns):
             return None
+        band_text = (
+            f" or {Path(self.path).parent.name!s}" if isinstance(self, Band) else ""
+        )
         raise ValueError(
-            f"Couldn't find group '{group}' in name {self.name} with regex patterns {patterns}"
+            f"Couldn't find group '{group}' in name {self.name}{band_text} with regex patterns {patterns}"
         )
-    def _create_metadata_df(self, file_paths: list[str]) -> pd.DataFrame:
+    def _create_metadata_df(self, file_paths: Sequence[str]) -> pd.DataFrame:
         """Create a dataframe with file paths and image paths that match regexes."""
-        df = pd.DataFrame({"file_path": file_paths})
+        df = pd.DataFrame({"file_path": list(file_paths)})
-        df["filename"] = df["file_path"].apply(lambda x: _fix_path(Path(x).name))
+        df["file_name"] = df["file_path"].apply(lambda x: Path(x).name)
-        if not self.single_banded:
-            df["image_path"] = df["file_path"].apply(
-                lambda x: _fix_path(str(Path(x).parent))
-            )
-        else:
-            df["image_path"] = df["file_path"]
+        df["image_path"] = df["file_path"].apply(
+            lambda x: _fix_path(str(Path(x).parent))
+        )
         if not len(df):
             return df
+        df = df[~df["file_path"].isin(df["image_path"])]
         if self.filename_patterns:
-            df = _get_regexes_matches_for_df(df, "filename", self.filename_patterns)
+            df = _get_regexes_matches_for_df(df, "file_name", self.filename_patterns)
             if not len(df):
                 return df
             grouped = df.drop_duplicates("image_path").set_index("image_path")
-            for col in ["file_path", "filename"]:
+            for col in ["file_path", "file_name"]:
                 if col in df:
                     grouped[col] = df.groupby("image_path")[col].apply(tuple)
             grouped = grouped.reset_index()
         else:
             df["file_path"] = df.groupby("image_path")["file_path"].apply(tuple)
-            df["filename"] = df.groupby("image_path")["filename"].apply(tuple)
+            df["file_name"] = df.groupby("image_path")["file_name"].apply(tuple)
             grouped = df.drop_duplicates("image_path")
         grouped["imagename"] = grouped["image_path"].apply(
@@ -446,8 +503,19 @@ class _ImageBase:
                 continue
         return copied
+    def equals(self, other) -> bool:
+        for key, value in self.__dict__.items():
+            if key.startswith("_"):
+                continue
+            if value != getattr(other, key):
+                print(key, value, getattr(other, key))
+                return False
+        return True
 class _ImageBandBase(_ImageBase):
+    """Common parent class of Image and Band."""
     def intersects(self, other: GeoDataFrame | GeoSeries | Geometry) -> bool:
         if hasattr(other, "crs") and not pyproj.CRS(self.crs).equals(
             pyproj.CRS(other.crs)
@@ -455,6 +523,12 @@ class _ImageBandBase(_ImageBase):
             raise ValueError(f"crs mismatch: {self.crs} and {other.crs}")
         return self.union_all().intersects(to_shapely(other))
+    def union_all(self) -> Polygon:
+        try:
+            return box(*self.bounds)
+        except TypeError:
+            return Polygon()
     @property
     def mask_percentage(self) -> float:
         return self.mask.values.sum() / (self.mask.width * self.mask.height) * 100
@@ -477,7 +551,7 @@ class _ImageBandBase(_ImageBase):
             return self._name
         try:
             return Path(self.path).name
-        except (ValueError, AttributeError):
+        except (ValueError, AttributeError, TypeError):
             return None
     @name.setter
@@ -488,37 +562,101 @@ class _ImageBandBase(_ImageBase):
     def stem(self) -> str | None:
         try:
             return Path(self.path).stem
-        except (AttributeError, ValueError):
+        except (AttributeError, ValueError, TypeError):
             return None
     @property
     def level(self) -> str:
         return self._name_regex_searcher("level", self.image_patterns)
-    @property
-    def mint(self) -> float:
-        return disambiguate_timestamp(self.date, self.date_format)[0]
+    def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:
-    @property
-    def maxt(self) -> float:
-        return disambiguate_timestamp(self.date, self.date_format)[1]
+        self._metadata_from_xml = True
-    def union_all(self) -> Polygon:
-        try:
-            return box(*self.bounds)
-        except TypeError:
-            return Polygon()
+        missing_metadata_attributes = {
+            key: value
+            for key, value in metadata_attributes.items()
+            if not hasattr(self, key) or getattr(self, key) is None
+        }
-    @property
-    def torch_bbox(self) -> BoundingBox:
-        bounds = GeoSeries([self.union_all()]).bounds
-        return BoundingBox(
-            minx=bounds.minx[0],
-            miny=bounds.miny[0],
-            maxx=bounds.maxx[0],
-            maxy=bounds.maxy[0],
-            mint=self.mint,
-            maxt=self.maxt,
+        nonmissing_metadata_attributes = {
+            key: getattr(self, key)
+            for key in metadata_attributes
+            if key not in missing_metadata_attributes
+        }
+        if not missing_metadata_attributes:
+            return nonmissing_metadata_attributes
+        file_contents: list[str] = []
+        for path in self._all_file_paths:
+            if ".xml" not in path:
+                continue
+            with _open_func(path, "rb") as file:
+                file_contents.append(file.read().decode("utf-8"))
+        for key, value in missing_metadata_attributes.items():
+            results = None
+            for i, filetext in enumerate(file_contents):
+                if isinstance(value, str) and value in dir(self):
+                    method = getattr(self, value)
+                    try:
+                        results = method(filetext)
+                    except _RegexError as e:
+                        if i == len(self._all_file_paths) - 1:
+                            raise e
+                        continue
+                    if results is not None:
+                        break
+                if callable(value):
+                    try:
+                        results = value(filetext)
+                    except _RegexError as e:
+                        if i == len(self._all_file_paths) - 1:
+                            raise e
+                        continue
+                    if results is not None:
+                        break
+                try:
+                    results = _extract_regex_match_from_string(filetext, value)
+                except _RegexError as e:
+                    if i == len(self._all_file_paths) - 1:
+                        raise e
+            missing_metadata_attributes[key] = results
+        return missing_metadata_attributes | nonmissing_metadata_attributes
+    def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
+        """Convert the raster to  an xarray.DataArray."""
+        if len(array.shape) == 2:
+            height, width = array.shape
+            dims = ["y", "x"]
+        elif len(array.shape) == 3:
+            height, width = array.shape[1:]
+            dims = ["band", "y", "x"]
+        else:
+            raise ValueError(
+                f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
+            )
+        coords = _generate_spatial_coords(transform, width, height)
+        attrs = {"crs": self.crs}
+        for attr in set(self.metadata_attributes).union({"date"}):
+            try:
+                attrs[attr] = getattr(self, attr)
+            except Exception:
+                pass
+        return DataArray(
+            array,
+            coords=coords,
+            dims=dims,
+            name=self.name or self.__class__.__name__,
+            attrs=attrs,
         )
@@ -526,6 +664,7 @@ class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""
     cmap: ClassVar[str | None] = None
+    backend: str = "numpy"
     @classmethod
     def from_gdf(
@@ -557,42 +696,52 @@ class Band(_ImageBandBase):
     def __init__(
         self,
-        data: str | np.ndarray,
-        res: int | None,
+        data: str | np.ndarray | None = None,
+        res: int | None_ = None_,
         crs: Any | None = None,
         bounds: tuple[float, float, float, float] | None = None,
-        cmap: str | None = None,
+        nodata: int | None = None,
+        mask: "Band | None" = None,
+        processes: int = 1,
         name: str | None = None,
-        file_system: GCSFileSystem | None = None,
         band_id: str | None = None,
-        processes: int = 1,
-        bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
-        mask: "Band | None" = None,
-        nodata: int | None = None,
+        cmap: str | None = None,
+        all_file_paths: list[str] | None = None,
         **kwargs,
     ) -> None:
         """Band initialiser."""
+        if callable(res) and isinstance(res(), None_):
+            raise TypeError("Must specify 'res'")
+        if data is None:
+            # allowing 'path' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "path" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'path'.")
+            data = kwargs.pop("path")
         super().__init__(**kwargs)
+        if isinstance(data, (str | Path | os.PathLike)) and any(
+            arg is not None for arg in [crs, bounds]
+        ):
+            raise ValueError("Can only specify 'bounds' and 'crs' if data is an array.")
         self._mask = mask
-        self._bbox = to_bbox(bbox) if bbox is not None else None
         self._values = None
-        self._crs = None
         self.nodata = nodata
+        self._crs = crs
         bounds = to_bbox(bounds) if bounds is not None else None
         self._bounds = bounds
+        self._all_file_paths = all_file_paths
         if isinstance(data, np.ndarray):
-            self.values = data
             if self._bounds is None:
                 raise ValueError("Must specify bounds when data is an array.")
             self._crs = crs
-            self.transform = _get_transform_from_bounds(
-                self._bounds, shape=self.values.shape
-            )
+            self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
             self._from_array = True
+            self.values = data
         elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError(
@@ -600,24 +749,43 @@ class Band(_ImageBandBase):
                 f"Got {type(data)}"
             )
         else:
-            self._path = str(data)
+            self._path = _fix_path(str(data))
         self._res = res
         if cmap is not None:
             self.cmap = cmap
-        self.file_system = file_system
         self._name = name
         self._band_id = band_id
         self.processes = processes
-        # if self.filename_regexes:
-        #     if isinstance(self.filename_regexes, str):
-        #         self.filename_regexes = [self.filename_regexes]
-        #     self.filename_patterns = [
-        #         re.compile(pat, flags=re.VERBOSE) for pat in self.filename_regexes
-        #     ]
-        # else:
-        #     self.filename_patterns = None
+        if self._all_file_paths:
+            self._all_file_paths = {_fix_path(path) for path in self._all_file_paths}
+            parent = _fix_path(Path(self.path).parent)
+            self._all_file_paths = {
+                path for path in self._all_file_paths if parent in path
+            }
+        if self.metadata:
+            if self.path is not None:
+                self.metadata = {
+                    key: value
+                    for key, value in self.metadata.items()
+                    if key == self.path
+                }
+            this_metadata = self.metadata[self.path]
+            for key, value in this_metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
+        elif self.metadata_attributes and self.path is not None and not self.is_mask:
+            if self._all_file_paths is None:
+                self._all_file_paths = _get_all_file_paths(str(Path(self.path).parent))
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)
     def __lt__(self, other: "Band") -> bool:
         """Makes Bands sortable by band_id."""
@@ -632,23 +800,35 @@ class Band(_ImageBandBase):
     @values.setter
     def values(self, new_val):
-        if not isinstance(new_val, np.ndarray):
-            raise TypeError(
-                f"{self.__class__.__name__} 'values' must be np.ndarray. Got {type(new_val)}"
-            )
-        self._values = new_val
+        if self.backend == "numpy" and isinstance(new_val, np.ndarray):
+            self._values = new_val
+            return
+        elif self.backend == "xarray" and isinstance(new_val, DataArray):
+            # attrs can dissappear, so doing a union
+            attrs = self._values.attrs | new_val.attrs
+            self._values = new_val
+            self._values.attrs = attrs
+            return
+        if self.backend == "numpy":
+            self._values = self._to_numpy(new_val)
+        if self.backend == "xarray":
+            if not isinstance(self._values, DataArray):
+                self._values = self._to_xarray(
+                    new_val,
+                    transform=self.transform,
+                )
+            elif isinstance(new_val, np.ndarray):
+                self._values.values = new_val
+            else:
+                self._values = new_val
     @property
     def mask(self) -> "Band":
         """Mask Band."""
         return self._mask
-    @mask.setter
-    def mask(self, values: "Band") -> None:
-        if values is not None and not isinstance(values, Band):
-            raise TypeError(f"'mask' should be of type Band. Got {type(values)}")
-        self._mask = values
     @property
     def band_id(self) -> str:
         """Band id."""
@@ -686,26 +866,24 @@ class Band(_ImageBandBase):
         )
     @property
-    def crs(self) -> str | None:
+    def crs(self) -> pyproj.CRS | None:
         """Coordinate reference system."""
-        if self._crs is not None:
-            return self._crs
-        with opener(self.path, file_system=self.file_system) as file:
-            with rasterio.open(file) as src:
-                # self._bounds = to_bbox(src.bounds)
-                self._crs = src.crs
-        return self._crs
+        if self._crs is None:
+            self._add_crs_and_bounds()
+        return pyproj.CRS(self._crs)
     @property
     def bounds(self) -> tuple[int, int, int, int] | None:
         """Bounds as tuple (minx, miny, maxx, maxy)."""
-        if self._bounds is not None:
-            return self._bounds
-        with opener(self.path, file_system=self.file_system) as file:
+        if self._bounds is None:
+            self._add_crs_and_bounds()
+        return self._bounds
+    def _add_crs_and_bounds(self) -> None:
+        with opener(self.path) as file:
             with rasterio.open(file) as src:
                 self._bounds = to_bbox(src.bounds)
                 self._crs = src.crs
-        return self._bounds
     def get_n_largest(
         self, n: int, precision: float = 0.000001, column: str = "value"
@@ -729,59 +907,64 @@ class Band(_ImageBandBase):
         df[column] = f"smallest_{n}"
         return df
+    def clip(
+        self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
+    ) -> "Band":
+        """Clip band values to geometry mask."""
+        values = _clip_xarray(
+            self.to_xarray(),
+            mask,
+            crs=self.crs,
+            **kwargs,
+        )
+        self._bounds = to_bbox(mask)
+        self.transform = _get_transform_from_bounds(self._bounds, values.shape)
+        self.values = values
+        return self
     def load(
         self,
         bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
         indexes: int | tuple[int] | None = None,
         masked: bool | None = None,
+        file_system=None,
         **kwargs,
     ) -> "Band":
         """Load and potentially clip the array.
         The array is stored in the 'values' property.
         """
+        global _load_counter
+        _load_counter += 1
         if masked is None:
             masked = True if self.mask is None else False
         bounds_was_none = bounds is None
-        try:
-            if not isinstance(self.values, np.ndarray):
-                raise ValueError()
-            has_array = True
-        except ValueError:  # also catches ArrayNotLoadedError
-            has_array = False
-        # get common bounds of function argument 'bounds' and previously set bbox
-        if bounds is None and self._bbox is None:
-            bounds = None
-        elif bounds is not None and self._bbox is None:
-            bounds = to_shapely(bounds).intersection(self.union_all())
-        elif bounds is None and self._bbox is not None:
-            bounds = to_shapely(self._bbox).intersection(self.union_all())
-        else:
-            bounds = to_shapely(bounds).intersection(to_shapely(self._bbox))
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())
         should_return_empty: bool = bounds is not None and bounds.area == 0
         if should_return_empty:
             self._values = np.array([])
             if self.mask is not None and not self.is_mask:
-                self._mask = self._mask.load()
-            # self._mask = np.ma.array([], [])
+                self._mask = self._mask.load(
+                    bounds=bounds, indexes=indexes, file_system=file_system
+                )
             self._bounds = None
             self.transform = None
+            self.values = self._values
             return self
-        if has_array and bounds_was_none:
+        if self.has_array and bounds_was_none:
             return self
-        # round down/up to integer to avoid precision trouble
         if bounds is not None:
-            #     bounds = to_bbox(bounds)
             minx, miny, maxx, maxy = to_bbox(bounds)
-            bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
-        boundless = False
+            ## round down/up to integer to avoid precision trouble
+            # bounds = (int(minx), int(miny), math.ceil(maxx), math.ceil(maxy))
+            bounds = minx, miny, maxx, maxy
         if indexes is None:
             indexes = 1
@@ -792,114 +975,132 @@ class Band(_ImageBandBase):
         # allow setting a fixed out_shape for the array, in order to make mask same shape as values
         out_shape = kwargs.pop("out_shape", None)
-        if has_array:
-            self.values = _clip_loaded_array(
-                self.values, bounds, self.transform, self.crs, out_shape, **kwargs
+        if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
+            print(self)
+            print(self.mask)
+            print(self.mask.values.shape)
+            print(self.values.shape)
+            print([int(x) for x in bounds], [int(x) for x in self.bounds])
+            raise ValueError(
+                "Cannot re-load array with different bounds. "
+                "Use .copy() to read with different bounds. "
+                "Or .clip(mask) to clip."
             )
-            self._bounds = bounds
-            self.transform = _get_transform_from_bounds(self._bounds, self.values.shape)
+        # with opener(self.path, file_system=self.file_system) as f:
+        with opener(self.path, file_system=file_system) as f:
+            with rasterio.open(f, nodata=self.nodata) as src:
+                self._res = int(src.res[0]) if not self.res else self.res
-        else:
-            with opener(self.path, file_system=self.file_system) as f:
-                with rasterio.open(f, nodata=self.nodata) as src:
-                    self._res = int(src.res[0]) if not self.res else self.res
-                    if self.nodata is None or np.isnan(self.nodata):
-                        self.nodata = src.nodata
-                    else:
-                        dtype_min_value = _get_dtype_min(src.dtypes[0])
-                        dtype_max_value = _get_dtype_max(src.dtypes[0])
-                        if (
-                            self.nodata > dtype_max_value
-                            or self.nodata < dtype_min_value
-                        ):
-                            src._dtypes = tuple(
-                                rasterio.dtypes.get_minimum_dtype(self.nodata)
-                                for _ in range(len(_indexes))
-                            )
-                    if bounds is None:
-                        if self._res != int(src.res[0]):
-                            if out_shape is None:
-                                out_shape = _get_shape_from_bounds(
-                                    to_bbox(src.bounds), self.res, indexes
-                                )
-                            self.transform = _get_transform_from_bounds(
-                                to_bbox(src.bounds), shape=out_shape
-                            )
-                        else:
-                            self.transform = src.transform
-                        self._values = src.read(
-                            indexes=indexes,
-                            out_shape=out_shape,
-                            masked=masked,
-                            **kwargs,
-                        )
-                    else:
-                        window = rasterio.windows.from_bounds(
-                            *bounds, transform=src.transform
+                if self.nodata is None or np.isnan(self.nodata):
+                    self.nodata = src.nodata
+                else:
+                    dtype_min_value = _get_dtype_min(src.dtypes[0])
+                    dtype_max_value = _get_dtype_max(src.dtypes[0])
+                    if self.nodata > dtype_max_value or self.nodata < dtype_min_value:
+                        src._dtypes = tuple(
+                            rasterio.dtypes.get_minimum_dtype(self.nodata)
+                            for _ in range(len(_indexes))
                         )
+                if bounds is None:
+                    if self._res != int(src.res[0]):
                         if out_shape is None:
                             out_shape = _get_shape_from_bounds(
-                                bounds, self.res, indexes
+                                to_bbox(src.bounds), self.res, indexes
                             )
-                        self._values = src.read(
-                            indexes=indexes,
-                            window=window,
-                            boundless=boundless,
-                            out_shape=out_shape,
-                            masked=masked,
-                            **kwargs,
+                        self.transform = _get_transform_from_bounds(
+                            to_bbox(src.bounds), shape=out_shape
                         )
+                    else:
+                        self.transform = src.transform
-                        assert out_shape == self._values.shape, (
-                            out_shape,
-                            self._values.shape,
-                        )
+                    values = src.read(
+                        indexes=indexes,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+                else:
+                    window = rasterio.windows.from_bounds(
+                        *bounds, transform=src.transform
+                    )
+                    if out_shape is None:
+                        out_shape = _get_shape_from_bounds(bounds, self.res, indexes)
+                    values = src.read(
+                        indexes=indexes,
+                        window=window,
+                        boundless=False,
+                        out_shape=out_shape,
+                        masked=masked,
+                        **kwargs,
+                    )
+                    assert out_shape == values.shape, (
+                        out_shape,
+                        values.shape,
+                    )
+                    width, height = values.shape[-2:]
+                    if width and height:
                         self.transform = rasterio.transform.from_bounds(
-                            *bounds, self.width, self.height
+                            *bounds, width, height
                         )
-                        self._bounds = bounds
-                    if self.nodata is not None and not np.isnan(self.nodata):
-                        if isinstance(self.values, np.ma.core.MaskedArray):
-                            self.values.data[self.values.data == src.nodata] = (
-                                self.nodata
-                            )
-                        else:
-                            self.values[self.values == src.nodata] = self.nodata
+                if self.nodata is not None and not np.isnan(self.nodata):
+                    if isinstance(values, np.ma.core.MaskedArray):
+                        values.data[values.data == src.nodata] = self.nodata
+                    else:
+                        values[values == src.nodata] = self.nodata
         if self.masking and self.is_mask:
-            self.values = np.isin(self.values, self.masking["values"])
+            values = np.isin(values, list(self.masking["values"]))
-        elif self.mask is not None and not isinstance(
-            self.values, np.ma.core.MaskedArray
-        ):
-            self.mask = self.mask.copy().load(
-                bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
-            )
+        elif self.mask is not None and not isinstance(values, np.ma.core.MaskedArray):
+            if not self.mask.has_array:
+                self._mask = self.mask.load(
+                    bounds=bounds, indexes=indexes, out_shape=out_shape, **kwargs
+                )
             mask_arr = self.mask.values
-            # if self.masking:
-            #     mask_arr = np.isin(mask_arr, self.masking["values"])
+            values = np.ma.array(values, mask=mask_arr, fill_value=self.nodata)
-            self._values = np.ma.array(
-                self._values, mask=mask_arr, fill_value=self.nodata
-            )
+        if bounds is not None:
+            self._bounds = to_bbox(bounds)
+        self._values = values
+        # trigger the setter
+        self.values = values
         return self
     @property
     def is_mask(self) -> bool:
         """True if the band_id is equal to the masking band_id."""
+        if self.masking is None:
+            return False
         return self.band_id == self.masking["band_id"]
+    @property
+    def has_array(self) -> bool:
+        """Whether the array is loaded."""
+        try:
+            if not isinstance(self.values, (np.ndarray | DataArray)):
+                raise ValueError()
+            return True
+        except ValueError:  # also catches ArrayNotLoadedError
+            return False
     def write(
-        self, path: str | Path, driver: str = "GTiff", compress: str = "LZW", **kwargs
+        self,
+        path: str | Path,
+        driver: str = "GTiff",
+        compress: str = "LZW",
+        file_system=None,
+        **kwargs,
     ) -> None:
         """Write the array as an image file."""
         if not hasattr(self, "_values"):
@@ -922,7 +1123,8 @@ class Band(_ImageBandBase):
             "width": self.width,
         } | kwargs
-        with opener(path, "wb", file_system=self.file_system) as f:
+        # with opener(path, "wb", file_system=self.file_system) as f:
+        with opener(path, "wb", file_system=file_system) as f:
             with rasterio.open(f, "w", **profile) as dst:
                 if dst.nodata is None:
@@ -944,17 +1146,14 @@ class Band(_ImageBandBase):
                 if isinstance(self.values, np.ma.core.MaskedArray):
                     dst.write_mask(self.values.mask)
-        self._path = str(path)
+        self._path = _fix_path(str(path))
     def apply(self, func: Callable, **kwargs) -> "Band":
-        """Apply a function to the array."""
-        self.values = func(self.values, **kwargs)
-        return self
-    def normalize(self) -> "Band":
-        """Normalize array values between 0 and 1."""
-        arr = self.values
-        self.values = (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
+        """Apply a function to the Band."""
+        results = func(self, **kwargs)
+        if isinstance(results, Band):
+            return results
+        self.values = results
         return self
     def sample(self, size: int = 1000, mask: Any = None, **kwargs) -> "Image":
@@ -1112,23 +1311,43 @@ class Band(_ImageBandBase):
         )
     def to_xarray(self) -> DataArray:
-        """Convert the raster to  an xarray.DataArray."""
-        name = self.name or self.__class__.__name__.lower()
-        coords = _generate_spatial_coords(self.transform, self.width, self.height)
-        if len(self.values.shape) == 2:
-            dims = ["y", "x"]
-        elif len(self.values.shape) == 3:
-            dims = ["band", "y", "x"]
-        else:
-            raise ValueError("Array must be 2 or 3 dimensional.")
-        return xr.DataArray(
+        """Convert the raster to an xarray.DataArray."""
+        if self.backend == "xarray":
+            return self.values
+        return self._to_xarray(
             self.values,
-            coords=coords,
-            dims=dims,
-            name=name,
-            attrs={"crs": self.crs},
+            transform=self.transform,
+            # name=self.name or self.__class__.__name__.lower(),
         )
+    def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
+        """Convert the raster to a numpy.ndarray."""
+        return self._to_numpy(self.values).copy()
+    def _to_numpy(
+        self, arr: np.ndarray | DataArray, masked: bool = True
+    ) -> np.ndarray | np.ma.core.MaskedArray:
+        if not isinstance(arr, np.ndarray):
+            if masked:
+                try:
+                    mask_arr = arr.isnull().values
+                except AttributeError:
+                    mask_arr = np.full(arr.shape, False)
+            try:
+                arr = arr.to_numpy()
+            except AttributeError:
+                arr = arr.values
+        if not isinstance(arr, np.ndarray):
+            arr = np.array(arr)
+        if (
+            masked
+            and self.mask is not None
+            and not self.is_mask
+            and not isinstance(arr, np.ma.core.MaskedArray)
+        ):
+            arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+        return arr
     def __repr__(self) -> str:
         """String representation."""
         try:
@@ -1154,211 +1373,70 @@ class NDVIBand(Band):
     #     return get_cmap(arr)
-def get_cmap(arr: np.ndarray) -> LinearSegmentedColormap:
-    # blue = [[i / 10 + 0.1, i / 10 + 0.1, 1 - (i / 10) + 0.1] for i in range(11)][1:]
-    blue = [
-        [0.1, 0.1, 1.0],
-        [0.2, 0.2, 0.9],
-        [0.3, 0.3, 0.8],
-        [0.4, 0.4, 0.7],
-        [0.6, 0.6, 0.6],
-        [0.6, 0.6, 0.6],
-        [0.7, 0.7, 0.7],
-        [0.8, 0.8, 0.8],
-    ]
-    # gray = list(reversed([[i / 10 - 0.1, i / 10, i / 10 - 0.1] for i in range(11)][1:]))
-    gray = [
-        [0.6, 0.6, 0.6],
-        [0.6, 0.6, 0.6],
-        [0.6, 0.6, 0.6],
-        [0.6, 0.6, 0.6],
-        [0.6, 0.6, 0.6],
-        [0.4, 0.7, 0.4],
-        [0.3, 0.7, 0.3],
-        [0.2, 0.8, 0.2],
-    ]
-    # gray = [[0.6, 0.6, 0.6] for i in range(10)]
-    # green = [[0.2 + i/20, i / 10 - 0.1, + i/20] for i in range(11)][1:]
-    green = [
-        [0.25, 0.0, 0.05],
-        [0.3, 0.1, 0.1],
-        [0.35, 0.2, 0.15],
-        [0.4, 0.3, 0.2],
-        [0.45, 0.4, 0.25],
-        [0.5, 0.5, 0.3],
-        [0.55, 0.6, 0.35],
-        [0.7, 0.9, 0.5],
-    ]
-    green = [
-        [0.6, 0.6, 0.6],
-        [0.4, 0.7, 0.4],
-        [0.3, 0.8, 0.3],
-        [0.25, 0.4, 0.25],
-        [0.2, 0.5, 0.2],
-        [0.10, 0.7, 0.10],
-        [0, 0.9, 0],
-    ]
-    def get_start(arr):
-        min_value = np.min(arr)
-        if min_value < -0.75:
-            return 0
-        if min_value < -0.5:
-            return 1
-        if min_value < -0.25:
-            return 2
-        if min_value < 0:
-            return 3
-        if min_value < 0.25:
-            return 4
-        if min_value < 0.5:
-            return 5
-        if min_value < 0.75:
-            return 6
-        return 7
-    def get_stop(arr):
-        max_value = np.max(arr)
-        if max_value <= 0.05:
-            return 0
-        if max_value < 0.175:
-            return 1
-        if max_value < 0.25:
-            return 2
-        if max_value < 0.375:
-            return 3
-        if max_value < 0.5:
-            return 4
-        if max_value < 0.75:
-            return 5
-        return 6
-    cmap_name = "blue_gray_green"
-    start = get_start(arr)
-    stop = get_stop(arr)
-    blue = blue[start]
-    gray = gray[start]
-    # green = green[start]
-    green = green[stop]
-    # green[0] = np.arange(0, 1, 0.1)[::-1][stop]
-    # green[1] = np.arange(0, 1, 0.1)[stop]
-    # green[2] = np.arange(0, 1, 0.1)[::-1][stop]
-    print(green)
-    print(start, stop)
-    print("blue gray green")
-    print(blue)
-    print(gray)
-    print(green)
-    # Define the segments of the colormap
-    cdict = {
-        "red": [
-            (0.0, blue[0], blue[0]),
-            (0.3, gray[0], gray[0]),
-            (0.7, gray[0], gray[0]),
-            (1.0, green[0], green[0]),
-        ],
-        "green": [
-            (0.0, blue[1], blue[1]),
-            (0.3, gray[1], gray[1]),
-            (0.7, gray[1], gray[1]),
-            (1.0, green[1], green[1]),
-        ],
-        "blue": [
-            (0.0, blue[2], blue[2]),
-            (0.3, gray[2], gray[2]),
-            (0.7, gray[2], gray[2]),
-            (1.0, green[2], green[2]),
-        ],
-    }
-    return LinearSegmentedColormap(cmap_name, segmentdata=cdict, N=50)
-def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
-    arr = np.median(arr, axis=0).astype(int)
-    min_dtype = rasterio.dtypes.get_minimum_dtype(arr)
-    return arr.astype(min_dtype)
+def median_as_int_and_minimum_dtype(arr: np.ndarray) -> np.ndarray:
+    arr = np.median(arr, axis=0).astype(int)
+    min_dtype = rasterio.dtypes.get_minimum_dtype(arr)
+    return arr.astype(min_dtype)
 class Image(_ImageBandBase):
     """Image consisting of one or more Bands."""
-    cloud_cover_regexes: ClassVar[tuple[str] | None] = None
     band_class: ClassVar[Band] = Band
+    backend: str = "numpy"
     def __init__(
         self,
-        data: str | Path | Sequence[Band],
+        data: str | Path | Sequence[Band] | None = None,
         res: int | None = None,
-        crs: Any | None = None,
-        single_banded: bool = False,
-        file_system: GCSFileSystem | None = None,
-        df: pd.DataFrame | None = None,
-        all_file_paths: list[str] | None = None,
         processes: int = 1,
-        bbox: GeoDataFrame | GeoSeries | Geometry | tuple | None = None,
+        df: pd.DataFrame | None = None,
         nodata: int | None = None,
+        all_file_paths: list[str] | None = None,
         **kwargs,
     ) -> None:
         """Image initialiser."""
+        if data is None:
+            # allowing 'bands' to replace 'data' as argument
+            # to make the print repr. valid as initialiser
+            if "bands" not in kwargs:
+                raise TypeError("Must specify either 'data' or 'bands'.")
+            data = kwargs.pop("bands")
         super().__init__(**kwargs)
         self.nodata = nodata
-        self._res = res
-        self._crs = crs
-        self.file_system = file_system
-        self._bbox = to_bbox(bbox) if bbox is not None else None
-        # self._mask = _mask
-        self.single_banded = single_banded
         self.processes = processes
-        self._all_file_paths = all_file_paths
+        self._crs = None
+        self._bands = None
         if hasattr(data, "__iter__") and all(isinstance(x, Band) for x in data):
-            self._bands = list(data)
-            if res is None:
-                res = list({band.res for band in self._bands})
-                if len(res) == 1:
-                    self._res = res[0]
-                else:
-                    raise ValueError(f"Different resolutions for the bands: {res}")
-            else:
-                self._res = res
+            self._construct_image_from_bands(data, res)
             return
-        if not isinstance(data, (str | Path | os.PathLike)):
+        elif not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError("'data' must be string, Path-like or a sequence of Band.")
-        self._bands = None
-        self._path = str(data)
+        self._res = res
+        self._path = _fix_path(data)
+        if all_file_paths is None and self.path:
+            self._all_file_paths = _get_all_file_paths(self.path)
+        elif self.path:
+            all_file_paths = {_fix_path(x) for x in all_file_paths}
+            self._all_file_paths = {x for x in all_file_paths if self.path in x}
+        else:
+            self._all_file_paths = None
         if df is None:
-            if is_dapla():
-                file_paths = list(sorted(set(_glob_func(self.path + "/**"))))
-            else:
-                file_paths = list(
-                    sorted(
-                        set(
-                            _glob_func(self.path + "/**/**")
-                            + _glob_func(self.path + "/**/**/**")
-                            + _glob_func(self.path + "/**/**/**/**")
-                            + _glob_func(self.path + "/**/**/**/**/**")
-                        )
-                    )
-                )
-            if not file_paths:
-                file_paths = [self.path]
-            df = self._create_metadata_df(file_paths)
+            if not self._all_file_paths:
+                self._all_file_paths = [self.path]
+            df = self._create_metadata_df(self._all_file_paths)
         df["image_path"] = df["image_path"].astype(str)
         cols_to_explode = [
             "file_path",
-            "filename",
+            "file_name",
             *[x for x in df if FILENAME_COL_SUFFIX in x],
         ]
         try:
@@ -1366,44 +1444,82 @@ class Image(_ImageBandBase):
         except ValueError:
             for col in cols_to_explode:
                 df = df.explode(col)
-            df = df.loc[lambda x: ~x["filename"].duplicated()].reset_index(drop=True)
+            df = df.loc[lambda x: ~x["file_name"].duplicated()].reset_index(drop=True)
+        df = df.loc[lambda x: x["image_path"] == self.path]
+        self._df = df
+        if self.path is not None and self.metadata:
+            self.metadata = {
+                key: value for key, value in self.metadata.items() if self.path in key
+            }
+        if self.metadata:
+            try:
+                metadata = self.metadata[self.path]
+            except KeyError:
+                metadata = {}
+            for key, value in metadata.items():
+                if key in dir(self):
+                    setattr(self, f"_{key}", value)
+                else:
+                    setattr(self, key, value)
-        df = df.loc[lambda x: x["image_path"].str.contains(_fix_path(self.path))]
+        else:
+            for key, value in self._get_metadata_attributes(
+                self.metadata_attributes
+            ).items():
+                setattr(self, key, value)
-        if self.cloud_cover_regexes:
-            if all_file_paths is None:
-                file_paths = _ls_func(self.path)
+    def _construct_image_from_bands(
+        self, data: Sequence[Band], res: int | None
+    ) -> None:
+        self._bands = list(data)
+        if res is None:
+            res = list({band.res for band in self.bands})
+            if len(res) == 1:
+                self._res = res[0]
             else:
-                file_paths = [path for path in all_file_paths if self.name in path]
-            self.cloud_coverage_percentage = float(
-                _get_regex_match_from_xml_in_local_dir(
-                    file_paths, regexes=self.cloud_cover_regexes
-                )
-            )
+                raise ValueError(f"Different resolutions for the bands: {res}")
         else:
-            self.cloud_coverage_percentage = None
+            self._res = res
+        for key in self.metadata_attributes:
+            band_values = {getattr(band, key) for band in self if hasattr(band, key)}
+            band_values = {x for x in band_values if x is not None}
+            if len(band_values) > 1:
+                raise ValueError(f"Different {key} values in bands: {band_values}")
+            elif len(band_values):
+                try:
+                    setattr(self, key, next(iter(band_values)))
+                except AttributeError:
+                    setattr(self, f"_{key}", next(iter(band_values)))
-        self._df = df
+    def copy(self) -> "Image":
+        """Copy the instance and its attributes."""
+        copied = super().copy()
+        for band in copied:
+            band._mask = copied._mask
+        return copied
-    @property
-    def values(self) -> np.ndarray:
-        """3 dimensional numpy array."""
-        return np.array([band.values for band in self])
+    def apply(self, func: Callable, **kwargs) -> "Image":
+        """Apply a function to each band of the Image."""
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(joblib.delayed(_band_apply)(band, func, **kwargs) for band in self)
+        return self
-    def ndvi(self, red_band: str, nir_band: str, copy: bool = True) -> NDVIBand:
+    def ndvi(
+        self, red_band: str, nir_band: str, padding: int = 0, copy: bool = True
+    ) -> NDVIBand:
         """Calculate the NDVI for the Image."""
         copied = self.copy() if copy else self
         red = copied[red_band].load()
         nir = copied[nir_band].load()
-        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(red.values, nir.values)
-        # if self.nodata is not None and not np.isnan(self.nodata):
-        #     try:
-        #         arr.data[arr.mask] = self.nodata
-        #         arr = arr.copy()
-        #     except AttributeError:
-        #         pass
+        arr: np.ndarray | np.ma.core.MaskedArray = ndvi(
+            red.values, nir.values, padding=padding
+        )
         return NDVIBand(
             arr,
@@ -1445,37 +1561,63 @@ class Image(_ImageBandBase):
             **self._common_init_kwargs,
         )
+    def to_xarray(self) -> DataArray:
+        """Convert the raster to  an xarray.DataArray."""
+        if self.backend == "xarray":
+            return self.values
+        return self._to_xarray(
+            np.array([band.values for band in self]),
+            transform=self[0].transform,
+        )
     @property
     def mask(self) -> Band | None:
         """Mask Band."""
-        if self._mask is not None:
-            return self._mask
         if self.masking is None:
             return None
+        elif self._mask is not None:
+            return self._mask
+        elif self._bands is not None and all(band.mask is not None for band in self):
+            if len({id(band.mask) for band in self}) > 1:
+                raise ValueError(
+                    "Image bands must have same mask.",
+                    {id(band.mask) for band in self},
+                )  # TODO
+            self._mask = next(
+                iter([band.mask for band in self if band.mask is not None])
+            )
+            return self._mask
         mask_band_id = self.masking["band_id"]
-        mask_paths = [path for path in self._df["file_path"] if mask_band_id in path]
+        mask_paths = [path for path in self._all_file_paths if mask_band_id in path]
         if len(mask_paths) > 1:
             raise ValueError(
                 f"Multiple file_paths match mask band_id {mask_band_id} for {self.path}"
             )
         elif not mask_paths:
             raise ValueError(
-                f"No file_paths match mask band_id {mask_band_id} for {self.path}"
+                f"No file_paths match mask band_id {mask_band_id} for {self.path} among "
+                + str([Path(x).name for x in _ls_func(self.path)])
             )
         self._mask = self.band_class(
             mask_paths[0],
             **self._common_init_kwargs,
         )
+        if self._bands is not None:
+            for band in self:
+                band._mask = self._mask
         return self._mask
     @mask.setter
-    def mask(self, values: Band) -> None:
+    def mask(self, values: Band | None) -> None:
         if values is None:
             self._mask = None
             for band in self:
-                band.mask = None
+                band._mask = None
             return
         if not isinstance(values, Band):
             raise TypeError(f"mask must be Band. Got {type(values)}")
@@ -1485,7 +1627,7 @@ class Image(_ImageBandBase):
             band._mask = self._mask
             try:
                 band.values = np.ma.array(
-                    band.values, mask=mask_arr, fill_value=band.nodata
+                    band.values.data, mask=mask_arr, fill_value=band.nodata
                 )
             except ArrayNotLoadedError:
                 pass
@@ -1506,45 +1648,24 @@ class Image(_ImageBandBase):
         if self._bands is not None:
             return self._bands
-        # if self.masking:
-        #     mask_band_id = self.masking["band_id"]
-        #     mask_paths = [
-        #         path for path in self._df["file_path"] if mask_band_id in path
-        #     ]
-        #     if len(mask_paths) > 1:
-        #         raise ValueError(
-        #             f"Multiple file_paths match mask band_id {mask_band_id}"
-        #         )
-        #     elif not mask_paths:
-        #         raise ValueError(f"No file_paths match mask band_id {mask_band_id}")
-        #     arr = (
-        #         self.band_class(
-        #             mask_paths[0],
-        #             # mask=self.mask,
-        #             **self._common_init_kwargs,
-        #         )
-        #         .load()
-        #         .values
-        #     )
-        #     self._mask = np.ma.array(
-        #         arr, mask=np.isin(arr, self.masking["values"]), fill_value=None
-        #     )
+        if self.masking:
+            mask_band_id = self.masking["band_id"]
+            paths = [path for path in self._df["file_path"] if mask_band_id not in path]
+        else:
+            paths = self._df["file_path"]
+        mask = self.mask
         self._bands = [
             self.band_class(
                 path,
-                mask=self.mask,
+                mask=mask,
+                all_file_paths=self._all_file_paths,
                 **self._common_init_kwargs,
             )
-            for path in (self._df["file_path"])
+            for path in paths
         ]
-        if self.masking:
-            mask_band_id = self.masking["band_id"]
-            self._bands = [
-                band for band in self._bands if mask_band_id not in band.path
-            ]
         if (
             self.filename_patterns
             and any(_get_non_optional_groups(pat) for pat in self.filename_patterns)
@@ -1557,11 +1678,7 @@ class Image(_ImageBandBase):
             self._bands = [
                 band
                 for band in self._bands
-                if any(
-                    # _get_first_group_match(pat, band.name)
-                    re.search(pat, band.name)
-                    for pat in self.filename_patterns
-                )
+                if any(re.search(pat, band.name) for pat in self.filename_patterns)
             ]
         if self.image_patterns:
@@ -1570,7 +1687,6 @@ class Image(_ImageBandBase):
                 for band in self._bands
                 if any(
                     re.search(pat, Path(band.path).parent.name)
-                    # _get_first_group_match(pat, Path(band.path).parent.name)
                     for pat in self.image_patterns
                 )
             ]
@@ -1583,10 +1699,14 @@ class Image(_ImageBandBase):
     @property
     def _should_be_sorted(self) -> bool:
         sort_groups = ["band", "band_id"]
-        return self.filename_patterns and any(
-            group in _get_non_optional_groups(pat)
-            for group in sort_groups
-            for pat in self.filename_patterns
+        return (
+            self.filename_patterns
+            and any(
+                group in _get_non_optional_groups(pat)
+                for group in sort_groups
+                for pat in self.filename_patterns
+            )
+            or all(band.band_id is not None for band in self)
         )
     @property
@@ -1621,7 +1741,14 @@ class Image(_ImageBandBase):
     @property
     def bounds(self) -> tuple[int, int, int, int] | None:
         """Bounds of the Image (minx, miny, maxx, maxy)."""
-        return get_total_bounds([band.bounds for band in self])
+        try:
+            return get_total_bounds([band.bounds for band in self])
+        except exceptions.RefreshError:
+            bounds = []
+            for band in self:
+                time.sleep(0.1)
+                bounds.append(band.bounds)
+            return get_total_bounds(bounds)
     def to_gdf(self, column: str = "value") -> GeoDataFrame:
         """Convert the array to a GeoDataFrame of grid polygons and values."""
@@ -1647,7 +1774,7 @@ class Image(_ImageBandBase):
     def __getitem__(
         self, band: str | int | Sequence[str] | Sequence[int]
     ) -> "Band | Image":
-        """Get bands by band_id or integer index.
+        """Get bands by band_id or integer index or a sequence of such.
         Returns a Band if a string or int is passed,
         returns an Image if a sequence of strings or integers is passed.
@@ -1655,7 +1782,7 @@ class Image(_ImageBandBase):
         if isinstance(band, str):
             return self._get_band(band)
         if isinstance(band, int):
-            return self.bands[band]  # .copy()
+            return self.bands[band]
         copied = self.copy()
         try:
@@ -1681,10 +1808,7 @@ class Image(_ImageBandBase):
         try:
             return self.date < other.date
         except Exception as e:
-            print(self.path)
-            print(self.date)
-            print(other.path)
-            print(other.date)
+            print("", self.path, self.date, other.path, other.date, sep="\n")
             raise e
     def __iter__(self) -> Iterator[Band]:
@@ -1743,103 +1867,73 @@ class ImageCollection(_ImageBase):
     image_class: ClassVar[Image] = Image
     band_class: ClassVar[Band] = Band
+    _metadata_attribute_collection_type: ClassVar[type] = pd.Series
+    backend: str = "numpy"
     def __init__(
         self,
-        data: str | Path | Sequence[Image],
+        data: str | Path | Sequence[Image] | Sequence[str | Path],
         res: int,
-        level: str | None,
-        crs: Any | None = None,
-        single_banded: bool = False,
+        level: str | None = None_,
         processes: int = 1,
-        file_system: GCSFileSystem | None = None,
-        df: pd.DataFrame | None = None,
-        bbox: Any | None = None,
-        nodata: int | None = None,
         metadata: str | dict | pd.DataFrame | None = None,
+        nodata: int | None = None,
         **kwargs,
     ) -> None:
         """Initialiser."""
-        super().__init__(**kwargs)
+        if data is not None and kwargs.get("root"):
+            root = _fix_path(kwargs.pop("root"))
+            data = [f"{root}/{name}" for name in data]
+            _from_root = True
+        else:
+            _from_root = False
+        super().__init__(metadata=metadata, **kwargs)
+        if callable(level) and isinstance(level(), None_):
+            level = None
         self.nodata = nodata
         self.level = level
-        self._crs = crs
         self.processes = processes
-        self.file_system = file_system
         self._res = res
-        self._bbox = to_bbox(bbox) if bbox is not None else None
-        self._band_ids = None
-        self.single_banded = single_banded
+        self._crs = None
-        if metadata is not None:
-            if isinstance(metadata, (str | Path | os.PathLike)):
-                self.metadata = _read_parquet_func(metadata)
-            else:
-                self.metadata = metadata
-        else:
-            self.metadata = metadata
+        self._df = None
+        self._all_file_paths = None
+        self._images = None
-        if hasattr(data, "__iter__") and all(isinstance(x, Image) for x in data):
+        if hasattr(data, "__iter__") and not isinstance(data, str):
             self._path = None
-            self.images = [x.copy() for x in data]
-            return
-        else:
-            self._images = None
+            if all(isinstance(x, Image) for x in data):
+                self.images = [x.copy() for x in data]
+                return
+            elif all(isinstance(x, (str | Path | os.PathLike)) for x in data):
+                # adding band paths (asuming 'data' is a sequence of image paths)
+                try:
+                    self._all_file_paths = _get_child_paths_threaded(data) | set(data)
+                except FileNotFoundError as e:
+                    if _from_root:
+                        raise TypeError(
+                            "When passing 'root', 'data' must be a sequence of image names that have 'root' as parent path."
+                        ) from e
+                    raise e
+                self._df = self._create_metadata_df(self._all_file_paths)
+                return
         if not isinstance(data, (str | Path | os.PathLike)):
             raise TypeError("'data' must be string, Path-like or a sequence of Image.")
-        self._path = str(data)
+        self._path = _fix_path(str(data))
-        if is_dapla():
-            self._all_file_paths = list(sorted(set(_glob_func(self.path + "/**"))))
-        else:
-            self._all_file_paths = list(
-                sorted(
-                    set(
-                        _glob_func(self.path + "/**/**")
-                        + _glob_func(self.path + "/**/**/**")
-                        + _glob_func(self.path + "/**/**/**/**")
-                        + _glob_func(self.path + "/**/**/**/**/**")
-                    )
-                )
-            )
+        self._all_file_paths = _get_all_file_paths(self.path)
         if self.level:
             self._all_file_paths = [
                 path for path in self._all_file_paths if self.level in path
             ]
-        if df is not None:
-            self._df = df
-        else:
-            self._df = self._create_metadata_df(self._all_file_paths)
-    @property
-    def values(self) -> np.ndarray:
-        """4 dimensional numpy array."""
-        return np.array([img.values for img in self])
-    @property
-    def mask(self) -> np.ndarray:
-        """4 dimensional numpy array."""
-        return np.array([img.mask.values for img in self])
-    # def ndvi(
-    #     self, red_band: str, nir_band: str, copy: bool = True
-    # ) -> "ImageCollection":
-    #     # copied = self.copy() if copy else self
-    #     with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
-    #         ndvi_images = parallel(
-    #             joblib.delayed(_img_ndvi)(
-    #                 img, red_band=red_band, nir_band=nir_band, copy=False
-    #             )
-    #             for img in self
-    #         )
-    #     return ImageCollection(ndvi_images, single_banded=True)
+        self._df = self._create_metadata_df(self._all_file_paths)
     def groupby(self, by: str | list[str], **kwargs) -> ImageCollectionGroupBy:
         """Group the Collection by Image or Band attribute(s)."""
@@ -1882,7 +1976,6 @@ class ImageCollection(_ImageBase):
         copied.images = [
             self.image_class(
                 [band],
-                single_banded=True,
                 masking=self.masking,
                 band_class=self.band_class,
                 **self._common_init_kwargs,
@@ -1892,6 +1985,64 @@ class ImageCollection(_ImageBase):
             for img in self
             for band in img
         ]
+        for img in copied:
+            assert len(img) == 1
+            try:
+                img._path = _fix_path(img[0].path)
+            except PathlessImageError:
+                pass
+        return copied
+    def apply(self, func: Callable, **kwargs) -> "ImageCollection":
+        """Apply a function to all bands in each image of the collection."""
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(
+                joblib.delayed(_band_apply)(band, func, **kwargs)
+                for img in self
+                for band in img
+            )
+        return self
+    def get_unique_band_ids(self) -> list[str]:
+        """Get a list of unique band_ids across all images."""
+        return list({band.band_id for img in self for band in img})
+    def filter(
+        self,
+        bands: str | list[str] | None = None,
+        date_ranges: DATE_RANGES_TYPE = None,
+        bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
+        intersects: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
+        max_cloud_cover: int | None = None,
+        copy: bool = True,
+    ) -> "ImageCollection":
+        """Filter images and bands in the collection."""
+        copied = self.copy() if copy else self
+        if date_ranges:
+            copied = copied._filter_dates(date_ranges)
+        if max_cloud_cover is not None:
+            copied.images = [
+                image
+                for image in copied.images
+                if image.cloud_cover_percentage < max_cloud_cover
+            ]
+        if bbox is not None:
+            copied = copied._filter_bounds(bbox)
+            copied._set_bbox(bbox)
+        if intersects is not None:
+            copied = copied._filter_bounds(intersects)
+        if bands is not None:
+            if isinstance(bands, str):
+                bands = [bands]
+            bands = set(bands)
+            copied.images = [img[bands] for img in copied.images if bands in img]
         return copied
     def merge(
@@ -1903,8 +2054,11 @@ class ImageCollection(_ImageBase):
         **kwargs,
     ) -> Band:
         """Merge all areas and all bands to a single Band."""
-        bounds = to_bbox(bounds) if bounds is not None else self._bbox
-        crs = self.crs
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())
+        if bounds is not None:
+            bounds = to_bbox(bounds)
+        crs = self.crs
         if indexes is None:
             indexes = 1
@@ -1938,14 +2092,14 @@ class ImageCollection(_ImageBase):
                 **kwargs,
             )
-        if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
-            arr = arr[0]
+            if isinstance(indexes, int) and len(arr.shape) == 3 and arr.shape[0] == 1:
+                arr = arr[0]
-        if method == "mean":
-            if as_int:
-                arr = arr // len(datasets)
-            else:
-                arr = arr / len(datasets)
+            if method == "mean":
+                if as_int:
+                    arr = arr // len(datasets)
+                else:
+                    arr = arr / len(datasets)
         if bounds is None:
             bounds = self.bounds
@@ -1971,7 +2125,9 @@ class ImageCollection(_ImageBase):
         **kwargs,
     ) -> Image:
         """Merge all areas to a single tile, one band per band_id."""
-        bounds = to_bbox(bounds) if bounds is not None else self._bbox
+        bounds = _get_bounds(bounds, self._bbox, self.union_all())
+        if bounds is not None:
+            bounds = to_bbox(bounds)
         bounds = self.bounds if bounds is None else bounds
         out_bounds = bounds
         crs = self.crs
@@ -2031,7 +2187,7 @@ class ImageCollection(_ImageBase):
                 )
             )
-        # return self.image_class(
+        # return self.image_class( # TODO
         image = Image(
             bands,
             band_class=self.band_class,
@@ -2066,10 +2222,13 @@ class ImageCollection(_ImageBase):
             arr = np.array(
                 [
                     (
-                        band.load(
-                            bounds=(_bounds if _bounds is not None else None),
-                            **kwargs,
-                        )
+                        # band.load(
+                        #     bounds=(_bounds if _bounds is not None else None),
+                        #     **kwargs,
+                        # )
+                        # if not band.has_array
+                        # else
+                        band
                     ).values
                     for img in collection
                     for band in img
@@ -2092,7 +2251,7 @@ class ImageCollection(_ImageBase):
             coords = _generate_spatial_coords(transform, width, height)
             arrs.append(
-                xr.DataArray(
+                DataArray(
                     arr,
                     coords=coords,
                     dims=["y", "x"],
@@ -2109,7 +2268,7 @@ class ImageCollection(_ImageBase):
         return merged.to_numpy()
     def sort_images(self, ascending: bool = True) -> "ImageCollection":
-        """Sort Images by date."""
+        """Sort Images by date, then file path if date attribute is missing."""
         self._images = (
             list(sorted([img for img in self if img.date is not None]))
             + sorted(
@@ -2126,20 +2285,56 @@ class ImageCollection(_ImageBase):
         self,
         bounds: tuple | Geometry | GeoDataFrame | GeoSeries | None = None,
         indexes: int | tuple[int] | None = None,
+        file_system=None,
         **kwargs,
     ) -> "ImageCollection":
         """Load all image Bands with threading."""
+        if (
+            bounds is None
+            and indexes is None
+            and all(band.has_array for img in self for band in img)
+        ):
+            return self
+        # if self.processes == 1:
+        #     for img in self:
+        #         for band in img:
+        #             band.load(
+        #                 bounds=bounds,
+        #                 indexes=indexes,
+        #                 file_system=file_system,
+        #                 **kwargs,
+        #             )
+        #     return self
         with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
             if self.masking:
                 parallel(
                     joblib.delayed(_load_band)(
-                        img.mask, bounds=bounds, indexes=indexes, **kwargs
+                        img.mask,
+                        bounds=bounds,
+                        indexes=indexes,
+                        file_system=file_system,
+                        **kwargs,
                     )
                     for img in self
                 )
+                for img in self:
+                    for band in img:
+                        band._mask = img.mask
+                # print({img.mask.has_array for img in self })
+                # print({band.mask.has_array for img in self for band in img})
+            # with joblib.Parallel(n_jobs=self.processes, backend="threading") as parallel:
             parallel(
                 joblib.delayed(_load_band)(
-                    band, bounds=bounds, indexes=indexes, **kwargs
+                    band,
+                    bounds=bounds,
+                    indexes=indexes,
+                    file_system=file_system,
+                    **kwargs,
                 )
                 for img in self
                 for band in img
@@ -2147,7 +2342,28 @@ class ImageCollection(_ImageBase):
         return self
-    def set_bbox(
+    def clip(
+        self,
+        mask: Geometry | GeoDataFrame | GeoSeries,
+        **kwargs,
+    ) -> "ImageCollection":
+        """Clip all image Bands with 'loky'."""
+        if self.processes == 1:
+            for img in self:
+                for band in img:
+                    band.clip(mask, **kwargs)
+            return self
+        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+            parallel(
+                joblib.delayed(_clip_band)(band, mask, **kwargs)
+                for img in self
+                for band in img
+            )
+        return self
+    def _set_bbox(
         self, bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float]
     ) -> "ImageCollection":
         """Set the mask to be used to clip the images to."""
@@ -2156,86 +2372,23 @@ class ImageCollection(_ImageBase):
         if self._images is not None:
             for img in self._images:
                 img._bbox = self._bbox
-                if img._bands is not None:
-                    for band in img:
-                        band._bbox = self._bbox
-                        bounds = box(*band._bbox).intersection(box(*band.bounds))
-                        band._bounds = to_bbox(bounds) if not bounds.is_empty else None
-        return self
+                if img.mask is not None:
+                    img.mask._bbox = self._bbox
+                if img.bands is None:
+                    continue
+                for band in img:
+                    band._bbox = self._bbox
+                    bounds = box(*band._bbox).intersection(box(*band.bounds))
+                    band._bounds = to_bbox(bounds) if not bounds.is_empty else None
+                    if band.mask is not None:
+                        band.mask._bbox = self._bbox
+                        band.mask._bounds = band._bounds
-    def apply(self, func: Callable, **kwargs) -> "ImageCollection":
-        """Apply a function to all bands in each image of the collection."""
-        for img in self:
-            img.bands = [func(band, **kwargs) for band in img]
         return self
-    def filter(
-        self,
-        bands: str | list[str] | None = None,
-        exclude_bands: str | list[str] | None = None,
-        date_ranges: (
-            tuple[str | None, str | None]
-            | tuple[tuple[str | None, str | None], ...]
-            | None
-        ) = None,
-        bbox: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
-        intersects: GeoDataFrame | GeoSeries | Geometry | tuple[float] | None = None,
-        max_cloud_coverage: int | None = None,
-        copy: bool = True,
-    ) -> "ImageCollection":
-        """Filter images and bands in the collection."""
-        copied = self.copy() if copy else self
-        if isinstance(bbox, BoundingBox):
-            date_ranges = (bbox.mint, bbox.maxt)
-        if date_ranges:
-            copied = copied._filter_dates(date_ranges)
-        if max_cloud_coverage is not None:
-            copied.images = [
-                image
-                for image in copied.images
-                if image.cloud_coverage_percentage < max_cloud_coverage
-            ]
-        if bbox is not None:
-            copied = copied._filter_bounds(bbox)
-            copied.set_bbox(bbox)
-        if intersects is not None:
-            copied = copied._filter_bounds(intersects)
-        if bands is not None:
-            if isinstance(bands, str):
-                bands = [bands]
-            bands = set(bands)
-            copied._band_ids = bands
-            copied.images = [img[bands] for img in copied.images if bands in img]
-        if exclude_bands is not None:
-            if isinstance(exclude_bands, str):
-                exclude_bands = {exclude_bands}
-            else:
-                exclude_bands = set(exclude_bands)
-            include_bands: list[list[str]] = [
-                [band_id for band_id in img.band_ids if band_id not in exclude_bands]
-                for img in copied
-            ]
-            copied.images = [
-                img[bands]
-                for img, bands in zip(copied.images, include_bands, strict=False)
-                if bands
-            ]
-        return copied
     def _filter_dates(
         self,
-        date_ranges: (
-            tuple[str | None, str | None] | tuple[tuple[str | None, str | None], ...]
-        ),
+        date_ranges: DATE_RANGES_TYPE = None,
     ) -> "ImageCollection":
         if not isinstance(date_ranges, (tuple, list)):
             raise TypeError(
@@ -2247,13 +2400,7 @@ class ImageCollection(_ImageBase):
                 "Cannot set date_ranges when the class's image_regexes attribute is None"
             )
-        self.images = [
-            img
-            for img in self
-            if _date_is_within(
-                img.path, date_ranges, self.image_patterns, self.date_format
-            )
-        ]
+        self.images = [img for img in self if _date_is_within(img.date, date_ranges)]
         return self
     def _filter_bounds(
@@ -2264,11 +2411,15 @@ class ImageCollection(_ImageBase):
         other = to_shapely(other)
-        # intersects_list = GeoSeries([img.union_all() for img in self]).intersects(other)
-        with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
-            intersects_list: list[bool] = parallel(
-                joblib.delayed(_intesects)(image, other) for image in self
-            )
+        if self.processes == 1:
+            intersects_list: pd.Series = GeoSeries(
+                [img.union_all() for img in self]
+            ).intersects(other)
+        else:
+            with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
+                intersects_list: list[bool] = parallel(
+                    joblib.delayed(_intesects)(image, other) for image in self
+                )
         self.images = [
             image
@@ -2277,6 +2428,69 @@ class ImageCollection(_ImageBase):
         ]
         return self
+    def to_xarray(
+        self,
+        **kwargs,
+    ) -> Dataset:
+        """Convert the raster to  an xarray.Dataset.
+        Images are converted to 2d arrays for each unique bounds.
+        The spatial dimensions will be labeled "x" and "y". The third
+        dimension defaults to "date" if all images have date attributes.
+        Otherwise defaults to the image name.
+        """
+        if any(not band.has_array for img in self for band in img):
+            raise ValueError("Arrays must be loaded.")
+        # if by is None:
+        if all(img.date for img in self):
+            by = ["date"]
+        elif not pd.Index([img.name for img in self]).is_unique:
+            raise ValueError("Images must have unique names.")
+        else:
+            by = ["name"]
+        # elif isinstance(by, str):
+        # by = [by]
+        xarrs: dict[str, DataArray] = {}
+        for (bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
+            name = f"{band_id}_{'-'.join(str(int(x)) for x in bounds)}"
+            first_band = collection[0][0]
+            coords = _generate_spatial_coords(
+                first_band.transform, first_band.width, first_band.height
+            )
+            values = np.array([band.to_numpy() for img in collection for band in img])
+            assert len(values) == len(collection)
+            # coords["band_id"] = [
+            #     band.band_id or i for i, band in enumerate(collection[0])
+            # ]
+            for attr in by:
+                coords[attr] = [getattr(img, attr) for img in collection]
+            # coords["band"] = band_id  #
+            dims = [*by, "y", "x"]
+            # dims = ["band", "y", "x"]
+            # dims = {}
+            # for attr in by:
+            #     dims[attr] = [getattr(img, attr) for img in collection]
+            xarrs[name] = DataArray(
+                values,
+                coords=coords,
+                dims=dims,
+                # name=name,
+                name=band_id,
+                attrs={
+                    "crs": collection.crs,
+                    "band_id": band_id,
+                },  # , "bounds": bounds},
+                **kwargs,
+            )
+        return xr.combine_by_coords(list(xarrs.values()))
+        # return Dataset(xarrs)
     def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
         """Convert each band in each Image to a GeoDataFrame."""
         out = {}
@@ -2289,12 +2503,8 @@ class ImageCollection(_ImageBase):
                 except AttributeError:
                     name = f"{self.__class__.__name__}({i})"
-                band.load()
                 if name not in out:
                     out[name] = band.to_gdf(column=column)
-                # else:
-                #     out[name] = f"{self.__class__.__name__}({i})"
         return out
     def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
@@ -2363,11 +2573,16 @@ class ImageCollection(_ImageBase):
         """Number of images."""
         return len(self.images)
-    def __getitem__(
-        self,
-        item: int | slice | Sequence[int | bool] | BoundingBox | Sequence[BoundingBox],
-    ) -> Image | TORCHGEO_RETURN_TYPE:
-        """Select one Image by integer index, or multiple Images by slice, list of int or torchgeo.BoundingBox."""
+    def __getattr__(self, attr: str) -> Any:
+        """Make iterable of metadata_attribute."""
+        if attr in (self.metadata_attributes or {}):
+            return self._metadata_attribute_collection_type(
+                [getattr(img, attr) for img in self]
+            )
+        return super().__getattribute__(attr)
+    def __getitem__(self, item: int | slice | Sequence[int | bool]) -> Image:
+        """Select one Image by integer index, or multiple Images by slice, list of int."""
         if isinstance(item, int):
             return self.images[item]
@@ -2392,90 +2607,23 @@ class ImageCollection(_ImageBase):
             ]
             return copied
-        if not isinstance(item, BoundingBox) and not (
-            isinstance(item, Iterable)
-            and len(item)
-            and all(isinstance(x, BoundingBox) for x in item)
-        ):
-            copied = self.copy()
-            if callable(item):
-                item = [item(img) for img in copied]
-            # check for base bool and numpy bool
-            if all("bool" in str(type(x)) for x in item):
-                copied.images = [img for x, img in zip(item, copied, strict=True) if x]
+        copied = self.copy()
+        if callable(item):
+            item = [item(img) for img in copied]
-            else:
-                copied.images = [copied.images[i] for i in item]
-            return copied
+        # check for base bool and numpy bool
+        if all("bool" in str(type(x)) for x in item):
+            copied.images = [img for x, img in zip(item, copied, strict=True) if x]
-        if isinstance(item, BoundingBox):
-            date_ranges: tuple[str] = (item.mint, item.maxt)
-            data: torch.Tensor = numpy_to_torch(
-                np.array(
-                    [
-                        band.values
-                        for band in self.filter(
-                            bbox=item, date_ranges=date_ranges
-                        ).merge_by_band(bounds=item)
-                    ]
-                )
-            )
         else:
-            bboxes: list[Polygon] = [to_bbox(x) for x in item]
-            date_ranges: list[list[str, str]] = [(x.mint, x.maxt) for x in item]
-            data: torch.Tensor = torch.cat(
-                [
-                    numpy_to_torch(
-                        np.array(
-                            [
-                                band.values
-                                for band in self.filter(
-                                    bbox=bbox, date_ranges=date_range
-                                ).merge_by_band(bounds=bbox)
-                            ]
-                        )
-                    )
-                    for bbox, date_range in zip(bboxes, date_ranges, strict=True)
-                ]
-            )
-        crs = get_common_crs(self.images)
-        key = "image"  # if self.is_image else "mask"
-        sample = {key: data, "crs": crs, "bbox": item}
-        return sample
-    @property
-    def mint(self) -> float:
-        """Min timestamp of the images combined."""
-        return min(img.mint for img in self)
-    @property
-    def maxt(self) -> float:
-        """Max timestamp of the images combined."""
-        return max(img.maxt for img in self)
-    @property
-    def band_ids(self) -> list[str]:
-        """Sorted list of unique band_ids."""
-        return list(sorted({band.band_id for img in self for band in img}))
-    @property
-    def file_paths(self) -> list[str]:
-        """Sorted list of all file paths, meaning all band paths."""
-        return list(sorted({band.path for img in self for band in img}))
+            copied.images = [copied.images[i] for i in item]
+        return copied
     @property
     def dates(self) -> list[str]:
         """List of image dates."""
         return [img.date for img in self]
-    def dates_as_int(self) -> list[int]:
-        """List of image dates as 8-length integers."""
-        return [int(img.date[:8]) for img in self]
     @property
     def image_paths(self) -> list[str]:
         """List of image paths."""
@@ -2496,29 +2644,22 @@ class ImageCollection(_ImageBase):
             masking=self.masking,
             **self._common_init_kwargs,
         )
         if self.masking is not None:
             images = []
             for image in self._images:
+                # TODO why this loop?
                 try:
                     if not isinstance(image.mask, Band):
                         raise ValueError()
                     images.append(image)
-                except ValueError:
+                except ValueError as e:
+                    raise e
                     continue
             self._images = images
             for image in self._images:
                 image._bands = [band for band in image if band.band_id is not None]
-        if self.metadata is not None:
-            for img in self:
-                for band in img:
-                    for key in ["crs", "bounds"]:
-                        try:
-                            value = self.metadata[band.path][key]
-                        except KeyError:
-                            value = self.metadata[key][band.path]
-                        setattr(band, f"_{key}", value)
         self._images = [img for img in self if len(img)]
         if self._should_be_sorted:
@@ -2543,7 +2684,7 @@ class ImageCollection(_ImageBase):
                 and sort_group in _get_non_optional_groups(pat)
                 for pat in self.image_patterns
             )
-            or all(img.date is not None for img in self)
+            or all(getattr(img, sort_group) is not None for img in self)
         )
     @images.setter
@@ -2552,31 +2693,20 @@ class ImageCollection(_ImageBase):
         if not all(isinstance(x, Image) for x in self._images):
             raise TypeError("images should be a sequence of Image.")
-    @property
-    def index(self) -> Index:
-        """Spatial index that makes torchgeo think this class is a RasterDataset."""
-        try:
-            if len(self) == len(self._index):
-                return self._index
-        except AttributeError:
-            self._index = Index(interleaved=False, properties=Property(dimension=3))
-            for i, img in enumerate(self.images):
-                if img.date:
-                    try:
-                        mint, maxt = disambiguate_timestamp(img.date, self.date_format)
-                    except (NameError, TypeError):
-                        mint, maxt = 0, 1
-                else:
-                    mint, maxt = 0, 1
-                # important: torchgeo has a different order of the bbox than shapely and geopandas
-                minx, miny, maxx, maxy = img.bounds
-                self._index.insert(i, (minx, maxx, miny, maxy, mint, maxt))
-            return self._index
     def __repr__(self) -> str:
         """String representation."""
-        return f"{self.__class__.__name__}({len(self)}, path='{self.path}')"
+        root = ""
+        if self.path is not None:
+            data = f"'{self.path}'"
+        elif all(img.path is not None for img in self):
+            data = [img.path for img in self]
+            parents = {str(Path(path).parent) for path in data}
+            if len(parents) == 1:
+                data = [Path(path).name for path in data]
+                root = f" root='{next(iter(parents))}',"
+        else:
+            data = [img for img in self]
+        return f"{self.__class__.__name__}({data},{root} res={self.res}, level='{self.level}')"
     def union_all(self) -> Polygon | MultiPolygon:
         """(Multi)Polygon representing the union of all image bounds."""
@@ -2603,6 +2733,7 @@ class ImageCollection(_ImageBase):
         p: float = 0.95,
         ylim: tuple[float, float] | None = None,
         figsize: tuple[int] = (20, 8),
+        rounding: int = 3,
     ) -> None:
         """Plot each individual pixel in a dotplot for all dates.
@@ -2616,6 +2747,7 @@ class ImageCollection(_ImageBase):
             p: p-value for the confidence interval.
             ylim: Limits of the y-axis.
             figsize: Figure size as tuple (width, height).
+            rounding: rounding of title n
         """
         if by is None and all(band.band_id is not None for img in self for band in img):
@@ -2625,12 +2757,11 @@ class ImageCollection(_ImageBase):
         alpha = 1 - p
-        for img in self:
-            for band in img:
-                band.load()
         for group_values, subcollection in self.groupby(by):
-            print("group_values:", *group_values)
+            print("subcollection group values:", group_values)
+            if "date" in x_var and subcollection._should_be_sorted:
+                subcollection._images = list(sorted(subcollection._images))
             y = np.array([band.values for img in subcollection for band in img])
             if "date" in x_var and subcollection._should_be_sorted:
@@ -2641,6 +2772,7 @@ class ImageCollection(_ImageBase):
                         for band in img
                     ]
                 )
+                first_date = pd.Timestamp(x[0])
                 x = (
                     pd.to_datetime(
                         [band.date[:8] for img in subcollection for band in img]
@@ -2685,6 +2817,10 @@ class ImageCollection(_ImageBase):
                     )[0]
                     predicted = np.array([intercept + coef * x for x in this_x])
+                    predicted_start = predicted[0]
+                    predicted_end = predicted[-1]
+                    predicted_change = predicted_end - predicted_start
                     # Degrees of freedom
                     dof = len(this_x) - 2
@@ -2708,8 +2844,6 @@ class ImageCollection(_ImageBase):
                     ci_lower = predicted - t_val * pred_stderr
                     ci_upper = predicted + t_val * pred_stderr
-                    rounding = int(np.log(1 / abs(coef)))
                     fig = plt.figure(figsize=figsize)
                     ax = fig.add_subplot(1, 1, 1)
@@ -2723,120 +2857,353 @@ class ImageCollection(_ImageBase):
                         alpha=0.2,
                         label=f"{int(alpha*100)}% CI",
                     )
-                    plt.title(f"Coefficient: {round(coef, rounding)}")
+                    plt.title(
+                        f"coef: {round(coef, int(np.log(1 / abs(coef))))}, "
+                        f"pred change: {round(predicted_change, rounding)}, "
+                        f"pred start: {round(predicted_start, rounding)}, "
+                        f"pred end: {round(predicted_end, rounding)}"
+                    )
                     plt.xlabel(x_var)
                     plt.ylabel(y_label)
-                    plt.show()
+                    if x_var == "date":
+                        date_labels = pd.to_datetime(
+                            [first_date + pd.Timedelta(days=int(day)) for day in this_x]
+                        )
-def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
-    """Union multiple ImageCollections together.
+                        _, unique_indices = np.unique(
+                            date_labels.strftime("%Y-%m"), return_index=True
+                        )
-    Same as using the union operator |.
-    """
-    resolutions = {x.res for x in collections}
-    if len(resolutions) > 1:
-        raise ValueError(f"resoultion mismatch. {resolutions}")
-    images = list(itertools.chain.from_iterable([x.images for x in collections]))
-    levels = {x.level for x in collections}
-    level = next(iter(levels)) if len(levels) == 1 else None
-    first_collection = collections[0]
+                        unique_x = np.array(this_x)[unique_indices]
+                        unique_labels = date_labels[unique_indices].strftime("%Y-%m")
-    out_collection = first_collection.__class__(
-        images,
-        level=level,
-        band_class=first_collection.band_class,
-        image_class=first_collection.image_class,
-        **first_collection._common_init_kwargs,
+                        ax.set_xticks(unique_x)
+                        ax.set_xticklabels(unique_labels, rotation=45, ha="right")
+                        # ax.tick_params(axis="x", length=10, width=2)
+                    plt.show()
+def _get_all_regex_matches(xml_file: str, regexes: tuple[str]) -> re.Match:
+    """Return the match object of the first regex that matches the xml text.
+    Args:
+        xml_file: Content of the xml file as a string.
+        regexes: Patterns to try, in order.
+    Returns:
+        The re.Match of the first pattern found in xml_file.
+    Raises:
+        ValueError: If none of the patterns match.
+    """
+    for regex in regexes:
+        try:
+            match_ = re.search(regex, xml_file)
+        except (TypeError, AttributeError):
+            continue
+        # re.search signals "no match" by returning None, not by raising,
+        # so the next pattern must be tried explicitly
+        if match_ is not None:
+            return match_
+    raise ValueError(
+        f"Could not find processing_baseline info from {regexes} in {xml_file}"
     )
-    out_collection._all_file_paths = list(
-        sorted(
-            set(itertools.chain.from_iterable([x._all_file_paths for x in collections]))
-        )
+class Sentinel2Config:
+    """Holder of Sentinel 2 regexes, band_ids etc.
+    The values of metadata_attributes are either callables or strings that
+    match the names of the methods defined on this class.
+    """
+    image_regexes: ClassVar[tuple[str, ...]] = (config.SENTINEL2_IMAGE_REGEX,)
+    filename_regexes: ClassVar[tuple[str, ...]] = (config.SENTINEL2_FILENAME_REGEX,)
+    metadata_attributes: ClassVar[
+        dict[str, Callable | functools.partial | tuple[str] | str]
+    ] = {
+        "processing_baseline": functools.partial(
+            _extract_regex_match_from_string,
+            regexes=(r"<PROCESSING_BASELINE>(.*?)</PROCESSING_BASELINE>",),
+        ),
+        "cloud_cover_percentage": "_get_cloud_cover_percentage",
+        "is_refined": "_get_image_refining_flag",
+        "boa_quantification_value": "_get_boa_quantification_value",
+    }
+    # band id -> res
+    l1c_bands: ClassVar[dict[str, int]] = {
+        "B01": 60,
+        "B02": 10,
+        "B03": 10,
+        "B04": 10,
+        "B05": 20,
+        "B06": 20,
+        "B07": 20,
+        "B08": 10,
+        "B8A": 20,
+        "B09": 60,
+        "B10": 60,
+        "B11": 20,
+        "B12": 20,
+    }
+    # same as l1c_bands, but without band B10
+    l2a_bands: ClassVar[dict[str, int]] = {
+        key: res for key, res in l1c_bands.items() if key != "B10"
+    }
+    all_bands: ClassVar[dict[str, int]] = l1c_bands
+    rbg_bands: ClassVar[tuple[str, ...]] = ("B04", "B02", "B03")
+    ndvi_bands: ClassVar[tuple[str, ...]] = ("B04", "B08")
+    masking: ClassVar[BandMasking] = BandMasking(
+        band_id="SCL",
+        values={
+            2: "Topographic casted shadows",
+            3: "Cloud shadows",
+            8: "Cloud medium probability",
+            9: "Cloud high probability",
+            10: "Thin cirrus",
+            11: "Snow or ice",
+        },
     )
-    return out_collection
+    def _get_image_refining_flag(self, xml_file: str) -> bool:
+        """Return True if the xml text flags the image as REFINED, False if NOT_REFINED.
+        Raises:
+            _RegexError: If the Image_Refining flag is not found in xml_file.
+        """
+        match_ = re.search(
+            r'Image_Refining flag="(?:REFINED|NOT_REFINED)"',
+            xml_file,
+        )
+        if match_ is None:
+            raise _RegexError()
-def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> Band:
-    copied = band.copy() if copy else band
-    if len(copied.values.shape) == 3:
-        return np.array(
-            [_slope_2d(arr, copied.res, degrees=degrees) for arr in copied.values]
+        # the pattern can only match REFINED or NOT_REFINED, and "REFINED" is a
+        # substring of "NOT_REFINED", so checking for the longer one is enough
+        return "NOT_REFINED" not in match_.group(0)
+    def _get_boa_quantification_value(self, xml_file: str) -> int:
+        """Extract BOA_QUANTIFICATION_VALUE from the xml text as an integer."""
+        # NOTE(review): the optional minus sign sits outside the capture group,
+        # so a negative value would be returned without its sign - confirm intended
+        return int(
+            _extract_regex_match_from_string(
+                xml_file,
+                (
+                    r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
+                ),
+            )
         )
-    elif len(copied.values.shape) == 2:
-        return _slope_2d(copied.values, copied.res, degrees=degrees)
-    else:
-        raise ValueError("array must be 2 or 3 dimensional")
+    def _get_cloud_cover_percentage(self, xml_file: str) -> float:
+        """Extract the cloud cover percentage from the xml text as a float.
+        Two tag names are passed on, Cloud_Coverage_Assessment first.
+        """
+        return float(
+            _extract_regex_match_from_string(
+                xml_file,
+                (
+                    r"<Cloud_Coverage_Assessment>([\d.]+)</Cloud_Coverage_Assessment>",
+                    r"<CLOUDY_PIXEL_OVER_LAND_PERCENTAGE>([\d.]+)</CLOUDY_PIXEL_OVER_LAND_PERCENTAGE>",
+                ),
+            )
+        )
-def to_xarray(
-    array: np.ndarray, transform: Affine, crs: Any, name: str | None = None
-) -> DataArray:
-    """Convert the raster to  an xarray.DataArray."""
-    if len(array.shape) == 2:
-        height, width = array.shape
-        dims = ["y", "x"]
-    elif len(array.shape) == 3:
-        height, width = array.shape[1:]
-        dims = ["band", "y", "x"]
-    else:
-        raise ValueError(f"Array should be 2 or 3 dimensional. Got shape {array.shape}")
-    coords = _generate_spatial_coords(transform, width, height)
-    return xr.DataArray(
-        array,
-        coords=coords,
-        dims=dims,
-        name=name,
-        attrs={"crs": crs},
-    )
+class Sentinel2CloudlessConfig(Sentinel2Config):
+    """Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic.
+    Band ids are derived from Sentinel2Config by replacing "B0" with "B".
+    """
-def _slope_2d(array: np.ndarray, res: int, degrees: int) -> np.ndarray:
-    gradient_x, gradient_y = np.gradient(array, res, res)
+    image_regexes: ClassVar[tuple[str, ...]] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
+    filename_regexes: ClassVar[tuple[str, ...]] = (
+        config.SENTINEL2_MOSAIC_FILENAME_REGEX,
+    )
+    masking: ClassVar[None] = None
+    all_bands: ClassVar[list[str]] = [
+        x.replace("B0", "B") for x in Sentinel2Config.all_bands
+    ]
+    # tuples, not sets: the order of the parent's band tuples must survive the renaming
+    rbg_bands: ClassVar[tuple[str, ...]] = tuple(
+        key.replace("B0", "B") for key in Sentinel2Config.rbg_bands
+    )
+    ndvi_bands: ClassVar[tuple[str, ...]] = tuple(
+        key.replace("B0", "B") for key in Sentinel2Config.ndvi_bands
+    )
-    gradient = abs(gradient_x) + abs(gradient_y)
-    if not degrees:
-        return gradient
+class Sentinel2Band(Sentinel2Config, Band):
+    """Band with Sentinel2 specific name variables and regexes."""
-    radians = np.arctan(gradient)
-    degrees = np.degrees(radians)
+    # the config's metadata attributes plus the band specific boa_add_offset
+    metadata_attributes = Sentinel2Config.metadata_attributes | {
+        "boa_add_offset": "_get_boa_add_offset_dict",
+    }
-    assert np.max(degrees) <= 90
+    def _get_boa_add_offset_dict(self, xml_file: str) -> int | None:
+        # Returns the BOA_ADD_OFFSET value for this band's band_id, parsed from
+        # the xml text, or None for mask bands.
+        # Raises _RegexError if the xml text has no BOA_ADD_OFFSET entries and
+        # KeyError if no entry can be tied to this band_id.
+        if self.is_mask:
+            return None
-    return degrees
+        pat = re.compile(
+            r"""
+    <BOA_ADD_OFFSET\s*
+    band_id="(?P<band_id>\d+)"\s*
+    >\s*(?P<value>-?\d+)\s*
+    </BOA_ADD_OFFSET>
+    """,
+            flags=re.VERBOSE,
+        )
+        try:
+            matches = [x.groupdict() for x in re.finditer(pat, xml_file)]
+        except (TypeError, AttributeError, KeyError) as e:
+            raise _RegexError(f"Could not find boa_add_offset info from {pat}") from e
+        if not matches:
+            raise _RegexError(f"Could not find boa_add_offset info from {pat}")
+        # {band_id (as matched, a string of digits): offset as int}
+        dict_ = (
+            pd.DataFrame(matches).set_index("band_id")["value"].astype(int).to_dict()
+        )
-def _clip_loaded_array(
-    arr: np.ndarray,
-    bounds: tuple[int, int, int, int],
-    transform: Affine,
-    crs: Any,
-    out_shape: tuple[int, int],
-    **kwargs,
-) -> np.ndarray:
-    # xarray needs a numpy array of polygon(s)
-    bounds_arr: np.ndarray = GeoSeries([to_shapely(bounds)]).values
-    try:
+        # some xml files have band ids in range index form
+        # converting these to actual band ids (B01 etc.)
+        is_integer_coded = [int(i) for i in dict_] == list(range(len(dict_)))
-        while out_shape != arr.shape:
-            arr = (
-                to_xarray(
-                    arr,
-                    transform=transform,
-                    crs=crs,
+        if is_integer_coded:
+            # the xml files contain 13 bandIds for both L1C and L2A
+            # eventhough L2A doesn't have band B10
+            all_bands = list(self.l1c_bands)
+            if len(all_bands) != len(dict_):
+                raise ValueError(
+                    f"Different number of bands in xml file and config for {self.name}: {all_bands}, {list(dict_)}"
                 )
-                .rio.clip(bounds_arr, crs=crs, **kwargs)
-                .to_numpy()
-            )
-            # bounds_arr = bounds_arr.buffer(0.0000001)
-        return arr
+            # pair the offsets with the l1c band ids by position
+            dict_ = {
+                band_id: value
+                for band_id, value in zip(all_bands, dict_.values(), strict=True)
+            }
+        try:
+            return dict_[self.band_id]
+        except KeyError as e:
+            # fallback: strip "B0", then "B", then "A" from the band id, one
+            # after the other (cumulatively), retrying the lookup after each step
+            band_id = self.band_id.upper()
+            for txt in ["B0", "B", "A"]:
+                band_id = band_id.replace(txt, "")
+                try:
+                    return dict_[band_id]
+                except KeyError:
+                    continue
+            raise KeyError(self.band_id, dict_) from e
+class Sentinel2Image(Sentinel2Config, Image):
+    """Image whose name variables and regexes follow the Sentinel2 config."""
+    band_class: ClassVar[Sentinel2Band] = Sentinel2Band
+    def ndvi(
+        self,
+        red_band: str = "B04",
+        nir_band: str = "B08",
+        padding: int = 0,
+        copy: bool = True,
+    ) -> NDVIBand:
+        """Compute the NDVI with the parent implementation, defaulting to B04 and B08."""
+        band_ids = {"red_band": red_band, "nir_band": nir_band}
+        return super().ndvi(**band_ids, padding=padding, copy=copy)
+class Sentinel2Collection(Sentinel2Config, ImageCollection):
+    """ImageCollection with Sentinel2 specific name variables and path regexes."""
+    image_class: ClassVar[Sentinel2Image] = Sentinel2Image
+    band_class: ClassVar[Sentinel2Band] = Sentinel2Band
+    def __init__(self, data: str | Path | Sequence[Image], **kwargs) -> None:
+        """ImageCollection with Sentinel2 specific name variables and path regexes."""
+        # None_ is the fallback when no "level" keyword is given at all
+        level = kwargs.get("level", None_)
+        # NOTE(review): any callable level is called here; presumably only the
+        # None_ sentinel is expected to be callable - confirm
+        if callable(level) and isinstance(level(), None_):
+            raise ValueError("Must specify level for Sentinel2Collection.")
+        super().__init__(data=data, **kwargs)
+class Sentinel2CloudlessBand(Sentinel2CloudlessConfig, Band):
+    """Band for cloudless mosaic with Sentinel2 specific name variables and regexes.
+    Defines nothing of its own; everything comes from the two base classes.
+    """
+class Sentinel2CloudlessImage(Sentinel2CloudlessConfig, Sentinel2Image):
+    """Image for cloudless mosaic with Sentinel2 specific name variables and regexes."""
+    band_class: ClassVar[Sentinel2CloudlessBand] = Sentinel2CloudlessBand
+    # NOTE(review): Sentinel2Image.ndvi defaults to "B04"/"B08", while the
+    # cloudless config renames "B0x" to "Bx" - confirm the defaults fit here
+    ndvi = Sentinel2Image.ndvi
+class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
+    """ImageCollection for cloudless mosaic with Sentinel2 specific name variables and regexes."""
+    image_class: ClassVar[Sentinel2CloudlessImage] = Sentinel2CloudlessImage
+    # annotation matches the class actually assigned
+    band_class: ClassVar[Sentinel2CloudlessBand] = Sentinel2CloudlessBand
+def concat_image_collections(collections: Sequence[ImageCollection]) -> ImageCollection:
+    """Union multiple ImageCollections together.
+    Same as using the union operator |.
+    Args:
+        collections: The collections to combine. All must have the same res.
+    Returns:
+        A new collection of the same class as the first one, holding the images
+        of all the collections. The level is kept only if all collections share it.
+    Raises:
+        ValueError: If no collections are given or the resolutions differ.
+    """
+    if not collections:
+        raise ValueError("Must specify at least one collection.")
+    resolutions = {x.res for x in collections}
+    if len(resolutions) > 1:
+        raise ValueError(f"resolution mismatch. {resolutions}")
+    images = list(itertools.chain.from_iterable(x.images for x in collections))
+    levels = {x.level for x in collections}
+    level = next(iter(levels)) if len(levels) == 1 else None
+    # class, band/image classes and common init kwargs are taken from the first collection
+    first_collection = collections[0]
+    out_collection = first_collection.__class__(
+        images,
+        level=level,
+        band_class=first_collection.band_class,
+        image_class=first_collection.image_class,
+        **first_collection._common_init_kwargs,
+    )
+    # unique file paths from all the collections, sorted
+    out_collection._all_file_paths = sorted(
+        set(itertools.chain.from_iterable(x._all_file_paths for x in collections))
+    )
+    return out_collection
+def _get_gradient(band: Band, degrees: bool = False, copy: bool = True) -> np.ndarray:
+    """Compute the slope of the band's values with _slope_2d.
+    Args:
+        band: Band whose values are a 2 or 3 dimensional array.
+        degrees: Passed on to _slope_2d.
+        copy: If True, the values are read from a copy of the band.
+    Returns:
+        An array of slopes. For 3 dimensional values, each 2 dimensional
+        array along the first axis is handled separately.
+    Raises:
+        ValueError: If the values are not 2 or 3 dimensional.
+    """
+    copied = band.copy() if copy else band
+    n_dims = len(copied.values.shape)
+    if n_dims == 3:
+        return np.array(
+            [_slope_2d(arr, copied.res, degrees=degrees) for arr in copied.values]
+        )
+    if n_dims == 2:
+        return _slope_2d(copied.values, copied.res, degrees=degrees)
+    raise ValueError("array must be 2 or 3 dimensional")
+def _slope_2d(array: np.ndarray, res: int, degrees: bool) -> np.ndarray:
+    """Sum of the absolute gradients along both axes of a 2 dimensional array.
+    Args:
+        array: 2 dimensional array.
+        res: Spacing between the cells, used for both axes.
+        degrees: If True, the gradient is converted to degrees with arctan.
+    Returns:
+        Array with the same shape as the input.
+    """
+    gradient_x, gradient_y = np.gradient(array, res, res)
+    gradient = abs(gradient_x) + abs(gradient_y)
+    if not degrees:
+        return gradient
+    # arctan of a non-negative number is always below 90 degrees, so no check
+    # is needed (the former assert only failed for arrays containing NaN)
+    return np.degrees(np.arctan(gradient))
+def _clip_xarray(
+    xarr: DataArray,
+    mask: tuple[int, int, int, int],
+    crs: Any,
+    **kwargs,
+) -> DataArray | np.ndarray:
+    """Clip the DataArray to the mask with rio.clip.
+    Args:
+        xarr: The array to clip.
+        mask: Converted with to_geoseries before clipping.
+        crs: Passed on to rio.clip.
+        **kwargs: Passed on to rio.clip.
+    Returns:
+        The clipped DataArray, or an empty numpy array if rio.clip raises
+        NoDataInBounds.
+    """
+    # xarray needs a numpy array of polygons
+    mask_arr: np.ndarray = to_geoseries(mask).values
+    try:
+        return xarr.rio.clip(
+            mask_arr,
+            crs=crs,
+            **kwargs,
+        )
     except NoDataInBounds:
         return np.array([])
+def _get_all_file_paths(path: str) -> set[str]:
+    """Glob the directory and return the unique paths, passed through _fix_path.
+    On dapla a single "/**" pattern is used. Otherwise the patterns
+    "/**" to "/**/**/**/**/**" are globbed one by one.
+    """
+    if is_dapla():
+        patterns = [path + "/**"]
+    else:
+        patterns = [path + "/**" * depth for depth in range(1, 6)]
+    # the result is a set, so neither sorting nor deduplicating first is needed
+    return {_fix_path(x) for pattern in patterns for x in _glob_func(pattern)}
 def _get_images(
     image_paths: list[str],
     *,
@@ -2849,9 +3216,8 @@ def _get_images(
     masking: BandMasking | None,
     **kwargs,
 ) -> list[Image]:
-    with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
-        images = parallel(
+    with joblib.Parallel(n_jobs=processes, backend="threading") as parallel:
+        images: list[Image] = parallel(
             joblib.delayed(image_class)(
                 path,
                 df=df,
@@ -2874,21 +3240,6 @@ def _get_images(
     return images
-def numpy_to_torch(array: np.ndarray) -> torch.Tensor:
-    """Convert numpy array to a pytorch tensor."""
-    # fix numpy dtypes which are not supported by pytorch tensors
-    if array.dtype == np.uint16:
-        array = array.astype(np.int32)
-    elif array.dtype == np.uint32:
-        array = array.astype(np.int64)
-    return torch.tensor(array)
-class _RegexError(ValueError):
-    pass
 class ArrayNotLoadedError(ValueError):
     """Arrays are not loaded."""
@@ -2904,10 +3255,12 @@ class PathlessImageError(ValueError):
         """String representation."""
         if self.instance._merged:
             what = "that have been merged"
-        elif self.isinstance._from_array:
+        elif self.instance._from_array:
             what = "from arrays"
-        elif self.isinstance._from_gdf:
+        elif self.instance._from_gdf:
             what = "from GeoDataFrames"
+        else:
+            raise ValueError(self.instance)
         return (
             f"{self.instance.__class__.__name__} instances {what} "
@@ -2915,165 +3268,32 @@ class PathlessImageError(ValueError):
         )
-def _get_regex_match_from_xml_in_local_dir(
-    paths: list[str], regexes: str | tuple[str]
-) -> str | dict[str, str]:
-    for i, path in enumerate(paths):
-        if ".xml" not in path:
-            continue
-        with _open_func(path, "rb") as file:
-            filebytes: bytes = file.read()
-            try:
-                return _extract_regex_match_from_string(
-                    filebytes.decode("utf-8"), regexes
-                )
-            except _RegexError as e:
-                if i == len(paths) - 1:
-                    raise e
-def _extract_regex_match_from_string(
-    xml_file: str, regexes: tuple[str | re.Pattern]
-) -> str | dict[str, str]:
-    if all(isinstance(x, str) for x in regexes):
-        for regex in regexes:
-            try:
-                return re.search(regex, xml_file).group(1)
-            except (TypeError, AttributeError):
-                continue
-        raise _RegexError()
-    out = {}
-    for regex in regexes:
-        try:
-            matches = re.search(regex, xml_file)
-            out |= matches.groupdict()
-        except (TypeError, AttributeError):
-            continue
-    if not out:
-        raise _RegexError()
-    return out
-def _fix_path(path: str) -> str:
-    return (
-        str(path).replace("\\", "/").replace(r"\"", "/").replace("//", "/").rstrip("/")
-    )
-def _get_regexes_matches_for_df(
-    df, match_col: str, patterns: Sequence[re.Pattern]
-) -> pd.DataFrame:
-    if not len(df):
-        return df
-    non_optional_groups = list(
-        set(
-            itertools.chain.from_iterable(
-                [_get_non_optional_groups(pat) for pat in patterns]
-            )
-        )
-    )
-    if not non_optional_groups:
-        return df
-    assert df.index.is_unique
-    keep = []
-    for pat in patterns:
-        for i, row in df[match_col].items():
-            matches = _get_first_group_match(pat, row)
-            if all(group in matches for group in non_optional_groups):
-                keep.append(i)
-    return df.loc[keep]
-def _get_non_optional_groups(pat: re.Pattern | str) -> list[str]:
-    return [
-        x
-        for x in [
-            _extract_group_name(group)
-            for group in pat.pattern.split("\n")
-            if group
-            and not group.replace(" ", "").startswith("#")
-            and not group.replace(" ", "").split("#")[0].endswith("?")
-        ]
-        if x is not None
-    ]
-def _extract_group_name(txt: str) -> str | None:
-    try:
-        return re.search(r"\(\?P<(\w+)>", txt)[1]
-    except TypeError:
-        return None
-def _get_first_group_match(pat: re.Pattern, text: str) -> dict[str, str]:
-    groups = pat.groupindex.keys()
-    all_matches: dict[str, str] = {}
-    for x in pat.findall(text):
-        for group, value in zip(groups, x, strict=True):
-            if value and group not in all_matches:
-                all_matches[group] = value
-    return all_matches
 def _date_is_within(
-    path,
-    date_ranges: (
-        tuple[str | None, str | None] | tuple[tuple[str | None, str | None], ...] | None
-    ),
-    image_patterns: Sequence[re.Pattern],
-    date_format: str,
+    date: str | None,
+    date_ranges: DATE_RANGES_TYPE,
 ) -> bool:
-    for pat in image_patterns:
-        try:
-            date = _get_first_group_match(pat, Path(path).name)["date"]
-            break
-        except KeyError:
-            date = None
+    # Returns True if date is inside at least one of the date ranges.
+    # No date_ranges means no filtering (True). A missing date never
+    # matches a given range (False).
+    if date_ranges is None:
+        return True
     if date is None:
         return False
-    if date_ranges is None:
-        return True
+    date = pd.Timestamp(date)
-    if all(x is None or isinstance(x, (str, float)) for x in date_ranges):
+    # a single (min, max) pair is wrapped so that the loop below always sees pairs
+    if all(x is None or isinstance(x, str) for x in date_ranges):
         date_ranges = (date_ranges,)
-    if all(isinstance(x, float) for date_range in date_ranges for x in date_range):
-        date = disambiguate_timestamp(date, date_format)
-    else:
-        date = date[:8]
     for date_range in date_ranges:
         date_min, date_max = date_range
-        if isinstance(date_min, float) and isinstance(date_max, float):
-            if date[0] >= date_min + 0.0000001 and date[1] <= date_max - 0.0000001:
-                return True
-            continue
+        # None means the range is open in that direction
+        # NOTE(review): a date-only date_max presumably becomes midnight of that
+        # day, so later timestamps on the same day fall outside - confirm
+        if date_min is not None:
+            date_min = pd.Timestamp(date_min)
+        if date_max is not None:
+            date_max = pd.Timestamp(date_max)
-        try:
-            date_min = date_min or "00000000"
-            date_max = date_max or "99999999"
-            if not (
-                isinstance(date_min, str)
-                and len(date_min) == 8
-                and isinstance(date_max, str)
-                and len(date_max) == 8
-            ):
-                raise ValueError()
-        except ValueError as err:
-            raise TypeError(
-                "date_ranges should be a tuple of two 8-charactered strings (start and end date)."
-                f"Got {date_range} of type {[type(x) for x in date_range]}"
-            ) from err
-        if date >= date_min and date <= date_max:
+        # both ends are inclusive
+        if (date_min is None or date >= date_min) and (
+            date_max is None or date <= date_max
+        ):
             return True
     return False
@@ -3093,10 +3313,6 @@ def _get_dtype_max(dtype: str | type) -> int | float:
         return np.finfo(dtype).max
-def _img_ndvi(img, **kwargs):
-    return Image([img.ndvi(**kwargs)])
 def _intesects(x, other) -> bool:
     return box(*x.bounds).intersects(other)
@@ -3116,6 +3332,17 @@ def _copy_and_add_df_parallel(
     return (i, copied)
+def _get_bounds(bounds, bbox, band_bounds: Polygon) -> None | Polygon:
+    """Combine the optional bounds and bbox into one geometry.
+    Returns None if neither is given. If only one is given, it is
+    intersected with band_bounds. If both are given, they are
+    intersected with each other.
+    """
+    has_bounds = bounds is not None
+    has_bbox = bbox is not None
+    if has_bounds and has_bbox:
+        return to_shapely(bounds).intersection(to_shapely(bbox))
+    if has_bounds:
+        return to_shapely(bounds).intersection(band_bounds)
+    if has_bbox:
+        return to_shapely(bbox).intersection(band_bounds)
+    return None
 def _get_single_value(values: tuple):
     if len(set(values)) == 1:
         return next(iter(values))
@@ -3129,7 +3356,15 @@ def _open_raster(path: str | Path) -> rasterio.io.DatasetReader:
 def _load_band(band: Band, **kwargs) -> None:
-    band.load(**kwargs)
+    # NOTE(review): the result of band.load is now returned, while the
+    # signature still says -> None - confirm and update the annotation
+    return band.load(**kwargs)
+def _band_apply(band: Band, func: Callable, **kwargs) -> None:
+    # Module-level wrapper around band.apply.
+    # NOTE(review): returns the result of band.apply despite the -> None annotation
+    return band.apply(func, **kwargs)
+def _clip_band(band: Band, mask, **kwargs) -> None:
+    # Module-level wrapper around band.clip.
+    # NOTE(review): returns the result of band.clip despite the -> None annotation
+    return band.clip(mask, **kwargs)
 def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
@@ -3141,7 +3376,7 @@ def _merge(collection: ImageCollection, **kwargs) -> Band:
 def _zonal_one_pair(i: int, poly: Polygon, band: Band, aggfunc, array_func, func_names):
-    clipped = band.copy().load(bounds=poly)
+    # clip is called on a copy of the band, with the polygon as mask
+    clipped = band.copy().clip(poly)
     if not np.size(clipped.values):
         return _no_overlap_df(func_names, i, date=band.date)
     return _aggregate(clipped.values, array_func, aggfunc, func_names, band.date, i)
@@ -3173,85 +3408,126 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
         return binary_erosion(arr, structure=structure).astype(dtype)
-class Sentinel2Config:
-    """Holder of Sentinel 2 regexes, band_ids etc."""
-    image_regexes: ClassVar[str] = (config.SENTINEL2_IMAGE_REGEX,)
-    filename_regexes: ClassVar[str] = (
-        config.SENTINEL2_FILENAME_REGEX,
-        config.SENTINEL2_CLOUD_FILENAME_REGEX,
-    )
-    all_bands: ClassVar[list[str]] = list(config.SENTINEL2_BANDS)
-    rbg_bands: ClassVar[list[str]] = config.SENTINEL2_RBG_BANDS
-    ndvi_bands: ClassVar[list[str]] = config.SENTINEL2_NDVI_BANDS
-    l2a_bands: ClassVar[dict[str, int]] = config.SENTINEL2_L2A_BANDS
-    l1c_bands: ClassVar[dict[str, int]] = config.SENTINEL2_L1C_BANDS
-    date_format: ClassVar[str] = "%Y%m%d"  # T%H%M%S"
-    masking: ClassVar[BandMasking] = BandMasking(
-        band_id="SCL", values=(3, 8, 9, 10, 11)
-    )
-class Sentinel2CloudlessConfig(Sentinel2Config):
-    """Holder of regexes, band_ids etc. for Sentinel 2 cloudless mosaic."""
+def get_cmap(arr: np.ndarray) -> LinearSegmentedColormap:
+    """Build a blue-gray-green colormap adapted to the value range of the array.
+    The minimum of the array selects the blue and gray colors, the maximum
+    selects the green color. The colormap goes from blue at 0.0, through
+    gray at 0.3 and 0.7, to green at 1.0.
+    Args:
+        arr: Array of values. The thresholds used lie between -0.75 and 0.75.
+    Returns:
+        A LinearSegmentedColormap named "blue_gray_green" with 50 levels.
+    """
-    image_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_IMAGE_REGEX,)
-    filename_regexes: ClassVar[str] = (config.SENTINEL2_MOSAIC_FILENAME_REGEX,)
-    masking: ClassVar[None] = None
-    date_format: ClassVar[str] = "%Y%m%d"
-    all_bands: ClassVar[list[str]] = [
-        x.replace("B0", "B") for x in Sentinel2Config.all_bands
+    # candidate start colors, indexed by get_start
+    blue = [
+        [0.1, 0.1, 1.0],
+        [0.2, 0.2, 0.9],
+        [0.3, 0.3, 0.8],
+        [0.4, 0.4, 0.7],
+        [0.6, 0.6, 0.6],
+        [0.6, 0.6, 0.6],
+        [0.7, 0.7, 0.7],
+        [0.8, 0.8, 0.8],
     ]
-    rbg_bands: ClassVar[list[str]] = [
-        x.replace("B0", "B") for x in Sentinel2Config.rbg_bands
+    # candidate middle colors, indexed by get_start
+    gray = [
+        [0.6, 0.6, 0.6],
+        [0.6, 0.6, 0.6],
+        [0.6, 0.6, 0.6],
+        [0.6, 0.6, 0.6],
+        [0.6, 0.6, 0.6],
+        [0.4, 0.7, 0.4],
+        [0.3, 0.7, 0.3],
+        [0.2, 0.8, 0.2],
     ]
-    ndvi_bands: ClassVar[list[str]] = [
-        x.replace("B0", "B") for x in Sentinel2Config.ndvi_bands
+    # candidate end colors, indexed by get_stop
+    green = [
+        [0.6, 0.6, 0.6],
+        [0.4, 0.7, 0.4],
+        [0.3, 0.8, 0.3],
+        [0.25, 0.4, 0.25],
+        [0.2, 0.5, 0.2],
+        [0.10, 0.7, 0.10],
+        [0, 0.9, 0],
     ]
+    def get_start(arr):
+        # index into blue/gray: the lower the minimum, the lower the index
+        min_value = np.min(arr)
+        if min_value < -0.75:
+            return 0
+        if min_value < -0.5:
+            return 1
+        if min_value < -0.25:
+            return 2
+        if min_value < 0:
+            return 3
+        if min_value < 0.25:
+            return 4
+        if min_value < 0.5:
+            return 5
+        if min_value < 0.75:
+            return 6
+        return 7
-class Sentinel2Band(Sentinel2Config, Band):
-    """Band with Sentinel2 specific name variables and regexes."""
-class Sentinel2Image(Sentinel2Config, Image):
-    """Image with Sentinel2 specific name variables and regexes."""
-    cloud_cover_regexes: ClassVar[tuple[str]] = config.CLOUD_COVERAGE_REGEXES
-    band_class: ClassVar[Sentinel2Band] = Sentinel2Band
-    def ndvi(
-        self,
-        red_band: str = Sentinel2Config.ndvi_bands[0],
-        nir_band: str = Sentinel2Config.ndvi_bands[1],
-        copy: bool = True,
-    ) -> NDVIBand:
-        """Calculate the NDVI for the Image."""
-        return super().ndvi(red_band=red_band, nir_band=nir_band, copy=copy)
-class Sentinel2Collection(Sentinel2Config, ImageCollection):
-    """ImageCollection with Sentinel2 specific name variables and regexes."""
-    image_class: ClassVar[Sentinel2Image] = Sentinel2Image
-    band_class: ClassVar[Sentinel2Band] = Sentinel2Band
-class Sentinel2CloudlessBand(Sentinel2CloudlessConfig, Band):
-    """Band for cloudless mosaic with Sentinel2 specific name variables and regexes."""
+    def get_stop(arr):
+        # index into green: the higher the maximum, the higher the index
+        max_value = np.max(arr)
+        if max_value <= 0.05:
+            return 0
+        if max_value < 0.175:
+            return 1
+        if max_value < 0.25:
+            return 2
+        if max_value < 0.375:
+            return 3
+        if max_value < 0.5:
+            return 4
+        if max_value < 0.75:
+            return 5
+        return 6
-class Sentinel2CloudlessImage(Sentinel2CloudlessConfig, Sentinel2Image):
-    """Image for cloudless mosaic with Sentinel2 specific name variables and regexes."""
+    cmap_name = "blue_gray_green"
-    cloud_cover_regexes: ClassVar[None] = None
-    band_class: ClassVar[Sentinel2CloudlessBand] = Sentinel2CloudlessBand
+    start = get_start(arr)
+    stop = get_stop(arr)
+    # from here on blue, gray and green each hold one [r, g, b] color
+    blue = blue[start]
+    gray = gray[start]
+    green = green[stop]
-    ndvi = Sentinel2Image.ndvi
-class Sentinel2CloudlessCollection(Sentinel2CloudlessConfig, ImageCollection):
-    """ImageCollection with Sentinel2 specific name variables and regexes."""
+    # Define the segments of the colormap
+    cdict = {
+        "red": [
+            (0.0, blue[0], blue[0]),
+            (0.3, gray[0], gray[0]),
+            (0.7, gray[0], gray[0]),
+            (1.0, green[0], green[0]),
+        ],
+        "green": [
+            (0.0, blue[1], blue[1]),
+            (0.3, gray[1], gray[1]),
+            (0.7, gray[1], gray[1]),
+            (1.0, green[1], green[1]),
+        ],
+        "blue": [
+            (0.0, blue[2], blue[2]),
+            (0.3, gray[2], gray[2]),
+            (0.7, gray[2], gray[2]),
+            (1.0, green[2], green[2]),
+        ],
+    }
-    image_class: ClassVar[Sentinel2CloudlessImage] = Sentinel2CloudlessImage
-    band_class: ClassVar[Sentinel2Band] = Sentinel2CloudlessBand
+    return LinearSegmentedColormap(cmap_name, segmentdata=cdict, N=50)

ssb-sgis 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

ssb-sgis 1.0.5py3-none-any.whl → 1.0.7py3-none-any.whl