PyPI - ssb-sgis - Versions diffs - 1.0.9__tar.gz → 1.0.11__tar.gz - Mend

ssb-sgis 1.0.9tar.gz → 1.0.11tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 1.0.9
+Version: 1.0.11
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-sgis"
-version = "1.0.9"
+version = "1.0.11"
 description = "GIS functions used at Statistics Norway."
 authors = ["Morten Letnes <morten.letnes@ssb.no>"]
 license = "MIT"

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/bounds.py RENAMED Viewed

@@ -697,4 +697,6 @@ def get_total_bounds(
                     raise e2 from e
                 else:
                     continue
+    if not xs or not ys:
+        raise ValueError(f"No bounds found for {geometries}")
     return min(xs), min(ys), max(xs), max(ys)

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/cleaning.py RENAMED Viewed

@@ -579,9 +579,9 @@ def _snap_to_anchors(
                         # browser=True,
                     )
-                print(
-                    "line_is_simple", line_is_simple, range_index, i, index, j
-                )  # , j2, j3, x)
+                # print(
+                #     "line_is_simple", line_is_simple, range_index, i, index, j
+                # )  # , j2, j3, x)
                 if not line_is_simple:
                     #     for j4 in range(len(ring)):

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/io/_is_dapla.py RENAMED Viewed

@@ -7,8 +7,5 @@ import os
 def is_dapla() -> bool:
-    """From https://github.com/statisticsnorway/ssb-altinn-python/blob/main/src/altinn/utils.py."""
-    try:
-        return os.environ["GCS_TOKEN_PROVIDER_KEY"] == "google"
-    except KeyError:
-        return False
+    """Simply checks if an os environment variable contains the text 'dapla'."""
+    return any("dapla" in key.lower() for key in os.environ)

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/explore.py RENAMED Viewed

@@ -18,6 +18,7 @@ from typing import ClassVar
 import branca as bc
 import folium
 import geopandas as gpd
+import joblib
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
@@ -189,6 +190,10 @@ def _single_band_to_arr(band, mask, name, raster_data_dict):
         arr = band.clip(mask).values
     else:
         arr = band.load(indexes=1, bounds=mask).values
+    if _is_too_much_nodata([arr], band.nodata):
+        return False
     bounds: tuple = (
         _any_to_bbox_crs4326(mask, band.crs)
         if mask is not None
@@ -205,7 +210,28 @@ def _single_band_to_arr(band, mask, name, raster_data_dict):
             raster_data_dict["cmap"] = band.cmap or "Grays"
     raster_data_dict["arr"] = arr
     raster_data_dict["bounds"] = bounds
-    raster_data_dict["label"] = name
+    raster_data_dict["label"] = band.name or name
+    raster_data_dict["date"] = band.date
+    return True
+def _is_too_much_nodata(
+    arrays: list[np.ndarray],
+    nodata: int | None = None,
+    max_nodata_percentage: int = 100,
+) -> bool:
+    """Tell whether any of the arrays is empty or holds too large a share of nodata.
+    Args:
+        arrays: Arrays to check.
+        nodata: Value that counts as missing data.
+        max_nodata_percentage: Highest accepted share of nodata cells, in percent.
+    Returns:
+        True if any array has zero length along its first axis, or if the share
+        of nodata cells in any array is strictly above the threshold.
+    """
+    threshold = max_nodata_percentage / 100
+    # Pass 1: an array without rows has nothing to show.
+    for array in arrays:
+        if array.shape[0] == 0:
+            return True
+    # Pass 2: in masked arrays, masked cells and NaNs count as nodata as well.
+    for array in arrays:
+        if not isinstance(array, np.ma.core.MaskedArray):
+            continue
+        missing = (array.mask) | (array.data == nodata) | (np.isnan(array.data))
+        if np.mean(missing) > threshold:
+            return True
+    # Pass 3: plain comparison against the nodata value, for every array.
+    for array in arrays:
+        if np.mean(array == nodata) > threshold:
+            return True
+    return False
 def _any_to_bbox_crs4326(obj, crs):
@@ -240,6 +266,7 @@ class Explore(Map):
         text: str | None = None,
         decimals: int = 6,
         max_images: int = 10,
+        max_nodata_percentage: int = 100,
         **kwargs,
     ) -> None:
         """Initialiser.
@@ -266,6 +293,8 @@ class Explore(Map):
                 map. Defaults to 15.
             text: Optional text for a text box in the map.
             decimals: Number of decimals in the coordinates.
+            max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
+                image arrays.
             **kwargs: Additional keyword arguments. Can also be geometry-like objects
                 where the key is the label.
         """
@@ -280,6 +309,7 @@ class Explore(Map):
         self.text = text
         self.decimals = decimals
         self.max_images = max_images
+        self.max_nodata_percentage = max_nodata_percentage
         self.legend = None
         self.browser = browser
@@ -471,7 +501,10 @@ class Explore(Map):
         random_point = sample.sample_points(size=1)
-        self.center = (random_point.geometry.iloc[0].x, random_point.geometry.iloc[0].y)
+        self.center = (
+            float(random_point.geometry.iloc[0].x),
+            float(random_point.geometry.iloc[0].y),
+        )
         print(f"center={self.center}, size={size}")
         mask = random_point.buffer(size)
@@ -509,18 +542,26 @@ class Explore(Map):
     def _load_rasters_as_images(self):
         self.raster_data = []
-        n_added_images = 0
         self._show_rasters = True
-        for name, value in self.rasters.items():
-            data, n_added_images = self._image_collection_to_background_map(
-                value,
-                self.mask,
-                name,
-                max_images=self.max_images,
-                n_added_images=n_added_images,
+        with joblib.Parallel(len(self.rasters) or 1, backend="threading") as parallel:
+            results = parallel(
+                joblib.delayed(_image_collection_to_background_map)(
+                    raster,
+                    name,
+                    self.mask,
+                    self.max_images,
+                    self.max_nodata_percentage,
+                )
+                for name, raster in self.rasters.items()
             )
+        for data in results:
             self.raster_data += data
+        if len(self.raster_data) > 6:
+            self._show_rasters = False
     def _rasters_to_background_maps(self):
         for raster_data_dict in self.raster_data:
             try:
@@ -1061,160 +1102,6 @@ class Explore(Map):
             **kwargs,
         )
-    def _image_collection_to_background_map(
-        self,
-        image_collection: ImageCollection | Image | Band,
-        mask: Any | None,
-        name: str,
-        max_images: int,
-        n_added_images: int,
-        rbg_bands: list[str] = (("B04", "B02", "B03"), ("B4", "B2", "B3")),
-    ) -> tuple[list[dict], int]:
-        out = []
-        if all(isinstance(x, str) for x in rbg_bands):
-            rbg_bands = (rbg_bands,)
-        if isinstance(image_collection, ImageCollection):
-            images = image_collection.images
-            name = None
-        elif isinstance(image_collection, Image):
-            img = image_collection
-            if not _intersects_if_not_none_or_empty(
-                mask, img.bounds
-            ):  # is not None and not to_shapely(mask).intersects(
-                #     to_shapely(img.bounds)
-                # ):
-                return out, n_added_images
-            if len(img) == 1:
-                band = next(iter(img))
-                raster_data_dict = {}
-                out.append(raster_data_dict)
-                name = _determine_label(band, name, out, n_added_images)
-                _single_band_to_arr(band, mask, name, raster_data_dict)
-                n_added_images += 1
-                return out, n_added_images
-            elif len(img) < 3:
-                raster_data_dict = {}
-                out.append(raster_data_dict)
-                for band in img:
-                    name = _determine_label(band, None, out, n_added_images)
-                    _single_band_to_arr(band, mask, name, raster_data_dict)
-                    n_added_images += 1
-                return out, n_added_images
-            else:
-                images = [image_collection]
-        elif isinstance(image_collection, Band):
-            band = image_collection
-            if not _intersects_if_not_none_or_empty(
-                mask, band.bounds
-            ):  # mask is not None and not to_shapely(mask).intersects(
-                #     to_shapely(band.bounds)
-                # ):
-                return out, n_added_images
-            raster_data_dict = {}
-            out.append(raster_data_dict)
-            _single_band_to_arr(band, mask, name, raster_data_dict)
-            return out, n_added_images
-        else:
-            raise TypeError(type(image_collection))
-        if max(len(out), len(images)) + n_added_images > max_images:
-            warnings.warn(
-                f"Showing only a sample of {max_images}. Set 'max_images.", stacklevel=1
-            )
-            self._show_rasters = False
-            random.shuffle(images)
-            images = images[: (max_images - n_added_images)]
-            images = (
-                list(sorted([img for img in images if img.date is not None]))
-                + sorted(
-                    [
-                        img
-                        for img in images
-                        if img.date is None and img.path is not None
-                    ],
-                    key=lambda x: x.path,
-                )
-                + [img for img in images if img.date is None and img.path is None]
-            )
-        for image in images:
-            if not _intersects_if_not_none_or_empty(
-                mask, image.bounds
-            ):  # mask is not None and not to_shapely(mask).intersects(
-                #     to_shapely(image.bounds)
-                # ):
-                continue
-            raster_data_dict = {}
-            out.append(raster_data_dict)
-            if len(image) < 3:
-                for band in image:
-                    name = _determine_label(band, None, out, n_added_images)
-                    _single_band_to_arr(band, mask, name, raster_data_dict)
-                    n_added_images += 1
-                continue
-            def load(band_id: str) -> Band:
-                band = image[band_id]
-                if band.has_array and mask is not None:
-                    band = band.clip(mask, copy=True)
-                elif not band.has_array:
-                    band = band.load(indexes=1, bounds=mask)
-                return band
-            for red, blue, green in rbg_bands:
-                try:
-                    red_band = load(red)
-                except KeyError:
-                    continue
-                try:
-                    blue_band = load(blue)
-                except KeyError:
-                    continue
-                try:
-                    green_band = load(green)
-                except KeyError:
-                    continue
-                break
-            crs = red_band.crs
-            bounds = to_bbox(to_gdf(red_band.bounds, crs).to_crs(4326))
-            red_band = red_band.values
-            blue_band = blue_band.values
-            green_band = green_band.values
-            if (
-                red_band.shape[0] == 0
-                or blue_band.shape[0] == 0
-                or green_band.shape[0] == 0
-            ):
-                continue
-            # to 3d array in shape (x, y, 3)
-            rbg_image = np.stack([red_band, blue_band, green_band], axis=2)
-            raster_data_dict["arr"] = rbg_image
-            raster_data_dict["bounds"] = bounds
-            raster_data_dict["cmap"] = None
-            raster_data_dict["label"] = _determine_label(
-                image, name, out, n_added_images
-            )
-            n_added_images += 1
-        return out, n_added_images
 def _tooltip_popup(
     type_: str, fields: Any, gdf: GeoDataFrame, **kwargs
@@ -1252,29 +1139,24 @@ def _intersects_if_not_none_or_empty(obj: Any, other: Any) -> bool:
     return obj.intersects(to_shapely(other))
-def _determine_label(
-    obj: Image | Band | ImageCollection, obj_name: str | None, out: list[dict], i: int
-) -> str:
+def _determine_label(obj: Image | Band | ImageCollection, obj_name: str | None) -> str:
     # Prefer the object's name
     if obj_name:
         # Avoid the generic label e.g. Image(1)
         does_not_have_generic_name = (
-            re.sub("(\d+)", "", obj_name) != f"{obj.__class__.__name__}()"
+            re.sub(r"(\d+)", "", obj_name) != f"{obj.__class__.__name__}()"
         )
         if does_not_have_generic_name:
             return obj_name
     try:
-        # Images/Bands/Collections constructed from arrays have no path stems
         if obj.name:
             name = obj.name
         else:
+            # Images/Bands/Collections constructed from arrays have no path stems
             name = str(obj)[:23]
     except (AttributeError, ValueError):
         name = str(obj)[:23]
-    if name in [x["label"] for x in out if "label" in x]:
-        name += f"_{i}"
     return name
@@ -1448,3 +1330,140 @@ def get_textbox(text: str) -> str:
 </style>
 {{% endmacro %}}
 """
+def _add_one_image(
+    image: Image, mask, rbg_bands, name: str, max_nodata_percentage: int
+) -> dict | None:
+    """Build the raster data dictionary for a single image.
+    Images with fewer than three bands are added band by band through
+    '_single_band_to_arr'. Other images get three bands loaded (or clipped to
+    'mask' if already loaded) and stacked into one 3d array.
+    Args:
+        image: The image to read bands from.
+        mask: Optional area to clip/load the bands by.
+        rbg_bands: Iterable of (red, blue, green) band id triples. The first
+            triple where all three ids are found in the image is used.
+        name: Fallback label if the image or band has no name.
+        max_nodata_percentage: Highest accepted percentage of nodata cells.
+    Returns:
+        Dictionary with the keys 'arr', 'bounds', 'cmap', 'label' and 'date' for
+        three-band images, whatever '_single_band_to_arr' filled in for images
+        with fewer bands, or None if the arrays are empty or exceed the nodata limit.
+    Raises:
+        KeyError: If none of the band id triples in 'rbg_bands' are in the image.
+    """
+    raster_data_dict = {}
+    if len(image) < 3:
+        # Too few bands for a color composite: add the bands one at a time.
+        for band in image:
+            name = _determine_label(band, band.name or name)
+            _single_band_to_arr(band, mask, name, raster_data_dict)
+        return raster_data_dict
+    def load(band_id: str) -> Band:
+        band = image[band_id]
+        if band.has_array and mask is not None:
+            band = band.clip(mask, copy=True)
+        elif not band.has_array:
+            band = band.load(indexes=1, bounds=mask)
+        return band
+    for red, blue, green in rbg_bands:
+        try:
+            red_band = load(red)
+            blue_band = load(blue)
+            green_band = load(green)
+        except KeyError:
+            # At least one band id is missing: try the next naming convention.
+            continue
+        break
+    else:
+        # Without this, the code below would fail with an unbound 'red_band'.
+        raise KeyError(
+            f"None of the band combinations {rbg_bands} were found in the image."
+        )
+    crs = red_band.crs
+    bounds = to_bbox(to_gdf(red_band.bounds, crs).to_crs(4326))
+    red_band = red_band.values
+    blue_band = blue_band.values
+    green_band = green_band.values
+    if _is_too_much_nodata(
+        [red_band, blue_band, green_band], image.nodata, max_nodata_percentage
+    ):
+        return None
+    # to 3d array in shape (x, y, 3)
+    # NOTE(review): the channels are stacked in red, blue, green order; confirm
+    # that this is the order the rendering code expects.
+    rbg_image = np.stack([red_band, blue_band, green_band], axis=2)
+    raster_data_dict["arr"] = rbg_image
+    raster_data_dict["bounds"] = bounds
+    raster_data_dict["cmap"] = None
+    raster_data_dict["label"] = _determine_label(image, image.name or name)
+    raster_data_dict["date"] = image.date
+    return raster_data_dict
+def _unique_label(label: str, taken: set[str]) -> str:
+    """Return 'label' if it is not in 'taken', else 'label' with the first free suffix '_2', '_3', ..."""
+    candidate = label
+    counter = 1
+    while candidate in taken:
+        counter += 1
+        candidate = f"{label}_{counter}"
+    return candidate
+def _image_collection_to_background_map(
+    image_collection: ImageCollection | Image | Band,
+    name: str,
+    mask: Any | None,
+    max_images: int,
+    max_nodata_percentage: int,
+    rbg_bands: list[str] = (("B04", "B02", "B03"), ("B4", "B2", "B3")),
+) -> list[dict]:
+    """Convert an ImageCollection, Image or Band to a list of raster data dictionaries.
+    Args:
+        image_collection: The ImageCollection, Image or Band to convert.
+        name: Label to use for a Band. Replaced by the image name for an Image
+            and by None for an ImageCollection.
+        mask: Optional area. Collections are filtered by it, and a Band that does
+            not intersect it gives an empty list.
+        max_images: Maximum number of images to add.
+        max_nodata_percentage: Highest accepted percentage of nodata cells per image.
+        rbg_bands: One (red, blue, green) triple of band ids, or several triples
+            to try in order.
+    Returns:
+        List of dictionaries, one per added image, sorted by the 'date' values if
+        all are truthy, otherwise by 'label'. Labels are made unique.
+    Raises:
+        TypeError: If 'image_collection' is not an ImageCollection, Image or Band.
+    """
+    out = []
+    if all(isinstance(x, str) for x in rbg_bands):
+        # A single triple of band ids was given: wrap it to get a sequence of triples.
+        rbg_bands = (rbg_bands,)
+    if isinstance(image_collection, ImageCollection):
+        if mask is not None:
+            image_collection = image_collection.filter(bbox=mask)
+        images: list[Image] = image_collection.images
+        name = None
+    elif isinstance(image_collection, Image):
+        images: list[Image] = [image_collection]
+        name = image_collection.name
+    elif isinstance(image_collection, Band):
+        band = image_collection
+        if not _intersects_if_not_none_or_empty(mask, band.bounds):
+            return out
+        raster_data_dict = {}
+        out.append(raster_data_dict)
+        _single_band_to_arr(band, mask, name, raster_data_dict)
+        return out
+    else:
+        raise TypeError(type(image_collection))
+    if len(images) > max_images:
+        warnings.warn(
+            f"Showing only a sample of {max_images}. Set 'max_images.", stacklevel=1
+        )
+        # Shuffle a copy, in case '.images' hands out the collection's own list.
+        images = list(images)
+        random.shuffle(images)
+    n_added_images = 0
+    taken_labels: set[str] = set()
+    # Images can be dropped (e.g. too much nodata), so keep loading new batches
+    # until 'max_images' are added or there are no images left.
+    while n_added_images < max_images:
+        n_max = min(max_images - n_added_images, len(images))
+        if not n_max:
+            break
+        these_images, images = images[:n_max], images[n_max:]
+        with joblib.Parallel(n_max, backend="threading") as parallel:
+            results = parallel(
+                joblib.delayed(_add_one_image)(
+                    img, mask, rbg_bands, name, max_nodata_percentage
+                )
+                for img in these_images
+            )
+        for x in results:
+            if not x:
+                continue
+            # The previous 'str.rstrip' call with two arguments raised TypeError
+            # as soon as two images had the same label.
+            x["label"] = _unique_label(x["label"], taken_labels)
+            taken_labels.add(x["label"])
+            n_added_images += 1
+            out.append(x)
+    if all(x["date"] for x in out):
+        out = sorted(out, key=lambda x: x["date"])
+    else:
+        out = sorted(out, key=lambda x: x["label"])
+    return out

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/maps.py RENAMED Viewed

@@ -86,7 +86,7 @@ def explore(
     smooth_factor: int | float = 1.5,
     size: int | None = None,
     max_images: int = 10,
-    images_to_gdf: bool = False,
+    max_nodata_percentage: int = 100,
     **kwargs,
 ) -> Explore:
     """Interactive map of GeoDataFrames with layers that can be toggled on/off.
@@ -116,8 +116,8 @@ def explore(
             1000.
         max_images: Maximum number of images (Image, ImageCollection, Band) to show per
             map. Defaults to 10.
-        images_to_gdf: If True (not default), images (Image, ImageCollection, Band)
-            will be converted to GeoDataFrame and added to the map.
+        max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
+            image arrays.
         **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
             instance 'cmap' to change the colors, 'scheme' to change how the data
             is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -165,6 +165,8 @@ def explore(
             mask=mask,
             browser=browser,
             max_zoom=max_zoom,
+            max_images=max_images,
+            max_nodata_percentage=max_nodata_percentage,
             **kwargs,
         )
@@ -211,46 +213,6 @@ def explore(
             else:
                 mask = mask4326.to_crs(to_crs)
-        # else:
-        #     mask_flipped = mask
-        # # coords = mask.get_coordinates()
-        # if (
-        #     (mask_flipped.distance(bounds) > size).all()
-        #     # and coords["x"].max() < 180
-        #     # and coords["y"].max() < 180
-        #     # and coords["x"].min() > -180
-        #     # and coords["y"].min() > -180
-        # ):
-        #     try:
-        #         bounds4326 = to_gdf(bounds, to_crs).to_crs(4326).geometry.iloc[0]
-        #     except ValueError:
-        #         bounds4326 = to_gdf(bounds, to_crs).set_crs(4326).geometry.iloc[0]
-        #     mask4326 = mask.set_crs(4326, allow_override=True)
-        #     if (mask4326.distance(bounds4326) > size).all():
-        #         # try flipping coordinates
-        #         x, y = list(mask4326.geometry.iloc[0].coords)[0]
-        #         mask4326 = to_gdf([y, x], 4326)
-        #     mask = mask4326
-        #     # if mask4326.intersects(bounds4326).any():
-        #     #     mask = mask4326
-        #     # else:
-        #     #     try:
-        #     #         mask = mask.to_crs(to_crs)
-        #     #     except ValueError:
-        #     #         pass
-        # else:
-        #     mask = mask_flipped
-        # try:
-        #     mask = mask.to_crs(to_crs)
-        # except ValueError:
-        #     pass
         if get_geom_type(mask) in ["point", "line"]:
             mask = mask.buffer(size)
@@ -260,6 +222,8 @@ def explore(
             mask=mask,
             browser=browser,
             max_zoom=max_zoom,
+            max_images=max_images,
+            max_nodata_percentage=max_nodata_percentage,
             **kwargs,
         )
@@ -270,6 +234,7 @@ def explore(
         max_zoom=max_zoom,
         smooth_factor=smooth_factor,
         max_images=max_images,
+        max_nodata_percentage=max_nodata_percentage,
         **kwargs,
     )
@@ -294,6 +259,7 @@ def samplemap(
     explore: bool = True,
     browser: bool = False,
     max_images: int = 10,
+    max_nodata_percentage: int = 100,
     **kwargs,
 ) -> Explore:
     """Shows an interactive map of a random area of GeoDataFrames.
@@ -327,6 +293,8 @@ def samplemap(
             If True the maps will be opened in a browser folder.
         max_images: Maximum number of images (Image, ImageCollection, Band) to show per
             map. Defaults to 10.
+        max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
+            image arrays.
         **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
             instance 'cmap' to change the colors, 'scheme' to change how the data
             is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -409,6 +377,7 @@ def samplemap(
         explore=explore,
         smooth_factor=smooth_factor,
         max_images=max_images,
+        max_nodata_percentage=max_nodata_percentage,
         **kwargs,
     )
@@ -422,6 +391,7 @@ def clipmap(
     smooth_factor: int | float = 1.5,
     browser: bool = False,
     max_images: int = 10,
+    max_nodata_percentage: int = 100,
     **kwargs,
 ) -> Explore | Map:
     """Shows an interactive map of a of GeoDataFrames clipped to the mask extent.
@@ -450,6 +420,8 @@ def clipmap(
             If True the maps will be opened in a browser folder.
         max_images: Maximum number of images (Image, ImageCollection, Band) to show per
             map. Defaults to 10.
+        max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
+            image arrays.
         **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
             instance 'cmap' to change the colors, 'scheme' to change how the data
             is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -484,6 +456,7 @@ def clipmap(
             max_zoom=max_zoom,
             smooth_factor=smooth_factor,
             max_images=max_images,
+            max_nodata_percentage=max_nodata_percentage,
             **kwargs,
         )
         m.mask = mask

{ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/image_collection.py RENAMED Viewed

@@ -177,6 +177,90 @@ def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
     return set(itertools.chain.from_iterable(all_paths))
+@dataclass
+class PixelwiseResults:
+    """Container of pixelwise results to be converted to numpy/geopandas."""
+    # Row and column index of each pixel that has a result. Same length as 'results'.
+    row_indices: np.ndarray
+    col_indices: np.ndarray
+    # One result per pixel. None means no value for that pixel.
+    results: list[Any]
+    # Resolution. Converted to an (x, y) pair with '_res_as_tuple' when used.
+    res: int | tuple[int, int]
+    # The first two values are used as minx and miny.
+    bounds: tuple[float, float, float, float]
+    # Shape of the arrays returned by 'to_numpy'.
+    shape: tuple[int, int]
+    crs: Any
+    # Fill value for pixels without a result in 'to_numpy'.
+    nodata: int | float | None
+    def to_tuple(self) -> tuple[np.ndarray, np.ndarray, list[Any]]:
+        """Return 3-length tuple of row indices, column indices and pixelwise results."""
+        return self.row_indices, self.col_indices, self.results
+    def to_dict(self) -> dict[tuple[int, int], Any]:
+        """Return dictionary with row and column indices as keys and pixelwise results as values."""
+        return {
+            (int(row), int(col)): value
+            for row, col, value in zip(
+                self.row_indices, self.col_indices, self.results, strict=True
+            )
+        }
+    def to_geopandas(self, column: str = "value") -> GeoDataFrame:
+        """Return GeoDataFrame with pixel geometries and values from the pixelwise operation.
+        Args:
+            column: Name of the column holding the pixelwise results.
+        Returns:
+            GeoDataFrame with one box geometry per result, indexed by row and
+            column indices.
+        """
+        minx, miny = self.bounds[:2]
+        resx, resy = _res_as_tuple(self.res)
+        # NOTE(review): the offsets below are multiplied by minx/miny, and rows are
+        # paired with x and columns with y. An offset of index * resolution from
+        # the raster corner would be the expected form - confirm against the
+        # orientation of the arrays before relying on these geometries.
+        minxs = np.full(self.row_indices.shape, minx) + (minx * self.row_indices * resx)
+        minys = np.full(self.col_indices.shape, miny) + (miny * self.col_indices * resy)
+        maxxs = minxs + resx
+        maxys = minys + resy
+        return GeoDataFrame(
+            {
+                column: self.results,
+                "geometry": [
+                    box(minx, miny, maxx, maxy)
+                    for minx, miny, maxx, maxy in zip(
+                        minxs, minys, maxxs, maxys, strict=True
+                    )
+                ],
+            },
+            index=[self.row_indices, self.col_indices],
+            crs=self.crs,
+        )
+    def to_numpy(self) -> np.ndarray | tuple[np.ndarray, ...]:
+        """Reshape pixelwise results to 2d numpy arrays in the shape of the full arrays of the image bands.
+        Pixels without a result keep the nodata value. Arrays containing only
+        whole numbers are converted to integer arrays.
+        Returns:
+            A single array if each pixel has one result value, otherwise a tuple
+            with one array per result value.
+        """
+        # Peek at the first result to see how many values each pixel produced.
+        # No results at all, or a first result without a length (scalar or None),
+        # gives a single output array.
+        first_result = next(iter(self.results), None)
+        try:
+            n_out_arrays = len(first_result)
+        except TypeError:
+            n_out_arrays = 1
+        out_arrays = [
+            np.full(self.shape, self.nodata).astype(np.float64)
+            for _ in range(n_out_arrays)
+        ]
+        for row, col, these_results in zip(
+            self.row_indices, self.col_indices, self.results, strict=True
+        ):
+            if these_results is None:
+                continue
+            for i, arr in enumerate(out_arrays):
+                try:
+                    arr[row, col] = these_results[i]
+                except TypeError:
+                    # The result is a single value, not a sequence.
+                    arr[row, col] = these_results
+        for i, array in enumerate(out_arrays):
+            all_are_integers = np.all(np.mod(array, 1) == 0)
+            if all_are_integers:
+                out_arrays[i] = array.astype(int)
+        if len(out_arrays) == 1:
+            return out_arrays[0]
+        return tuple(out_arrays)
 class ImageCollectionGroupBy:
     """Iterator and merger class returned from groupby.
@@ -573,6 +657,12 @@ class _ImageBandBase(_ImageBase):
             return self._month
         return str(self.date).replace("-", "").replace("/", "")[4:6]
+    @property
+    def day(self) -> str:
+        """The '_day' attribute if set and truthy, else characters 7-8 of the date with '-' and '/' removed."""
+        explicit_day = getattr(self, "_day", None)
+        if explicit_day:
+            return explicit_day
+        # Presumably the date is written year-month-day, so that the day ends up
+        # in positions 6-7 once the separators are gone.
+        compact_date = str(self.date)
+        for separator in ("-", "/"):
+            compact_date = compact_date.replace(separator, "")
+        return compact_date[6:8]
     @property
     def name(self) -> str | None:
         if hasattr(self, "_name") and self._name is not None:
@@ -617,19 +707,19 @@ class _ImageBandBase(_ImageBase):
             return nonmissing_metadata_attributes
         # read all xml content once
-        file_contents: list[str] = []
+        file_contents: dict[str, str] = {}
         for path in self._all_file_paths:
             if ".xml" not in path:
                 continue
             with _open_func(path, "rb") as file:
-                file_contents.append(file.read().decode("utf-8"))
+                file_contents[path] = file.read().decode("utf-8")
         def is_last_xml(i: int) -> bool:
             return i == len(file_contents) - 1
         for attr, value in missing_metadata_attributes.items():
             results = None
-            for i, file_content in enumerate(file_contents):
+            for i, file_content in enumerate(file_contents.values()):
                 if isinstance(value, str) and value in dir(self):
                     # method or a hardcoded value
                     value: Callable | Any = getattr(self, value)
@@ -639,7 +729,7 @@ class _ImageBandBase(_ImageBase):
                         results = value(file_content)
                     except _RegexError as e:
                         if is_last_xml(i):
-                            raise e.__class__(self.path, e) from e
+                            raise e.__class__(self.path, list(file_contents), e) from e
                         continue
                     if results is not None:
                         break
@@ -804,9 +894,7 @@ class Band(_ImageBandBase):
             )
         else:
             self._path = _fix_path(str(data))
-            if callable(res) and res() is None:
-                res = None
-            self._res = res
+            self._res = res if not (callable(res) and res() is None) else None
         if cmap is not None:
             self.cmap = cmap
@@ -1476,7 +1564,7 @@ class Image(_ImageBandBase):
                 f"'data' must be string, Path-like or a sequence of Band. Got {data}"
             )
-        self._res = res
+        self._res = res if not (callable(res) and res() is None) else None
         self._path = _fix_path(data)
         if all_file_paths is None and self.path:
@@ -1490,7 +1578,7 @@ class Image(_ImageBandBase):
         if df is None:
             if not self._all_file_paths:
-                self._all_file_paths = [self.path]
+                self._all_file_paths = {self.path}
             df = self._create_metadata_df(self._all_file_paths)
         df["image_path"] = df["image_path"].astype(str)
@@ -1515,7 +1603,7 @@ class Image(_ImageBandBase):
         if self.metadata:
             try:
                 metadata = self.metadata[self.path]
-            except KeyError:
+            except KeyError as e:
                 metadata = {}
             for key, value in metadata.items():
                 if key in dir(self):
@@ -1955,7 +2043,7 @@ class ImageCollection(_ImageBase):
         self.nodata = nodata
         self.level = level
         self.processes = processes
-        self._res = res
+        self._res = res if not (callable(res) and res() is None) else None
         self._crs = None
         self._df = None
@@ -1980,9 +2068,9 @@ class ImageCollection(_ImageBase):
                         ) from e
                     raise e
                 if self.level:
-                    self._all_file_paths = [
+                    self._all_file_paths = {
                         path for path in self._all_file_paths if self.level in path
-                    ]
+                    }
                 self._df = self._create_metadata_df(self._all_file_paths)
                 return
@@ -1994,9 +2082,9 @@ class ImageCollection(_ImageBase):
         self._all_file_paths = _get_all_file_paths(self.path)
         if self.level:
-            self._all_file_paths = [
+            self._all_file_paths = {
                 path for path in self._all_file_paths if self.level in path
-            ]
+            }
         self._df = self._create_metadata_df(self._all_file_paths)
@@ -2079,6 +2167,7 @@ class ImageCollection(_ImageBase):
         kwargs: dict | None = None,
         index_aligned_kwargs: dict | None = None,
         masked: bool = True,
+        processes: int | None = None,
     ) -> np.ndarray | tuple[np.ndarray] | None:
         """Run a function for each pixel.
@@ -2108,13 +2197,23 @@ class ImageCollection(_ImageBase):
         else:
             mask_array = None
-        return pixelwise(
+        nonmissing_row_indices, nonmissing_col_indices, results = pixelwise(
             func=func,
             values=values,
             mask_array=mask_array,
             index_aligned_kwargs=index_aligned_kwargs,
             kwargs=kwargs,
-            processes=self.processes,
+            processes=processes or self.processes,
+        )
+        return PixelwiseResults(
+            nonmissing_row_indices,
+            nonmissing_col_indices,
+            results,
+            shape=values.shape[1:],
+            res=self.res,
+            bounds=self.bounds,
+            crs=self.crs,
             nodata=self.nodata or np.nan,
         )
@@ -2552,15 +2651,9 @@ class ImageCollection(_ImageBase):
         other = to_shapely(other)
-        if self.processes == 1:
-            intersects_list: pd.Series = GeoSeries(
-                [img.union_all() for img in self]
-            ).intersects(other)
-        else:
-            with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
-                intersects_list: list[bool] = parallel(
-                    joblib.delayed(_intesects)(image, other) for image in self
-                )
+        intersects_list: pd.Series = GeoSeries(
+            [img.union_all() for img in self]
+        ).intersects(other)
         self.images = [
             image
@@ -2990,21 +3083,23 @@ class Sentinel2Config:
             xml_file,
         )
         if match_ is None:
-            raise _RegexError()
+            return None
         if "NOT_REFINED" in match_.group(0):
             return False
         elif "REFINED" in match_.group(0):
             return True
         else:
-            raise _RegexError()
+            raise _RegexError(xml_file)
     def _get_boa_quantification_value(self, xml_file: str) -> int:
         return int(
             _extract_regex_match_from_string(
                 xml_file,
                 (
-                    r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
+                    r'<BOA_QUANTIFICATION_VALUE unit="none">(\d+)</BOA_QUANTIFICATION_VALUE>',
+                    # r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
+                    r'<QUANTIFICATION_VALUE unit="none">?(\d+)</QUANTIFICATION_VALUE>',
                 ),
             )
         )
@@ -3424,10 +3519,6 @@ def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
     return band.apply(func, **kwargs)
-def _clip_band(band: Band, mask, **kwargs) -> Band:
-    return band.clip(mask, **kwargs)
 def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
     return collection.merge_by_band(**kwargs)
@@ -3553,26 +3644,25 @@ def pixelwise(
     index_aligned_kwargs: dict | None = None,
     kwargs: dict | None = None,
     processes: int = 1,
-    nodata=np.nan,
-) -> Any:
+) -> tuple[np.ndarray, np.ndarray, list[Any]]:
     """Run a function for each pixel of a 3d array."""
     index_aligned_kwargs = index_aligned_kwargs or {}
     kwargs = kwargs or {}
     if mask_array is not None:
+        # skip pixels where all values are masked
         not_all_missing = np.all(mask_array, axis=0) == False
     else:
         mask_array = np.full(values.shape, False)
         not_all_missing = np.full(values.shape[1:], True)
-    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
     def select_pixel_values(row: int, col: int) -> np.ndarray:
         return values[~mask_array[:, row, col], row, col]
+    # loop through long 1d arrays of aligned row and col indices
+    nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
     with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
-        results: list[tuple[np.float64, np.float64]] = parallel(
+        results: list[Any] = parallel(
             joblib.delayed(func)(
                 select_pixel_values(row, col),
                 **kwargs,
@@ -3586,31 +3676,4 @@ def pixelwise(
             )
         )
-    if all(x is None for x in results):
-        return
-    try:
-        n_out_arrays = len(next(iter(results)))
-    except TypeError:
-        n_out_arrays = 1
-    out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))
-    counter = 0
-    for row, col in zip(nonmissing_row_indices, nonmissing_col_indices, strict=True):
-        these_results = results[counter]
-        if these_results is None:
-            counter += 1
-            continue
-        for i, arr in enumerate(out_arrays):
-            try:
-                arr[row, col] = these_results[i]
-            except TypeError:
-                arr[row, col] = these_results
-        counter += 1
-    assert counter == len(results), (counter, len(results))
-    if len(out_arrays) == 1:
-        return out_arrays[0]
-    return out_arrays
+    return nonmissing_row_indices, nonmissing_col_indices, results