PyPI - ssb-sgis - Versions diffs - 1.2.4__tar.gz → 1.2.7__tar.gz - Mend

ssb-sgis 1.2.4.tar.gz → 1.2.7.tar.gz

This diff shows the differences between the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (67) hide show

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 1.2.4
+Version: 1.2.7
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-sgis"
-version = "1.2.4"
+version = "1.2.7"
 description = "GIS functions used at Statistics Norway."
 authors = ["Morten Letnes <morten.letnes@ssb.no>"]
 license = "MIT"

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/geopandas_tools/conversion.py RENAMED Viewed

@@ -43,14 +43,6 @@ except ImportError:
         """Placeholder."""
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:  # type: ignore
-        """Placeholder."""
 def crs_to_string(crs: Any) -> str:
     """Extract the string of a CRS-like object."""
     if crs is None:
@@ -415,21 +407,6 @@ def to_gdf(
             except Exception:
                 pass
-    if isinstance(obj, RasterDataset):
-        # read the entire dataset
-        obj = obj[obj.bounds]
-        crs = obj["crs"]
-        array = np.array(obj["image"])
-        transform = get_transform_from_bounds(obj["bbox"], shape=array.shape)
-        return gpd.GeoDataFrame(
-            pd.DataFrame(
-                _array_to_geojson(array, transform),
-                columns=["value", "geometry"],
-            ),
-            geometry="geometry",
-            crs=crs,
-        )
     if is_array_like(geometry) and len(geometry) == len(obj):  # type: ignore
         geometry = GeoSeries(
             _make_one_shapely_geom(g) for g in geometry if g is not None  # type: ignore
@@ -442,10 +419,6 @@ def to_gdf(
     # get done with iterators that would get consumed by 'all' later
     if isinstance(obj, Iterator) and not isinstance(obj, Sized):
         obj = list(obj)
-        # obj = GeoSeries(
-        #     (_make_one_shapely_geom(g) for g in obj if g is not None), index=index
-        # )
-        # return GeoDataFrame({geom_col: obj}, geometry=geom_col, crs=crs, **kwargs)
     if hasattr(obj, "__len__") and not len(obj):
         return GeoDataFrame({"geometry": []}, crs=crs)

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/geopandas_tools/geocoding.py RENAMED Viewed

@@ -1,7 +1,3 @@
-try:
-    import geocoder
-except ImportError:
-    pass
 from geopandas import GeoDataFrame
 from .conversion import to_gdf
@@ -9,6 +5,8 @@ from .conversion import to_gdf
 def address_to_gdf(address: str, crs=4326) -> GeoDataFrame:
     """Takes an address and returns a point GeoDataFrame."""
+    import geocoder
     g = geocoder.osm(address).json
     coords = g["lng"], g["lat"]
     return to_gdf(coords, crs=4326).to_crs(crs)
@@ -16,6 +14,8 @@ def address_to_gdf(address: str, crs=4326) -> GeoDataFrame:
 def address_to_coords(address: str, crs=4326) -> tuple[float, float]:
     """Takes an address and returns a tuple of xy coordinates."""
+    import geocoder
     g = geocoder.osm(address).json
     coords = g["lng"], g["lat"]
     point = to_gdf(coords, crs=4326).to_crs(crs)

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/geopandas_tools/neighbors.py RENAMED Viewed

@@ -15,7 +15,6 @@ from geopandas import GeoSeries
 from pandas import DataFrame
 from pandas import MultiIndex
 from pandas import Series
-from sklearn.neighbors import NearestNeighbors
 from ..conf import _get_instance
 from ..conf import config
@@ -467,6 +466,8 @@ def k_nearest_neighbors(
         of the neighbors.
     """
+    from sklearn.neighbors import NearestNeighbors
     if not len(to_array) or not len(from_array):
         return np.array([]), np.array([])

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/geopandas_tools/sfilter.py RENAMED Viewed

@@ -83,6 +83,8 @@ def sfilter(
     """
     if not isinstance(gdf, (GeoDataFrame | GeoSeries)):
         raise TypeError(gdf_type_error_message)
+    if not len(gdf):
+        return gdf
     other = _sfilter_checks(other, crs=gdf.crs)
@@ -159,6 +161,9 @@ def sfilter_split(
     >>> not_intersecting = df1.loc[~filt]
     """
+    if not len(gdf):
+        return gdf, gdf
     if not isinstance(gdf, (GeoDataFrame | GeoSeries)):
         raise TypeError(gdf_type_error_message)
@@ -235,6 +240,9 @@ def sfilter_inverse(
     """
     if not isinstance(gdf, (GeoDataFrame | GeoSeries)):
         raise TypeError(gdf_type_error_message)
+    if not len(gdf):
+        return gdf
     other = _sfilter_checks(other, crs=gdf.crs)
     indices = _get_sfilter_indices(
         gdf, other, predicate, distance, n_jobs, rtree_runner

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/io/dapla_functions.py RENAMED Viewed

@@ -185,7 +185,9 @@ def _read_geopandas_from_iterable(
                     except ArrowInvalid as e:
                         if file_system.isfile(path):
                             raise ArrowInvalid(e, path) from e
-            return GeoDataFrame(cols | {"geometry": []})
+            first_path = next(iter(paths.index))
+            _, crs = _get_bounds_parquet(first_path, file_system)
+            return GeoDataFrame(cols | {"geometry": []}, crs=crs)
         paths = list(bounds_series.index)
     results: list[pyarrow.Table] = _read_pyarrow_with_treads(
@@ -203,7 +205,9 @@ def _read_geopandas_from_iterable(
                 print(e)
                 raise e
     else:
-        df = GeoDataFrame(cols | {"geometry": []})
+        first_path = next(iter(paths))
+        _, crs = _get_bounds_parquet(first_path, file_system)
+        df = GeoDataFrame(cols | {"geometry": []}, crs=crs)
     return df
@@ -786,7 +790,8 @@ def _read_partitioned_parquet(
     # add columns to empty DataFrame
     first_path = next(iter(child_paths + [path]))
-    df = pd.DataFrame(columns=_get_columns(first_path, file_system))
+    _, crs = _get_bounds_parquet(first_path, file_system)
+    df = GeoDataFrame(columns=_get_columns(first_path, file_system), crs=crs)
     if kwargs.get("columns"):
         return df[list(kwargs["columns"])]
     return df

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/maps/explore.py RENAMED Viewed

@@ -43,6 +43,7 @@ from ..geopandas_tools.general import clean_geoms
 from ..geopandas_tools.general import make_all_singlepart
 from ..geopandas_tools.geometry_types import get_geom_type
 from ..geopandas_tools.geometry_types import to_single_geom_type
+from ..geopandas_tools.sfilter import sfilter
 from ..helpers import _get_file_system
 from ..helpers import dict_zip
 from .wms import WmsLoader
@@ -69,14 +70,6 @@ from .map import _determine_best_name
 from .tilesources import kartverket
 from .tilesources import xyz
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 # the geopandas._explore raises a deprication warning. Ignoring for now.
 warnings.filterwarnings(
     action="ignore", category=matplotlib.MatplotlibDeprecationWarning
@@ -207,9 +200,9 @@ def _single_band_to_arr_is_too_much_nodata(
     if band.has_array and mask is None:
         arr = band.values
     elif band.has_array:
-        arr = band.clip(mask).values
+        arr = band.copy().clip(mask).values
     else:
-        arr = band.load(indexes=1, bounds=mask).values
+        arr = band.copy().load(indexes=1, bounds=mask).values
     if _is_too_much_nodata([arr], band.nodata, max_nodata_percentage):
         return True
@@ -495,10 +488,13 @@ class Explore(Map):
             if not isinstance(center, GeoDataFrame)
             else center
         )
+        centerbuffer = centerpoint.buffer(size)
         for label, gdf in self._gdfs.items():
             keep_geom_type = False if get_geom_type(gdf) == "mixed" else True
-            gdf = gdf.clip(centerpoint.buffer(size), keep_geom_type=keep_geom_type)
+            gdf = sfilter(gdf, centerbuffer).clip(
+                centerbuffer, keep_geom_type=keep_geom_type
+            )
             self._gdfs[label] = gdf
         self._gdf = pd.concat(self._gdfs.values(), ignore_index=True)
@@ -555,7 +551,7 @@ class Explore(Map):
             kwargs.pop("column", None)
         for label, gdf in self._gdfs.items():
-            gdf = gdf.clip(self.mask)
+            gdf = sfilter(gdf, self.mask).clip(self.mask)
             collections = gdf.loc[gdf.geom_type == "GeometryCollection"]
             if len(collections):
                 collections = make_all_singlepart(collections)
@@ -618,6 +614,8 @@ class Explore(Map):
                 arr,
                 bounds=[[miny, minx], [maxy, maxx]],
                 show=self._show_rasters,
+                vmin=arr.min(),
+                vmax=arr.max(),
                 **kwargs,
             )
             image_overlay.layer_name = Path(label).stem
@@ -1399,9 +1397,9 @@ def _add_one_image(
     def load(band_id: str) -> Band:
         band = image[band_id]
         if band.has_array and mask is not None:
-            band = band.clip(mask, copy=True)
+            band = band.copy().clip(mask, copy=True)
         elif not band.has_array:
-            band = band.load(indexes=1, bounds=mask)
+            band = band.copy().load(indexes=1, bounds=mask)
         return band
     for red, blue, green in rbg_bands:

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/maps/httpserver.py RENAMED Viewed

@@ -24,10 +24,11 @@ def run_html_server(contents: str | None = None, port: int = 3000) -> None:
     if "JUPYTERHUB_SERVICE_PREFIX" in os.environ:
         # Create a link using the https://github.com/jupyterhub/jupyter-server-proxy
         display_address = os.environ["JUPYTERHUB_SERVICE_PREFIX"] + f"proxy/{port}/"
+        stop_address = os.environ["JUPYTERHUB_SERVICE_PREFIX"] + f"proxy/{port}/stop"
         display_content = HTML(
             f"""
         <p>Click <a href='{display_address}'>here</a> to open in browser.</p>
-        <p>Click <a href='{display_address}/stop'>here</a> to stop.</p>
+        <p>Click <a href='{stop_address}'>here</a> to stop.</p>
         """
         )
     else:

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/maps/map.py RENAMED Viewed

@@ -14,12 +14,12 @@ import numpy as np
 import pandas as pd
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
+from pandas.api.types import is_dict_like
 try:
     from jenkspy import jenks_breaks
 except ImportError:
     pass
-from mapclassify import classify
 from pandas.errors import PerformanceWarning
 from shapely import Geometry
@@ -34,14 +34,6 @@ from ..raster.image_collection import Band
 from ..raster.image_collection import Image
 from ..raster.image_collection import ImageCollection
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 # the geopandas._explore raises a deprication warning. Ignoring for now.
 warnings.filterwarnings(
     action="ignore", category=matplotlib.MatplotlibDeprecationWarning
@@ -303,9 +295,10 @@ class Map:
             else:
                 return series.astype("string")
-        for i, gdf in enumerate(self._gdfs):
+        for label, gdf in self._gdfs.items():
             if self.column in gdf:
-                self._gdfs[i][self.column] = to_string_via_int(gdf[self.column])
+                gdf[self.column] = to_string_via_int(gdf[self.column])
+                self._gdfs[label] = gdf
         self._gdf[self.column] = to_string_via_int(self._gdf[self.column])
     def __bool__(self) -> bool:
@@ -442,7 +435,6 @@ class Map:
             GeoDataFrame,
             GeoSeries,
             Geometry,
-            RasterDataset,
             ImageCollection,
             Image,
             Band,
@@ -605,22 +597,29 @@ class Map:
         return False
     def _make_categories_colors_dict(self) -> None:
-        # custom categorical cmap
-        if not self._cmap and len(self._unique_values) <= len(_CATEGORICAL_CMAP):
+        if "color" in self.kwargs and is_dict_like(self.kwargs["color"]):
+            if self._column is None and not all(
+                key in self.kwargs["color"] for key in self._gdfs
+            ):
+                raise ValueError(
+                    "When specifying 'color' as dict-like, you must also pass a column "
+                    "or all gdfs passed must have labels/names corresponding to keys in the color dict."
+                )
+            self._categories_colors_dict = self.kwargs.pop("color")
+        elif not self._cmap and len(self._unique_values) <= len(_CATEGORICAL_CMAP):
+            # custom categorical cmap
             self._categories_colors_dict = {
                 category: _CATEGORICAL_CMAP[i]
                 for i, category in enumerate(self._unique_values)
             } | self._categories_colors_dict
         elif self._cmap:
             cmap = matplotlib.colormaps.get_cmap(self._cmap)
             self._categories_colors_dict = {
                 category: colors.to_hex(cmap(int(i)))
                 for i, category in enumerate(self._unique_values)
             } | self._categories_colors_dict
         else:
             cmap = matplotlib.colormaps.get_cmap("tab20")
             self._categories_colors_dict = {
                 category: colors.to_hex(cmap(int(i)))
                 for i, category in enumerate(self._unique_values)
@@ -664,6 +663,9 @@ class Map:
         if self.scheme == "jenks":
             bins = jenks_breaks(gdf[column].dropna(), n_classes=n_classes)
         else:
+            # local import because slow
+            from mapclassify import classify
             binning = classify(
                 np.asarray(gdf[column].dropna()),
                 scheme=self.scheme,

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/maps/maps.py RENAMED Viewed

@@ -29,18 +29,12 @@ from ..geopandas_tools.general import get_common_crs
 from ..geopandas_tools.general import is_wkt
 from ..geopandas_tools.geocoding import address_to_gdf
 from ..geopandas_tools.geometry_types import get_geom_type
+from ..geopandas_tools.sfilter import sfilter
 from .explore import Explore
 from .map import Map
 from .thematicmap import ThematicMap
 from .wms import WmsLoader
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 def _get_location_mask(kwargs: dict, gdfs) -> tuple[GeoDataFrame | None, dict]:
     try:
@@ -479,8 +473,10 @@ def clipmap(
         if m.gdfs is None and not len(m.rasters):
             return m
-        m._gdfs = {label: gdf.clip(mask) for label, gdf in m._gdfs.items()}
-        m._gdf = m._gdf.clip(mask)
+        m._gdfs = {
+            label: sfilter(gdf, mask).clip(mask) for label, gdf in m._gdfs.items()
+        }
+        m._gdf = sfilter(m._gdf, mask).clip(mask)
         m._nan_idx = m._gdf[m._column].isna()
         m._get_unique_values()
         m.explore(center=center, size=size)
@@ -494,8 +490,10 @@ def clipmap(
         if m.gdfs is None:
             return m
-        m._gdfs = {label: gdf.clip(mask) for label, gdf in m._gdfs.items()}
-        m._gdf = m._gdf.clip(mask)
+        m._gdfs = {
+            label: sfilter(gdf, mask).clip(mask) for label, gdf in m._gdfs.items()
+        }
+        m._gdf = sfilter(m._gdf, mask).clip(mask)
         m._nan_idx = m._gdf[m._column].isna()
         m._get_unique_values()
@@ -530,7 +528,7 @@ def explore_locals(
     frame = inspect.currentframe().f_back
-    allowed_types = (GeoDataFrame, GeoSeries, Geometry, RasterDataset)
+    allowed_types = (GeoDataFrame, GeoSeries, Geometry)
     local_gdfs = {}
     while True:

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/maps/wms.py RENAMED Viewed

@@ -20,7 +20,7 @@ JSON_YEARS = [str(year) for year in range(1999, 2025)]
 DEFAULT_YEARS: tuple[str] = tuple(
     str(year)
     for year in range(
-        int(datetime.datetime.now().year) - 8,
+        int(datetime.datetime.now().year) - 10,
         int(datetime.datetime.now().year) + 1,
     )
 )
@@ -111,6 +111,7 @@ class NorgeIBilderWms(WmsLoader):
                     this_tile["year"] = year
                 else:
                     this_tile["year"] = "9999"
                 all_tiles.append(this_tile)
         self.tiles = sorted(all_tiles, key=lambda x: x["year"])

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/networkanalysis/_get_route.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import warnings
+import joblib
 import pandas as pd
 from geopandas import GeoDataFrame
 from igraph import Graph
@@ -10,6 +11,7 @@ def _get_route_frequencies(
     graph: Graph,
     roads: GeoDataFrame,
     weight_df: DataFrame,
+    n_jobs: int,
 ) -> GeoDataFrame:
     """Function used in the get_route_frequencies method of NetworkAnalysis."""
     warnings.filterwarnings("ignore", category=RuntimeWarning)
@@ -18,26 +20,25 @@ def _get_route_frequencies(
     od_pairs = weight_df.index
-    for ori_id in od_pairs.get_level_values(0).unique():
-        relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
-        destinations = relevant_pairs.get_level_values(1)
-        res = graph.get_shortest_paths(
-            weights="weight", v=ori_id, to=destinations, output="epath"
-        )
-        for i, des_id in enumerate(destinations):
-            indices = graph.es[res[i]]
-            if not indices:
-                continue
-            line_ids = DataFrame({"src_tgt_wt": indices["src_tgt_wt"]})
-            line_ids["origin"] = ori_id
-            line_ids["destination"] = des_id
-            line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
+    ori_ids = od_pairs.get_level_values(0).unique()
+    if n_jobs == 1:
+        nested_results: list[list[DataFrame]] = [
+            _get_one_route_frequency(
+                ori_id, od_pairs=od_pairs, graph=graph, weight_df=weight_df
+            )
+            for ori_id in ori_ids
+        ]
+        del nested_results
+    else:
+        with joblib.Parallel(n_jobs) as parallel:
+            nested_results: list[list[DataFrame]] = parallel(
+                joblib.delayed(_get_one_route_frequency)(
+                    ori_id, od_pairs=od_pairs, graph=graph, weight_df=weight_df
+                )
+                for ori_id in ori_ids
+            )
-            resultlist.append(line_ids)
+    resultlist = [x for y in nested_results for x in y]
     if not resultlist:
         return pd.DataFrame(columns=["frequency", "geometry"])
@@ -53,34 +54,56 @@ def _get_route_frequencies(
     return roads_visited
+def _get_one_route_frequency(
+    ori_id: int, od_pairs: pd.MultiIndex, graph: Graph, weight_df: pd.DataFrame
+):
+    relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
+    destinations = relevant_pairs.get_level_values(1)
+    res = graph.get_shortest_paths(
+        weights="weight", v=ori_id, to=destinations, output="epath"
+    )
+    results = []
+    for i, des_id in enumerate(destinations):
+        indices = graph.es[res[i]]
+        if not indices:
+            continue
+        line_ids = DataFrame({"src_tgt_wt": indices["src_tgt_wt"]})
+        line_ids["origin"] = ori_id
+        line_ids["destination"] = des_id
+        line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
+        results.append(line_ids)
+    return results
 def _get_route(
     graph: Graph,
     weight: str,
     roads: GeoDataFrame,
     od_pairs: pd.MultiIndex,
+    n_jobs: int,
 ) -> GeoDataFrame:
     """Function used in the get_route method of NetworkAnalysis."""
     warnings.filterwarnings("ignore", category=RuntimeWarning)
-    resultlist: list[DataFrame] = []
-    for ori_id in od_pairs.get_level_values(0).unique():
-        relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
-        destinations = relevant_pairs.get_level_values(1)
-        res = graph.get_shortest_paths(
-            weights="weight", v=ori_id, to=destinations, output="epath"
-        )
-        for i, des_id in enumerate(destinations):
-            indices = graph.es[res[i]]
-            if not indices:
-                continue
-            line_ids = _create_line_id_df(indices["src_tgt_wt"], ori_id, des_id)
+    ori_ids = od_pairs.get_level_values(0).unique()
+    if n_jobs == 1:
+        nested_results: list[list[DataFrame]] = [
+            _get_one_route(ori_id, od_pairs=od_pairs, graph=graph) for ori_id in ori_ids
+        ]
+        del nested_results
+    else:
+        with joblib.Parallel(n_jobs) as parallel:
+            nested_results: list[list[DataFrame]] = parallel(
+                joblib.delayed(_get_one_route)(ori_id, od_pairs=od_pairs, graph=graph)
+                for ori_id in ori_ids
+            )
-            resultlist.append(line_ids)
+    resultlist = [x for y in nested_results for x in y]
     if not resultlist:
         warnings.warn(
@@ -98,6 +121,29 @@ def _get_route(
     return lines[["origin", "destination", weight, "geometry"]]
+def _get_one_route(
+    ori_id: int,
+    od_pairs: pd.MultiIndex,
+    graph: Graph,
+) -> list[DataFrame]:
+    relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
+    destinations = relevant_pairs.get_level_values(1)
+    results = graph.get_shortest_paths(
+        weights="weight", v=ori_id, to=destinations, output="epath"
+    )
+    out_lines = []
+    for i, des_id in enumerate(destinations):
+        indices = graph.es[results[i]]
+        if not indices:
+            continue
+        line_ids: DataFrame = _create_line_id_df(indices["src_tgt_wt"], ori_id, des_id)
+        out_lines.append(line_ids)
+    return out_lines
 def _get_k_routes(
     graph: Graph,
     weight: str,

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/networkanalysis/_points.py RENAMED Viewed

@@ -45,14 +45,12 @@ class Points:
             return [0 for _ in distances]
         if rules.nodedist_multiplier and rules.nodedist_kmh:
-            raise ValueError(
-                "Can only specify one of 'nodedist_multiplier' and 'nodedist_kmh'"
-            )
+            raise ValueError("Cannot set both 'nodedist_multiplier' and 'nodedist_kmh'")
         if rules.nodedist_multiplier:
-            if rules.weight != "meters":
+            if rules.weight == "minutes":
                 raise ValueError(
-                    "Can only specify 'nodedist_multiplier' when the 'weight' is meters"
+                    "Cannot set 'nodedist_multiplier' when the 'weight' is minutes"
                 )
             return [x * rules.nodedist_multiplier for x in distances]

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/networkanalysis/networkanalysis.py RENAMED Viewed

@@ -436,6 +436,7 @@ class NetworkAnalysis:
         rowwise: bool = False,
         strict: bool = False,
         frequency_col: str = "frequency",
+        n_jobs: int | None = None,
     ) -> GeoDataFrame:
         """Finds the number of times each line segment was visited in all trips.
@@ -465,6 +466,7 @@ class NetworkAnalysis:
                 to False.
             frequency_col: Name of column with the number of times each road was
                 visited. Defaults to 'frequency'.
+            n_jobs: Number of parallell jobs.
         Returns:
             A GeoDataFrame with all line segments that were visited at least once,
@@ -635,6 +637,7 @@ class NetworkAnalysis:
             graph=self.graph,
             roads=self.network.gdf,
             weight_df=weight_df,
+            n_jobs=n_jobs,
         )
         if isinstance(results, GeoDataFrame):
@@ -665,6 +668,7 @@ class NetworkAnalysis:
         rowwise: bool = False,
         destination_count: int | None = None,
         cutoff: int | float | None = None,
+        n_jobs: int | None = None,
     ) -> GeoDataFrame:
         """Returns the geometry of the low-cost route between origins and destinations.
@@ -685,6 +689,7 @@ class NetworkAnalysis:
             cutoff: the maximum cost (weight) for the trips. Defaults to None,
                 meaning all rows will be included. NaNs will also be removed if cutoff
                 is specified.
+            n_jobs: Number of parallell jobs.
         Returns:
             A DataFrame with the geometry of the routes between origin and destination.
@@ -738,6 +743,7 @@ class NetworkAnalysis:
             weight=self.rules.weight,
             roads=self.network.gdf,
             od_pairs=od_pairs,
+            n_jobs=n_jobs,
         )
         if cutoff is not None:

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/parallel/parallel.py RENAMED Viewed

@@ -13,7 +13,7 @@ from typing import Any
 from pandas.api.types import is_array_like
 try:
-    import dapla as dp
+    from gcsfs import GCSFileSystem
 except ImportError:
     pass
@@ -575,7 +575,7 @@ class Parallel:
             A GeoDataFrame, or a list of GeoDataFrames if concat is False.
         """
         if "file_system" not in kwargs:
-            kwargs["file_system"] = dp.FileClient.get_gcs_file_system()
+            kwargs["file_system"] = GCSFileSystem()
         if strict:
             res = self.map(read_geopandas, files, kwargs=kwargs)
@@ -653,7 +653,7 @@ class Parallel:
         if funcdict is None:
             funcdict = {}
-        fs = dp.FileClient.get_gcs_file_system()
+        fs = GCSFileSystem()
         for _, data, folder, postfunc in dict_zip_union(in_data, out_data, funcdict):
             if data is None or (

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/raster/base.py RENAMED Viewed

@@ -38,10 +38,8 @@ def _get_transform_from_bounds(
     obj: GeoDataFrame | GeoSeries | Geometry | tuple, shape: tuple[int, ...]
 ) -> Affine:
     minx, miny, maxx, maxy = to_bbox(obj)
-    if len(shape) == 2:
-        height, width = shape
-    elif len(shape) == 3:
-        _, height, width = shape
+    if len(shape) in [2, 3]:
+        height, width = shape[-2:]
     else:
         return None
         # raise ValueError(shape)
@@ -104,7 +102,7 @@ def _array_to_geojson(
             return _array_to_geojson_loop(array, transform, mask, processes)
         except Exception as err:
-            raise err.__class__(array.shape, err) from err
+            raise err.__class__(f"{array.shape}: {err}") from err
 def _array_to_geojson_loop(array, transform, mask, processes):

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/raster/image_collection.py RENAMED Viewed

@@ -26,14 +26,11 @@ import numpy as np
 import pandas as pd
 import pyproj
 import rasterio
-from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
+from pandas.api.types import is_array_like
 from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
-from scipy import stats
-from scipy.ndimage import binary_dilation
-from scipy.ndimage import binary_erosion
 from shapely import Geometry
 from shapely import box
 from shapely import unary_union
@@ -64,42 +61,11 @@ except ImportError:
         """Placeholder."""
-try:
-    from rioxarray.exceptions import NoDataInBounds
-    from rioxarray.merge import merge_arrays
-    from rioxarray.rioxarray import _generate_spatial_coords
-except ImportError:
-    pass
-try:
-    from xarray import DataArray
-    from xarray import Dataset
-    from xarray import combine_by_coords
-except ImportError:
-    class DataArray:
-        """Placeholder."""
-    class Dataset:
-        """Placeholder."""
-    def combine_by_coords(*args, **kwargs) -> None:
-        raise ImportError("xarray")
-try:
-    from gcsfs.core import GCSFile
-except ImportError:
-    class GCSFile:
-        """Placeholder."""
 from ..conf import _get_instance
 from ..conf import config
 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
 from ..geopandas_tools.conversion import to_gdf
-from ..geopandas_tools.conversion import to_geoseries
 from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
 from ..helpers import _fix_path
@@ -799,41 +765,6 @@ class _ImageBandBase(_ImageBase):
         return missing_metadata_attributes | nonmissing_metadata_attributes
-    def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
-        """Convert the raster to  an xarray.DataArray."""
-        attrs = {"crs": self.crs}
-        for attr in set(self.metadata_attributes).union({"date"}):
-            try:
-                attrs[attr] = getattr(self, attr)
-            except Exception:
-                pass
-        if len(array.shape) == 2:
-            height, width = array.shape
-            dims = ["y", "x"]
-        elif len(array.shape) == 3:
-            height, width = array.shape[1:]
-            dims = ["band", "y", "x"]
-        elif not any(dim for dim in array.shape):
-            DataArray(
-                name=self.name or self.__class__.__name__,
-                attrs=attrs,
-            )
-        else:
-            raise ValueError(
-                f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
-            )
-        coords = _generate_spatial_coords(transform, width, height)
-        return DataArray(
-            array,
-            coords=coords,
-            dims=dims,
-            name=self.name or self.__class__.__name__,
-            attrs=attrs,
-        )
 class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""
@@ -929,7 +860,6 @@ class Band(_ImageBandBase):
             self.transform = _get_transform_from_bounds(self._bounds, shape=data.shape)
             self._from_array = True
             self.values = data
             self._res = _get_res_from_bounds(self._bounds, self.values.shape)
         elif not isinstance(data, (str | Path | os.PathLike)):
@@ -1146,7 +1076,6 @@ class Band(_ImageBandBase):
             self.transform = None
             # activate setter
             self.values = self._values
             return self
         if self.has_array and bounds_was_none:
@@ -1231,8 +1160,7 @@ class Band(_ImageBandBase):
                         values.shape,
                     )
-                    width, height = values.shape[-2:]
+                    height, width = values.shape[-2:]
                     if width and height:
                         self.transform = rasterio.transform.from_bounds(
                             *bounds, width, height
@@ -1264,7 +1192,7 @@ class Band(_ImageBandBase):
     def has_array(self) -> bool:
         """Whether the array is loaded."""
         try:
-            if not isinstance(self.values, (np.ndarray | DataArray)):
+            if not is_array_like(self.values):
                 raise ValueError()
             return True
         except ValueError:  # also catches _ArrayNotLoadedError
@@ -1501,20 +1429,12 @@ class Band(_ImageBandBase):
             return df[(df[column] != self.nodata) & (df[column].notna())]
         return df
-    def to_xarray(self) -> DataArray:
-        """Convert the raster to an xarray.DataArray."""
-        return self._to_xarray(
-            self.values,
-            transform=self.transform,
-            # name=self.name or self.__class__.__name__.lower(),
-        )
     def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
         """Convert the raster to a numpy.ndarray."""
         return self._to_numpy(self.values).copy()
     def _to_numpy(
-        self, arr: np.ndarray | DataArray, masked: bool = True
+        self, arr: np.ndarray, masked: bool = True
     ) -> np.ndarray | np.ma.core.MaskedArray:
         if not isinstance(arr, np.ndarray):
             mask_arr = None
@@ -1891,13 +1811,6 @@ class Image(_ImageBandBase):
             **self._common_init_kwargs_after_load,
         )
-    def to_xarray(self) -> DataArray:
-        """Convert the raster to  an xarray.DataArray."""
-        return self._to_xarray(
-            np.array([band.values for band in self]),
-            transform=self[0].transform,
-        )
     @property
     def band_ids(self) -> list[str]:
         """The Band ids."""
@@ -2539,6 +2452,10 @@ class ImageCollection(_ImageBase):
         indexes: int | tuple[int] | None = None,
         **kwargs,
     ) -> np.ndarray:
+        from rioxarray.merge import merge_arrays
+        from rioxarray.rioxarray import _generate_spatial_coords
+        from xarray import DataArray
         arrs = []
         kwargs["indexes"] = indexes
         bounds = to_shapely(bounds) if bounds is not None else None
@@ -2777,69 +2694,6 @@ class ImageCollection(_ImageBase):
         ]
         return self
-    def to_xarray(
-        self,
-        **kwargs,
-    ) -> Dataset:
-        """Convert the raster to  an xarray.Dataset.
-        Images are converted to 2d arrays for each unique bounds.
-        The spatial dimensions will be labeled "x" and "y". The third
-        dimension defaults to "date" if all images have date attributes.
-        Otherwise defaults to the image name.
-        """
-        if any(not band.has_array for img in self for band in img):
-            raise ValueError("Arrays must be loaded.")
-        # if by is None:
-        if all(img.date for img in self):
-            by = ["date"]
-        elif not pd.Index([img.name for img in self]).is_unique:
-            raise ValueError("Images must have unique names.")
-        else:
-            by = ["name"]
-        # elif isinstance(by, str):
-        # by = [by]
-        xarrs: dict[str, DataArray] = {}
-        for (bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
-            name = f"{band_id}_{'-'.join(str(int(x)) for x in bounds)}"
-            first_band = collection[0][0]
-            coords = _generate_spatial_coords(
-                first_band.transform, first_band.width, first_band.height
-            )
-            values = np.array([band.to_numpy() for img in collection for band in img])
-            assert len(values) == len(collection)
-            # coords["band_id"] = [
-            #     band.band_id or i for i, band in enumerate(collection[0])
-            # ]
-            for attr in by:
-                coords[attr] = [getattr(img, attr) for img in collection]
-            # coords["band"] = band_id  #
-            dims = [*by, "y", "x"]
-            # dims = ["band", "y", "x"]
-            # dims = {}
-            # for attr in by:
-            #     dims[attr] = [getattr(img, attr) for img in collection]
-            xarrs[name] = DataArray(
-                values,
-                coords=coords,
-                dims=dims,
-                # name=name,
-                name=band_id,
-                attrs={
-                    "crs": collection.crs,
-                    "band_id": band_id,
-                },  # , "bounds": bounds},
-                **kwargs,
-            )
-        return combine_by_coords(list(xarrs.values()))
-        # return Dataset(xarrs)
     def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
         """Sample one or more areas of a given size and set this as mask for the images."""
         unioned = self.union_all()
@@ -3407,24 +3261,6 @@ def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndar
     return degrees
-def _clip_xarray(
-    xarr: DataArray,
-    mask: tuple[int, int, int, int],
-    crs: Any,
-    **kwargs,
-) -> DataArray:
-    # xarray needs a numpy array of polygons
-    mask_arr: np.ndarray = to_geoseries(mask).values
-    try:
-        return xarr.rio.clip(
-            mask_arr,
-            crs=crs,
-            **kwargs,
-        )
-    except NoDataInBounds:
-        return np.array([])
 def _get_all_file_paths(path: str) -> set[str]:
     return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
@@ -3645,6 +3481,9 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     Returns:
         Array with buffered values.
     """
+    from scipy.ndimage import binary_dilation
+    from scipy.ndimage import binary_erosion
     if not np.all(np.isin(arr, (1, 0, True, False))):
         raise ValueError("Array must be all 0s and 1s or boolean.")
@@ -3655,6 +3494,7 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     arr = np.where(arr, 1, 0)
     if distance > 0:
         return binary_dilation(arr, structure=structure).astype(dtype)
     elif distance < 0:
@@ -3671,6 +3511,8 @@ def _plot_pixels_1d(
     figsize: tuple,
     first_date: pd.Timestamp,
 ) -> None:
+    from scipy import stats
     coef, intercept = np.linalg.lstsq(
         np.vstack([x, np.ones(x.shape[0])]).T,
         y,

{ssb_sgis-1.2.4 → ssb_sgis-1.2.7}/src/sgis/raster/regex.py RENAMED Viewed

@@ -7,7 +7,7 @@ import pandas as pd
 from ..io._is_dapla import is_dapla
 try:
-    import dapla as dp
+    from gcsfs import GCSFileSystem
 except ImportError:
     pass
@@ -22,7 +22,7 @@ except ImportError:
 if is_dapla():
     def _open_func(*args, **kwargs) -> GCSFile:
-        return dp.FileClient.get_gcs_file_system().open(*args, **kwargs)
+        return GCSFileSystem().open(*args, **kwargs)
 else:
     _open_func = open