PyPI - ssb-sgis - Versions diffs - 1.2.3__tar.gz → 1.2.6__tar.gz - Mend

ssb-sgis 1.2.3.tar.gz → 1.2.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 1.2.3
+Version: 1.2.6
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-sgis"
-version = "1.2.3"
+version = "1.2.6"
 description = "GIS functions used at Statistics Norway."
 authors = ["Morten Letnes <morten.letnes@ssb.no>"]
 license = "MIT"

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/__init__.py RENAMED Viewed

@@ -78,6 +78,7 @@ from .geopandas_tools.polygon_operations import get_polygon_clusters
 from .geopandas_tools.polygon_operations import split_polygons_by_lines
 from .geopandas_tools.polygons_as_rings import PolygonsAsRings
 from .geopandas_tools.runners import GridSizeOverlayRunner
+from .geopandas_tools.runners import GridSizeUnionRunner
 from .geopandas_tools.runners import OverlayRunner
 from .geopandas_tools.runners import RTreeQueryRunner
 from .geopandas_tools.runners import UnionRunner

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/geopandas_tools/cleaning.py RENAMED Viewed

@@ -603,7 +603,6 @@ def split_by_neighbors(df, split_by, tolerance, grid_size=None) -> GeoDataFrame:
             buff(df, tolerance),
             how="identity",
             grid_size=grid_size,
-            geom_type="polygon",
         )
         .pipe(get_line_segments)
         .reset_index(drop=True)

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/geopandas_tools/conversion.py RENAMED Viewed

@@ -43,14 +43,6 @@ except ImportError:
         """Placeholder."""
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:  # type: ignore
-        """Placeholder."""
 def crs_to_string(crs: Any) -> str:
     """Extract the string of a CRS-like object."""
     if crs is None:
@@ -415,21 +407,6 @@ def to_gdf(
             except Exception:
                 pass
-    if isinstance(obj, RasterDataset):
-        # read the entire dataset
-        obj = obj[obj.bounds]
-        crs = obj["crs"]
-        array = np.array(obj["image"])
-        transform = get_transform_from_bounds(obj["bbox"], shape=array.shape)
-        return gpd.GeoDataFrame(
-            pd.DataFrame(
-                _array_to_geojson(array, transform),
-                columns=["value", "geometry"],
-            ),
-            geometry="geometry",
-            crs=crs,
-        )
     if is_array_like(geometry) and len(geometry) == len(obj):  # type: ignore
         geometry = GeoSeries(
             _make_one_shapely_geom(g) for g in geometry if g is not None  # type: ignore
@@ -442,10 +419,6 @@ def to_gdf(
     # get done with iterators that would get consumed by 'all' later
     if isinstance(obj, Iterator) and not isinstance(obj, Sized):
         obj = list(obj)
-        # obj = GeoSeries(
-        #     (_make_one_shapely_geom(g) for g in obj if g is not None), index=index
-        # )
-        # return GeoDataFrame({geom_col: obj}, geometry=geom_col, crs=crs, **kwargs)
     if hasattr(obj, "__len__") and not len(obj):
         return GeoDataFrame({"geometry": []}, crs=crs)

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/geopandas_tools/neighbors.py RENAMED Viewed

@@ -15,7 +15,6 @@ from geopandas import GeoSeries
 from pandas import DataFrame
 from pandas import MultiIndex
 from pandas import Series
-from sklearn.neighbors import NearestNeighbors
 from ..conf import _get_instance
 from ..conf import config
@@ -467,6 +466,8 @@ def k_nearest_neighbors(
         of the neighbors.
     """
+    from sklearn.neighbors import NearestNeighbors
     if not len(to_array) or not len(from_array):
         return np.array([]), np.array([])

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/geopandas_tools/runners.py RENAMED Viewed

@@ -120,6 +120,53 @@ class UnionRunner(AbstractRunner):
         return agged
+@dataclass
+class GridSizeUnionRunner(UnionRunner):
+    """Run shapely.union_all with pandas.groupby for different grid sizes until no GEOSException is raised.
+    Subclasses must implement a 'run' method that takes the arguments
+    'df' (GeoDataFrame or GeoSeries), 'by' (optional column to group by), 'grid_size'
+    (passed to shapely.union_all) and **kwargs passed to pandas.DataFrame.groupby.
+    Defaults to None, meaning the default runner with number of workers set
+    to 'n_jobs'.
+    Args:
+        n_jobs: Number of workers.
+        backend: Backend for the workers.
+    """
+    n_jobs: int
+    backend: str | None = None
+    grid_sizes: list[float | int] | None = None
+    def __post_init__(self) -> None:
+        """Check that grid_sizes is passed."""
+        if self.grid_sizes is None:
+            raise ValueError(
+                f"must set 'grid_sizes' in the {self.__class__.__name__} initialiser."
+            )
+    def run(
+        self,
+        df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
+        by: str | list[str] | None = None,
+        grid_size: int | float | None = None,
+        **kwargs,
+    ) -> GeoSeries | GeoDataFrame:
+        """Run groupby on geometries in parallel (if n_jobs > 1) with grid_sizes."""
+        try:
+            return super().run(df, by=by, grid_size=grid_size, **kwargs)
+        except GEOSException:
+            pass
+        for i, grid_size in enumerate(self.grid_sizes):
+            try:
+                return super().run(df, by=by, grid_size=grid_size, **kwargs)
+            except GEOSException as e:
+                if i == len(self.grid_sizes) - 1:
+                    raise e
 def _strtree_query(
     arr1: np.ndarray,
     arr2: np.ndarray,

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/io/dapla_functions.py RENAMED Viewed

@@ -40,7 +40,10 @@ from ..helpers import _get_file_system
 try:
     from gcsfs import GCSFileSystem
 except ImportError:
-    pass
+    class GCSFileSystem:
+        """Placeholder."""
 PANDAS_FALLBACK_INFO = " Set pandas_fallback=True to ignore this error."
 NULL_VALUE = "__HIVE_DEFAULT_PARTITION__"
@@ -96,6 +99,7 @@ def read_geopandas(
             file_system=file_system,
             use_threads=use_threads,
             pandas_fallback=pandas_fallback,
+            filters=filters,
             **kwargs,
         )
@@ -108,7 +112,9 @@ def read_geopandas(
     # because glob is slow without GCSFileSystem from the root partition
     if single_eq_filter:
         try:
-            expression = "".join(next(iter(filters))).replace("==", "=")
+            expression: list[str] = "".join(
+                [str(x) for x in next(iter(filters))]
+            ).replace("==", "=")
             glob_func = _get_glob_func(file_system)
             suffix: str = Path(gcs_path).suffix
             paths = glob_func(str(Path(gcs_path) / expression / f"*{suffix}"))
@@ -119,6 +125,7 @@ def read_geopandas(
                     file_system=file_system,
                     use_threads=use_threads,
                     pandas_fallback=pandas_fallback,
+                    filters=filters,
                     **kwargs,
                 )
         except FileNotFoundError:
@@ -178,11 +185,17 @@ def _read_geopandas_from_iterable(
                     except ArrowInvalid as e:
                         if file_system.isfile(path):
                             raise ArrowInvalid(e, path) from e
-            return GeoDataFrame(cols | {"geometry": []})
+            first_path = next(iter(paths.index))
+            _, crs = _get_bounds_parquet(first_path, file_system)
+            return GeoDataFrame(cols | {"geometry": []}, crs=crs)
         paths = list(bounds_series.index)
     results: list[pyarrow.Table] = _read_pyarrow_with_treads(
-        paths, file_system=file_system, mask=mask, use_threads=use_threads, **kwargs
+        paths,
+        file_system=file_system,
+        mask=mask,
+        use_threads=use_threads,
+        **kwargs,
     )
     if results:
         try:
@@ -192,16 +205,23 @@ def _read_geopandas_from_iterable(
                 print(e)
                 raise e
     else:
-        df = GeoDataFrame(cols | {"geometry": []})
+        first_path = next(iter(paths))
+        _, crs = _get_bounds_parquet(first_path, file_system)
+        df = GeoDataFrame(cols | {"geometry": []}, crs=crs)
     return df
 def _read_pyarrow_with_treads(
-    paths: list[str | Path | os.PathLike], file_system, use_threads, mask, **kwargs
+    paths: list[str | Path | os.PathLike],
+    file_system,
+    use_threads,
+    mask,
+    filters,
+    **kwargs,
 ) -> list[pyarrow.Table]:
     read_partial = functools.partial(
-        _read_pyarrow, mask=mask, file_system=file_system, **kwargs
+        _read_pyarrow, filters=filters, mask=mask, file_system=file_system, **kwargs
     )
     if not use_threads:
         return [x for x in map(read_partial, paths) if x is not None]
@@ -645,7 +665,7 @@ def expression_match_path(expression: ds.Expression, path: str) -> bool:
     """Check if a file path match a pyarrow Expression.
     Examples:
-    --------
+    ---------
     >>> import pyarrow.compute as pc
     >>> path = 'data/file.parquet/x=1/y=10/name0.parquet'
     >>> expression = (pc.Field("x") == 1) & (pc.Field("y") == 10)
@@ -758,6 +778,7 @@ def _read_partitioned_parquet(
         ),
         file_system=file_system,
         mask=mask,
+        filters=filters,
         use_threads=use_threads,
         **kwargs,
     )
@@ -769,7 +790,8 @@ def _read_partitioned_parquet(
     # add columns to empty DataFrame
     first_path = next(iter(child_paths + [path]))
-    df = pd.DataFrame(columns=_get_columns(first_path, file_system))
+    _, crs = _get_bounds_parquet(first_path, file_system)
+    df = GeoDataFrame(columns=_get_columns(first_path, file_system), crs=crs)
     if kwargs.get("columns"):
         return df[list(kwargs["columns"])]
     return df

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/explore.py RENAMED Viewed

@@ -69,14 +69,6 @@ from .map import _determine_best_name
 from .tilesources import kartverket
 from .tilesources import xyz
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 # the geopandas._explore raises a deprication warning. Ignoring for now.
 warnings.filterwarnings(
     action="ignore", category=matplotlib.MatplotlibDeprecationWarning
@@ -207,9 +199,9 @@ def _single_band_to_arr_is_too_much_nodata(
     if band.has_array and mask is None:
         arr = band.values
     elif band.has_array:
-        arr = band.clip(mask).values
+        arr = band.copy().clip(mask).values
     else:
-        arr = band.load(indexes=1, bounds=mask).values
+        arr = band.copy().load(indexes=1, bounds=mask).values
     if _is_too_much_nodata([arr], band.nodata, max_nodata_percentage):
         return True
@@ -618,6 +610,8 @@ class Explore(Map):
                 arr,
                 bounds=[[miny, minx], [maxy, maxx]],
                 show=self._show_rasters,
+                vmin=arr.min(),
+                vmax=arr.max(),
                 **kwargs,
             )
             image_overlay.layer_name = Path(label).stem
@@ -1399,9 +1393,9 @@ def _add_one_image(
     def load(band_id: str) -> Band:
         band = image[band_id]
         if band.has_array and mask is not None:
-            band = band.clip(mask, copy=True)
+            band = band.copy().clip(mask, copy=True)
         elif not band.has_array:
-            band = band.load(indexes=1, bounds=mask)
+            band = band.copy().load(indexes=1, bounds=mask)
         return band
     for red, blue, green in rbg_bands:

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/httpserver.py RENAMED Viewed

@@ -24,10 +24,11 @@ def run_html_server(contents: str | None = None, port: int = 3000) -> None:
     if "JUPYTERHUB_SERVICE_PREFIX" in os.environ:
         # Create a link using the https://github.com/jupyterhub/jupyter-server-proxy
         display_address = os.environ["JUPYTERHUB_SERVICE_PREFIX"] + f"proxy/{port}/"
+        stop_address = os.environ["JUPYTERHUB_SERVICE_PREFIX"] + f"proxy/{port}/stop"
         display_content = HTML(
             f"""
         <p>Click <a href='{display_address}'>here</a> to open in browser.</p>
-        <p>Click <a href='{display_address}/stop'>here</a> to stop.</p>
+        <p>Click <a href='{stop_address}'>here</a> to stop.</p>
         """
         )
     else:

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/map.py RENAMED Viewed

@@ -14,12 +14,12 @@ import numpy as np
 import pandas as pd
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
+from pandas.api.types import is_dict_like
 try:
     from jenkspy import jenks_breaks
 except ImportError:
     pass
-from mapclassify import classify
 from pandas.errors import PerformanceWarning
 from shapely import Geometry
@@ -34,14 +34,6 @@ from ..raster.image_collection import Band
 from ..raster.image_collection import Image
 from ..raster.image_collection import ImageCollection
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 # the geopandas._explore raises a deprication warning. Ignoring for now.
 warnings.filterwarnings(
     action="ignore", category=matplotlib.MatplotlibDeprecationWarning
@@ -442,7 +434,6 @@ class Map:
             GeoDataFrame,
             GeoSeries,
             Geometry,
-            RasterDataset,
             ImageCollection,
             Image,
             Band,
@@ -605,22 +596,29 @@ class Map:
         return False
     def _make_categories_colors_dict(self) -> None:
-        # custom categorical cmap
-        if not self._cmap and len(self._unique_values) <= len(_CATEGORICAL_CMAP):
+        if "color" in self.kwargs and is_dict_like(self.kwargs["color"]):
+            if self._column is None and not all(
+                key in self.kwargs["color"] for key in self._gdfs
+            ):
+                raise ValueError(
+                    "When specifying 'color' as dict-like, you must also pass a column "
+                    "or all gdfs passed must have labels/names corresponding to keys in the color dict."
+                )
+            self._categories_colors_dict = self.kwargs.pop("color")
+        elif not self._cmap and len(self._unique_values) <= len(_CATEGORICAL_CMAP):
+            # custom categorical cmap
             self._categories_colors_dict = {
                 category: _CATEGORICAL_CMAP[i]
                 for i, category in enumerate(self._unique_values)
             } | self._categories_colors_dict
         elif self._cmap:
             cmap = matplotlib.colormaps.get_cmap(self._cmap)
             self._categories_colors_dict = {
                 category: colors.to_hex(cmap(int(i)))
                 for i, category in enumerate(self._unique_values)
             } | self._categories_colors_dict
         else:
             cmap = matplotlib.colormaps.get_cmap("tab20")
             self._categories_colors_dict = {
                 category: colors.to_hex(cmap(int(i)))
                 for i, category in enumerate(self._unique_values)
@@ -664,6 +662,9 @@ class Map:
         if self.scheme == "jenks":
             bins = jenks_breaks(gdf[column].dropna(), n_classes=n_classes)
         else:
+            # local import because slow
+            from mapclassify import classify
             binning = classify(
                 np.asarray(gdf[column].dropna()),
                 scheme=self.scheme,

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/maps.py RENAMED Viewed

@@ -34,13 +34,6 @@ from .map import Map
 from .thematicmap import ThematicMap
 from .wms import WmsLoader
-try:
-    from torchgeo.datasets.geo import RasterDataset
-except ImportError:
-    class RasterDataset:
-        """Placeholder."""
 def _get_location_mask(kwargs: dict, gdfs) -> tuple[GeoDataFrame | None, dict]:
     try:
@@ -530,7 +523,7 @@ def explore_locals(
     frame = inspect.currentframe().f_back
-    allowed_types = (GeoDataFrame, GeoSeries, Geometry, RasterDataset)
+    allowed_types = (GeoDataFrame, GeoSeries, Geometry)
     local_gdfs = {}
     while True:

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/thematicmap.py RENAMED Viewed

@@ -296,13 +296,11 @@ class ThematicMap(Map):
         if self._gdf[self._column].isna().any():
             isnas = []
             for label, gdf in self._gdfs.items():
                 isnas.append(gdf[gdf[self._column].isna()])
                 self._gdfs[label] = gdf[gdf[self._column].notna()]
-            color = self.facecolor if nan_hatch else self.nan_color
             self._more_data[nan_label] = {
                 "gdf": pd.concat(isnas, ignore_index=True),
-                "color": color,
+                "color": self.nan_color,
                 "hatch": nan_hatch,
             } | new_kwargs
             self._gdf = pd.concat(self.gdfs.values(), ignore_index=True)

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/maps/wms.py RENAMED Viewed

@@ -20,7 +20,7 @@ JSON_YEARS = [str(year) for year in range(1999, 2025)]
 DEFAULT_YEARS: tuple[str] = tuple(
     str(year)
     for year in range(
-        int(datetime.datetime.now().year) - 8,
+        int(datetime.datetime.now().year) - 10,
         int(datetime.datetime.now().year) + 1,
     )
 )
@@ -111,6 +111,7 @@ class NorgeIBilderWms(WmsLoader):
                     this_tile["year"] = year
                 else:
                     this_tile["year"] = "9999"
                 all_tiles.append(this_tile)
         self.tiles = sorted(all_tiles, key=lambda x: x["year"])

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/networkanalysis/_get_route.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import warnings
+import joblib
 import pandas as pd
 from geopandas import GeoDataFrame
 from igraph import Graph
@@ -10,6 +11,7 @@ def _get_route_frequencies(
     graph: Graph,
     roads: GeoDataFrame,
     weight_df: DataFrame,
+    n_jobs: int,
 ) -> GeoDataFrame:
     """Function used in the get_route_frequencies method of NetworkAnalysis."""
     warnings.filterwarnings("ignore", category=RuntimeWarning)
@@ -18,26 +20,25 @@ def _get_route_frequencies(
     od_pairs = weight_df.index
-    for ori_id in od_pairs.get_level_values(0).unique():
-        relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
-        destinations = relevant_pairs.get_level_values(1)
-        res = graph.get_shortest_paths(
-            weights="weight", v=ori_id, to=destinations, output="epath"
-        )
-        for i, des_id in enumerate(destinations):
-            indices = graph.es[res[i]]
-            if not indices:
-                continue
-            line_ids = DataFrame({"src_tgt_wt": indices["src_tgt_wt"]})
-            line_ids["origin"] = ori_id
-            line_ids["destination"] = des_id
-            line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
+    ori_ids = od_pairs.get_level_values(0).unique()
+    if n_jobs == 1:
+        nested_results: list[list[DataFrame]] = [
+            _get_one_route_frequency(
+                ori_id, od_pairs=od_pairs, graph=graph, weight_df=weight_df
+            )
+            for ori_id in ori_ids
+        ]
+        del nested_results
+    else:
+        with joblib.Parallel(n_jobs) as parallel:
+            nested_results: list[list[DataFrame]] = parallel(
+                joblib.delayed(_get_one_route_frequency)(
+                    ori_id, od_pairs=od_pairs, graph=graph, weight_df=weight_df
+                )
+                for ori_id in ori_ids
+            )
-            resultlist.append(line_ids)
+    resultlist = [x for y in nested_results for x in y]
     if not resultlist:
         return pd.DataFrame(columns=["frequency", "geometry"])
@@ -53,34 +54,56 @@ def _get_route_frequencies(
     return roads_visited
+def _get_one_route_frequency(
+    ori_id: int, od_pairs: pd.MultiIndex, graph: Graph, weight_df: pd.DataFrame
+):
+    relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
+    destinations = relevant_pairs.get_level_values(1)
+    res = graph.get_shortest_paths(
+        weights="weight", v=ori_id, to=destinations, output="epath"
+    )
+    results = []
+    for i, des_id in enumerate(destinations):
+        indices = graph.es[res[i]]
+        if not indices:
+            continue
+        line_ids = DataFrame({"src_tgt_wt": indices["src_tgt_wt"]})
+        line_ids["origin"] = ori_id
+        line_ids["destination"] = des_id
+        line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
+        results.append(line_ids)
+    return results
 def _get_route(
     graph: Graph,
     weight: str,
     roads: GeoDataFrame,
     od_pairs: pd.MultiIndex,
+    n_jobs: int,
 ) -> GeoDataFrame:
     """Function used in the get_route method of NetworkAnalysis."""
     warnings.filterwarnings("ignore", category=RuntimeWarning)
-    resultlist: list[DataFrame] = []
-    for ori_id in od_pairs.get_level_values(0).unique():
-        relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
-        destinations = relevant_pairs.get_level_values(1)
-        res = graph.get_shortest_paths(
-            weights="weight", v=ori_id, to=destinations, output="epath"
-        )
-        for i, des_id in enumerate(destinations):
-            indices = graph.es[res[i]]
-            if not indices:
-                continue
-            line_ids = _create_line_id_df(indices["src_tgt_wt"], ori_id, des_id)
+    ori_ids = od_pairs.get_level_values(0).unique()
+    if n_jobs == 1:
+        nested_results: list[list[DataFrame]] = [
+            _get_one_route(ori_id, od_pairs=od_pairs, graph=graph) for ori_id in ori_ids
+        ]
+        del nested_results
+    else:
+        with joblib.Parallel(n_jobs) as parallel:
+            nested_results: list[list[DataFrame]] = parallel(
+                joblib.delayed(_get_one_route)(ori_id, od_pairs=od_pairs, graph=graph)
+                for ori_id in ori_ids
+            )
-            resultlist.append(line_ids)
+    resultlist = [x for y in nested_results for x in y]
     if not resultlist:
         warnings.warn(
@@ -98,6 +121,29 @@ def _get_route(
     return lines[["origin", "destination", weight, "geometry"]]
+def _get_one_route(
+    ori_id: int,
+    od_pairs: pd.MultiIndex,
+    graph: Graph,
+) -> list[DataFrame]:
+    relevant_pairs = od_pairs[od_pairs.get_level_values(0) == ori_id]
+    destinations = relevant_pairs.get_level_values(1)
+    results = graph.get_shortest_paths(
+        weights="weight", v=ori_id, to=destinations, output="epath"
+    )
+    out_lines = []
+    for i, des_id in enumerate(destinations):
+        indices = graph.es[results[i]]
+        if not indices:
+            continue
+        line_ids: DataFrame = _create_line_id_df(indices["src_tgt_wt"], ori_id, des_id)
+        out_lines.append(line_ids)
+    return out_lines
 def _get_k_routes(
     graph: Graph,
     weight: str,

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/networkanalysis/networkanalysis.py RENAMED Viewed

@@ -436,6 +436,7 @@ class NetworkAnalysis:
         rowwise: bool = False,
         strict: bool = False,
         frequency_col: str = "frequency",
+        n_jobs: int | None = None,
     ) -> GeoDataFrame:
         """Finds the number of times each line segment was visited in all trips.
@@ -465,6 +466,7 @@ class NetworkAnalysis:
                 to False.
             frequency_col: Name of column with the number of times each road was
                 visited. Defaults to 'frequency'.
+            n_jobs: Number of parallell jobs.
         Returns:
             A GeoDataFrame with all line segments that were visited at least once,
@@ -635,6 +637,7 @@ class NetworkAnalysis:
             graph=self.graph,
             roads=self.network.gdf,
             weight_df=weight_df,
+            n_jobs=n_jobs,
         )
         if isinstance(results, GeoDataFrame):
@@ -665,6 +668,7 @@ class NetworkAnalysis:
         rowwise: bool = False,
         destination_count: int | None = None,
         cutoff: int | float | None = None,
+        n_jobs: int | None = None,
     ) -> GeoDataFrame:
         """Returns the geometry of the low-cost route between origins and destinations.
@@ -685,6 +689,7 @@ class NetworkAnalysis:
             cutoff: the maximum cost (weight) for the trips. Defaults to None,
                 meaning all rows will be included. NaNs will also be removed if cutoff
                 is specified.
+            n_jobs: Number of parallell jobs.
         Returns:
             A DataFrame with the geometry of the routes between origin and destination.
@@ -738,6 +743,7 @@ class NetworkAnalysis:
             weight=self.rules.weight,
             roads=self.network.gdf,
             od_pairs=od_pairs,
+            n_jobs=n_jobs,
         )
         if cutoff is not None:

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/parallel/parallel.py RENAMED Viewed

@@ -13,7 +13,7 @@ from typing import Any
 from pandas.api.types import is_array_like
 try:
-    import dapla as dp
+    from gcsfs import GCSFileSystem
 except ImportError:
     pass
@@ -575,7 +575,7 @@ class Parallel:
             A GeoDataFrame, or a list of GeoDataFrames if concat is False.
         """
         if "file_system" not in kwargs:
-            kwargs["file_system"] = dp.FileClient.get_gcs_file_system()
+            kwargs["file_system"] = GCSFileSystem()
         if strict:
             res = self.map(read_geopandas, files, kwargs=kwargs)
@@ -653,7 +653,7 @@ class Parallel:
         if funcdict is None:
             funcdict = {}
-        fs = dp.FileClient.get_gcs_file_system()
+        fs = GCSFileSystem()
         for _, data, folder, postfunc in dict_zip_union(in_data, out_data, funcdict):
             if data is None or (

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/raster/image_collection.py RENAMED Viewed

@@ -26,14 +26,11 @@ import numpy as np
 import pandas as pd
 import pyproj
 import rasterio
-from affine import Affine
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
+from pandas.api.types import is_array_like
 from pandas.api.types import is_dict_like
 from rasterio.enums import MergeAlg
-from scipy import stats
-from scipy.ndimage import binary_dilation
-from scipy.ndimage import binary_erosion
 from shapely import Geometry
 from shapely import box
 from shapely import unary_union
@@ -64,42 +61,11 @@ except ImportError:
         """Placeholder."""
-try:
-    from rioxarray.exceptions import NoDataInBounds
-    from rioxarray.merge import merge_arrays
-    from rioxarray.rioxarray import _generate_spatial_coords
-except ImportError:
-    pass
-try:
-    from xarray import DataArray
-    from xarray import Dataset
-    from xarray import combine_by_coords
-except ImportError:
-    class DataArray:
-        """Placeholder."""
-    class Dataset:
-        """Placeholder."""
-    def combine_by_coords(*args, **kwargs) -> None:
-        raise ImportError("xarray")
-try:
-    from gcsfs.core import GCSFile
-except ImportError:
-    class GCSFile:
-        """Placeholder."""
 from ..conf import _get_instance
 from ..conf import config
 from ..geopandas_tools.bounds import get_total_bounds
 from ..geopandas_tools.conversion import to_bbox
 from ..geopandas_tools.conversion import to_gdf
-from ..geopandas_tools.conversion import to_geoseries
 from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
 from ..helpers import _fix_path
@@ -799,41 +765,6 @@ class _ImageBandBase(_ImageBase):
         return missing_metadata_attributes | nonmissing_metadata_attributes
-    def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
-        """Convert the raster to  an xarray.DataArray."""
-        attrs = {"crs": self.crs}
-        for attr in set(self.metadata_attributes).union({"date"}):
-            try:
-                attrs[attr] = getattr(self, attr)
-            except Exception:
-                pass
-        if len(array.shape) == 2:
-            height, width = array.shape
-            dims = ["y", "x"]
-        elif len(array.shape) == 3:
-            height, width = array.shape[1:]
-            dims = ["band", "y", "x"]
-        elif not any(dim for dim in array.shape):
-            DataArray(
-                name=self.name or self.__class__.__name__,
-                attrs=attrs,
-            )
-        else:
-            raise ValueError(
-                f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
-            )
-        coords = _generate_spatial_coords(transform, width, height)
-        return DataArray(
-            array,
-            coords=coords,
-            dims=dims,
-            name=self.name or self.__class__.__name__,
-            attrs=attrs,
-        )
 class Band(_ImageBandBase):
     """Band holding a single 2 dimensional array representing an image band."""
@@ -1264,7 +1195,7 @@ class Band(_ImageBandBase):
     def has_array(self) -> bool:
         """Whether the array is loaded."""
         try:
-            if not isinstance(self.values, (np.ndarray | DataArray)):
+            if not is_array_like(self.values):
                 raise ValueError()
             return True
         except ValueError:  # also catches _ArrayNotLoadedError
@@ -1501,20 +1432,12 @@ class Band(_ImageBandBase):
             return df[(df[column] != self.nodata) & (df[column].notna())]
         return df
-    def to_xarray(self) -> DataArray:
-        """Convert the raster to an xarray.DataArray."""
-        return self._to_xarray(
-            self.values,
-            transform=self.transform,
-            # name=self.name or self.__class__.__name__.lower(),
-        )
     def to_numpy(self) -> np.ndarray | np.ma.core.MaskedArray:
         """Convert the raster to a numpy.ndarray."""
         return self._to_numpy(self.values).copy()
     def _to_numpy(
-        self, arr: np.ndarray | DataArray, masked: bool = True
+        self, arr: np.ndarray, masked: bool = True
     ) -> np.ndarray | np.ma.core.MaskedArray:
         if not isinstance(arr, np.ndarray):
             mask_arr = None
@@ -1891,13 +1814,6 @@ class Image(_ImageBandBase):
             **self._common_init_kwargs_after_load,
         )
-    def to_xarray(self) -> DataArray:
-        """Convert the raster to  an xarray.DataArray."""
-        return self._to_xarray(
-            np.array([band.values for band in self]),
-            transform=self[0].transform,
-        )
     @property
     def band_ids(self) -> list[str]:
         """The Band ids."""
@@ -2539,6 +2455,10 @@ class ImageCollection(_ImageBase):
         indexes: int | tuple[int] | None = None,
         **kwargs,
     ) -> np.ndarray:
+        from rioxarray.merge import merge_arrays
+        from rioxarray.rioxarray import _generate_spatial_coords
+        from xarray import DataArray
         arrs = []
         kwargs["indexes"] = indexes
         bounds = to_shapely(bounds) if bounds is not None else None
@@ -2777,69 +2697,6 @@ class ImageCollection(_ImageBase):
         ]
         return self
-    def to_xarray(
-        self,
-        **kwargs,
-    ) -> Dataset:
-        """Convert the raster to  an xarray.Dataset.
-        Images are converted to 2d arrays for each unique bounds.
-        The spatial dimensions will be labeled "x" and "y". The third
-        dimension defaults to "date" if all images have date attributes.
-        Otherwise defaults to the image name.
-        """
-        if any(not band.has_array for img in self for band in img):
-            raise ValueError("Arrays must be loaded.")
-        # if by is None:
-        if all(img.date for img in self):
-            by = ["date"]
-        elif not pd.Index([img.name for img in self]).is_unique:
-            raise ValueError("Images must have unique names.")
-        else:
-            by = ["name"]
-        # elif isinstance(by, str):
-        # by = [by]
-        xarrs: dict[str, DataArray] = {}
-        for (bounds, band_id), collection in self.groupby(["bounds", "band_id"]):
-            name = f"{band_id}_{'-'.join(str(int(x)) for x in bounds)}"
-            first_band = collection[0][0]
-            coords = _generate_spatial_coords(
-                first_band.transform, first_band.width, first_band.height
-            )
-            values = np.array([band.to_numpy() for img in collection for band in img])
-            assert len(values) == len(collection)
-            # coords["band_id"] = [
-            #     band.band_id or i for i, band in enumerate(collection[0])
-            # ]
-            for attr in by:
-                coords[attr] = [getattr(img, attr) for img in collection]
-            # coords["band"] = band_id  #
-            dims = [*by, "y", "x"]
-            # dims = ["band", "y", "x"]
-            # dims = {}
-            # for attr in by:
-            #     dims[attr] = [getattr(img, attr) for img in collection]
-            xarrs[name] = DataArray(
-                values,
-                coords=coords,
-                dims=dims,
-                # name=name,
-                name=band_id,
-                attrs={
-                    "crs": collection.crs,
-                    "band_id": band_id,
-                },  # , "bounds": bounds},
-                **kwargs,
-            )
-        return combine_by_coords(list(xarrs.values()))
-        # return Dataset(xarrs)
     def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
         """Sample one or more areas of a given size and set this as mask for the images."""
         unioned = self.union_all()
@@ -3407,24 +3264,6 @@ def _slope_2d(array: np.ndarray, res: int | tuple[int], degrees: int) -> np.ndar
     return degrees
-def _clip_xarray(
-    xarr: DataArray,
-    mask: tuple[int, int, int, int],
-    crs: Any,
-    **kwargs,
-) -> DataArray:
-    # xarray needs a numpy array of polygons
-    mask_arr: np.ndarray = to_geoseries(mask).values
-    try:
-        return xarr.rio.clip(
-            mask_arr,
-            crs=crs,
-            **kwargs,
-        )
-    except NoDataInBounds:
-        return np.array([])
 def _get_all_file_paths(path: str) -> set[str]:
     return {_fix_path(x) for x in sorted(set(_glob_func(path + "/**")))}
@@ -3645,6 +3484,9 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     Returns:
         Array with buffered values.
     """
+    from scipy.ndimage import binary_dilation
+    from scipy.ndimage import binary_erosion
     if not np.all(np.isin(arr, (1, 0, True, False))):
         raise ValueError("Array must be all 0s and 1s or boolean.")
@@ -3655,6 +3497,7 @@ def array_buffer(arr: np.ndarray, distance: int) -> np.ndarray:
     arr = np.where(arr, 1, 0)
     if distance > 0:
         return binary_dilation(arr, structure=structure).astype(dtype)
     elif distance < 0:
@@ -3671,6 +3514,8 @@ def _plot_pixels_1d(
     figsize: tuple,
     first_date: pd.Timestamp,
 ) -> None:
+    from scipy import stats
     coef, intercept = np.linalg.lstsq(
         np.vstack([x, np.ones(x.shape[0])]).T,
         y,

{ssb_sgis-1.2.3 → ssb_sgis-1.2.6}/src/sgis/raster/regex.py RENAMED Viewed

@@ -7,7 +7,7 @@ import pandas as pd
 from ..io._is_dapla import is_dapla
 try:
-    import dapla as dp
+    from gcsfs import GCSFileSystem
 except ImportError:
     pass
@@ -22,7 +22,7 @@ except ImportError:
 if is_dapla():
     def _open_func(*args, **kwargs) -> GCSFile:
-        return dp.FileClient.get_gcs_file_system().open(*args, **kwargs)
+        return GCSFileSystem().open(*args, **kwargs)
 else:
     _open_func = open