PyPI - ssb-sgis - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

ssb-sgis 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

sgis/geopandas_tools/general.py +18 -7
sgis/geopandas_tools/geometry_types.py +1 -1
sgis/geopandas_tools/line_operations.py +1 -1
sgis/geopandas_tools/overlay.py +11 -0
sgis/maps/explore.py +62 -99
sgis/maps/legend.py +300 -203
sgis/maps/map.py +69 -74
sgis/maps/maps.py +9 -5
sgis/maps/thematicmap.py +145 -61
sgis/networkanalysis/_get_route.py +168 -114
sgis/networkanalysis/_od_cost_matrix.py +7 -9
sgis/networkanalysis/_points.py +0 -18
sgis/networkanalysis/directednetwork.py +2 -2
sgis/networkanalysis/network.py +16 -25
sgis/networkanalysis/networkanalysis.py +301 -123
sgis/networkanalysis/networkanalysisrules.py +2 -2
{ssb_sgis-0.1.4.dist-info → ssb_sgis-0.1.6.dist-info}/METADATA +17 -9
ssb_sgis-0.1.6.dist-info/RECORD +35 -0
ssb_sgis-0.1.4.dist-info/RECORD +0 -35
{ssb_sgis-0.1.4.dist-info → ssb_sgis-0.1.6.dist-info}/LICENSE +0 -0
{ssb_sgis-0.1.4.dist-info → ssb_sgis-0.1.6.dist-info}/WHEEL +0 -0

sgis/networkanalysis/_get_route.py CHANGED Viewed

@@ -1,15 +1,10 @@
 import warnings
+import numpy as np
 import pandas as pd
 from geopandas import GeoDataFrame
 from igraph import Graph
-from .network import _edge_ids
-# run functions for get_route, get_k_routes and get_route_frequencies
-# TODO: clean up this mess. Make smaller base functions and three separated for route, frequency and k_routes
+from pandas import DataFrame
 def _get_route(
@@ -18,162 +13,199 @@ def _get_route(
     destinations: GeoDataFrame,
     weight: str,
     roads: GeoDataFrame,
-    summarise: bool = False,
     rowwise: bool = False,
-    k: int = 1,
-    drop_middle_percent: int = 0,
-):
-    """Super function used in the NetworkAnalysis class.
+) -> GeoDataFrame:
+    """Function used in the get_route method of NetworkAnalysis."""
-    Big, ugly super function that is used in the get_route, get_k_routes
-    and get_route_frequencies methods of the NetworkAnalysis class.
-    """
     warnings.filterwarnings("ignore", category=RuntimeWarning)
-    if k > 1:
-        route_func = _run_get_k_routes
-    else:
-        route_func = _run_get_route
+    od_pairs = _create_od_pairs(origins, destinations, rowwise)
-    resultlist: list[GeoDataFrame] = []
-    if rowwise:
-        for ori_id, des_id in zip(origins["temp_idx"], destinations["temp_idx"]):
-            resultlist = resultlist + route_func(
-                ori_id, des_id, graph, roads, summarise, weight, k, drop_middle_percent
-            )
-    else:
-        for ori_id in origins["temp_idx"]:
-            for des_id in destinations["temp_idx"]:
-                resultlist = resultlist + route_func(
-                    ori_id,
-                    des_id,
-                    graph,
-                    roads,
-                    summarise,
-                    weight,
-                    k,
-                    drop_middle_percent,
-                )
+    resultlist: list[DataFrame] = []
+    for ori_id, des_id in od_pairs:
+        indices = _get_one_route(graph, ori_id, des_id)
+        if not indices:
+            continue
+        line_ids = _create_line_id_df(indices["source_target_weight"], ori_id, des_id)
+        resultlist.append(line_ids)
     if not resultlist:
-        warnings.warn("No paths were found.")
+        warnings.warn(
+            "No paths were found. Try larger search_tolerance or search_factor. "
+            "Or close_network_holes() or remove_isolated()."
+        )
         return pd.DataFrame(columns=["origin", "destination", weight, "geometry"])
-    if summarise:
-        counted = (
-            pd.concat(resultlist, ignore_index=True)
-            .assign(n=1)
-            .groupby("source_target_weight")["n"]
-            .count()
-        )
+    results: DataFrame = pd.concat(resultlist)
+    assert list(results.columns) == ["origin", "destination"], list(results.columns)
+    lines: GeoDataFrame = _get_line_geometries(results, roads, weight)
+    lines = lines.dissolve(by=["origin", "destination"], aggfunc="sum", as_index=False)
-        roads["source_target_weight"] = _edge_ids(roads, weight)
+    return lines[["origin", "destination", weight, "geometry"]]
-        roads["n"] = roads["source_target_weight"].map(counted)
-        roads_visited = roads.loc[
-            roads.n.notna(), roads.columns.difference(["source_target_weight"])
-        ]
+def _get_k_routes(
+    graph: Graph,
+    origins: GeoDataFrame,
+    destinations: GeoDataFrame,
+    weight: str,
+    roads: GeoDataFrame,
+    k: int,
+    drop_middle_percent: int,
+    rowwise: bool,
+) -> GeoDataFrame:
+    """Function used in the get_k_routes method of NetworkAnalysis."""
+    warnings.filterwarnings("ignore", category=RuntimeWarning)
+    od_pairs = _create_od_pairs(origins, destinations, rowwise)
-        return roads_visited
+    resultlist: list[DataFrame] = []
+    for ori_id, des_id in od_pairs:
+        k_lines: DataFrame = _loop_k_routes(
+            graph, ori_id, des_id, k, drop_middle_percent
+        )
+        if k_lines is not None:
+            resultlist.append(k_lines)
-    try:
-        results = pd.concat(resultlist)
-    except Exception:
-        raise ValueError(
+    if not resultlist:
+        warnings.warn(
             "No paths were found. Try larger search_tolerance or search_factor. "
             "Or close_network_holes() or remove_isolated()."
         )
+        return pd.DataFrame(columns=["origin", "destination", weight, "geometry"])
-    cols = ["origin", "destination", weight, "geometry"]
-    if "k" in results.columns:
-        cols.append("k")
+    results: DataFrame = pd.concat(resultlist)
+    assert list(results.columns) == ["origin", "destination", "k"], list(
+        results.columns
+    )
+    lines: GeoDataFrame = _get_line_geometries(results, roads, weight)
-    results = results.loc[:, cols].reset_index(drop=True)
+    lines = lines.dissolve(
+        by=["origin", "destination", "k"], aggfunc="sum", as_index=False
+    )
-    return results
+    return lines[["origin", "destination", weight, "k", "geometry"]]
-def _run_get_route(
-    ori_id: str,
-    des_id: str,
-    graph: Graph,
-    roads: GeoDataFrame,
-    summarise: bool,
-    weight: str,
-    k: int,
-    drop_middle_percent: int,
-) -> list[GeoDataFrame] | tuple[GeoDataFrame, list[tuple] | None]:
+def _get_route_frequencies(
+    graph,
+    origins,
+    destinations,
+    rowwise,
+    roads,
+    weight_df: DataFrame | None = None,
+):
+    """Function used in the get_route_frequencies method of NetworkAnalysis."""
+    warnings.filterwarnings("ignore", category=RuntimeWarning)
+    od_pairs = _create_od_pairs(origins, destinations, rowwise)
+    if weight_df is not None and len(weight_df) != len(od_pairs):
+        error_message = _make_keyerror_message(rowwise, weight_df, origins)
+        raise ValueError(error_message)
+    resultlist: list[DataFrame] = []
+    for ori_id, des_id in od_pairs:
+        indices = _get_one_route(graph, ori_id, des_id)
+        if not indices:
+            continue
+        line_ids = DataFrame({"source_target_weight": indices["source_target_weight"]})
+        line_ids["origin"] = ori_id
+        line_ids["destination"] = des_id
+        if weight_df is not None:
+            try:
+                line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
+            except KeyError as e:
+                error_message = _make_keyerror_message(rowwise, weight_df, origins)
+                raise KeyError(error_message) from e
+        else:
+            line_ids["multiplier"] = 1
+        resultlist.append(line_ids)
+    summarised = (
+        pd.concat(resultlist, ignore_index=True)
+        .groupby("source_target_weight")["multiplier"]
+        .sum()
+    )
+    roads["frequency"] = roads["source_target_weight"].map(summarised)
+    roads_visited = roads.loc[
+        roads.frequency.notna(), roads.columns.difference(["source_target_weight"])
+    ]
+    return roads_visited
+def _create_od_pairs(
+    origins: GeoDataFrame, destinations: GeoDataFrame, rowwise: bool
+) -> zip | pd.MultiIndex:
+    """Get all od combinaions if not rowwise."""
+    if rowwise:
+        return zip(origins.temp_idx, destinations.temp_idx)
+    else:
+        return pd.MultiIndex.from_product([origins.temp_idx, destinations.temp_idx])
+def _get_one_route(graph: Graph, ori_id: str, des_id: str):
+    """Get the edges for one route."""
     res = graph.get_shortest_paths(
         weights="weight", v=ori_id, to=des_id, output="epath"
     )
     if not res[0]:
         return []
-    source_target_weight = graph.es[res[0]]["source_target_weight"]
+    return graph.es[res[0]]
-    if summarise:
-        return [pd.DataFrame({"source_target_weight": source_target_weight})]
-    roads["source_target_weight"] = _edge_ids(roads, weight)
-    line = roads.loc[
-        roads["source_target_weight"].isin(source_target_weight),
-        ["geometry", weight, "source_target_weight"],
-    ]
+def _get_line_geometries(line_ids, roads, weight) -> GeoDataFrame:
+    road_mapper = roads.set_index(["source_target_weight"])[[weight, "geometry"]]
+    line_ids = line_ids.join(road_mapper)
+    return GeoDataFrame(line_ids, geometry="geometry", crs=roads.crs)
-    # if len(line) != len(source_target_weight) - 2:
-    #    raise ValueError("length mismatch", len(line), len(source_target_weight))
-    if not len(line):
-        return []
+def _create_line_id_df(source_target_weight: list, ori_id, des_id) -> DataFrame:
+    line_ids = DataFrame(index=source_target_weight)
-    weight_sum = line[weight].sum()
-    line = line.dissolve()
+    # remove edges from ori/des to the roads
+    line_ids = line_ids.loc[~line_ids.index.str.endswith("_0")]
-    line["origin"] = ori_id
-    line["destination"] = des_id
-    line[weight] = weight_sum
+    line_ids["origin"] = ori_id
+    line_ids["destination"] = des_id
-    if k == 1:
-        return [line]
-    else:
-        return [line], graph.es[res[0]]["edge_tuples"]
+    return line_ids
-def _run_get_k_routes(
-    ori_id: str,
-    des_id: str,
-    graph: Graph,
-    roads: GeoDataFrame,
-    summarise: bool,
-    weight: str,
-    k: int,
-    drop_middle_percent,
-) -> list[GeoDataFrame]:
+def _loop_k_routes(graph: Graph, ori_id, des_id, k, drop_middle_percent) -> DataFrame:
     """Workaround for igraph's get_k_shortest_paths.
     igraph's get_k_shorest_paths doesn't seem to work (gives just the same path k
-    times), so doing it manually. Run _run_get_route, then remove the edges in the
+    times), so doing it manually. Run _get_one_route, then remove the edges in the
     middle of the route, given with drop_middle_percent, repeat k times.
     """
     graph = graph.copy()
-    lines: list[GeoDataFrame] = []
+    lines: list[DataFrame] = []
     for i in range(k):
-        line = _run_get_route(
-            ori_id, des_id, graph, roads, summarise, weight, k, drop_middle_percent
-        )
+        indices = _get_one_route(graph, ori_id, des_id)
-        if not isinstance(line, tuple):
+        if not indices:
             continue
-        line, edge_tuples = line
-        line = line[0]
-        line["k"] = i + 1
+        line_ids = _create_line_id_df(indices["source_target_weight"], ori_id, des_id)
+        line_ids["k"] = i + 1
+        lines.append(line_ids)
-        lines.append(line)
+        edge_tuples = indices["edge_tuples"]
         n_edges_to_keep = (
             len(edge_tuples) - len(edge_tuples) * drop_middle_percent / 100
@@ -187,4 +219,26 @@ def _run_get_k_routes(
         to_be_dropped = edge_tuples[n_edges_to_keep:-n_edges_to_keep]
         graph.delete_edges(to_be_dropped)
-    return lines
+    if lines:
+        return pd.concat(lines)
+    else:
+        return pd.DataFrame()
+def _make_keyerror_message(rowwise, weight_df, origins) -> str:
+    """Add help info to error message if key in weight_df is missing.
+    If empty resultlist, assume all indices are wrong. Else, assume
+    """
+    error_message = (
+        "'weight_df' does not contain all indices of each OD pair combination. "
+    )
+    if not rowwise and len(weight_df) == len(origins):
+        error_message = error_message + (
+            "Did you mean to set rowwise to True? "
+            "If not, make sure weight_df contains all combinations of "
+            "origin-destination pairs. Either specified as a MultiIndex or as the "
+            "first two columns of 'weight_df'. So (0, 0), (0, 1), (1, 0), (1, 1) etc."
+        )
+    return error_message

sgis/networkanalysis/_od_cost_matrix.py CHANGED Viewed

@@ -16,12 +16,7 @@ def _od_cost_matrix(
     lines: bool = False,
     rowwise: bool = False,
 ) -> DataFrame | GeoDataFrame:
-    if rowwise and len(origins) != len(destinations):
-        raise ValueError(
-            "'origins' and 'destinations' must have the same length when rowwise=True"
-        )
-    results = graph.distances(
+    distances: list[list[str]] = graph.distances(
         weights="weight",
         source=origins["temp_idx"],
         target=destinations["temp_idx"],
@@ -29,10 +24,10 @@ def _od_cost_matrix(
     ori_idx, des_idx, costs = [], [], []
     for i, f_idx in enumerate(origins["temp_idx"]):
-        for ii, t_idx in enumerate(destinations["temp_idx"]):
+        for j, t_idx in enumerate(destinations["temp_idx"]):
             ori_idx.append(f_idx)
             des_idx.append(t_idx)
-            costs.append(results[i][ii])
+            costs.append(distances[i][j])
     results = (
         pd.DataFrame(data={"origin": ori_idx, "destination": des_idx, weight: costs})
@@ -44,7 +39,10 @@ def _od_cost_matrix(
     # so filtering to rowwise afterwards instead
     if rowwise:
         rowwise_df = DataFrame(
-            {"origin": origins["temp_idx"], "destination": destinations["temp_idx"]}
+            {
+                "origin": origins["temp_idx"].reset_index(drop=True),
+                "destination": destinations["temp_idx"].reset_index(drop=True),
+            }
         )
         results = rowwise_df.merge(results, on=["origin", "destination"], how="left")

sgis/networkanalysis/_points.py CHANGED Viewed

@@ -36,24 +36,6 @@ class Points:
             for temp_idx, idx in zip(self.gdf.temp_idx, self.gdf.index, strict=True)
         }
-    def _get_n_missing(
-        self,
-        results: GeoDataFrame | DataFrame,
-        col: str,
-    ) -> None:
-        """
-        Get number of missing values for each point after a network analysis.
-        Args:
-            results: (Geo)DataFrame resulting from od_cost_matrix, get_route,
-                get_k_routes, get_route_frequencies or service_area.
-            col: id column of the results. Either 'origin' or 'destination'.
-        """
-        self.gdf["missing"] = self.gdf["temp_idx"].map(
-            results.groupby(col).count().iloc[:, 0]
-            - results.dropna().groupby(col).count().iloc[:, 0]
-        )
     @staticmethod
     def _convert_distance_to_weight(distances, rules):
         """Meters to minutes based on 'weight_to_nodes_' attribute of the rules."""

sgis/networkanalysis/directednetwork.py CHANGED Viewed

@@ -11,7 +11,7 @@ from .network import Network
 class DirectedNetwork(Network):
-    """Subclass of Network with methods for making the network directed.
+    """Class for preparing line data for directed network analysis.
     Can be used as the 'network' parameter in the NetworkAnalysis class for directed
     network analysis.
@@ -19,7 +19,7 @@ class DirectedNetwork(Network):
     The DirectedNetwork class differs from the Network base class in two ways:
     1) using a DirectedNetwork in the NetworkAnalysis class means the network graph
     will be directed, meaning you can only travel in one direction on each line.
-    2) the class offers methods for making the network directed, mainly the
+    2) the class holds methods for making the network directed, mainly the
     'make_directed_network' method, which reverses lines going the wrong direction
     and duplicates and flips lines going both directions. It also creates a 'minute'
     column.

sgis/networkanalysis/network.py CHANGED Viewed

@@ -105,7 +105,7 @@ class Network:
     >>> len(nw.gdf)
     85638
-    >>> nw = nw.close_network_holes(max_distance=1.5, fillna=0)
+    >>> nw = nw.close_network_holes(max_distance=1.5, max_angle=90, fillna=0)
     >>> len(nw.gdf)
     86929
@@ -612,6 +612,21 @@ class Network:
         return True
+    def get_edges(self) -> list[tuple[str, str]]:
+        return [
+            (str(source), str(target))
+            for source, target in zip(
+                self.gdf["source"], self.gdf["target"], strict=True
+            )
+        ]
+    @staticmethod
+    def _create_edge_ids(
+        edges: list[tuple[str, str]], weights: list[float]
+    ) -> list[str]:
+        """Edge identifiers represented with source and target ids and the weight."""
+        return [f"{s}_{t}_{w}" for (s, t), w in zip(edges, weights, strict=True)]
     def _update_nodes_if(self):
         if not self._nodes_are_up_to_date():
             self._make_node_ids()
@@ -653,27 +668,3 @@ class Network:
     def __len__(self):
         return len(self.gdf)
-# TODO: put these a better place:
-def _edge_ids(
-    gdf: GeoDataFrame | list[tuple[int, int]], weight: str | list[float]
-) -> list[str]:
-    """Quite messy way to deal with different input types."""
-    if isinstance(gdf, GeoDataFrame):
-        return _edge_id_template(
-            zip(gdf["source"], gdf["target"], strict=True),
-            weight_arr=gdf[weight],
-        )
-    if isinstance(gdf, list):
-        return _edge_id_template(gdf, weight_arr=weight)
-def _edge_id_template(*source_target_arrs, weight_arr):
-    """Edge identifiers represented with source and target ids and the weight."""
-    return [
-        f"{s}_{t}_{w}"
-        for (s, t), w in zip(*source_target_arrs, weight_arr, strict=True)
-    ]

ssb-sgis 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

ssb-sgis 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl