ssb-sgis 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,10 @@
1
1
  import warnings
2
2
 
3
+ import numpy as np
3
4
  import pandas as pd
4
5
  from geopandas import GeoDataFrame
5
6
  from igraph import Graph
6
-
7
- from .network import _edge_ids
8
-
9
-
10
- # run functions for get_route, get_k_routes and get_route_frequencies
11
-
12
- # TODO: clean up this mess. Make smaller base functions and three separated for route, frequency and k_routes
7
+ from pandas import DataFrame
13
8
 
14
9
 
15
10
  def _get_route(
@@ -18,162 +13,199 @@ def _get_route(
18
13
  destinations: GeoDataFrame,
19
14
  weight: str,
20
15
  roads: GeoDataFrame,
21
- summarise: bool = False,
22
16
  rowwise: bool = False,
23
- k: int = 1,
24
- drop_middle_percent: int = 0,
25
- ):
26
- """Super function used in the NetworkAnalysis class.
17
+ ) -> GeoDataFrame:
18
+ """Function used in the get_route method of NetworkAnalysis."""
27
19
 
28
- Big, ugly super function that is used in the get_route, get_k_routes
29
- and get_route_frequencies methods of the NetworkAnalysis class.
30
- """
31
20
  warnings.filterwarnings("ignore", category=RuntimeWarning)
32
21
 
33
- if k > 1:
34
- route_func = _run_get_k_routes
35
- else:
36
- route_func = _run_get_route
22
+ od_pairs = _create_od_pairs(origins, destinations, rowwise)
37
23
 
38
- resultlist: list[GeoDataFrame] = []
39
- if rowwise:
40
- for ori_id, des_id in zip(origins["temp_idx"], destinations["temp_idx"]):
41
- resultlist = resultlist + route_func(
42
- ori_id, des_id, graph, roads, summarise, weight, k, drop_middle_percent
43
- )
44
- else:
45
- for ori_id in origins["temp_idx"]:
46
- for des_id in destinations["temp_idx"]:
47
- resultlist = resultlist + route_func(
48
- ori_id,
49
- des_id,
50
- graph,
51
- roads,
52
- summarise,
53
- weight,
54
- k,
55
- drop_middle_percent,
56
- )
24
+ resultlist: list[DataFrame] = []
25
+
26
+ for ori_id, des_id in od_pairs:
27
+ indices = _get_one_route(graph, ori_id, des_id)
28
+
29
+ if not indices:
30
+ continue
31
+
32
+ line_ids = _create_line_id_df(indices["source_target_weight"], ori_id, des_id)
33
+
34
+ resultlist.append(line_ids)
57
35
 
58
36
  if not resultlist:
59
- warnings.warn("No paths were found.")
37
+ warnings.warn(
38
+ "No paths were found. Try larger search_tolerance or search_factor. "
39
+ "Or close_network_holes() or remove_isolated()."
40
+ )
60
41
  return pd.DataFrame(columns=["origin", "destination", weight, "geometry"])
61
42
 
62
- if summarise:
63
- counted = (
64
- pd.concat(resultlist, ignore_index=True)
65
- .assign(n=1)
66
- .groupby("source_target_weight")["n"]
67
- .count()
68
- )
43
+ results: DataFrame = pd.concat(resultlist)
44
+ assert list(results.columns) == ["origin", "destination"], list(results.columns)
45
+ lines: GeoDataFrame = _get_line_geometries(results, roads, weight)
46
+ lines = lines.dissolve(by=["origin", "destination"], aggfunc="sum", as_index=False)
69
47
 
70
- roads["source_target_weight"] = _edge_ids(roads, weight)
48
+ return lines[["origin", "destination", weight, "geometry"]]
71
49
 
72
- roads["n"] = roads["source_target_weight"].map(counted)
73
50
 
74
- roads_visited = roads.loc[
75
- roads.n.notna(), roads.columns.difference(["source_target_weight"])
76
- ]
51
+ def _get_k_routes(
52
+ graph: Graph,
53
+ origins: GeoDataFrame,
54
+ destinations: GeoDataFrame,
55
+ weight: str,
56
+ roads: GeoDataFrame,
57
+ k: int,
58
+ drop_middle_percent: int,
59
+ rowwise: bool,
60
+ ) -> GeoDataFrame:
61
+ """Function used in the get_k_routes method of NetworkAnalysis."""
62
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
63
+ od_pairs = _create_od_pairs(origins, destinations, rowwise)
77
64
 
78
- return roads_visited
65
+ resultlist: list[DataFrame] = []
66
+
67
+ for ori_id, des_id in od_pairs:
68
+ k_lines: DataFrame = _loop_k_routes(
69
+ graph, ori_id, des_id, k, drop_middle_percent
70
+ )
71
+ if k_lines is not None:
72
+ resultlist.append(k_lines)
79
73
 
80
- try:
81
- results = pd.concat(resultlist)
82
- except Exception:
83
- raise ValueError(
74
+ if not resultlist:
75
+ warnings.warn(
84
76
  "No paths were found. Try larger search_tolerance or search_factor. "
85
77
  "Or close_network_holes() or remove_isolated()."
86
78
  )
79
+ return pd.DataFrame(columns=["origin", "destination", weight, "geometry"])
87
80
 
88
- cols = ["origin", "destination", weight, "geometry"]
89
- if "k" in results.columns:
90
- cols.append("k")
81
+ results: DataFrame = pd.concat(resultlist)
82
+ assert list(results.columns) == ["origin", "destination", "k"], list(
83
+ results.columns
84
+ )
85
+ lines: GeoDataFrame = _get_line_geometries(results, roads, weight)
91
86
 
92
- results = results.loc[:, cols].reset_index(drop=True)
87
+ lines = lines.dissolve(
88
+ by=["origin", "destination", "k"], aggfunc="sum", as_index=False
89
+ )
93
90
 
94
- return results
91
+ return lines[["origin", "destination", weight, "k", "geometry"]]
95
92
 
96
93
 
97
- def _run_get_route(
98
- ori_id: str,
99
- des_id: str,
100
- graph: Graph,
101
- roads: GeoDataFrame,
102
- summarise: bool,
103
- weight: str,
104
- k: int,
105
- drop_middle_percent: int,
106
- ) -> list[GeoDataFrame] | tuple[GeoDataFrame, list[tuple] | None]:
94
+ def _get_route_frequencies(
95
+ graph,
96
+ origins,
97
+ destinations,
98
+ rowwise,
99
+ roads,
100
+ weight_df: DataFrame | None = None,
101
+ ):
102
+ """Function used in the get_route_frequencies method of NetworkAnalysis."""
103
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
104
+ od_pairs = _create_od_pairs(origins, destinations, rowwise)
105
+
106
+ if weight_df is not None and len(weight_df) != len(od_pairs):
107
+ error_message = _make_keyerror_message(rowwise, weight_df, origins)
108
+ raise ValueError(error_message)
109
+
110
+ resultlist: list[DataFrame] = []
111
+
112
+ for ori_id, des_id in od_pairs:
113
+ indices = _get_one_route(graph, ori_id, des_id)
114
+
115
+ if not indices:
116
+ continue
117
+
118
+ line_ids = DataFrame({"source_target_weight": indices["source_target_weight"]})
119
+ line_ids["origin"] = ori_id
120
+ line_ids["destination"] = des_id
121
+
122
+ if weight_df is not None:
123
+ try:
124
+ line_ids["multiplier"] = weight_df.loc[ori_id, des_id].iloc[0]
125
+ except KeyError as e:
126
+ error_message = _make_keyerror_message(rowwise, weight_df, origins)
127
+ raise KeyError(error_message) from e
128
+ else:
129
+ line_ids["multiplier"] = 1
130
+
131
+ resultlist.append(line_ids)
132
+
133
+ summarised = (
134
+ pd.concat(resultlist, ignore_index=True)
135
+ .groupby("source_target_weight")["multiplier"]
136
+ .sum()
137
+ )
138
+
139
+ roads["frequency"] = roads["source_target_weight"].map(summarised)
140
+
141
+ roads_visited = roads.loc[
142
+ roads.frequency.notna(), roads.columns.difference(["source_target_weight"])
143
+ ]
144
+
145
+ return roads_visited
146
+
147
+
148
+ def _create_od_pairs(
149
+ origins: GeoDataFrame, destinations: GeoDataFrame, rowwise: bool
150
+ ) -> zip | pd.MultiIndex:
151
+ """Get all od combinations if not rowwise."""
152
+ if rowwise:
153
+ return zip(origins.temp_idx, destinations.temp_idx)
154
+ else:
155
+ return pd.MultiIndex.from_product([origins.temp_idx, destinations.temp_idx])
156
+
157
+
158
+ def _get_one_route(graph: Graph, ori_id: str, des_id: str):
159
+ """Get the edges for one route."""
107
160
  res = graph.get_shortest_paths(
108
161
  weights="weight", v=ori_id, to=des_id, output="epath"
109
162
  )
110
-
111
163
  if not res[0]:
112
164
  return []
113
165
 
114
- source_target_weight = graph.es[res[0]]["source_target_weight"]
166
+ return graph.es[res[0]]
115
167
 
116
- if summarise:
117
- return [pd.DataFrame({"source_target_weight": source_target_weight})]
118
168
 
119
- roads["source_target_weight"] = _edge_ids(roads, weight)
120
- line = roads.loc[
121
- roads["source_target_weight"].isin(source_target_weight),
122
- ["geometry", weight, "source_target_weight"],
123
- ]
169
+ def _get_line_geometries(line_ids, roads, weight) -> GeoDataFrame:
170
+ road_mapper = roads.set_index(["source_target_weight"])[[weight, "geometry"]]
171
+ line_ids = line_ids.join(road_mapper)
172
+ return GeoDataFrame(line_ids, geometry="geometry", crs=roads.crs)
124
173
 
125
- # if len(line) != len(source_target_weight) - 2:
126
- # raise ValueError("length mismatch", len(line), len(source_target_weight))
127
174
 
128
- if not len(line):
129
- return []
175
+ def _create_line_id_df(source_target_weight: list, ori_id, des_id) -> DataFrame:
176
+ line_ids = DataFrame(index=source_target_weight)
130
177
 
131
- weight_sum = line[weight].sum()
132
- line = line.dissolve()
178
+ # remove edges from ori/des to the roads
179
+ line_ids = line_ids.loc[~line_ids.index.str.endswith("_0")]
133
180
 
134
- line["origin"] = ori_id
135
- line["destination"] = des_id
136
- line[weight] = weight_sum
181
+ line_ids["origin"] = ori_id
182
+ line_ids["destination"] = des_id
137
183
 
138
- if k == 1:
139
- return [line]
140
- else:
141
- return [line], graph.es[res[0]]["edge_tuples"]
184
+ return line_ids
142
185
 
143
186
 
144
- def _run_get_k_routes(
145
- ori_id: str,
146
- des_id: str,
147
- graph: Graph,
148
- roads: GeoDataFrame,
149
- summarise: bool,
150
- weight: str,
151
- k: int,
152
- drop_middle_percent,
153
- ) -> list[GeoDataFrame]:
187
+ def _loop_k_routes(graph: Graph, ori_id, des_id, k, drop_middle_percent) -> DataFrame:
154
188
  """Workaround for igraph's get_k_shortest_paths.
155
189
 
156
190
  igraph's get_k_shortest_paths doesn't seem to work (gives just the same path k
157
- times), so doing it manually. Run _run_get_route, then remove the edges in the
191
+ times), so doing it manually. Run _get_one_route, then remove the edges in the
158
192
  middle of the route, given with drop_middle_percent, repeat k times.
159
193
  """
160
194
  graph = graph.copy()
161
195
 
162
- lines: list[GeoDataFrame] = []
196
+ lines: list[DataFrame] = []
163
197
 
164
198
  for i in range(k):
165
- line = _run_get_route(
166
- ori_id, des_id, graph, roads, summarise, weight, k, drop_middle_percent
167
- )
199
+ indices = _get_one_route(graph, ori_id, des_id)
168
200
 
169
- if not isinstance(line, tuple):
201
+ if not indices:
170
202
  continue
171
203
 
172
- line, edge_tuples = line
173
- line = line[0]
174
- line["k"] = i + 1
204
+ line_ids = _create_line_id_df(indices["source_target_weight"], ori_id, des_id)
205
+ line_ids["k"] = i + 1
206
+ lines.append(line_ids)
175
207
 
176
- lines.append(line)
208
+ edge_tuples = indices["edge_tuples"]
177
209
 
178
210
  n_edges_to_keep = (
179
211
  len(edge_tuples) - len(edge_tuples) * drop_middle_percent / 100
@@ -187,4 +219,26 @@ def _run_get_k_routes(
187
219
  to_be_dropped = edge_tuples[n_edges_to_keep:-n_edges_to_keep]
188
220
  graph.delete_edges(to_be_dropped)
189
221
 
190
- return lines
222
+ if lines:
223
+ return pd.concat(lines)
224
+ else:
225
+ return pd.DataFrame()
226
+
227
+
228
+ def _make_keyerror_message(rowwise, weight_df, origins) -> str:
229
+ """Add help info to error message if key in weight_df is missing.
230
+
231
+ If rowwise is False and weight_df has the same length as origins, the message also asks whether rowwise should be True.
232
+ """
233
+ error_message = (
234
+ "'weight_df' does not contain all indices of each OD pair combination. "
235
+ )
236
+ if not rowwise and len(weight_df) == len(origins):
237
+ error_message = error_message + (
238
+ "Did you mean to set rowwise to True? "
239
+ "If not, make sure weight_df contains all combinations of "
240
+ "origin-destination pairs. Either specified as a MultiIndex or as the "
241
+ "first two columns of 'weight_df'. So (0, 0), (0, 1), (1, 0), (1, 1) etc."
242
+ )
243
+
244
+ return error_message
@@ -16,12 +16,7 @@ def _od_cost_matrix(
16
16
  lines: bool = False,
17
17
  rowwise: bool = False,
18
18
  ) -> DataFrame | GeoDataFrame:
19
- if rowwise and len(origins) != len(destinations):
20
- raise ValueError(
21
- "'origins' and 'destinations' must have the same length when rowwise=True"
22
- )
23
-
24
- results = graph.distances(
19
+ distances: list[list[str]] = graph.distances(
25
20
  weights="weight",
26
21
  source=origins["temp_idx"],
27
22
  target=destinations["temp_idx"],
@@ -29,10 +24,10 @@ def _od_cost_matrix(
29
24
 
30
25
  ori_idx, des_idx, costs = [], [], []
31
26
  for i, f_idx in enumerate(origins["temp_idx"]):
32
- for ii, t_idx in enumerate(destinations["temp_idx"]):
27
+ for j, t_idx in enumerate(destinations["temp_idx"]):
33
28
  ori_idx.append(f_idx)
34
29
  des_idx.append(t_idx)
35
- costs.append(results[i][ii])
30
+ costs.append(distances[i][j])
36
31
 
37
32
  results = (
38
33
  pd.DataFrame(data={"origin": ori_idx, "destination": des_idx, weight: costs})
@@ -44,7 +39,10 @@ def _od_cost_matrix(
44
39
  # so filtering to rowwise afterwards instead
45
40
  if rowwise:
46
41
  rowwise_df = DataFrame(
47
- {"origin": origins["temp_idx"], "destination": destinations["temp_idx"]}
42
+ {
43
+ "origin": origins["temp_idx"].reset_index(drop=True),
44
+ "destination": destinations["temp_idx"].reset_index(drop=True),
45
+ }
48
46
  )
49
47
  results = rowwise_df.merge(results, on=["origin", "destination"], how="left")
50
48
 
@@ -36,24 +36,6 @@ class Points:
36
36
  for temp_idx, idx in zip(self.gdf.temp_idx, self.gdf.index, strict=True)
37
37
  }
38
38
 
39
- def _get_n_missing(
40
- self,
41
- results: GeoDataFrame | DataFrame,
42
- col: str,
43
- ) -> None:
44
- """
45
- Get number of missing values for each point after a network analysis.
46
-
47
- Args:
48
- results: (Geo)DataFrame resulting from od_cost_matrix, get_route,
49
- get_k_routes, get_route_frequencies or service_area.
50
- col: id column of the results. Either 'origin' or 'destination'.
51
- """
52
- self.gdf["missing"] = self.gdf["temp_idx"].map(
53
- results.groupby(col).count().iloc[:, 0]
54
- - results.dropna().groupby(col).count().iloc[:, 0]
55
- )
56
-
57
39
  @staticmethod
58
40
  def _convert_distance_to_weight(distances, rules):
59
41
  """Meters to minutes based on 'weight_to_nodes_' attribute of the rules."""
@@ -11,7 +11,7 @@ from .network import Network
11
11
 
12
12
 
13
13
  class DirectedNetwork(Network):
14
- """Subclass of Network with methods for making the network directed.
14
+ """Class for preparing line data for directed network analysis.
15
15
 
16
16
  Can be used as the 'network' parameter in the NetworkAnalysis class for directed
17
17
  network analysis.
@@ -19,7 +19,7 @@ class DirectedNetwork(Network):
19
19
  The DirectedNetwork class differs from the Network base class in two ways:
20
20
  1) using a DirectedNetwork in the NetworkAnalysis class means the network graph
21
21
  will be directed, meaning you can only travel in one direction on each line.
22
- 2) the class offers methods for making the network directed, mainly the
22
+ 2) the class holds methods for making the network directed, mainly the
23
23
  'make_directed_network' method, which reverses lines going the wrong direction
24
24
  and duplicates and flips lines going both directions. It also creates a 'minute'
25
25
  column.
@@ -105,7 +105,7 @@ class Network:
105
105
 
106
106
  >>> len(nw.gdf)
107
107
  85638
108
- >>> nw = nw.close_network_holes(max_distance=1.5, fillna=0)
108
+ >>> nw = nw.close_network_holes(max_distance=1.5, max_angle=90, fillna=0)
109
109
  >>> len(nw.gdf)
110
110
  86929
111
111
 
@@ -612,6 +612,21 @@ class Network:
612
612
 
613
613
  return True
614
614
 
615
+ def get_edges(self) -> list[tuple[str, str]]:
616
+ return [
617
+ (str(source), str(target))
618
+ for source, target in zip(
619
+ self.gdf["source"], self.gdf["target"], strict=True
620
+ )
621
+ ]
622
+
623
+ @staticmethod
624
+ def _create_edge_ids(
625
+ edges: list[tuple[str, str]], weights: list[float]
626
+ ) -> list[str]:
627
+ """Edge identifiers represented with source and target ids and the weight."""
628
+ return [f"{s}_{t}_{w}" for (s, t), w in zip(edges, weights, strict=True)]
629
+
615
630
  def _update_nodes_if(self):
616
631
  if not self._nodes_are_up_to_date():
617
632
  self._make_node_ids()
@@ -653,27 +668,3 @@ class Network:
653
668
 
654
669
  def __len__(self):
655
670
  return len(self.gdf)
656
-
657
-
658
- # TODO: put these a better place:
659
-
660
-
661
- def _edge_ids(
662
- gdf: GeoDataFrame | list[tuple[int, int]], weight: str | list[float]
663
- ) -> list[str]:
664
- """Quite messy way to deal with different input types."""
665
- if isinstance(gdf, GeoDataFrame):
666
- return _edge_id_template(
667
- zip(gdf["source"], gdf["target"], strict=True),
668
- weight_arr=gdf[weight],
669
- )
670
- if isinstance(gdf, list):
671
- return _edge_id_template(gdf, weight_arr=weight)
672
-
673
-
674
- def _edge_id_template(*source_target_arrs, weight_arr):
675
- """Edge identifiers represented with source and target ids and the weight."""
676
- return [
677
- f"{s}_{t}_{w}"
678
- for (s, t), w in zip(*source_target_arrs, weight_arr, strict=True)
679
- ]