ssb-sgis 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,12 +18,12 @@ from pandas import DataFrame
18
18
 
19
19
  from ..geopandas_tools.general import _push_geom_col
20
20
  from ..geopandas_tools.line_operations import split_lines_by_nearest_point
21
- from ._get_route import _get_route
21
+ from ._get_route import _get_k_routes, _get_route, _get_route_frequencies
22
22
  from ._od_cost_matrix import _od_cost_matrix
23
23
  from ._points import Destinations, Origins
24
24
  from ._service_area import _service_area
25
25
  from .directednetwork import DirectedNetwork
26
- from .network import Network, _edge_ids
26
+ from .network import Network
27
27
  from .networkanalysisrules import NetworkAnalysisRules
28
28
 
29
29
 
@@ -71,24 +71,24 @@ class NetworkAnalysis:
71
71
 
72
72
  See also
73
73
  --------
74
- DirectedNetwork : for customising and optimising line data before directed network
74
+ DirectedNetwork : For customising and optimising line data before directed network
75
75
  analysis.
76
- Network : for customising and optimising line data before undirected network
76
+
77
+ Network : For customising and optimising line data before undirected network
77
78
  analysis.
78
79
 
79
80
  Examples
80
81
  --------
81
82
  Read example data.
82
83
 
83
- >>> from sgis import read_parquet_url
84
- >>> roads = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
85
- >>> points = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_eidskog.parquet")
84
+ >>> import sgis as sg
85
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
86
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_eidskog.parquet")
86
87
 
87
88
  Creating a NetworkAnalysis class instance.
88
89
 
89
- >>> from sgis import DirectedNetwork, NetworkAnalysisRules, NetworkAnalysis
90
90
  >>> nw = (
91
- ... DirectedNetwork(roads)
91
+ ... sg.DirectedNetwork(roads)
92
92
  ... .remove_isolated()
93
93
  ... .make_directed_network(
94
94
  ... direction_col="oneway",
@@ -96,12 +96,13 @@ class NetworkAnalysis:
96
96
  ... minute_cols=("drivetime_fw", "drivetime_bw"),
97
97
  ... )
98
98
  ... )
99
- >>> rules = NetworkAnalysisRules(weight="minutes")
100
- >>> nwa = NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
99
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
100
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
101
101
  >>> nwa
102
102
  NetworkAnalysis(
103
103
  network=DirectedNetwork(6364 km, percent_bidirectional=87),
104
- rules=NetworkAnalysisRules(weight='minutes', search_tolerance=250, search_factor=10, split_lines=False, ...)
104
+ rules=NetworkAnalysisRules(weight='minutes', search_tolerance=250, search_factor=10, split_lines=False, ...),
105
+ log=True, detailed_log=True,
105
106
  )
106
107
 
107
108
  od_cost_matrix: fast many-to-many travel time/distance calculation.
@@ -120,7 +121,6 @@ class NetworkAnalysis:
120
121
  999997 999 997 10.288465
121
122
  999998 999 998 14.798257
122
123
  999999 999 999 0.000000
123
-
124
124
  [1000000 rows x 3 columns]
125
125
 
126
126
  get_route: get the geometry of the routes.
@@ -139,7 +139,6 @@ class NetworkAnalysis:
139
139
  97 606 901 16.998595 MULTILINESTRING Z ((265040.505 6641218.021 100...
140
140
  98 606 766 10.094371 MULTILINESTRING Z ((265639.400 6649020.000 85....
141
141
  99 606 320 7.317098 MULTILINESTRING Z ((262711.480 6648807.500 3.8...
142
-
143
142
  [100 rows x 4 columns]
144
143
 
145
144
  get_route_frequencies: get the number of times each line segment was used.
@@ -147,19 +146,19 @@ class NetworkAnalysis:
147
146
  >>> frequencies = nwa.get_route_frequencies(points.sample(25), points.sample(25))
148
147
  >>> frequencies[["source", "target", "frequency", "geometry"]]
149
148
  source target frequency geometry
150
- 116897 28500 13496 1.0 LINESTRING Z (256638.500 6653339.300 153.857, ...
151
- 155781 23913 23908 1.0 LINESTRING Z (256843.774 6653563.678 143.702, ...
152
- 155780 76957 23913 1.0 LINESTRING Z (256866.300 6653559.400 142.457, ...
153
- 155779 76956 76957 1.0 LINESTRING Z (256875.300 6653557.100 142.157, ...
154
- 155778 74816 76956 1.0 LINESTRING Z (256882.738 6653554.580 141.657, ...
149
+ 160188 77264 79112 1.0 LINESTRING Z (268641.225 6651871.624 111.355, ...
150
+ 138956 30221 45403 1.0 LINESTRING Z (273091.100 6652396.000 170.471, ...
151
+ 138958 30224 30221 1.0 LINESTRING Z (273117.500 6652391.500 169.771, ...
152
+ 138960 16513 30224 1.0 LINESTRING Z (273176.813 6652379.896 169.414, ...
153
+ 138962 40610 16513 1.0 LINESTRING Z (273207.300 6652372.100 168.871, ...
155
154
  ... ... ... ... ...
156
- 156631 77375 77374 90.0 LINESTRING Z (265454.387 6651000.044 88.806, 2...
157
- 149793 72649 77375 90.0 LINESTRING Z (265455.009 6651007.750 88.612, 2...
158
- 158249 78124 78123 95.0 LINESTRING Z (265563.150 6650547.620 89.382, 2...
159
- 158248 78123 72820 95.0 LINESTRING Z (265567.158 6650542.836 89.522, 2...
160
- 156601 77353 78124 95.0 LINESTRING Z (265530.470 6650587.640 88.527, 2...
155
+ 151464 73800 73801 108.0 LINESTRING Z (265362.800 6647137.100 131.660, ...
156
+ 151465 73801 73802 108.0 LINESTRING Z (265368.600 6647142.900 131.660, ...
157
+ 151466 73802 73632 108.0 LINESTRING Z (265371.400 6647147.900 131.660, ...
158
+ 151463 73799 73800 129.0 LINESTRING Z (265359.600 6647135.400 131.660, ...
159
+ 152170 74418 74246 135.0 LINESTRING Z (264579.835 6651954.573 113.209, ...
161
160
 
162
- [9268 rows x 4 columns]
161
+ [8915 rows x 4 columns]
163
162
 
164
163
  service_area: get the area that can be reached within one or more breaks.
165
164
 
@@ -187,8 +186,8 @@ class NetworkAnalysis:
187
186
  1 2023-03-29 15:20:21 0.5 get_route 10 10.0 0.0000 15.001443 True ... 10.093613 14.641413 19.725085 6.869095 NaN False NaN NaN
188
187
  2 2023-03-29 15:20:40 0.3 get_route_frequencies 25 25.0 0.0000 0.067199 True ... 0.013309 0.038496 0.085692 0.087247 NaN NaN NaN NaN
189
188
  3 2023-03-29 15:20:50 0.2 service_area 3 NaN 0.0000 10.000000 True ... 5.000000 10.000000 15.000000 4.330127 NaN NaN 5, 10, 15 True
190
-
191
189
  [4 rows x 23 columns]
190
+
192
191
  """
193
192
 
194
193
  def __init__(
@@ -274,15 +273,19 @@ class NetworkAnalysis:
274
273
 
275
274
  Examples
276
275
  --------
276
+ Create the NetworkAnalysis instance.
277
+
278
+ >>> import sgis as sg
279
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
280
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
281
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
282
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
283
+
277
284
  Create some origin and destination points.
278
- See the class examples for how to prepare the network.
279
285
 
280
- import sgis as sg
281
- >>> points = sg.read_parquet_url(
282
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
283
- ... )
286
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
284
287
 
285
- >>> origins = points.loc[:99]
288
+ >>> origins = points.loc[:99, ["geometry"]]
286
289
  >>> origins
287
290
  geometry
288
291
  0 POINT (263122.700 6651184.900)
@@ -296,10 +299,9 @@ class NetworkAnalysis:
296
299
  97 POINT (263162.000 6650732.200)
297
300
  98 POINT (272322.700 6653729.100)
298
301
  99 POINT (265622.800 6644644.200)
299
-
300
302
  [100 rows x 1 columns]
301
303
 
302
- >>> destinations = points.loc[100:199]
304
+ >>> destinations = points.loc[100:199, ["geometry"]]
303
305
  >>> destinations
304
306
  geometry
305
307
  100 POINT (265997.900 6647899.400)
@@ -313,7 +315,6 @@ class NetworkAnalysis:
313
315
  197 POINT (273135.300 6653198.100)
314
316
  198 POINT (270582.300 6652163.800)
315
317
  199 POINT (264980.800 6647231.300)
316
-
317
318
  [100 rows x 1 columns]
318
319
 
319
320
  Travel time from 100 to 100 points.
@@ -332,7 +333,6 @@ class NetworkAnalysis:
332
333
  9997 99 197 19.977029
333
334
  9998 99 198 15.233163
334
335
  9999 99 199 6.439002
335
-
336
336
  [10000 rows x 3 columns]
337
337
 
338
338
  Join the results onto the 'origins' GeoDataFrame via the index.
@@ -351,7 +351,6 @@ class NetworkAnalysis:
351
351
  99 POINT (265622.800 6644644.200) 197 19.977029
352
352
  99 POINT (265622.800 6644644.200) 198 15.233163
353
353
  99 POINT (265622.800 6644644.200) 199 6.439002
354
-
355
354
  [10000 rows x 3 columns]
356
355
 
357
356
  Get travel times below 10 minutes.
@@ -371,7 +370,6 @@ class NetworkAnalysis:
371
370
  99 POINT (265622.800 6644644.200) 177.0 5.944194
372
371
  99 POINT (265622.800 6644644.200) 183.0 8.449906
373
372
  99 POINT (265622.800 6644644.200) 199.0 6.439002
374
-
375
373
  [2195 rows x 3 columns]
376
374
 
377
375
  Get the three fastest routes from each origin.
@@ -391,7 +389,6 @@ class NetworkAnalysis:
391
389
  99 POINT (265622.800 6644644.200) 102.0 1.648705
392
390
  99 POINT (265622.800 6644644.200) 134.0 1.116209
393
391
  99 POINT (265622.800 6644644.200) 156.0 1.368926
394
-
395
392
  [294 rows x 3 columns]
396
393
 
397
394
  Assign aggregated values directly onto the origins via the index.
@@ -410,6 +407,7 @@ class NetworkAnalysis:
410
407
  97 POINT (263162.000 6650732.200) 11.904372
411
408
  98 POINT (272322.700 6653729.100) 17.579399
412
409
  99 POINT (265622.800 6644644.200) 12.185800
410
+ [100 rows x 2 columns]
413
411
 
414
412
  Use set_index to use a column as identifier instead of the index.
415
413
 
@@ -428,7 +426,6 @@ class NetworkAnalysis:
428
426
  9997 b 197 19.977029
429
427
  9998 b 198 15.233163
430
428
  9999 b 199 6.439002
431
-
432
429
  [10000 rows x 3 columns]
433
430
 
434
431
  Travel time from 1000 to 1000 points rowwise.
@@ -448,13 +445,13 @@ class NetworkAnalysis:
448
445
  997 997 997 19.968743
449
446
  998 998 998 9.484374
450
447
  999 999 999 14.892648
451
-
452
448
  [1000 rows x 3 columns]
449
+
453
450
  """
454
451
  if self._log:
455
452
  time_ = perf_counter()
456
453
 
457
- self._prepare_network_analysis(origins, destinations)
454
+ self._prepare_network_analysis(origins, destinations, rowwise)
458
455
 
459
456
  results = _od_cost_matrix(
460
457
  graph=self.graph,
@@ -465,9 +462,6 @@ class NetworkAnalysis:
465
462
  rowwise=rowwise,
466
463
  )
467
464
 
468
- self.origins._get_n_missing(results, "origin")
469
- self.destinations._get_n_missing(results, "destination")
470
-
471
465
  results["origin"] = results["origin"].map(self.origins.idx_dict)
472
466
  results["destination"] = results["destination"].map(self.destinations.idx_dict)
473
467
 
@@ -519,12 +513,17 @@ class NetworkAnalysis:
519
513
 
520
514
  Examples
521
515
  --------
516
+ Create the NetworkAnalysis instance.
517
+
518
+ >>> import sgis as sg
519
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
520
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
521
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
522
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
523
+
522
524
  Get routes from 1 to 1000 points.
523
525
 
524
- import sgis as sg
525
- >>> points = sg.read_parquet_url(
526
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
527
- ... )
526
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
528
527
 
529
528
  >>> routes = nwa.get_route(points.iloc[[0]], points)
530
529
  >>> routes
@@ -546,7 +545,7 @@ class NetworkAnalysis:
546
545
  if self._log:
547
546
  time_ = perf_counter()
548
547
 
549
- self._prepare_network_analysis(origins, destinations)
548
+ self._prepare_network_analysis(origins, destinations, rowwise)
550
549
 
551
550
  results = _get_route(
552
551
  graph=self.graph,
@@ -557,15 +556,9 @@ class NetworkAnalysis:
557
556
  rowwise=rowwise,
558
557
  )
559
558
 
560
- self.origins._get_n_missing(results, "origin")
561
- self.destinations._get_n_missing(results, "destination")
562
-
563
559
  results["origin"] = results["origin"].map(self.origins.idx_dict)
564
560
  results["destination"] = results["destination"].map(self.destinations.idx_dict)
565
561
 
566
- if isinstance(results, GeoDataFrame):
567
- results = _push_geom_col(results)
568
-
569
562
  if self.rules.split_lines:
570
563
  self._unsplit_network()
571
564
 
@@ -628,10 +621,15 @@ class NetworkAnalysis:
628
621
 
629
622
  Examples
630
623
  --------
631
- import sgis as sg
632
- >>> points = sg.read_parquet_url(
633
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
634
- ... )
624
+ Create the NetworkAnalysis instance.
625
+
626
+ >>> import sgis as sg
627
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
628
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
629
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
630
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
631
+
632
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
635
633
  >>> point1 = points.iloc[[0]]
636
634
  >>> point2 = points.iloc[[1]]
637
635
 
@@ -656,8 +654,9 @@ class NetworkAnalysis:
656
654
  8 0 1 16.513253 9 MULTILINESTRING Z ((272281.367 6653079.745 160...
657
655
  9 0 1 16.551196 10 MULTILINESTRING Z ((272281.367 6653079.745 160...
658
656
 
659
- We got all 10 routes because only the middle 1 percent of the routes are removed in
660
- each iteration. Let's compare with dropping middle 50 and middle 100 percent.
657
+ We got all 10 routes because only the middle 1 percent of the routes are
658
+ removed in each iteration. Let's compare with dropping middle 50 and middle
659
+ 100 percent.
661
660
 
662
661
  >>> k_routes = nwa.get_k_routes(
663
662
  ... point1,
@@ -689,9 +688,9 @@ class NetworkAnalysis:
689
688
  if self._log:
690
689
  time_ = perf_counter()
691
690
 
692
- self._prepare_network_analysis(origins, destinations)
691
+ self._prepare_network_analysis(origins, destinations, rowwise)
693
692
 
694
- results = _get_route(
693
+ results = _get_k_routes(
695
694
  graph=self.graph,
696
695
  origins=self.origins.gdf,
697
696
  destinations=self.destinations.gdf,
@@ -702,9 +701,6 @@ class NetworkAnalysis:
702
701
  drop_middle_percent=drop_middle_percent,
703
702
  )
704
703
 
705
- self.origins._get_n_missing(results, "origin")
706
- self.destinations._get_n_missing(results, "destination")
707
-
708
704
  results["origin"] = results["origin"].map(self.origins.idx_dict)
709
705
  results["destination"] = results["destination"].map(self.destinations.idx_dict)
710
706
 
@@ -729,6 +725,8 @@ class NetworkAnalysis:
729
725
  self,
730
726
  origins: GeoDataFrame,
731
727
  destinations: GeoDataFrame,
728
+ weight_df: DataFrame | None = None,
729
+ rowwise: bool = False,
732
730
  frequency_col: str = "frequency",
733
731
  ) -> GeoDataFrame:
734
732
  """Finds the number of times each line segment was visited in all trips.
@@ -738,9 +736,18 @@ class NetworkAnalysis:
738
736
  segment was used. The aggregation is done on the line indices, which is much
739
737
  faster than getting the geometries and then dissolving.
740
738
 
739
+ The trip frequencies can be weighted (multiplied) based on 'weight_df'. See
740
+ example below.
741
+
741
742
  Args:
742
- origins: GeoDataFrame of points from where the routes will originate
743
- destinations: GeoDataFrame of points from where the routes will terminate
743
+ origins: GeoDataFrame of points from where the routes will originate.
744
+ destinations: GeoDataFrame of points where the routes will terminate.
745
+ weight_df: A long-formatted DataFrame where each row contains the indices of
746
+ an origin-destination pair and the number to multiply the frequency for
747
+ this route by. The DataFrame can either contain three columns (origin
748
+ index, destination index and weight. In that order) or only a weight
749
+ column and a MultiIndex where level 0 is origin index and level 1 is
750
+ destination index.
744
751
  frequency_col: Name of column with the number of times each road was
745
752
  visited. Defaults to 'frequency'.
746
753
 
@@ -753,52 +760,146 @@ class NetworkAnalysis:
753
760
  The resulting lines will keep all columns of the 'gdf' of the Network.
754
761
 
755
762
  Raises:
756
- ValueError: if no paths were found.
763
+ ValueError: If no paths were found.
764
+ ValueError: If weight_df is not a DataFrame with one or three columns.
765
+ ValueError: If weight_df is given and the index of origins/destinations
766
+ is not unique.
757
767
 
758
768
  Examples
759
769
  --------
770
+ Create the NetworkAnalysis instance.
771
+
772
+ >>> import sgis as sg
773
+ >>> import pandas as pd
774
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
775
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
776
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
777
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
778
+
760
779
  Get number of times each road was visited for trips from 25 to 25 points.
761
780
 
762
- import sgis as sg
763
- >>> points = sg.read_parquet_url(
764
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
765
- ... )
781
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
766
782
 
767
- >>> frequencies = nwa.get_route_frequencies(points.sample(25), points.sample(25))
783
+ >>> origins = points.iloc[:25]
784
+ >>> destinations = points.iloc[25:50]
785
+ >>> frequencies = nwa.get_route_frequencies(origins, destinations)
768
786
  >>> frequencies[["source", "target", "frequency", "geometry"]]
769
- source target frequency geometry
770
- 137866 19095 44962 1.0 LINESTRING Z (265476.114 6645475.318 160.724, ...
771
- 138905 30597 16266 1.0 LINESTRING Z (272648.400 6652234.800 178.170, ...
772
- 138903 16266 45388 1.0 LINESTRING Z (272642.602 6652236.229 178.687, ...
773
- 138894 43025 30588 1.0 LINESTRING Z (272446.600 6652253.700 162.970, ...
774
- 138892 30588 16021 1.0 LINESTRING Z (272414.400 6652263.100 161.170, ...
775
- ... ... ... ... ...
776
- 158287 78157 78156 176.0 LINESTRING Z (263975.482 6653605.092 132.739, ...
777
- 149697 72562 72563 180.0 LINESTRING Z (265179.202 6651549.723 81.532, 2...
778
- 149698 72563 72564 180.0 LINESTRING Z (265178.761 6651549.956 81.561, 2...
779
- 149695 72560 72561 180.0 LINESTRING Z (265457.755 6651249.238 76.502, 2...
780
- 149696 72561 72562 180.0 LINESTRING Z (265180.086 6651549.259 81.473, 2...
781
-
782
- [12231 rows x 4 columns]
787
+ source target frequency geometry
788
+ 160188 77264 79112 1.0 LINESTRING Z (268641.225 6651871.624 111.355, ...
789
+ 153682 68376 4136 1.0 LINESTRING Z (268542.700 6652162.400 121.266, ...
790
+ 153679 75263 75502 1.0 LINESTRING Z (268665.600 6652165.400 117.466, ...
791
+ 153678 75262 75263 1.0 LINESTRING Z (268660.000 6652167.100 117.466, ...
792
+ 153677 47999 75262 1.0 LINESTRING Z (268631.500 6652176.800 118.166, ...
793
+ ... ... ... ... ...
794
+ 151465 73801 73802 103.0 LINESTRING Z (265368.600 6647142.900 131.660, ...
795
+ 151464 73800 73801 103.0 LINESTRING Z (265362.800 6647137.100 131.660, ...
796
+ 151466 73802 73632 103.0 LINESTRING Z (265371.400 6647147.900 131.660, ...
797
+ 151463 73799 73800 123.0 LINESTRING Z (265359.600 6647135.400 131.660, ...
798
+ 152170 74418 74246 130.0 LINESTRING Z (264579.835 6651954.573 113.209, ...
799
+
800
+ [8556 rows x 4 columns]
801
+
802
+ The frequencies can be weighted for each origin-destination pair by specifying
803
+ 'weight_df'. This can be a DataFrame with three columns, where the first two
804
+ contain the indices of the origin and destination (in that order), and the
805
+ third the number to multiply the frequency by. 'weight_df' can also be a
806
+ DataFrame with a 2-leveled MultiIndex, where level 0 is the origin index and
807
+ level 1 is the destination.
808
+
809
+ Constructing a DataFrame with all od-pair combinations and give all rows a
810
+ weight of 10.
811
+
812
+ >>> od_pairs = pd.MultiIndex.from_product(
813
+ ... [origins.index, destinations.index], names=["origin", "destination"]
814
+ ... )
815
+ >>> weight_df = pd.DataFrame(index=od_pairs).reset_index()
816
+ >>> weight_df["weight"] = 10
817
+ >>> weight_df
818
+ origin destination weight
819
+ 0 0 25 10
820
+ 1 0 26 10
821
+ 2 0 27 10
822
+ 3 0 28 10
823
+ 4 0 29 10
824
+ .. ... ... ...
825
+ 620 24 45 10
826
+ 621 24 46 10
827
+ 622 24 47 10
828
+ 623 24 48 10
829
+ 624 24 49 10
830
+
831
+ [625 rows x 3 columns]
832
+
833
+ All frequencies will now be multiplied by 10.
834
+
835
+ >>> frequencies = nwa.get_route_frequencies(origins, destinations, weight_df=weight_df)
836
+ >>> frequencies[["source", "target", "frequency", "geometry"]]
837
+
838
+ source target frequency geometry
839
+ 160188 77264 79112 10.0 LINESTRING Z (268641.225 6651871.624 111.355, ...
840
+ 153682 68376 4136 10.0 LINESTRING Z (268542.700 6652162.400 121.266, ...
841
+ 153679 75263 75502 10.0 LINESTRING Z (268665.600 6652165.400 117.466, ...
842
+ 153678 75262 75263 10.0 LINESTRING Z (268660.000 6652167.100 117.466, ...
843
+ 153677 47999 75262 10.0 LINESTRING Z (268631.500 6652176.800 118.166, ...
844
+ ... ... ... ... ...
845
+ 151465 73801 73802 1030.0 LINESTRING Z (265368.600 6647142.900 131.660, ...
846
+ 151464 73800 73801 1030.0 LINESTRING Z (265362.800 6647137.100 131.660, ...
847
+ 151466 73802 73632 1030.0 LINESTRING Z (265371.400 6647147.900 131.660, ...
848
+ 151463 73799 73800 1230.0 LINESTRING Z (265359.600 6647135.400 131.660, ...
849
+ 152170 74418 74246 1300.0 LINESTRING Z (264579.835 6651954.573 113.209, ...
850
+
851
+ [8556 rows x 4 columns]
852
+
853
+ 'weight_df' can also be a DataFrame with one column (the weight) and a
854
+ MultiIndex.
855
+
856
+ >>> weight_df = pd.DataFrame(index=od_pairs)
857
+ >>> weight_df["weight"] = 10
858
+ >>> weight_df
859
+ weight
860
+ 0 25 10
861
+ 26 10
862
+ 27 10
863
+ 28 10
864
+ 29 10
865
+ ... ...
866
+ 24 45 10
867
+ 46 10
868
+ 47 10
869
+ 48 10
870
+ 49 10
871
+
783
872
  """
784
873
  if self._log:
785
874
  time_ = perf_counter()
786
875
 
787
- self._prepare_network_analysis(origins, destinations)
876
+ if weight_df is not None:
877
+ weight_df = self._prepare_weight_df(weight_df, origins, destinations)
788
878
 
789
- results = _get_route(
879
+ self._prepare_network_analysis(origins, destinations, rowwise)
880
+
881
+ if weight_df is not None:
882
+ ori_idx_mapper = {v: k for k, v in self.origins.idx_dict.items()}
883
+ des_idx_mapper = {v: k for k, v in self.destinations.idx_dict.items()}
884
+ multiindex_mapper = lambda x: (
885
+ ori_idx_mapper.get(x[0]),
886
+ des_idx_mapper.get(x[1]),
887
+ )
888
+ weight_df.index = weight_df.index.map(multiindex_mapper)
889
+
890
+ results = _get_route_frequencies(
790
891
  graph=self.graph,
791
892
  origins=self.origins.gdf,
792
893
  destinations=self.destinations.gdf,
793
- weight=self.rules.weight,
794
894
  roads=self.network.gdf,
795
- summarise=True,
895
+ weight_df=weight_df,
896
+ rowwise=rowwise,
796
897
  )
797
898
 
798
899
  if isinstance(results, GeoDataFrame):
799
900
  results = _push_geom_col(results)
800
901
 
801
- results = results.rename(columns={"n": frequency_col}).sort_values(
902
+ results = results.rename(columns={"frequency": frequency_col}).sort_values(
802
903
  frequency_col
803
904
  )
804
905
 
@@ -849,13 +950,17 @@ class NetworkAnalysis:
849
950
 
850
951
  Examples
851
952
  --------
852
- import sgis as sg
853
- >>> points = sg.read_parquet_url(
854
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
855
- ... )
953
+ Create the NetworkAnalysis instance.
954
+
955
+ >>> import sgis as sg
956
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
957
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
958
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
959
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
856
960
 
857
961
  10 minute service area for three origin points.
858
962
 
963
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
859
964
  >>> service_areas = nwa.service_area(
860
965
  ... points.loc[:2],
861
966
  ... breaks=10,
@@ -890,10 +995,6 @@ class NetworkAnalysis:
890
995
  # sort the breaks as an np.ndarray
891
996
  breaks = self._sort_breaks(breaks)
892
997
 
893
- self.network.gdf["source_target_weight"] = _edge_ids(
894
- self.network.gdf, self.rules.weight
895
- )
896
-
897
998
  results = _service_area(
898
999
  graph=self.graph,
899
1000
  origins=self.origins.gdf,
@@ -977,13 +1078,18 @@ class NetworkAnalysis:
977
1078
 
978
1079
  Examples
979
1080
  --------
980
- import sgis as sg
981
- >>> points = sg.read_parquet_url(
982
- ... "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet"
983
- ... )
1081
+ Create the NetworkAnalysis instance.
1082
+
1083
+ >>> import sgis as sg
1084
+ >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_eidskog_2022.parquet")
1085
+ >>> nw = sg.DirectedNetwork(roads).remove_isolated().make_directed_network_norway()
1086
+ >>> rules = sg.NetworkAnalysisRules(weight="minutes")
1087
+ >>> nwa = sg.NetworkAnalysis(network=nw, rules=rules, detailed_log=False)
984
1088
 
985
1089
  10 minute service area for one origin point.
986
1090
 
1091
+ >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
1092
+
987
1093
  >>> sa = nwa.precice_service_area(
988
1094
  ... points.iloc[[0]],
989
1095
  ... breaks=10,
@@ -1016,10 +1122,6 @@ class NetworkAnalysis:
1016
1122
  # sort the breaks as an np.ndarray
1017
1123
  breaks = self._sort_breaks(breaks)
1018
1124
 
1019
- self.network.gdf["source_target_weight"] = _edge_ids(
1020
- self.network.gdf, self.rules.weight
1021
- )
1022
-
1023
1125
  results = _service_area(
1024
1126
  graph=self.graph,
1025
1127
  origins=self.origins.gdf,
@@ -1070,6 +1172,64 @@ class NetworkAnalysis:
1070
1172
 
1071
1173
  return results
1072
1174
 
1175
+ @staticmethod
1176
+ def _prepare_weight_df(weight_df, origins, destinations):
1177
+ """Copy weight_df, convert to MultiIndex (if needed), then validate it.
1178
+
1179
+ The weight_df needs to have a very specific shape and index. If a 3-columned df
1180
+ is given, convert the first two to a MultiIndex.
1181
+
1182
+ Then make sure this index matches the index of origins and destinations.
1183
+ """
1184
+ error_message = (
1185
+ "'weight_df' should be a DataFrame with the columns "
1186
+ "'origin', 'destination' and 'weight', where the first "
1187
+ "two contain the indices of the origins and destinations "
1188
+ "and the weight column contains the number to multiply "
1189
+ "the trip frequency for this origin-destination pair."
1190
+ )
1191
+
1192
+ if not isinstance(weight_df, DataFrame):
1193
+ raise ValueError(error_message)
1194
+
1195
+ weight_df = weight_df.copy()
1196
+
1197
+ if len(weight_df.columns) == 3:
1198
+ weight_df = weight_df.set_index(list(weight_df.columns[:2]))
1199
+
1200
+ if len(weight_df.columns) != 1 and isinstance(weight_df.index, pd.MultiIndex):
1201
+ raise ValueError(error_message)
1202
+
1203
+ if not weight_df.index.is_unique:
1204
+ raise ValueError("'weight_df' must contain only unique OD combinations.")
1205
+
1206
+ if not origins.index.is_unique:
1207
+ raise ValueError(
1208
+ "The index of 'origins' must be unque when using a 'weight_df'."
1209
+ )
1210
+ if not destinations.index.is_unique:
1211
+ raise ValueError(
1212
+ "The index of 'destinations' must be unque when using a 'weight_df'."
1213
+ )
1214
+
1215
+ # check if any/all indices are in origins/destinations.
1216
+ # Doing 'any' to give better error message
1217
+ level_0 = weight_df.index.get_level_values(0)
1218
+ if not level_0.isin(origins.index).any():
1219
+ raise ValueError("None of the 'origins' indices are in 'weight_df'.")
1220
+
1221
+ level_1 = weight_df.index.get_level_values(1)
1222
+ if not level_1.isin(destinations.index).any():
1223
+ raise ValueError("None of the 'destinations' indices are in 'weight_df'.")
1224
+
1225
+ if not level_0.isin(origins.index).all():
1226
+ raise ValueError("Not all 'origins' indices are in 'weight_df'.")
1227
+
1228
+ if not level_1.isin(destinations.index).all():
1229
+ raise ValueError("Not all 'destinations' indices are in 'weight_df'.")
1230
+
1231
+ return weight_df
1232
+
1073
1233
  def _log_df_template(self, method: str, minutes_elapsed: float) -> DataFrame:
1074
1234
  """Creates a DataFrame with one row and the main columns.
1075
1235
 
@@ -1149,13 +1309,22 @@ class NetworkAnalysis:
1149
1309
 
1150
1310
  self.log = pd.concat([self.log, df], ignore_index=True)
1151
1311
 
1152
- def _prepare_network_analysis(self, origins, destinations=None) -> None:
1312
+ def _prepare_network_analysis(
1313
+ self, origins, destinations=None, rowwise=False
1314
+ ) -> None:
1153
1315
  """Prepares the weight column, node ids, origins, destinations and graph.
1154
1316
 
1155
1317
  Updates the graph only if it is not yet created or if parts of the analysis
1156
1318
  have changed. This method is run inside od_cost_matrix, get_route and
1157
1319
  service_area.
1158
1320
  """
1321
+
1322
+ if rowwise and len(origins) != len(destinations):
1323
+ raise ValueError(
1324
+ "'origins' and 'destinations' must have the same length when "
1325
+ "rowwise=True"
1326
+ )
1327
+
1159
1328
  self.network.gdf = self.rules._validate_weight(self.network.gdf)
1160
1329
 
1161
1330
  self.origins = Origins(origins)
@@ -1175,10 +1344,13 @@ class NetworkAnalysis:
1175
1344
  if not self._graph_is_up_to_date() or not self.network._nodes_are_up_to_date():
1176
1345
  self.network._update_nodes_if()
1177
1346
 
1178
- edges, weights = self._get_edges_and_weights()
1347
+ edges, weights, ids = self._get_edges_and_weights()
1179
1348
 
1180
1349
  self.graph = self._make_graph(
1181
- edges=edges, weights=weights, directed=self.network._as_directed
1350
+ edges=edges,
1351
+ weights=weights,
1352
+ edge_ids=ids,
1353
+ directed=self.network._as_directed,
1182
1354
  )
1183
1355
 
1184
1356
  self._add_missing_vertices()
@@ -1186,7 +1358,9 @@ class NetworkAnalysis:
1186
1358
  self._update_wkts()
1187
1359
  self.rules._update_rules()
1188
1360
 
1189
- def _get_edges_and_weights(self) -> tuple[list[tuple[str, str]], list[float]]:
1361
+ def _get_edges_and_weights(
1362
+ self,
1363
+ ) -> tuple[list[tuple[str, str]], list[float], list[str]]:
1190
1364
  """Creates lists of edges and weights which will be used to make the graph.
1191
1365
 
1192
1366
  Edges and weights between origins and nodes and nodes and destinations are
@@ -1203,15 +1377,14 @@ class NetworkAnalysis:
1203
1377
  start=max(self.origins.gdf.temp_idx.astype(int)) + 1
1204
1378
  )
1205
1379
 
1206
- edges = [
1207
- (str(source), str(target))
1208
- for source, target in zip(
1209
- self.network.gdf["source"], self.network.gdf["target"], strict=True
1210
- )
1211
- ]
1380
+ edges: list[tuple[str, str]] = self.network.get_edges()
1212
1381
 
1213
1382
  weights = list(self.network.gdf[self.rules.weight])
1214
1383
 
1384
+ self.network.gdf["source_target_weight"] = self.network._create_edge_ids(
1385
+ edges, weights
1386
+ )
1387
+
1215
1388
  edges_start, weights_start = self.origins._get_edges_and_weights(
1216
1389
  nodes=self.network.nodes,
1217
1390
  rules=self.rules,
@@ -1221,7 +1394,8 @@ class NetworkAnalysis:
1221
1394
  weights = weights + weights_start
1222
1395
 
1223
1396
  if self.destinations is None:
1224
- return edges, weights
1397
+ edge_ids = self.network._create_edge_ids(edges, weights)
1398
+ return edges, weights, edge_ids
1225
1399
 
1226
1400
  edges_end, weights_end = self.destinations._get_edges_and_weights(
1227
1401
  nodes=self.network.nodes,
@@ -1231,7 +1405,9 @@ class NetworkAnalysis:
1231
1405
  edges = edges + edges_end
1232
1406
  weights = weights + weights_end
1233
1407
 
1234
- return edges, weights
1408
+ edge_ids = self.network._create_edge_ids(edges, weights)
1409
+
1410
+ return edges, weights, edge_ids
1235
1411
 
1236
1412
  def _split_lines(self) -> None:
1237
1413
  if self.destinations is not None:
@@ -1304,6 +1480,7 @@ class NetworkAnalysis:
1304
1480
  def _make_graph(
1305
1481
  edges: list[tuple[str, ...]] | np.ndarray[tuple[str, ...]],
1306
1482
  weights: list[float] | np.ndarray[float],
1483
+ edge_ids: np.ndarray,
1307
1484
  directed: bool,
1308
1485
  ) -> Graph:
1309
1486
  """Creates an igraph Graph from a list of edges and weights."""
@@ -1312,7 +1489,7 @@ class NetworkAnalysis:
1312
1489
  graph = igraph.Graph.TupleList(edges, directed=directed)
1313
1490
 
1314
1491
  graph.es["weight"] = weights
1315
- graph.es["source_target_weight"] = _edge_ids(edges, weights)
1492
+ graph.es["source_target_weight"] = edge_ids
1316
1493
  graph.es["edge_tuples"] = edges
1317
1494
  graph.es["source"] = [edge[0] for edge in edges]
1318
1495
  graph.es["target"] = [edge[1] for edge in edges]
@@ -1339,6 +1516,7 @@ class NetworkAnalysis:
1339
1516
  if self._points_have_changed(self[points].gdf, what=points):
1340
1517
  return False
1341
1518
 
1519
+ # if not self.gdf["source_target_weight"].
1342
1520
  return True
1343
1521
 
1344
1522
  def _points_have_changed(self, points: GeoDataFrame, what: str) -> bool: