suntdataset 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- suntdataset-0.1.0/PKG-INFO +20 -0
- suntdataset-0.1.0/README.md +154 -0
- suntdataset-0.1.0/pyproject.toml +22 -0
- suntdataset-0.1.0/setup.cfg +4 -0
- suntdataset-0.1.0/suntdataset/__init__.py +4 -0
- suntdataset-0.1.0/suntdataset/loader.py +53 -0
- suntdataset-0.1.0/suntdataset/visualizer.py +108 -0
- suntdataset-0.1.0/suntdataset.egg-info/PKG-INFO +20 -0
- suntdataset-0.1.0/suntdataset.egg-info/SOURCES.txt +12 -0
- suntdataset-0.1.0/suntdataset.egg-info/dependency_links.txt +1 -0
- suntdataset-0.1.0/suntdataset.egg-info/requires.txt +19 -0
- suntdataset-0.1.0/suntdataset.egg-info/top_level.txt +1 -0
- suntdataset-0.1.0/tests/test_local.py +60 -0
- suntdataset-0.1.0/tests/test_visualizer.py +102 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: suntdataset
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Dataset for the Salvador Unified Network for Transport (SUNT).
|
|
5
|
+
Requires-Dist: pandas
|
|
6
|
+
Requires-Dist: pyarrow
|
|
7
|
+
Requires-Dist: requests
|
|
8
|
+
Requires-Dist: tqdm
|
|
9
|
+
Provides-Extra: vis
|
|
10
|
+
Requires-Dist: pydeck; extra == "vis"
|
|
11
|
+
Requires-Dist: folium; extra == "vis"
|
|
12
|
+
Requires-Dist: geopandas; extra == "vis"
|
|
13
|
+
Provides-Extra: dashboard
|
|
14
|
+
Requires-Dist: streamlit; extra == "dashboard"
|
|
15
|
+
Provides-Extra: all
|
|
16
|
+
Requires-Dist: pydeck; extra == "all"
|
|
17
|
+
Requires-Dist: folium; extra == "all"
|
|
18
|
+
Requires-Dist: geopandas; extra == "all"
|
|
19
|
+
Requires-Dist: streamlit; extra == "all"
|
|
20
|
+
Requires-Dist: networkx; extra == "all"
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# SUNT-Lib: Salvador Unified Network for Transport Library
|
|
2
|
+
|
|
3
|
+
**SUNT-Lib** is a Python library designed to streamline the access, processing, and visualization of the **SUNT (Salvador Unified Network for Transport)** dataset. It is built for researchers, urban planners, and data scientists working with multimodal public transport data from Salvador, Brazil.
|
|
4
|
+
|
|
5
|
+
With just a few lines of code, you can load large blocks of historical data (AVL, AFC, GTFS), filter by day types (workdays/weekends), and generate high-fidelity 3D visualizations or academic-ready tables.
|
|
6
|
+
|
|
7
|
+
## 🚀 Features
|
|
8
|
+
|
|
9
|
+
* **Smart Data Loading**: Direct integration with the SUNT GitHub repository.
|
|
10
|
+
* **Temporal Batching**: Load data for $X$ days or months without manual URL formatting.
|
|
11
|
+
* **Smart Filtering**: Built-in logic to select only Workdays, Saturdays, or Sundays.
|
|
12
|
+
|
|
13
|
+
* **Advanced Visualization**:
|
|
14
|
+
* **3D Hexagon Maps**: View passenger density and loading with `PyDeck`.
|
|
15
|
+
* **Interactive Heatmaps**: Traditional spatial hotspots with `Folium`.
|
|
16
|
+
* **Graph Analysis**: Convert OD matrices into `NetworkX` directed graphs.
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 🛠 Installation
|
|
23
|
+
|
|
24
|
+
You can install the library locally for development:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/your-username/sunt-lib.git
|
|
28
|
+
cd sunt-lib
|
|
29
|
+
pip install -e .
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or via pip (once published):
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Basic data loading only
|
|
37
|
+
pip install suntdataset
|
|
38
|
+
|
|
39
|
+
# With 3D Mapping support
|
|
40
|
+
pip install "suntdataset[vis]"
|
|
41
|
+
|
|
42
|
+
# Full suite (Dashboards, Graphs, Maps)
|
|
43
|
+
pip install "suntdataset[all]"
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 📖 Quick Start
|
|
50
|
+
|
|
51
|
+
### 1. Loading Data by Blocks
|
|
52
|
+
|
|
53
|
+
You don't need to provide the exact start date if you want to use the dataset default (**2024-03-01**).
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from suntdataset.loader import SUNTLoader
|
|
57
|
+
|
|
58
|
+
loader = SUNTLoader()
|
|
59
|
+
|
|
60
|
+
# Load AVL data for all SUNDAYS in a 4-week period
|
|
61
|
+
df_sundays = loader.load_batch(
|
|
62
|
+
dataset_type='avl-lines',
|
|
63
|
+
periods=4,
|
|
64
|
+
freq='W',
|
|
65
|
+
day_type='sundays'
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Load Boarding/Alighting flow for 15 WORKDAYS
|
|
69
|
+
df_work = loader.load_batch(
|
|
70
|
+
dataset_type='boarding-alighting',
|
|
71
|
+
periods=15,
|
|
72
|
+
day_type='workdays'
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Geographic Visualization
|
|
78
|
+
|
|
79
|
+
Visualize passenger loading or origin-destination flows in 3D.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from suntdataset.visualizer import SUNTVisualizer
|
|
83
|
+
|
|
84
|
+
viz = SUNTVisualizer(df_work)
|
|
85
|
+
|
|
86
|
+
# Generate a 3D Hexagon map (PyDeck)
|
|
87
|
+
deck_map = viz.plot_3d_flow(weight_col='n-boardings')
|
|
88
|
+
deck_map.to_html("salvador_density.html")
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 3. Academic Export
|
|
93
|
+
|
|
94
|
+
Generate a summary table for your paper or thesis.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from suntdataset.utils import export_academic_table
|
|
98
|
+
|
|
99
|
+
# Creates a .tex file with mean boarding and speed stats
|
|
100
|
+
export_academic_table(df_work, name="results_table.tex")
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### 4. OD Graph (NetworkX)
|
|
105
|
+
|
|
106
|
+
Generate and export a directed OD graph using the SUNT parquet file.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import pandas as pd
|
|
110
|
+
from suntdataset.visualizer import SUNTVisualizer
|
|
111
|
+
|
|
112
|
+
url = "https://github.com/LabIA-UFBA/SUNT/raw/main/data/od/od-2024-03-01.parquet"
|
|
113
|
+
df = pd.read_parquet(url)
|
|
114
|
+
|
|
115
|
+
viz = SUNTVisualizer(df)
|
|
116
|
+
graph = viz.build_od_graph()
|
|
117
|
+
viz.export_od_graph("outputs/od_2024-03-01.graphml", fmt="graphml")
|
|
118
|
+
viz.export_od_graph("outputs/od_2024-03-01.gexf", fmt="gexf")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## 📊 Dataset Types Supported
|
|
124
|
+
|
|
125
|
+
| Type | Description |
|
|
126
|
+
| --- | --- |
|
|
127
|
+
| `avl-full` | Full telemetry including GPS, speed, and occupancy. |
|
|
128
|
+
| `avl-lines` | Information concerning routes and schedules. |
|
|
129
|
+
| `trip-timeseries` | Node activity (loading, boarding) in 5-min intervals. |
|
|
130
|
+
| `boarding-alighting` | Passenger flow at stop level. |
|
|
131
|
+
| `gtfs-stops` | Geospatial data for all transit stops. |
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## 🏛 Academic Use & Citation
|
|
136
|
+
|
|
137
|
+
This library is a wrapper for the **SUNT Dataset** (UFBA). If you use this tool in your research, please cite:
|
|
138
|
+
|
|
139
|
+
> *Nery, A. S., et al. (2024). SUNT: Salvador Unified Network for Transport Dataset.*
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## 🛠 Development: Running Locally
|
|
144
|
+
|
|
145
|
+
To test changes before publishing to PyPI:
|
|
146
|
+
|
|
147
|
+
1. Create a virtual environment: `python -m venv venv`
|
|
148
|
+
2. Activate it: `source venv/bin/activate`
|
|
149
|
+
3. Install dependencies: `pip install -r requirements.txt`
|
|
150
|
+
4. Run the sample dashboard: `streamlit run examples/dashboard_app.py`
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
Contributions, bug reports, and feature requests are welcome — please open an issue or pull request on the project's GitHub repository.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "suntdataset"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A Dataset for the Salvador Unified Network for Transport (SUNT)."
|
|
5
|
+
dependencies = [
|
|
6
|
+
"pandas",
|
|
7
|
+
"pyarrow",
|
|
8
|
+
"requests",
|
|
9
|
+
"tqdm"
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[tool.setuptools]
|
|
13
|
+
packages = ["suntdataset"]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
vis = ["pydeck", "folium", "geopandas"]
|
|
17
|
+
dashboard = ["streamlit"]
|
|
18
|
+
all = ["pydeck", "folium", "geopandas", "streamlit", "networkx"]
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["setuptools>=61.0"]
|
|
22
|
+
build-backend = "setuptools.build_meta"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SUNTLoader:
    """Batch downloader for the public SUNT dataset hosted on GitHub.

    Files are fetched directly from the repository's raw URLs; nothing is
    cached locally, so every call performs network I/O.
    """

    BASE_URL = "https://raw.githubusercontent.com/Nery-Ufba/SUNT/main/data/"
    ROUTES_URL = "https://raw.githubusercontent.com/Nery-Ufba/SUNT/main/data/raw/GTFS_INTEGRA_SALVADOR2/"
    # First day published in the dataset; used when no start_date is given.
    DEFAULT_START = "2024-03-01"

    def _paths_for(self, dt):
        """Return the dataset-type -> URL mapping for a single date *dt*.

        Mapping follows the SUNT repository's datasets.md.  Only the 'od'
        entry actually varies with the date; the GTFS files are static.
        """
        y, m, d = dt.year, f"{dt.month:02d}", f"{dt.day:02d}"
        return {
            'od': f"{self.BASE_URL}od/od-{y}-{m}-{d}.parquet",
            'gtfs-stops': f"{self.ROUTES_URL}stops.txt",
            'gtfs-trips': f"{self.ROUTES_URL}trips.txt",
            'gtfs-stop-times': f"{self.ROUTES_URL}stop_times.txt",
            'gtfs-routes': f"{self.ROUTES_URL}routes.txt",
            'gtfs-shapes': f"{self.ROUTES_URL}shapes.txt",
            'gtfs-agency': f"{self.ROUTES_URL}agency.txt",
        }

    def _filter_days(self, dates, day_type):
        """Select dates by day category.

        Parameters
        ----------
        dates : pandas.DatetimeIndex
            Candidate dates.
        day_type : str
            'workdays' (Mon-Fri), 'saturdays' or 'sundays'; any other value
            keeps every date.

        Returns
        -------
        pandas.Series of Timestamps.
        """
        df = pd.DataFrame({'date': dates})
        df['dow'] = df['date'].dt.dayofweek  # Monday == 0 ... Sunday == 6
        if day_type == 'workdays':
            return df[df['dow'] < 5]['date']
        if day_type == 'saturdays':
            return df[df['dow'] == 5]['date']
        if day_type == 'sundays':
            return df[df['dow'] == 6]['date']
        return df['date']

    def load_batch(self, dataset_type, start_date=None, periods=1, freq='D', day_type='all'):
        """Download one dataset across a date range and concatenate the parts.

        Parameters
        ----------
        dataset_type : str
            One of 'od', 'gtfs-stops', 'gtfs-trips', 'gtfs-stop-times',
            'gtfs-routes', 'gtfs-shapes', 'gtfs-agency'.
        start_date : str, optional
            First date (ISO format); defaults to ``DEFAULT_START``.
        periods, freq :
            Forwarded to ``pandas.date_range``.
        day_type : str
            Day filter, see :meth:`_filter_days`.

        Returns
        -------
        pandas.DataFrame
            All successfully downloaded frames concatenated, each tagged with
            a 'date_ref' column; an empty DataFrame when every download failed.

        Raises
        ------
        ValueError
            If *dataset_type* is not a known dataset name.
        """
        import warnings  # local import: only needed for download diagnostics

        # Fail fast on an unknown dataset name instead of silently skipping
        # every date and returning an empty DataFrame.
        known = self._paths_for(pd.Timestamp(self.DEFAULT_START))
        if dataset_type not in known:
            raise ValueError(
                f"dataset_type desconhecido: {dataset_type!r}. Opcoes: {sorted(known)}"
            )

        start = start_date if start_date else self.DEFAULT_START
        dates = pd.date_range(start=start, periods=periods, freq=freq)
        selected_dates = self._filter_days(dates, day_type)

        dataframes = []
        for dt in tqdm(selected_dates, desc=f"Carregando {dataset_type}"):
            url = self._paths_for(dt)[dataset_type]
            try:
                if url.endswith('.parquet'):
                    df = pd.read_parquet(url)
                elif url.endswith(('.csv', '.txt')):
                    # GTFS .txt files are plain CSV.
                    df = pd.read_csv(url)
                else:
                    raise ValueError(f"Formato nao suportado para: {url}")
                df['date_ref'] = dt
                dataframes.append(df)
            except Exception as exc:
                # Best effort: one missing day must not abort the whole batch,
                # but surface the failure instead of hiding it completely.
                warnings.warn(f"Falha ao carregar {url}: {exc}")
                continue

        return pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
def _has_pkg(name):
|
|
6
|
+
return importlib.util.find_spec(name) is not None
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SUNTVisualizer:
    """Graph-construction and export helpers for SUNT dataframes."""

    def __init__(self, df):
        # DataFrame with one row per (trip, stop) visit.
        self.df = df

    def build_od_graph(
        self,
        origin_col="stop_id",
        trip_col="trip_id",
        sequence_col="pt_sequence",
        time_col="stop_time",
        weight_col="n-boardings",
    ):
        """Build a directed origin-destination graph from consecutive stops.

        Rows are grouped by *trip_col* and ordered by *sequence_col* (or by
        *time_col* when no sequence column is given); each consecutive pair of
        distinct stops within a trip becomes a directed edge with attributes:

        - ``trips``: number of distinct trips traversing the edge,
        - ``passengers``: sum of *weight_col* at the origin row (falls back
          to ``trips`` when *weight_col* is unavailable),
        - ``weight``: alias of ``passengers`` for NetworkX algorithms.

        Raises ImportError when networkx is missing, and ValueError when
        required columns are absent or no ordering column was provided.
        """
        if not _has_pkg("networkx"):
            raise ImportError("NetworkX não encontrado. Instale com: pip install networkx")

        import networkx as nx

        required_cols = {origin_col, trip_col}
        if sequence_col:
            required_cols.add(sequence_col)
        if time_col:
            required_cols.add(time_col)
        if weight_col:
            required_cols.add(weight_col)

        missing = [col for col in required_cols if col not in self.df.columns]
        if missing:
            raise ValueError(f"Colunas ausentes para gerar grafo OD: {missing}")

        df = self.df.copy()
        if time_col and time_col in df.columns:
            # Coerce so unparseable timestamps become NaT instead of raising.
            df[time_col] = pd.to_datetime(df[time_col], errors="coerce")

        sort_cols = [trip_col]
        if sequence_col and sequence_col in df.columns:
            sort_cols.append(sequence_col)
        elif time_col and time_col in df.columns:
            sort_cols.append(time_col)
        else:
            raise ValueError("Informe sequence_col ou time_col válido para ordenar os deslocamentos.")

        df = df.sort_values(sort_cols)
        # Next stop within the same trip; the last stop of each trip gets NaN.
        df["next_stop_id"] = df.groupby(trip_col)[origin_col].shift(-1)
        edges_df = df.dropna(subset=[origin_col, "next_stop_id"])
        # Drop degenerate self-loops (consecutive identical stops); take an
        # explicit copy so the weight coercion below does not mutate a slice.
        edges_df = edges_df[edges_df[origin_col] != edges_df["next_stop_id"]].copy()

        if edges_df.empty:
            return nx.DiGraph()

        if weight_col and weight_col in edges_df.columns:
            edges_df[weight_col] = pd.to_numeric(edges_df[weight_col], errors="coerce").fillna(0)
            grouped = (
                edges_df.groupby([origin_col, "next_stop_id"], as_index=False)
                .agg(
                    trips=(trip_col, "nunique"),
                    passengers=(weight_col, "sum"),
                )
            )
        else:
            grouped = (
                edges_df.groupby([origin_col, "next_stop_id"], as_index=False)
                .agg(trips=(trip_col, "nunique"))
            )
            # Without a weight column, fall back to trip counts.
            grouped["passengers"] = grouped["trips"]

        graph = nx.DiGraph()
        # Positional itertuples (name=None) so column names that are not valid
        # identifiers (the dataset uses hyphens, e.g. "n-boardings") cannot
        # break attribute access.
        edge_rows = grouped[[origin_col, "next_stop_id", "trips", "passengers"]]
        for origin, destination, trips, passengers in edge_rows.itertuples(index=False, name=None):
            graph.add_edge(
                str(origin),
                str(destination),
                trips=float(trips),
                passengers=float(passengers),
                weight=float(passengers),
            )

        return graph

    def export_od_graph(self, output_path, fmt="graphml", **build_kwargs):
        """Build the OD graph and write it to *output_path*.

        Parameters
        ----------
        output_path : str or Path
            Destination file; parent directories are created as needed.
        fmt : str
            'graphml' or 'gexf' (case-insensitive).
        **build_kwargs :
            Forwarded to :meth:`build_od_graph`.

        Returns the written ``Path``.  Raises ValueError for unknown formats
        and ImportError when networkx is missing.
        """
        if not _has_pkg("networkx"):
            raise ImportError("NetworkX não encontrado. Instale com: pip install networkx")

        import networkx as nx

        # Validate the format up front, before the (potentially expensive)
        # graph build.
        fmt_normalized = fmt.strip().lower()
        writers = {"graphml": nx.write_graphml, "gexf": nx.write_gexf}
        if fmt_normalized not in writers:
            raise ValueError("Formato inválido. Use 'graphml' ou 'gexf'.")

        graph = self.build_od_graph(**build_kwargs)
        output = Path(output_path)
        output.parent.mkdir(parents=True, exist_ok=True)
        writers[fmt_normalized](graph, output)

        return output
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: suntdataset
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Dataset for the Salvador Unified Network for Transport (SUNT).
|
|
5
|
+
Requires-Dist: pandas
|
|
6
|
+
Requires-Dist: pyarrow
|
|
7
|
+
Requires-Dist: requests
|
|
8
|
+
Requires-Dist: tqdm
|
|
9
|
+
Provides-Extra: vis
|
|
10
|
+
Requires-Dist: pydeck; extra == "vis"
|
|
11
|
+
Requires-Dist: folium; extra == "vis"
|
|
12
|
+
Requires-Dist: geopandas; extra == "vis"
|
|
13
|
+
Provides-Extra: dashboard
|
|
14
|
+
Requires-Dist: streamlit; extra == "dashboard"
|
|
15
|
+
Provides-Extra: all
|
|
16
|
+
Requires-Dist: pydeck; extra == "all"
|
|
17
|
+
Requires-Dist: folium; extra == "all"
|
|
18
|
+
Requires-Dist: geopandas; extra == "all"
|
|
19
|
+
Requires-Dist: streamlit; extra == "all"
|
|
20
|
+
Requires-Dist: networkx; extra == "all"
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
suntdataset/__init__.py
|
|
4
|
+
suntdataset/loader.py
|
|
5
|
+
suntdataset/visualizer.py
|
|
6
|
+
suntdataset.egg-info/PKG-INFO
|
|
7
|
+
suntdataset.egg-info/SOURCES.txt
|
|
8
|
+
suntdataset.egg-info/dependency_links.txt
|
|
9
|
+
suntdataset.egg-info/requires.txt
|
|
10
|
+
suntdataset.egg-info/top_level.txt
|
|
11
|
+
tests/test_local.py
|
|
12
|
+
tests/test_visualizer.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
suntdataset
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from suntdataset.loader import SUNTLoader
|
|
2
|
+
from suntdataset.visualizer import SUNTVisualizer
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_load_batch_od_parquet():
    """Smoke-test downloading a single day of OD data (requires network)."""
    result = SUNTLoader().load_batch(dataset_type="od", periods=1)

    assert result is not None
    # An empty frame means the download failed; only check columns otherwise.
    if not result.empty:
        assert "date_ref" in result.columns
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_load_batch_gtfs_parquet():
    """Smoke-test every supported remote dataset type (requires network)."""
    loader = SUNTLoader()

    for dataset_type in ("od", "gtfs-stops", "gtfs-trips", "gtfs-stop-times", "gtfs-routes"):
        df = loader.load_batch(dataset_type=dataset_type, periods=1)
        assert df is not None
        # Skip the column check when the download failed (empty frame).
        if not df.empty:
            print(f"{dataset_type} - len: {len(df)}")
            assert "date_ref" in df.columns
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_build_od_graph_from_dataframe():
    """An in-memory OD frame yields one edge per consecutive stop pair."""
    pytest.importorskip("networkx")

    frame = pd.DataFrame(
        {
            "trip_id": ["t1", "t1", "t1", "t2", "t2"],
            "stop_id": ["A", "B", "C", "A", "C"],
            "pt_sequence": [1, 2, 3, 1, 2],
            "stop_time": [
                "2024-03-01 08:00:00",
                "2024-03-01 08:10:00",
                "2024-03-01 08:20:00",
                "2024-03-01 09:00:00",
                "2024-03-01 09:12:00",
            ],
            "n-boardings": [5, 3, 1, 4, 2],
        }
    )

    graph = SUNTVisualizer(frame).build_od_graph()

    assert graph.number_of_nodes() == 3
    assert graph.number_of_edges() == 3
    for origin, destination in (("A", "B"), ("B", "C"), ("A", "C")):
        assert graph.has_edge(origin, destination)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import pytest
|
|
3
|
+
|
|
4
|
+
from suntdataset.visualizer import SUNTVisualizer
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
nx = pytest.importorskip("networkx")
|
|
8
|
+
OD_URL = "https://github.com/LabIA-UFBA/SUNT/raw/main/data/od/od-2024-03-01.parquet"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _sample_od_df():
|
|
12
|
+
return pd.DataFrame(
|
|
13
|
+
{
|
|
14
|
+
"trip_id": ["t1", "t1", "t1", "t2", "t2"],
|
|
15
|
+
"stop_id": ["A", "B", "C", "A", "B"],
|
|
16
|
+
"pt_sequence": [1, 2, 3, 1, 2],
|
|
17
|
+
"stop_time": [
|
|
18
|
+
"2024-03-01 08:00:00",
|
|
19
|
+
"2024-03-01 08:10:00",
|
|
20
|
+
"2024-03-01 08:20:00",
|
|
21
|
+
"2024-03-01 09:00:00",
|
|
22
|
+
"2024-03-01 09:10:00",
|
|
23
|
+
],
|
|
24
|
+
"n-boardings": [10, 20, 30, 5, 7],
|
|
25
|
+
}
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture(scope="module")
def real_od_df():
    """Module-scoped fixture: download the published SUNT OD parquet (network I/O)."""
    return pd.read_parquet(OD_URL)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_build_od_graph_aggregates_edges():
    """Parallel trips over the same stop pair are merged into a single edge."""
    graph = SUNTVisualizer(_sample_od_df()).build_od_graph()

    assert isinstance(graph, nx.DiGraph)
    assert graph.number_of_nodes() == 3
    assert graph.number_of_edges() == 2

    # A->B is shared by t1 (10 boardings) and t2 (5 boardings).
    assert graph.has_edge("A", "B")
    a_to_b = graph["A"]["B"]
    assert a_to_b["trips"] == 2.0
    assert a_to_b["passengers"] == 15.0
    assert a_to_b["weight"] == 15.0

    # B->C only exists on t1.
    assert graph.has_edge("B", "C")
    b_to_c = graph["B"]["C"]
    assert b_to_c["trips"] == 1.0
    assert b_to_c["passengers"] == 20.0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_build_od_graph_works_with_time_order_only():
    """Edges can be derived from timestamps when no sequence column exists."""
    frame = _sample_od_df().drop(columns=["pt_sequence"])
    graph = SUNTVisualizer(frame).build_od_graph(sequence_col=None, time_col="stop_time")

    assert graph.number_of_edges() == 2
    assert graph.has_edge("A", "B")
    assert graph.has_edge("B", "C")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_build_od_graph_missing_columns_raises():
    """A frame lacking the sequence/time/weight columns is rejected."""
    viz = SUNTVisualizer(pd.DataFrame({"trip_id": ["t1"], "stop_id": ["A"]}))

    with pytest.raises(ValueError, match="Colunas ausentes"):
        viz.build_od_graph()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_export_od_graph_graphml_and_gexf(tmp_path):
    """Both supported export formats write a file and return its path."""
    viz = SUNTVisualizer(_sample_od_df())

    for suffix in (".graphml", ".gexf"):
        written = viz.export_od_graph(tmp_path / f"od{suffix}", fmt=suffix[1:])
        assert written.exists()
        assert written.suffix == suffix
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_export_od_graph_invalid_format_raises(tmp_path):
    """Unknown export formats are rejected with a ValueError."""
    viz = SUNTVisualizer(_sample_od_df())

    with pytest.raises(ValueError, match="Formato invalido|Formato inválido"):
        viz.export_od_graph(tmp_path / "od.invalid", fmt="json")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_build_od_graph_with_real_od_data(real_od_df):
    """End-to-end check against the published OD parquet (requires network)."""
    graph = SUNTVisualizer(real_od_df).build_od_graph(
        origin_col="stop_id",
        trip_col="trip_id",
        sequence_col="pt_sequence",
        time_col="stop_time",
        weight_col="n-boardings",
    )

    assert isinstance(graph, nx.DiGraph)
    assert graph.number_of_nodes() > 0
    assert graph.number_of_edges() > 0
    total_passengers = sum(attrs["passengers"] for _, _, attrs in graph.edges(data=True))
    assert total_passengers > 0
|