waze_logs-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis.py +91 -0
- cli.py +1219 -0
- collector.py +193 -0
- collector_europe.py +312 -0
- collector_worldwide.py +532 -0
- database.py +176 -0
- waze_client.py +234 -0
- waze_logs-1.0.0.dist-info/METADATA +411 -0
- waze_logs-1.0.0.dist-info/RECORD +15 -0
- waze_logs-1.0.0.dist-info/WHEEL +5 -0
- waze_logs-1.0.0.dist-info/entry_points.txt +2 -0
- waze_logs-1.0.0.dist-info/licenses/LICENSE +21 -0
- waze_logs-1.0.0.dist-info/top_level.txt +8 -0
- web/app.py +536 -0
- web/templates/index.html +1241 -0
database.py
ADDED
@@ -0,0 +1,176 @@

# database.py
import sqlite3
from pathlib import Path
from typing import Optional, List, Any

class Database:
    def __init__(self, db_path: str, check_same_thread: bool = True):
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        # timeout=30 waits up to 30 seconds for locks instead of failing immediately
        self.conn = sqlite3.connect(db_path, check_same_thread=check_same_thread, timeout=30)
        self.conn.row_factory = sqlite3.Row
        # Enable WAL mode for better concurrent write performance
        self.conn.execute("PRAGMA journal_mode=WAL")
        self.conn.execute("PRAGMA busy_timeout=30000")  # 30 second busy timeout
        self._create_tables()

    def _create_tables(self):
        self.conn.executescript("""
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event_hash TEXT UNIQUE NOT NULL,
                username TEXT NOT NULL,
                latitude REAL NOT NULL,
                longitude REAL NOT NULL,
                timestamp_utc TEXT NOT NULL,
                timestamp_ms INTEGER NOT NULL,
                report_type TEXT NOT NULL,
                subtype TEXT,
                raw_json TEXT,
                collected_at TEXT NOT NULL,
                grid_cell TEXT NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_events_username ON events(username);
            CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp_ms);
            CREATE INDEX IF NOT EXISTS idx_events_location ON events(latitude, longitude);
            CREATE INDEX IF NOT EXISTS idx_events_type ON events(report_type);

            CREATE TABLE IF NOT EXISTS collection_runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                started_at TEXT NOT NULL,
                completed_at TEXT,
                grid_cell TEXT NOT NULL,
                events_found INTEGER DEFAULT 0,
                events_new INTEGER DEFAULT 0,
                status TEXT DEFAULT 'running'
            );

            CREATE TABLE IF NOT EXISTS tracked_users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                first_seen TEXT NOT NULL,
                last_seen TEXT NOT NULL,
                event_count INTEGER DEFAULT 1,
                notes TEXT
            );

            CREATE INDEX IF NOT EXISTS idx_tracked_users_username ON tracked_users(username);

            CREATE TABLE IF NOT EXISTS daily_stats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT UNIQUE NOT NULL,
                events_collected INTEGER DEFAULT 0,
                unique_users INTEGER DEFAULT 0,
                api_requests INTEGER DEFAULT 0,
                api_errors INTEGER DEFAULT 0,
                grid_cells_scanned INTEGER DEFAULT 0,
                by_type_json TEXT
            );

            CREATE INDEX IF NOT EXISTS idx_daily_stats_date ON daily_stats(date);
        """)
        self.conn.commit()

    def upsert_tracked_user(self, username: str, timestamp: str) -> bool:
        """Track a user, updating last_seen and event_count if exists."""
        try:
            self.conn.execute("""
                INSERT INTO tracked_users (username, first_seen, last_seen, event_count)
                VALUES (?, ?, ?, 1)
                ON CONFLICT(username) DO UPDATE SET
                    last_seen = excluded.last_seen,
                    event_count = event_count + 1
            """, (username, timestamp, timestamp))
            self.conn.commit()
            return True
        except Exception:
            return False

    def get_tracked_users(self, limit: int = 100):
        """Get tracked users ordered by event count."""
        return self.conn.execute("""
            SELECT * FROM tracked_users
            ORDER BY event_count DESC
            LIMIT ?
        """, (limit,)).fetchall()

    def update_daily_stats(self, date: str, events: int = 0, users: int = 0,
                           requests: int = 0, errors: int = 0, cells: int = 0,
                           by_type: dict = None):
        """Update daily collection statistics."""
        import json
        by_type_json = json.dumps(by_type) if by_type else None

        self.conn.execute("""
            INSERT INTO daily_stats (date, events_collected, unique_users,
                api_requests, api_errors, grid_cells_scanned, by_type_json)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(date) DO UPDATE SET
                events_collected = events_collected + excluded.events_collected,
                unique_users = excluded.unique_users,
                api_requests = api_requests + excluded.api_requests,
                api_errors = api_errors + excluded.api_errors,
                grid_cells_scanned = grid_cells_scanned + excluded.grid_cells_scanned,
                by_type_json = excluded.by_type_json
        """, (date, events, users, requests, errors, cells, by_type_json))
        self.conn.commit()

    def get_daily_stats(self, days: int = 30):
        """Get daily stats for the last N days."""
        return self.conn.execute("""
            SELECT * FROM daily_stats
            ORDER BY date DESC
            LIMIT ?
        """, (days,)).fetchall()

    def get_collection_summary(self):
        """Get overall collection summary."""
        result = self.conn.execute("""
            SELECT
                COUNT(*) as total_events,
                COUNT(DISTINCT username) as unique_users,
                COUNT(DISTINCT DATE(timestamp_utc)) as days_collected,
                MIN(timestamp_utc) as first_event,
                MAX(timestamp_utc) as last_event,
                COUNT(DISTINCT grid_cell) as grid_cells_used
            FROM events
        """).fetchone()
        return dict(result) if result else {}

    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
        return self.conn.execute(query, params)

    def commit(self):
        self.conn.commit()

    def close(self):
        self.conn.close()

    def insert_event(self, event: dict) -> bool:
        """Insert event, return True if inserted, False if duplicate."""
        try:
            self.conn.execute("""
                INSERT INTO events (
                    event_hash, username, latitude, longitude,
                    timestamp_utc, timestamp_ms, report_type, subtype,
                    raw_json, collected_at, grid_cell
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                event["event_hash"],
                event["username"],
                event["latitude"],
                event["longitude"],
                event["timestamp_utc"],
                event["timestamp_ms"],
                event["report_type"],
                event.get("subtype"),
                event.get("raw_json"),
                event["collected_at"],
                event["grid_cell"]
            ))
            self.conn.commit()
            return True
        except sqlite3.IntegrityError:
            return False
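
For reference, a minimal usage sketch of the Database class above. This is not part of the package: the event field values are made up, the import assumes the wheel's top-level module layout, and the hash scheme is purely illustrative (the package's collector modules, not shown here, build these fields from real Waze responses).

# sketch.py — illustrative only, not from the package
import hashlib
import json
from datetime import datetime, timezone

from database import Database  # assumes the wheel's top-level "database" module

db = Database("data/waze.db")

now = datetime.now(timezone.utc)
event = {
    "username": "user_ab12cd34",
    "latitude": 40.4168,
    "longitude": -3.7038,
    "timestamp_utc": now.isoformat(),
    "timestamp_ms": int(now.timestamp() * 1000),
    "report_type": "POLICE",          # illustrative report type
    "subtype": None,
    "raw_json": json.dumps({"uuid": "ab12cd34"}),
    "collected_at": now.isoformat(),
    "grid_cell": "40.40_-3.71",
}
# A stable hash deduplicates repeated sightings of the same report
# (the hashing scheme here is an assumption, not the package's own).
event["event_hash"] = hashlib.sha256(
    f'{event["username"]}|{event["timestamp_ms"]}|{event["latitude"]}|{event["longitude"]}'.encode()
).hexdigest()

if db.insert_event(event):            # returns False if the hash already exists
    db.upsert_tracked_user(event["username"], event["timestamp_utc"])

print(db.get_collection_summary())    # totals across the events table
db.close()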
waze_client.py
ADDED
@@ -0,0 +1,234 @@

# waze_client.py
import requests
import time
import random
from typing import Tuple, List, Dict, Any


class RateLimiter:
    """Simple rate limiter with exponential backoff."""

    def __init__(self, min_delay: float = 1.0, max_delay: float = 10.0, backoff_factor: float = 2.0):
        self.min_delay = min_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.current_delay = min_delay
        self.last_request_time = 0
        self.consecutive_errors = 0

    def wait(self):
        """Wait before making next request."""
        elapsed = time.time() - self.last_request_time
        # Add jitter to avoid synchronized requests
        jitter = random.uniform(0, 0.5)
        wait_time = max(0, self.current_delay + jitter - elapsed)
        if wait_time > 0:
            time.sleep(wait_time)
        self.last_request_time = time.time()

    def success(self):
        """Call after successful request to reset backoff."""
        self.consecutive_errors = 0
        self.current_delay = self.min_delay

    def error(self):
        """Call after failed request to increase backoff."""
        self.consecutive_errors += 1
        self.current_delay = min(
            self.max_delay,
            self.min_delay * (self.backoff_factor ** self.consecutive_errors)
        )


class WazeClient:
    """Client for querying Waze live traffic data directly."""

    WAZE_API_URL = "https://www.waze.com/live-map/api/georss"

    def __init__(self, server_url: str = None, timeout: int = 30):
        """
        Initialize WazeClient.

        Args:
            server_url: Ignored - kept for backwards compatibility.
                We now query Waze API directly.
            timeout: Request timeout in seconds.
        """
        self.timeout = timeout
        self.rate_limiter = RateLimiter(min_delay=1.5, max_delay=30.0)
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Referer": "https://www.waze.com/live-map",
            "Accept": "application/json",
            "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
        })

    def get_traffic_notifications(
        self,
        lat_top: float,
        lat_bottom: float,
        lon_left: float,
        lon_right: float,
        max_retries: int = 3
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Query Waze for traffic notifications in a bounding box.
        Returns (alerts, jams) tuple.
        Implements retry logic with exponential backoff.
        """
        last_error = None

        for attempt in range(max_retries):
            try:
                # Wait according to rate limiter
                self.rate_limiter.wait()

                response = self.session.get(
                    self.WAZE_API_URL,
                    params={
                        "top": str(lat_top),
                        "bottom": str(lat_bottom),
                        "left": str(lon_left),
                        "right": str(lon_right),
                        "env": "row",
                        "types": "alerts,traffic,users"
                    },
                    timeout=self.timeout
                )

                # Check for rate limiting responses
                if response.status_code == 429:
                    self.rate_limiter.error()
                    retry_after = int(response.headers.get("Retry-After", 60))
                    time.sleep(retry_after)
                    continue

                if response.status_code == 403:
                    self.rate_limiter.error()
                    # Possible IP block, wait longer
                    time.sleep(30 + random.uniform(0, 30))
                    continue

                response.raise_for_status()
                self.rate_limiter.success()

                data = response.json()

                # Transform alerts to normalize the location format
                alerts = []
                for alert in data.get("alerts", []):
                    # Extract location from nested structure
                    loc = alert.get("location", {})
                    transformed = {
                        **alert,
                        "latitude": loc.get("y", alert.get("latitude")),
                        "longitude": loc.get("x", alert.get("longitude")),
                        # Extract username from wazeData if available
                        "reportBy": self._extract_username(alert)
                    }
                    alerts.append(transformed)

                return alerts, data.get("jams", [])

            except requests.exceptions.RequestException as e:
                self.rate_limiter.error()
                last_error = e
                if attempt < max_retries - 1:
                    # Wait with exponential backoff before retry
                    wait_time = (2 ** attempt) + random.uniform(0, 1)
                    time.sleep(wait_time)

        # All retries failed
        raise last_error or Exception("Failed to fetch data after retries")

    def _extract_username(self, alert: Dict[str, Any]) -> str:
        """Extract username from alert data."""
        # wazeData format: "world,lon,lat,uuid" or sometimes contains username
        waze_data = alert.get("wazeData", "")
        if waze_data:
            parts = waze_data.split(",")
            if len(parts) >= 1:
                # First part is often the username prefix (e.g., "world")
                # or could be an actual username
                return parts[0] if parts[0] != "world" else f"world_{parts[-1][:8]}"

        # Fallback: use uuid as identifier
        uuid = alert.get("uuid", "")
        if uuid:
            return f"user_{uuid[:8]}"

        return "anonymous"

    def get_users(
        self,
        lat_top: float,
        lat_bottom: float,
        lon_left: float,
        lon_right: float
    ) -> List[Dict[str, Any]]:
        """Get active Waze users in a bounding box."""
        self.rate_limiter.wait()

        try:
            response = self.session.get(
                self.WAZE_API_URL,
                params={
                    "top": str(lat_top),
                    "bottom": str(lat_bottom),
                    "left": str(lon_left),
                    "right": str(lon_right),
                    "env": "row",
                    "types": "users"
                },
                timeout=self.timeout
            )
            response.raise_for_status()
            self.rate_limiter.success()
            data = response.json()

            users = []
            for user in data.get("users", []):
                loc = user.get("location", {})
                users.append({
                    **user,
                    "latitude": loc.get("y"),
                    "longitude": loc.get("x"),
                })
            return users
        except requests.RequestException:
            self.rate_limiter.error()
            raise

    def health_check(self) -> bool:
        """Check if the Waze API is responding."""
        try:
            self.rate_limiter.wait()
            response = self.session.get(
                self.WAZE_API_URL,
                params={
                    "top": "40.43",
                    "bottom": "40.42",
                    "left": "-3.71",
                    "right": "-3.70",
                    "env": "row",
                    "types": "alerts"
                },
                timeout=5
            )
            if response.status_code == 200:
                self.rate_limiter.success()
                return True
            self.rate_limiter.error()
            return False
        except requests.RequestException:
            self.rate_limiter.error()
            return False

    def get_rate_limit_status(self) -> dict:
        """Get current rate limiter status."""
        return {
            "current_delay": self.rate_limiter.current_delay,
            "consecutive_errors": self.rate_limiter.consecutive_errors,
            "last_request": self.rate_limiter.last_request_time
        }