waze-logs 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
database.py ADDED
@@ -0,0 +1,176 @@
1
+ # database.py
2
+ import sqlite3
3
+ from pathlib import Path
4
+ from typing import Optional, List, Any
5
+
6
class Database:
    """SQLite-backed storage for collected events and collection bookkeeping.

    Opening a ``Database`` creates the parent directory of ``db_path`` if
    needed, connects with a 30 second lock timeout, switches the journal to
    WAL mode and creates the ``events``, ``collection_runs``,
    ``tracked_users`` and ``daily_stats`` tables when they do not exist yet.

    Rows are returned as ``sqlite3.Row`` objects (addressable by column name).
    The instance can be used as a context manager; the connection is closed
    on exit.
    """

    def __init__(self, db_path: str, check_same_thread: bool = True):
        """Open (and if necessary initialise) the database at ``db_path``.

        Args:
            db_path: Path of the SQLite database file.
            check_same_thread: Forwarded to ``sqlite3.connect``.
        """
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        # timeout=30 waits up to 30 seconds for locks instead of failing immediately
        self.conn = sqlite3.connect(db_path, check_same_thread=check_same_thread, timeout=30)
        self.conn.row_factory = sqlite3.Row
        # Enable WAL mode for better concurrent write performance
        self.conn.execute("PRAGMA journal_mode=WAL")
        self.conn.execute("PRAGMA busy_timeout=30000")  # 30 second busy timeout
        self._create_tables()

    def __enter__(self):
        """Return the database itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection when leaving the ``with`` block."""
        self.close()

    def _create_tables(self):
        """Create all tables and indexes if they are missing, then commit."""
        self.conn.executescript("""
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event_hash TEXT UNIQUE NOT NULL,
                username TEXT NOT NULL,
                latitude REAL NOT NULL,
                longitude REAL NOT NULL,
                timestamp_utc TEXT NOT NULL,
                timestamp_ms INTEGER NOT NULL,
                report_type TEXT NOT NULL,
                subtype TEXT,
                raw_json TEXT,
                collected_at TEXT NOT NULL,
                grid_cell TEXT NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_events_username ON events(username);
            CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp_ms);
            CREATE INDEX IF NOT EXISTS idx_events_location ON events(latitude, longitude);
            CREATE INDEX IF NOT EXISTS idx_events_type ON events(report_type);

            CREATE TABLE IF NOT EXISTS collection_runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                started_at TEXT NOT NULL,
                completed_at TEXT,
                grid_cell TEXT NOT NULL,
                events_found INTEGER DEFAULT 0,
                events_new INTEGER DEFAULT 0,
                status TEXT DEFAULT 'running'
            );

            CREATE TABLE IF NOT EXISTS tracked_users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                first_seen TEXT NOT NULL,
                last_seen TEXT NOT NULL,
                event_count INTEGER DEFAULT 1,
                notes TEXT
            );

            CREATE INDEX IF NOT EXISTS idx_tracked_users_username ON tracked_users(username);

            CREATE TABLE IF NOT EXISTS daily_stats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT UNIQUE NOT NULL,
                events_collected INTEGER DEFAULT 0,
                unique_users INTEGER DEFAULT 0,
                api_requests INTEGER DEFAULT 0,
                api_errors INTEGER DEFAULT 0,
                grid_cells_scanned INTEGER DEFAULT 0,
                by_type_json TEXT
            );

            CREATE INDEX IF NOT EXISTS idx_daily_stats_date ON daily_stats(date);
        """)
        self.conn.commit()

    def _rollback_if_owned(self, owned: bool) -> None:
        """Roll back the current transaction if this call opened it.

        A failed statement leaves the implicitly started transaction open,
        which would keep the write lock until some later commit. Work that a
        caller had pending *before* the failing call is left untouched.
        """
        if not owned:
            return
        try:
            self.conn.rollback()
        except sqlite3.Error:
            # The connection itself is unusable (e.g. closed); there is
            # nothing left to roll back.
            pass

    def upsert_tracked_user(self, username: str, timestamp: str) -> bool:
        """Track a user, updating last_seen and event_count if exists.

        Args:
            username: User name to insert or update.
            timestamp: Stored as ``first_seen`` and ``last_seen`` for a new
                user, or as the new ``last_seen`` for an existing one.

        Returns:
            True when the row was written and committed, False when SQLite
            reported an error.
        """
        owned = False
        try:
            owned = not self.conn.in_transaction
            self.conn.execute("""
                INSERT INTO tracked_users (username, first_seen, last_seen, event_count)
                VALUES (?, ?, ?, 1)
                ON CONFLICT(username) DO UPDATE SET
                    last_seen = excluded.last_seen,
                    event_count = event_count + 1
            """, (username, timestamp, timestamp))
            self.conn.commit()
            return True
        except sqlite3.Error:
            self._rollback_if_owned(owned)
            return False

    def get_tracked_users(self, limit: int = 100) -> List[sqlite3.Row]:
        """Get tracked users ordered by event count (highest first).

        Args:
            limit: Maximum number of rows to return.
        """
        return self.conn.execute("""
            SELECT * FROM tracked_users
            ORDER BY event_count DESC
            LIMIT ?
        """, (limit,)).fetchall()

    def update_daily_stats(self, date: str, events: int = 0, users: int = 0,
                           requests: int = 0, errors: int = 0, cells: int = 0,
                           by_type: Optional[dict] = None):
        """Update daily collection statistics.

        For a date that already has a row, ``events``, ``requests``,
        ``errors`` and ``cells`` are added to the stored counters, while
        ``unique_users`` is overwritten with ``users``. The per-type
        breakdown is replaced only when a non-empty ``by_type`` is given;
        otherwise the stored breakdown is kept.

        Args:
            date: Key of the statistics row.
            events: Events collected to add.
            users: New value for ``unique_users``.
            requests: API requests to add.
            errors: API errors to add.
            cells: Scanned grid cells to add.
            by_type: Optional mapping stored as JSON in ``by_type_json``.
        """
        import json
        by_type_json = json.dumps(by_type) if by_type else None

        self.conn.execute("""
            INSERT INTO daily_stats (date, events_collected, unique_users,
                api_requests, api_errors, grid_cells_scanned, by_type_json)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(date) DO UPDATE SET
                events_collected = events_collected + excluded.events_collected,
                unique_users = excluded.unique_users,
                api_requests = api_requests + excluded.api_requests,
                api_errors = api_errors + excluded.api_errors,
                grid_cells_scanned = grid_cells_scanned + excluded.grid_cells_scanned,
                by_type_json = COALESCE(excluded.by_type_json, by_type_json)
        """, (date, events, users, requests, errors, cells, by_type_json))
        self.conn.commit()

    def get_daily_stats(self, days: int = 30) -> List[sqlite3.Row]:
        """Get the ``days`` most recent daily stats rows, newest date first.

        Only dates that have a row are counted, so gaps in collection are
        not represented in the result.
        """
        return self.conn.execute("""
            SELECT * FROM daily_stats
            ORDER BY date DESC
            LIMIT ?
        """, (days,)).fetchall()

    def get_collection_summary(self) -> dict:
        """Get overall collection summary computed over the ``events`` table.

        Returns:
            A dict with ``total_events``, ``unique_users``,
            ``days_collected``, ``first_event``, ``last_event`` and
            ``grid_cells_used``.
        """
        result = self.conn.execute("""
            SELECT
                COUNT(*) as total_events,
                COUNT(DISTINCT username) as unique_users,
                COUNT(DISTINCT DATE(timestamp_utc)) as days_collected,
                MIN(timestamp_utc) as first_event,
                MAX(timestamp_utc) as last_event,
                COUNT(DISTINCT grid_cell) as grid_cells_used
            FROM events
        """).fetchone()
        return dict(result) if result else {}

    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
        """Run ``query`` with ``params`` on the connection and return the cursor.

        Nothing is committed here; call :meth:`commit` afterwards.
        """
        return self.conn.execute(query, params)

    def commit(self):
        """Commit the current transaction."""
        self.conn.commit()

    def close(self):
        """Close the underlying connection."""
        self.conn.close()

    def insert_event(self, event: dict) -> bool:
        """Insert event, return True if inserted, False if duplicate.

        Args:
            event: Mapping with the keys ``event_hash``, ``username``,
                ``latitude``, ``longitude``, ``timestamp_utc``,
                ``timestamp_ms``, ``report_type``, ``collected_at`` and
                ``grid_cell``; ``subtype`` and ``raw_json`` are optional.

        Returns:
            True when the row was inserted and committed. False when SQLite
            raised ``IntegrityError`` (e.g. the ``event_hash`` already
            exists).

        Raises:
            KeyError: If a required key is missing from ``event``.
        """
        values = (
            event["event_hash"],
            event["username"],
            event["latitude"],
            event["longitude"],
            event["timestamp_utc"],
            event["timestamp_ms"],
            event["report_type"],
            event.get("subtype"),
            event.get("raw_json"),
            event["collected_at"],
            event["grid_cell"],
        )
        owned = not self.conn.in_transaction
        try:
            self.conn.execute("""
                INSERT INTO events (
                    event_hash, username, latitude, longitude,
                    timestamp_utc, timestamp_ms, report_type, subtype,
                    raw_json, collected_at, grid_cell
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, values)
            self.conn.commit()
            return True
        except sqlite3.IntegrityError:
            # Do not leave the implicitly opened transaction (and its write
            # lock) dangling after a rejected insert.
            self._rollback_if_owned(owned)
            return False
waze_client.py ADDED
@@ -0,0 +1,234 @@
1
+ # waze_client.py
2
+ import requests
3
+ import time
4
+ import random
5
+ from typing import Tuple, List, Dict, Any
6
+
7
+
8
class RateLimiter:
    """Simple rate limiter with exponential backoff.

    The delay between requests starts at ``min_delay``. Every call to
    :meth:`error` multiplies the delay by ``backoff_factor`` (capped at
    ``max_delay``); :meth:`success` resets it to ``min_delay``.
    """

    def __init__(self, min_delay: float = 1.0, max_delay: float = 10.0, backoff_factor: float = 2.0):
        """
        Args:
            min_delay: Delay in seconds used while requests succeed.
            max_delay: Upper bound in seconds for the backed-off delay.
            backoff_factor: Base of the exponential backoff.
        """
        self.min_delay = min_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.current_delay = min_delay
        self.last_request_time = 0
        self.consecutive_errors = 0

    def wait(self):
        """Wait before making next request.

        Sleeps for whatever remains of ``current_delay`` plus a random
        jitter of up to 0.5 seconds since the previous call, then records
        the current time as the time of the request.
        """
        elapsed = time.time() - self.last_request_time
        # Add jitter to avoid synchronized requests
        jitter = random.uniform(0, 0.5)
        wait_time = max(0, self.current_delay + jitter - elapsed)
        if wait_time > 0:
            time.sleep(wait_time)
        self.last_request_time = time.time()

    def success(self):
        """Call after successful request to reset backoff."""
        self.consecutive_errors = 0
        self.current_delay = self.min_delay

    def error(self):
        """Call after failed request to increase backoff."""
        self.consecutive_errors += 1
        try:
            backoff = self.min_delay * (self.backoff_factor ** self.consecutive_errors)
        except OverflowError:
            # After enough consecutive failures the power no longer fits in
            # a float; the delay is capped at max_delay anyway.
            backoff = self.max_delay
        self.current_delay = min(self.max_delay, backoff)
41
+
42
+
43
class WazeClient:
    """Client for querying Waze live traffic data directly."""

    WAZE_API_URL = "https://www.waze.com/live-map/api/georss"

    # Seconds to wait on HTTP 429 when Retry-After is missing or unusable.
    DEFAULT_RETRY_AFTER = 60

    def __init__(self, server_url: str = None, timeout: int = 30):
        """
        Initialize WazeClient.

        Args:
            server_url: Ignored - kept for backwards compatibility.
                We now query Waze API directly.
            timeout: Request timeout in seconds.
        """
        self.timeout = timeout
        self.rate_limiter = RateLimiter(min_delay=1.5, max_delay=30.0)
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Referer": "https://www.waze.com/live-map",
            "Accept": "application/json",
            "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
        })

    @staticmethod
    def _parse_retry_after(value, default: int = DEFAULT_RETRY_AFTER) -> int:
        """Convert a Retry-After header value into a non-negative number of seconds.

        The header may be missing or may carry an HTTP-date instead of a
        number of seconds; in both cases ``default`` is returned rather than
        raising.
        """
        try:
            seconds = int(value)
        except (TypeError, ValueError):
            return default
        return max(0, seconds)

    def get_traffic_notifications(
        self,
        lat_top: float,
        lat_bottom: float,
        lon_left: float,
        lon_right: float,
        max_retries: int = 3
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Query Waze for traffic notifications in a bounding box.
        Returns (alerts, jams) tuple.
        Implements retry logic with exponential backoff.

        Each returned alert is the original alert dict extended with
        ``latitude``/``longitude`` (taken from ``location.y``/``location.x``
        when present) and ``reportBy`` (see ``_extract_username``).

        Raises:
            requests.exceptions.RequestException: The error of the last
                failed attempt (an ``HTTPError`` when the last response was
                HTTP 429 or 403).
            Exception: If no attempt was made (``max_retries`` < 1).
        """
        last_error = None

        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                # Wait according to rate limiter
                self.rate_limiter.wait()

                response = self.session.get(
                    self.WAZE_API_URL,
                    params={
                        "top": str(lat_top),
                        "bottom": str(lat_bottom),
                        "left": str(lon_left),
                        "right": str(lon_right),
                        "env": "row",
                        "types": "alerts,traffic,users"
                    },
                    timeout=self.timeout
                )

                # Check for rate limiting (429) and possible IP block (403)
                if response.status_code in (429, 403):
                    self.rate_limiter.error()
                    last_error = requests.exceptions.HTTPError(
                        f"Waze API returned HTTP {response.status_code}",
                        response=response,
                    )
                    # Sleeping is only useful when another attempt follows.
                    if not is_last_attempt:
                        if response.status_code == 429:
                            time.sleep(self._parse_retry_after(
                                response.headers.get("Retry-After")
                            ))
                        else:
                            # Possible IP block, wait longer
                            time.sleep(30 + random.uniform(0, 30))
                    continue

                response.raise_for_status()
                self.rate_limiter.success()

                data = response.json()

                # Transform alerts to normalize the location format
                alerts = []
                for alert in data.get("alerts") or []:
                    # Extract location from nested structure; the field may
                    # be absent or null.
                    loc = alert.get("location") or {}
                    transformed = {
                        **alert,
                        "latitude": loc.get("y", alert.get("latitude")),
                        "longitude": loc.get("x", alert.get("longitude")),
                        # Extract username from wazeData if available
                        "reportBy": self._extract_username(alert)
                    }
                    alerts.append(transformed)

                return alerts, data.get("jams") or []

            except requests.exceptions.RequestException as e:
                self.rate_limiter.error()
                last_error = e
                if not is_last_attempt:
                    # Wait with exponential backoff before retry
                    wait_time = (2 ** attempt) + random.uniform(0, 1)
                    time.sleep(wait_time)

        # All retries failed
        raise last_error or Exception("Failed to fetch data after retries")

    def _extract_username(self, alert: Dict[str, Any]) -> str:
        """Extract username from alert data.

        Uses the first comma-separated field of ``wazeData``; when that
        field is the literal ``"world"`` the result is ``"world_"`` plus the
        first 8 characters of the last field. Without ``wazeData`` the
        result is ``"user_"`` plus the first 8 characters of ``uuid``, or
        ``"anonymous"`` when neither is available.
        """
        # wazeData format: "world,lon,lat,uuid" or sometimes contains username
        waze_data = alert.get("wazeData", "")
        if isinstance(waze_data, str) and waze_data:
            parts = waze_data.split(",")
            # First part is often the username prefix (e.g., "world")
            # or could be an actual username
            return parts[0] if parts[0] != "world" else f"world_{parts[-1][:8]}"

        # Fallback: use uuid as identifier
        uuid = alert.get("uuid", "")
        if uuid:
            return f"user_{uuid[:8]}"

        return "anonymous"

    def get_users(
        self,
        lat_top: float,
        lat_bottom: float,
        lon_left: float,
        lon_right: float
    ) -> List[Dict[str, Any]]:
        """Get active Waze users in a bounding box.

        Each returned user is the original user dict extended with
        ``latitude``/``longitude`` taken from ``location.y``/``location.x``
        (``None`` when the location is missing).

        Raises:
            requests.RequestException: If the request fails; the rate
                limiter is backed off before re-raising. No retry is made.
        """
        self.rate_limiter.wait()

        try:
            response = self.session.get(
                self.WAZE_API_URL,
                params={
                    "top": str(lat_top),
                    "bottom": str(lat_bottom),
                    "left": str(lon_left),
                    "right": str(lon_right),
                    "env": "row",
                    "types": "users"
                },
                timeout=self.timeout
            )
            response.raise_for_status()
            self.rate_limiter.success()
            data = response.json()

            users = []
            for user in data.get("users") or []:
                loc = user.get("location") or {}
                users.append({
                    **user,
                    "latitude": loc.get("y"),
                    "longitude": loc.get("x"),
                })
            return users
        except requests.RequestException:
            self.rate_limiter.error()
            raise

    def health_check(self) -> bool:
        """Check if the Waze API is responding.

        Sends one small fixed bounding-box query with a 5 second timeout.

        Returns:
            True on HTTP 200, False on any other status or request error.
        """
        try:
            self.rate_limiter.wait()
            response = self.session.get(
                self.WAZE_API_URL,
                params={
                    "top": "40.43",
                    "bottom": "40.42",
                    "left": "-3.71",
                    "right": "-3.70",
                    "env": "row",
                    "types": "alerts"
                },
                timeout=5
            )
            if response.status_code == 200:
                self.rate_limiter.success()
                return True
            self.rate_limiter.error()
            return False
        except requests.RequestException:
            self.rate_limiter.error()
            return False

    def get_rate_limit_status(self) -> dict:
        """Get current rate limiter status."""
        return {
            "current_delay": self.rate_limiter.current_delay,
            "consecutive_errors": self.rate_limiter.consecutive_errors,
            "last_request": self.rate_limiter.last_request_time
        }