waze-logs 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
collector.py ADDED
@@ -0,0 +1,193 @@
1
+ # collector.py
2
+ import hashlib
3
+ import json
4
+ import time
5
+ import os
6
+ import signal
7
+ import sys
8
+ import yaml
9
+ from datetime import datetime, timezone
10
+ from typing import Dict, Any, Optional, List
11
+
12
def generate_event_hash(
    username: str,
    latitude: float,
    longitude: float,
    timestamp_ms: int,
    report_type: str
) -> str:
    """Generate a short, stable hash used for event deduplication.

    Two reports collapse to the same hash when they share the reporter name,
    the report type, the same minute bucket and the same coordinates after
    rounding to 4 decimal places.

    Args:
        username: Reporter name as stored on the event.
        latitude: Latitude in decimal degrees (int or float).
        longitude: Longitude in decimal degrees (int or float).
        timestamp_ms: Event time in milliseconds since the Unix epoch.
        report_type: Alert type label.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the dedup key.
    """
    # Round timestamp to minute for dedup (same event reported twice in same
    # minute). int() keeps a float timestamp from rendering as "27.0" and
    # producing a different key than the integer "27".
    timestamp_minute = int(timestamp_ms // 60000)

    # Round coordinates to 4 decimal places (~11m precision).
    # float() makes int and float inputs render identically ("1.0"), and
    # adding 0.0 folds a negative zero ("-0.0") into "0.0"; for ordinary
    # float input the rendered text is unchanged, so existing hashes still match.
    lat_rounded = float(round(latitude, 4)) + 0.0
    lon_rounded = float(round(longitude, 4)) + 0.0

    data = f"{username}|{lat_rounded}|{lon_rounded}|{timestamp_minute}|{report_type}"
    return hashlib.sha256(data.encode()).hexdigest()[:16]
28
+
29
def process_alert(alert: Dict[str, Any], grid_cell: str) -> Dict[str, Any]:
    """Convert a raw alert dict into a flat event record.

    Fields that are missing from *alert* or present with a ``None`` value
    fall back to defaults: reporter ``"anonymous"``, coordinates ``0.0``,
    type ``"UNKNOWN"`` and the current time for the publish timestamp.

    Args:
        alert: Raw alert as returned by the client; must be JSON-serialisable
            because it is stored verbatim under ``raw_json``.
        grid_cell: Name of the grid cell the alert was fetched for.

    Returns:
        A dict with the keys ``event_hash``, ``username``, ``latitude``,
        ``longitude``, ``timestamp_utc``, ``timestamp_ms``, ``report_type``,
        ``subtype``, ``raw_json``, ``collected_at`` and ``grid_cell``.
    """
    def field(key: str, default: Any) -> Any:
        # dict.get(key, default) only covers *missing* keys; an explicit null
        # would otherwise reach round() / the division below and raise.
        value = alert.get(key)
        return default if value is None else value

    username = field("reportBy", "anonymous")
    latitude = field("latitude", 0.0)
    longitude = field("longitude", 0.0)
    report_type = field("type", "UNKNOWN")
    subtype = alert.get("subtype")

    timestamp_ms = alert.get("pubMillis")
    if timestamp_ms is None:
        # No publish time supplied: stamp the event with the collection time.
        timestamp_ms = int(time.time() * 1000)

    timestamp_utc = datetime.fromtimestamp(
        timestamp_ms / 1000, tz=timezone.utc
    ).isoformat()

    event_hash = generate_event_hash(
        username=username,
        latitude=latitude,
        longitude=longitude,
        timestamp_ms=timestamp_ms,
        report_type=report_type
    )

    return {
        "event_hash": event_hash,
        "username": username,
        "latitude": latitude,
        "longitude": longitude,
        "timestamp_utc": timestamp_utc,
        "timestamp_ms": timestamp_ms,
        "report_type": report_type,
        "subtype": subtype,
        "raw_json": json.dumps(alert),
        "collected_at": datetime.now(timezone.utc).isoformat(),
        "grid_cell": grid_cell
    }
63
+
64
+
65
class Collector:
    """Polling collector: fetches alerts for every grid cell and stores new ones.

    The collector writes its process id to ``collector.pid`` while running so
    that other processes can discover it through :meth:`get_pid`.
    """

    def __init__(self, config_path: str = "config.yaml"):
        """Read the YAML configuration at *config_path*; nothing is started yet."""
        self.config_path = config_path
        self.config = self._load_config()
        # Cleared by the signal handler to request a clean shutdown.
        self.running = False
        self.pid_file = "collector.pid"

    def _load_config(self) -> Dict[str, Any]:
        """Parse the YAML config file; an empty file yields an empty dict."""
        with open(self.config_path, encoding="utf-8") as f:
            # safe_load returns None for an empty document; callers expect a dict.
            return yaml.safe_load(f) or {}

    def _save_pid(self):
        """Write the current process id to the PID file."""
        with open(self.pid_file, "w") as f:
            f.write(str(os.getpid()))

    def _remove_pid(self):
        """Delete the PID file; a file that is already gone is not an error."""
        try:
            os.remove(self.pid_file)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_pid() -> Optional[int]:
        """Get PID of running collector, or None if not running.

        A missing, empty or corrupt ``collector.pid`` is treated as
        "not running" instead of raising.
        """
        try:
            with open("collector.pid") as f:
                pid = int(f.read().strip())
        except (FileNotFoundError, ValueError):
            return None

        # pid 0 / negative values address process groups in kill(2); they can
        # never identify a single collector process.
        if pid <= 0:
            return None

        # Signal 0 performs the existence/permission check without delivering
        # a signal.
        try:
            os.kill(pid, 0)
        except PermissionError:
            # The process exists but belongs to another user.
            return pid
        except (OSError, OverflowError):
            return None
        return pid

    def _sleep_interruptibly(self, seconds: float, step: float = 1.0) -> None:
        """Sleep up to *seconds*, returning early once ``self.running`` is cleared.

        A single ``time.sleep(seconds)`` is resumed after the signal handler
        returns, which would delay shutdown by up to a whole polling interval.
        """
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(step, remaining))

    def run(self):
        """Main collection loop.

        Polls every grid cell once per cycle, stores the alerts that the
        database reports as new, updates the daily statistics and then waits
        for the configured polling interval. SIGINT/SIGTERM stop the loop.
        The PID file is removed and the database closed on the way out.
        """
        from database import Database
        from waze_client import WazeClient
        from grid import load_grid_cells

        def clock() -> str:
            # Local wall-clock prefix used on every console line.
            return datetime.now().strftime('%H:%M:%S')

        db = Database(self.config["database_path"])
        pid_saved = False
        try:
            # Built inside the try so a failure here still closes the database.
            client = WazeClient(self.config["waze_server_url"])
            cells = load_grid_cells(self.config)
            interval = self.config.get("polling_interval_seconds", 300)

            self.running = True
            self._save_pid()
            pid_saved = True

            def handle_signal(signum, frame):
                print("\nShutting down collector...")
                self.running = False

            signal.signal(signal.SIGINT, handle_signal)
            signal.signal(signal.SIGTERM, handle_signal)

            print(f"Collector started. Polling every {interval} seconds.")
            print(f"Grid cells: {[c.name for c in cells]}")
            print("Rate limiting: enabled (1.5s min delay, exponential backoff)")

            while self.running:
                # Daily stats tracking
                today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
                cycle_events = 0
                cycle_errors = 0
                cycle_requests = 0
                type_counts = {}

                for cell in cells:
                    if not self.running:
                        break

                    try:
                        cycle_requests += 1
                        alerts, jams = client.get_traffic_notifications(**cell.to_params())
                        new_count = 0

                        for alert in alerts:
                            event = process_alert(alert, cell.name)
                            # Counted as new only when insert_event is truthy.
                            if db.insert_event(event):
                                new_count += 1
                                cycle_events += 1
                                # Track the user
                                db.upsert_tracked_user(
                                    event["username"],
                                    event["timestamp_utc"]
                                )
                                # Count by type
                                t = event["report_type"]
                                type_counts[t] = type_counts.get(t, 0) + 1

                        # Show rate limiter status if backing off
                        rate_status = client.get_rate_limit_status()
                        delay_info = ""
                        if rate_status["current_delay"] > 2:
                            delay_info = f" [delay: {rate_status['current_delay']:.1f}s]"

                        print(f"[{clock()}] {cell.name}: "
                              f"{len(alerts)} alerts, {new_count} new{delay_info}")

                    except Exception as e:
                        # Best effort: one failing cell must not stop the cycle.
                        cycle_errors += 1
                        print(f"[{clock()}] Error {cell.name}: {e}")

                # Update daily stats after each full cycle
                unique_users = db.execute(
                    "SELECT COUNT(DISTINCT username) FROM events WHERE DATE(timestamp_utc) = ?",
                    (today,)
                ).fetchone()[0]

                db.update_daily_stats(
                    date=today,
                    events=cycle_events,
                    users=unique_users,
                    requests=cycle_requests,
                    errors=cycle_errors,
                    cells=len(cells),
                    by_type=type_counts or None
                )

                if cycle_events > 0:
                    print(f"[{clock()}] Cycle complete: "
                          f"+{cycle_events} events, {cycle_errors} errors")

                if self.running:
                    self._sleep_interruptibly(interval)
        finally:
            # Only remove a PID file this run actually wrote.
            if pid_saved:
                self._remove_pid()
            db.close()
            print("Collector stopped.")
collector_europe.py ADDED
@@ -0,0 +1,312 @@
1
+ # collector_europe.py
2
+ """Autonomous Europe-wide Waze data collector with priority-based scanning."""
3
+
4
+ import hashlib
5
+ import json
6
+ import time
7
+ import os
8
+ import signal
9
+ import sys
10
+ import yaml
11
+ import logging
12
+ from datetime import datetime, timezone
13
+ from typing import Dict, Any, Optional, List
14
+ from pathlib import Path
15
+
16
+ # Set up logging
17
# Module-wide logging: INFO and above, each line prefixed with a
# "YYYY-MM-DD HH:MM:SS [LEVEL]" header.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
23
+
24
+
25
def generate_event_hash(
    username: str,
    latitude: float,
    longitude: float,
    timestamp_ms: int,
    report_type: str
) -> str:
    """Generate a short, stable hash used for event deduplication.

    The key combines the reporter name, the coordinates rounded to 4 decimal
    places, the minute bucket of the timestamp and the report type.

    Args:
        username: Reporter name as stored on the event.
        latitude: Latitude in decimal degrees (int or float).
        longitude: Longitude in decimal degrees (int or float).
        timestamp_ms: Event time in milliseconds since the Unix epoch.
        report_type: Alert type label.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the dedup key.
    """
    # Minute bucket; int() stops a float timestamp rendering as "27.0" and
    # yielding a different key than the integer "27".
    timestamp_minute = int(timestamp_ms // 60000)

    # float() renders int and float coordinates identically, and "+ 0.0"
    # folds negative zero into "0.0"; ordinary float input renders exactly as
    # before, so previously stored hashes keep matching.
    lat_rounded = float(round(latitude, 4)) + 0.0
    lon_rounded = float(round(longitude, 4)) + 0.0

    data = f"{username}|{lat_rounded}|{lon_rounded}|{timestamp_minute}|{report_type}"
    return hashlib.sha256(data.encode()).hexdigest()[:16]
38
+
39
+
40
def process_alert(alert: Dict[str, Any], grid_cell: str) -> Dict[str, Any]:
    """Convert a raw alert dict into a flat event record.

    Fields that are missing from *alert* or present with a ``None`` value
    fall back to defaults: reporter ``"anonymous"``, coordinates ``0.0``,
    type ``"UNKNOWN"`` and the current time for the publish timestamp.

    Args:
        alert: Raw alert as returned by the client; must be JSON-serialisable
            because it is stored verbatim under ``raw_json``.
        grid_cell: Name of the grid cell the alert was fetched for.

    Returns:
        A dict with the keys ``event_hash``, ``username``, ``latitude``,
        ``longitude``, ``timestamp_utc``, ``timestamp_ms``, ``report_type``,
        ``subtype``, ``raw_json``, ``collected_at`` and ``grid_cell``.
    """
    def field(key: str, default: Any) -> Any:
        # dict.get(key, default) ignores the default when the key exists with
        # a null value; treat null the same as absent.
        value = alert.get(key)
        return default if value is None else value

    username = field("reportBy", "anonymous")
    latitude = field("latitude", 0.0)
    longitude = field("longitude", 0.0)
    report_type = field("type", "UNKNOWN")
    subtype = alert.get("subtype")

    timestamp_ms = alert.get("pubMillis")
    if timestamp_ms is None:
        # No publish time supplied: stamp the event with the collection time.
        timestamp_ms = int(time.time() * 1000)

    timestamp_utc = datetime.fromtimestamp(
        timestamp_ms / 1000, tz=timezone.utc
    ).isoformat()

    event_hash = generate_event_hash(
        username=username,
        latitude=latitude,
        longitude=longitude,
        timestamp_ms=timestamp_ms,
        report_type=report_type
    )

    return {
        "event_hash": event_hash,
        "username": username,
        "latitude": latitude,
        "longitude": longitude,
        "timestamp_utc": timestamp_utc,
        "timestamp_ms": timestamp_ms,
        "report_type": report_type,
        "subtype": subtype,
        "raw_json": json.dumps(alert),
        "collected_at": datetime.now(timezone.utc).isoformat(),
        "grid_cell": grid_cell
    }
74
+
75
+
76
class EuropeCollector:
    """Autonomous collector for Europe-wide Waze data.

    Grid cells carry a numeric ``priority``; each priority is scanned on its
    own cadence (see ``_SCAN_SCHEDULE``). While running, the process id is
    kept in ``collector_europe.pid``.
    """

    # (priority, scan every N-th cycle, suffix used in the progress log line)
    _SCAN_SCHEDULE = (
        (1, 1, " (cities)"),
        (2, 3, ""),
        (3, 5, " (coverage)"),
    )

    def __init__(self, config_path: str = "config_europe.yaml"):
        """Load (or generate) the configuration at *config_path*."""
        self.config_path = config_path
        self.config = self._load_config()
        # Cleared by the signal handler to request a clean shutdown.
        self.running = False
        self.pid_file = "collector_europe.pid"
        # Running totals across all cycles of this process.
        self.stats = {
            "total_requests": 0,
            "total_errors": 0,
            "total_events": 0,
            "last_full_scan": None,
            "current_cycle": 0
        }

    def _load_config(self) -> Dict[str, Any]:
        """Parse the YAML config, generating the Europe grid file if absent."""
        if not os.path.exists(self.config_path):
            logger.info("Config not found, generating Europe grid...")
            from europe_grid import save_europe_config
            save_europe_config(self.config_path)

        with open(self.config_path, encoding="utf-8") as f:
            # safe_load returns None for an empty document; callers expect a dict.
            return yaml.safe_load(f) or {}

    def _save_pid(self):
        """Write the current process id to the PID file."""
        with open(self.pid_file, "w") as f:
            f.write(str(os.getpid()))

    def _remove_pid(self):
        """Delete the PID file; a file that is already gone is not an error."""
        try:
            os.remove(self.pid_file)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_pid() -> Optional[int]:
        """Get PID of running collector, or None if not running.

        A missing, empty or corrupt ``collector_europe.pid`` is treated as
        "not running" instead of raising.
        """
        try:
            with open("collector_europe.pid") as f:
                pid = int(f.read().strip())
        except (FileNotFoundError, ValueError):
            return None

        # pid 0 / negative values address process groups in kill(2); they can
        # never identify a single collector process.
        if pid <= 0:
            return None

        # Signal 0 performs the existence/permission check without delivering
        # a signal.
        try:
            os.kill(pid, 0)
        except PermissionError:
            # The process exists but belongs to another user.
            return pid
        except (OSError, OverflowError):
            return None
        return pid

    def _load_cells_by_priority(self) -> Dict[int, List[Dict]]:
        """Load grid cells grouped by priority.

        Cells without an explicit ``priority`` are filed under priority 2.
        A missing or null ``grid_cells`` entry yields an empty mapping.
        """
        by_priority: Dict[int, List[Dict]] = {}
        for cell in self.config.get("grid_cells") or []:
            by_priority.setdefault(cell.get("priority", 2), []).append(cell)
        return by_priority

    def _sleep_interruptibly(self, seconds: float, step: float = 1.0) -> None:
        """Sleep up to *seconds*, returning early once ``self.running`` is cleared.

        A single ``time.sleep(seconds)`` is resumed after the signal handler
        returns, which would delay shutdown by up to a whole polling interval.
        """
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(step, remaining))

    def _scan_cells(self, cells: List[Dict], db, client) -> Dict[str, int]:
        """Scan a list of grid cells and return stats.

        Args:
            cells: Cell dicts providing ``lat_top``, ``lat_bottom``,
                ``lon_left``, ``lon_right`` and ``name``.
            db: Store exposing ``insert_event`` and ``upsert_tracked_user``.
            client: Client exposing ``get_traffic_notifications`` and
                ``get_rate_limit_status``.

        Returns:
            Counters for this call: ``requests``, ``errors`` and ``events``.
            The same amounts are added to the running totals in ``self.stats``.
        """
        stats = {"requests": 0, "errors": 0, "events": 0}

        for cell in cells:
            if not self.running:
                break

            # Resolved up front so the error handler below cannot itself fail
            # on a cell that lacks a name.
            label = cell.get("name", "<unnamed>")

            try:
                stats["requests"] += 1
                self.stats["total_requests"] += 1

                alerts, jams = client.get_traffic_notifications(
                    lat_top=cell["lat_top"],
                    lat_bottom=cell["lat_bottom"],
                    lon_left=cell["lon_left"],
                    lon_right=cell["lon_right"]
                )

                new_count = 0
                for alert in alerts:
                    event = process_alert(alert, cell["name"])
                    # Counted as new only when insert_event is truthy.
                    if db.insert_event(event):
                        new_count += 1
                        db.upsert_tracked_user(
                            event["username"],
                            event["timestamp_utc"]
                        )

                stats["events"] += new_count
                self.stats["total_events"] += new_count

                # Log only when something happened or the client is backing off
                rate_status = client.get_rate_limit_status()
                backing_off = rate_status["current_delay"] > 2
                if new_count > 0 or backing_off:
                    delay_info = f" [delay:{rate_status['current_delay']:.1f}s]" if backing_off else ""
                    logger.info(f"{label}: {len(alerts)} alerts, +{new_count} new{delay_info}")

            except Exception as e:
                stats["errors"] += 1
                self.stats["total_errors"] += 1
                logger.error(f"Error scanning {label}: {e}")
                # Continue to next cell on error

        return stats

    def run(self):
        """Main autonomous collection loop.

        Each cycle scans the priorities that are due according to
        ``_SCAN_SCHEDULE``, updates the daily statistics, logs a summary and
        waits for ``polling_interval_seconds`` (default 60). SIGINT/SIGTERM
        stop the loop. The PID file is removed and the database closed on
        the way out.
        """
        from database import Database
        from waze_client import WazeClient

        # Ensure database directory exists
        db_path = self.config["database_path"]
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

        db = Database(db_path)
        pid_saved = False
        try:
            # Built inside the try so a failure here still closes the database.
            client = WazeClient(self.config.get("waze_server_url"))

            # Load cells by priority
            cells_by_priority = self._load_cells_by_priority()
            priorities = sorted(cells_by_priority.keys())
            scheduled = {priority for priority, _, _ in self._SCAN_SCHEDULE}

            total_cells = sum(len(c) for c in cells_by_priority.values())
            priority_1_count = len(cells_by_priority.get(1, []))
            priority_2_count = len(cells_by_priority.get(2, []))
            priority_3_count = len(cells_by_priority.get(3, []))

            self.running = True
            self._save_pid()
            pid_saved = True

            def handle_signal(signum, frame):
                logger.info("Shutdown signal received...")
                self.running = False

            signal.signal(signal.SIGINT, handle_signal)
            signal.signal(signal.SIGTERM, handle_signal)

            logger.info("=" * 60)
            logger.info("Europe Waze Collector starting...")
            logger.info(f"Database: {db_path}")
            logger.info(f"Total grid cells: {total_cells}")
            logger.info(f" Priority 1 (cities): {priority_1_count}")
            logger.info(f" Priority 2 (default): {priority_2_count}")
            logger.info(f" Priority 3 (coverage): {priority_3_count}")
            logger.info("Collection strategy:")
            logger.info(" - Priority 1 cells: every cycle")
            logger.info(" - Priority 2 cells: every 3rd cycle")
            logger.info(" - Priority 3 cells: every 5th cycle")
            # Cells with any other priority have no schedule entry; say so
            # instead of skipping them silently.
            for priority in priorities:
                if priority not in scheduled:
                    logger.warning(
                        f"{len(cells_by_priority[priority])} cells with "
                        f"priority {priority} are never scanned"
                    )
            logger.info("=" * 60)

            while self.running:
                self.stats["current_cycle"] += 1
                cycle = self.stats["current_cycle"]
                today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

                logger.info(f"--- Cycle {cycle} started ---")
                cycle_stats = {"requests": 0, "errors": 0, "events": 0}

                for priority, every, suffix in self._SCAN_SCHEDULE:
                    due_cells = cells_by_priority.get(priority)
                    if not due_cells or cycle % every != 0:
                        continue

                    logger.info(f"Scanning {len(due_cells)} priority-{priority} cells{suffix}...")
                    stats = self._scan_cells(due_cells, db, client)
                    for k in cycle_stats:
                        cycle_stats[k] += stats[k]

                    # The coverage pass is what the stats call a "full scan".
                    if priority == 3:
                        self.stats["last_full_scan"] = datetime.now(timezone.utc).isoformat()

                # Update daily stats
                unique_users = db.execute(
                    "SELECT COUNT(DISTINCT username) FROM events WHERE DATE(timestamp_utc) = ?",
                    (today,)
                ).fetchone()[0]

                # NOTE(review): "cells" receives the request count, i.e. the
                # cells attempted this cycle — confirm this is intended.
                db.update_daily_stats(
                    date=today,
                    events=cycle_stats["events"],
                    users=unique_users,
                    requests=cycle_stats["requests"],
                    errors=cycle_stats["errors"],
                    cells=cycle_stats["requests"]
                )

                # Log cycle summary
                logger.info(
                    f"Cycle {cycle} complete: "
                    f"+{cycle_stats['events']} events, "
                    f"{cycle_stats['requests']} requests, "
                    f"{cycle_stats['errors']} errors"
                )
                logger.info(
                    f"Totals: {self.stats['total_events']} events, "
                    f"{self.stats['total_requests']} requests"
                )

                # Wait between cycles
                if self.running:
                    interval = self.config.get("polling_interval_seconds", 60)
                    logger.info(f"Waiting {interval}s until next cycle...")
                    self._sleep_interruptibly(interval)

        except Exception as e:
            logger.error(f"Fatal error: {e}")
            raise
        finally:
            # Only remove a PID file this run actually wrote.
            if pid_saved:
                self._remove_pid()
            db.close()
            logger.info("Europe collector stopped.")
291
+
292
+
293
def main():
    """Command-line entry point for the Europe collector.

    ``--config`` selects the YAML file (default ``config_europe.yaml``).
    With ``--generate-config`` the grid config is written and the program
    exits; otherwise the collector is started and runs until stopped.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Europe Waze Data Collector")
    cli.add_argument("--config", default="config_europe.yaml", help="Config file path")
    cli.add_argument("--generate-config", action="store_true", help="Generate config and exit")
    options = cli.parse_args()

    if not options.generate_config:
        # Normal operation: collect until a shutdown signal arrives.
        EuropeCollector(options.config).run()
        return

    # One-shot mode: only write the generated grid configuration.
    from europe_grid import save_europe_config
    save_europe_config(options.config)


# Start collecting only when executed as a script, never on import.
if __name__ == "__main__":
    main()