waze-logs 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py ADDED
@@ -0,0 +1,1219 @@
1
+ # cli.py
2
+ import os
3
+ import sys
4
+ import signal
5
+ import click
6
+ import yaml
7
+ import json
8
+ import time
9
+ import threading
10
+ import logging
11
+ from pathlib import Path
12
+ from tabulate import tabulate
13
+ from datetime import datetime, timedelta, timezone
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+
16
@click.group()
def cli():
    """Waze Worldwide Logger - Global traffic event collection and analysis."""
20
+
21
def load_config():
    """Read and parse the project-level config.yaml into a dict."""
    with open("config.yaml") as cfg_file:
        return yaml.safe_load(cfg_file)
24
+
25
def get_db(region=None):
    """Get database connection for a specific region or default Madrid."""
    from database import Database

    # Prefer the regional database when it already exists on disk.
    if region:
        regional_path = f"./data/waze_{region}.db"
        if os.path.exists(regional_path):
            return Database(regional_path)

    # Fall back to the default database declared in config.yaml.
    return Database(load_config()["database_path"])
34
+
35
def get_all_dbs():
    """Get connections to all existing regional databases."""
    from database import Database

    db_paths = {
        "madrid": "./data/waze_madrid.db",
        "europe": "./data/waze_europe.db",
        "americas": "./data/waze_americas.db",
        "asia": "./data/waze_asia.db",
        "oceania": "./data/waze_oceania.db",
        "africa": "./data/waze_africa.db",
    }

    connections = []
    for region_name, db_file in db_paths.items():
        if not os.path.exists(db_file):
            continue
        try:
            connections.append((region_name, Database(db_file)))
        except Exception:
            # Best effort: a corrupt/unopenable regional DB is skipped silently.
            pass
    return connections
54
+
55
# === Worldwide Collection System ===

# Status/checkpoint file paths
# STATUS_FILE: last-scanned-cell snapshot consumed by the web UI.
# CHECKPOINT_FILE: per-cycle scan progress, used to resume after interruption.
# PID_FILE: lock file holding the running collector's process id.
STATUS_FILE = "./data/collector_status.json"
CHECKPOINT_FILE = "./data/collector_checkpoint.json"
PID_FILE = "./collector_cli.pid"

# Serialize writes to the status/checkpoint files across region-scanner threads.
status_lock = threading.Lock()
checkpoint_lock = threading.Lock()
64
+
65
+
66
def write_status(region: str, cell_name: str, country: str, cell_idx: int, total_cells: int,
                 alerts_count: int, new_count: int, event_types: list = None):
    """Write current collector status to file for UI consumption (thread-safe)."""
    snapshot = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "region": region,
        "cell_name": cell_name,
        "country": country,
        "cell_idx": cell_idx,
        "total_cells": total_cells,
        "alerts_found": alerts_count,
        "new_events": new_count,
        "event_types": event_types or [],
        "status": "scanning",
    }
    try:
        # The status file is advisory only; never let a write failure stop a scan.
        with status_lock:
            with open(STATUS_FILE, "w") as status_file:
                json.dump(snapshot, status_file)
    except Exception:
        pass
87
+
88
+
89
def load_checkpoint():
    """Load checkpoint from file."""
    try:
        if os.path.exists(CHECKPOINT_FILE):
            with open(CHECKPOINT_FILE, "r") as checkpoint_file:
                return json.load(checkpoint_file)
    except Exception:
        # Unreadable/corrupt checkpoint: fall through to a fresh start.
        pass
    # Default: no cycles completed, nothing scanned yet.
    return {"cycle": 0, "scanned": {}}
98
+
99
+
100
def save_checkpoint(cycle: int, scanned: dict):
    """Save checkpoint to file (thread-safe)."""
    payload = {
        "cycle": cycle,
        "scanned": scanned,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    try:
        # Checkpointing is best-effort; a failed write must not abort the scan.
        with checkpoint_lock:
            with open(CHECKPOINT_FILE, "w") as checkpoint_file:
                json.dump(payload, checkpoint_file)
    except Exception:
        pass
113
+
114
+
115
def clear_checkpoint():
    """Clear checkpoint file when cycle completes."""
    try:
        # Remove the file if present; any failure (missing file, permissions)
        # is ignored - there is simply nothing left to clear.
        os.remove(CHECKPOINT_FILE)
    except Exception:
        pass
122
+
123
+
124
def generate_event_hash(username: str, latitude: float, longitude: float,
                        timestamp_ms: int, report_type: str) -> str:
    """Generate unique hash for event deduplication."""
    import hashlib

    # Bucket the timestamp to the minute so near-simultaneous duplicate
    # reports from the same user at the same spot collapse to one hash.
    minute_bucket = timestamp_ms // 60000
    key = "|".join([
        username,
        str(round(latitude, 4)),
        str(round(longitude, 4)),
        str(minute_bucket),
        report_type,
    ])
    # Truncated SHA-256 keeps the key short while staying collision-resistant
    # enough for dedup purposes.
    return hashlib.sha256(key.encode()).hexdigest()[:16]
131
+
132
+
133
def process_alert(alert: dict, grid_cell: str) -> dict:
    """Process a Waze alert into event format."""
    username = alert.get("reportBy", "anonymous")
    lat = alert.get("latitude", 0.0)
    lon = alert.get("longitude", 0.0)
    # Fall back to "now" when the alert carries no publication time.
    ts_ms = alert.get("pubMillis", int(time.time() * 1000))
    rtype = alert.get("type", "UNKNOWN")

    # Normalize the millisecond epoch into an aware UTC ISO-8601 string.
    event_time = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc).isoformat()

    return {
        "event_hash": generate_event_hash(username, lat, lon, ts_ms, rtype),
        "username": username,
        "latitude": lat,
        "longitude": lon,
        "timestamp_utc": event_time,
        "timestamp_ms": ts_ms,
        "report_type": rtype,
        "subtype": alert.get("subtype"),
        "raw_json": json.dumps(alert),      # keep the full payload for later analysis
        "collected_at": datetime.now(timezone.utc).isoformat(),
        "grid_cell": grid_cell,
    }
159
+
160
+
161
class RegionScanner:
    """Scanner for a specific region.

    Loads a region's grid cells from its YAML config (grouped by priority)
    and scans them one by one through the injected Waze client, persisting
    new events into the injected database.
    """

    def __init__(self, name: str, config_path: str, db, client, logger):
        # db / client are injected project objects (Database, WazeClient);
        # logger is a standard logging.Logger shared with the collector.
        self.name = name
        self.config_path = config_path
        self.db = db
        self.client = client
        self.logger = logger
        self.cells_by_priority = {}  # priority (int) -> list of cell dicts
        self._load_cells()

    def _load_cells(self):
        """Group the config's grid_cells by their priority (default 2)."""
        with open(self.config_path) as f:
            config = yaml.safe_load(f)

        for cell in config.get("grid_cells", []):
            p = cell.get("priority", 2)
            if p not in self.cells_by_priority:
                self.cells_by_priority[p] = []
            self.cells_by_priority[p].append(cell)

    def get_cell_counts(self) -> dict:
        """Return {priority: number of cells} for this region."""
        return {p: len(cells) for p, cells in self.cells_by_priority.items()}

    def scan(self, priority: int, running_flag, already_scanned: set = None,
             on_cell_scanned: callable = None) -> dict:
        """Scan cells of given priority, skipping already-scanned cells.

        running_flag: zero-arg callable polled before each cell; a falsy
            return aborts the scan early (cooperative shutdown).
        already_scanned: cell names to skip (checkpoint resume).
        on_cell_scanned: optional callback invoked with each cell name
            after its alerts are persisted (used for checkpointing).
        Returns a stats dict with requests/errors/events/cells counters
        and the list of cell names actually scanned.
        """
        cells = self.cells_by_priority.get(priority, [])
        stats = {"requests": 0, "errors": 0, "events": 0, "cells": len(cells), "scanned_cells": []}
        total_cells = len(cells)
        already_scanned = already_scanned or set()

        # Filter out already-scanned cells
        # (idx is preserved from the full list so log output stays stable).
        remaining_cells = [(idx, cell) for idx, cell in enumerate(cells, 1)
                           if cell["name"] not in already_scanned]

        if len(remaining_cells) < len(cells):
            skipped = len(cells) - len(remaining_cells)
            self.logger.info(f"Resuming: skipping {skipped} already-scanned cells, {len(remaining_cells)} remaining")

        for idx, cell in remaining_cells:
            if not running_flag():
                break

            try:
                stats["requests"] += 1
                cell_name = cell["name"]
                country = cell.get("country", "??")

                # Second tuple element is unused here - presumably raw
                # response metadata from the client; confirm in waze_client.
                alerts, _ = self.client.get_traffic_notifications(
                    lat_top=cell["lat_top"],
                    lat_bottom=cell["lat_bottom"],
                    lon_left=cell["lon_left"],
                    lon_right=cell["lon_right"]
                )

                # insert_event returns truthy only for not-seen-before events,
                # so duplicates don't inflate counters or touch tracked users.
                new_count = 0
                new_types = []
                for alert in alerts:
                    event = process_alert(alert, cell_name)
                    if self.db.insert_event(event):
                        new_count += 1
                        new_types.append(event["report_type"])
                        self.db.upsert_tracked_user(event["username"], event["timestamp_utc"])

                stats["events"] += new_count
                stats["scanned_cells"].append(cell_name)

                if on_cell_scanned:
                    on_cell_scanned(cell_name)

                # Only log when there are alerts or new events
                if len(alerts) > 0 or new_count > 0:
                    type_summary = ""
                    if new_types:
                        from collections import Counter
                        counts = Counter(new_types)
                        type_summary = " | " + ", ".join(f"{t}:{c}" for t, c in counts.most_common(3))

                    status = f"+{new_count}" if new_count > 0 else "0"
                    self.logger.info(f"[{idx:3}/{total_cells}] {cell_name:25} ({country}) -> {len(alerts):3} alerts, {status} new{type_summary}")

                # Publish progress for the web UI regardless of log output.
                write_status(
                    region=self.name,
                    cell_name=cell_name,
                    country=country,
                    cell_idx=idx,
                    total_cells=total_cells,
                    alerts_count=len(alerts),
                    new_count=new_count,
                    event_types=new_types
                )

            except Exception as e:
                # A failing cell still counts as scanned so checkpoint resume
                # doesn't retry it within the same cycle.
                stats["errors"] += 1
                stats["scanned_cells"].append(cell["name"])
                self.logger.error(f"[{idx:3}/{total_cells}] {cell['name']:25} -> ERROR: {e}")

        return stats
261
+
262
+
263
class CLIWorldwideCollector:
    """Multi-threaded worldwide Waze data collector for CLI.

    Scans each configured region in its own thread (one RegionScanner per
    region), persists events into per-region SQLite databases, and
    checkpoints progress so an interrupted run resumes mid-cycle.
    """

    # (region name, grid config file, per-region database path)
    REGIONS = [
        ("europe", "config_europe.yaml", "./data/waze_europe.db"),
        ("americas", "config_americas.yaml", "./data/waze_americas.db"),
        ("asia", "config_asia.yaml", "./data/waze_asia.db"),
        ("oceania", "config_oceania.yaml", "./data/waze_oceania.db"),
        ("africa", "config_africa.yaml", "./data/waze_africa.db"),
    ]

    def __init__(self, web_port=None, regions=None):
        # web_port: port for the optional Flask UI; None disables the UI.
        self.running = False
        self.web_port = web_port
        self.selected_regions = regions  # None = all regions
        self.scanners = {}    # region name -> RegionScanner
        self.databases = {}   # region name -> Database
        self.clients = {}     # region name -> WazeClient
        self.logger = None    # created in _setup_logging()

    def _setup_logging(self):
        """Set up logging for the collector."""
        Path("logs").mkdir(exist_ok=True)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s [%(levelname)s] %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler('logs/cli_collector.log')
            ]
        )
        self.logger = logging.getLogger("cli_collector")

    def _generate_configs(self):
        """Generate regional configs if they don't exist."""
        # Each tuple: (region, module that builds its grid, save function name).
        config_generators = [
            ("europe", "europe_grid", "save_europe_config"),
            ("americas", "americas_grid", "save_americas_config"),
            ("asia", "asia_grid", "save_asia_config"),
            ("oceania", "oceania_grid", "save_oceania_config"),
            ("africa", "africa_grid", "save_africa_config"),
        ]

        for region_name, module_name, func_name in config_generators:
            config_file = f"config_{region_name}.yaml"
            if not os.path.exists(config_file):
                self.logger.info(f"Generating {config_file}...")
                try:
                    # Dynamic import: grid modules are optional per deployment.
                    module = __import__(module_name)
                    getattr(module, func_name)()
                except ImportError:
                    self.logger.warning(f"Could not import {module_name}, skipping {region_name}")

    def _save_pid(self):
        # Record our PID so other CLI invocations can detect/stop us.
        with open(PID_FILE, "w") as f:
            f.write(str(os.getpid()))

    def _remove_pid(self):
        if os.path.exists(PID_FILE):
            os.remove(PID_FILE)

    @staticmethod
    def get_pid():
        """Return the running collector's PID, or None if not running.

        Uses os.kill(pid, 0) to probe process existence without signaling.
        """
        if os.path.exists(PID_FILE):
            try:
                with open(PID_FILE) as f:
                    pid = int(f.read().strip())
                os.kill(pid, 0)
                return pid
            except (OSError, ValueError):
                # Stale PID file (dead process) or unparseable content.
                return None
        return None

    def _start_web_server(self):
        """Start Flask web server in a background thread."""
        def run_flask():
            # Suppress Flask's default logging
            import logging as flask_logging
            flask_log = flask_logging.getLogger('werkzeug')
            flask_log.setLevel(flask_logging.WARNING)

            # Add project root to path for web module import
            project_root = os.path.dirname(os.path.abspath(__file__))
            if project_root not in sys.path:
                sys.path.insert(0, project_root)
            from web.app import app
            app.run(host="0.0.0.0", port=self.web_port, debug=False, threaded=True, use_reloader=False)

        # Daemon thread: the web UI dies with the collector process.
        web_thread = threading.Thread(target=run_flask, daemon=True)
        web_thread.start()
        self.logger.info(f"Web UI started at http://localhost:{self.web_port}")
        return web_thread

    def run(self):
        """Main worldwide collection loop.

        Repeats forever until a SIGINT/SIGTERM flips self.running:
        every cycle runs a parallel priority-1 (city) scan of all regions;
        every 10th cycle adds a parallel priority-3 (coverage) scan;
        every 5th cycle logs a database summary; 10s pause between cycles.
        """
        from database import Database
        from waze_client import WazeClient

        self._setup_logging()

        # Create directories
        Path("data").mkdir(exist_ok=True)
        Path("logs").mkdir(exist_ok=True)

        # Generate configs
        self._generate_configs()

        # Start web server if requested
        if self.web_port:
            self._start_web_server()

        # Filter regions if specified
        regions_to_scan = self.REGIONS
        if self.selected_regions:
            regions_to_scan = [r for r in self.REGIONS if r[0] in self.selected_regions]

        # Initialize scanners
        self.logger.info("=" * 70)
        self.logger.info("WAZE WORLDWIDE COLLECTOR (CLI)")
        self.logger.info("=" * 70)

        total_p1 = 0
        total_p3 = 0

        for region_name, config_path, db_path in regions_to_scan:
            if not os.path.exists(config_path):
                self.logger.warning(f"Config not found: {config_path}, skipping {region_name}")
                continue

            # check_same_thread=False: the DB handle is used from worker threads.
            db = Database(db_path, check_same_thread=False)
            client = WazeClient()

            scanner = RegionScanner(region_name, config_path, db, client, self.logger)
            self.scanners[region_name] = scanner
            self.databases[region_name] = db
            self.clients[region_name] = client

            counts = scanner.get_cell_counts()
            p1 = counts.get(1, 0)
            p3 = counts.get(3, 0)
            total_p1 += p1
            total_p3 += p3

            self.logger.info(f" {region_name.upper():10} - P1 (cities): {p1:4}, P3 (coverage): {p3:4}")

        self.logger.info("-" * 70)
        self.logger.info(f" {'TOTAL':10} - P1 (cities): {total_p1:4}, P3 (coverage): {total_p3:4}")
        self.logger.info(f" {'':10} Grand total: {total_p1 + total_p3} grid cells")
        self.logger.info("=" * 70)
        self.logger.info("Collection strategy (MULTITHREADED):")
        self.logger.info(" - All regions scanned in PARALLEL for P1 (city) scans")
        self.logger.info(" - Full P3 (coverage) scan every 10 cycles (parallel)")
        self.logger.info(" - 10 second pause between cycles")
        if self.web_port:
            self.logger.info(f" - Web UI at http://localhost:{self.web_port}")
        self.logger.info("=" * 70)

        self.running = True
        self._save_pid()

        def handle_signal(signum, frame):
            # Cooperative shutdown: scanners poll self.running between cells.
            self.logger.info("Shutdown signal received...")
            self.running = False

        signal.signal(signal.SIGINT, handle_signal)
        signal.signal(signal.SIGTERM, handle_signal)

        region_names = list(self.scanners.keys())

        # Load checkpoint
        checkpoint = load_checkpoint()
        cycle = checkpoint.get("cycle", 0)
        # scanned_cells maps "<region>_p1"/"<region>_p3" -> list of cell names
        # already processed in the current (interrupted) cycle.
        scanned_cells = checkpoint.get("scanned", {})

        if cycle > 0:
            self.logger.info(f"Resuming from checkpoint: cycle {cycle}")

        def scan_region(region_name: str, priority: int, today: str, already_scanned: set,
                        checkpoint_key: str) -> dict:
            """Scan a single region (runs in thread)."""
            scanner = self.scanners[region_name]
            db = self.databases[region_name]

            p_count = scanner.get_cell_counts().get(priority, 0)
            if p_count == 0:
                return {"region": region_name, "events": 0, "errors": 0, "requests": 0, "cells": 0, "scanned_cells": []}

            def on_cell_scanned(cell_name):
                # Record progress under the shared lock so concurrent region
                # threads don't corrupt the checkpoint dict.
                with checkpoint_lock:
                    if checkpoint_key not in scanned_cells:
                        scanned_cells[checkpoint_key] = []
                    scanned_cells[checkpoint_key].append(cell_name)
                save_checkpoint(cycle, scanned_cells)

            stats = scanner.scan(priority, lambda: self.running, already_scanned, on_cell_scanned)

            db.update_daily_stats(
                date=today,
                events=stats["events"],
                requests=stats["requests"],
                errors=stats["errors"],
                cells=stats["cells"]
            )

            return {"region": region_name, **stats}

        try:
            while self.running:
                cycle += 1
                today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

                self.logger.info(f"\n{'='*50}")
                self.logger.info(f"CYCLE {cycle} (PARALLEL MODE)")
                self.logger.info(f"{'='*50}")

                # Parallel P1 scan
                self.logger.info(f"Starting parallel P1 scan across {len(region_names)} regions...")
                total_events = 0
                total_errors = 0
                cycle_complete = True

                with ThreadPoolExecutor(max_workers=len(region_names)) as executor:
                    futures = {}
                    for region in region_names:
                        key = f"{region}_p1"
                        already_scanned = set(scanned_cells.get(key, []))
                        futures[executor.submit(scan_region, region, 1, today, already_scanned, key)] = (region, key)

                    for future in as_completed(futures):
                        region, key = futures[future]
                        try:
                            result = future.result()
                            total_events += result["events"]
                            total_errors += result["errors"]

                            if result["events"] > 0 or result["errors"] > 0:
                                self.logger.info(f" [{region.upper()}] +{result['events']} events, {result['errors']} errors")
                        except Exception as e:
                            # A crashed region thread keeps its checkpoint so
                            # the next cycle can resume where it stopped.
                            self.logger.error(f" [{region.upper()}] Thread error: {e}")
                            cycle_complete = False

                self.logger.info(f"P1 cycle complete: +{total_events} total events, {total_errors} errors")

                if cycle_complete:
                    # All regions finished P1: drop their per-cycle progress.
                    for region in region_names:
                        scanned_cells.pop(f"{region}_p1", None)
                    save_checkpoint(cycle, scanned_cells)

                # Full coverage scan every 10 cycles
                if cycle % 10 == 0 and self.running:
                    self.logger.info("\n--- FULL COVERAGE SCAN (PARALLEL) ---")
                    total_p3_events = 0

                    with ThreadPoolExecutor(max_workers=len(region_names)) as executor:
                        futures = {}
                        for region in region_names:
                            key = f"{region}_p3"
                            already_scanned = set(scanned_cells.get(key, []))
                            futures[executor.submit(scan_region, region, 3, today, already_scanned, key)] = (region, key)

                        for future in as_completed(futures):
                            region, key = futures[future]
                            try:
                                result = future.result()
                                total_p3_events += result["events"]

                                if result["events"] > 0:
                                    self.logger.info(f" [{region.upper()}] +{result['events']} events")
                            except Exception as e:
                                self.logger.error(f" [{region.upper()}] Thread error: {e}")

                    self.logger.info(f"P3 coverage complete: +{total_p3_events} total events")

                    for region in region_names:
                        scanned_cells.pop(f"{region}_p3", None)
                    save_checkpoint(cycle, scanned_cells)

                # Print summary every 5 cycles
                if cycle % 5 == 0:
                    self.logger.info("\n--- DATABASE SUMMARY ---")
                    for region_name, db in self.databases.items():
                        result = db.execute(
                            "SELECT COUNT(*) as events, COUNT(DISTINCT username) as users FROM events"
                        ).fetchone()
                        self.logger.info(f" {region_name.upper():10}: {result[0]:,} events, {result[1]:,} users")

                if self.running:
                    time.sleep(10)

        except Exception as e:
            self.logger.error(f"Fatal error: {e}", exc_info=True)
            raise
        finally:
            # Always drop the PID lock and close every region DB, even on crash.
            self._remove_pid()
            for db in self.databases.values():
                db.close()
            self.logger.info("Collector stopped.")
561
+
562
+
563
+ # === Collection Commands ===
564
+
565
@cli.command()
@click.option("--web", "-w", is_flag=True, help="Also start the web UI")
@click.option("--port", "-p", default=5000, help="Web UI port (default: 5000)")
@click.option("--region", "-r", multiple=True, help="Specific regions to scan (can be repeated)")
def collect(web, port, region):
    """Start worldwide multi-threaded data collection.

    This runs the full worldwide collector which scans all continents
    in parallel. Use --web to also start the visualization UI.

    Examples:
        waze collect                    # Collect from all regions
        waze collect --web              # Collect + web UI on port 5000
        waze collect --web --port 8080  # Web UI on port 8080
        waze collect -r europe -r asia  # Only Europe and Asia
    """
    # Refuse to start twice: the PID file tells us if one is already alive.
    existing_pid = CLIWorldwideCollector.get_pid()
    if existing_pid:
        click.echo(f"Collector already running (PID {existing_pid})")
        return

    web_port = port if web else None
    selected_regions = list(region) if region else None

    click.echo("Starting worldwide collector...")
    if web_port:
        click.echo(f"Web UI will be available at http://localhost:{web_port}")

    # Blocks until the collector receives SIGINT/SIGTERM.
    CLIWorldwideCollector(web_port=web_port, regions=selected_regions).run()
595
+
596
+
597
@cli.command()
@click.option("--europe", is_flag=True, help="Start Europe-wide collector (legacy)")
def start(europe):
    """Start the collector daemon (legacy - use 'collect' for worldwide)."""
    # Pick the collector class and its user-facing messages up front,
    # then share one start path for both variants.
    if europe:
        from collector_europe import EuropeCollector as CollectorClass
        running_msg = "Europe collector already running (PID {})"
        start_msg = "Starting Europe collector..."
    else:
        from collector import Collector as CollectorClass
        running_msg = "Collector already running (PID {})"
        start_msg = "Starting collector..."

    pid = CollectorClass.get_pid()
    if pid:
        click.echo(running_msg.format(pid))
        return

    click.echo(start_msg)
    CollectorClass().run()
619
+
620
+
621
@cli.command()
@click.option("--europe", is_flag=True, help="Stop Europe-wide collector")
@click.option("--worldwide", "-w", is_flag=True, help="Stop worldwide collector")
def stop(europe, worldwide):
    """Stop the collector daemon."""
    # Resolve which daemon we are stopping (worldwide wins over europe),
    # then funnel into one shared kill path.
    if worldwide:
        pid = CLIWorldwideCollector.get_pid()
        label = "worldwide collector"
        not_running_msg = "Worldwide collector is not running"
    elif europe:
        from collector_europe import EuropeCollector
        pid = EuropeCollector.get_pid()
        label = "Europe collector"
        not_running_msg = "Europe collector is not running"
    else:
        from collector import Collector
        pid = Collector.get_pid()
        label = "collector"
        not_running_msg = "Collector is not running"

    if not pid:
        click.echo(not_running_msg)
        return

    click.echo(f"Stopping {label} (PID {pid})...")
    # SIGTERM triggers the collector's cooperative shutdown handler.
    os.kill(pid, signal.SIGTERM)
    click.echo("Stop signal sent")
652
+
653
@cli.command()
@click.option("--all", "-a", "show_all", is_flag=True, help="Show all regional databases")
def status(show_all):
    """Show collector status and database summary.

    Reports running/stopped state for each collector daemon, then either a
    per-region database summary (--all) or the legacy Madrid-only summary.
    """
    from collector import Collector

    # Check all collector types
    worldwide_pid = CLIWorldwideCollector.get_pid()
    madrid_pid = Collector.get_pid()

    click.echo("=== Collector Status ===")
    click.echo(f"Worldwide: {'Running (PID ' + str(worldwide_pid) + ')' if worldwide_pid else 'Stopped'}")
    click.echo(f"Madrid: {'Running (PID ' + str(madrid_pid) + ')' if madrid_pid else 'Stopped'}")

    # Check for Europe collector
    try:
        from collector_europe import EuropeCollector
        europe_pid = EuropeCollector.get_pid()
        click.echo(f"Europe: {'Running (PID ' + str(europe_pid) + ')' if europe_pid else 'Stopped'}")
    except ImportError:
        # The Europe collector module is optional; silently skip if absent.
        pass

    click.echo()

    if show_all:
        # Show all regional databases
        click.echo("=== Regional Database Summary ===")
        total_events = 0
        all_users = set()    # union of usernames across every region
        first_event = None   # earliest timestamp seen across regions
        last_event = None    # latest timestamp seen across regions
        all_types = {}       # report_type -> total count across regions

        dbs = get_all_dbs()
        if not dbs:
            click.echo("No regional databases found")
            return

        table = []
        for region, db in dbs:
            try:
                row = db.execute("""
                    SELECT COUNT(*) as count,
                           COUNT(DISTINCT username) as users,
                           MIN(timestamp_utc) as first_event,
                           MAX(timestamp_utc) as last_event
                    FROM events
                """).fetchone()

                events = row["count"] or 0
                users = row["users"] or 0
                total_events += events

                # Get unique users
                user_rows = db.execute("SELECT DISTINCT username FROM events").fetchall()
                for u in user_rows:
                    all_users.add(u["username"])

                # Get event types
                type_rows = db.execute("""
                    SELECT report_type, COUNT(*) as count FROM events GROUP BY report_type
                """).fetchall()
                for tr in type_rows:
                    t = tr["report_type"]
                    all_types[t] = all_types.get(t, 0) + tr["count"]

                # Track the global min/max timestamps across all regions.
                if row["first_event"]:
                    if first_event is None or row["first_event"] < first_event:
                        first_event = row["first_event"]
                if row["last_event"]:
                    if last_event is None or row["last_event"] > last_event:
                        last_event = row["last_event"]

                table.append([
                    region.upper(),
                    f"{events:,}",
                    f"{users:,}",
                    row["first_event"][:10] if row["first_event"] else "N/A",
                    row["last_event"][:10] if row["last_event"] else "N/A"
                ])

                db.close()
            except Exception as e:
                # Keep the region visible in the table with a truncated error.
                table.append([region.upper(), "Error", str(e)[:30], "", ""])

        click.echo(tabulate(table, headers=["Region", "Events", "Users", "First", "Last"]))

        click.echo(f"\n=== Totals ===")
        click.echo(f"Total events: {total_events:,}")
        click.echo(f"Unique users: {len(all_users):,}")
        if first_event and last_event:
            click.echo(f"Time range: {first_event[:19]} -> {last_event[:19]}")

        if all_types:
            click.echo("\nBy type (all regions):")
            # Top 10 types by descending count.
            for t, count in sorted(all_types.items(), key=lambda x: -x[1])[:10]:
                pct = count / total_events * 100 if total_events else 0
                click.echo(f" {t:12} {count:>8,} ({pct:.1f}%)")
    else:
        # Show default Madrid database (legacy behavior)
        config = load_config()
        click.echo(f"Database: {config['database_path']}")
        click.echo(f"Polling interval: {config.get('polling_interval_seconds', 300)}s")
        click.echo()

        if os.path.exists(config["database_path"]):
            from analysis import get_stats
            db = get_db()
            stats = get_stats(db)

            click.echo(f"Total events: {stats['total_events']:,}")
            click.echo(f"Unique users: {stats['unique_users']:,}")

            if stats['first_event']:
                click.echo(f"Time range: {stats['first_event'][:19]} -> {stats['last_event'][:19]}")

            if stats['by_type']:
                click.echo("\nBy type:")
                for t, count in sorted(stats['by_type'].items(), key=lambda x: -x[1]):
                    pct = count / stats['total_events'] * 100 if stats['total_events'] else 0
                    click.echo(f" {t:12} {count:>6,} ({pct:.1f}%)")

            db.close()
        else:
            click.echo("No data collected yet")

    click.echo("\nTip: Use 'waze status --all' to see all regional databases")
780
+
781
+ # === Data Exploration Commands ===
782
+
783
@cli.command()
def stats():
    """Show summary statistics."""
    from analysis import get_stats

    db = get_db()
    summary = get_stats(db)

    click.echo(f"Total events: {summary['total_events']:,}")
    click.echo(f"Unique users: {summary['unique_users']:,}")

    if summary['first_event']:
        click.echo(f"First event: {summary['first_event'][:19]}")
        click.echo(f"Last event: {summary['last_event'][:19]}")

    if summary['by_type']:
        click.echo("\nBy type:")
        # Descending by count; percentage guards against an empty database.
        for report_type, count in sorted(summary['by_type'].items(), key=lambda item: -item[1]):
            pct = count / summary['total_events'] * 100 if summary['total_events'] else 0
            click.echo(f" {report_type:12} {count:>6,} ({pct:.1f}%)")

    db.close()
805
+
806
@cli.command()
@click.option("-n", "--limit", default=20, help="Number of events to show")
def recent(limit):
    """Show recent events.

    Prints the newest `limit` events from the default database as a table.
    """
    from analysis import get_recent_events

    db = get_db()
    try:
        events = get_recent_events(db, limit)

        if not events:
            click.echo("No events found")
            return

        table = [
            [
                e["timestamp_utc"][:19],
                e["username"][:20],
                e["report_type"],
                f"{e['latitude']:.4f}",
                f"{e['longitude']:.4f}",
            ]
            for e in events
        ]
        click.echo(tabulate(table, headers=["Time", "User", "Type", "Lat", "Lon"]))
    finally:
        # Always release the connection - the previous version leaked it on
        # the empty-result early return.
        db.close()
831
+
832
@cli.command()
@click.option("-u", "--username", help="Filter by username")
@click.option("-t", "--type", "report_type", help="Filter by report type")
@click.option("--since", help="Time filter (e.g., '2h', '1d')")
@click.option("-n", "--limit", default=50, help="Max results")
def search(username, report_type, since, limit):
    """Search events with filters.

    Any combination of username, report type and relative time window may
    be given; results are newest-first, capped at `limit`.
    """
    db = get_db()
    try:
        query = "SELECT * FROM events WHERE 1=1"
        params = []

        if username:
            query += " AND username = ?"
            params.append(username)

        if report_type:
            # Stored types are upper-case; normalize the user's input.
            query += " AND report_type = ?"
            params.append(report_type.upper())

        if since:
            # Parse time filter: "<int><unit>" with unit in {m, h, d}.
            unit = since[-1]
            try:
                value = int(since[:-1])
            except ValueError:
                # Previously this raised an unhandled ValueError for input
                # like "abc"; report it like the unknown-unit case instead.
                click.echo(f"Invalid time value: {since}")
                return
            if unit == 'h':
                delta = timedelta(hours=value)
            elif unit == 'd':
                delta = timedelta(days=value)
            elif unit == 'm':
                delta = timedelta(minutes=value)
            else:
                click.echo(f"Unknown time unit: {unit}")
                return

            # Use an aware UTC timestamp: stored timestamp_utc values are
            # aware isoformat strings (they carry "+00:00"), so the cutoff
            # must use the same format for the lexicographic >= comparison
            # to be exact. datetime.utcnow() was naive and is deprecated.
            cutoff = datetime.now(timezone.utc) - delta
            query += " AND timestamp_utc >= ?"
            params.append(cutoff.isoformat())

        query += " ORDER BY timestamp_ms DESC LIMIT ?"
        params.append(limit)

        rows = db.execute(query, tuple(params)).fetchall()

        if not rows:
            click.echo("No events found")
            return

        table = [
            [
                r["timestamp_utc"][:19],
                r["username"][:20],
                r["report_type"],
                f"{r['latitude']:.4f}",
                f"{r['longitude']:.4f}",
            ]
            for r in rows
        ]
        click.echo(tabulate(table, headers=["Time", "User", "Type", "Lat", "Lon"]))
        click.echo(f"\n{len(rows)} events found")
    finally:
        # Close on every path - the previous version leaked the connection
        # on all three early returns.
        db.close()
892
+
893
# === User Analysis Commands ===

@cli.command()
@click.option("-n", "--limit", default=50, help="Number of users to show")
def users(limit):
    """List users with event counts.

    Shows up to `limit` users with their event totals and first/last
    activity dates.
    """
    from analysis import get_users_summary

    db = get_db()
    try:
        user_list = get_users_summary(db, limit)

        if not user_list:
            click.echo("No users found")
            return

        table = [
            [
                u["username"][:25],
                u["event_count"],
                u["first_seen"][:10],
                u["last_seen"][:10],
            ]
            for u in user_list
        ]
        click.echo(tabulate(table, headers=["Username", "Events", "First Seen", "Last Seen"]))
    finally:
        # Close even on the empty-result early return (previously leaked).
        db.close()
919
+
920
@cli.command()
@click.argument("username")
def profile(username):
    """Show detailed profile for a user.

    Prints activity totals, the report-type breakdown and the 10 most
    recent events for USERNAME.
    """
    from analysis import get_user_profile

    db = get_db()
    try:
        p = get_user_profile(db, username)

        if not p:
            click.echo(f"User '{username}' not found")
            return

        click.echo(f"User: {p['username']}")
        click.echo(f"Events: {p['event_count']}")
        click.echo(f"First seen: {p['first_seen'][:19]}")
        click.echo(f"Last seen: {p['last_seen'][:19]}")
        click.echo(f"Center location: {p['center_location']['lat']:.4f}, {p['center_location']['lon']:.4f}")

        click.echo("\nReport types:")
        for t, count in sorted(p['type_breakdown'].items(), key=lambda x: -x[1]):
            click.echo(f" {t}: {count}")

        click.echo("\nRecent events:")
        # Last 10 events only, oldest of the ten first.
        table = [
            [
                e["timestamp_utc"][:19],
                e["report_type"],
                f"{e['latitude']:.4f}, {e['longitude']:.4f}",
            ]
            for e in p['events'][-10:]
        ]
        click.echo(tabulate(table, headers=["Time", "Type", "Location"]))
    finally:
        # Close even on the user-not-found early return (previously leaked).
        db.close()
954
+
955
+ # === Export Commands ===
956
+
957
@cli.command()
@click.option("--format", "fmt", type=click.Choice(["csv", "geojson"]), default="csv")
@click.option("-o", "--output", help="Output file path")
def export(fmt, output):
    """Export events to CSV or GeoJSON.

    Defaults to a timestamped file under exports/ when --output is omitted.
    """
    # NOTE: `json` is already imported at module level; the original block
    # re-imported it locally. `csv` is only needed here, so import lazily.
    import csv

    db = get_db()
    try:
        rows = db.execute("SELECT * FROM events ORDER BY timestamp_ms").fetchall()

        if not rows:
            click.echo("No events to export")
            return

        if not output:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output = f"exports/events_{timestamp}.{fmt}"

        os.makedirs(os.path.dirname(output) or ".", exist_ok=True)

        if fmt == "csv":
            with open(output, "w", newline="") as f:
                writer = csv.writer(f)
                # Header row comes from the sqlite3.Row column names.
                writer.writerow(rows[0].keys())
                for row in rows:
                    writer.writerow(tuple(row))

        elif fmt == "geojson":
            features = [
                {
                    "type": "Feature",
                    "geometry": {
                        "type": "Point",
                        # GeoJSON ordering is [lon, lat].
                        "coordinates": [row["longitude"], row["latitude"]],
                    },
                    "properties": {
                        "username": row["username"],
                        "timestamp": row["timestamp_utc"],
                        "type": row["report_type"],
                        "subtype": row["subtype"],
                    },
                }
                for row in rows
            ]
            geojson = {"type": "FeatureCollection", "features": features}
            with open(output, "w") as f:
                json.dump(geojson, f)

        click.echo(f"Exported {len(rows)} events to {output}")
    finally:
        # Close on every path; the original leaked the connection on the
        # "no events" early return.
        db.close()
1008
+
1009
+ # === Config Commands ===
1010
+
1011
@cli.command()
@click.option("--interval", type=int, help="Set polling interval in seconds")
def config(interval):
    """Show or modify configuration.

    With --interval, persists the new polling interval to config.yaml;
    otherwise prints the current configuration.
    """
    cfg = load_config()

    # Use `is not None` so an explicitly supplied value is never ignored:
    # the original truthiness test (`if interval:`) silently treated
    # `--interval 0` as "not provided" and just printed the config.
    if interval is not None:
        cfg["polling_interval_seconds"] = interval
        with open("config.yaml", "w") as f:
            yaml.dump(cfg, f, default_flow_style=False)
        click.echo(f"Polling interval set to {interval} seconds")
    else:
        click.echo(yaml.dump(cfg, default_flow_style=False))
1024
+
1025
+
1026
+ # === Collection Stats Commands ===
1027
+
1028
@cli.command()
@click.option("-n", "--days", default=7, help="Number of days to show")
@click.option("--all", "-a", "show_all", is_flag=True, help="Show all regional databases")
def daily(days, show_all):
    """Show daily collection statistics.

    With --all, aggregates per-day stats across every regional database;
    otherwise reads only the default database.
    """
    headers = ["Date", "Events", "Users", "Requests", "Errors"]

    if show_all:
        # date -> {"events": int, "users": set, "requests": int, "errors": int}
        daily_stats = {}

        for region, db in get_all_dbs():
            try:
                stats = db.get_daily_stats(days)
                for s in stats:
                    date = s["date"]
                    if date not in daily_stats:
                        daily_stats[date] = {"events": 0, "users": set(), "requests": 0, "errors": 0}
                    daily_stats[date]["events"] += s.get("events_collected", 0)
                    daily_stats[date]["requests"] += s.get("api_requests", 0)
                    daily_stats[date]["errors"] += s.get("api_errors", 0)

                # Unique users must be unioned across regions, so query this
                # DB for every date collected so far.
                for date in daily_stats.keys():
                    user_rows = db.execute("""
                        SELECT DISTINCT username FROM events
                        WHERE DATE(timestamp_utc) = ?
                    """, (date,)).fetchall()
                    for u in user_rows:
                        daily_stats[date]["users"].add(u["username"])
            except Exception:
                # Best-effort: one broken regional DB must not abort the report.
                pass
            finally:
                # Close unconditionally -- the original called close() inside
                # the try, leaking the handle whenever a query raised.
                db.close()

        if not daily_stats:
            click.echo("No daily stats recorded yet")
            return

        table = []
        for date in sorted(daily_stats.keys(), reverse=True):
            s = daily_stats[date]
            table.append([
                date,
                f"{s['events']:,}",
                f"{len(s['users']):,}",
                f"{s['requests']:,}",
                f"{s['errors']:,}",
            ])

        click.echo("=== Worldwide Daily Statistics ===")
        click.echo(tabulate(table, headers=headers))
    else:
        db = get_db()
        try:
            stats = db.get_daily_stats(days)

            if not stats:
                click.echo("No daily stats recorded yet")
                click.echo("Tip: Use 'waze daily --all' to see all regional databases")
                return

            table = []
            for s in stats:
                table.append([
                    s["date"],
                    f"{s['events_collected']:,}",
                    f"{s['unique_users']:,}",
                    f"{s['api_requests']:,}",
                    f"{s['api_errors']:,}",
                ])

            click.echo(tabulate(table, headers=headers))
        finally:
            # Close on every path; the original leaked the connection on the
            # empty-stats early return.
            db.close()
1099
+
1100
+
1101
@cli.command()
@click.option("--port", "-p", default=5000, help="Port to run the web UI on (default: 5000)")
def web(port):
    """Start the web visualization UI only (no collection).

    Use this to view collected data without running the collector.
    """
    click.echo(f"Starting web UI at http://localhost:{port}")
    click.echo("Press Ctrl+C to stop")

    # Make the bundled web/ directory importable before loading the app.
    web_dir = os.path.join(os.path.dirname(__file__), 'web')
    sys.path.insert(0, web_dir)
    from web.app import app

    # Blocks until interrupted; threaded so requests are served concurrently.
    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
1114
+
1115
+
1116
@cli.command()
def summary():
    """Show overall collection summary (all regions).

    Aggregates totals, unique users, collection days, and grid cells across
    every regional database, then prints a per-region breakdown.
    """
    total_events = 0
    all_users = set()
    days_collected = set()
    grid_cells = set()
    first_event = None
    last_event = None

    dbs = get_all_dbs()
    if not dbs:
        click.echo("No data collected yet")
        return

    region_stats = []  # (region, event_count, unique_users)

    for region, db in dbs:
        try:
            s = db.get_collection_summary()
            if s and s.get('total_events'):
                total_events += s['total_events']

                # Union of usernames across all regional databases.
                user_rows = db.execute("SELECT DISTINCT username FROM events").fetchall()
                for u in user_rows:
                    all_users.add(u["username"])

                # Distinct collection dates.
                date_rows = db.execute("""
                    SELECT DISTINCT DATE(timestamp_utc) as dt FROM events
                """).fetchall()
                for d in date_rows:
                    if d["dt"]:
                        days_collected.add(d["dt"])

                # Distinct grid cells actually used.
                cell_rows = db.execute("""
                    SELECT DISTINCT grid_cell FROM events WHERE grid_cell IS NOT NULL
                """).fetchall()
                for c in cell_rows:
                    grid_cells.add(c["grid_cell"])

                # Earliest/latest event timestamps; presumably ISO-8601
                # strings, which compare correctly lexicographically.
                if s['first_event']:
                    if first_event is None or s['first_event'] < first_event:
                        first_event = s['first_event']
                if s['last_event']:
                    if last_event is None or s['last_event'] > last_event:
                        last_event = s['last_event']

                region_stats.append((region, s['total_events'], s['unique_users']))
        except Exception as e:
            click.echo(f"Error reading {region}: {e}")
        finally:
            # Close unconditionally -- the original called close() inside the
            # try, leaking the handle whenever a query raised.
            db.close()

    if total_events == 0:
        click.echo("No data collected yet")
        return

    click.echo("=== Worldwide Collection Summary ===")
    click.echo(f"Total events: {total_events:,}")
    click.echo(f"Unique users: {len(all_users):,}")
    click.echo(f"Days collected: {len(days_collected)}")
    click.echo(f"Grid cells used: {len(grid_cells)}")
    click.echo(f"First event: {first_event[:19] if first_event else 'N/A'}")
    click.echo(f"Last event: {last_event[:19] if last_event else 'N/A'}")

    if len(days_collected) > 0:
        avg = total_events / len(days_collected)
        click.echo(f"Avg events/day: {avg:.1f}")

    click.echo("\n=== By Region ===")
    # Regions with the most events first.
    for region, events, users in sorted(region_stats, key=lambda x: -x[1]):
        pct = events / total_events * 100 if total_events else 0
        click.echo(f" {region.upper():10} {events:>10,} events {users:>8,} users ({pct:.1f}%)")
1192
+
1193
+
1194
@cli.command()
@click.option("-n", "--limit", default=20, help="Number of users to show")
def tracked(limit):
    """Show tracked users with most events."""
    db = get_db()
    try:
        users = db.get_tracked_users(limit)

        if not users:
            click.echo("No tracked users yet")
            return

        table = [
            [
                u["username"][:25],  # truncate long usernames for display
                f"{u['event_count']:,}",
                u["first_seen"][:10],  # date part only
                u["last_seen"][:10],
            ]
            for u in users
        ]
        click.echo(tabulate(table, headers=["Username", "Events", "First Seen", "Last Seen"]))
    finally:
        # Close on every path; the original leaked the connection on the
        # empty-result early return.
        db.close()
1216
+
1217
+
1218
# Script entry point: dispatch to the click command group when run directly.
if __name__ == "__main__":
    cli()