waze_logs-1.0.0-py3-none-any.whl
- analysis.py +91 -0
- cli.py +1219 -0
- collector.py +193 -0
- collector_europe.py +312 -0
- collector_worldwide.py +532 -0
- database.py +176 -0
- waze_client.py +234 -0
- waze_logs-1.0.0.dist-info/METADATA +411 -0
- waze_logs-1.0.0.dist-info/RECORD +15 -0
- waze_logs-1.0.0.dist-info/WHEEL +5 -0
- waze_logs-1.0.0.dist-info/entry_points.txt +2 -0
- waze_logs-1.0.0.dist-info/licenses/LICENSE +21 -0
- waze_logs-1.0.0.dist-info/top_level.txt +8 -0
- web/app.py +536 -0
- web/templates/index.html +1241 -0
cli.py
ADDED
@@ -0,0 +1,1219 @@
# cli.py
import os
import sys
import signal
import click
import yaml
import json
import time
import threading
import logging
from pathlib import Path
from tabulate import tabulate
from datetime import datetime, timedelta, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed


@click.group()
def cli():
    """Waze Worldwide Logger - Global traffic event collection and analysis."""
    pass


def load_config():
    with open("config.yaml") as f:
        return yaml.safe_load(f)


def get_db(region=None):
    """Get database connection for a specific region or default Madrid."""
    from database import Database
    if region:
        db_path = f"./data/waze_{region}.db"
        if os.path.exists(db_path):
            return Database(db_path)
    config = load_config()
    return Database(config["database_path"])


def get_all_dbs():
    """Get connections to all existing regional databases."""
    from database import Database
    DB_PATHS = {
        "madrid": "./data/waze_madrid.db",
        "europe": "./data/waze_europe.db",
        "americas": "./data/waze_americas.db",
        "asia": "./data/waze_asia.db",
        "oceania": "./data/waze_oceania.db",
        "africa": "./data/waze_africa.db",
    }
    dbs = []
    for region, path in DB_PATHS.items():
        if os.path.exists(path):
            try:
                dbs.append((region, Database(path)))
            except Exception:
                pass
    return dbs


# === Worldwide Collection System ===

# Status/checkpoint file paths
STATUS_FILE = "./data/collector_status.json"
CHECKPOINT_FILE = "./data/collector_checkpoint.json"
PID_FILE = "./collector_cli.pid"

status_lock = threading.Lock()
checkpoint_lock = threading.Lock()


def write_status(region: str, cell_name: str, country: str, cell_idx: int, total_cells: int,
                 alerts_count: int, new_count: int, event_types: list = None):
    """Write current collector status to file for UI consumption (thread-safe)."""
    try:
        status = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "region": region,
            "cell_name": cell_name,
            "country": country,
            "cell_idx": cell_idx,
            "total_cells": total_cells,
            "alerts_found": alerts_count,
            "new_events": new_count,
            "event_types": event_types or [],
            "status": "scanning"
        }
        with status_lock:
            with open(STATUS_FILE, "w") as f:
                json.dump(status, f)
    except Exception:
        pass


def load_checkpoint():
    """Load checkpoint from file."""
    try:
        if os.path.exists(CHECKPOINT_FILE):
            with open(CHECKPOINT_FILE, "r") as f:
                return json.load(f)
    except Exception:
        pass
    return {"cycle": 0, "scanned": {}}


def save_checkpoint(cycle: int, scanned: dict):
    """Save checkpoint to file (thread-safe)."""
    try:
        checkpoint = {
            "cycle": cycle,
            "scanned": scanned,
            "timestamp": datetime.now(timezone.utc).isoformat()
        }
        with checkpoint_lock:
            with open(CHECKPOINT_FILE, "w") as f:
                json.dump(checkpoint, f)
    except Exception:
        pass
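
# Illustrative checkpoint file contents (hypothetical cell names).
# save_checkpoint() writes this shape; the "scanned" keys are built as
# "<region>_p<priority>" by the collector's run loop further below:
#
#   {"cycle": 12,
#    "scanned": {"europe_p1": ["madrid_center", "paris_center"]},
#    "timestamp": "2024-01-01T12:00:00+00:00"}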


def clear_checkpoint():
    """Clear checkpoint file when cycle completes."""
    try:
        if os.path.exists(CHECKPOINT_FILE):
            os.remove(CHECKPOINT_FILE)
    except Exception:
        pass


def generate_event_hash(username: str, latitude: float, longitude: float,
                        timestamp_ms: int, report_type: str) -> str:
    """Generate unique hash for event deduplication."""
    import hashlib
    timestamp_minute = timestamp_ms // 60000
    data = f"{username}|{round(latitude, 4)}|{round(longitude, 4)}|{timestamp_minute}|{report_type}"
    return hashlib.sha256(data.encode()).hexdigest()[:16]
|
+
|
|
132
|
+
|
|
133
|
+
def process_alert(alert: dict, grid_cell: str) -> dict:
|
|
134
|
+
"""Process a Waze alert into event format."""
|
|
135
|
+
username = alert.get("reportBy", "anonymous")
|
|
136
|
+
latitude = alert.get("latitude", 0.0)
|
|
137
|
+
longitude = alert.get("longitude", 0.0)
|
|
138
|
+
timestamp_ms = alert.get("pubMillis", int(time.time() * 1000))
|
|
139
|
+
report_type = alert.get("type", "UNKNOWN")
|
|
140
|
+
subtype = alert.get("subtype")
|
|
141
|
+
|
|
142
|
+
timestamp_utc = datetime.fromtimestamp(
|
|
143
|
+
timestamp_ms / 1000, tz=timezone.utc
|
|
144
|
+
).isoformat()
|
|
145
|
+
|
|
146
|
+
return {
|
|
147
|
+
"event_hash": generate_event_hash(username, latitude, longitude, timestamp_ms, report_type),
|
|
148
|
+
"username": username,
|
|
149
|
+
"latitude": latitude,
|
|
150
|
+
"longitude": longitude,
|
|
151
|
+
"timestamp_utc": timestamp_utc,
|
|
152
|
+
"timestamp_ms": timestamp_ms,
|
|
153
|
+
"report_type": report_type,
|
|
154
|
+
"subtype": subtype,
|
|
155
|
+
"raw_json": json.dumps(alert),
|
|
156
|
+
"collected_at": datetime.now(timezone.utc).isoformat(),
|
|
157
|
+
"grid_cell": grid_cell
|
|
158
|
+
}
|
|
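
# Illustrative input alert (hypothetical values) -- only the fields read above
# are significant; the full payload is preserved verbatim in raw_json:
#
#   {"reportBy": "user1", "latitude": 40.4168, "longitude": -3.7038,
#    "pubMillis": 1700000000000, "type": "JAM", "subtype": "JAM_HEAVY_TRAFFIC"}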


class RegionScanner:
    """Scanner for a specific region."""

    def __init__(self, name: str, config_path: str, db, client, logger):
        self.name = name
        self.config_path = config_path
        self.db = db
        self.client = client
        self.logger = logger
        self.cells_by_priority = {}
        self._load_cells()

    def _load_cells(self):
        with open(self.config_path) as f:
            config = yaml.safe_load(f)

        for cell in config.get("grid_cells", []):
            p = cell.get("priority", 2)
            if p not in self.cells_by_priority:
                self.cells_by_priority[p] = []
            self.cells_by_priority[p].append(cell)
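
    # Illustrative grid_cells entry from a regional config file (hypothetical
    # values; the exact YAML layout is an assumption) -- these are the keys
    # read here and in scan() below:
    #
    #   grid_cells:
    #     - name: madrid_center
    #       country: ES
    #       priority: 1
    #       lat_top: 40.5
    #       lat_bottom: 40.3
    #       lon_left: -3.8
    #       lon_right: -3.6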

    def get_cell_counts(self) -> dict:
        return {p: len(cells) for p, cells in self.cells_by_priority.items()}

    def scan(self, priority: int, running_flag, already_scanned: set = None,
             on_cell_scanned: callable = None) -> dict:
        """Scan cells of given priority, skipping already-scanned cells."""
        cells = self.cells_by_priority.get(priority, [])
        stats = {"requests": 0, "errors": 0, "events": 0, "cells": len(cells), "scanned_cells": []}
        total_cells = len(cells)
        already_scanned = already_scanned or set()

        # Filter out already-scanned cells
        remaining_cells = [(idx, cell) for idx, cell in enumerate(cells, 1)
                           if cell["name"] not in already_scanned]

        if len(remaining_cells) < len(cells):
            skipped = len(cells) - len(remaining_cells)
            self.logger.info(f"Resuming: skipping {skipped} already-scanned cells, {len(remaining_cells)} remaining")

        for idx, cell in remaining_cells:
            if not running_flag():
                break

            try:
                stats["requests"] += 1
                cell_name = cell["name"]
                country = cell.get("country", "??")

                alerts, _ = self.client.get_traffic_notifications(
                    lat_top=cell["lat_top"],
                    lat_bottom=cell["lat_bottom"],
                    lon_left=cell["lon_left"],
                    lon_right=cell["lon_right"]
                )

                new_count = 0
                new_types = []
                for alert in alerts:
                    event = process_alert(alert, cell_name)
                    if self.db.insert_event(event):
                        new_count += 1
                        new_types.append(event["report_type"])
                        self.db.upsert_tracked_user(event["username"], event["timestamp_utc"])

                stats["events"] += new_count
                stats["scanned_cells"].append(cell_name)

                if on_cell_scanned:
                    on_cell_scanned(cell_name)

                # Only log when there are alerts or new events
                if len(alerts) > 0 or new_count > 0:
                    type_summary = ""
                    if new_types:
                        from collections import Counter
                        counts = Counter(new_types)
                        type_summary = " | " + ", ".join(f"{t}:{c}" for t, c in counts.most_common(3))

                    status = f"+{new_count}" if new_count > 0 else "0"
                    self.logger.info(f"[{idx:3}/{total_cells}] {cell_name:25} ({country}) -> {len(alerts):3} alerts, {status} new{type_summary}")

                write_status(
                    region=self.name,
                    cell_name=cell_name,
                    country=country,
                    cell_idx=idx,
                    total_cells=total_cells,
                    alerts_count=len(alerts),
                    new_count=new_count,
                    event_types=new_types
                )

            except Exception as e:
                stats["errors"] += 1
                stats["scanned_cells"].append(cell["name"])
                self.logger.error(f"[{idx:3}/{total_cells}] {cell['name']:25} -> ERROR: {e}")

        return stats


class CLIWorldwideCollector:
    """Multi-threaded worldwide Waze data collector for CLI."""

    REGIONS = [
        ("europe", "config_europe.yaml", "./data/waze_europe.db"),
        ("americas", "config_americas.yaml", "./data/waze_americas.db"),
        ("asia", "config_asia.yaml", "./data/waze_asia.db"),
        ("oceania", "config_oceania.yaml", "./data/waze_oceania.db"),
        ("africa", "config_africa.yaml", "./data/waze_africa.db"),
    ]

    def __init__(self, web_port=None, regions=None):
        self.running = False
        self.web_port = web_port
        self.selected_regions = regions  # None = all regions
        self.scanners = {}
        self.databases = {}
        self.clients = {}
        self.logger = None

    def _setup_logging(self):
        """Set up logging for the collector."""
        Path("logs").mkdir(exist_ok=True)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s [%(levelname)s] %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler('logs/cli_collector.log')
            ]
        )
        self.logger = logging.getLogger("cli_collector")

    def _generate_configs(self):
        """Generate regional configs if they don't exist."""
        config_generators = [
            ("europe", "europe_grid", "save_europe_config"),
            ("americas", "americas_grid", "save_americas_config"),
            ("asia", "asia_grid", "save_asia_config"),
            ("oceania", "oceania_grid", "save_oceania_config"),
            ("africa", "africa_grid", "save_africa_config"),
        ]

        for region_name, module_name, func_name in config_generators:
            config_file = f"config_{region_name}.yaml"
            if not os.path.exists(config_file):
                self.logger.info(f"Generating {config_file}...")
                try:
                    module = __import__(module_name)
                    getattr(module, func_name)()
                except ImportError:
                    self.logger.warning(f"Could not import {module_name}, skipping {region_name}")

    def _save_pid(self):
        with open(PID_FILE, "w") as f:
            f.write(str(os.getpid()))

    def _remove_pid(self):
        if os.path.exists(PID_FILE):
            os.remove(PID_FILE)

    @staticmethod
    def get_pid():
        if os.path.exists(PID_FILE):
            try:
                with open(PID_FILE) as f:
                    pid = int(f.read().strip())
                os.kill(pid, 0)
                return pid
            except (OSError, ValueError):
                return None
        return None

    def _start_web_server(self):
        """Start Flask web server in a background thread."""
        def run_flask():
            # Suppress Flask's default logging
            import logging as flask_logging
            flask_log = flask_logging.getLogger('werkzeug')
            flask_log.setLevel(flask_logging.WARNING)

            # Add project root to path for web module import
            project_root = os.path.dirname(os.path.abspath(__file__))
            if project_root not in sys.path:
                sys.path.insert(0, project_root)
            from web.app import app
            app.run(host="0.0.0.0", port=self.web_port, debug=False, threaded=True, use_reloader=False)

        web_thread = threading.Thread(target=run_flask, daemon=True)
        web_thread.start()
        self.logger.info(f"Web UI started at http://localhost:{self.web_port}")
        return web_thread

    def run(self):
        """Main worldwide collection loop."""
        from database import Database
        from waze_client import WazeClient

        self._setup_logging()

        # Create directories
        Path("data").mkdir(exist_ok=True)
        Path("logs").mkdir(exist_ok=True)

        # Generate configs
        self._generate_configs()

        # Start web server if requested
        if self.web_port:
            self._start_web_server()

        # Filter regions if specified
        regions_to_scan = self.REGIONS
        if self.selected_regions:
            regions_to_scan = [r for r in self.REGIONS if r[0] in self.selected_regions]

        # Initialize scanners
        self.logger.info("=" * 70)
        self.logger.info("WAZE WORLDWIDE COLLECTOR (CLI)")
        self.logger.info("=" * 70)

        total_p1 = 0
        total_p3 = 0

        for region_name, config_path, db_path in regions_to_scan:
            if not os.path.exists(config_path):
                self.logger.warning(f"Config not found: {config_path}, skipping {region_name}")
                continue

            db = Database(db_path, check_same_thread=False)
            client = WazeClient()

            scanner = RegionScanner(region_name, config_path, db, client, self.logger)
            self.scanners[region_name] = scanner
            self.databases[region_name] = db
            self.clients[region_name] = client

            counts = scanner.get_cell_counts()
            p1 = counts.get(1, 0)
            p3 = counts.get(3, 0)
            total_p1 += p1
            total_p3 += p3

            self.logger.info(f" {region_name.upper():10} - P1 (cities): {p1:4}, P3 (coverage): {p3:4}")

        self.logger.info("-" * 70)
        self.logger.info(f" {'TOTAL':10} - P1 (cities): {total_p1:4}, P3 (coverage): {total_p3:4}")
        self.logger.info(f" {'':10} Grand total: {total_p1 + total_p3} grid cells")
        self.logger.info("=" * 70)
        self.logger.info("Collection strategy (MULTITHREADED):")
        self.logger.info(" - All regions scanned in PARALLEL for P1 (city) scans")
        self.logger.info(" - Full P3 (coverage) scan every 10 cycles (parallel)")
        self.logger.info(" - 10 second pause between cycles")
        if self.web_port:
            self.logger.info(f" - Web UI at http://localhost:{self.web_port}")
        self.logger.info("=" * 70)

        self.running = True
        self._save_pid()

        def handle_signal(signum, frame):
            self.logger.info("Shutdown signal received...")
            self.running = False

        signal.signal(signal.SIGINT, handle_signal)
        signal.signal(signal.SIGTERM, handle_signal)

        region_names = list(self.scanners.keys())

        # Load checkpoint
        checkpoint = load_checkpoint()
        cycle = checkpoint.get("cycle", 0)
        scanned_cells = checkpoint.get("scanned", {})

        if cycle > 0:
            self.logger.info(f"Resuming from checkpoint: cycle {cycle}")

        def scan_region(region_name: str, priority: int, today: str, already_scanned: set,
                        checkpoint_key: str) -> dict:
            """Scan a single region (runs in thread)."""
            scanner = self.scanners[region_name]
            db = self.databases[region_name]

            p_count = scanner.get_cell_counts().get(priority, 0)
            if p_count == 0:
                return {"region": region_name, "events": 0, "errors": 0, "requests": 0, "cells": 0, "scanned_cells": []}

            def on_cell_scanned(cell_name):
                with checkpoint_lock:
                    if checkpoint_key not in scanned_cells:
                        scanned_cells[checkpoint_key] = []
                    scanned_cells[checkpoint_key].append(cell_name)
                save_checkpoint(cycle, scanned_cells)

            stats = scanner.scan(priority, lambda: self.running, already_scanned, on_cell_scanned)

            db.update_daily_stats(
                date=today,
                events=stats["events"],
                requests=stats["requests"],
                errors=stats["errors"],
                cells=stats["cells"]
            )

            return {"region": region_name, **stats}

        try:
            while self.running:
                cycle += 1
                today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

                self.logger.info(f"\n{'='*50}")
                self.logger.info(f"CYCLE {cycle} (PARALLEL MODE)")
                self.logger.info(f"{'='*50}")

                # Parallel P1 scan
                self.logger.info(f"Starting parallel P1 scan across {len(region_names)} regions...")
                total_events = 0
                total_errors = 0
                cycle_complete = True

                with ThreadPoolExecutor(max_workers=len(region_names)) as executor:
                    futures = {}
                    for region in region_names:
                        key = f"{region}_p1"
                        already_scanned = set(scanned_cells.get(key, []))
                        futures[executor.submit(scan_region, region, 1, today, already_scanned, key)] = (region, key)

                    for future in as_completed(futures):
                        region, key = futures[future]
                        try:
                            result = future.result()
                            total_events += result["events"]
                            total_errors += result["errors"]

                            if result["events"] > 0 or result["errors"] > 0:
                                self.logger.info(f" [{region.upper()}] +{result['events']} events, {result['errors']} errors")
                        except Exception as e:
                            self.logger.error(f" [{region.upper()}] Thread error: {e}")
                            cycle_complete = False

                self.logger.info(f"P1 cycle complete: +{total_events} total events, {total_errors} errors")

                if cycle_complete:
                    for region in region_names:
                        scanned_cells.pop(f"{region}_p1", None)
                    save_checkpoint(cycle, scanned_cells)

                # Full coverage scan every 10 cycles
                if cycle % 10 == 0 and self.running:
                    self.logger.info("\n--- FULL COVERAGE SCAN (PARALLEL) ---")
                    total_p3_events = 0

                    with ThreadPoolExecutor(max_workers=len(region_names)) as executor:
                        futures = {}
                        for region in region_names:
                            key = f"{region}_p3"
                            already_scanned = set(scanned_cells.get(key, []))
                            futures[executor.submit(scan_region, region, 3, today, already_scanned, key)] = (region, key)

                        for future in as_completed(futures):
                            region, key = futures[future]
                            try:
                                result = future.result()
                                total_p3_events += result["events"]

                                if result["events"] > 0:
                                    self.logger.info(f" [{region.upper()}] +{result['events']} events")
                            except Exception as e:
                                self.logger.error(f" [{region.upper()}] Thread error: {e}")

                    self.logger.info(f"P3 coverage complete: +{total_p3_events} total events")

                    for region in region_names:
                        scanned_cells.pop(f"{region}_p3", None)
                    save_checkpoint(cycle, scanned_cells)

                # Print summary every 5 cycles
                if cycle % 5 == 0:
                    self.logger.info("\n--- DATABASE SUMMARY ---")
                    for region_name, db in self.databases.items():
                        result = db.execute(
                            "SELECT COUNT(*) as events, COUNT(DISTINCT username) as users FROM events"
                        ).fetchone()
                        self.logger.info(f" {region_name.upper():10}: {result[0]:,} events, {result[1]:,} users")

                if self.running:
                    time.sleep(10)

        except Exception as e:
            self.logger.error(f"Fatal error: {e}", exc_info=True)
            raise
        finally:
            self._remove_pid()
            for db in self.databases.values():
                db.close()
            self.logger.info("Collector stopped.")


# === Collection Commands ===

@cli.command()
@click.option("--web", "-w", is_flag=True, help="Also start the web UI")
@click.option("--port", "-p", default=5000, help="Web UI port (default: 5000)")
@click.option("--region", "-r", multiple=True, help="Specific regions to scan (can be repeated)")
def collect(web, port, region):
    """Start worldwide multi-threaded data collection.

    This runs the full worldwide collector which scans all continents
    in parallel. Use --web to also start the visualization UI.

    Examples:
        waze collect                      # Collect from all regions
        waze collect --web                # Collect + web UI on port 5000
        waze collect --web --port 8080    # Web UI on port 8080
        waze collect -r europe -r asia    # Only Europe and Asia
    """
    pid = CLIWorldwideCollector.get_pid()
    if pid:
        click.echo(f"Collector already running (PID {pid})")
        return

    web_port = port if web else None
    selected_regions = list(region) if region else None

    click.echo("Starting worldwide collector...")
    if web_port:
        click.echo(f"Web UI will be available at http://localhost:{web_port}")

    collector = CLIWorldwideCollector(web_port=web_port, regions=selected_regions)
    collector.run()


@cli.command()
@click.option("--europe", is_flag=True, help="Start Europe-wide collector (legacy)")
def start(europe):
    """Start the collector daemon (legacy - use 'collect' for worldwide)."""
    if europe:
        from collector_europe import EuropeCollector
        pid = EuropeCollector.get_pid()
        if pid:
            click.echo(f"Europe collector already running (PID {pid})")
            return
        click.echo("Starting Europe collector...")
        collector = EuropeCollector()
        collector.run()
    else:
        from collector import Collector
        pid = Collector.get_pid()
        if pid:
            click.echo(f"Collector already running (PID {pid})")
            return
        click.echo("Starting collector...")
        collector = Collector()
        collector.run()


@cli.command()
@click.option("--europe", is_flag=True, help="Stop Europe-wide collector")
@click.option("--worldwide", "-w", is_flag=True, help="Stop worldwide collector")
def stop(europe, worldwide):
    """Stop the collector daemon."""
    if worldwide:
        pid = CLIWorldwideCollector.get_pid()
        if not pid:
            click.echo("Worldwide collector is not running")
            return
        click.echo(f"Stopping worldwide collector (PID {pid})...")
        os.kill(pid, signal.SIGTERM)
        click.echo("Stop signal sent")
    elif europe:
        from collector_europe import EuropeCollector
        pid = EuropeCollector.get_pid()
        if not pid:
            click.echo("Europe collector is not running")
            return
        click.echo(f"Stopping Europe collector (PID {pid})...")
        os.kill(pid, signal.SIGTERM)
        click.echo("Stop signal sent")
    else:
        from collector import Collector
        pid = Collector.get_pid()
        if not pid:
            click.echo("Collector is not running")
            return
        click.echo(f"Stopping collector (PID {pid})...")
        os.kill(pid, signal.SIGTERM)
        click.echo("Stop signal sent")


@cli.command()
@click.option("--all", "-a", "show_all", is_flag=True, help="Show all regional databases")
def status(show_all):
    """Show collector status and database summary."""
    from collector import Collector

    # Check all collector types
    worldwide_pid = CLIWorldwideCollector.get_pid()
    madrid_pid = Collector.get_pid()

    click.echo("=== Collector Status ===")
    click.echo(f"Worldwide: {'Running (PID ' + str(worldwide_pid) + ')' if worldwide_pid else 'Stopped'}")
    click.echo(f"Madrid: {'Running (PID ' + str(madrid_pid) + ')' if madrid_pid else 'Stopped'}")

    # Check for Europe collector
    try:
        from collector_europe import EuropeCollector
        europe_pid = EuropeCollector.get_pid()
        click.echo(f"Europe: {'Running (PID ' + str(europe_pid) + ')' if europe_pid else 'Stopped'}")
    except ImportError:
        pass

    click.echo()

    if show_all:
        # Show all regional databases
        click.echo("=== Regional Database Summary ===")
        total_events = 0
        all_users = set()
        first_event = None
        last_event = None
        all_types = {}

        dbs = get_all_dbs()
        if not dbs:
            click.echo("No regional databases found")
            return

        table = []
        for region, db in dbs:
            try:
                row = db.execute("""
                    SELECT COUNT(*) as count,
                           COUNT(DISTINCT username) as users,
                           MIN(timestamp_utc) as first_event,
                           MAX(timestamp_utc) as last_event
                    FROM events
                """).fetchone()

                events = row["count"] or 0
                users = row["users"] or 0
                total_events += events

                # Get unique users
                user_rows = db.execute("SELECT DISTINCT username FROM events").fetchall()
                for u in user_rows:
                    all_users.add(u["username"])

                # Get event types
                type_rows = db.execute("""
                    SELECT report_type, COUNT(*) as count FROM events GROUP BY report_type
                """).fetchall()
                for tr in type_rows:
                    t = tr["report_type"]
                    all_types[t] = all_types.get(t, 0) + tr["count"]

                if row["first_event"]:
                    if first_event is None or row["first_event"] < first_event:
                        first_event = row["first_event"]
                if row["last_event"]:
                    if last_event is None or row["last_event"] > last_event:
                        last_event = row["last_event"]

                table.append([
                    region.upper(),
                    f"{events:,}",
                    f"{users:,}",
                    row["first_event"][:10] if row["first_event"] else "N/A",
                    row["last_event"][:10] if row["last_event"] else "N/A"
                ])

                db.close()
            except Exception as e:
                table.append([region.upper(), "Error", str(e)[:30], "", ""])

        click.echo(tabulate(table, headers=["Region", "Events", "Users", "First", "Last"]))

        click.echo("\n=== Totals ===")
        click.echo(f"Total events: {total_events:,}")
        click.echo(f"Unique users: {len(all_users):,}")
        if first_event and last_event:
            click.echo(f"Time range: {first_event[:19]} -> {last_event[:19]}")

        if all_types:
            click.echo("\nBy type (all regions):")
            for t, count in sorted(all_types.items(), key=lambda x: -x[1])[:10]:
                pct = count / total_events * 100 if total_events else 0
                click.echo(f" {t:12} {count:>8,} ({pct:.1f}%)")
    else:
        # Show default Madrid database (legacy behavior)
        config = load_config()
        click.echo(f"Database: {config['database_path']}")
        click.echo(f"Polling interval: {config.get('polling_interval_seconds', 300)}s")
        click.echo()

        if os.path.exists(config["database_path"]):
            from analysis import get_stats
            db = get_db()
            stats = get_stats(db)

            click.echo(f"Total events: {stats['total_events']:,}")
            click.echo(f"Unique users: {stats['unique_users']:,}")

            if stats['first_event']:
                click.echo(f"Time range: {stats['first_event'][:19]} -> {stats['last_event'][:19]}")

            if stats['by_type']:
                click.echo("\nBy type:")
                for t, count in sorted(stats['by_type'].items(), key=lambda x: -x[1]):
                    pct = count / stats['total_events'] * 100 if stats['total_events'] else 0
                    click.echo(f" {t:12} {count:>6,} ({pct:.1f}%)")

            db.close()
        else:
            click.echo("No data collected yet")

    click.echo("\nTip: Use 'waze status --all' to see all regional databases")

# === Data Exploration Commands ===

@cli.command()
def stats():
    """Show summary statistics."""
    from analysis import get_stats

    db = get_db()
    s = get_stats(db)

    click.echo(f"Total events: {s['total_events']:,}")
    click.echo(f"Unique users: {s['unique_users']:,}")

    if s['first_event']:
        click.echo(f"First event: {s['first_event'][:19]}")
        click.echo(f"Last event: {s['last_event'][:19]}")

    if s['by_type']:
        click.echo("\nBy type:")
        for t, count in sorted(s['by_type'].items(), key=lambda x: -x[1]):
            pct = count / s['total_events'] * 100 if s['total_events'] else 0
            click.echo(f" {t:12} {count:>6,} ({pct:.1f}%)")

    db.close()


@cli.command()
@click.option("-n", "--limit", default=20, help="Number of events to show")
def recent(limit):
    """Show recent events."""
    from analysis import get_recent_events

    db = get_db()
    events = get_recent_events(db, limit)

    if not events:
        click.echo("No events found")
        return

    table = []
    for e in events:
        table.append([
            e["timestamp_utc"][:19],
            e["username"][:20],
            e["report_type"],
            f"{e['latitude']:.4f}",
            f"{e['longitude']:.4f}"
        ])

    click.echo(tabulate(table, headers=["Time", "User", "Type", "Lat", "Lon"]))
    db.close()


@cli.command()
@click.option("-u", "--username", help="Filter by username")
@click.option("-t", "--type", "report_type", help="Filter by report type")
@click.option("--since", help="Time filter (e.g., '2h', '1d')")
@click.option("-n", "--limit", default=50, help="Max results")
def search(username, report_type, since, limit):
    """Search events with filters."""
    db = get_db()

    query = "SELECT * FROM events WHERE 1=1"
    params = []

    if username:
        query += " AND username = ?"
        params.append(username)

    if report_type:
        query += " AND report_type = ?"
        params.append(report_type.upper())

    if since:
        # Parse time filter
        unit = since[-1]
        value = int(since[:-1])
        if unit == 'h':
            delta = timedelta(hours=value)
        elif unit == 'd':
            delta = timedelta(days=value)
        elif unit == 'm':
            delta = timedelta(minutes=value)
        else:
            click.echo(f"Unknown time unit: {unit}")
            return

        # Use an aware UTC cutoff so its ISO string matches the "+00:00"
        # format of stored timestamps (datetime.utcnow() is deprecated and
        # would produce a naive, offset-less string).
        cutoff = datetime.now(timezone.utc) - delta
        query += " AND timestamp_utc >= ?"
        params.append(cutoff.isoformat())

    query += " ORDER BY timestamp_ms DESC LIMIT ?"
    params.append(limit)

    rows = db.execute(query, tuple(params)).fetchall()

    if not rows:
        click.echo("No events found")
        return

    table = []
    for r in rows:
        table.append([
            r["timestamp_utc"][:19],
            r["username"][:20],
            r["report_type"],
            f"{r['latitude']:.4f}",
            f"{r['longitude']:.4f}"
        ])

    click.echo(tabulate(table, headers=["Time", "User", "Type", "Lat", "Lon"]))
    click.echo(f"\n{len(rows)} events found")
    db.close()
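
# Example invocations (illustrative usernames and types), matching the
# options defined above:
#
#   waze search -t JAM --since 2h -n 10
#   waze search -u some_user --since 1d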

# === User Analysis Commands ===

@cli.command()
@click.option("-n", "--limit", default=50, help="Number of users to show")
def users(limit):
    """List users with event counts."""
    from analysis import get_users_summary

    db = get_db()
    user_list = get_users_summary(db, limit)

    if not user_list:
        click.echo("No users found")
        return

    table = []
    for u in user_list:
        table.append([
            u["username"][:25],
            u["event_count"],
            u["first_seen"][:10],
            u["last_seen"][:10]
        ])

    click.echo(tabulate(table, headers=["Username", "Events", "First Seen", "Last Seen"]))
    db.close()


@cli.command()
@click.argument("username")
def profile(username):
    """Show detailed profile for a user."""
    from analysis import get_user_profile

    db = get_db()
    p = get_user_profile(db, username)

    if not p:
        click.echo(f"User '{username}' not found")
        return

    click.echo(f"User: {p['username']}")
    click.echo(f"Events: {p['event_count']}")
    click.echo(f"First seen: {p['first_seen'][:19]}")
    click.echo(f"Last seen: {p['last_seen'][:19]}")
    click.echo(f"Center location: {p['center_location']['lat']:.4f}, {p['center_location']['lon']:.4f}")

    click.echo("\nReport types:")
    for t, count in sorted(p['type_breakdown'].items(), key=lambda x: -x[1]):
        click.echo(f" {t}: {count}")

    click.echo("\nRecent events:")
    table = []
    for e in p['events'][-10:]:
        table.append([
            e["timestamp_utc"][:19],
            e["report_type"],
            f"{e['latitude']:.4f}, {e['longitude']:.4f}"
        ])
    click.echo(tabulate(table, headers=["Time", "Type", "Location"]))

    db.close()


# === Export Commands ===

@cli.command()
@click.option("--format", "fmt", type=click.Choice(["csv", "geojson"]), default="csv")
@click.option("-o", "--output", help="Output file path")
def export(fmt, output):
    """Export events to CSV or GeoJSON."""
    import csv

    db = get_db()
    rows = db.execute("SELECT * FROM events ORDER BY timestamp_ms").fetchall()

    if not rows:
        click.echo("No events to export")
        return

    if not output:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output = f"exports/events_{timestamp}.{fmt}"

    os.makedirs(os.path.dirname(output) or ".", exist_ok=True)

    if fmt == "csv":
        with open(output, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(rows[0].keys())
            for row in rows:
                writer.writerow(tuple(row))

    elif fmt == "geojson":
        features = []
        for row in rows:
            features.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [row["longitude"], row["latitude"]]
                },
                "properties": {
                    "username": row["username"],
                    "timestamp": row["timestamp_utc"],
                    "type": row["report_type"],
                    "subtype": row["subtype"]
                }
            })

        geojson = {"type": "FeatureCollection", "features": features}
        with open(output, "w") as f:
            json.dump(geojson, f)

    click.echo(f"Exported {len(rows)} events to {output}")
    db.close()
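
# Illustrative GeoJSON feature as produced by 'waze export --format geojson'
# (hypothetical values; note that GeoJSON coordinate order is [lon, lat]):
#
#   {"type": "Feature",
#    "geometry": {"type": "Point", "coordinates": [-3.7038, 40.4168]},
#    "properties": {"username": "user1", "timestamp": "2024-01-01T12:00:00+00:00",
#                   "type": "JAM", "subtype": null}}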

# === Config Commands ===

@cli.command()
@click.option("--interval", type=int, help="Set polling interval in seconds")
def config(interval):
    """Show or modify configuration."""
    cfg = load_config()

    if interval:
        cfg["polling_interval_seconds"] = interval
        with open("config.yaml", "w") as f:
            yaml.dump(cfg, f, default_flow_style=False)
        click.echo(f"Polling interval set to {interval} seconds")
    else:
        click.echo(yaml.dump(cfg, default_flow_style=False))


# === Collection Stats Commands ===

@cli.command()
@click.option("-n", "--days", default=7, help="Number of days to show")
@click.option("--all", "-a", "show_all", is_flag=True, help="Show all regional databases")
def daily(days, show_all):
    """Show daily collection statistics."""
    if show_all:
        # Aggregate from all databases
        daily_stats = {}

        for region, db in get_all_dbs():
            try:
                stats = db.get_daily_stats(days)
                for s in stats:
                    date = s["date"]
                    if date not in daily_stats:
                        daily_stats[date] = {"events": 0, "users": set(), "requests": 0, "errors": 0}
                    daily_stats[date]["events"] += s.get("events_collected", 0)
                    daily_stats[date]["requests"] += s.get("api_requests", 0)
                    daily_stats[date]["errors"] += s.get("api_errors", 0)

                # Get unique users per day
                for date in daily_stats.keys():
                    user_rows = db.execute("""
                        SELECT DISTINCT username FROM events
                        WHERE DATE(timestamp_utc) = ?
                    """, (date,)).fetchall()
                    for u in user_rows:
                        daily_stats[date]["users"].add(u["username"])

                db.close()
            except Exception:
                pass

        if not daily_stats:
            click.echo("No daily stats recorded yet")
            return

        table = []
        for date in sorted(daily_stats.keys(), reverse=True):
            s = daily_stats[date]
            table.append([
                date,
                f"{s['events']:,}",
                f"{len(s['users']):,}",
                f"{s['requests']:,}",
                f"{s['errors']:,}",
            ])

        click.echo("=== Worldwide Daily Statistics ===")
        click.echo(tabulate(table, headers=["Date", "Events", "Users", "Requests", "Errors"]))
    else:
        db = get_db()
        stats = db.get_daily_stats(days)

        if not stats:
            click.echo("No daily stats recorded yet")
            click.echo("Tip: Use 'waze daily --all' to see all regional databases")
            return

        table = []
        for s in stats:
            table.append([
                s["date"],
                f"{s['events_collected']:,}",
                f"{s['unique_users']:,}",
                f"{s['api_requests']:,}",
                f"{s['api_errors']:,}",
            ])

        click.echo(tabulate(table, headers=["Date", "Events", "Users", "Requests", "Errors"]))
        db.close()


@cli.command()
@click.option("--port", "-p", default=5000, help="Port to run the web UI on (default: 5000)")
def web(port):
    """Start the web visualization UI only (no collection).

    Use this to view collected data without running the collector.
    """
    click.echo(f"Starting web UI at http://localhost:{port}")
    click.echo("Press Ctrl+C to stop")

    sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'web'))
    from web.app import app
    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)


@cli.command()
def summary():
    """Show overall collection summary (all regions)."""
    total_events = 0
    all_users = set()
    days_collected = set()
    grid_cells = set()
    first_event = None
    last_event = None

    dbs = get_all_dbs()
    if not dbs:
        click.echo("No data collected yet")
        return

    region_stats = []

    for region, db in dbs:
        try:
            s = db.get_collection_summary()
            if s and s.get('total_events'):
                total_events += s['total_events']

                # Get unique users
                user_rows = db.execute("SELECT DISTINCT username FROM events").fetchall()
                for u in user_rows:
                    all_users.add(u["username"])

                # Track dates
                date_rows = db.execute("""
                    SELECT DISTINCT DATE(timestamp_utc) as dt FROM events
                """).fetchall()
                for d in date_rows:
                    if d["dt"]:
                        days_collected.add(d["dt"])

                # Track grid cells
                cell_rows = db.execute("""
                    SELECT DISTINCT grid_cell FROM events WHERE grid_cell IS NOT NULL
                """).fetchall()
                for c in cell_rows:
                    grid_cells.add(c["grid_cell"])

                if s['first_event']:
                    if first_event is None or s['first_event'] < first_event:
                        first_event = s['first_event']
                if s['last_event']:
                    if last_event is None or s['last_event'] > last_event:
                        last_event = s['last_event']

                region_stats.append((region, s['total_events'], s['unique_users']))

            db.close()
        except Exception as e:
            click.echo(f"Error reading {region}: {e}")

    if total_events == 0:
        click.echo("No data collected yet")
        return

    click.echo("=== Worldwide Collection Summary ===")
    click.echo(f"Total events: {total_events:,}")
    click.echo(f"Unique users: {len(all_users):,}")
    click.echo(f"Days collected: {len(days_collected)}")
    click.echo(f"Grid cells used: {len(grid_cells)}")
    click.echo(f"First event: {first_event[:19] if first_event else 'N/A'}")
    click.echo(f"Last event: {last_event[:19] if last_event else 'N/A'}")

    if len(days_collected) > 0:
        avg = total_events / len(days_collected)
        click.echo(f"Avg events/day: {avg:.1f}")

    click.echo("\n=== By Region ===")
    for region, events, users in sorted(region_stats, key=lambda x: -x[1]):
        pct = events / total_events * 100 if total_events else 0
        click.echo(f" {region.upper():10} {events:>10,} events {users:>8,} users ({pct:.1f}%)")


@cli.command()
@click.option("-n", "--limit", default=20, help="Number of users to show")
def tracked(limit):
    """Show tracked users with most events."""
    db = get_db()
    users = db.get_tracked_users(limit)

    if not users:
        click.echo("No tracked users yet")
        return

    table = []
    for u in users:
        table.append([
            u["username"][:25],
            f"{u['event_count']:,}",
            u["first_seen"][:10],
            u["last_seen"][:10],
        ])

    click.echo(tabulate(table, headers=["Username", "Events", "First Seen", "Last Seen"]))
    db.close()


if __name__ == "__main__":
    cli()