webtap-tool 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webtap-tool might be problematic. Click here for more details.
- webtap/__init__.py +4 -0
- webtap/api.py +50 -57
- webtap/app.py +5 -0
- webtap/cdp/session.py +166 -27
- webtap/commands/TIPS.md +125 -22
- webtap/commands/_builders.py +7 -1
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/body.py +9 -5
- webtap/commands/connection.py +21 -0
- webtap/commands/javascript.py +13 -25
- webtap/commands/navigation.py +5 -0
- webtap/commands/quicktype.py +268 -0
- webtap/commands/to_model.py +23 -75
- webtap/services/body.py +209 -24
- webtap/services/dom.py +19 -12
- webtap/services/fetch.py +19 -0
- webtap/services/main.py +194 -2
- webtap/services/state_snapshot.py +88 -0
- {webtap_tool-0.7.0.dist-info → webtap_tool-0.8.0.dist-info}/METADATA +1 -1
- {webtap_tool-0.7.0.dist-info → webtap_tool-0.8.0.dist-info}/RECORD +22 -19
- {webtap_tool-0.7.0.dist-info → webtap_tool-0.8.0.dist-info}/WHEEL +0 -0
- {webtap_tool-0.7.0.dist-info → webtap_tool-0.8.0.dist-info}/entry_points.txt +0 -0
webtap/__init__.py
CHANGED
|
@@ -9,10 +9,14 @@ PUBLIC API:
|
|
|
9
9
|
- main: Entry point function for CLI
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
+
import atexit
|
|
12
13
|
import sys
|
|
13
14
|
|
|
14
15
|
from webtap.app import app
|
|
15
16
|
|
|
17
|
+
# Register cleanup on exit to shutdown DB thread
|
|
18
|
+
atexit.register(lambda: app.state.cleanup() if hasattr(app, "state") and app.state else None)
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
def main():
|
|
18
22
|
"""Entry point for the WebTap REPL.
|
webtap/api.py
CHANGED
|
@@ -119,9 +119,6 @@ async def connect(request: ConnectRequest) -> Dict[str, Any]:
|
|
|
119
119
|
# Wrap blocking CDP calls (connect + enable domains) in thread
|
|
120
120
|
result = await asyncio.to_thread(app_state.service.connect_to_page, page_id=request.page_id)
|
|
121
121
|
|
|
122
|
-
# Broadcast state change
|
|
123
|
-
await broadcast_state()
|
|
124
|
-
|
|
125
122
|
return result
|
|
126
123
|
|
|
127
124
|
|
|
@@ -134,9 +131,6 @@ async def disconnect() -> Dict[str, Any]:
|
|
|
134
131
|
# Wrap blocking CDP calls (fetch.disable + disconnect) in thread
|
|
135
132
|
result = await asyncio.to_thread(app_state.service.disconnect)
|
|
136
133
|
|
|
137
|
-
# Broadcast state change
|
|
138
|
-
await broadcast_state()
|
|
139
|
-
|
|
140
134
|
return result
|
|
141
135
|
|
|
142
136
|
|
|
@@ -146,7 +140,10 @@ async def clear_events() -> Dict[str, Any]:
|
|
|
146
140
|
if not app_state:
|
|
147
141
|
return {"error": "WebTap not initialized"}
|
|
148
142
|
|
|
149
|
-
|
|
143
|
+
# Wrap blocking DB operation in thread
|
|
144
|
+
result = await asyncio.to_thread(app_state.service.clear_events)
|
|
145
|
+
|
|
146
|
+
return result
|
|
150
147
|
|
|
151
148
|
|
|
152
149
|
@api.post("/fetch")
|
|
@@ -164,7 +161,7 @@ async def set_fetch_interception(request: FetchRequest) -> Dict[str, Any]:
|
|
|
164
161
|
result = await asyncio.to_thread(app_state.service.fetch.disable)
|
|
165
162
|
|
|
166
163
|
# Broadcast state change
|
|
167
|
-
|
|
164
|
+
app_state.service._trigger_broadcast()
|
|
168
165
|
|
|
169
166
|
return result
|
|
170
167
|
|
|
@@ -200,7 +197,7 @@ async def toggle_filter_category(category: str) -> Dict[str, Any]:
|
|
|
200
197
|
fm.save()
|
|
201
198
|
|
|
202
199
|
# Broadcast state change
|
|
203
|
-
|
|
200
|
+
app_state.service._trigger_broadcast()
|
|
204
201
|
|
|
205
202
|
return {"category": category, "enabled": enabled, "total_enabled": len(fm.enabled_categories)}
|
|
206
203
|
|
|
@@ -216,7 +213,7 @@ async def enable_all_filters() -> Dict[str, Any]:
|
|
|
216
213
|
fm.save()
|
|
217
214
|
|
|
218
215
|
# Broadcast state change
|
|
219
|
-
|
|
216
|
+
app_state.service._trigger_broadcast()
|
|
220
217
|
|
|
221
218
|
return {"enabled": list(fm.enabled_categories), "total": len(fm.enabled_categories)}
|
|
222
219
|
|
|
@@ -232,7 +229,7 @@ async def disable_all_filters() -> Dict[str, Any]:
|
|
|
232
229
|
fm.save()
|
|
233
230
|
|
|
234
231
|
# Broadcast state change
|
|
235
|
-
|
|
232
|
+
app_state.service._trigger_broadcast()
|
|
236
233
|
|
|
237
234
|
return {"enabled": [], "total": 0}
|
|
238
235
|
|
|
@@ -249,9 +246,6 @@ async def start_inspect() -> Dict[str, Any]:
|
|
|
249
246
|
# Wrap blocking CDP calls (DOM.enable, CSS.enable, Overlay.enable, setInspectMode) in thread
|
|
250
247
|
result = await asyncio.to_thread(app_state.service.dom.start_inspect)
|
|
251
248
|
|
|
252
|
-
# Broadcast state change
|
|
253
|
-
await broadcast_state()
|
|
254
|
-
|
|
255
249
|
return result
|
|
256
250
|
|
|
257
251
|
|
|
@@ -264,9 +258,6 @@ async def stop_inspect() -> Dict[str, Any]:
|
|
|
264
258
|
# Wrap blocking CDP call (Overlay.setInspectMode) in thread
|
|
265
259
|
result = await asyncio.to_thread(app_state.service.dom.stop_inspect)
|
|
266
260
|
|
|
267
|
-
# Broadcast state change
|
|
268
|
-
await broadcast_state()
|
|
269
|
-
|
|
270
261
|
return result
|
|
271
262
|
|
|
272
263
|
|
|
@@ -278,9 +269,6 @@ async def clear_selections() -> Dict[str, Any]:
|
|
|
278
269
|
|
|
279
270
|
app_state.service.dom.clear_selections()
|
|
280
271
|
|
|
281
|
-
# Broadcast state change
|
|
282
|
-
await broadcast_state()
|
|
283
|
-
|
|
284
272
|
return {"success": True, "selections": {}}
|
|
285
273
|
|
|
286
274
|
|
|
@@ -293,7 +281,7 @@ async def dismiss_error() -> Dict[str, Any]:
|
|
|
293
281
|
app_state.error_state = None
|
|
294
282
|
|
|
295
283
|
# Broadcast state change
|
|
296
|
-
|
|
284
|
+
app_state.service._trigger_broadcast()
|
|
297
285
|
|
|
298
286
|
return {"success": True}
|
|
299
287
|
|
|
@@ -363,11 +351,11 @@ async def stream_events():
|
|
|
363
351
|
def get_full_state() -> Dict[str, Any]:
|
|
364
352
|
"""Get complete WebTap state for broadcasting.
|
|
365
353
|
|
|
366
|
-
|
|
367
|
-
|
|
354
|
+
Thread-safe, zero-lock reads from immutable snapshot.
|
|
355
|
+
No blocking I/O - returns cached snapshot immediately.
|
|
368
356
|
|
|
369
357
|
Returns:
|
|
370
|
-
Dictionary with all state information
|
|
358
|
+
Dictionary with all state information for SSE clients
|
|
371
359
|
"""
|
|
372
360
|
if not app_state:
|
|
373
361
|
return {
|
|
@@ -375,40 +363,35 @@ def get_full_state() -> Dict[str, Any]:
|
|
|
375
363
|
"events": {"total": 0},
|
|
376
364
|
"fetch": {"enabled": False, "paused_count": 0},
|
|
377
365
|
"filters": {"enabled": [], "disabled": []},
|
|
378
|
-
"browser": {"inspect_active": False, "selections": {}, "prompt": ""},
|
|
366
|
+
"browser": {"inspect_active": False, "selections": {}, "prompt": "", "pending_count": 0},
|
|
379
367
|
"error": None,
|
|
380
368
|
}
|
|
381
369
|
|
|
382
|
-
# Get
|
|
383
|
-
|
|
384
|
-
page_info = app_state.cdp.page_info or {}
|
|
385
|
-
|
|
386
|
-
# Get event counts
|
|
387
|
-
event_count = app_state.service.event_count
|
|
388
|
-
|
|
389
|
-
# Get fetch status
|
|
390
|
-
fetch_enabled = app_state.service.fetch.enabled
|
|
391
|
-
paused_count = app_state.service.fetch.paused_count if fetch_enabled else 0
|
|
392
|
-
|
|
393
|
-
# Get filter status
|
|
394
|
-
fm = app_state.service.filters
|
|
395
|
-
filter_categories = list(fm.filters.keys())
|
|
396
|
-
enabled_filters = list(fm.enabled_categories)
|
|
397
|
-
disabled_filters = [cat for cat in filter_categories if cat not in enabled_filters]
|
|
398
|
-
|
|
399
|
-
# Get browser/DOM state (includes pending_count for progress indicator)
|
|
400
|
-
browser_state = app_state.service.dom.get_state()
|
|
370
|
+
# Get immutable snapshot (NO LOCKS NEEDED - inherently thread-safe)
|
|
371
|
+
snapshot = app_state.service.get_state_snapshot()
|
|
401
372
|
|
|
373
|
+
# Convert snapshot to frontend format
|
|
402
374
|
return {
|
|
403
|
-
"connected": connected,
|
|
404
|
-
"page": {
|
|
405
|
-
|
|
375
|
+
"connected": snapshot.connected,
|
|
376
|
+
"page": {
|
|
377
|
+
"id": snapshot.page_id,
|
|
378
|
+
"title": snapshot.page_title,
|
|
379
|
+
"url": snapshot.page_url,
|
|
380
|
+
}
|
|
381
|
+
if snapshot.connected
|
|
382
|
+
else None,
|
|
383
|
+
"events": {"total": snapshot.event_count},
|
|
384
|
+
"fetch": {"enabled": snapshot.fetch_enabled, "paused_count": snapshot.paused_count},
|
|
385
|
+
"filters": {"enabled": list(snapshot.enabled_filters), "disabled": list(snapshot.disabled_filters)},
|
|
386
|
+
"browser": {
|
|
387
|
+
"inspect_active": snapshot.inspect_active,
|
|
388
|
+
"selections": snapshot.selections,
|
|
389
|
+
"prompt": snapshot.prompt,
|
|
390
|
+
"pending_count": snapshot.pending_count,
|
|
391
|
+
},
|
|
392
|
+
"error": {"message": snapshot.error_message, "timestamp": snapshot.error_timestamp}
|
|
393
|
+
if snapshot.error_message
|
|
406
394
|
else None,
|
|
407
|
-
"events": {"total": event_count},
|
|
408
|
-
"fetch": {"enabled": fetch_enabled, "paused_count": paused_count},
|
|
409
|
-
"filters": {"enabled": enabled_filters, "disabled": disabled_filters},
|
|
410
|
-
"browser": browser_state, # Contains inspect_active, selections, prompt, pending_count
|
|
411
|
-
"error": app_state.error_state, # Current error or None
|
|
412
395
|
}
|
|
413
396
|
|
|
414
397
|
|
|
@@ -429,7 +412,14 @@ async def broadcast_state():
|
|
|
429
412
|
try:
|
|
430
413
|
queue.put_nowait(state)
|
|
431
414
|
except asyncio.QueueFull:
|
|
432
|
-
|
|
415
|
+
# Client is falling behind - discard oldest state and retry with latest
|
|
416
|
+
logger.warning(f"SSE client queue full ({queue.qsize()}/{queue.maxsize}), discarding oldest state")
|
|
417
|
+
try:
|
|
418
|
+
queue.get_nowait() # Discard oldest
|
|
419
|
+
queue.put_nowait(state) # Retry with latest
|
|
420
|
+
except Exception as retry_err:
|
|
421
|
+
logger.debug(f"Failed to recover full queue: {retry_err}")
|
|
422
|
+
dead_queues.add(queue)
|
|
433
423
|
except Exception as e:
|
|
434
424
|
logger.debug(f"Failed to broadcast to client: {e}")
|
|
435
425
|
dead_queues.add(queue)
|
|
@@ -470,7 +460,9 @@ async def broadcast_processor():
|
|
|
470
460
|
async with _sse_clients_lock:
|
|
471
461
|
for queue in list(_sse_clients):
|
|
472
462
|
try:
|
|
473
|
-
|
|
463
|
+
queue.put_nowait(None) # Non-blocking shutdown signal
|
|
464
|
+
except asyncio.QueueFull:
|
|
465
|
+
pass # Client is hung, skip
|
|
474
466
|
except Exception:
|
|
475
467
|
pass
|
|
476
468
|
_sse_clients.clear()
|
|
@@ -521,11 +513,12 @@ def start_api_server(state, host: str = "127.0.0.1", port: int = 8765) -> thread
|
|
|
521
513
|
logger.error("Broadcast queue initialization timed out")
|
|
522
514
|
return thread
|
|
523
515
|
|
|
524
|
-
# Wire queue to
|
|
516
|
+
# Wire queue to service and CDP session after event loop starts
|
|
517
|
+
# Note: DOMService uses callback to service._trigger_broadcast instead of direct queue access
|
|
525
518
|
if _broadcast_queue and app_state:
|
|
526
|
-
app_state.service.
|
|
519
|
+
app_state.service.set_broadcast_queue(_broadcast_queue)
|
|
527
520
|
app_state.cdp.set_broadcast_queue(_broadcast_queue)
|
|
528
|
-
logger.info("Broadcast queue wired to
|
|
521
|
+
logger.info("Broadcast queue wired to WebTapService and CDPSession")
|
|
529
522
|
|
|
530
523
|
logger.info(f"API server started on http://{host}:{port}")
|
|
531
524
|
return thread
|
webtap/app.py
CHANGED
|
@@ -57,6 +57,10 @@ class WebTapState:
|
|
|
57
57
|
# Give server 1.5s to close SSE connections and shutdown gracefully
|
|
58
58
|
self.api_thread.join(timeout=1.5)
|
|
59
59
|
|
|
60
|
+
# Shutdown DB thread (this is the only place where DB thread should stop)
|
|
61
|
+
if hasattr(self, "cdp") and self.cdp:
|
|
62
|
+
self.cdp.cleanup()
|
|
63
|
+
|
|
60
64
|
|
|
61
65
|
# Must be created before command imports for decorator registration
|
|
62
66
|
app = App(
|
|
@@ -90,6 +94,7 @@ else:
|
|
|
90
94
|
from webtap.commands import fetch # noqa: E402, F401
|
|
91
95
|
from webtap.commands import body # noqa: E402, F401
|
|
92
96
|
from webtap.commands import to_model # noqa: E402, F401
|
|
97
|
+
from webtap.commands import quicktype # noqa: E402, F401
|
|
93
98
|
from webtap.commands import selections # noqa: E402, F401
|
|
94
99
|
from webtap.commands import server # noqa: E402, F401
|
|
95
100
|
from webtap.commands import setup # noqa: E402, F401
|
webtap/cdp/session.py
CHANGED
|
@@ -6,6 +6,7 @@ PUBLIC API:
|
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
|
+
import queue
|
|
9
10
|
import threading
|
|
10
11
|
from concurrent.futures import Future, TimeoutError
|
|
11
12
|
from typing import Any
|
|
@@ -54,9 +55,18 @@ class CDPSession:
|
|
|
54
55
|
self._lock = threading.Lock()
|
|
55
56
|
|
|
56
57
|
# DuckDB storage - store events AS-IS
|
|
58
|
+
# DuckDB connections are NOT thread-safe - use dedicated DB thread
|
|
57
59
|
self.db = duckdb.connect(":memory:")
|
|
60
|
+
self._db_work_queue: queue.Queue = queue.Queue()
|
|
61
|
+
self._db_result_queues: dict[int, queue.Queue] = {}
|
|
62
|
+
self._db_running = True
|
|
58
63
|
|
|
59
|
-
|
|
64
|
+
# Start dedicated database thread
|
|
65
|
+
self._db_thread = threading.Thread(target=self._db_worker, daemon=True)
|
|
66
|
+
self._db_thread.start()
|
|
67
|
+
|
|
68
|
+
# Initialize schema via queue
|
|
69
|
+
self._db_execute("CREATE TABLE events (event JSON)", wait_result=False)
|
|
60
70
|
|
|
61
71
|
# Live field path lookup for fast discovery
|
|
62
72
|
# Maps lowercase field names to their full paths with original case
|
|
@@ -68,8 +78,81 @@ class CDPSession:
|
|
|
68
78
|
|
|
69
79
|
# Broadcast queue for SSE state updates (set by API server)
|
|
70
80
|
self._broadcast_queue: "Any | None" = None
|
|
71
|
-
|
|
72
|
-
|
|
81
|
+
|
|
82
|
+
# Disconnect callback for service-level cleanup
|
|
83
|
+
self._disconnect_callback: "Any | None" = None
|
|
84
|
+
|
|
85
|
+
def _db_worker(self) -> None:
|
|
86
|
+
"""Dedicated thread for all database operations.
|
|
87
|
+
|
|
88
|
+
Ensures thread safety by serializing all DuckDB access through one thread.
|
|
89
|
+
DuckDB connections are not thread-safe - sharing them causes malloc corruption.
|
|
90
|
+
"""
|
|
91
|
+
while self._db_running:
|
|
92
|
+
try:
|
|
93
|
+
task = self._db_work_queue.get(timeout=1)
|
|
94
|
+
|
|
95
|
+
if task is None: # Shutdown signal
|
|
96
|
+
break
|
|
97
|
+
|
|
98
|
+
operation_type, sql, params, result_queue_id = task
|
|
99
|
+
|
|
100
|
+
try:
|
|
101
|
+
if operation_type == "execute":
|
|
102
|
+
result = self.db.execute(sql, params or [])
|
|
103
|
+
data = result.fetchall() if result else []
|
|
104
|
+
elif operation_type == "delete":
|
|
105
|
+
self.db.execute(sql, params or [])
|
|
106
|
+
data = None
|
|
107
|
+
else:
|
|
108
|
+
data = None
|
|
109
|
+
|
|
110
|
+
# Send result back if requested
|
|
111
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
112
|
+
self._db_result_queues[result_queue_id].put(("success", data))
|
|
113
|
+
|
|
114
|
+
except Exception as e:
|
|
115
|
+
logger.error(f"Database error: {e}")
|
|
116
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
117
|
+
self._db_result_queues[result_queue_id].put(("error", str(e)))
|
|
118
|
+
|
|
119
|
+
finally:
|
|
120
|
+
self._db_work_queue.task_done()
|
|
121
|
+
|
|
122
|
+
except queue.Empty:
|
|
123
|
+
continue
|
|
124
|
+
|
|
125
|
+
def _db_execute(self, sql: str, params: list | None = None, wait_result: bool = True) -> Any:
|
|
126
|
+
"""Submit database operation to dedicated thread.
|
|
127
|
+
|
|
128
|
+
Args:
|
|
129
|
+
sql: SQL query or command
|
|
130
|
+
params: Optional query parameters
|
|
131
|
+
wait_result: Block until operation completes and return result
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
Query results if wait_result=True, None otherwise
|
|
135
|
+
"""
|
|
136
|
+
result_queue_id = None
|
|
137
|
+
result_queue = None
|
|
138
|
+
|
|
139
|
+
if wait_result:
|
|
140
|
+
result_queue_id = id(threading.current_thread())
|
|
141
|
+
result_queue = queue.Queue()
|
|
142
|
+
self._db_result_queues[result_queue_id] = result_queue
|
|
143
|
+
|
|
144
|
+
# Submit to work queue
|
|
145
|
+
self._db_work_queue.put(("execute", sql, params, result_queue_id))
|
|
146
|
+
|
|
147
|
+
if wait_result and result_queue and result_queue_id:
|
|
148
|
+
status, data = result_queue.get()
|
|
149
|
+
del self._db_result_queues[result_queue_id]
|
|
150
|
+
|
|
151
|
+
if status == "error":
|
|
152
|
+
raise RuntimeError(f"Database error: {data}")
|
|
153
|
+
return data
|
|
154
|
+
|
|
155
|
+
return None
|
|
73
156
|
|
|
74
157
|
def list_pages(self) -> list[dict]:
|
|
75
158
|
"""List available Chrome pages via HTTP API.
|
|
@@ -136,7 +219,7 @@ class CDPSession:
|
|
|
136
219
|
kwargs={
|
|
137
220
|
"ping_interval": 30, # Ping every 30s
|
|
138
221
|
"ping_timeout": 20, # Wait 20s for pong (increased from 10s for heavy CDP load)
|
|
139
|
-
|
|
222
|
+
# No auto-reconnect - make disconnects explicit
|
|
140
223
|
"skip_utf8_validation": True, # Faster
|
|
141
224
|
},
|
|
142
225
|
)
|
|
@@ -149,18 +232,46 @@ class CDPSession:
|
|
|
149
232
|
raise TimeoutError("Failed to connect to Chrome")
|
|
150
233
|
|
|
151
234
|
def disconnect(self) -> None:
    """Close the WebSocket connection; stored events and the DB thread are kept.

    Only the connection state is reset here (``ws_app``, ``ws_thread``,
    the ``connected`` flag and ``page_info``). The database thread is stopped
    by ``cleanup()``, which is meant for application exit.
    """
    # Take ownership of the socket app and clear the attribute in one locked
    # step. _on_close treats a cleared ws_app as a manual disconnect and
    # therefore does not fire the service-level disconnect callback.
    with self._lock:
        ws_app, self.ws_app = self.ws_app, None

    if ws_app:
        ws_app.close()

    # Give the WebSocket thread a bounded amount of time to finish.
    reader_thread = self.ws_thread
    if reader_thread and reader_thread.is_alive():
        reader_thread.join(timeout=2)
        self.ws_thread = None

    # The DB thread is deliberately left running so events survive a reconnect.
    self.connected.clear()
    self.page_info = None
|
|
163
258
|
|
|
259
|
+
def cleanup(self) -> None:
    """Disconnect and stop the database thread (intended for app exit only).

    The event store is an in-memory database, so stored events are lost once
    the database thread has been stopped.
    """
    # Close the WebSocket first if one is still attached.
    if self.ws_app:
        self.disconnect()

    # Ask the worker to stop: clear the loop flag and enqueue the sentinel
    # that wakes it from its blocking queue read.
    self._db_running = False
    self._db_work_queue.put(None)  # Signal shutdown

    db_thread = self._db_thread
    if not db_thread.is_alive():
        return
    # Bounded wait so application exit cannot hang on the worker.
    db_thread.join(timeout=2)
|
|
274
|
+
|
|
164
275
|
def send(self, method: str, params: dict | None = None) -> Future:
|
|
165
276
|
"""Send CDP command asynchronously.
|
|
166
277
|
|
|
@@ -245,7 +356,7 @@ class CDPSession:
|
|
|
245
356
|
|
|
246
357
|
# CDP event - store AS-IS in DuckDB and update field lookup
|
|
247
358
|
elif "method" in data:
|
|
248
|
-
self.
|
|
359
|
+
self._db_execute("INSERT INTO events VALUES (?)", [json.dumps(data)], wait_result=False)
|
|
249
360
|
self._update_field_lookup(data)
|
|
250
361
|
|
|
251
362
|
# Call registered event callbacks
|
|
@@ -263,15 +374,39 @@ class CDPSession:
|
|
|
263
374
|
|
|
264
375
|
def _on_close(self, ws, code, reason):
    """Handle WebSocket closure: fail pending commands and reset connection state.

    Args:
        ws: The closed WebSocket object (not used by this handler).
        code: Close status code, forwarded to the disconnect callback.
        reason: Close reason, included in the error set on pending commands
            and forwarded to the disconnect callback.

    A close is treated as unexpected when the session was connected and
    ``ws_app`` is still set; ``disconnect()`` clears ``ws_app`` before closing,
    so manual disconnects do not fire the registered disconnect callback.
    """
    # Imported locally so this method does not depend on a file-level import.
    from concurrent.futures import InvalidStateError

    logger.info(f"WebSocket closed: code={code} reason={reason}")

    # Mark as disconnected
    was_connected = self.connected.is_set()
    self.connected.clear()

    # Fail pending commands and check if this is unexpected disconnect
    is_unexpected = False
    with self._lock:
        for future in self._pending.values():
            # set_exception() raises InvalidStateError on a future that is
            # already finished or cancelled. Skipping those keeps one stale
            # entry from aborting the loop and the state reset below.
            if future.done():
                continue
            try:
                future.set_exception(RuntimeError(f"Connection closed: {reason or 'Unknown'}"))
            except InvalidStateError:
                # The future was completed between the check and the call;
                # it already has an outcome, so there is nothing left to do.
                pass
        self._pending.clear()

        # Unexpected disconnect: was connected and ws_app still set (not manual disconnect)
        is_unexpected = was_connected and self.ws_app is not None

        # Clear state to allow reconnection (DB thread and events preserved)
        self.ws_app = None
        self.page_info = None

    # Trigger service-level cleanup if this was unexpected
    if is_unexpected and self._disconnect_callback:
        try:
            # Call in background to avoid blocking WebSocket thread
            threading.Thread(
                target=self._disconnect_callback,
                args=(code, reason),
                daemon=True,
                name="cdp-disconnect-handler",
            ).start()
        except Exception as e:
            logger.error(f"Error calling disconnect callback: {e}")

    # Trigger SSE broadcast immediately
    self._trigger_state_broadcast()
|
|
409
|
+
|
|
275
410
|
def _extract_paths(self, obj, parent_key=""):
|
|
276
411
|
"""Extract all JSON paths from nested dict structure.
|
|
277
412
|
|
|
@@ -332,7 +467,7 @@ class CDPSession:
|
|
|
332
467
|
|
|
333
468
|
def clear_events(self) -> None:
|
|
334
469
|
"""Clear all stored events and reset field lookup."""
|
|
335
|
-
self.
|
|
470
|
+
self._db_execute("DELETE FROM events", wait_result=False)
|
|
336
471
|
self.field_paths.clear()
|
|
337
472
|
|
|
338
473
|
def query(self, sql: str, params: list | None = None) -> list:
|
|
@@ -352,8 +487,7 @@ class CDPSession:
|
|
|
352
487
|
query("SELECT * FROM events WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'")
|
|
353
488
|
query("SELECT json_extract_string(event, '$.params.request.url') as url FROM events")
|
|
354
489
|
"""
|
|
355
|
-
|
|
356
|
-
return result.fetchall() if result else []
|
|
490
|
+
return self._db_execute(sql, params)
|
|
357
491
|
|
|
358
492
|
def fetch_body(self, request_id: str) -> dict | None:
|
|
359
493
|
"""Fetch response body via Network.getResponseBody CDP call.
|
|
@@ -379,6 +513,18 @@ class CDPSession:
|
|
|
379
513
|
"""
|
|
380
514
|
return self.connected.is_set()
|
|
381
515
|
|
|
516
|
+
def set_disconnect_callback(self, callback) -> None:
    """Install the handler for unexpected WebSocket disconnects.

    The handler is started from ``_on_close`` when the socket closes while the
    session is connected and ``ws_app`` is still set. A manual ``disconnect()``
    clears ``ws_app`` first, so it does not trigger the handler.

    Args:
        callback: Callable invoked as ``callback(code, reason)``.
    """
    # Replaces any previously registered handler.
    self._disconnect_callback = callback
    logger.debug("Disconnect callback registered")
|
|
527
|
+
|
|
382
528
|
def register_event_callback(self, method: str, callback) -> None:
|
|
383
529
|
"""Register callback for specific CDP event.
|
|
384
530
|
|
|
@@ -445,21 +591,14 @@ class CDPSession:
|
|
|
445
591
|
logger.debug("Broadcast queue set on CDPSession")
|
|
446
592
|
|
|
447
593
|
def _trigger_state_broadcast(self) -> None:
|
|
448
|
-
"""Trigger SSE broadcast
|
|
594
|
+
"""Trigger SSE broadcast immediately.
|
|
449
595
|
|
|
450
|
-
Called after CDP events are stored.
|
|
451
|
-
to avoid overwhelming SSE clients during heavy network activity.
|
|
596
|
+
Called after CDP events are stored. Queue naturally buffers rapid-fire events.
|
|
452
597
|
"""
|
|
453
598
|
if not self._broadcast_queue:
|
|
454
599
|
return
|
|
455
600
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
self._last_broadcast_time = now
|
|
461
|
-
try:
|
|
462
|
-
self._broadcast_queue.put_nowait({"type": "cdp_event"})
|
|
463
|
-
logger.debug("State broadcast triggered")
|
|
464
|
-
except Exception as e:
|
|
465
|
-
logger.debug(f"Failed to queue broadcast: {e}")
|
|
601
|
+
try:
|
|
602
|
+
self._broadcast_queue.put_nowait({"type": "cdp_event"})
|
|
603
|
+
except Exception as e:
|
|
604
|
+
logger.debug(f"Failed to queue broadcast: {e}")
|