webtap-tool 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webtap/VISION.md +246 -0
- webtap/__init__.py +84 -0
- webtap/__main__.py +6 -0
- webtap/api/__init__.py +9 -0
- webtap/api/app.py +26 -0
- webtap/api/models.py +69 -0
- webtap/api/server.py +111 -0
- webtap/api/sse.py +182 -0
- webtap/api/state.py +89 -0
- webtap/app.py +79 -0
- webtap/cdp/README.md +275 -0
- webtap/cdp/__init__.py +12 -0
- webtap/cdp/har.py +302 -0
- webtap/cdp/schema/README.md +41 -0
- webtap/cdp/schema/cdp_protocol.json +32785 -0
- webtap/cdp/schema/cdp_version.json +8 -0
- webtap/cdp/session.py +667 -0
- webtap/client.py +81 -0
- webtap/commands/DEVELOPER_GUIDE.md +401 -0
- webtap/commands/TIPS.md +269 -0
- webtap/commands/__init__.py +29 -0
- webtap/commands/_builders.py +331 -0
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/_tips.py +147 -0
- webtap/commands/_utils.py +273 -0
- webtap/commands/connection.py +220 -0
- webtap/commands/console.py +87 -0
- webtap/commands/fetch.py +310 -0
- webtap/commands/filters.py +116 -0
- webtap/commands/javascript.py +73 -0
- webtap/commands/js_export.py +73 -0
- webtap/commands/launch.py +72 -0
- webtap/commands/navigation.py +197 -0
- webtap/commands/network.py +136 -0
- webtap/commands/quicktype.py +306 -0
- webtap/commands/request.py +93 -0
- webtap/commands/selections.py +138 -0
- webtap/commands/setup.py +219 -0
- webtap/commands/to_model.py +163 -0
- webtap/daemon.py +185 -0
- webtap/daemon_state.py +53 -0
- webtap/filters.py +219 -0
- webtap/rpc/__init__.py +14 -0
- webtap/rpc/errors.py +49 -0
- webtap/rpc/framework.py +223 -0
- webtap/rpc/handlers.py +625 -0
- webtap/rpc/machine.py +84 -0
- webtap/services/README.md +83 -0
- webtap/services/__init__.py +15 -0
- webtap/services/console.py +124 -0
- webtap/services/dom.py +547 -0
- webtap/services/fetch.py +415 -0
- webtap/services/main.py +392 -0
- webtap/services/network.py +401 -0
- webtap/services/setup/__init__.py +185 -0
- webtap/services/setup/chrome.py +233 -0
- webtap/services/setup/desktop.py +255 -0
- webtap/services/setup/extension.py +147 -0
- webtap/services/setup/platform.py +162 -0
- webtap/services/state_snapshot.py +86 -0
- webtap_tool-0.11.0.dist-info/METADATA +535 -0
- webtap_tool-0.11.0.dist-info/RECORD +64 -0
- webtap_tool-0.11.0.dist-info/WHEEL +4 -0
- webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"Browser": "Chrome/139.0.7258.138",
|
|
3
|
+
"Protocol-Version": "1.3",
|
|
4
|
+
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
|
|
5
|
+
"V8-Version": "13.9.205.20",
|
|
6
|
+
"WebKit-Version": "537.36 (@884e54ea8d42947ed636779015c5b4815e069838)",
|
|
7
|
+
"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/e2c22d46-fafc-483e-a512-caccea649b20"
|
|
8
|
+
}
|
webtap/cdp/session.py
ADDED
|
@@ -0,0 +1,667 @@
|
|
|
1
|
+
"""CDP Session with native event storage."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import queue
|
|
6
|
+
import threading
|
|
7
|
+
from concurrent.futures import Future, TimeoutError
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import duckdb
|
|
11
|
+
import requests
|
|
12
|
+
import websocket
|
|
13
|
+
|
|
14
|
+
from webtap.cdp.har import create_har_views
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Event storage limits
|
|
19
|
+
MAX_EVENTS = 50_000 # FIFO eviction threshold
|
|
20
|
+
PRUNE_BATCH_SIZE = 5_000 # Delete in batches for efficiency
|
|
21
|
+
PRUNE_CHECK_INTERVAL = 1_000 # Check count every N events
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CDPSession:
|
|
25
|
+
"""WebSocket-based CDP client with native event storage.
|
|
26
|
+
|
|
27
|
+
Stores CDP events as-is in DuckDB for minimal overhead and maximum flexibility.
|
|
28
|
+
Provides field discovery and query capabilities for dynamic data exploration.
|
|
29
|
+
|
|
30
|
+
Attributes:
|
|
31
|
+
port: Chrome debugging port.
|
|
32
|
+
timeout: Default timeout for execute() calls.
|
|
33
|
+
db: DuckDB connection for event storage.
|
|
34
|
+
field_paths: Live field lookup for query building.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
    def __init__(self, port: int = 9222, timeout: float = 30):
        """Initialize CDP session with WebSocket and DuckDB storage.

        Starts a dedicated daemon thread that owns the in-memory DuckDB
        connection, then (synchronously) creates the events table, its
        method index, and the HAR views before returning.

        Args:
            port: Chrome debugging port. Defaults to 9222.
            timeout: Default timeout for execute() calls. Defaults to 30.
        """
        self.port = port
        self.timeout = timeout

        # WebSocketApp instance (None while disconnected)
        self.ws_app: websocket.WebSocketApp | None = None
        self.ws_thread: threading.Thread | None = None

        # Connection state
        self.connected = threading.Event()
        self.page_info: dict | None = None

        # CDP request/response tracking: message id -> Future awaiting reply.
        # _lock guards _next_id/_pending and the ws_app handoff on close.
        self._next_id = 1
        self._pending: dict[int, Future] = {}
        self._lock = threading.Lock()

        # DuckDB storage - store events AS-IS
        # DuckDB connections are NOT thread-safe - use dedicated DB thread
        self.db = duckdb.connect(":memory:")
        self._db_work_queue: queue.Queue = queue.Queue()
        self._db_result_queues: dict[int, queue.Queue] = {}
        self._db_running = True

        # Start dedicated database thread (daemon so it dies with the process)
        self._db_thread = threading.Thread(target=self._db_worker, daemon=True)
        self._db_thread.start()

        # Initialize schema with method column for fast filtering
        # Must wait for table to exist before any queries can run
        self._db_execute(
            "CREATE TABLE IF NOT EXISTS events (event JSON, method VARCHAR)",
            wait_result=True,
        )
        self._db_execute(
            "CREATE INDEX IF NOT EXISTS idx_events_method ON events(method)",
            wait_result=True,
        )

        # Create HAR views for aggregated network request data
        create_har_views(self._db_execute)

        # Event count for pruning (approximate, updated periodically)
        self._event_count = 0

        # Live field path lookup for fast discovery
        # Maps lowercase field names to their full paths with original case
        self.field_paths: dict[str, set[str]] = {}

        # Event callbacks for real-time handling
        # Maps event method (e.g. "Overlay.inspectNodeRequested") to list of callbacks
        self._event_callbacks: dict[str, list] = {}

        # Broadcast callback for SSE state updates (set by service)
        self._broadcast_callback: "Any | None" = None

        # Disconnect callback for service-level cleanup
        self._disconnect_callback: "Any | None" = None
|
102
|
+
def _db_worker(self) -> None:
|
|
103
|
+
"""Dedicated thread for all database operations.
|
|
104
|
+
|
|
105
|
+
Ensures thread safety by serializing all DuckDB access through one thread.
|
|
106
|
+
DuckDB connections are not thread-safe - sharing them causes malloc corruption.
|
|
107
|
+
"""
|
|
108
|
+
while self._db_running:
|
|
109
|
+
try:
|
|
110
|
+
task = self._db_work_queue.get(timeout=1)
|
|
111
|
+
|
|
112
|
+
if task is None: # Shutdown signal
|
|
113
|
+
break
|
|
114
|
+
|
|
115
|
+
operation_type, sql, params, result_queue_id = task
|
|
116
|
+
|
|
117
|
+
try:
|
|
118
|
+
if operation_type == "execute":
|
|
119
|
+
result = self.db.execute(sql, params or [])
|
|
120
|
+
data = result.fetchall() if result else []
|
|
121
|
+
elif operation_type == "delete":
|
|
122
|
+
self.db.execute(sql, params or [])
|
|
123
|
+
data = None
|
|
124
|
+
else:
|
|
125
|
+
data = None
|
|
126
|
+
|
|
127
|
+
# Send result back if requested
|
|
128
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
129
|
+
self._db_result_queues[result_queue_id].put(("success", data))
|
|
130
|
+
|
|
131
|
+
except Exception as e:
|
|
132
|
+
logger.error(f"Database error: {e}")
|
|
133
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
134
|
+
self._db_result_queues[result_queue_id].put(("error", str(e)))
|
|
135
|
+
|
|
136
|
+
finally:
|
|
137
|
+
self._db_work_queue.task_done()
|
|
138
|
+
|
|
139
|
+
except queue.Empty:
|
|
140
|
+
continue
|
|
141
|
+
|
|
142
|
+
def _db_execute(self, sql: str, params: list | None = None, wait_result: bool = True) -> Any:
|
|
143
|
+
"""Submit database operation to dedicated thread.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
sql: SQL query or command
|
|
147
|
+
params: Optional query parameters
|
|
148
|
+
wait_result: Block until operation completes and return result
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
Query results if wait_result=True, None otherwise
|
|
152
|
+
|
|
153
|
+
Raises:
|
|
154
|
+
TimeoutError: If operation takes longer than 30 seconds
|
|
155
|
+
RuntimeError: If database operation fails
|
|
156
|
+
"""
|
|
157
|
+
result_queue_id = None
|
|
158
|
+
result_queue = None
|
|
159
|
+
|
|
160
|
+
try:
|
|
161
|
+
if wait_result:
|
|
162
|
+
result_queue_id = id(threading.current_thread())
|
|
163
|
+
result_queue = queue.Queue()
|
|
164
|
+
self._db_result_queues[result_queue_id] = result_queue
|
|
165
|
+
|
|
166
|
+
# Submit to work queue
|
|
167
|
+
self._db_work_queue.put(("execute", sql, params, result_queue_id))
|
|
168
|
+
|
|
169
|
+
if wait_result and result_queue and result_queue_id:
|
|
170
|
+
try:
|
|
171
|
+
status, data = result_queue.get(timeout=30)
|
|
172
|
+
except queue.Empty:
|
|
173
|
+
raise TimeoutError(f"Database operation timed out: {sql[:50]}...")
|
|
174
|
+
|
|
175
|
+
if status == "error":
|
|
176
|
+
raise RuntimeError(f"Database error: {data}")
|
|
177
|
+
return data
|
|
178
|
+
|
|
179
|
+
return None
|
|
180
|
+
finally:
|
|
181
|
+
# Always clean up result queue entry to prevent leaks
|
|
182
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
183
|
+
del self._db_result_queues[result_queue_id]
|
|
184
|
+
|
|
185
|
+
def list_pages(self) -> list[dict]:
|
|
186
|
+
"""List available Chrome pages via HTTP API.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
List of page dictionaries with webSocketDebuggerUrl.
|
|
190
|
+
"""
|
|
191
|
+
try:
|
|
192
|
+
resp = requests.get(f"http://localhost:{self.port}/json", timeout=2)
|
|
193
|
+
resp.raise_for_status()
|
|
194
|
+
pages = resp.json()
|
|
195
|
+
return [p for p in pages if p.get("type") == "page" and "webSocketDebuggerUrl" in p]
|
|
196
|
+
except Exception as e:
|
|
197
|
+
logger.error(f"Failed to list pages: {e}")
|
|
198
|
+
return []
|
|
199
|
+
|
|
200
|
+
    def connect(self, page_index: int | None = None, page_id: str | None = None) -> None:
        """Connect to Chrome page via WebSocket.

        Establishes WebSocket connection and starts event collection.
        Does not auto-enable CDP domains - use execute() for that.

        Args:
            page_index: Index of page to connect to. Defaults to 0.
            page_id: Stable page ID across tab reordering. Takes precedence
                over page_index when both are given.

        Raises:
            RuntimeError: If already connected or no pages available.
            ValueError: If page_id not found.
            IndexError: If page_index out of range.
            TimeoutError: If connection fails within 5 seconds.
        """
        if self.ws_app:
            raise RuntimeError("Already connected")

        pages = self.list_pages()
        if not pages:
            raise RuntimeError("No pages available")

        # Find the page by ID or index
        if page_id:
            page = next((p for p in pages if p.get("id") == page_id), None)
            if not page:
                raise ValueError(f"Page with ID {page_id} not found")
        elif page_index is not None:
            if page_index >= len(pages):
                raise IndexError(f"Page {page_index} out of range")
            page = pages[page_index]
        else:
            # Default to first page
            page = pages[0]

        ws_url = page["webSocketDebuggerUrl"]
        self.page_info = page

        # Create WebSocketApp with callbacks
        self.ws_app = websocket.WebSocketApp(
            ws_url, on_open=self._on_open, on_message=self._on_message, on_error=self._on_error, on_close=self._on_close
        )

        # Let WebSocketApp handle everything in a thread
        self.ws_thread = threading.Thread(
            target=self.ws_app.run_forever,
            kwargs={
                "ping_interval": 30,  # Ping every 30s
                "ping_timeout": 20,  # Wait 20s for pong (increased from 10s for heavy CDP load)
                # No auto-reconnect - make disconnects explicit
                "skip_utf8_validation": True,  # Faster
            },
        )
        self.ws_thread.daemon = True
        self.ws_thread.start()

        # Wait for connection; tear down on failure so a retry starts clean
        if not self.connected.wait(timeout=5):
            self.disconnect()
            raise TimeoutError("Failed to connect to Chrome")
|
262
|
+
def disconnect(self) -> None:
|
|
263
|
+
"""Disconnect WebSocket while preserving events and DB thread.
|
|
264
|
+
|
|
265
|
+
Events and DB thread persist across connection cycles.
|
|
266
|
+
Use cleanup() on app exit to shutdown DB thread.
|
|
267
|
+
"""
|
|
268
|
+
# Atomically clear ws_app to signal manual disconnect
|
|
269
|
+
# This prevents _on_close from triggering service callback
|
|
270
|
+
with self._lock:
|
|
271
|
+
ws_app = self.ws_app
|
|
272
|
+
self.ws_app = None
|
|
273
|
+
|
|
274
|
+
if ws_app:
|
|
275
|
+
ws_app.close()
|
|
276
|
+
|
|
277
|
+
if self.ws_thread and self.ws_thread.is_alive():
|
|
278
|
+
self.ws_thread.join(timeout=2)
|
|
279
|
+
self.ws_thread = None
|
|
280
|
+
|
|
281
|
+
# Keep DB thread running - events preserved for reconnection
|
|
282
|
+
# DB cleanup happens in cleanup() on app exit only
|
|
283
|
+
|
|
284
|
+
self.connected.clear()
|
|
285
|
+
self.page_info = None
|
|
286
|
+
|
|
287
|
+
def cleanup(self) -> None:
|
|
288
|
+
"""Shutdown DB thread and disconnect (call on app exit only).
|
|
289
|
+
|
|
290
|
+
This is the only place where DB thread should be stopped.
|
|
291
|
+
Events are lost when DB thread stops (in-memory database).
|
|
292
|
+
"""
|
|
293
|
+
# Disconnect WebSocket if connected
|
|
294
|
+
if self.ws_app:
|
|
295
|
+
self.disconnect()
|
|
296
|
+
|
|
297
|
+
# Shutdown database thread
|
|
298
|
+
self._db_running = False
|
|
299
|
+
self._db_work_queue.put(None) # Signal shutdown
|
|
300
|
+
if self._db_thread.is_alive():
|
|
301
|
+
self._db_thread.join(timeout=2)
|
|
302
|
+
|
|
303
|
+
def send(self, method: str, params: dict | None = None) -> Future:
|
|
304
|
+
"""Send CDP command asynchronously.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
method: CDP method like "Page.navigate" or "Network.enable".
|
|
308
|
+
params: Optional command parameters.
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
Future containing CDP response 'result' field.
|
|
312
|
+
|
|
313
|
+
Raises:
|
|
314
|
+
RuntimeError: If not connected to Chrome.
|
|
315
|
+
"""
|
|
316
|
+
if not self.ws_app:
|
|
317
|
+
raise RuntimeError("Not connected")
|
|
318
|
+
|
|
319
|
+
with self._lock:
|
|
320
|
+
msg_id = self._next_id
|
|
321
|
+
self._next_id += 1
|
|
322
|
+
|
|
323
|
+
future = Future()
|
|
324
|
+
self._pending[msg_id] = future
|
|
325
|
+
|
|
326
|
+
# Send CDP command
|
|
327
|
+
message = {"id": msg_id, "method": method}
|
|
328
|
+
if params:
|
|
329
|
+
message["params"] = params
|
|
330
|
+
|
|
331
|
+
self.ws_app.send(json.dumps(message))
|
|
332
|
+
|
|
333
|
+
return future
|
|
334
|
+
|
|
335
|
+
def execute(self, method: str, params: dict | None = None, timeout: float | None = None) -> Any:
|
|
336
|
+
"""Send CDP command synchronously.
|
|
337
|
+
|
|
338
|
+
Args:
|
|
339
|
+
method: CDP method like "Page.navigate" or "Network.enable".
|
|
340
|
+
params: Optional command parameters.
|
|
341
|
+
timeout: Override default timeout.
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
CDP response 'result' field.
|
|
345
|
+
|
|
346
|
+
Raises:
|
|
347
|
+
TimeoutError: If command times out.
|
|
348
|
+
RuntimeError: If CDP returns error or not connected.
|
|
349
|
+
"""
|
|
350
|
+
future = self.send(method, params)
|
|
351
|
+
|
|
352
|
+
try:
|
|
353
|
+
return future.result(timeout=timeout or self.timeout)
|
|
354
|
+
except TimeoutError:
|
|
355
|
+
# Clean up the pending future
|
|
356
|
+
with self._lock:
|
|
357
|
+
for msg_id, f in list(self._pending.items()):
|
|
358
|
+
if f is future:
|
|
359
|
+
self._pending.pop(msg_id, None)
|
|
360
|
+
break
|
|
361
|
+
raise TimeoutError(f"Command {method} timed out")
|
|
362
|
+
|
|
363
|
+
    def _on_open(self, ws):
        """WebSocket connection established - unblocks connect()'s wait."""
        logger.info("WebSocket connected")
        self.connected.set()
|
368
|
+
    def _on_message(self, ws, message):
        """Handle CDP messages - store events as-is, resolve command futures.

        Runs on the WebSocketApp reader thread. Event inserts are queued
        fire-and-forget (wait_result=False) so this thread never blocks on
        DuckDB.
        """
        try:
            data = json.loads(message)

            # Command response - resolve future
            if "id" in data:
                msg_id = data["id"]
                with self._lock:
                    future = self._pending.pop(msg_id, None)

                # future may be None if execute() already timed out and
                # cleaned up - the late reply is simply dropped.
                if future:
                    if "error" in data:
                        future.set_exception(RuntimeError(data["error"]))
                    else:
                        future.set_result(data.get("result", {}))

            # CDP event - store AS-IS in DuckDB and update field lookup
            elif "method" in data:
                method = data.get("method", "")
                self._db_execute(
                    "INSERT INTO events (event, method) VALUES (?, ?)",
                    [json.dumps(data), method],
                    wait_result=False,
                )
                self._event_count += 1
                self._update_field_lookup(data)

                # Prune old events periodically to prevent unbounded growth
                if self._event_count % PRUNE_CHECK_INTERVAL == 0:
                    self._maybe_prune_events()

                # Call registered event callbacks
                self._dispatch_event_callbacks(data)

                # Trigger SSE broadcast (debounced)
                self._trigger_state_broadcast()

        except Exception as e:
            # Never let a bad message kill the reader thread.
            logger.error(f"Error handling message: {e}")
|
409
|
+
    def _on_error(self, ws, error):
        """Log WebSocket errors; teardown is handled by _on_close."""
        logger.error(f"WebSocket error: {error}")
|
413
|
+
    def _on_close(self, ws, code, reason):
        """Handle WebSocket closure and cleanup.

        Distinguishes an unexpected close (tab crash, Chrome exit) from a
        manual disconnect(): disconnect() clears ws_app under the lock
        before close() fires, so ws_app being set here means unexpected.
        Only unexpected closes trigger the service disconnect callback.
        """
        logger.info(f"WebSocket closed: code={code} reason={reason}")

        # Mark as disconnected
        was_connected = self.connected.is_set()
        self.connected.clear()

        # Fail pending commands and check if this is unexpected disconnect
        is_unexpected = False
        with self._lock:
            # Capture and clear ws_app FIRST to prevent new sends from adding futures
            ws_app_was_set = self.ws_app is not None
            self.ws_app = None

            # Now safe to clear pending - no new futures can be added
            for future in self._pending.values():
                future.set_exception(RuntimeError(f"Connection closed: {reason or 'Unknown'}"))
            self._pending.clear()

            # Unexpected disconnect: was connected and ws_app was set (not manual disconnect)
            is_unexpected = was_connected and ws_app_was_set
            self.page_info = None

        # Trigger service-level cleanup if this was unexpected
        if is_unexpected and self._disconnect_callback:
            try:
                # Call in background to avoid blocking WebSocket thread
                threading.Thread(
                    target=self._disconnect_callback, args=(code, reason), daemon=True, name="cdp-disconnect-handler"
                ).start()
            except Exception as e:
                logger.error(f"Error calling disconnect callback: {e}")

        # Trigger SSE broadcast immediately
        self._trigger_state_broadcast()
|
450
|
+
def _maybe_prune_events(self) -> None:
|
|
451
|
+
"""Prune oldest events if count exceeds MAX_EVENTS.
|
|
452
|
+
|
|
453
|
+
Uses FIFO deletion - removes oldest events first (by rowid).
|
|
454
|
+
Non-blocking: queues delete operation to DB thread.
|
|
455
|
+
"""
|
|
456
|
+
if self._event_count <= MAX_EVENTS:
|
|
457
|
+
return
|
|
458
|
+
|
|
459
|
+
excess = self._event_count - MAX_EVENTS
|
|
460
|
+
# Delete in batches, but at least the excess
|
|
461
|
+
delete_count = max(excess, PRUNE_BATCH_SIZE)
|
|
462
|
+
|
|
463
|
+
self._db_execute(
|
|
464
|
+
"DELETE FROM events WHERE rowid IN (SELECT rowid FROM events ORDER BY rowid LIMIT ?)",
|
|
465
|
+
[delete_count],
|
|
466
|
+
wait_result=False,
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
self._event_count -= delete_count
|
|
470
|
+
logger.debug(f"Pruned {delete_count} old events, ~{self._event_count} remaining")
|
|
471
|
+
|
|
472
|
+
def _extract_paths(self, obj, parent_key=""):
|
|
473
|
+
"""Extract all JSON paths from nested dict structure.
|
|
474
|
+
|
|
475
|
+
Args:
|
|
476
|
+
obj: Dictionary to extract paths from.
|
|
477
|
+
parent_key: Current path prefix.
|
|
478
|
+
"""
|
|
479
|
+
paths = []
|
|
480
|
+
if isinstance(obj, dict):
|
|
481
|
+
for k, v in obj.items():
|
|
482
|
+
new_key = f"{parent_key}.{k}" if parent_key else k
|
|
483
|
+
paths.append(new_key)
|
|
484
|
+
if isinstance(v, dict):
|
|
485
|
+
paths.extend(self._extract_paths(v, new_key))
|
|
486
|
+
return paths
|
|
487
|
+
|
|
488
|
+
def _update_field_lookup(self, data):
|
|
489
|
+
"""Update field_paths lookup with new event data.
|
|
490
|
+
|
|
491
|
+
Args:
|
|
492
|
+
data: CDP event dictionary.
|
|
493
|
+
"""
|
|
494
|
+
event_type = data.get("method", "unknown")
|
|
495
|
+
paths = self._extract_paths(data)
|
|
496
|
+
|
|
497
|
+
for path in paths:
|
|
498
|
+
# Store with event type prefix using colon separator
|
|
499
|
+
full_path = f"{event_type}:{path}"
|
|
500
|
+
|
|
501
|
+
# Index by each part of the path for flexible searching
|
|
502
|
+
parts = path.split(".")
|
|
503
|
+
for part in parts:
|
|
504
|
+
key = part.lower()
|
|
505
|
+
if key not in self.field_paths:
|
|
506
|
+
self.field_paths[key] = set()
|
|
507
|
+
self.field_paths[key].add(full_path) # Store with event type and original case
|
|
508
|
+
|
|
509
|
+
def discover_field_paths(self, search_key: str) -> list[str]:
|
|
510
|
+
"""Discover all JSON paths containing the search key.
|
|
511
|
+
|
|
512
|
+
Used by build_query for dynamic field discovery.
|
|
513
|
+
|
|
514
|
+
Args:
|
|
515
|
+
search_key: Field name to search for like "url" or "status".
|
|
516
|
+
|
|
517
|
+
Returns:
|
|
518
|
+
Sorted list of full paths with event type prefixes.
|
|
519
|
+
"""
|
|
520
|
+
search_key = search_key.lower()
|
|
521
|
+
paths = set()
|
|
522
|
+
|
|
523
|
+
# Find all field names that contain our search key
|
|
524
|
+
for field_name, field_paths in self.field_paths.items():
|
|
525
|
+
if search_key in field_name:
|
|
526
|
+
paths.update(field_paths)
|
|
527
|
+
|
|
528
|
+
return sorted(list(paths)) # Sort for consistent results
|
|
529
|
+
|
|
530
|
+
def clear_events(self) -> None:
|
|
531
|
+
"""Clear all stored events and reset field lookup."""
|
|
532
|
+
self._db_execute("DELETE FROM events", wait_result=False)
|
|
533
|
+
self.field_paths.clear()
|
|
534
|
+
self._event_count = 0
|
|
535
|
+
|
|
536
|
+
    def query(self, sql: str, params: list | None = None) -> list:
        """Query stored CDP events using DuckDB SQL.

        Events are stored in the 'events' table as a raw JSON 'event'
        column plus a 'method' column for fast filtering. Use
        json_extract_string() for accessing nested fields. Blocks until
        the DB thread returns the rows.

        Args:
            sql: DuckDB SQL query string.
            params: Optional query parameters.

        Returns:
            List of result rows.

        Raises:
            TimeoutError: If the DB thread does not answer within 30s.
            RuntimeError: If the query fails.

        Examples:
            query("SELECT * FROM events WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'")
            query("SELECT json_extract_string(event, '$.params.request.url') as url FROM events")
        """
        return self._db_execute(sql, params)
|
555
|
+
def fetch_body(self, request_id: str) -> dict | None:
|
|
556
|
+
"""Fetch response body via Network.getResponseBody CDP call.
|
|
557
|
+
|
|
558
|
+
Args:
|
|
559
|
+
request_id: Network request ID from CDP events.
|
|
560
|
+
|
|
561
|
+
Returns:
|
|
562
|
+
Dict with 'body' and 'base64Encoded' keys, or None if failed.
|
|
563
|
+
"""
|
|
564
|
+
try:
|
|
565
|
+
return self.execute("Network.getResponseBody", {"requestId": request_id})
|
|
566
|
+
except Exception as e:
|
|
567
|
+
logger.debug(f"Failed to fetch body for {request_id}: {e}")
|
|
568
|
+
return None
|
|
569
|
+
|
|
570
|
+
    @property
    def is_connected(self) -> bool:
        """Check if WebSocket connection is active.

        Returns:
            True if connected to a Chrome page (set in _on_open,
            cleared in disconnect()/_on_close).
        """
        return self.connected.is_set()
|
579
|
+
    def set_disconnect_callback(self, callback) -> None:
        """Register callback for unexpected disconnect events.

        Called (on a background thread) when the WebSocket closes
        externally (tab close, crash, etc). NOT called on manual
        disconnect() to avoid duplicate cleanup.

        Args:
            callback: Function called with (code: int, reason: str)
        """
        self._disconnect_callback = callback
        logger.debug("Disconnect callback registered")
|
591
|
+
def register_event_callback(self, method: str, callback) -> None:
|
|
592
|
+
"""Register callback for specific CDP event.
|
|
593
|
+
|
|
594
|
+
Args:
|
|
595
|
+
method: CDP event method (e.g. "Overlay.inspectNodeRequested")
|
|
596
|
+
callback: Async function called with event data dict
|
|
597
|
+
|
|
598
|
+
Example:
|
|
599
|
+
async def on_inspect(event):
|
|
600
|
+
node_id = event.get("params", {}).get("backendNodeId")
|
|
601
|
+
print(f"User clicked node: {node_id}")
|
|
602
|
+
|
|
603
|
+
cdp.register_event_callback("Overlay.inspectNodeRequested", on_inspect)
|
|
604
|
+
"""
|
|
605
|
+
if method not in self._event_callbacks:
|
|
606
|
+
self._event_callbacks[method] = []
|
|
607
|
+
self._event_callbacks[method].append(callback)
|
|
608
|
+
logger.debug(f"Registered callback for {method}")
|
|
609
|
+
|
|
610
|
+
def unregister_event_callback(self, method: str, callback) -> None:
|
|
611
|
+
"""Unregister event callback.
|
|
612
|
+
|
|
613
|
+
Args:
|
|
614
|
+
method: CDP event method
|
|
615
|
+
callback: Callback function to remove
|
|
616
|
+
"""
|
|
617
|
+
if method in self._event_callbacks:
|
|
618
|
+
try:
|
|
619
|
+
self._event_callbacks[method].remove(callback)
|
|
620
|
+
logger.debug(f"Unregistered callback for {method}")
|
|
621
|
+
except ValueError:
|
|
622
|
+
pass
|
|
623
|
+
|
|
624
|
+
def _dispatch_event_callbacks(self, event: dict) -> None:
|
|
625
|
+
"""Dispatch event to registered callbacks.
|
|
626
|
+
|
|
627
|
+
All callbacks must be synchronous and should return quickly.
|
|
628
|
+
Failed callbacks are logged but not retried - WebSocket reconnection
|
|
629
|
+
is handled by websocket-client library automatically.
|
|
630
|
+
|
|
631
|
+
Args:
|
|
632
|
+
event: CDP event dictionary with 'method' and 'params'
|
|
633
|
+
"""
|
|
634
|
+
method = event.get("method")
|
|
635
|
+
if not method or method not in self._event_callbacks:
|
|
636
|
+
return
|
|
637
|
+
|
|
638
|
+
# Call all registered callbacks (must be sync)
|
|
639
|
+
for callback in self._event_callbacks[method]:
|
|
640
|
+
try:
|
|
641
|
+
callback(event)
|
|
642
|
+
except TimeoutError:
|
|
643
|
+
logger.warning(f"{method} callback timed out - page may be busy, user can retry")
|
|
644
|
+
except Exception as e:
|
|
645
|
+
logger.error(f"Error in {method} callback: {e}")
|
|
646
|
+
|
|
647
|
+
    def set_broadcast_callback(self, callback: "Any") -> None:
        """Set callback for broadcasting state changes.

        Service owns coalescing/debouncing - CDPSession just signals that
        state changed via _trigger_state_broadcast().

        Args:
            callback: Zero-argument function to call when state changes
                (service._trigger_broadcast)
        """
        self._broadcast_callback = callback
        logger.debug("Broadcast callback set on CDPSession")
|
658
|
+
def _trigger_state_broadcast(self) -> None:
|
|
659
|
+
"""Signal that state changed (service handles coalescing).
|
|
660
|
+
|
|
661
|
+
Called after CDP events. Service decides whether to actually broadcast.
|
|
662
|
+
"""
|
|
663
|
+
if self._broadcast_callback:
|
|
664
|
+
try:
|
|
665
|
+
self._broadcast_callback()
|
|
666
|
+
except Exception as e:
|
|
667
|
+
logger.debug(f"Failed to trigger broadcast: {e}")
|