webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,8 @@
1
+ {
2
+ "Browser": "Chrome/139.0.7258.138",
3
+ "Protocol-Version": "1.3",
4
+ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
5
+ "V8-Version": "13.9.205.20",
6
+ "WebKit-Version": "537.36 (@884e54ea8d42947ed636779015c5b4815e069838)",
7
+ "webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/e2c22d46-fafc-483e-a512-caccea649b20"
8
+ }
webtap/cdp/session.py ADDED
@@ -0,0 +1,667 @@
1
+ """CDP Session with native event storage."""
2
+
3
+ import json
4
+ import logging
5
+ import queue
6
+ import threading
7
+ from concurrent.futures import Future, TimeoutError
8
+ from typing import Any
9
+
10
+ import duckdb
11
+ import requests
12
+ import websocket
13
+
14
+ from webtap.cdp.har import create_har_views
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Event storage limits (checked against CDPSession's approximate event counter)
19
+ MAX_EVENTS = 50_000 # Pruning starts once the tracked event count exceeds this
20
+ PRUNE_BATCH_SIZE = 5_000 # Minimum number of oldest rows removed per prune
21
+ PRUNE_CHECK_INTERVAL = 1_000 # Prune check runs when the event count is a multiple of this
22
+
23
+
24
+ class CDPSession:
25
+ """WebSocket-based CDP client with native event storage.
26
+
27
+ Stores CDP events as-is in DuckDB for minimal overhead and maximum flexibility.
28
+ Provides field discovery and query capabilities for dynamic data exploration.
29
+
30
+ Attributes:
31
+ port: Chrome debugging port.
32
+ timeout: Default timeout for execute() calls.
33
+ db: DuckDB connection for event storage.
34
+ field_paths: Live field lookup for query building.
35
+ """
36
+
37
def __init__(self, port: int = 9222, timeout: float = 30):
    """Prepare connection bookkeeping and the in-memory DuckDB event store.

    No WebSocket is opened here; connect() does that. The constructor does
    start the dedicated database thread and blocks until the ``events``
    table, its index and the HAR views have been created.

    Args:
        port: Chrome debugging port. Defaults to 9222.
        timeout: Default timeout for execute() calls. Defaults to 30.
    """
    self.port = port
    self.timeout = timeout

    # WebSocket plumbing, populated by connect()
    self.ws_app: websocket.WebSocketApp | None = None
    self.ws_thread: threading.Thread | None = None

    # Connection state
    self.connected = threading.Event()
    self.page_info: dict | None = None

    # In-flight CDP commands keyed by message id; guarded by _lock
    self._next_id = 1
    self._pending: dict[int, Future] = {}
    self._lock = threading.Lock()

    # Approximate number of stored events, used to decide when to prune
    self._event_count = 0

    # Lowercased path segment -> full "EventType:path" strings (original case)
    self.field_paths: dict[str, set[str]] = {}

    # CDP event method (e.g. "Overlay.inspectNodeRequested") -> callbacks
    self._event_callbacks: dict[str, list] = {}

    # Hooks installed later by the owning service
    self._broadcast_callback: "Any | None" = None
    self._disconnect_callback: "Any | None" = None

    # DuckDB storage - events are stored AS-IS.
    # All access to the connection goes through one dedicated thread.
    self.db = duckdb.connect(":memory:")
    self._db_work_queue: queue.Queue = queue.Queue()
    self._db_result_queues: dict[int, queue.Queue] = {}
    self._db_running = True

    self._db_thread = threading.Thread(target=self._db_worker, daemon=True)
    self._db_thread.start()

    # Schema must exist before any other query is queued, hence wait_result=True
    for ddl in (
        "CREATE TABLE IF NOT EXISTS events (event JSON, method VARCHAR)",
        "CREATE INDEX IF NOT EXISTS idx_events_method ON events(method)",
    ):
        self._db_execute(ddl, wait_result=True)

    # HAR views aggregate network request data on top of the events table
    create_har_views(self._db_execute)
101
+
102
+ def _db_worker(self) -> None:
103
+ """Dedicated thread for all database operations.
104
+
105
+ Ensures thread safety by serializing all DuckDB access through one thread.
106
+ DuckDB connections are not thread-safe - sharing them causes malloc corruption.
107
+ """
108
+ while self._db_running:
109
+ try:
110
+ task = self._db_work_queue.get(timeout=1)
111
+
112
+ if task is None: # Shutdown signal
113
+ break
114
+
115
+ operation_type, sql, params, result_queue_id = task
116
+
117
+ try:
118
+ if operation_type == "execute":
119
+ result = self.db.execute(sql, params or [])
120
+ data = result.fetchall() if result else []
121
+ elif operation_type == "delete":
122
+ self.db.execute(sql, params or [])
123
+ data = None
124
+ else:
125
+ data = None
126
+
127
+ # Send result back if requested
128
+ if result_queue_id and result_queue_id in self._db_result_queues:
129
+ self._db_result_queues[result_queue_id].put(("success", data))
130
+
131
+ except Exception as e:
132
+ logger.error(f"Database error: {e}")
133
+ if result_queue_id and result_queue_id in self._db_result_queues:
134
+ self._db_result_queues[result_queue_id].put(("error", str(e)))
135
+
136
+ finally:
137
+ self._db_work_queue.task_done()
138
+
139
+ except queue.Empty:
140
+ continue
141
+
142
+ def _db_execute(self, sql: str, params: list | None = None, wait_result: bool = True) -> Any:
143
+ """Submit database operation to dedicated thread.
144
+
145
+ Args:
146
+ sql: SQL query or command
147
+ params: Optional query parameters
148
+ wait_result: Block until operation completes and return result
149
+
150
+ Returns:
151
+ Query results if wait_result=True, None otherwise
152
+
153
+ Raises:
154
+ TimeoutError: If operation takes longer than 30 seconds
155
+ RuntimeError: If database operation fails
156
+ """
157
+ result_queue_id = None
158
+ result_queue = None
159
+
160
+ try:
161
+ if wait_result:
162
+ result_queue_id = id(threading.current_thread())
163
+ result_queue = queue.Queue()
164
+ self._db_result_queues[result_queue_id] = result_queue
165
+
166
+ # Submit to work queue
167
+ self._db_work_queue.put(("execute", sql, params, result_queue_id))
168
+
169
+ if wait_result and result_queue and result_queue_id:
170
+ try:
171
+ status, data = result_queue.get(timeout=30)
172
+ except queue.Empty:
173
+ raise TimeoutError(f"Database operation timed out: {sql[:50]}...")
174
+
175
+ if status == "error":
176
+ raise RuntimeError(f"Database error: {data}")
177
+ return data
178
+
179
+ return None
180
+ finally:
181
+ # Always clean up result queue entry to prevent leaks
182
+ if result_queue_id and result_queue_id in self._db_result_queues:
183
+ del self._db_result_queues[result_queue_id]
184
+
185
def list_pages(self) -> list[dict]:
    """Ask Chrome's HTTP endpoint which pages can be debugged.

    Only targets whose ``type`` is ``"page"`` and that expose a
    ``webSocketDebuggerUrl`` are kept.

    Returns:
        List of page dictionaries; empty if the request fails for any reason
        (the failure is logged, not raised).
    """
    try:
        response = requests.get(f"http://localhost:{self.port}/json", timeout=2)
        response.raise_for_status()

        debuggable = []
        for target in response.json():
            if target.get("type") == "page" and "webSocketDebuggerUrl" in target:
                debuggable.append(target)
        return debuggable
    except Exception as e:
        logger.error(f"Failed to list pages: {e}")
        return []
199
+
200
def connect(self, page_index: int | None = None, page_id: str | None = None) -> None:
    """Open a WebSocket to one Chrome page and start collecting events.

    The page is chosen by ``page_id`` when given, otherwise by
    ``page_index``, otherwise the first listed page. No CDP domains are
    enabled here - use execute() for that.

    Args:
        page_index: Index of page to connect to. Defaults to 0.
        page_id: Stable page ID across tab reordering.

    Raises:
        RuntimeError: If already connected or no pages available.
        ValueError: If page_id not found.
        IndexError: If page_index out of range.
        TimeoutError: If connection fails within 5 seconds.
    """
    if self.ws_app:
        raise RuntimeError("Already connected")

    candidates = self.list_pages()
    if not candidates:
        raise RuntimeError("No pages available")

    # Resolve the target page: ID wins over index, index over the default
    if page_id:
        target = next((entry for entry in candidates if entry.get("id") == page_id), None)
        if not target:
            raise ValueError(f"Page with ID {page_id} not found")
    elif page_index is None:
        target = candidates[0]
    elif page_index >= len(candidates):
        raise IndexError(f"Page {page_index} out of range")
    else:
        target = candidates[page_index]

    ws_url = target["webSocketDebuggerUrl"]
    self.page_info = target

    self.ws_app = websocket.WebSocketApp(
        ws_url,
        on_open=self._on_open,
        on_message=self._on_message,
        on_error=self._on_error,
        on_close=self._on_close,
    )

    # No reconnect option is passed - disconnects stay explicit
    run_options = {
        "ping_interval": 30,  # Ping every 30s
        "ping_timeout": 20,  # Wait 20s for pong
        "skip_utf8_validation": True,  # Faster
    }
    self.ws_thread = threading.Thread(
        target=self.ws_app.run_forever,
        kwargs=run_options,
        daemon=True,
    )
    self.ws_thread.start()

    # _on_open sets the event; give up and tear down if it never fires
    if self.connected.wait(timeout=5):
        return
    self.disconnect()
    raise TimeoutError("Failed to connect to Chrome")
261
+
262
+ def disconnect(self) -> None:
263
+ """Disconnect WebSocket while preserving events and DB thread.
264
+
265
+ Events and DB thread persist across connection cycles.
266
+ Use cleanup() on app exit to shutdown DB thread.
267
+ """
268
+ # Atomically clear ws_app to signal manual disconnect
269
+ # This prevents _on_close from triggering service callback
270
+ with self._lock:
271
+ ws_app = self.ws_app
272
+ self.ws_app = None
273
+
274
+ if ws_app:
275
+ ws_app.close()
276
+
277
+ if self.ws_thread and self.ws_thread.is_alive():
278
+ self.ws_thread.join(timeout=2)
279
+ self.ws_thread = None
280
+
281
+ # Keep DB thread running - events preserved for reconnection
282
+ # DB cleanup happens in cleanup() on app exit only
283
+
284
+ self.connected.clear()
285
+ self.page_info = None
286
+
287
+ def cleanup(self) -> None:
288
+ """Shutdown DB thread and disconnect (call on app exit only).
289
+
290
+ This is the only place where DB thread should be stopped.
291
+ Events are lost when DB thread stops (in-memory database).
292
+ """
293
+ # Disconnect WebSocket if connected
294
+ if self.ws_app:
295
+ self.disconnect()
296
+
297
+ # Shutdown database thread
298
+ self._db_running = False
299
+ self._db_work_queue.put(None) # Signal shutdown
300
+ if self._db_thread.is_alive():
301
+ self._db_thread.join(timeout=2)
302
+
303
+ def send(self, method: str, params: dict | None = None) -> Future:
304
+ """Send CDP command asynchronously.
305
+
306
+ Args:
307
+ method: CDP method like "Page.navigate" or "Network.enable".
308
+ params: Optional command parameters.
309
+
310
+ Returns:
311
+ Future containing CDP response 'result' field.
312
+
313
+ Raises:
314
+ RuntimeError: If not connected to Chrome.
315
+ """
316
+ if not self.ws_app:
317
+ raise RuntimeError("Not connected")
318
+
319
+ with self._lock:
320
+ msg_id = self._next_id
321
+ self._next_id += 1
322
+
323
+ future = Future()
324
+ self._pending[msg_id] = future
325
+
326
+ # Send CDP command
327
+ message = {"id": msg_id, "method": method}
328
+ if params:
329
+ message["params"] = params
330
+
331
+ self.ws_app.send(json.dumps(message))
332
+
333
+ return future
334
+
335
+ def execute(self, method: str, params: dict | None = None, timeout: float | None = None) -> Any:
336
+ """Send CDP command synchronously.
337
+
338
+ Args:
339
+ method: CDP method like "Page.navigate" or "Network.enable".
340
+ params: Optional command parameters.
341
+ timeout: Override default timeout.
342
+
343
+ Returns:
344
+ CDP response 'result' field.
345
+
346
+ Raises:
347
+ TimeoutError: If command times out.
348
+ RuntimeError: If CDP returns error or not connected.
349
+ """
350
+ future = self.send(method, params)
351
+
352
+ try:
353
+ return future.result(timeout=timeout or self.timeout)
354
+ except TimeoutError:
355
+ # Clean up the pending future
356
+ with self._lock:
357
+ for msg_id, f in list(self._pending.items()):
358
+ if f is future:
359
+ self._pending.pop(msg_id, None)
360
+ break
361
+ raise TimeoutError(f"Command {method} timed out")
362
+
363
def _on_open(self, ws):
    """WebSocketApp ``on_open`` hook: log and flag the session as connected.

    Setting ``connected`` is what releases the wait in connect().
    """
    logger.info("WebSocket connected")
    self.connected.set()
367
+
368
+ def _on_message(self, ws, message):
369
+ """Handle CDP messages - store events as-is, resolve command futures."""
370
+ try:
371
+ data = json.loads(message)
372
+
373
+ # Command response - resolve future
374
+ if "id" in data:
375
+ msg_id = data["id"]
376
+ with self._lock:
377
+ future = self._pending.pop(msg_id, None)
378
+
379
+ if future:
380
+ if "error" in data:
381
+ future.set_exception(RuntimeError(data["error"]))
382
+ else:
383
+ future.set_result(data.get("result", {}))
384
+
385
+ # CDP event - store AS-IS in DuckDB and update field lookup
386
+ elif "method" in data:
387
+ method = data.get("method", "")
388
+ self._db_execute(
389
+ "INSERT INTO events (event, method) VALUES (?, ?)",
390
+ [json.dumps(data), method],
391
+ wait_result=False,
392
+ )
393
+ self._event_count += 1
394
+ self._update_field_lookup(data)
395
+
396
+ # Prune old events periodically to prevent unbounded growth
397
+ if self._event_count % PRUNE_CHECK_INTERVAL == 0:
398
+ self._maybe_prune_events()
399
+
400
+ # Call registered event callbacks
401
+ self._dispatch_event_callbacks(data)
402
+
403
+ # Trigger SSE broadcast (debounced)
404
+ self._trigger_state_broadcast()
405
+
406
+ except Exception as e:
407
+ logger.error(f"Error handling message: {e}")
408
+
409
def _on_error(self, ws, error):
    """WebSocketApp ``on_error`` hook: the error is only logged."""
    logger.error(f"WebSocket error: {error}")
412
+
413
def _on_close(self, ws, code, reason):
    """WebSocketApp ``on_close`` hook: fail pending commands and notify.

    Every pending future is failed with a ``RuntimeError``. If the session
    was connected and ``ws_app`` was still set (disconnect() had not cleared
    it), the close counts as unexpected and the disconnect callback is
    started on a background thread. A broadcast signal is always sent last.
    """
    logger.info(f"WebSocket closed: code={code} reason={reason}")

    # Mark as disconnected, remembering the previous state
    was_connected = self.connected.is_set()
    self.connected.clear()

    with self._lock:
        # Clear ws_app FIRST so send() refuses new commands from here on
        had_app = self.ws_app is not None
        self.ws_app = None

        for waiter in self._pending.values():
            waiter.set_exception(RuntimeError(f"Connection closed: {reason or 'Unknown'}"))
        self._pending.clear()

        self.page_info = None

    # disconnect() nulls ws_app before closing, so had_app is False for manual disconnects
    is_unexpected = was_connected and had_app

    if is_unexpected and self._disconnect_callback:
        try:
            # Background thread so the WebSocket thread is not blocked
            handler = threading.Thread(
                target=self._disconnect_callback,
                args=(code, reason),
                daemon=True,
                name="cdp-disconnect-handler",
            )
            handler.start()
        except Exception as e:
            logger.error(f"Error calling disconnect callback: {e}")

    # Trigger SSE broadcast immediately
    self._trigger_state_broadcast()
449
+
450
def _maybe_prune_events(self) -> None:
    """Queue deletion of the oldest events once the count exceeds MAX_EVENTS.

    Rows are removed lowest-rowid first. The delete is queued to the DB
    thread without waiting, and the in-memory counter is reduced by the
    requested amount, so the counter is an approximation.
    """
    overflow = self._event_count - MAX_EVENTS
    if overflow <= 0:
        return

    # Remove at least one full batch, or the whole overflow if that is larger
    delete_count = overflow if overflow > PRUNE_BATCH_SIZE else PRUNE_BATCH_SIZE

    self._db_execute(
        "DELETE FROM events WHERE rowid IN (SELECT rowid FROM events ORDER BY rowid LIMIT ?)",
        [delete_count],
        wait_result=False,
    )

    self._event_count -= delete_count
    logger.debug(f"Pruned {delete_count} old events, ~{self._event_count} remaining")
471
+
472
+ def _extract_paths(self, obj, parent_key=""):
473
+ """Extract all JSON paths from nested dict structure.
474
+
475
+ Args:
476
+ obj: Dictionary to extract paths from.
477
+ parent_key: Current path prefix.
478
+ """
479
+ paths = []
480
+ if isinstance(obj, dict):
481
+ for k, v in obj.items():
482
+ new_key = f"{parent_key}.{k}" if parent_key else k
483
+ paths.append(new_key)
484
+ if isinstance(v, dict):
485
+ paths.extend(self._extract_paths(v, new_key))
486
+ return paths
487
+
488
+ def _update_field_lookup(self, data):
489
+ """Update field_paths lookup with new event data.
490
+
491
+ Args:
492
+ data: CDP event dictionary.
493
+ """
494
+ event_type = data.get("method", "unknown")
495
+ paths = self._extract_paths(data)
496
+
497
+ for path in paths:
498
+ # Store with event type prefix using colon separator
499
+ full_path = f"{event_type}:{path}"
500
+
501
+ # Index by each part of the path for flexible searching
502
+ parts = path.split(".")
503
+ for part in parts:
504
+ key = part.lower()
505
+ if key not in self.field_paths:
506
+ self.field_paths[key] = set()
507
+ self.field_paths[key].add(full_path) # Store with event type and original case
508
+
509
+ def discover_field_paths(self, search_key: str) -> list[str]:
510
+ """Discover all JSON paths containing the search key.
511
+
512
+ Used by build_query for dynamic field discovery.
513
+
514
+ Args:
515
+ search_key: Field name to search for like "url" or "status".
516
+
517
+ Returns:
518
+ Sorted list of full paths with event type prefixes.
519
+ """
520
+ search_key = search_key.lower()
521
+ paths = set()
522
+
523
+ # Find all field names that contain our search key
524
+ for field_name, field_paths in self.field_paths.items():
525
+ if search_key in field_name:
526
+ paths.update(field_paths)
527
+
528
+ return sorted(list(paths)) # Sort for consistent results
529
+
530
+ def clear_events(self) -> None:
531
+ """Clear all stored events and reset field lookup."""
532
+ self._db_execute("DELETE FROM events", wait_result=False)
533
+ self.field_paths.clear()
534
+ self._event_count = 0
535
+
536
+ def query(self, sql: str, params: list | None = None) -> list:
537
+ """Query stored CDP events using DuckDB SQL.
538
+
539
+ Events are stored in 'events' table with single JSON 'event' column.
540
+ Use json_extract_string() for accessing nested fields.
541
+
542
+ Args:
543
+ sql: DuckDB SQL query string.
544
+ params: Optional query parameters.
545
+
546
+ Returns:
547
+ List of result rows.
548
+
549
+ Examples:
550
+ query("SELECT * FROM events WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'")
551
+ query("SELECT json_extract_string(event, '$.params.request.url') as url FROM events")
552
+ """
553
+ return self._db_execute(sql, params)
554
+
555
+ def fetch_body(self, request_id: str) -> dict | None:
556
+ """Fetch response body via Network.getResponseBody CDP call.
557
+
558
+ Args:
559
+ request_id: Network request ID from CDP events.
560
+
561
+ Returns:
562
+ Dict with 'body' and 'base64Encoded' keys, or None if failed.
563
+ """
564
+ try:
565
+ return self.execute("Network.getResponseBody", {"requestId": request_id})
566
+ except Exception as e:
567
+ logger.debug(f"Failed to fetch body for {request_id}: {e}")
568
+ return None
569
+
570
+ @property
571
+ def is_connected(self) -> bool:
572
+ """Check if WebSocket connection is active.
573
+
574
+ Returns:
575
+ True if connected to Chrome page.
576
+ """
577
+ return self.connected.is_set()
578
+
579
def set_disconnect_callback(self, callback) -> None:
    """Install the hook that runs when the WebSocket closes unexpectedly.

    The hook is started by _on_close on a background thread when the
    connection drops while ``ws_app`` is still set. A manual disconnect()
    clears ``ws_app`` first and therefore does not trigger it.

    Args:
        callback: Function called with (code: int, reason: str)
    """
    self._disconnect_callback = callback
    logger.debug("Disconnect callback registered")
590
+
591
def register_event_callback(self, method: str, callback) -> None:
    """Register callback for specific CDP event.

    Callbacks are invoked synchronously from the WebSocket message handler
    as ``callback(event)``. They must be plain (non-async) callables and
    should return quickly; a coroutine function would only produce an
    un-awaited coroutine object.

    Args:
        method: CDP event method (e.g. "Overlay.inspectNodeRequested")
        callback: Synchronous function called with event data dict

    Example:
        def on_inspect(event):
            node_id = event.get("params", {}).get("backendNodeId")
            print(f"User clicked node: {node_id}")

        cdp.register_event_callback("Overlay.inspectNodeRequested", on_inspect)
    """
    self._event_callbacks.setdefault(method, []).append(callback)
    logger.debug(f"Registered callback for {method}")
609
+
610
+ def unregister_event_callback(self, method: str, callback) -> None:
611
+ """Unregister event callback.
612
+
613
+ Args:
614
+ method: CDP event method
615
+ callback: Callback function to remove
616
+ """
617
+ if method in self._event_callbacks:
618
+ try:
619
+ self._event_callbacks[method].remove(callback)
620
+ logger.debug(f"Unregistered callback for {method}")
621
+ except ValueError:
622
+ pass
623
+
624
+ def _dispatch_event_callbacks(self, event: dict) -> None:
625
+ """Dispatch event to registered callbacks.
626
+
627
+ All callbacks must be synchronous and should return quickly.
628
+ Failed callbacks are logged but not retried - WebSocket reconnection
629
+ is handled by websocket-client library automatically.
630
+
631
+ Args:
632
+ event: CDP event dictionary with 'method' and 'params'
633
+ """
634
+ method = event.get("method")
635
+ if not method or method not in self._event_callbacks:
636
+ return
637
+
638
+ # Call all registered callbacks (must be sync)
639
+ for callback in self._event_callbacks[method]:
640
+ try:
641
+ callback(event)
642
+ except TimeoutError:
643
+ logger.warning(f"{method} callback timed out - page may be busy, user can retry")
644
+ except Exception as e:
645
+ logger.error(f"Error in {method} callback: {e}")
646
+
647
def set_broadcast_callback(self, callback: "Any") -> None:
    """Install the hook that is called whenever session state changes.

    CDPSession only signals that something changed; any coalescing of
    those signals is left to the callback's owner.

    Args:
        callback: Function to call when state changes (service._trigger_broadcast)
    """
    self._broadcast_callback = callback
    logger.debug("Broadcast callback set on CDPSession")
657
+
658
+ def _trigger_state_broadcast(self) -> None:
659
+ """Signal that state changed (service handles coalescing).
660
+
661
+ Called after CDP events. Service decides whether to actually broadcast.
662
+ """
663
+ if self._broadcast_callback:
664
+ try:
665
+ self._broadcast_callback()
666
+ except Exception as e:
667
+ logger.debug(f"Failed to trigger broadcast: {e}")