webtap-tool 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webtap-tool might be problematic. Click here for more details.

webtap/api.py CHANGED
@@ -4,6 +4,7 @@ PUBLIC API:
4
4
  - start_api_server: Start API server in background thread
5
5
  """
6
6
 
7
+ import asyncio
7
8
  import logging
8
9
  import os
9
10
  import socket
@@ -12,8 +13,10 @@ from typing import Any, Dict
12
13
 
13
14
  from fastapi import FastAPI
14
15
  from fastapi.middleware.cors import CORSMiddleware
16
+ from fastapi.responses import StreamingResponse
15
17
  from pydantic import BaseModel
16
18
  import uvicorn
19
+ import json as json_module
17
20
 
18
21
 
19
22
  logger = logging.getLogger(__name__)
@@ -47,7 +50,14 @@ api.add_middleware(
47
50
 
48
51
 
49
52
  # Global reference to WebTap state (set by start_api_server)
50
- app_state = None
53
+ app_state: "Any | None" = None
54
+
55
+ # SSE clients - set of queues for broadcasting state
56
+ _sse_clients: set[asyncio.Queue] = set()
57
+ _sse_clients_lock = asyncio.Lock()
58
+
59
+ # Broadcast queue for cross-thread communication
60
+ _broadcast_queue: "asyncio.Queue[Dict[str, Any]] | None" = None
51
61
 
52
62
 
53
63
  @api.get("/health")
@@ -62,8 +72,8 @@ async def get_info() -> Dict[str, Any]:
62
72
  if not app_state:
63
73
  return {"error": "WebTap not initialized", "pages": [], "pid": os.getpid()}
64
74
 
65
- # Get pages
66
- pages_data = app_state.service.list_pages()
75
+ # Get pages - wrap blocking HTTP call in thread
76
+ pages_data = await asyncio.to_thread(app_state.service.list_pages)
67
77
 
68
78
  # Get instance info
69
79
  connected_to = None
@@ -106,7 +116,13 @@ async def connect(request: ConnectRequest) -> Dict[str, Any]:
106
116
  if not app_state:
107
117
  return {"error": "WebTap not initialized"}
108
118
 
109
- return app_state.service.connect_to_page(page_id=request.page_id)
119
+ # Wrap blocking CDP calls (connect + enable domains) in thread
120
+ result = await asyncio.to_thread(app_state.service.connect_to_page, page_id=request.page_id)
121
+
122
+ # Broadcast state change
123
+ await broadcast_state()
124
+
125
+ return result
110
126
 
111
127
 
112
128
  @api.post("/disconnect")
@@ -115,7 +131,13 @@ async def disconnect() -> Dict[str, Any]:
115
131
  if not app_state:
116
132
  return {"error": "WebTap not initialized"}
117
133
 
118
- return app_state.service.disconnect()
134
+ # Wrap blocking CDP calls (fetch.disable + disconnect) in thread
135
+ result = await asyncio.to_thread(app_state.service.disconnect)
136
+
137
+ # Broadcast state change
138
+ await broadcast_state()
139
+
140
+ return result
119
141
 
120
142
 
121
143
  @api.post("/clear")
@@ -133,10 +155,17 @@ async def set_fetch_interception(request: FetchRequest) -> Dict[str, Any]:
133
155
  if not app_state:
134
156
  return {"error": "WebTap not initialized"}
135
157
 
158
+ # Wrap blocking CDP calls (Fetch.enable/disable) in thread
136
159
  if request.enabled:
137
- result = app_state.service.fetch.enable(app_state.service.cdp, response_stage=request.response_stage)
160
+ result = await asyncio.to_thread(
161
+ app_state.service.fetch.enable, app_state.service.cdp, response_stage=request.response_stage
162
+ )
138
163
  else:
139
- result = app_state.service.fetch.disable()
164
+ result = await asyncio.to_thread(app_state.service.fetch.disable)
165
+
166
+ # Broadcast state change
167
+ await broadcast_state()
168
+
140
169
  return result
141
170
 
142
171
 
@@ -170,6 +199,9 @@ async def toggle_filter_category(category: str) -> Dict[str, Any]:
170
199
 
171
200
  fm.save()
172
201
 
202
+ # Broadcast state change
203
+ await broadcast_state()
204
+
173
205
  return {"category": category, "enabled": enabled, "total_enabled": len(fm.enabled_categories)}
174
206
 
175
207
 
@@ -183,6 +215,9 @@ async def enable_all_filters() -> Dict[str, Any]:
183
215
  fm.set_enabled_categories(None)
184
216
  fm.save()
185
217
 
218
+ # Broadcast state change
219
+ await broadcast_state()
220
+
186
221
  return {"enabled": list(fm.enabled_categories), "total": len(fm.enabled_categories)}
187
222
 
188
223
 
@@ -196,12 +231,259 @@ async def disable_all_filters() -> Dict[str, Any]:
196
231
  fm.set_enabled_categories([])
197
232
  fm.save()
198
233
 
234
+ # Broadcast state change
235
+ await broadcast_state()
236
+
199
237
  return {"enabled": [], "total": 0}
200
238
 
201
239
 
240
@api.post("/browser/start-inspect")
async def start_inspect() -> Dict[str, Any]:
    """Turn on CDP element inspection for the connected page.

    Returns:
        An ``{"error": ...}`` payload when WebTap has no state installed or
        no page is connected; otherwise whatever the DOM service's
        ``start_inspect`` returns.
    """
    state = app_state
    if not state:
        return {"error": "WebTap not initialized"}
    if not state.cdp.is_connected:
        return {"error": "Not connected to a page"}

    # The DOM service issues blocking CDP calls (DOM.enable, CSS.enable,
    # Overlay.enable, setInspectMode), so run it in a worker thread rather
    # than on the event loop.
    outcome = await asyncio.to_thread(state.service.dom.start_inspect)

    # Let SSE subscribers see the new state before answering.
    await broadcast_state()
    return outcome
256
+
257
+
258
@api.post("/browser/stop-inspect")
async def stop_inspect() -> Dict[str, Any]:
    """Turn off CDP element inspection mode.

    Returns:
        ``{"error": "WebTap not initialized"}`` when no state has been
        installed; otherwise the value produced by the DOM service's
        ``stop_inspect``.
    """
    state = app_state
    if not state:
        return {"error": "WebTap not initialized"}

    # Blocking CDP call (Overlay.setInspectMode): keep it off the event loop.
    outcome = await asyncio.to_thread(state.service.dom.stop_inspect)

    # Push the updated state to SSE subscribers before answering.
    await broadcast_state()
    return outcome
271
+
272
+
273
@api.post("/browser/clear")
async def clear_selections() -> Dict[str, Any]:
    """Drop every recorded element selection.

    Returns:
        ``{"error": "WebTap not initialized"}`` when no state has been
        installed; otherwise a success payload with an empty ``selections``
        mapping.
    """
    state = app_state
    if not state:
        return {"error": "WebTap not initialized"}

    state.service.dom.clear_selections()

    # Subscribers need to see the now-empty selection set.
    await broadcast_state()

    response: Dict[str, Any] = {"success": True, "selections": {}}
    return response
285
+
286
+
287
@api.post("/errors/dismiss")
async def dismiss_error() -> Dict[str, Any]:
    """Clear the error currently held on the WebTap state.

    Returns:
        ``{"error": "WebTap not initialized"}`` when no state has been
        installed; otherwise ``{"success": True}``.
    """
    state = app_state
    if not state:
        return {"error": "WebTap not initialized"}

    # Resetting the field is the whole dismissal; the broadcast below is what
    # makes subscribers drop the error from their view.
    state.error_state = None
    await broadcast_state()

    response: Dict[str, Any] = {"success": True}
    return response
299
+
300
+
301
+ # Removed /browser/prompt endpoint - selections now accessed via @webtap:webtap://selections resource
302
+ # Selections are captured via CDP in DOMService, no submit flow needed
303
+
304
+
305
@api.get("/events")
async def stream_events():
    """Server-Sent Events stream for real-time WebTap state updates.

    Streams the full state object on every change. Subscribers receive:
    - Connection status
    - Event counts
    - Fetch interception status
    - Filter status
    - Element selection state (inspect_active, selections)

    Returns:
        StreamingResponse with ``text/event-stream`` content type.
    """

    async def event_generator():
        """Yield SSE frames: one initial snapshot, then updates/keepalives."""
        queue: asyncio.Queue[Dict[str, Any]] = asyncio.Queue(maxsize=100)

        async with _sse_clients_lock:
            _sse_clients.add(queue)

        try:
            # Send the current state immediately so a new subscriber does not
            # have to wait for the next change.
            yield f"data: {json_module.dumps(get_full_state())}\n\n"

            while True:
                try:
                    state = await asyncio.wait_for(queue.get(), timeout=30.0)
                except asyncio.TimeoutError:
                    # Nothing changed for 30s: emit an SSE comment so the
                    # connection is not considered idle.
                    yield ": keepalive\n\n"
                    continue

                if state is None:  # Shutdown sentinel from the broadcaster
                    break
                yield f"data: {json_module.dumps(state)}\n\n"
        except Exception as e:
            # asyncio.CancelledError is deliberately NOT caught here: it is a
            # BaseException and must propagate so the server can finish
            # cancelling this request. Anything that lands here is a genuine
            # failure (e.g. a state value that is not JSON serializable), so
            # record it with a traceback instead of hiding it at debug level.
            logger.exception(f"SSE stream error: {e}")
        finally:
            # Deregister synchronously. Awaiting the lock here could itself be
            # interrupted while the task is being cancelled, leaving a dead
            # queue registered. The set is only touched from coroutines on
            # this event loop, so a plain discard cannot interleave with them.
            _sse_clients.discard(queue)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
            "Connection": "keep-alive",
        },
    )
361
+
362
+
363
def get_full_state() -> Dict[str, Any]:
    """Get complete WebTap state for broadcasting.

    Reads only values already held on the state object; the page list is
    excluded and must be fetched on demand via the /info endpoint.

    Returns:
        Dictionary with the keys ``connected``, ``page``, ``events``,
        ``fetch``, ``filters``, ``browser`` and ``error``. ``page`` is
        ``None`` whenever no page is connected, including when WebTap has
        not been initialized, so consumers always see the same key set.
    """
    if not app_state:
        return {
            "connected": False,
            # Present (as None) so the uninitialized snapshot has the same
            # shape as every other snapshot.
            "page": None,
            "events": {"total": 0},
            "fetch": {"enabled": False, "paused_count": 0},
            "filters": {"enabled": [], "disabled": []},
            "browser": {"inspect_active": False, "selections": {}, "prompt": ""},
            "error": None,
        }

    cdp = app_state.cdp
    service = app_state.service

    # Connection status
    connected = cdp.is_connected
    page_info = cdp.page_info or {}

    # Event counts
    event_count = service.event_count

    # Fetch status; the paused count is only read while interception is on.
    fetch_enabled = service.fetch.enabled
    paused_count = service.fetch.paused_count if fetch_enabled else 0

    # Filter status: everything known to the filter manager that is not
    # enabled is reported as disabled. A set keeps the membership test O(1).
    fm = service.filters
    enabled_filters = list(fm.enabled_categories)
    enabled_lookup = set(enabled_filters)
    disabled_filters = [cat for cat in fm.filters.keys() if cat not in enabled_lookup]

    # Browser/DOM state as reported by the DOM service.
    browser_state = service.dom.get_state()

    page = None
    if connected:
        page = {
            "id": page_info.get("id", ""),
            "title": page_info.get("title", ""),
            "url": page_info.get("url", ""),
        }

    return {
        "connected": connected,
        "page": page,
        "events": {"total": event_count},
        "fetch": {"enabled": fetch_enabled, "paused_count": paused_count},
        "filters": {"enabled": enabled_filters, "disabled": disabled_filters},
        "browser": browser_state,
        "error": app_state.error_state,  # Current error or None
    }
413
+
414
+
415
async def broadcast_state():
    """Broadcast the current state to all SSE clients.

    Every message is a complete state snapshot, so only the most recent one
    matters. When a client's queue is full, the oldest pending snapshot is
    evicted to make room; skipping the new one instead would leave a slow
    client on stale state once it drains its queue.
    """
    async with _sse_clients_lock:
        if not _sse_clients:
            return
        # Copy so the lock is not held while building and queueing state.
        clients = list(_sse_clients)

    state = get_full_state()
    dead_queues = set()

    for queue in clients:
        try:
            if queue.full():
                # No awaits between full() and put_nowait(), so nothing else
                # on the loop can touch the queue in between.
                queue.get_nowait()
                logger.warning("SSE client queue full, dropping oldest state")
            queue.put_nowait(state)
        except Exception as e:
            logger.debug(f"Failed to broadcast to client: {e}")
            dead_queues.add(queue)

    # Forget queues that could not accept the snapshot at all.
    if dead_queues:
        async with _sse_clients_lock:
            # In-place update: no need to rebind the module-level name.
            _sse_clients.difference_update(dead_queues)
441
+
442
+
443
async def broadcast_processor():
    """Background task that processes the broadcast queue.

    Runs in the API server's event loop. Creates the module-level broadcast
    queue, signals readiness through ``_queue_ready``, then rebroadcasts
    state to SSE clients each time a signal arrives on the queue. When
    ``_shutdown_requested`` becomes true it sends every SSE client a ``None``
    sentinel and clears the client set.
    """
    global _broadcast_queue
    _broadcast_queue = asyncio.Queue()
    _queue_ready.set()  # Signal that queue is ready

    logger.info("Broadcast processor started")

    while not _shutdown_requested:
        try:
            # The 1s timeout bounds how long a shutdown request can go
            # unnoticed while the queue is idle.
            item = await asyncio.wait_for(_broadcast_queue.get(), timeout=1.0)
            logger.debug(f"Broadcast signal received: {item}")

            await broadcast_state()
        except asyncio.TimeoutError:
            # Normal timeout, re-check the shutdown flag.
            continue
        except Exception as e:
            logger.error(f"Error in broadcast processor: {e}")

    # Graceful shutdown: tell every SSE client to stop.
    async with _sse_clients_lock:
        for queue in list(_sse_clients):
            # Never block here: awaiting put() on a full queue while holding
            # the lock would stall shutdown on a single slow client.
            try:
                queue.put_nowait(None)
            except asyncio.QueueFull:
                # Stalled client: discard its oldest pending snapshot so the
                # sentinel fits.
                try:
                    queue.get_nowait()
                    queue.put_nowait(None)
                except (asyncio.QueueEmpty, asyncio.QueueFull):
                    pass
            except Exception:
                # Best effort: one bad queue must not stop the rest.
                pass
        _sse_clients.clear()

    logger.info("Broadcast processor stopped")
479
+
480
+
202
481
  # Flag to signal shutdown
203
482
  _shutdown_requested = False
204
483
 
484
+ # Event to signal broadcast queue is ready
485
+ _queue_ready = threading.Event()
486
+
205
487
 
206
488
  def start_api_server(state, host: str = "127.0.0.1", port: int = 8765) -> threading.Thread | None:
207
489
  """Start the API server in a background thread.
@@ -215,20 +497,36 @@ def start_api_server(state, host: str = "127.0.0.1", port: int = 8765) -> thread
215
497
  Thread instance running the server, or None if port is in use.
216
498
  """
217
499
  # Check port availability first
500
+ # Use SO_REUSEADDR to properly test availability even if port in TIME_WAIT
218
501
  try:
219
502
  with socket.socket() as s:
503
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
220
504
  s.bind((host, port))
221
505
  except OSError:
222
506
  logger.info(f"Port {port} already in use")
223
507
  return None
224
508
 
225
- global app_state, _shutdown_requested
509
+ global app_state, _shutdown_requested, _broadcast_queue
226
510
  app_state = state
227
511
  _shutdown_requested = False # Reset flag for new instance
512
+ _queue_ready.clear() # Reset event for new instance
228
513
 
229
- thread = threading.Thread(target=run_server, args=(host, port), daemon=True)
514
+ # Use daemon thread so REPL can exit immediately
515
+ # Graceful shutdown handled by atexit → cleanup() → _shutdown_requested
516
+ thread = threading.Thread(target=run_server, args=(host, port), daemon=True, name="webtap-api")
230
517
  thread.start()
231
518
 
519
+ # Wait for broadcast queue to be ready (with timeout)
520
+ if not _queue_ready.wait(timeout=2.0):
521
+ logger.error("Broadcast queue initialization timed out")
522
+ return thread
523
+
524
+ # Wire queue to DOM service and CDP session after event loop starts
525
+ if _broadcast_queue and app_state:
526
+ app_state.service.dom.set_broadcast_queue(_broadcast_queue)
527
+ app_state.cdp.set_broadcast_queue(_broadcast_queue)
528
+ logger.info("Broadcast queue wired to DOMService and CDPSession")
529
+
232
530
  logger.info(f"API server started on http://{host}:{port}")
233
531
  return thread
234
532
 
@@ -251,6 +549,9 @@ def run_server(host: str, port: int):
251
549
  # Start server in background task
252
550
  serve_task = asyncio.create_task(server.serve())
253
551
 
552
+ # Start broadcast processor in background
553
+ broadcast_task = asyncio.create_task(broadcast_processor())
554
+
254
555
  # Wait for shutdown signal
255
556
  while not _shutdown_requested:
256
557
  await asyncio.sleep(0.1)
@@ -272,6 +573,14 @@ def run_server(host: str, port: int):
272
573
  except asyncio.CancelledError:
273
574
  pass
274
575
 
576
+ # Cancel broadcast processor
577
+ if not broadcast_task.done():
578
+ broadcast_task.cancel()
579
+ try:
580
+ await broadcast_task
581
+ except asyncio.CancelledError:
582
+ pass
583
+
275
584
  try:
276
585
  # Use asyncio.run() which properly cleans up
277
586
  asyncio.run(run())
webtap/app.py CHANGED
@@ -31,6 +31,8 @@ class WebTapState:
31
31
  cdp: CDPSession = field(default_factory=CDPSession)
32
32
  service: WebTapService = field(init=False)
33
33
  api_thread: threading.Thread | None = None
34
+ browser_data: dict | None = None # Browser element selections with prompt
35
+ error_state: dict | None = None # Current error: {"message": str, "timestamp": float}
34
36
 
35
37
  def __post_init__(self):
36
38
  """Initialize service with self reference after dataclass init."""
@@ -38,9 +40,13 @@ class WebTapState:
38
40
 
39
41
  def cleanup(self):
40
42
  """Cleanup resources on exit."""
41
- # Disconnect CDP if connected
42
- if self.cdp.is_connected:
43
- self.cdp.disconnect()
43
+ # Disconnect through service to ensure full cleanup (clears selections, cache, etc)
44
+ if hasattr(self, "service") and self.service and self.cdp.is_connected:
45
+ self.service.disconnect()
46
+
47
+ # Stop DOM service cleanup (executor, callbacks)
48
+ if hasattr(self, "service") and self.service and self.service.dom:
49
+ self.service.dom.cleanup()
44
50
 
45
51
  # Stop API server if we own it
46
52
  if self.api_thread and self.api_thread.is_alive():
@@ -48,8 +54,8 @@ class WebTapState:
48
54
  import webtap.api
49
55
 
50
56
  webtap.api._shutdown_requested = True
51
- # Wait up to 1 second for graceful shutdown
52
- self.api_thread.join(timeout=1.0)
57
+ # Give server 1.5s to close SSE connections and shutdown gracefully
58
+ self.api_thread.join(timeout=1.5)
53
59
 
54
60
 
55
61
  # Must be created before command imports for decorator registration
@@ -83,6 +89,7 @@ else:
83
89
  from webtap.commands import inspect # noqa: E402, F401
84
90
  from webtap.commands import fetch # noqa: E402, F401
85
91
  from webtap.commands import body # noqa: E402, F401
92
+ from webtap.commands import selections # noqa: E402, F401
86
93
  from webtap.commands import server # noqa: E402, F401
87
94
  from webtap.commands import setup # noqa: E402, F401
88
95
  from webtap.commands import launch # noqa: E402, F401
webtap/cdp/session.py CHANGED
@@ -62,6 +62,15 @@ class CDPSession:
62
62
  # Maps lowercase field names to their full paths with original case
63
63
  self.field_paths: dict[str, set[str]] = {}
64
64
 
65
+ # Event callbacks for real-time handling
66
+ # Maps event method (e.g. "Overlay.inspectNodeRequested") to list of callbacks
67
+ self._event_callbacks: dict[str, list] = {}
68
+
69
+ # Broadcast queue for SSE state updates (set by API server)
70
+ self._broadcast_queue: "Any | None" = None
71
+ self._last_broadcast_time = 0.0
72
+ self._broadcast_debounce = 1.0 # 1 second debounce
73
+
65
74
  def list_pages(self) -> list[dict]:
66
75
  """List available Chrome pages via HTTP API.
67
76
 
@@ -126,7 +135,7 @@ class CDPSession:
126
135
  target=self.ws_app.run_forever,
127
136
  kwargs={
128
137
  "ping_interval": 30, # Ping every 30s
129
- "ping_timeout": 10, # Wait 10s for pong
138
+ "ping_timeout": 20, # Wait 20s for pong (increased from 10s for heavy CDP load)
130
139
  "reconnect": 5, # Auto-reconnect with max 5s delay
131
140
  "skip_utf8_validation": True, # Faster
132
141
  },
@@ -239,6 +248,12 @@ class CDPSession:
239
248
  self.db.execute("INSERT INTO events VALUES (?)", [json.dumps(data)])
240
249
  self._update_field_lookup(data)
241
250
 
251
+ # Call registered event callbacks
252
+ self._dispatch_event_callbacks(data)
253
+
254
+ # Trigger SSE broadcast (debounced)
255
+ self._trigger_state_broadcast()
256
+
242
257
  except Exception as e:
243
258
  logger.error(f"Error handling message: {e}")
244
259
 
@@ -363,3 +378,88 @@ class CDPSession:
363
378
  True if connected to Chrome page.
364
379
  """
365
380
  return self.connected.is_set()
381
+
382
def register_event_callback(self, method: str, callback) -> None:
    """Register a callback for a specific CDP event.

    Callbacks are invoked synchronously as ``callback(event)`` by
    ``_dispatch_event_callbacks``; the result is never awaited. An
    ``async def`` function would therefore only create a coroutine object
    that never runs, so pass a plain function. A coroutine function is still
    accepted for backward compatibility, but a warning is logged.

    Args:
        method: CDP event method (e.g. "Overlay.inspectNodeRequested")
        callback: Synchronous callable taking the event data dict

    Example:
        def on_inspect(event):
            node_id = event.get("params", {}).get("backendNodeId")
            print(f"User clicked node: {node_id}")

        cdp.register_event_callback("Overlay.inspectNodeRequested", on_inspect)
    """
    import inspect

    if inspect.iscoroutinefunction(callback):
        logger.warning(f"Callback for {method} is a coroutine function; it will be called but never awaited")

    # setdefault creates the per-method list on first registration.
    self._event_callbacks.setdefault(method, []).append(callback)
    logger.debug(f"Registered callback for {method}")
400
+
401
def unregister_event_callback(self, method: str, callback) -> None:
    """Remove a previously registered callback for a CDP event.

    Unknown methods and callbacks that were never registered are ignored.

    Args:
        method: CDP event method
        callback: Callback function to remove
    """
    registered = self._event_callbacks.get(method)
    if registered is None:
        return

    # Only the first matching registration is removed, and the removal is
    # logged only when something was actually taken out.
    if callback in registered:
        registered.remove(callback)
        logger.debug(f"Unregistered callback for {method}")
414
+
415
def _dispatch_event_callbacks(self, event: dict) -> None:
    """Dispatch an event to the callbacks registered for its method.

    All callbacks must be synchronous and should return quickly. A failing
    callback is logged and skipped; the remaining callbacks still run.

    Args:
        event: CDP event dictionary with 'method' and 'params'
    """
    method = event.get("method")
    if not method:
        return

    callbacks = self._event_callbacks.get(method)
    if not callbacks:
        return

    # Iterate over a snapshot: a callback may unregister itself (or another
    # callback) while running, and mutating the live list mid-iteration
    # would silently skip the next entry.
    for callback in tuple(callbacks):
        try:
            callback(event)
        except TimeoutError:
            logger.warning(f"{method} callback timed out - page may be busy, user can retry")
        except Exception as e:
            logger.error(f"Error in {method} callback: {e}")
437
+
438
def set_broadcast_queue(self, queue: "Any", loop: "Any | None" = None) -> None:
    """Set the queue used to signal state changes to SSE clients.

    ``asyncio.Queue`` is not thread-safe, and ``_trigger_state_broadcast``
    runs on the WebSocket thread rather than on the event loop that reads
    the queue. Pass that event loop as ``loop`` so signals are handed over
    with ``loop.call_soon_threadsafe``. Without it, the queue is written
    directly from the calling thread, as before.

    Args:
        queue: Queue (an ``asyncio.Queue`` in the API server) that receives
            broadcast signals via ``put_nowait``
        loop: Optional event loop that consumes ``queue``
    """
    self._broadcast_queue = queue
    self._broadcast_loop = loop
    logger.debug("Broadcast queue set on CDPSession")


def _trigger_state_broadcast(self) -> None:
    """Trigger an SSE broadcast, debounced by ``_broadcast_debounce`` seconds.

    Called after CDP events are stored. Signals arriving within the debounce
    window of the previous one are dropped, to avoid overwhelming SSE clients
    during heavy network activity. Does nothing until a broadcast queue has
    been set.
    """
    queue = self._broadcast_queue
    if queue is None:
        return

    import time

    # Monotonic clock: a wall-clock adjustment must not be able to suppress
    # (or burst) broadcasts.
    now = time.monotonic()
    if now - self._last_broadcast_time <= self._broadcast_debounce:
        return
    self._last_broadcast_time = now

    signal = {"type": "cdp_event"}
    # getattr: the loop attribute only exists once set_broadcast_queue ran.
    loop = getattr(self, "_broadcast_loop", None)
    try:
        if loop is not None:
            # Hand the put over to the loop's own thread; this also wakes the
            # loop so the consumer sees the signal promptly.
            loop.call_soon_threadsafe(queue.put_nowait, signal)
        else:
            # NOTE(review): direct cross-thread put on an asyncio.Queue is
            # not thread-safe; callers should supply the loop.
            queue.put_nowait(signal)
    except Exception as e:
        logger.debug(f"Failed to queue broadcast: {e}")
    else:
        logger.debug("State broadcast triggered")