webtap-tool 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of webtap-tool might be problematic. Click here for more details.

webtap/__init__.py CHANGED
@@ -9,10 +9,14 @@ PUBLIC API:
9
9
  - main: Entry point function for CLI
10
10
  """
11
11
 
12
+ import atexit
12
13
  import sys
13
14
 
14
15
  from webtap.app import app
15
16
 
17
+ # Register cleanup on exit to shutdown DB thread
18
+ atexit.register(lambda: app.state.cleanup() if hasattr(app, "state") and app.state else None)
19
+
16
20
 
17
21
  def main():
18
22
  """Entry point for the WebTap REPL.
webtap/api.py CHANGED
@@ -119,9 +119,6 @@ async def connect(request: ConnectRequest) -> Dict[str, Any]:
119
119
  # Wrap blocking CDP calls (connect + enable domains) in thread
120
120
  result = await asyncio.to_thread(app_state.service.connect_to_page, page_id=request.page_id)
121
121
 
122
- # Broadcast state change
123
- await broadcast_state()
124
-
125
122
  return result
126
123
 
127
124
 
@@ -134,9 +131,6 @@ async def disconnect() -> Dict[str, Any]:
134
131
  # Wrap blocking CDP calls (fetch.disable + disconnect) in thread
135
132
  result = await asyncio.to_thread(app_state.service.disconnect)
136
133
 
137
- # Broadcast state change
138
- await broadcast_state()
139
-
140
134
  return result
141
135
 
142
136
 
@@ -146,7 +140,10 @@ async def clear_events() -> Dict[str, Any]:
146
140
  if not app_state:
147
141
  return {"error": "WebTap not initialized"}
148
142
 
149
- return app_state.service.clear_events()
143
+ # Wrap blocking DB operation in thread
144
+ result = await asyncio.to_thread(app_state.service.clear_events)
145
+
146
+ return result
150
147
 
151
148
 
152
149
  @api.post("/fetch")
@@ -164,7 +161,7 @@ async def set_fetch_interception(request: FetchRequest) -> Dict[str, Any]:
164
161
  result = await asyncio.to_thread(app_state.service.fetch.disable)
165
162
 
166
163
  # Broadcast state change
167
- await broadcast_state()
164
+ app_state.service._trigger_broadcast()
168
165
 
169
166
  return result
170
167
 
@@ -200,7 +197,7 @@ async def toggle_filter_category(category: str) -> Dict[str, Any]:
200
197
  fm.save()
201
198
 
202
199
  # Broadcast state change
203
- await broadcast_state()
200
+ app_state.service._trigger_broadcast()
204
201
 
205
202
  return {"category": category, "enabled": enabled, "total_enabled": len(fm.enabled_categories)}
206
203
 
@@ -216,7 +213,7 @@ async def enable_all_filters() -> Dict[str, Any]:
216
213
  fm.save()
217
214
 
218
215
  # Broadcast state change
219
- await broadcast_state()
216
+ app_state.service._trigger_broadcast()
220
217
 
221
218
  return {"enabled": list(fm.enabled_categories), "total": len(fm.enabled_categories)}
222
219
 
@@ -232,7 +229,7 @@ async def disable_all_filters() -> Dict[str, Any]:
232
229
  fm.save()
233
230
 
234
231
  # Broadcast state change
235
- await broadcast_state()
232
+ app_state.service._trigger_broadcast()
236
233
 
237
234
  return {"enabled": [], "total": 0}
238
235
 
@@ -249,9 +246,6 @@ async def start_inspect() -> Dict[str, Any]:
249
246
  # Wrap blocking CDP calls (DOM.enable, CSS.enable, Overlay.enable, setInspectMode) in thread
250
247
  result = await asyncio.to_thread(app_state.service.dom.start_inspect)
251
248
 
252
- # Broadcast state change
253
- await broadcast_state()
254
-
255
249
  return result
256
250
 
257
251
 
@@ -264,9 +258,6 @@ async def stop_inspect() -> Dict[str, Any]:
264
258
  # Wrap blocking CDP call (Overlay.setInspectMode) in thread
265
259
  result = await asyncio.to_thread(app_state.service.dom.stop_inspect)
266
260
 
267
- # Broadcast state change
268
- await broadcast_state()
269
-
270
261
  return result
271
262
 
272
263
 
@@ -278,9 +269,6 @@ async def clear_selections() -> Dict[str, Any]:
278
269
 
279
270
  app_state.service.dom.clear_selections()
280
271
 
281
- # Broadcast state change
282
- await broadcast_state()
283
-
284
272
  return {"success": True, "selections": {}}
285
273
 
286
274
 
@@ -293,7 +281,7 @@ async def dismiss_error() -> Dict[str, Any]:
293
281
  app_state.error_state = None
294
282
 
295
283
  # Broadcast state change
296
- await broadcast_state()
284
+ app_state.service._trigger_broadcast()
297
285
 
298
286
  return {"success": True}
299
287
 
@@ -363,11 +351,11 @@ async def stream_events():
363
351
  def get_full_state() -> Dict[str, Any]:
364
352
  """Get complete WebTap state for broadcasting.
365
353
 
366
- Returns real-time state only (no blocking I/O).
367
- Page list excluded - fetch via /info endpoint on-demand.
354
+ Thread-safe, zero-lock reads from immutable snapshot.
355
+ No blocking I/O - returns cached snapshot immediately.
368
356
 
369
357
  Returns:
370
- Dictionary with all state information
358
+ Dictionary with all state information for SSE clients
371
359
  """
372
360
  if not app_state:
373
361
  return {
@@ -375,40 +363,35 @@ def get_full_state() -> Dict[str, Any]:
375
363
  "events": {"total": 0},
376
364
  "fetch": {"enabled": False, "paused_count": 0},
377
365
  "filters": {"enabled": [], "disabled": []},
378
- "browser": {"inspect_active": False, "selections": {}, "prompt": ""},
366
+ "browser": {"inspect_active": False, "selections": {}, "prompt": "", "pending_count": 0},
379
367
  "error": None,
380
368
  }
381
369
 
382
- # Get connection status
383
- connected = app_state.cdp.is_connected
384
- page_info = app_state.cdp.page_info or {}
385
-
386
- # Get event counts
387
- event_count = app_state.service.event_count
388
-
389
- # Get fetch status
390
- fetch_enabled = app_state.service.fetch.enabled
391
- paused_count = app_state.service.fetch.paused_count if fetch_enabled else 0
392
-
393
- # Get filter status
394
- fm = app_state.service.filters
395
- filter_categories = list(fm.filters.keys())
396
- enabled_filters = list(fm.enabled_categories)
397
- disabled_filters = [cat for cat in filter_categories if cat not in enabled_filters]
398
-
399
- # Get browser/DOM state (includes pending_count for progress indicator)
400
- browser_state = app_state.service.dom.get_state()
370
+ # Get immutable snapshot (NO LOCKS NEEDED - inherently thread-safe)
371
+ snapshot = app_state.service.get_state_snapshot()
401
372
 
373
+ # Convert snapshot to frontend format
402
374
  return {
403
- "connected": connected,
404
- "page": {"id": page_info.get("id", ""), "title": page_info.get("title", ""), "url": page_info.get("url", "")}
405
- if connected
375
+ "connected": snapshot.connected,
376
+ "page": {
377
+ "id": snapshot.page_id,
378
+ "title": snapshot.page_title,
379
+ "url": snapshot.page_url,
380
+ }
381
+ if snapshot.connected
382
+ else None,
383
+ "events": {"total": snapshot.event_count},
384
+ "fetch": {"enabled": snapshot.fetch_enabled, "paused_count": snapshot.paused_count},
385
+ "filters": {"enabled": list(snapshot.enabled_filters), "disabled": list(snapshot.disabled_filters)},
386
+ "browser": {
387
+ "inspect_active": snapshot.inspect_active,
388
+ "selections": snapshot.selections,
389
+ "prompt": snapshot.prompt,
390
+ "pending_count": snapshot.pending_count,
391
+ },
392
+ "error": {"message": snapshot.error_message, "timestamp": snapshot.error_timestamp}
393
+ if snapshot.error_message
406
394
  else None,
407
- "events": {"total": event_count},
408
- "fetch": {"enabled": fetch_enabled, "paused_count": paused_count},
409
- "filters": {"enabled": enabled_filters, "disabled": disabled_filters},
410
- "browser": browser_state, # Contains inspect_active, selections, prompt, pending_count
411
- "error": app_state.error_state, # Current error or None
412
395
  }
413
396
 
414
397
 
@@ -429,7 +412,14 @@ async def broadcast_state():
429
412
  try:
430
413
  queue.put_nowait(state)
431
414
  except asyncio.QueueFull:
432
- logger.warning("SSE client queue full, skipping broadcast")
415
+ # Client is falling behind - discard oldest state and retry with latest
416
+ logger.warning(f"SSE client queue full ({queue.qsize()}/{queue.maxsize}), discarding oldest state")
417
+ try:
418
+ queue.get_nowait() # Discard oldest
419
+ queue.put_nowait(state) # Retry with latest
420
+ except Exception as retry_err:
421
+ logger.debug(f"Failed to recover full queue: {retry_err}")
422
+ dead_queues.add(queue)
433
423
  except Exception as e:
434
424
  logger.debug(f"Failed to broadcast to client: {e}")
435
425
  dead_queues.add(queue)
@@ -470,7 +460,9 @@ async def broadcast_processor():
470
460
  async with _sse_clients_lock:
471
461
  for queue in list(_sse_clients):
472
462
  try:
473
- await queue.put(None) # Signal shutdown to client
463
+ queue.put_nowait(None) # Non-blocking shutdown signal
464
+ except asyncio.QueueFull:
465
+ pass # Client is hung, skip
474
466
  except Exception:
475
467
  pass
476
468
  _sse_clients.clear()
@@ -521,11 +513,12 @@ def start_api_server(state, host: str = "127.0.0.1", port: int = 8765) -> thread
521
513
  logger.error("Broadcast queue initialization timed out")
522
514
  return thread
523
515
 
524
- # Wire queue to DOM service and CDP session after event loop starts
516
+ # Wire queue to service and CDP session after event loop starts
517
+ # Note: DOMService uses callback to service._trigger_broadcast instead of direct queue access
525
518
  if _broadcast_queue and app_state:
526
- app_state.service.dom.set_broadcast_queue(_broadcast_queue)
519
+ app_state.service.set_broadcast_queue(_broadcast_queue)
527
520
  app_state.cdp.set_broadcast_queue(_broadcast_queue)
528
- logger.info("Broadcast queue wired to DOMService and CDPSession")
521
+ logger.info("Broadcast queue wired to WebTapService and CDPSession")
529
522
 
530
523
  logger.info(f"API server started on http://{host}:{port}")
531
524
  return thread
webtap/app.py CHANGED
@@ -57,6 +57,10 @@ class WebTapState:
57
57
  # Give server 1.5s to close SSE connections and shutdown gracefully
58
58
  self.api_thread.join(timeout=1.5)
59
59
 
60
+ # Shutdown DB thread (this is the only place where DB thread should stop)
61
+ if hasattr(self, "cdp") and self.cdp:
62
+ self.cdp.cleanup()
63
+
60
64
 
61
65
  # Must be created before command imports for decorator registration
62
66
  app = App(
@@ -90,6 +94,7 @@ else:
90
94
  from webtap.commands import fetch # noqa: E402, F401
91
95
  from webtap.commands import body # noqa: E402, F401
92
96
  from webtap.commands import to_model # noqa: E402, F401
97
+ from webtap.commands import quicktype # noqa: E402, F401
93
98
  from webtap.commands import selections # noqa: E402, F401
94
99
  from webtap.commands import server # noqa: E402, F401
95
100
  from webtap.commands import setup # noqa: E402, F401
webtap/cdp/session.py CHANGED
@@ -6,6 +6,7 @@ PUBLIC API:
6
6
 
7
7
  import json
8
8
  import logging
9
+ import queue
9
10
  import threading
10
11
  from concurrent.futures import Future, TimeoutError
11
12
  from typing import Any
@@ -54,9 +55,18 @@ class CDPSession:
54
55
  self._lock = threading.Lock()
55
56
 
56
57
  # DuckDB storage - store events AS-IS
58
+ # DuckDB connections are NOT thread-safe - use dedicated DB thread
57
59
  self.db = duckdb.connect(":memory:")
60
+ self._db_work_queue: queue.Queue = queue.Queue()
61
+ self._db_result_queues: dict[int, queue.Queue] = {}
62
+ self._db_running = True
58
63
 
59
- self.db.execute("CREATE TABLE events (event JSON)")
64
+ # Start dedicated database thread
65
+ self._db_thread = threading.Thread(target=self._db_worker, daemon=True)
66
+ self._db_thread.start()
67
+
68
+ # Initialize schema via queue
69
+ self._db_execute("CREATE TABLE events (event JSON)", wait_result=False)
60
70
 
61
71
  # Live field path lookup for fast discovery
62
72
  # Maps lowercase field names to their full paths with original case
@@ -68,8 +78,81 @@ class CDPSession:
68
78
 
69
79
  # Broadcast queue for SSE state updates (set by API server)
70
80
  self._broadcast_queue: "Any | None" = None
71
- self._last_broadcast_time = 0.0
72
- self._broadcast_debounce = 1.0 # 1 second debounce
81
+
82
+ # Disconnect callback for service-level cleanup
83
+ self._disconnect_callback: "Any | None" = None
84
+
85
+ def _db_worker(self) -> None:
86
+ """Dedicated thread for all database operations.
87
+
88
+ Ensures thread safety by serializing all DuckDB access through one thread.
89
+ DuckDB connections are not thread-safe - sharing them causes malloc corruption.
90
+ """
91
+ while self._db_running:
92
+ try:
93
+ task = self._db_work_queue.get(timeout=1)
94
+
95
+ if task is None: # Shutdown signal
96
+ break
97
+
98
+ operation_type, sql, params, result_queue_id = task
99
+
100
+ try:
101
+ if operation_type == "execute":
102
+ result = self.db.execute(sql, params or [])
103
+ data = result.fetchall() if result else []
104
+ elif operation_type == "delete":
105
+ self.db.execute(sql, params or [])
106
+ data = None
107
+ else:
108
+ data = None
109
+
110
+ # Send result back if requested
111
+ if result_queue_id and result_queue_id in self._db_result_queues:
112
+ self._db_result_queues[result_queue_id].put(("success", data))
113
+
114
+ except Exception as e:
115
+ logger.error(f"Database error: {e}")
116
+ if result_queue_id and result_queue_id in self._db_result_queues:
117
+ self._db_result_queues[result_queue_id].put(("error", str(e)))
118
+
119
+ finally:
120
+ self._db_work_queue.task_done()
121
+
122
+ except queue.Empty:
123
+ continue
124
+
125
+ def _db_execute(self, sql: str, params: list | None = None, wait_result: bool = True) -> Any:
126
+ """Submit database operation to dedicated thread.
127
+
128
+ Args:
129
+ sql: SQL query or command
130
+ params: Optional query parameters
131
+ wait_result: Block until operation completes and return result
132
+
133
+ Returns:
134
+ Query results if wait_result=True, None otherwise
135
+ """
136
+ result_queue_id = None
137
+ result_queue = None
138
+
139
+ if wait_result:
140
+ result_queue_id = id(threading.current_thread())
141
+ result_queue = queue.Queue()
142
+ self._db_result_queues[result_queue_id] = result_queue
143
+
144
+ # Submit to work queue
145
+ self._db_work_queue.put(("execute", sql, params, result_queue_id))
146
+
147
+ if wait_result and result_queue and result_queue_id:
148
+ status, data = result_queue.get()
149
+ del self._db_result_queues[result_queue_id]
150
+
151
+ if status == "error":
152
+ raise RuntimeError(f"Database error: {data}")
153
+ return data
154
+
155
+ return None
73
156
 
74
157
  def list_pages(self) -> list[dict]:
75
158
  """List available Chrome pages via HTTP API.
@@ -136,7 +219,7 @@ class CDPSession:
136
219
  kwargs={
137
220
  "ping_interval": 30, # Ping every 30s
138
221
  "ping_timeout": 20, # Wait 20s for pong (increased from 10s for heavy CDP load)
139
- "reconnect": 5, # Auto-reconnect with max 5s delay
222
+ # No auto-reconnect - make disconnects explicit
140
223
  "skip_utf8_validation": True, # Faster
141
224
  },
142
225
  )
@@ -149,18 +232,46 @@ class CDPSession:
149
232
  raise TimeoutError("Failed to connect to Chrome")
150
233
 
151
234
  def disconnect(self) -> None:
152
- """Disconnect WebSocket and clean up resources."""
153
- if self.ws_app:
154
- self.ws_app.close()
235
+ """Disconnect WebSocket while preserving events and DB thread.
236
+
237
+ Events and DB thread persist across connection cycles.
238
+ Use cleanup() on app exit to shutdown DB thread.
239
+ """
240
+ # Atomically clear ws_app to signal manual disconnect
241
+ # This prevents _on_close from triggering service callback
242
+ with self._lock:
243
+ ws_app = self.ws_app
155
244
  self.ws_app = None
156
245
 
246
+ if ws_app:
247
+ ws_app.close()
248
+
157
249
  if self.ws_thread and self.ws_thread.is_alive():
158
250
  self.ws_thread.join(timeout=2)
159
251
  self.ws_thread = None
160
252
 
253
+ # Keep DB thread running - events preserved for reconnection
254
+ # DB cleanup happens in cleanup() on app exit only
255
+
161
256
  self.connected.clear()
162
257
  self.page_info = None
163
258
 
259
+ def cleanup(self) -> None:
260
+ """Shutdown DB thread and disconnect (call on app exit only).
261
+
262
+ This is the only place where DB thread should be stopped.
263
+ Events are lost when DB thread stops (in-memory database).
264
+ """
265
+ # Disconnect WebSocket if connected
266
+ if self.ws_app:
267
+ self.disconnect()
268
+
269
+ # Shutdown database thread
270
+ self._db_running = False
271
+ self._db_work_queue.put(None) # Signal shutdown
272
+ if self._db_thread.is_alive():
273
+ self._db_thread.join(timeout=2)
274
+
164
275
  def send(self, method: str, params: dict | None = None) -> Future:
165
276
  """Send CDP command asynchronously.
166
277
 
@@ -245,7 +356,7 @@ class CDPSession:
245
356
 
246
357
  # CDP event - store AS-IS in DuckDB and update field lookup
247
358
  elif "method" in data:
248
- self.db.execute("INSERT INTO events VALUES (?)", [json.dumps(data)])
359
+ self._db_execute("INSERT INTO events VALUES (?)", [json.dumps(data)], wait_result=False)
249
360
  self._update_field_lookup(data)
250
361
 
251
362
  # Call registered event callbacks
@@ -263,15 +374,39 @@ class CDPSession:
263
374
 
264
375
  def _on_close(self, ws, code, reason):
265
376
  """Handle WebSocket closure and cleanup."""
266
- logger.info(f"WebSocket closed: {code} {reason}")
377
+ logger.info(f"WebSocket closed: code={code} reason={reason}")
378
+
379
+ # Mark as disconnected
380
+ was_connected = self.connected.is_set()
267
381
  self.connected.clear()
268
382
 
269
- # Fail pending commands
383
+ # Fail pending commands and check if this is unexpected disconnect
384
+ is_unexpected = False
270
385
  with self._lock:
271
386
  for future in self._pending.values():
272
- future.set_exception(RuntimeError("Connection closed"))
387
+ future.set_exception(RuntimeError(f"Connection closed: {reason or 'Unknown'}"))
273
388
  self._pending.clear()
274
389
 
390
+ # Unexpected disconnect: was connected and ws_app still set (not manual disconnect)
391
+ is_unexpected = was_connected and self.ws_app is not None
392
+
393
+ # Clear state to allow reconnection (DB thread and events preserved)
394
+ self.ws_app = None
395
+ self.page_info = None
396
+
397
+ # Trigger service-level cleanup if this was unexpected
398
+ if is_unexpected and self._disconnect_callback:
399
+ try:
400
+ # Call in background to avoid blocking WebSocket thread
401
+ threading.Thread(
402
+ target=self._disconnect_callback, args=(code, reason), daemon=True, name="cdp-disconnect-handler"
403
+ ).start()
404
+ except Exception as e:
405
+ logger.error(f"Error calling disconnect callback: {e}")
406
+
407
+ # Trigger SSE broadcast immediately
408
+ self._trigger_state_broadcast()
409
+
275
410
  def _extract_paths(self, obj, parent_key=""):
276
411
  """Extract all JSON paths from nested dict structure.
277
412
 
@@ -332,7 +467,7 @@ class CDPSession:
332
467
 
333
468
  def clear_events(self) -> None:
334
469
  """Clear all stored events and reset field lookup."""
335
- self.db.execute("DELETE FROM events")
470
+ self._db_execute("DELETE FROM events", wait_result=False)
336
471
  self.field_paths.clear()
337
472
 
338
473
  def query(self, sql: str, params: list | None = None) -> list:
@@ -352,8 +487,7 @@ class CDPSession:
352
487
  query("SELECT * FROM events WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'")
353
488
  query("SELECT json_extract_string(event, '$.params.request.url') as url FROM events")
354
489
  """
355
- result = self.db.execute(sql, params or [])
356
- return result.fetchall() if result else []
490
+ return self._db_execute(sql, params)
357
491
 
358
492
  def fetch_body(self, request_id: str) -> dict | None:
359
493
  """Fetch response body via Network.getResponseBody CDP call.
@@ -379,6 +513,18 @@ class CDPSession:
379
513
  """
380
514
  return self.connected.is_set()
381
515
 
516
+ def set_disconnect_callback(self, callback) -> None:
517
+ """Register callback for unexpected disconnect events.
518
+
519
+ Called when WebSocket closes externally (tab close, crash, etc).
520
+ NOT called on manual disconnect() to avoid duplicate cleanup.
521
+
522
+ Args:
523
+ callback: Function called with (code: int, reason: str)
524
+ """
525
+ self._disconnect_callback = callback
526
+ logger.debug("Disconnect callback registered")
527
+
382
528
  def register_event_callback(self, method: str, callback) -> None:
383
529
  """Register callback for specific CDP event.
384
530
 
@@ -445,21 +591,14 @@ class CDPSession:
445
591
  logger.debug("Broadcast queue set on CDPSession")
446
592
 
447
593
  def _trigger_state_broadcast(self) -> None:
448
- """Trigger SSE broadcast with 1s debounce.
594
+ """Trigger SSE broadcast immediately.
449
595
 
450
- Called after CDP events are stored. Debounces rapid-fire events
451
- to avoid overwhelming SSE clients during heavy network activity.
596
+ Called after CDP events are stored. Queue naturally buffers rapid-fire events.
452
597
  """
453
598
  if not self._broadcast_queue:
454
599
  return
455
600
 
456
- import time
457
-
458
- now = time.time()
459
- if now - self._last_broadcast_time > self._broadcast_debounce:
460
- self._last_broadcast_time = now
461
- try:
462
- self._broadcast_queue.put_nowait({"type": "cdp_event"})
463
- logger.debug("State broadcast triggered")
464
- except Exception as e:
465
- logger.debug(f"Failed to queue broadcast: {e}")
601
+ try:
602
+ self._broadcast_queue.put_nowait({"type": "cdp_event"})
603
+ except Exception as e:
604
+ logger.debug(f"Failed to queue broadcast: {e}")