webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,392 @@
1
+ """Main service orchestrator for WebTap business logic."""
2
+
3
+ from typing import Any
4
+
5
+ from webtap.filters import FilterManager
6
+ from webtap.services.fetch import FetchService
7
+ from webtap.services.network import NetworkService
8
+ from webtap.services.console import ConsoleService
9
+ from webtap.services.dom import DOMService
10
+ from webtap.services.state_snapshot import StateSnapshot
11
+
12
+
13
# CDP domains passed to enable_domains() by WebTapService.connect_to_page().
REQUIRED_DOMAINS = ["Page", "Network", "Runtime", "Log", "DOMStorage"]
20
+
21
+
22
+ class WebTapService:
23
+ """Main service orchestrating all WebTap domain services.
24
+
25
+ Coordinates CDP session management, domain services, and filter management.
26
+ Shared between REPL commands and API endpoints for consistent state.
27
+
28
+ Attributes:
29
+ state: WebTap application state instance.
30
+ cdp: CDP session for browser communication.
31
+ enabled_domains: Set of currently enabled CDP domains.
32
+ filters: Filter manager for event filtering.
33
+ fetch: Fetch interception service.
34
+ network: Network monitoring service.
35
+ console: Console message service.
36
+ dom: DOM inspection and element selection service.
37
+ """
38
+
39
+ def __init__(self, state):
40
+ """Initialize with WebTapState instance.
41
+
42
+ Args:
43
+ state: WebTapState instance from app.py
44
+ """
45
+ import threading
46
+
47
+ self.state = state
48
+ self.cdp = state.cdp
49
+ self._state_lock = threading.RLock() # Reentrant lock - safe to acquire multiple times by same thread
50
+
51
+ self.enabled_domains: set[str] = set()
52
+ self.filters = FilterManager()
53
+
54
+ # RPC framework (set by server.py after initialization)
55
+ self.rpc: "Any | None" = None
56
+
57
+ self.fetch = FetchService()
58
+ self.network = NetworkService()
59
+ self.console = ConsoleService()
60
+ self.dom = DOMService()
61
+
62
+ self.fetch.cdp = self.cdp
63
+ self.network.cdp = self.cdp
64
+ self.network.filters = self.filters
65
+ self.console.cdp = self.cdp
66
+ self.dom.set_cdp(self.cdp)
67
+ self.dom.set_state(self.state)
68
+ self.dom.set_broadcast_callback(self._trigger_broadcast) # DOM calls back for snapshot updates
69
+
70
+ self.fetch.set_broadcast_callback(self._trigger_broadcast) # Fetch calls back for snapshot updates
71
+
72
+ # Legacy wiring for CDP event handler
73
+ self.cdp.fetch_service = self.fetch
74
+
75
+ # Register DOM event callbacks
76
+ self.cdp.register_event_callback("Overlay.inspectNodeRequested", self.dom.handle_inspect_node_requested)
77
+ self.cdp.register_event_callback("Page.frameNavigated", self.dom.handle_frame_navigated)
78
+
79
+ # Register disconnect callback for unexpected disconnects
80
+ self.cdp.set_disconnect_callback(self._handle_unexpected_disconnect)
81
+
82
+ # CDPSession calls back here when CDP events arrive
83
+ self.cdp.set_broadcast_callback(self._trigger_broadcast)
84
+
85
+ # Broadcast queue for SSE state updates (set by API server)
86
+ self._broadcast_queue: "Any | None" = None
87
+
88
+ # Coalescing flag - prevents duplicate broadcasts during rapid CDP events
89
+ # Service owns coalescing (single source of truth)
90
+ self._broadcast_pending = threading.Event()
91
+
92
+ # Immutable state snapshot for thread-safe SSE reads
93
+ # Updated atomically on every state change, read without locks
94
+ self._state_snapshot: StateSnapshot = StateSnapshot.create_empty()
95
+
96
+ def set_broadcast_queue(self, queue: "Any") -> None:
97
+ """Set queue for broadcasting state changes.
98
+
99
+ Args:
100
+ queue: asyncio.Queue for thread-safe signaling
101
+ """
102
+ self._broadcast_queue = queue
103
+
104
+ def _create_snapshot(self) -> StateSnapshot:
105
+ """Create immutable state snapshot from current state.
106
+
107
+ MUST be called with self._state_lock held to ensure atomic read.
108
+
109
+ Returns:
110
+ Frozen StateSnapshot with current state
111
+ """
112
+ # Connection state (read page_info first to avoid race with disconnect)
113
+ page_info = self.cdp.page_info
114
+ connected = self.cdp.is_connected and page_info is not None
115
+ page_id = page_info.get("id", "") if page_info else ""
116
+ page_title = page_info.get("title", "") if page_info else ""
117
+ page_url = page_info.get("url", "") if page_info else ""
118
+
119
+ # Event count
120
+ event_count = self.event_count
121
+
122
+ # Fetch state
123
+ fetch_enabled = self.fetch.enabled
124
+ response_stage = self.fetch.enable_response_stage
125
+ paused_count = self.fetch.paused_count if fetch_enabled else 0
126
+
127
+ # Filter state (convert to immutable tuples)
128
+ fm = self.filters
129
+ filter_groups = list(fm.groups.keys())
130
+ enabled_filters = tuple(fm.enabled)
131
+ disabled_filters = tuple(name for name in filter_groups if name not in enabled_filters)
132
+
133
+ # Browser/DOM state (get_state() is already thread-safe internally)
134
+ browser_state = self.dom.get_state()
135
+
136
+ # Error state
137
+ error = self.state.error_state
138
+ error_message = error.get("message") if error else None
139
+ error_timestamp = error.get("timestamp") if error else None
140
+
141
+ # Deep copy selections to ensure true immutability
142
+ import copy
143
+
144
+ selections = copy.deepcopy(browser_state["selections"])
145
+
146
+ return StateSnapshot(
147
+ connected=connected,
148
+ page_id=page_id,
149
+ page_title=page_title,
150
+ page_url=page_url,
151
+ event_count=event_count,
152
+ fetch_enabled=fetch_enabled,
153
+ response_stage=response_stage,
154
+ paused_count=paused_count,
155
+ enabled_filters=enabled_filters,
156
+ disabled_filters=disabled_filters,
157
+ inspect_active=browser_state["inspect_active"],
158
+ selections=selections, # Deep copy ensures nested dicts are immutable
159
+ prompt=browser_state["prompt"],
160
+ pending_count=browser_state["pending_count"],
161
+ error_message=error_message,
162
+ error_timestamp=error_timestamp,
163
+ )
164
+
165
+ def _trigger_broadcast(self) -> None:
166
+ """Trigger SSE broadcast with coalescing (thread-safe).
167
+
168
+ Called from:
169
+ - CDPSession (CDP events)
170
+ - DOMService (selections)
171
+ - FetchService (interception state)
172
+ - Service methods (connect, disconnect, clear)
173
+
174
+ Coalescing: Only queues signal if none pending. Prevents 1000s of
175
+ signals during rapid CDP events. Flag cleared by API after broadcast.
176
+
177
+ Uses atomic check-and-set to prevent race where multiple threads
178
+ queue multiple signals before any sets the flag.
179
+ """
180
+ import logging
181
+
182
+ logger = logging.getLogger(__name__)
183
+
184
+ # Early exit if no queue (API not started yet)
185
+ if not self._broadcast_queue:
186
+ return
187
+
188
+ # Always update snapshot, but coalesce broadcast signals
189
+ with self._state_lock:
190
+ # Update snapshot while holding lock (always, for API responses)
191
+ try:
192
+ self._state_snapshot = self._create_snapshot()
193
+ except (TypeError, AttributeError) as e:
194
+ logger.error(f"Programming error in snapshot creation: {e}")
195
+ raise
196
+ except Exception as e:
197
+ logger.error(f"Failed to create state snapshot: {e}", exc_info=True)
198
+ return
199
+
200
+ # Skip queue signal if broadcast already pending (coalescing)
201
+ if self._broadcast_pending.is_set():
202
+ return
203
+ self._broadcast_pending.set()
204
+
205
+ # Signal broadcast (outside lock - queue.put_nowait is thread-safe)
206
+ try:
207
+ self._broadcast_queue.put_nowait({"type": "state_change"})
208
+ except Exception as e:
209
+ # Clear flag if queue failed so next trigger can try
210
+ self._broadcast_pending.clear()
211
+ logger.warning(f"Failed to queue broadcast: {e}")
212
+
213
+ def get_state_snapshot(self) -> StateSnapshot:
214
+ """Get current immutable state snapshot (thread-safe, no locks).
215
+
216
+ Returns:
217
+ Current StateSnapshot - immutable, safe to read from any thread
218
+ """
219
+ return self._state_snapshot
220
+
221
+ def clear_broadcast_pending(self) -> None:
222
+ """Clear broadcast pending flag (called by API after broadcast).
223
+
224
+ Allows next state change to trigger a new broadcast.
225
+ Thread-safe - Event.clear() is atomic.
226
+ """
227
+ self._broadcast_pending.clear()
228
+
229
+ @property
230
+ def event_count(self) -> int:
231
+ """Total count of all CDP events stored."""
232
+ if not self.cdp or not self.cdp.is_connected:
233
+ return 0
234
+ try:
235
+ result = self.cdp.query("SELECT COUNT(*) FROM events")
236
+ return result[0][0] if result else 0
237
+ except Exception:
238
+ return 0
239
+
240
+ def connect_to_page(self, page_index: int | None = None, page_id: str | None = None) -> dict[str, Any]:
241
+ """Connect to Chrome page and enable required domains.
242
+
243
+ Pure domain logic - raises exceptions on failure.
244
+ State machine transitions are handled by RPC handlers.
245
+
246
+ Args:
247
+ page_index: Index of page to connect to (for REPL)
248
+ page_id: ID of page to connect to (for extension)
249
+
250
+ Returns:
251
+ Connection info dict with 'title' and 'url'
252
+
253
+ Raises:
254
+ Exception: On connection or domain enable failure
255
+ """
256
+ # If already connected, disconnect first (enables seamless page switching)
257
+ if self.cdp.is_connected:
258
+ self.disconnect()
259
+
260
+ # Reset DOM service for new connection (executor may have been shutdown by previous disconnect)
261
+ self.dom.reset()
262
+
263
+ # Clear selections BEFORE connect to handle race with pending disconnect cleanup
264
+ # (disconnect handler runs in background thread, might clear after we connect)
265
+ self.dom.clear_selections()
266
+
267
+ self.cdp.connect(page_index=page_index, page_id=page_id)
268
+
269
+ failures = self.enable_domains(REQUIRED_DOMAINS)
270
+
271
+ if failures:
272
+ self.cdp.disconnect()
273
+ raise RuntimeError(f"Failed to enable domains: {failures}")
274
+
275
+ self.filters.load()
276
+
277
+ page_info = self.cdp.page_info or {}
278
+ self._trigger_broadcast()
279
+ return {"title": page_info.get("title", "Untitled"), "url": page_info.get("url", "")}
280
+
281
+ def disconnect(self) -> None:
282
+ """Disconnect from Chrome and clean up all state.
283
+
284
+ Pure domain logic - performs full cleanup.
285
+ State machine transitions are handled by RPC handlers.
286
+ """
287
+ if self.fetch.enabled:
288
+ self.fetch.disable()
289
+
290
+ self.dom.clear_selections()
291
+ self.dom.cleanup() # Shutdown executor properly
292
+
293
+ # Clear error state on disconnect
294
+ if self.state.error_state:
295
+ self.state.error_state = None
296
+
297
+ self.cdp.disconnect()
298
+ self.enabled_domains.clear()
299
+
300
+ self._trigger_broadcast()
301
+
302
+ def enable_domains(self, domains: list[str]) -> dict[str, str]:
303
+ """Enable CDP domains.
304
+
305
+ Args:
306
+ domains: List of domain names to enable
307
+ """
308
+ failures = {}
309
+ for domain in domains:
310
+ try:
311
+ self.cdp.execute(f"{domain}.enable")
312
+ self.enabled_domains.add(domain)
313
+ except Exception as e:
314
+ failures[domain] = str(e)
315
+ return failures
316
+
317
+ def clear_events(self) -> dict[str, Any]:
318
+ """Clear all stored CDP events."""
319
+ self.cdp.clear_events()
320
+ self._trigger_broadcast()
321
+ return {"cleared": True, "events": 0}
322
+
323
+ def list_pages(self) -> dict[str, Any]:
324
+ """List available Chrome pages."""
325
+ try:
326
+ pages = self.cdp.list_pages()
327
+ connected_id = self.cdp.page_info.get("id") if self.cdp.page_info else None
328
+ for page in pages:
329
+ page["is_connected"] = page.get("id") == connected_id
330
+ return {"pages": pages}
331
+ except Exception as e:
332
+ return {"error": str(e), "pages": []}
333
+
334
+ def _handle_unexpected_disconnect(self, code: int, reason: str) -> None:
335
+ """Handle unexpected WebSocket disconnect (tab closed, crashed, etc).
336
+
337
+ Called from background thread by CDPSession._on_close.
338
+ Performs service-level cleanup and notifies SSE clients.
339
+ Events are preserved for debugging.
340
+
341
+ Args:
342
+ code: WebSocket close code (e.g., 1006 = abnormal closure)
343
+ reason: Human-readable close reason
344
+ """
345
+ import logging
346
+ import time
347
+
348
+ logger = logging.getLogger(__name__)
349
+
350
+ # Map WebSocket close codes to user-friendly messages
351
+ reason_map = {
352
+ 1000: "Page closed normally",
353
+ 1001: "Browser tab closed",
354
+ 1006: "Connection lost (tab crashed or browser closed)",
355
+ 1011: "Chrome internal error",
356
+ }
357
+
358
+ # Handle None code (abnormal closure with no code)
359
+ if code is None:
360
+ user_reason = "Connection lost (page closed or crashed)"
361
+ else:
362
+ user_reason = reason_map.get(code, f"Connection closed unexpectedly (code {code})")
363
+
364
+ logger.warning(f"Unexpected disconnect: {user_reason}")
365
+
366
+ try:
367
+ # Thread-safe state cleanup (called from background thread)
368
+ with self._state_lock:
369
+ # Clean up service state (no CDP calls - connection already gone)
370
+ if self.fetch.enabled:
371
+ self.fetch.enabled = False # Direct state update, no CDP disable
372
+
373
+ self.dom.clear_selections()
374
+
375
+ # Events preserved for debugging - use Clear button to remove explicitly
376
+ # DB thread and field_paths persist for reconnection
377
+
378
+ # Set error state with disconnect info
379
+ self.state.error_state = {"message": user_reason, "timestamp": time.time()}
380
+
381
+ self.enabled_domains.clear()
382
+
383
+ # Cleanup outside lock (safe to call multiple times, has internal protection)
384
+ self.dom.cleanup() # Shutdown executor
385
+
386
+ # Notify SSE clients
387
+ self._trigger_broadcast()
388
+
389
+ logger.info("Unexpected disconnect cleanup completed")
390
+
391
+ except Exception as e:
392
+ logger.error(f"Error during unexpected disconnect cleanup: {e}")