webtap-tool 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webtap/VISION.md +246 -0
- webtap/__init__.py +84 -0
- webtap/__main__.py +6 -0
- webtap/api/__init__.py +9 -0
- webtap/api/app.py +26 -0
- webtap/api/models.py +69 -0
- webtap/api/server.py +111 -0
- webtap/api/sse.py +182 -0
- webtap/api/state.py +89 -0
- webtap/app.py +79 -0
- webtap/cdp/README.md +275 -0
- webtap/cdp/__init__.py +12 -0
- webtap/cdp/har.py +302 -0
- webtap/cdp/schema/README.md +41 -0
- webtap/cdp/schema/cdp_protocol.json +32785 -0
- webtap/cdp/schema/cdp_version.json +8 -0
- webtap/cdp/session.py +667 -0
- webtap/client.py +81 -0
- webtap/commands/DEVELOPER_GUIDE.md +401 -0
- webtap/commands/TIPS.md +269 -0
- webtap/commands/__init__.py +29 -0
- webtap/commands/_builders.py +331 -0
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/_tips.py +147 -0
- webtap/commands/_utils.py +273 -0
- webtap/commands/connection.py +220 -0
- webtap/commands/console.py +87 -0
- webtap/commands/fetch.py +310 -0
- webtap/commands/filters.py +116 -0
- webtap/commands/javascript.py +73 -0
- webtap/commands/js_export.py +73 -0
- webtap/commands/launch.py +72 -0
- webtap/commands/navigation.py +197 -0
- webtap/commands/network.py +136 -0
- webtap/commands/quicktype.py +306 -0
- webtap/commands/request.py +93 -0
- webtap/commands/selections.py +138 -0
- webtap/commands/setup.py +219 -0
- webtap/commands/to_model.py +163 -0
- webtap/daemon.py +185 -0
- webtap/daemon_state.py +53 -0
- webtap/filters.py +219 -0
- webtap/rpc/__init__.py +14 -0
- webtap/rpc/errors.py +49 -0
- webtap/rpc/framework.py +223 -0
- webtap/rpc/handlers.py +625 -0
- webtap/rpc/machine.py +84 -0
- webtap/services/README.md +83 -0
- webtap/services/__init__.py +15 -0
- webtap/services/console.py +124 -0
- webtap/services/dom.py +547 -0
- webtap/services/fetch.py +415 -0
- webtap/services/main.py +392 -0
- webtap/services/network.py +401 -0
- webtap/services/setup/__init__.py +185 -0
- webtap/services/setup/chrome.py +233 -0
- webtap/services/setup/desktop.py +255 -0
- webtap/services/setup/extension.py +147 -0
- webtap/services/setup/platform.py +162 -0
- webtap/services/state_snapshot.py +86 -0
- webtap_tool-0.11.0.dist-info/METADATA +535 -0
- webtap_tool-0.11.0.dist-info/RECORD +64 -0
- webtap_tool-0.11.0.dist-info/WHEEL +4 -0
- webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/services/main.py
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
"""Main service orchestrator for WebTap business logic."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from webtap.filters import FilterManager
|
|
6
|
+
from webtap.services.fetch import FetchService
|
|
7
|
+
from webtap.services.network import NetworkService
|
|
8
|
+
from webtap.services.console import ConsoleService
|
|
9
|
+
from webtap.services.dom import DOMService
|
|
10
|
+
from webtap.services.state_snapshot import StateSnapshot
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# CDP domains that connect_to_page() enables, in this order, right after attaching.
REQUIRED_DOMAINS: list[str] = ["Page", "Network", "Runtime", "Log", "DOMStorage"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class WebTapService:
|
|
23
|
+
"""Main service orchestrating all WebTap domain services.
|
|
24
|
+
|
|
25
|
+
Coordinates CDP session management, domain services, and filter management.
|
|
26
|
+
Shared between REPL commands and API endpoints for consistent state.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
state: WebTap application state instance.
|
|
30
|
+
cdp: CDP session for browser communication.
|
|
31
|
+
enabled_domains: Set of currently enabled CDP domains.
|
|
32
|
+
filters: Filter manager for event filtering.
|
|
33
|
+
fetch: Fetch interception service.
|
|
34
|
+
network: Network monitoring service.
|
|
35
|
+
console: Console message service.
|
|
36
|
+
dom: DOM inspection and element selection service.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(self, state):
|
|
40
|
+
"""Initialize with WebTapState instance.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
state: WebTapState instance from app.py
|
|
44
|
+
"""
|
|
45
|
+
import threading
|
|
46
|
+
|
|
47
|
+
self.state = state
|
|
48
|
+
self.cdp = state.cdp
|
|
49
|
+
self._state_lock = threading.RLock() # Reentrant lock - safe to acquire multiple times by same thread
|
|
50
|
+
|
|
51
|
+
self.enabled_domains: set[str] = set()
|
|
52
|
+
self.filters = FilterManager()
|
|
53
|
+
|
|
54
|
+
# RPC framework (set by server.py after initialization)
|
|
55
|
+
self.rpc: "Any | None" = None
|
|
56
|
+
|
|
57
|
+
self.fetch = FetchService()
|
|
58
|
+
self.network = NetworkService()
|
|
59
|
+
self.console = ConsoleService()
|
|
60
|
+
self.dom = DOMService()
|
|
61
|
+
|
|
62
|
+
self.fetch.cdp = self.cdp
|
|
63
|
+
self.network.cdp = self.cdp
|
|
64
|
+
self.network.filters = self.filters
|
|
65
|
+
self.console.cdp = self.cdp
|
|
66
|
+
self.dom.set_cdp(self.cdp)
|
|
67
|
+
self.dom.set_state(self.state)
|
|
68
|
+
self.dom.set_broadcast_callback(self._trigger_broadcast) # DOM calls back for snapshot updates
|
|
69
|
+
|
|
70
|
+
self.fetch.set_broadcast_callback(self._trigger_broadcast) # Fetch calls back for snapshot updates
|
|
71
|
+
|
|
72
|
+
# Legacy wiring for CDP event handler
|
|
73
|
+
self.cdp.fetch_service = self.fetch
|
|
74
|
+
|
|
75
|
+
# Register DOM event callbacks
|
|
76
|
+
self.cdp.register_event_callback("Overlay.inspectNodeRequested", self.dom.handle_inspect_node_requested)
|
|
77
|
+
self.cdp.register_event_callback("Page.frameNavigated", self.dom.handle_frame_navigated)
|
|
78
|
+
|
|
79
|
+
# Register disconnect callback for unexpected disconnects
|
|
80
|
+
self.cdp.set_disconnect_callback(self._handle_unexpected_disconnect)
|
|
81
|
+
|
|
82
|
+
# CDPSession calls back here when CDP events arrive
|
|
83
|
+
self.cdp.set_broadcast_callback(self._trigger_broadcast)
|
|
84
|
+
|
|
85
|
+
# Broadcast queue for SSE state updates (set by API server)
|
|
86
|
+
self._broadcast_queue: "Any | None" = None
|
|
87
|
+
|
|
88
|
+
# Coalescing flag - prevents duplicate broadcasts during rapid CDP events
|
|
89
|
+
# Service owns coalescing (single source of truth)
|
|
90
|
+
self._broadcast_pending = threading.Event()
|
|
91
|
+
|
|
92
|
+
# Immutable state snapshot for thread-safe SSE reads
|
|
93
|
+
# Updated atomically on every state change, read without locks
|
|
94
|
+
self._state_snapshot: StateSnapshot = StateSnapshot.create_empty()
|
|
95
|
+
|
|
96
|
+
def set_broadcast_queue(self, queue: "Any") -> None:
|
|
97
|
+
"""Set queue for broadcasting state changes.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
queue: asyncio.Queue for thread-safe signaling
|
|
101
|
+
"""
|
|
102
|
+
self._broadcast_queue = queue
|
|
103
|
+
|
|
104
|
+
def _create_snapshot(self) -> StateSnapshot:
|
|
105
|
+
"""Create immutable state snapshot from current state.
|
|
106
|
+
|
|
107
|
+
MUST be called with self._state_lock held to ensure atomic read.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
Frozen StateSnapshot with current state
|
|
111
|
+
"""
|
|
112
|
+
# Connection state (read page_info first to avoid race with disconnect)
|
|
113
|
+
page_info = self.cdp.page_info
|
|
114
|
+
connected = self.cdp.is_connected and page_info is not None
|
|
115
|
+
page_id = page_info.get("id", "") if page_info else ""
|
|
116
|
+
page_title = page_info.get("title", "") if page_info else ""
|
|
117
|
+
page_url = page_info.get("url", "") if page_info else ""
|
|
118
|
+
|
|
119
|
+
# Event count
|
|
120
|
+
event_count = self.event_count
|
|
121
|
+
|
|
122
|
+
# Fetch state
|
|
123
|
+
fetch_enabled = self.fetch.enabled
|
|
124
|
+
response_stage = self.fetch.enable_response_stage
|
|
125
|
+
paused_count = self.fetch.paused_count if fetch_enabled else 0
|
|
126
|
+
|
|
127
|
+
# Filter state (convert to immutable tuples)
|
|
128
|
+
fm = self.filters
|
|
129
|
+
filter_groups = list(fm.groups.keys())
|
|
130
|
+
enabled_filters = tuple(fm.enabled)
|
|
131
|
+
disabled_filters = tuple(name for name in filter_groups if name not in enabled_filters)
|
|
132
|
+
|
|
133
|
+
# Browser/DOM state (get_state() is already thread-safe internally)
|
|
134
|
+
browser_state = self.dom.get_state()
|
|
135
|
+
|
|
136
|
+
# Error state
|
|
137
|
+
error = self.state.error_state
|
|
138
|
+
error_message = error.get("message") if error else None
|
|
139
|
+
error_timestamp = error.get("timestamp") if error else None
|
|
140
|
+
|
|
141
|
+
# Deep copy selections to ensure true immutability
|
|
142
|
+
import copy
|
|
143
|
+
|
|
144
|
+
selections = copy.deepcopy(browser_state["selections"])
|
|
145
|
+
|
|
146
|
+
return StateSnapshot(
|
|
147
|
+
connected=connected,
|
|
148
|
+
page_id=page_id,
|
|
149
|
+
page_title=page_title,
|
|
150
|
+
page_url=page_url,
|
|
151
|
+
event_count=event_count,
|
|
152
|
+
fetch_enabled=fetch_enabled,
|
|
153
|
+
response_stage=response_stage,
|
|
154
|
+
paused_count=paused_count,
|
|
155
|
+
enabled_filters=enabled_filters,
|
|
156
|
+
disabled_filters=disabled_filters,
|
|
157
|
+
inspect_active=browser_state["inspect_active"],
|
|
158
|
+
selections=selections, # Deep copy ensures nested dicts are immutable
|
|
159
|
+
prompt=browser_state["prompt"],
|
|
160
|
+
pending_count=browser_state["pending_count"],
|
|
161
|
+
error_message=error_message,
|
|
162
|
+
error_timestamp=error_timestamp,
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
def _trigger_broadcast(self) -> None:
|
|
166
|
+
"""Trigger SSE broadcast with coalescing (thread-safe).
|
|
167
|
+
|
|
168
|
+
Called from:
|
|
169
|
+
- CDPSession (CDP events)
|
|
170
|
+
- DOMService (selections)
|
|
171
|
+
- FetchService (interception state)
|
|
172
|
+
- Service methods (connect, disconnect, clear)
|
|
173
|
+
|
|
174
|
+
Coalescing: Only queues signal if none pending. Prevents 1000s of
|
|
175
|
+
signals during rapid CDP events. Flag cleared by API after broadcast.
|
|
176
|
+
|
|
177
|
+
Uses atomic check-and-set to prevent race where multiple threads
|
|
178
|
+
queue multiple signals before any sets the flag.
|
|
179
|
+
"""
|
|
180
|
+
import logging
|
|
181
|
+
|
|
182
|
+
logger = logging.getLogger(__name__)
|
|
183
|
+
|
|
184
|
+
# Early exit if no queue (API not started yet)
|
|
185
|
+
if not self._broadcast_queue:
|
|
186
|
+
return
|
|
187
|
+
|
|
188
|
+
# Always update snapshot, but coalesce broadcast signals
|
|
189
|
+
with self._state_lock:
|
|
190
|
+
# Update snapshot while holding lock (always, for API responses)
|
|
191
|
+
try:
|
|
192
|
+
self._state_snapshot = self._create_snapshot()
|
|
193
|
+
except (TypeError, AttributeError) as e:
|
|
194
|
+
logger.error(f"Programming error in snapshot creation: {e}")
|
|
195
|
+
raise
|
|
196
|
+
except Exception as e:
|
|
197
|
+
logger.error(f"Failed to create state snapshot: {e}", exc_info=True)
|
|
198
|
+
return
|
|
199
|
+
|
|
200
|
+
# Skip queue signal if broadcast already pending (coalescing)
|
|
201
|
+
if self._broadcast_pending.is_set():
|
|
202
|
+
return
|
|
203
|
+
self._broadcast_pending.set()
|
|
204
|
+
|
|
205
|
+
# Signal broadcast (outside lock - queue.put_nowait is thread-safe)
|
|
206
|
+
try:
|
|
207
|
+
self._broadcast_queue.put_nowait({"type": "state_change"})
|
|
208
|
+
except Exception as e:
|
|
209
|
+
# Clear flag if queue failed so next trigger can try
|
|
210
|
+
self._broadcast_pending.clear()
|
|
211
|
+
logger.warning(f"Failed to queue broadcast: {e}")
|
|
212
|
+
|
|
213
|
+
def get_state_snapshot(self) -> StateSnapshot:
|
|
214
|
+
"""Get current immutable state snapshot (thread-safe, no locks).
|
|
215
|
+
|
|
216
|
+
Returns:
|
|
217
|
+
Current StateSnapshot - immutable, safe to read from any thread
|
|
218
|
+
"""
|
|
219
|
+
return self._state_snapshot
|
|
220
|
+
|
|
221
|
+
def clear_broadcast_pending(self) -> None:
|
|
222
|
+
"""Clear broadcast pending flag (called by API after broadcast).
|
|
223
|
+
|
|
224
|
+
Allows next state change to trigger a new broadcast.
|
|
225
|
+
Thread-safe - Event.clear() is atomic.
|
|
226
|
+
"""
|
|
227
|
+
self._broadcast_pending.clear()
|
|
228
|
+
|
|
229
|
+
@property
|
|
230
|
+
def event_count(self) -> int:
|
|
231
|
+
"""Total count of all CDP events stored."""
|
|
232
|
+
if not self.cdp or not self.cdp.is_connected:
|
|
233
|
+
return 0
|
|
234
|
+
try:
|
|
235
|
+
result = self.cdp.query("SELECT COUNT(*) FROM events")
|
|
236
|
+
return result[0][0] if result else 0
|
|
237
|
+
except Exception:
|
|
238
|
+
return 0
|
|
239
|
+
|
|
240
|
+
def connect_to_page(self, page_index: int | None = None, page_id: str | None = None) -> dict[str, Any]:
|
|
241
|
+
"""Connect to Chrome page and enable required domains.
|
|
242
|
+
|
|
243
|
+
Pure domain logic - raises exceptions on failure.
|
|
244
|
+
State machine transitions are handled by RPC handlers.
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
page_index: Index of page to connect to (for REPL)
|
|
248
|
+
page_id: ID of page to connect to (for extension)
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
Connection info dict with 'title' and 'url'
|
|
252
|
+
|
|
253
|
+
Raises:
|
|
254
|
+
Exception: On connection or domain enable failure
|
|
255
|
+
"""
|
|
256
|
+
# If already connected, disconnect first (enables seamless page switching)
|
|
257
|
+
if self.cdp.is_connected:
|
|
258
|
+
self.disconnect()
|
|
259
|
+
|
|
260
|
+
# Reset DOM service for new connection (executor may have been shutdown by previous disconnect)
|
|
261
|
+
self.dom.reset()
|
|
262
|
+
|
|
263
|
+
# Clear selections BEFORE connect to handle race with pending disconnect cleanup
|
|
264
|
+
# (disconnect handler runs in background thread, might clear after we connect)
|
|
265
|
+
self.dom.clear_selections()
|
|
266
|
+
|
|
267
|
+
self.cdp.connect(page_index=page_index, page_id=page_id)
|
|
268
|
+
|
|
269
|
+
failures = self.enable_domains(REQUIRED_DOMAINS)
|
|
270
|
+
|
|
271
|
+
if failures:
|
|
272
|
+
self.cdp.disconnect()
|
|
273
|
+
raise RuntimeError(f"Failed to enable domains: {failures}")
|
|
274
|
+
|
|
275
|
+
self.filters.load()
|
|
276
|
+
|
|
277
|
+
page_info = self.cdp.page_info or {}
|
|
278
|
+
self._trigger_broadcast()
|
|
279
|
+
return {"title": page_info.get("title", "Untitled"), "url": page_info.get("url", "")}
|
|
280
|
+
|
|
281
|
+
def disconnect(self) -> None:
|
|
282
|
+
"""Disconnect from Chrome and clean up all state.
|
|
283
|
+
|
|
284
|
+
Pure domain logic - performs full cleanup.
|
|
285
|
+
State machine transitions are handled by RPC handlers.
|
|
286
|
+
"""
|
|
287
|
+
if self.fetch.enabled:
|
|
288
|
+
self.fetch.disable()
|
|
289
|
+
|
|
290
|
+
self.dom.clear_selections()
|
|
291
|
+
self.dom.cleanup() # Shutdown executor properly
|
|
292
|
+
|
|
293
|
+
# Clear error state on disconnect
|
|
294
|
+
if self.state.error_state:
|
|
295
|
+
self.state.error_state = None
|
|
296
|
+
|
|
297
|
+
self.cdp.disconnect()
|
|
298
|
+
self.enabled_domains.clear()
|
|
299
|
+
|
|
300
|
+
self._trigger_broadcast()
|
|
301
|
+
|
|
302
|
+
def enable_domains(self, domains: list[str]) -> dict[str, str]:
|
|
303
|
+
"""Enable CDP domains.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
domains: List of domain names to enable
|
|
307
|
+
"""
|
|
308
|
+
failures = {}
|
|
309
|
+
for domain in domains:
|
|
310
|
+
try:
|
|
311
|
+
self.cdp.execute(f"{domain}.enable")
|
|
312
|
+
self.enabled_domains.add(domain)
|
|
313
|
+
except Exception as e:
|
|
314
|
+
failures[domain] = str(e)
|
|
315
|
+
return failures
|
|
316
|
+
|
|
317
|
+
def clear_events(self) -> dict[str, Any]:
|
|
318
|
+
"""Clear all stored CDP events."""
|
|
319
|
+
self.cdp.clear_events()
|
|
320
|
+
self._trigger_broadcast()
|
|
321
|
+
return {"cleared": True, "events": 0}
|
|
322
|
+
|
|
323
|
+
def list_pages(self) -> dict[str, Any]:
|
|
324
|
+
"""List available Chrome pages."""
|
|
325
|
+
try:
|
|
326
|
+
pages = self.cdp.list_pages()
|
|
327
|
+
connected_id = self.cdp.page_info.get("id") if self.cdp.page_info else None
|
|
328
|
+
for page in pages:
|
|
329
|
+
page["is_connected"] = page.get("id") == connected_id
|
|
330
|
+
return {"pages": pages}
|
|
331
|
+
except Exception as e:
|
|
332
|
+
return {"error": str(e), "pages": []}
|
|
333
|
+
|
|
334
|
+
def _handle_unexpected_disconnect(self, code: int, reason: str) -> None:
|
|
335
|
+
"""Handle unexpected WebSocket disconnect (tab closed, crashed, etc).
|
|
336
|
+
|
|
337
|
+
Called from background thread by CDPSession._on_close.
|
|
338
|
+
Performs service-level cleanup and notifies SSE clients.
|
|
339
|
+
Events are preserved for debugging.
|
|
340
|
+
|
|
341
|
+
Args:
|
|
342
|
+
code: WebSocket close code (e.g., 1006 = abnormal closure)
|
|
343
|
+
reason: Human-readable close reason
|
|
344
|
+
"""
|
|
345
|
+
import logging
|
|
346
|
+
import time
|
|
347
|
+
|
|
348
|
+
logger = logging.getLogger(__name__)
|
|
349
|
+
|
|
350
|
+
# Map WebSocket close codes to user-friendly messages
|
|
351
|
+
reason_map = {
|
|
352
|
+
1000: "Page closed normally",
|
|
353
|
+
1001: "Browser tab closed",
|
|
354
|
+
1006: "Connection lost (tab crashed or browser closed)",
|
|
355
|
+
1011: "Chrome internal error",
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
# Handle None code (abnormal closure with no code)
|
|
359
|
+
if code is None:
|
|
360
|
+
user_reason = "Connection lost (page closed or crashed)"
|
|
361
|
+
else:
|
|
362
|
+
user_reason = reason_map.get(code, f"Connection closed unexpectedly (code {code})")
|
|
363
|
+
|
|
364
|
+
logger.warning(f"Unexpected disconnect: {user_reason}")
|
|
365
|
+
|
|
366
|
+
try:
|
|
367
|
+
# Thread-safe state cleanup (called from background thread)
|
|
368
|
+
with self._state_lock:
|
|
369
|
+
# Clean up service state (no CDP calls - connection already gone)
|
|
370
|
+
if self.fetch.enabled:
|
|
371
|
+
self.fetch.enabled = False # Direct state update, no CDP disable
|
|
372
|
+
|
|
373
|
+
self.dom.clear_selections()
|
|
374
|
+
|
|
375
|
+
# Events preserved for debugging - use Clear button to remove explicitly
|
|
376
|
+
# DB thread and field_paths persist for reconnection
|
|
377
|
+
|
|
378
|
+
# Set error state with disconnect info
|
|
379
|
+
self.state.error_state = {"message": user_reason, "timestamp": time.time()}
|
|
380
|
+
|
|
381
|
+
self.enabled_domains.clear()
|
|
382
|
+
|
|
383
|
+
# Cleanup outside lock (safe to call multiple times, has internal protection)
|
|
384
|
+
self.dom.cleanup() # Shutdown executor
|
|
385
|
+
|
|
386
|
+
# Notify SSE clients
|
|
387
|
+
self._trigger_broadcast()
|
|
388
|
+
|
|
389
|
+
logger.info("Unexpected disconnect cleanup completed")
|
|
390
|
+
|
|
391
|
+
except Exception as e:
|
|
392
|
+
logger.error(f"Error during unexpected disconnect cleanup: {e}")
|