webtap-tool 0.7.1__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webtap-tool might be problematic. Click here for more details.
- webtap/__init__.py +4 -0
- webtap/api.py +50 -57
- webtap/app.py +5 -0
- webtap/cdp/session.py +77 -25
- webtap/commands/TIPS.md +125 -22
- webtap/commands/_builders.py +7 -1
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/body.py +9 -5
- webtap/commands/connection.py +21 -0
- webtap/commands/javascript.py +13 -25
- webtap/commands/navigation.py +5 -0
- webtap/commands/quicktype.py +268 -0
- webtap/commands/to_model.py +23 -75
- webtap/services/body.py +209 -24
- webtap/services/dom.py +19 -12
- webtap/services/fetch.py +19 -0
- webtap/services/main.py +192 -0
- webtap/services/setup/extension.py +1 -1
- webtap/services/state_snapshot.py +88 -0
- {webtap_tool-0.7.1.dist-info → webtap_tool-0.8.1.dist-info}/METADATA +1 -1
- {webtap_tool-0.7.1.dist-info → webtap_tool-0.8.1.dist-info}/RECORD +23 -20
- {webtap_tool-0.7.1.dist-info → webtap_tool-0.8.1.dist-info}/WHEEL +0 -0
- {webtap_tool-0.7.1.dist-info → webtap_tool-0.8.1.dist-info}/entry_points.txt +0 -0
webtap/services/body.py
CHANGED
|
@@ -19,17 +19,25 @@ class BodyService:
|
|
|
19
19
|
self.cdp: CDPSession | None = None
|
|
20
20
|
self._body_cache: dict[str, dict] = {}
|
|
21
21
|
|
|
22
|
-
def
|
|
23
|
-
"""Fetch response body for
|
|
22
|
+
def get_body(self, rowid: int, use_cache: bool = True) -> dict:
|
|
23
|
+
"""Fetch request or response body for an event.
|
|
24
|
+
|
|
25
|
+
Automatically detects event type and fetches appropriate body:
|
|
26
|
+
- Network.requestWillBeSent: request body (POST data)
|
|
27
|
+
- Network.responseReceived: response body
|
|
28
|
+
- Fetch.requestPaused: response body (response stage only; request-stage events return an error)
|
|
24
29
|
|
|
25
30
|
Args:
|
|
26
|
-
rowid: Row ID from events table
|
|
31
|
+
rowid: Row ID from events table
|
|
27
32
|
use_cache: Whether to use cached body if available
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
Dict with 'body' (str), 'base64Encoded' (bool), and 'event' (dict), or 'error' (str)
|
|
28
36
|
"""
|
|
29
37
|
if not self.cdp:
|
|
30
38
|
return {"error": "No CDP session"}
|
|
31
39
|
|
|
32
|
-
# Get event from DB to extract requestId
|
|
40
|
+
# Get event from DB to extract requestId and method
|
|
33
41
|
result = self.cdp.query("SELECT event FROM events WHERE rowid = ?", [rowid])
|
|
34
42
|
|
|
35
43
|
if not result:
|
|
@@ -42,45 +50,89 @@ class BodyService:
|
|
|
42
50
|
|
|
43
51
|
method = event_data.get("method", "")
|
|
44
52
|
params = event_data.get("params", {})
|
|
53
|
+
request_id = params.get("requestId")
|
|
54
|
+
|
|
55
|
+
if not request_id:
|
|
56
|
+
return {"error": "No requestId in event"}
|
|
57
|
+
|
|
58
|
+
# Check cache first (cache includes event_data)
|
|
59
|
+
cache_key = f"{request_id}:{method}"
|
|
60
|
+
if use_cache and cache_key in self._body_cache:
|
|
61
|
+
logger.debug(f"Using cached body for {cache_key}")
|
|
62
|
+
return self._body_cache[cache_key]
|
|
63
|
+
|
|
64
|
+
# Handle request body (POST data)
|
|
65
|
+
if method == "Network.requestWillBeSent":
|
|
66
|
+
request = params.get("request", {})
|
|
67
|
+
|
|
68
|
+
# Check inline postData first (may be present for small bodies)
|
|
69
|
+
if request.get("postData"):
|
|
70
|
+
logger.debug(f"Using inline postData for {request_id}")
|
|
71
|
+
body_data = {"body": request["postData"], "base64Encoded": False, "event": event_data}
|
|
72
|
+
if use_cache:
|
|
73
|
+
self._body_cache[cache_key] = body_data
|
|
74
|
+
return body_data
|
|
75
|
+
|
|
76
|
+
# Check if request has POST data
|
|
77
|
+
if not request.get("hasPostData"):
|
|
78
|
+
return {"error": "No POST data in this request (GET or no body)"}
|
|
79
|
+
|
|
80
|
+
# Try to fetch POST data via CDP
|
|
81
|
+
try:
|
|
82
|
+
logger.debug(f"Fetching POST data for {request_id} using Network.getRequestPostData")
|
|
83
|
+
result = self.cdp.execute("Network.getRequestPostData", {"requestId": request_id})
|
|
84
|
+
body_data = {"body": result.get("postData", ""), "base64Encoded": False, "event": event_data}
|
|
85
|
+
|
|
86
|
+
if use_cache:
|
|
87
|
+
self._body_cache[cache_key] = body_data
|
|
88
|
+
logger.debug(f"Cached POST data for {request_id}")
|
|
89
|
+
|
|
90
|
+
return body_data
|
|
45
91
|
|
|
46
|
-
|
|
47
|
-
|
|
92
|
+
except Exception as e:
|
|
93
|
+
error_msg = str(e)
|
|
94
|
+
logger.error(f"Failed to fetch POST data for {request_id}: {error_msg}")
|
|
95
|
+
# Provide helpful error message
|
|
96
|
+
if "No resource with given identifier found" in error_msg:
|
|
97
|
+
return {"error": "POST data not available (possibly too large or not captured by CDP)"}
|
|
98
|
+
return {"error": f"Failed to fetch POST data: {error_msg}"}
|
|
99
|
+
|
|
100
|
+
# Handle response body
|
|
101
|
+
elif method == "Fetch.requestPaused":
|
|
48
102
|
# Fetch interception - verify it's response stage
|
|
49
103
|
if "responseStatusCode" not in params:
|
|
50
104
|
return {"error": "Not a response stage event (no responseStatusCode)"}
|
|
51
|
-
request_id = params.get("requestId")
|
|
52
105
|
domain = "Fetch"
|
|
106
|
+
|
|
53
107
|
elif method == "Network.responseReceived":
|
|
54
108
|
# Regular network response
|
|
55
|
-
request_id = params.get("requestId")
|
|
56
109
|
domain = "Network"
|
|
57
|
-
else:
|
|
58
|
-
return {"error": f"Not a response event (method: {method})"}
|
|
59
110
|
|
|
60
|
-
|
|
61
|
-
return
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
if use_cache and request_id in self._body_cache:
|
|
65
|
-
logger.debug(f"Using cached body for {request_id}")
|
|
66
|
-
return self._body_cache[request_id]
|
|
111
|
+
else:
|
|
112
|
+
# Unknown event type - return empty body but include event for expr access
|
|
113
|
+
logger.debug(f"Unknown event type {method} - returning empty body with event data")
|
|
114
|
+
return {"body": "", "base64Encoded": False, "event": event_data}
|
|
67
115
|
|
|
116
|
+
# Fetch response body from CDP
|
|
68
117
|
try:
|
|
69
|
-
|
|
70
|
-
logger.debug(f"Fetching body for {request_id} using {domain}.getResponseBody")
|
|
118
|
+
logger.debug(f"Fetching response body for {request_id} using {domain}.getResponseBody")
|
|
71
119
|
result = self.cdp.execute(f"{domain}.getResponseBody", {"requestId": request_id})
|
|
72
120
|
|
|
73
|
-
body_data = {
|
|
121
|
+
body_data = {
|
|
122
|
+
"body": result.get("body", ""),
|
|
123
|
+
"base64Encoded": result.get("base64Encoded", False),
|
|
124
|
+
"event": event_data,
|
|
125
|
+
}
|
|
74
126
|
|
|
75
|
-
# Cache it
|
|
127
|
+
# Cache it
|
|
76
128
|
if use_cache:
|
|
77
|
-
self._body_cache[
|
|
78
|
-
logger.debug(f"Cached body for {request_id}")
|
|
129
|
+
self._body_cache[cache_key] = body_data
|
|
130
|
+
logger.debug(f"Cached response body for {request_id}")
|
|
79
131
|
|
|
80
132
|
return body_data
|
|
81
133
|
|
|
82
134
|
except Exception as e:
|
|
83
|
-
logger.error(f"Failed to fetch body for {request_id}: {e}")
|
|
135
|
+
logger.error(f"Failed to fetch response body for {request_id}: {e}")
|
|
84
136
|
return {"error": str(e)}
|
|
85
137
|
|
|
86
138
|
def clear_cache(self):
|
|
@@ -111,3 +163,136 @@ class BodyService:
|
|
|
111
163
|
except Exception as e:
|
|
112
164
|
logger.error(f"Failed to decode base64 body: {e}")
|
|
113
165
|
return body_content # Return original if decode fails
|
|
166
|
+
|
|
167
|
+
def prepare_for_generation(
|
|
168
|
+
self,
|
|
169
|
+
event: int,
|
|
170
|
+
json_path: str = None, # pyright: ignore[reportArgumentType]
|
|
171
|
+
expr: str = None, # pyright: ignore[reportArgumentType]
|
|
172
|
+
) -> dict:
|
|
173
|
+
"""Prepare HTTP body for code generation.
|
|
174
|
+
|
|
175
|
+
Orchestrates the complete pipeline:
|
|
176
|
+
1. Fetch body + event from CDP
|
|
177
|
+
2. Decode base64 if needed
|
|
178
|
+
3. Transform via expression OR validate and parse JSON
|
|
179
|
+
4. Extract nested data via json_path
|
|
180
|
+
5. Validate data structure (dict/list)
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
event: Event row ID from network() or events()
|
|
184
|
+
json_path: Optional JSON path for nested extraction (e.g., "data[0]")
|
|
185
|
+
expr: Optional Python expression with 'body' and 'event' variables
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
Dict with 'data' (dict|list) on success, or 'error' (str) on failure.
|
|
189
|
+
May include 'suggestions' (list[str]) for error guidance.
|
|
190
|
+
|
|
191
|
+
Examples:
|
|
192
|
+
result = body_service.prepare_for_generation(123, json_path="data[0]")
|
|
193
|
+
if result.get("error"):
|
|
194
|
+
return error_response(result["error"], suggestions=result.get("suggestions"))
|
|
195
|
+
data = result["data"]
|
|
196
|
+
"""
|
|
197
|
+
# 1. Fetch body + event from CDP
|
|
198
|
+
result = self.get_body(event, use_cache=True)
|
|
199
|
+
if "error" in result:
|
|
200
|
+
return {"error": result["error"], "suggestions": [], "data": None}
|
|
201
|
+
|
|
202
|
+
body_content = result["body"]
|
|
203
|
+
is_base64 = result["base64Encoded"]
|
|
204
|
+
event_data = result["event"]
|
|
205
|
+
|
|
206
|
+
# 2. Decode if base64
|
|
207
|
+
if is_base64:
|
|
208
|
+
decoded = self.decode_body(body_content, is_base64)
|
|
209
|
+
if isinstance(decoded, bytes):
|
|
210
|
+
return {
|
|
211
|
+
"error": "Body is binary content",
|
|
212
|
+
"suggestions": [
|
|
213
|
+
"Only text/JSON can be converted to code",
|
|
214
|
+
"Try a different event with text content",
|
|
215
|
+
],
|
|
216
|
+
"data": None,
|
|
217
|
+
}
|
|
218
|
+
body_content = decoded
|
|
219
|
+
|
|
220
|
+
# 3. Transform via expression OR validate and parse JSON
|
|
221
|
+
if expr:
|
|
222
|
+
# Use expression evaluation from _utils
|
|
223
|
+
from webtap.commands._utils import evaluate_expression
|
|
224
|
+
|
|
225
|
+
try:
|
|
226
|
+
namespace = {"body": body_content, "event": event_data}
|
|
227
|
+
data, _ = evaluate_expression(expr, namespace)
|
|
228
|
+
except Exception as e:
|
|
229
|
+
return {
|
|
230
|
+
"error": f"Expression evaluation failed: {e}",
|
|
231
|
+
"suggestions": [
|
|
232
|
+
"Check your expression syntax",
|
|
233
|
+
"Variables available: 'body' (str), 'event' (dict)",
|
|
234
|
+
"Example: dict(urllib.parse.parse_qsl(body))",
|
|
235
|
+
"Example: json.loads(body)['data'][0]",
|
|
236
|
+
],
|
|
237
|
+
"data": None,
|
|
238
|
+
}
|
|
239
|
+
else:
|
|
240
|
+
# Validate body is not empty before parsing
|
|
241
|
+
if not body_content.strip():
|
|
242
|
+
return {
|
|
243
|
+
"error": "Body is empty",
|
|
244
|
+
"suggestions": [
|
|
245
|
+
"Use expr to extract data from event for non-HTTP events",
|
|
246
|
+
"Example: expr=\"json.loads(event['params']['response']['payloadData'])\"",
|
|
247
|
+
"Check the event structure with inspect() first",
|
|
248
|
+
],
|
|
249
|
+
"data": None,
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
# Parse as JSON
|
|
253
|
+
from webtap.commands._code_generation import parse_json
|
|
254
|
+
|
|
255
|
+
data, error = parse_json(body_content)
|
|
256
|
+
if error:
|
|
257
|
+
return {
|
|
258
|
+
"error": error,
|
|
259
|
+
"suggestions": [
|
|
260
|
+
"Body must be valid JSON or use expr to transform it",
|
|
261
|
+
'For form data: expr="dict(urllib.parse.parse_qsl(body))"',
|
|
262
|
+
"Check the body with body() command first",
|
|
263
|
+
],
|
|
264
|
+
"data": None,
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
# 4. Extract nested path if specified
|
|
268
|
+
if json_path:
|
|
269
|
+
from webtap.commands._code_generation import extract_json_path
|
|
270
|
+
|
|
271
|
+
data, error = extract_json_path(data, json_path)
|
|
272
|
+
if error:
|
|
273
|
+
return {
|
|
274
|
+
"error": error,
|
|
275
|
+
"suggestions": [
|
|
276
|
+
f"Path '{json_path}' not found in body",
|
|
277
|
+
"Check the body structure with body() command",
|
|
278
|
+
'Try a simpler path like "data" or "data[0]"',
|
|
279
|
+
],
|
|
280
|
+
"data": None,
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
# 5. Validate structure
|
|
284
|
+
from webtap.commands._code_generation import validate_generation_data
|
|
285
|
+
|
|
286
|
+
is_valid, error = validate_generation_data(data)
|
|
287
|
+
if not is_valid:
|
|
288
|
+
return {
|
|
289
|
+
"error": error,
|
|
290
|
+
"suggestions": [
|
|
291
|
+
"Code generation requires dict or list structure",
|
|
292
|
+
"Adjust json_path to extract a complex object",
|
|
293
|
+
"Or use expr to transform data into dict/list",
|
|
294
|
+
],
|
|
295
|
+
"data": None,
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
return {"data": data, "error": None, "suggestions": []}
|
webtap/services/dom.py
CHANGED
|
@@ -46,7 +46,7 @@ class DOMService:
|
|
|
46
46
|
self.state = state
|
|
47
47
|
self._inspection_active = False
|
|
48
48
|
self._next_id = 1
|
|
49
|
-
self.
|
|
49
|
+
self._broadcast_callback: "Any | None" = None # Callback to service._trigger_broadcast()
|
|
50
50
|
self._state_lock = threading.Lock() # Protect state mutations
|
|
51
51
|
self._pending_selections = 0 # Track in-flight selection processing
|
|
52
52
|
self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="dom-worker")
|
|
@@ -59,13 +59,13 @@ class DOMService:
|
|
|
59
59
|
"""Set state after initialization."""
|
|
60
60
|
self.state = state
|
|
61
61
|
|
|
62
|
-
def
|
|
63
|
-
"""Set
|
|
62
|
+
def set_broadcast_callback(self, callback: "Any") -> None:
|
|
63
|
+
"""Set callback for broadcasting state changes.
|
|
64
64
|
|
|
65
65
|
Args:
|
|
66
|
-
|
|
66
|
+
callback: Function to call when state changes (service._trigger_broadcast)
|
|
67
67
|
"""
|
|
68
|
-
self.
|
|
68
|
+
self._broadcast_callback = callback
|
|
69
69
|
|
|
70
70
|
def start_inspect(self) -> dict[str, Any]:
|
|
71
71
|
"""Enable CDP element inspection mode.
|
|
@@ -116,6 +116,7 @@ class DOMService:
|
|
|
116
116
|
self._inspection_active = True
|
|
117
117
|
logger.info("Element inspection mode enabled")
|
|
118
118
|
|
|
119
|
+
self._trigger_broadcast()
|
|
119
120
|
return {"success": True, "inspect_active": True}
|
|
120
121
|
|
|
121
122
|
except Exception as e:
|
|
@@ -143,6 +144,7 @@ class DOMService:
|
|
|
143
144
|
self._inspection_active = False
|
|
144
145
|
logger.info("Element inspection mode disabled")
|
|
145
146
|
|
|
147
|
+
self._trigger_broadcast()
|
|
146
148
|
return {"success": True, "inspect_active": False}
|
|
147
149
|
|
|
148
150
|
except Exception as e:
|
|
@@ -244,12 +246,12 @@ class DOMService:
|
|
|
244
246
|
self._trigger_broadcast()
|
|
245
247
|
|
|
246
248
|
def _trigger_broadcast(self) -> None:
|
|
247
|
-
"""Trigger SSE broadcast via
|
|
248
|
-
if self.
|
|
249
|
+
"""Trigger SSE broadcast via service callback (ensures snapshot update)."""
|
|
250
|
+
if self._broadcast_callback:
|
|
249
251
|
try:
|
|
250
|
-
self.
|
|
252
|
+
self._broadcast_callback()
|
|
251
253
|
except Exception as e:
|
|
252
|
-
logger.debug(f"Failed to
|
|
254
|
+
logger.debug(f"Failed to trigger broadcast: {e}")
|
|
253
255
|
|
|
254
256
|
def _extract_node_data(self, backend_node_id: int) -> dict[str, Any]:
|
|
255
257
|
"""Extract complete element data via CDP.
|
|
@@ -486,14 +488,16 @@ class DOMService:
|
|
|
486
488
|
self.state.browser_data["selections"] = {}
|
|
487
489
|
self._next_id = 1
|
|
488
490
|
logger.info("Selections cleared")
|
|
491
|
+
self._trigger_broadcast()
|
|
489
492
|
|
|
490
493
|
def cleanup(self) -> None:
|
|
491
494
|
"""Cleanup resources (executor, callbacks).
|
|
492
495
|
|
|
493
496
|
Call this before disconnect or app exit.
|
|
497
|
+
Safe to call multiple times.
|
|
494
498
|
"""
|
|
495
499
|
# Shutdown executor - wait=False to avoid blocking on stuck tasks
|
|
496
|
-
# cancel_futures=True prevents hanging on incomplete selections
|
|
500
|
+
# cancel_futures=True prevents hanging on incomplete selections
|
|
497
501
|
if hasattr(self, "_executor"):
|
|
498
502
|
try:
|
|
499
503
|
self._executor.shutdown(wait=False, cancel_futures=True)
|
|
@@ -501,12 +505,15 @@ class DOMService:
|
|
|
501
505
|
except Exception as e:
|
|
502
506
|
logger.debug(f"Executor shutdown error (non-fatal): {e}")
|
|
503
507
|
|
|
504
|
-
# Clear inspection state
|
|
505
|
-
if self._inspection_active:
|
|
508
|
+
# Clear inspection state (only if connected)
|
|
509
|
+
if self._inspection_active and self.cdp and self.cdp.is_connected:
|
|
506
510
|
try:
|
|
507
511
|
self.stop_inspect()
|
|
508
512
|
except Exception as e:
|
|
509
513
|
logger.debug(f"Failed to stop inspect on cleanup: {e}")
|
|
510
514
|
|
|
515
|
+
# Force clear inspection flag even if CDP call failed
|
|
516
|
+
self._inspection_active = False
|
|
517
|
+
|
|
511
518
|
|
|
512
519
|
__all__ = ["DOMService"]
|
webtap/services/fetch.py
CHANGED
|
@@ -21,6 +21,23 @@ class FetchService:
|
|
|
21
21
|
self.enable_response_stage = False # Config option for future
|
|
22
22
|
self.cdp: CDPSession | None = None
|
|
23
23
|
self.body_service: BodyService | None = None
|
|
24
|
+
self._broadcast_callback: "Any | None" = None # Callback to service._trigger_broadcast()
|
|
25
|
+
|
|
26
|
+
def set_broadcast_callback(self, callback: "Any") -> None:
|
|
27
|
+
"""Set callback for broadcasting state changes.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
callback: Function to call when state changes (service._trigger_broadcast)
|
|
31
|
+
"""
|
|
32
|
+
self._broadcast_callback = callback
|
|
33
|
+
|
|
34
|
+
def _trigger_broadcast(self) -> None:
|
|
35
|
+
"""Trigger SSE broadcast via service callback (ensures snapshot update)."""
|
|
36
|
+
if self._broadcast_callback:
|
|
37
|
+
try:
|
|
38
|
+
self._broadcast_callback()
|
|
39
|
+
except Exception as e:
|
|
40
|
+
logger.debug(f"Failed to trigger broadcast: {e}")
|
|
24
41
|
|
|
25
42
|
# ============= Core State Queries =============
|
|
26
43
|
|
|
@@ -147,6 +164,7 @@ class FetchService:
|
|
|
147
164
|
stage_msg = "Request and Response stages" if response_stage else "Request stage only"
|
|
148
165
|
logger.info(f"Fetch interception enabled ({stage_msg})")
|
|
149
166
|
|
|
167
|
+
self._trigger_broadcast() # Update snapshot
|
|
150
168
|
return {"enabled": True, "stages": stage_msg, "paused": self.paused_count}
|
|
151
169
|
|
|
152
170
|
except Exception as e:
|
|
@@ -174,6 +192,7 @@ class FetchService:
|
|
|
174
192
|
self.body_service.clear_cache()
|
|
175
193
|
|
|
176
194
|
logger.info("Fetch interception disabled")
|
|
195
|
+
self._trigger_broadcast() # Update snapshot
|
|
177
196
|
return {"enabled": False}
|
|
178
197
|
|
|
179
198
|
except Exception as e:
|
webtap/services/main.py
CHANGED
|
@@ -12,6 +12,7 @@ from webtap.services.network import NetworkService
|
|
|
12
12
|
from webtap.services.console import ConsoleService
|
|
13
13
|
from webtap.services.body import BodyService
|
|
14
14
|
from webtap.services.dom import DOMService
|
|
15
|
+
from webtap.services.state_snapshot import StateSnapshot
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
REQUIRED_DOMAINS = [
|
|
@@ -47,8 +48,11 @@ class WebTapService:
|
|
|
47
48
|
Args:
|
|
48
49
|
state: WebTapState instance from app.py
|
|
49
50
|
"""
|
|
51
|
+
import threading
|
|
52
|
+
|
|
50
53
|
self.state = state
|
|
51
54
|
self.cdp = state.cdp
|
|
55
|
+
self._state_lock = threading.RLock() # Reentrant lock - safe to acquire multiple times by same thread
|
|
52
56
|
|
|
53
57
|
self.enabled_domains: set[str] = set()
|
|
54
58
|
self.filters = FilterManager()
|
|
@@ -65,8 +69,10 @@ class WebTapService:
|
|
|
65
69
|
self.body.cdp = self.cdp
|
|
66
70
|
self.dom.set_cdp(self.cdp)
|
|
67
71
|
self.dom.set_state(self.state)
|
|
72
|
+
self.dom.set_broadcast_callback(self._trigger_broadcast) # DOM calls back for snapshot updates
|
|
68
73
|
|
|
69
74
|
self.fetch.body_service = self.body
|
|
75
|
+
self.fetch.set_broadcast_callback(self._trigger_broadcast) # Fetch calls back for snapshot updates
|
|
70
76
|
|
|
71
77
|
# Legacy wiring for CDP event handler
|
|
72
78
|
self.cdp.fetch_service = self.fetch
|
|
@@ -75,6 +81,127 @@ class WebTapService:
|
|
|
75
81
|
self.cdp.register_event_callback("Overlay.inspectNodeRequested", self.dom.handle_inspect_node_requested)
|
|
76
82
|
self.cdp.register_event_callback("Page.frameNavigated", self.dom.handle_frame_navigated)
|
|
77
83
|
|
|
84
|
+
# Register disconnect callback for unexpected disconnects
|
|
85
|
+
self.cdp.set_disconnect_callback(self._handle_unexpected_disconnect)
|
|
86
|
+
|
|
87
|
+
# Broadcast queue for SSE state updates (set by API server)
|
|
88
|
+
self._broadcast_queue: "Any | None" = None
|
|
89
|
+
|
|
90
|
+
# Immutable state snapshot for thread-safe SSE reads
|
|
91
|
+
# Updated atomically on every state change, read without locks
|
|
92
|
+
self._state_snapshot: StateSnapshot = StateSnapshot.create_empty()
|
|
93
|
+
|
|
94
|
+
def set_broadcast_queue(self, queue: "Any") -> None:
|
|
95
|
+
"""Set queue for broadcasting state changes.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
queue: asyncio.Queue for thread-safe signaling
|
|
99
|
+
"""
|
|
100
|
+
self._broadcast_queue = queue
|
|
101
|
+
|
|
102
|
+
def _create_snapshot(self) -> StateSnapshot:
|
|
103
|
+
"""Create immutable state snapshot from current state.
|
|
104
|
+
|
|
105
|
+
MUST be called with self._state_lock held to ensure atomic read.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
Frozen StateSnapshot with current state
|
|
109
|
+
"""
|
|
110
|
+
# Connection state (read page_info first to avoid race with disconnect)
|
|
111
|
+
page_info = self.cdp.page_info
|
|
112
|
+
connected = self.cdp.is_connected and page_info is not None
|
|
113
|
+
page_id = page_info.get("id", "") if page_info else ""
|
|
114
|
+
page_title = page_info.get("title", "") if page_info else ""
|
|
115
|
+
page_url = page_info.get("url", "") if page_info else ""
|
|
116
|
+
|
|
117
|
+
# Event count
|
|
118
|
+
event_count = self.event_count
|
|
119
|
+
|
|
120
|
+
# Fetch state
|
|
121
|
+
fetch_enabled = self.fetch.enabled
|
|
122
|
+
paused_count = self.fetch.paused_count if fetch_enabled else 0
|
|
123
|
+
|
|
124
|
+
# Filter state (convert to immutable tuples)
|
|
125
|
+
fm = self.filters
|
|
126
|
+
filter_categories = list(fm.filters.keys())
|
|
127
|
+
enabled_filters = tuple(fm.enabled_categories)
|
|
128
|
+
disabled_filters = tuple(cat for cat in filter_categories if cat not in enabled_filters)
|
|
129
|
+
|
|
130
|
+
# Browser/DOM state (get_state() is already thread-safe internally)
|
|
131
|
+
browser_state = self.dom.get_state()
|
|
132
|
+
|
|
133
|
+
# Error state
|
|
134
|
+
error = self.state.error_state
|
|
135
|
+
error_message = error.get("message") if error else None
|
|
136
|
+
error_timestamp = error.get("timestamp") if error else None
|
|
137
|
+
|
|
138
|
+
# Deep copy selections so the snapshot is isolated from later mutation of the live state
|
|
139
|
+
import copy
|
|
140
|
+
|
|
141
|
+
selections = copy.deepcopy(browser_state["selections"])
|
|
142
|
+
|
|
143
|
+
return StateSnapshot(
|
|
144
|
+
connected=connected,
|
|
145
|
+
page_id=page_id,
|
|
146
|
+
page_title=page_title,
|
|
147
|
+
page_url=page_url,
|
|
148
|
+
event_count=event_count,
|
|
149
|
+
fetch_enabled=fetch_enabled,
|
|
150
|
+
paused_count=paused_count,
|
|
151
|
+
enabled_filters=enabled_filters,
|
|
152
|
+
disabled_filters=disabled_filters,
|
|
153
|
+
inspect_active=browser_state["inspect_active"],
|
|
154
|
+
selections=selections, # Deep copy ensures nested dicts are immutable
|
|
155
|
+
prompt=browser_state["prompt"],
|
|
156
|
+
pending_count=browser_state["pending_count"],
|
|
157
|
+
error_message=error_message,
|
|
158
|
+
error_timestamp=error_timestamp,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
def _trigger_broadcast(self) -> None:
|
|
162
|
+
"""Trigger SSE broadcast with updated state snapshot (thread-safe).
|
|
163
|
+
|
|
164
|
+
Called after service mutations to:
|
|
165
|
+
1. Create fresh immutable snapshot (atomic replacement)
|
|
166
|
+
2. Signal SSE clients to broadcast
|
|
167
|
+
|
|
168
|
+
Uses RLock so same thread can call multiple times safely.
|
|
169
|
+
NOTE(review): asyncio.Queue is not thread-safe; when called off the event-loop thread, put_nowait() should be scheduled via loop.call_soon_threadsafe().
|
|
170
|
+
"""
|
|
171
|
+
import logging
|
|
172
|
+
|
|
173
|
+
logger = logging.getLogger(__name__)
|
|
174
|
+
|
|
175
|
+
# Update snapshot atomically
|
|
176
|
+
# RLock allows same thread to acquire multiple times, blocks other threads
|
|
177
|
+
try:
|
|
178
|
+
with self._state_lock:
|
|
179
|
+
self._state_snapshot = self._create_snapshot()
|
|
180
|
+
except (TypeError, AttributeError) as e:
|
|
181
|
+
# Programming errors should propagate for debugging
|
|
182
|
+
logger.error(f"Programming error in snapshot creation: {e}")
|
|
183
|
+
raise
|
|
184
|
+
except Exception as e:
|
|
185
|
+
# Unexpected errors logged but don't crash the app
|
|
186
|
+
logger.error(f"Failed to create state snapshot: {e}", exc_info=True)
|
|
187
|
+
return # Don't signal broadcast if snapshot creation failed
|
|
188
|
+
|
|
189
|
+
# Signal broadcast (store reference to avoid TOCTOU race)
|
|
190
|
+
queue = self._broadcast_queue
|
|
191
|
+
if queue:
|
|
192
|
+
try:
|
|
193
|
+
queue.put_nowait({"type": "state_change"})
|
|
194
|
+
except Exception as e:
|
|
195
|
+
logger.warning(f"Failed to queue broadcast: {e}")
|
|
196
|
+
|
|
197
|
+
def get_state_snapshot(self) -> StateSnapshot:
|
|
198
|
+
"""Get current immutable state snapshot (thread-safe, no locks).
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
Current StateSnapshot - immutable, safe to read from any thread
|
|
202
|
+
"""
|
|
203
|
+
return self._state_snapshot
|
|
204
|
+
|
|
78
205
|
@property
|
|
79
206
|
def event_count(self) -> int:
|
|
80
207
|
"""Total count of all CDP events stored."""
|
|
@@ -105,6 +232,7 @@ class WebTapService:
|
|
|
105
232
|
self.filters.load()
|
|
106
233
|
|
|
107
234
|
page_info = self.cdp.page_info or {}
|
|
235
|
+
self._trigger_broadcast()
|
|
108
236
|
return {"connected": True, "title": page_info.get("title", "Untitled"), "url": page_info.get("url", "")}
|
|
109
237
|
except Exception as e:
|
|
110
238
|
return {"error": str(e)}
|
|
@@ -118,6 +246,7 @@ class WebTapService:
|
|
|
118
246
|
|
|
119
247
|
self.body.clear_cache()
|
|
120
248
|
self.dom.clear_selections()
|
|
249
|
+
self.dom.cleanup() # Shutdown executor properly
|
|
121
250
|
|
|
122
251
|
# Clear error state on disconnect
|
|
123
252
|
if self.state.error_state:
|
|
@@ -126,6 +255,7 @@ class WebTapService:
|
|
|
126
255
|
self.cdp.disconnect()
|
|
127
256
|
self.enabled_domains.clear()
|
|
128
257
|
|
|
258
|
+
self._trigger_broadcast()
|
|
129
259
|
return {"disconnected": True, "was_connected": was_connected}
|
|
130
260
|
|
|
131
261
|
def enable_domains(self, domains: list[str]) -> dict[str, str]:
|
|
@@ -175,6 +305,7 @@ class WebTapService:
|
|
|
175
305
|
def clear_events(self) -> dict[str, Any]:
|
|
176
306
|
"""Clear all stored CDP events."""
|
|
177
307
|
self.cdp.clear_events()
|
|
308
|
+
self._trigger_broadcast()
|
|
178
309
|
return {"cleared": True, "events": 0}
|
|
179
310
|
|
|
180
311
|
def list_pages(self) -> dict[str, Any]:
|
|
@@ -187,3 +318,64 @@ class WebTapService:
|
|
|
187
318
|
return {"pages": pages}
|
|
188
319
|
except Exception as e:
|
|
189
320
|
return {"error": str(e), "pages": []}
|
|
321
|
+
|
|
322
|
+
def _handle_unexpected_disconnect(self, code: int, reason: str) -> None:
|
|
323
|
+
"""Handle unexpected WebSocket disconnect (tab closed, crashed, etc).
|
|
324
|
+
|
|
325
|
+
Called from background thread by CDPSession._on_close.
|
|
326
|
+
Performs service-level cleanup and notifies SSE clients.
|
|
327
|
+
Events are preserved for debugging.
|
|
328
|
+
|
|
329
|
+
Args:
|
|
330
|
+
code: WebSocket close code (e.g., 1006 = abnormal closure)
|
|
331
|
+
reason: Human-readable close reason
|
|
332
|
+
"""
|
|
333
|
+
import logging
|
|
334
|
+
import time
|
|
335
|
+
|
|
336
|
+
logger = logging.getLogger(__name__)
|
|
337
|
+
|
|
338
|
+
# Map WebSocket close codes to user-friendly messages
|
|
339
|
+
reason_map = {
|
|
340
|
+
1000: "Page closed normally",
|
|
341
|
+
1001: "Browser tab closed",
|
|
342
|
+
1006: "Connection lost (tab crashed or browser closed)",
|
|
343
|
+
1011: "Chrome internal error",
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
# Handle None code (abnormal closure with no code)
|
|
347
|
+
if code is None:
|
|
348
|
+
user_reason = "Connection lost (page closed or crashed)"
|
|
349
|
+
else:
|
|
350
|
+
user_reason = reason_map.get(code, f"Connection closed unexpectedly (code {code})")
|
|
351
|
+
|
|
352
|
+
logger.warning(f"Unexpected disconnect: {user_reason}")
|
|
353
|
+
|
|
354
|
+
try:
|
|
355
|
+
# Thread-safe state cleanup (called from background thread)
|
|
356
|
+
with self._state_lock:
|
|
357
|
+
# Clean up service state (no CDP calls - connection already gone)
|
|
358
|
+
if self.fetch.enabled:
|
|
359
|
+
self.fetch.enabled = False # Direct state update, no CDP disable
|
|
360
|
+
|
|
361
|
+
self.body.clear_cache()
|
|
362
|
+
self.dom.clear_selections()
|
|
363
|
+
|
|
364
|
+
# Events preserved for debugging - use Clear button to remove explicitly
|
|
365
|
+
# DB thread and field_paths persist for reconnection
|
|
366
|
+
|
|
367
|
+
# Set error state with disconnect info
|
|
368
|
+
self.state.error_state = {"message": user_reason, "timestamp": time.time()}
|
|
369
|
+
|
|
370
|
+
self.enabled_domains.clear()
|
|
371
|
+
|
|
372
|
+
# Cleanup outside lock (safe to call multiple times, has internal protection)
|
|
373
|
+
self.dom.cleanup() # Shutdown executor
|
|
374
|
+
|
|
375
|
+
# Notify SSE clients
|
|
376
|
+
self._trigger_broadcast()
|
|
377
|
+
|
|
378
|
+
logger.info("Unexpected disconnect cleanup completed")
|
|
379
|
+
|
|
380
|
+
except Exception as e:
|
|
381
|
+
logger.error(f"Error during unexpected disconnect cleanup: {e}")
|
|
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
|
|
12
12
|
|
|
13
13
|
# GitHub URLs for extension files
|
|
14
14
|
EXTENSION_BASE_URL = "https://raw.githubusercontent.com/angelsen/tap-tools/main/packages/webtap/extension"
|
|
15
|
-
EXTENSION_FILES = ["manifest.json", "content.js", "sidepanel.html", "sidepanel.js"]
|
|
15
|
+
EXTENSION_FILES = ["manifest.json", "background.js", "content.js", "sidepanel.html", "sidepanel.js"]
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class ExtensionSetupService:
|