webtap-tool 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webtap-tool might be problematic. Click here for more details.

@@ -0,0 +1,116 @@
1
+ """Console monitoring service for browser messages."""
2
+
3
+ import logging
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from webtap.cdp import CDPSession
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class ConsoleService:
13
+ """Internal service for console event queries and monitoring."""
14
+
15
+ def __init__(self):
16
+ """Initialize console service."""
17
+ self.cdp: CDPSession | None = None
18
+
19
+ @property
20
+ def message_count(self) -> int:
21
+ """Count of all console messages."""
22
+ if not self.cdp:
23
+ return 0
24
+ result = self.cdp.query(
25
+ "SELECT COUNT(*) FROM events WHERE json_extract_string(event, '$.method') IN ('Runtime.consoleAPICalled', 'Log.entryAdded')"
26
+ )
27
+ return result[0][0] if result else 0
28
+
29
+ @property
30
+ def error_count(self) -> int:
31
+ """Count of console errors."""
32
+ if not self.cdp:
33
+ return 0
34
+ result = self.cdp.query("""
35
+ SELECT COUNT(*) FROM events
36
+ WHERE json_extract_string(event, '$.method') IN ('Runtime.consoleAPICalled', 'Log.entryAdded')
37
+ AND (
38
+ json_extract_string(event, '$.params.type') = 'error'
39
+ OR json_extract_string(event, '$.params.entry.level') = 'error'
40
+ )
41
+ """)
42
+ return result[0][0] if result else 0
43
+
44
+ def get_recent_messages(self, limit: int = 50, level: str | None = None) -> list[tuple]:
45
+ """Get recent console messages with common fields extracted.
46
+
47
+ Args:
48
+ limit: Maximum results
49
+ level: Optional filter by level (error, warning, log, info)
50
+ """
51
+ if not self.cdp:
52
+ return []
53
+
54
+ sql = """
55
+ SELECT
56
+ rowid,
57
+ COALESCE(
58
+ json_extract_string(event, '$.params.type'),
59
+ json_extract_string(event, '$.params.entry.level')
60
+ ) as Level,
61
+ COALESCE(
62
+ json_extract_string(event, '$.params.source'),
63
+ json_extract_string(event, '$.params.entry.source'),
64
+ 'console'
65
+ ) as Source,
66
+ COALESCE(
67
+ json_extract_string(event, '$.params.args[0].value'),
68
+ json_extract_string(event, '$.params.entry.text')
69
+ ) as Message,
70
+ COALESCE(
71
+ json_extract_string(event, '$.params.timestamp'),
72
+ json_extract_string(event, '$.params.entry.timestamp')
73
+ ) as Time
74
+ FROM events
75
+ WHERE json_extract_string(event, '$.method') IN ('Runtime.consoleAPICalled', 'Log.entryAdded')
76
+ """
77
+
78
+ if level:
79
+ sql += f"""
80
+ AND (
81
+ json_extract_string(event, '$.params.type') = '{level.lower()}'
82
+ OR json_extract_string(event, '$.params.entry.level') = '{level.lower()}'
83
+ )
84
+ """
85
+
86
+ sql += f" ORDER BY rowid DESC LIMIT {limit}"
87
+
88
+ return self.cdp.query(sql)
89
+
90
+ def get_errors(self, limit: int = 20) -> list[tuple]:
91
+ """Get console errors only.
92
+
93
+ Args:
94
+ limit: Maximum results
95
+ """
96
+ return self.get_recent_messages(limit=limit, level="error")
97
+
98
+ def get_warnings(self, limit: int = 20) -> list[tuple]:
99
+ """Get console warnings only.
100
+
101
+ Args:
102
+ limit: Maximum results
103
+ """
104
+ return self.get_recent_messages(limit=limit, level="warning")
105
+
106
+ def clear_browser_console(self) -> bool:
107
+ """Clear console in the browser (CDP command)."""
108
+ if not self.cdp:
109
+ return False
110
+
111
+ try:
112
+ self.cdp.execute("Runtime.discardConsoleEntries")
113
+ return True
114
+ except Exception as e:
115
+ logger.error(f"Failed to clear browser console: {e}")
116
+ return False
@@ -0,0 +1,397 @@
1
+ """Fetch interception service for request/response debugging."""
2
+
3
+ import json
4
+ import logging
5
+ import time
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from webtap.cdp import CDPSession
10
+ from webtap.services.body import BodyService
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class FetchService:
16
+ """Internal service for fetch interception with explicit actions."""
17
+
18
+ def __init__(self):
19
+ """Initialize fetch service."""
20
+ self.enabled = False
21
+ self.enable_response_stage = False # Config option for future
22
+ self.cdp: CDPSession | None = None
23
+ self.body_service: BodyService | None = None
24
+
25
+ # ============= Core State Queries =============
26
+
27
+ def get_paused_list(self) -> list[dict[str, Any]]:
28
+ """Get list of currently paused requests for display.
29
+
30
+ Returns:
31
+ List with ID, Stage, Method, Status, URL for each paused request
32
+ """
33
+ if not self.cdp:
34
+ return []
35
+
36
+ results = self.cdp.query(
37
+ """
38
+ WITH fetch_events AS (
39
+ SELECT
40
+ rowid,
41
+ json_extract_string(event, '$.params.requestId') as request_id,
42
+ json_extract_string(event, '$.params.networkId') as network_id,
43
+ json_extract_string(event, '$.params.responseStatusCode') as response_status,
44
+ json_extract_string(event, '$.params.request.url') as url,
45
+ json_extract_string(event, '$.params.request.method') as method,
46
+ CASE WHEN json_extract_string(event, '$.params.responseStatusCode') IS NOT NULL
47
+ THEN 'Response' ELSE 'Request' END as stage
48
+ FROM events
49
+ WHERE json_extract_string(event, '$.method') = 'Fetch.requestPaused'
50
+ ),
51
+ completed_networks AS (
52
+ SELECT DISTINCT json_extract_string(event, '$.params.requestId') as network_id
53
+ FROM events
54
+ WHERE json_extract_string(event, '$.method') = 'Network.loadingFinished'
55
+ ),
56
+ latest_per_request AS (
57
+ SELECT request_id, MAX(rowid) as max_rowid
58
+ FROM fetch_events
59
+ GROUP BY request_id
60
+ )
61
+ SELECT
62
+ f.rowid,
63
+ f.stage,
64
+ f.method,
65
+ f.response_status,
66
+ f.url,
67
+ f.network_id,
68
+ f.request_id
69
+ FROM fetch_events f
70
+ INNER JOIN latest_per_request l ON f.rowid = l.max_rowid
71
+ WHERE f.network_id NOT IN (SELECT network_id FROM completed_networks)
72
+ ORDER BY f.rowid DESC
73
+ """
74
+ )
75
+
76
+ return [
77
+ {
78
+ "ID": row[0],
79
+ "Stage": row[1],
80
+ "Method": row[2] or "GET",
81
+ "Status": row[3] or "-",
82
+ "URL": row[4][:60] if row[4] else "-",
83
+ "_network_id": row[5],
84
+ "_request_id": row[6],
85
+ }
86
+ for row in results
87
+ ]
88
+
89
+ @property
90
+ def paused_count(self) -> int:
91
+ """Count of actually paused requests (not completed)."""
92
+ return len(self.get_paused_list())
93
+
94
+ def get_paused_event(self, rowid: int) -> dict | None:
95
+ """Get full event data for a paused request.
96
+
97
+ Args:
98
+ rowid: Row ID from the database
99
+
100
+ Returns:
101
+ Full CDP event data or None if not found
102
+ """
103
+ if not self.cdp:
104
+ return None
105
+
106
+ result = self.cdp.query(
107
+ """
108
+ SELECT event
109
+ FROM events
110
+ WHERE rowid = ?
111
+ AND json_extract_string(event, '$.method') = 'Fetch.requestPaused'
112
+ """,
113
+ [rowid],
114
+ )
115
+
116
+ if result:
117
+ return json.loads(result[0][0])
118
+ return None
119
+
120
+ # ============= Enable/Disable =============
121
+
122
+ def enable(self, cdp: "CDPSession", response_stage: bool = False) -> dict[str, Any]:
123
+ """Enable fetch interception.
124
+
125
+ Args:
126
+ cdp: CDP session for executing commands
127
+ response_stage: Whether to also pause at Response stage
128
+
129
+ Returns:
130
+ Status dict with enabled state and paused count
131
+ """
132
+ if self.enabled:
133
+ return {"enabled": True, "message": "Already enabled"}
134
+
135
+ self.cdp = cdp
136
+ self.enable_response_stage = response_stage
137
+
138
+ try:
139
+ patterns = [{"urlPattern": "*", "requestStage": "Request"}]
140
+
141
+ if response_stage:
142
+ patterns.append({"urlPattern": "*", "requestStage": "Response"})
143
+
144
+ cdp.execute("Fetch.enable", {"patterns": patterns})
145
+
146
+ self.enabled = True
147
+ stage_msg = "Request and Response stages" if response_stage else "Request stage only"
148
+ logger.info(f"Fetch interception enabled ({stage_msg})")
149
+
150
+ return {"enabled": True, "stages": stage_msg, "paused": self.paused_count}
151
+
152
+ except Exception as e:
153
+ logger.error(f"Failed to enable fetch: {e}")
154
+ return {"enabled": False, "error": str(e)}
155
+
156
+ def disable(self) -> dict[str, Any]:
157
+ """Disable fetch interception.
158
+
159
+ Returns:
160
+ Status dict with disabled state
161
+ """
162
+ if not self.enabled:
163
+ return {"enabled": False, "message": "Already disabled"}
164
+
165
+ if not self.cdp:
166
+ return {"enabled": False, "error": "No CDP session"}
167
+
168
+ try:
169
+ self.cdp.execute("Fetch.disable")
170
+ self.enabled = False
171
+
172
+ # Clear body cache when fetch is disabled
173
+ if self.body_service:
174
+ self.body_service.clear_cache()
175
+
176
+ logger.info("Fetch interception disabled")
177
+ return {"enabled": False}
178
+
179
+ except Exception as e:
180
+ logger.error(f"Failed to disable fetch: {e}")
181
+ return {"enabled": self.enabled, "error": str(e)}
182
+
183
+ # ============= Explicit Actions =============
184
+
185
+ def continue_request(
186
+ self, rowid: int, modifications: dict[str, Any] | None = None, wait_for_next: float = 0.5
187
+ ) -> dict[str, Any]:
188
+ """Continue a specific paused request.
189
+
190
+ Args:
191
+ rowid: Row ID from requests() table
192
+ modifications: Optional modifications to apply
193
+ wait_for_next: Time to wait for follow-up events (0 to disable)
194
+
195
+ Returns:
196
+ Dict with continuation status and optional next event info
197
+ """
198
+ if not self.enabled or not self.cdp:
199
+ return {"error": "Fetch not enabled"}
200
+
201
+ # Get the event
202
+ event = self.get_paused_event(rowid)
203
+ if not event:
204
+ return {"error": f"Event {rowid} not found"}
205
+
206
+ params = event["params"]
207
+ request_id = params["requestId"]
208
+ network_id = params.get("networkId")
209
+
210
+ # Determine stage and continue
211
+ if params.get("responseStatusCode"):
212
+ # Response stage
213
+ cdp_params = {"requestId": request_id}
214
+ if modifications:
215
+ cdp_params.update(modifications)
216
+ self.cdp.execute("Fetch.continueResponse", cdp_params)
217
+ stage = "response"
218
+ else:
219
+ # Request stage
220
+ cdp_params = {"requestId": request_id}
221
+ if modifications:
222
+ cdp_params.update(modifications)
223
+ self.cdp.execute("Fetch.continueRequest", cdp_params)
224
+ stage = "request"
225
+
226
+ result = {"continued": rowid, "stage": stage, "request_id": request_id}
227
+
228
+ # Wait for follow-up if requested
229
+ if wait_for_next > 0 and network_id:
230
+ next_event = self._wait_for_next_event(request_id, network_id, rowid, wait_for_next)
231
+ if next_event:
232
+ result["next_event"] = next_event
233
+
234
+ # Add remaining count
235
+ result["remaining"] = self.paused_count
236
+
237
+ return result
238
+
239
+ def _wait_for_next_event(
240
+ self, request_id: str, network_id: str, after_rowid: int, timeout: float
241
+ ) -> dict[str, Any] | None:
242
+ """Wait for the next event in the chain (response stage or redirect).
243
+
244
+ Args:
245
+ request_id: The request ID that was just continued
246
+ network_id: The network ID for tracking redirects
247
+ after_rowid: Row ID to search after
248
+ timeout: Maximum time to wait
249
+
250
+ Returns:
251
+ Dict with next event info or None if nothing found
252
+ """
253
+ if not self.cdp:
254
+ return None
255
+
256
+ start = time.time()
257
+
258
+ while time.time() - start < timeout:
259
+ try:
260
+ # Check for response stage (same requestId)
261
+ response = self.cdp.query(
262
+ """
263
+ SELECT
264
+ rowid,
265
+ json_extract_string(event, '$.params.responseStatusCode') as status
266
+ FROM events
267
+ WHERE json_extract_string(event, '$.method') = 'Fetch.requestPaused'
268
+ AND json_extract_string(event, '$.params.requestId') = ?
269
+ AND json_extract_string(event, '$.params.responseStatusCode') IS NOT NULL
270
+ AND rowid > ?
271
+ LIMIT 1
272
+ """,
273
+ [request_id, after_rowid],
274
+ )
275
+
276
+ if response and len(response) > 0:
277
+ return {
278
+ "rowid": response[0][0],
279
+ "type": "response",
280
+ "status": response[0][1],
281
+ "description": f"Response stage ready (status {response[0][1]})",
282
+ }
283
+
284
+ # Check for redirect (new requestId, same networkId)
285
+ redirect = self.cdp.query(
286
+ """
287
+ SELECT
288
+ rowid,
289
+ json_extract_string(event, '$.params.requestId') as new_request_id,
290
+ json_extract_string(event, '$.params.request.url') as url
291
+ FROM events
292
+ WHERE json_extract_string(event, '$.method') = 'Fetch.requestPaused'
293
+ AND json_extract_string(event, '$.params.networkId') = ?
294
+ AND json_extract_string(event, '$.params.redirectedRequestId') = ?
295
+ AND rowid > ?
296
+ LIMIT 1
297
+ """,
298
+ [network_id, request_id, after_rowid],
299
+ )
300
+
301
+ if redirect and len(redirect) > 0:
302
+ url = redirect[0][2]
303
+ return {
304
+ "rowid": redirect[0][0],
305
+ "type": "redirect",
306
+ "request_id": redirect[0][1],
307
+ "url": url[:60] if url else None,
308
+ "description": f"Redirected to {url[:40]}..." if url else "Redirected",
309
+ }
310
+ except Exception as e:
311
+ logger.debug(f"Error during polling: {e}")
312
+ # Continue polling on transient errors
313
+
314
+ time.sleep(0.05) # 50ms polling
315
+
316
+ return None
317
+
318
+ def fail_request(self, rowid: int, reason: str = "BlockedByClient") -> dict[str, Any]:
319
+ """Explicitly fail a request.
320
+
321
+ Args:
322
+ rowid: Row ID from requests() table
323
+ reason: CDP error reason
324
+
325
+ Returns:
326
+ Dict with failure status
327
+ """
328
+ if not self.enabled or not self.cdp:
329
+ return {"error": "Fetch not enabled"}
330
+
331
+ event = self.get_paused_event(rowid)
332
+ if not event:
333
+ return {"error": f"Event {rowid} not found"}
334
+
335
+ request_id = event["params"]["requestId"]
336
+
337
+ try:
338
+ self.cdp.execute("Fetch.failRequest", {"requestId": request_id, "errorReason": reason})
339
+
340
+ return {"failed": rowid, "reason": reason, "remaining": self.paused_count - 1}
341
+
342
+ except Exception as e:
343
+ logger.error(f"Failed to fail request {rowid}: {e}")
344
+ return {"error": str(e)}
345
+
346
+ def fulfill_request(
347
+ self,
348
+ rowid: int,
349
+ response_code: int = 200,
350
+ response_headers: list[dict[str, str]] | None = None,
351
+ body: str = "",
352
+ ) -> dict[str, Any]:
353
+ """Fulfill a request with a custom response.
354
+
355
+ Args:
356
+ rowid: Row ID from requests() table
357
+ response_code: HTTP response code
358
+ response_headers: Response headers
359
+ body: Response body
360
+
361
+ Returns:
362
+ Dict with fulfillment status
363
+ """
364
+ if not self.enabled or not self.cdp:
365
+ return {"error": "Fetch not enabled"}
366
+
367
+ event = self.get_paused_event(rowid)
368
+ if not event:
369
+ return {"error": f"Event {rowid} not found"}
370
+
371
+ request_id = event["params"]["requestId"]
372
+
373
+ try:
374
+ import base64
375
+
376
+ # Encode body to base64
377
+ body_base64 = base64.b64encode(body.encode()).decode()
378
+
379
+ params = {
380
+ "requestId": request_id,
381
+ "responseCode": response_code,
382
+ "body": body_base64,
383
+ }
384
+
385
+ if response_headers:
386
+ params["responseHeaders"] = response_headers
387
+
388
+ self.cdp.execute("Fetch.fulfillRequest", params)
389
+
390
+ return {"fulfilled": rowid, "response_code": response_code, "remaining": self.paused_count - 1}
391
+
392
+ except Exception as e:
393
+ logger.error(f"Failed to fulfill request {rowid}: {e}")
394
+ return {"error": str(e)}
395
+
396
+
397
+ # No exports - internal service only
@@ -0,0 +1,175 @@
1
+ """Main service orchestrator for WebTap business logic.
2
+
3
+ PUBLIC API:
4
+ - WebTapService: Main service orchestrating all domain services
5
+ """
6
+
7
+ from typing import Any
8
+
9
+ from webtap.filters import FilterManager
10
+ from webtap.services.fetch import FetchService
11
+ from webtap.services.network import NetworkService
12
+ from webtap.services.console import ConsoleService
13
+ from webtap.services.body import BodyService
14
+
15
+
16
+ REQUIRED_DOMAINS = [
17
+ "Page",
18
+ "Network",
19
+ "Runtime",
20
+ "Log",
21
+ "DOMStorage",
22
+ ]
23
+
24
+
25
+ class WebTapService:
26
+ """Main service orchestrating all WebTap domain services.
27
+
28
+ Coordinates CDP session management, domain services, and filter management.
29
+ Shared between REPL commands and API endpoints for consistent state.
30
+
31
+ Attributes:
32
+ state: WebTap application state instance.
33
+ cdp: CDP session for browser communication.
34
+ enabled_domains: Set of currently enabled CDP domains.
35
+ filters: Filter manager for event filtering.
36
+ fetch: Fetch interception service.
37
+ network: Network monitoring service.
38
+ console: Console message service.
39
+ body: Response body fetching service.
40
+ """
41
+
42
+ def __init__(self, state):
43
+ """Initialize with WebTapState instance.
44
+
45
+ Args:
46
+ state: WebTapState instance from app.py
47
+ """
48
+ self.state = state
49
+ self.cdp = state.cdp
50
+
51
+ self.enabled_domains: set[str] = set()
52
+ self.filters = FilterManager()
53
+
54
+ self.fetch = FetchService()
55
+ self.network = NetworkService()
56
+ self.console = ConsoleService()
57
+ self.body = BodyService()
58
+
59
+ self.fetch.cdp = self.cdp
60
+ self.network.cdp = self.cdp
61
+ self.console.cdp = self.cdp
62
+ self.body.cdp = self.cdp
63
+
64
+ self.fetch.body_service = self.body
65
+
66
+ # Legacy wiring for CDP event handler
67
+ self.cdp.fetch_service = self.fetch
68
+
69
+ @property
70
+ def event_count(self) -> int:
71
+ """Total count of all CDP events stored."""
72
+ if not self.cdp or not self.cdp.is_connected:
73
+ return 0
74
+ try:
75
+ result = self.cdp.db.execute("SELECT COUNT(*) FROM events").fetchone()
76
+ return result[0] if result else 0
77
+ except Exception:
78
+ return 0
79
+
80
+ def connect_to_page(self, page_index: int | None = None, page_id: str | None = None) -> dict[str, Any]:
81
+ """Connect to Chrome page and enable required domains.
82
+
83
+ Args:
84
+ page_index: Index of page to connect to (for REPL)
85
+ page_id: ID of page to connect to (for extension)
86
+ """
87
+ try:
88
+ self.cdp.connect(page_index=page_index, page_id=page_id)
89
+
90
+ failures = self.enable_domains(REQUIRED_DOMAINS)
91
+
92
+ if failures:
93
+ self.cdp.disconnect()
94
+ return {"error": f"Failed to enable domains: {failures}"}
95
+
96
+ self.filters.load()
97
+
98
+ page_info = self.cdp.page_info or {}
99
+ return {"connected": True, "title": page_info.get("title", "Untitled"), "url": page_info.get("url", "")}
100
+ except Exception as e:
101
+ return {"error": str(e)}
102
+
103
+ def disconnect(self) -> dict[str, Any]:
104
+ """Disconnect from Chrome."""
105
+ was_connected = self.cdp.is_connected
106
+
107
+ if self.fetch.enabled:
108
+ self.fetch.disable()
109
+
110
+ self.body.clear_cache()
111
+
112
+ self.cdp.disconnect()
113
+ self.enabled_domains.clear()
114
+
115
+ return {"disconnected": True, "was_connected": was_connected}
116
+
117
+ def enable_domains(self, domains: list[str]) -> dict[str, str]:
118
+ """Enable CDP domains.
119
+
120
+ Args:
121
+ domains: List of domain names to enable
122
+ """
123
+ failures = {}
124
+ for domain in domains:
125
+ try:
126
+ self.cdp.execute(f"{domain}.enable")
127
+ self.enabled_domains.add(domain)
128
+ except Exception as e:
129
+ failures[domain] = str(e)
130
+ return failures
131
+
132
+ def get_status(self) -> dict[str, Any]:
133
+ """Get current connection and state status."""
134
+ if not self.cdp.is_connected:
135
+ return {
136
+ "connected": False,
137
+ "events": 0,
138
+ "fetch_enabled": self.fetch.enabled,
139
+ "paused_requests": 0,
140
+ "network_requests": 0,
141
+ "console_messages": 0,
142
+ "console_errors": 0,
143
+ }
144
+
145
+ page_info = self.cdp.page_info or {}
146
+
147
+ return {
148
+ "connected": True,
149
+ "connected_page_id": page_info.get("id"), # Stable page ID
150
+ "url": page_info.get("url"),
151
+ "title": page_info.get("title"),
152
+ "events": self.event_count,
153
+ "fetch_enabled": self.fetch.enabled,
154
+ "paused_requests": self.fetch.paused_count if self.fetch.enabled else 0,
155
+ "network_requests": self.network.request_count,
156
+ "console_messages": self.console.message_count,
157
+ "console_errors": self.console.error_count,
158
+ "enabled_domains": list(self.enabled_domains),
159
+ }
160
+
161
+ def clear_events(self) -> dict[str, Any]:
162
+ """Clear all stored CDP events."""
163
+ self.cdp.clear_events()
164
+ return {"cleared": True, "events": 0}
165
+
166
+ def list_pages(self) -> dict[str, Any]:
167
+ """List available Chrome pages."""
168
+ try:
169
+ pages = self.cdp.list_pages()
170
+ connected_id = self.cdp.page_info.get("id") if self.cdp.page_info else None
171
+ for page in pages:
172
+ page["is_connected"] = page.get("id") == connected_id
173
+ return {"pages": pages}
174
+ except Exception as e:
175
+ return {"error": str(e), "pages": []}