webtap-tool 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webtap/VISION.md +246 -0
- webtap/__init__.py +84 -0
- webtap/__main__.py +6 -0
- webtap/api/__init__.py +9 -0
- webtap/api/app.py +26 -0
- webtap/api/models.py +69 -0
- webtap/api/server.py +111 -0
- webtap/api/sse.py +182 -0
- webtap/api/state.py +89 -0
- webtap/app.py +79 -0
- webtap/cdp/README.md +275 -0
- webtap/cdp/__init__.py +12 -0
- webtap/cdp/har.py +302 -0
- webtap/cdp/schema/README.md +41 -0
- webtap/cdp/schema/cdp_protocol.json +32785 -0
- webtap/cdp/schema/cdp_version.json +8 -0
- webtap/cdp/session.py +667 -0
- webtap/client.py +81 -0
- webtap/commands/DEVELOPER_GUIDE.md +401 -0
- webtap/commands/TIPS.md +269 -0
- webtap/commands/__init__.py +29 -0
- webtap/commands/_builders.py +331 -0
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/_tips.py +147 -0
- webtap/commands/_utils.py +273 -0
- webtap/commands/connection.py +220 -0
- webtap/commands/console.py +87 -0
- webtap/commands/fetch.py +310 -0
- webtap/commands/filters.py +116 -0
- webtap/commands/javascript.py +73 -0
- webtap/commands/js_export.py +73 -0
- webtap/commands/launch.py +72 -0
- webtap/commands/navigation.py +197 -0
- webtap/commands/network.py +136 -0
- webtap/commands/quicktype.py +306 -0
- webtap/commands/request.py +93 -0
- webtap/commands/selections.py +138 -0
- webtap/commands/setup.py +219 -0
- webtap/commands/to_model.py +163 -0
- webtap/daemon.py +185 -0
- webtap/daemon_state.py +53 -0
- webtap/filters.py +219 -0
- webtap/rpc/__init__.py +14 -0
- webtap/rpc/errors.py +49 -0
- webtap/rpc/framework.py +223 -0
- webtap/rpc/handlers.py +625 -0
- webtap/rpc/machine.py +84 -0
- webtap/services/README.md +83 -0
- webtap/services/__init__.py +15 -0
- webtap/services/console.py +124 -0
- webtap/services/dom.py +547 -0
- webtap/services/fetch.py +415 -0
- webtap/services/main.py +392 -0
- webtap/services/network.py +401 -0
- webtap/services/setup/__init__.py +185 -0
- webtap/services/setup/chrome.py +233 -0
- webtap/services/setup/desktop.py +255 -0
- webtap/services/setup/extension.py +147 -0
- webtap/services/setup/platform.py +162 -0
- webtap/services/state_snapshot.py +86 -0
- webtap_tool-0.11.0.dist-info/METADATA +535 -0
- webtap_tool-0.11.0.dist-info/RECORD +64 -0
- webtap_tool-0.11.0.dist-info/WHEEL +4 -0
- webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/cdp/har.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""HAR view creation for DuckDB.
|
|
2
|
+
|
|
3
|
+
PUBLIC API:
|
|
4
|
+
- create_har_views: Create HAR aggregation views in DuckDB
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# HAR entries view - aggregates CDP events into HAR-like structure.
#
# One row per network request (HTTP or WebSocket), built from the raw CDP
# events stored in the `events` table (columns used here: rowid, method, event).
# Each CTE collapses one CDP event type to a single row per requestId; the
# per-protocol "entries" CTEs then LEFT JOIN those pieces together, and the two
# protocols are UNION ALL'ed with identical column lists (hence the typed NULL
# placeholders on each side).
_HAR_ENTRIES_SQL = """
CREATE OR REPLACE VIEW har_entries AS
WITH
-- All Fetch.requestPaused events (narrowed to unresolved ones in active_paused)
paused_fetch AS (
    SELECT
        json_extract_string(event, '$.params.networkId') as network_id,
        rowid as paused_id,
        json_extract_string(event, '$.params.responseStatusCode') as fetch_status,
        json_extract(event, '$.params.responseHeaders') as fetch_response_headers,
        CASE
            WHEN json_extract_string(event, '$.params.responseStatusCode') IS NOT NULL
            THEN 'Response'
            ELSE 'Request'
        END as pause_stage,
        json_extract_string(event, '$.params.requestId') as fetch_request_id
    FROM events
    WHERE method = 'Fetch.requestPaused'
),

-- Requests whose paused Fetch was resolved (continued, failed, or fulfilled),
-- detected via a Network.loadingFinished / Network.loadingFailed event
resolved_fetch AS (
    SELECT DISTINCT json_extract_string(event, '$.params.requestId') as network_id
    FROM events
    WHERE method IN ('Network.loadingFinished', 'Network.loadingFailed')
),

-- Only unresolved paused events (latest per networkId)
active_paused AS (
    SELECT pf.*
    FROM paused_fetch pf
    WHERE pf.network_id IS NOT NULL
        AND pf.network_id NOT IN (SELECT network_id FROM resolved_fetch WHERE network_id IS NOT NULL)
    QUALIFY ROW_NUMBER() OVER (PARTITION BY pf.network_id ORDER BY pf.paused_id DESC) = 1
),

-- HTTP Request: extract from requestWillBeSent
http_requests AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MIN(rowid) as first_rowid,
        'http' as protocol,
        MAX(json_extract_string(event, '$.params.wallTime')) as started_datetime,
        MAX(json_extract_string(event, '$.params.timestamp')) as started_timestamp,
        MAX(json_extract_string(event, '$.params.request.method')) as method,
        MAX(json_extract_string(event, '$.params.request.url')) as url,
        MAX(json_extract(event, '$.params.request.headers')) as request_headers,
        MAX(json_extract_string(event, '$.params.request.postData')) as post_data,
        MAX(json_extract_string(event, '$.params.type')) as resource_type
    FROM events
    WHERE method = 'Network.requestWillBeSent'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Response: extract from responseReceived
http_responses AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.response.status')) as status,
        MAX(json_extract_string(event, '$.params.response.statusText')) as status_text,
        MAX(json_extract(event, '$.params.response.headers')) as response_headers,
        MAX(json_extract_string(event, '$.params.response.mimeType')) as mime_type,
        MAX(json_extract(event, '$.params.response.timing')) as timing
    FROM events
    WHERE method = 'Network.responseReceived'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Finished: timing and size
http_finished AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.timestamp')) as finished_timestamp,
        MAX(json_extract_string(event, '$.params.encodedDataLength')) as final_size
    FROM events
    WHERE method = 'Network.loadingFinished'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Failed: error info
http_failed AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.errorText')) as error_text
    FROM events
    WHERE method = 'Network.loadingFailed'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Request ExtraInfo: raw headers with cookies (before browser sanitization)
request_extra AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract(event, '$.params.headers')) as raw_headers,
        MAX(json_extract(event, '$.params.associatedCookies')) as cookies
    FROM events
    WHERE method = 'Network.requestWillBeSentExtraInfo'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Response ExtraInfo: Set-Cookie headers and true status
response_extra AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract(event, '$.params.headers')) as raw_headers,
        MAX(json_extract_string(event, '$.params.statusCode')) as true_status
    FROM events
    WHERE method = 'Network.responseReceivedExtraInfo'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Created
ws_created AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MIN(rowid) as first_rowid,
        'websocket' as protocol,
        MAX(json_extract_string(event, '$.params.url')) as url
    FROM events
    WHERE method = 'Network.webSocketCreated'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Handshake
ws_handshake AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.wallTime')) as started_datetime,
        -- Start time is the handshake *request* timestamp. A plain MAX over both
        -- handshake events would pick the later response timestamp and shorten
        -- time_ms; fall back to it only when the request event is missing.
        COALESCE(
            MAX(CASE
                WHEN method = 'Network.webSocketWillSendHandshakeRequest'
                THEN json_extract_string(event, '$.params.timestamp')
            END),
            MAX(json_extract_string(event, '$.params.timestamp'))
        ) as started_timestamp,
        MAX(json_extract(event, '$.params.request.headers')) as request_headers,
        MAX(json_extract_string(event, '$.params.response.status')) as status,
        MAX(json_extract(event, '$.params.response.headers')) as response_headers
    FROM events
    WHERE method IN ('Network.webSocketWillSendHandshakeRequest', 'Network.webSocketHandshakeResponseReceived')
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Frame Stats (aggregated)
ws_frames AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        SUM(CASE WHEN method = 'Network.webSocketFrameSent' THEN 1 ELSE 0 END) as frames_sent,
        SUM(CASE WHEN method = 'Network.webSocketFrameReceived' THEN 1 ELSE 0 END) as frames_received,
        SUM(LENGTH(COALESCE(json_extract_string(event, '$.params.response.payloadData'), ''))) as total_bytes
    FROM events
    WHERE method IN ('Network.webSocketFrameSent', 'Network.webSocketFrameReceived')
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Closed
ws_closed AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.timestamp')) as closed_timestamp
    FROM events
    WHERE method = 'Network.webSocketClosed'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Combine HTTP entries
http_entries AS (
    SELECT
        req.first_rowid as id,
        req.request_id,
        req.protocol,
        req.method,
        req.url,
        -- Use Fetch status if paused, then ExtraInfo true status, then Network status
        CAST(COALESCE(ap.fetch_status, respx.true_status, resp.status, '0') AS INTEGER) as status,
        resp.status_text,
        req.resource_type as type,
        CAST(COALESCE(fin.final_size, '0') AS INTEGER) as size,
        CASE
            WHEN fin.finished_timestamp IS NOT NULL
            THEN CAST((CAST(fin.finished_timestamp AS DOUBLE) - CAST(req.started_timestamp AS DOUBLE)) * 1000 AS INTEGER)
            ELSE NULL
        END as time_ms,
        -- State priority: paused > failed > complete > loading > pending
        CASE
            WHEN ap.paused_id IS NOT NULL THEN 'paused'
            WHEN fail.error_text IS NOT NULL THEN 'failed'
            WHEN fin.finished_timestamp IS NOT NULL THEN 'complete'
            WHEN resp.status IS NOT NULL THEN 'loading'
            ELSE 'pending'
        END as state,
        ap.pause_stage,
        ap.paused_id,
        -- Prefer raw headers from ExtraInfo (includes Cookie header)
        COALESCE(reqx.raw_headers, req.request_headers) as request_headers,
        req.post_data,
        -- Prefer raw headers from ExtraInfo (includes Set-Cookie), then Fetch headers
        COALESCE(respx.raw_headers, ap.fetch_response_headers, resp.response_headers) as response_headers,
        resp.mime_type,
        resp.timing,
        fail.error_text,
        -- Cookie details from ExtraInfo (httpOnly, Secure, SameSite attributes)
        reqx.cookies as request_cookies,
        CAST(NULL AS BIGINT) as frames_sent,
        CAST(NULL AS BIGINT) as frames_received,
        CAST(NULL AS BIGINT) as ws_total_bytes
    FROM http_requests req
    LEFT JOIN request_extra reqx ON req.request_id = reqx.request_id
    LEFT JOIN http_responses resp ON req.request_id = resp.request_id
    LEFT JOIN response_extra respx ON req.request_id = respx.request_id
    LEFT JOIN http_finished fin ON req.request_id = fin.request_id
    LEFT JOIN http_failed fail ON req.request_id = fail.request_id
    LEFT JOIN active_paused ap ON req.request_id = ap.network_id
),

-- Combine WebSocket entries
websocket_entries AS (
    SELECT
        ws.first_rowid as id,
        ws.request_id,
        ws.protocol,
        'WS' as method,
        ws.url,
        CAST(COALESCE(hs.status, '101') AS INTEGER) as status,
        CAST(NULL AS VARCHAR) as status_text,
        'WebSocket' as type,
        CAST(COALESCE(wf.total_bytes, 0) AS INTEGER) as size,
        CASE
            WHEN wc.closed_timestamp IS NOT NULL
            THEN CAST((CAST(wc.closed_timestamp AS DOUBLE) - CAST(hs.started_timestamp AS DOUBLE)) * 1000 AS INTEGER)
            ELSE NULL
        END as time_ms,
        CASE
            WHEN wc.closed_timestamp IS NOT NULL THEN 'closed'
            WHEN hs.status IS NOT NULL THEN 'open'
            ELSE 'connecting'
        END as state,
        CAST(NULL AS VARCHAR) as pause_stage,
        CAST(NULL AS BIGINT) as paused_id,
        hs.request_headers,
        CAST(NULL AS VARCHAR) as post_data,
        hs.response_headers,
        'websocket' as mime_type,
        CAST(NULL AS JSON) as timing,
        CAST(NULL AS VARCHAR) as error_text,
        CAST(NULL AS JSON) as request_cookies,
        wf.frames_sent,
        wf.frames_received,
        wf.total_bytes as ws_total_bytes
    FROM ws_created ws
    LEFT JOIN ws_handshake hs ON ws.request_id = hs.request_id
    LEFT JOIN ws_frames wf ON ws.request_id = wf.request_id
    LEFT JOIN ws_closed wc ON ws.request_id = wc.request_id
)

SELECT * FROM http_entries
UNION ALL
SELECT * FROM websocket_entries
ORDER BY id DESC
"""
|
|
266
|
+
|
|
267
|
+
# HAR summary view - lightweight list for network() command.
#
# Projects only the scalar columns of har_entries, leaving out the header,
# body, timing and cookie columns. It selects FROM har_entries, so that view
# must already exist when this statement is executed.
_HAR_SUMMARY_SQL = """
CREATE OR REPLACE VIEW har_summary AS
SELECT
    id,
    request_id,
    protocol,
    method,
    status,
    url,
    type,
    size,
    time_ms,
    state,
    pause_stage,
    paused_id,
    frames_sent,
    frames_received
FROM har_entries
"""
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def create_har_views(db_execute) -> None:
    """Create HAR-based aggregation views in DuckDB.

    Creates har_entries and har_summary views for network request aggregation.
    The statements run in that order because har_summary selects from
    har_entries.

    Args:
        db_execute: Function to execute SQL (session._db_execute). It is
            called as ``db_execute(sql, wait_result=True)``, once per view.
            NOTE(review): ``wait_result=True`` presumably blocks until the
            statement has run - confirm against the session implementation.
    """
    # Base view first: har_summary is defined on top of har_entries.
    db_execute(_HAR_ENTRIES_SQL, wait_result=True)
    db_execute(_HAR_SUMMARY_SQL, wait_result=True)
    logger.debug("HAR views created")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# Public API: only create_har_views is exported; the underscore-prefixed SQL
# constants are module-private.
__all__ = ["create_har_views"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Chrome DevTools Protocol Schema
|
|
2
|
+
|
|
3
|
+
This directory contains the CDP protocol schema and version information fetched from Chrome.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- `cdp_protocol.json` - Full CDP protocol schema with all domains, commands, events, and types
|
|
8
|
+
- `cdp_version.json` - Chrome version and protocol version information
|
|
9
|
+
|
|
10
|
+
## Fetching Latest Schema
|
|
11
|
+
|
|
12
|
+
To update these files with the latest protocol from your Chrome instance:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Ensure Chrome is running with debugging enabled:
|
|
16
|
+
# google-chrome --remote-debugging-port=9222
|
|
17
|
+
|
|
18
|
+
# Fetch protocol schema
|
|
19
|
+
curl -s http://localhost:9222/json/protocol > cdp_protocol.json
|
|
20
|
+
|
|
21
|
+
# Fetch version info
|
|
22
|
+
curl -s http://localhost:9222/json/version | jq '.' > cdp_version.json
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Using the Schema
|
|
26
|
+
|
|
27
|
+
These files are useful for:
|
|
28
|
+
- Understanding available CDP commands and their parameters
|
|
29
|
+
- Debugging protocol issues
|
|
30
|
+
- Validating command usage
|
|
31
|
+
- Discovering new CDP features
|
|
32
|
+
|
|
33
|
+
## Example: Finding Fetch Commands
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# List all Fetch domain commands
|
|
37
|
+
cat cdp_protocol.json | jq '.domains[] | select(.domain == "Fetch") | .commands[].name'
|
|
38
|
+
|
|
39
|
+
# Get details for a specific command
|
|
40
|
+
cat cdp_protocol.json | jq '.domains[] | select(.domain == "Fetch") | .commands[] | select(.name == "continueResponse")'
|
|
41
|
+
```
|