webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/cdp/har.py ADDED
@@ -0,0 +1,302 @@
1
+ """HAR view creation for DuckDB.
2
+
3
+ PUBLIC API:
4
+ - create_har_views: Create HAR aggregation views in DuckDB
5
+ """
6
+
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# HAR entries view - aggregates CDP events into a HAR-like structure.
# Produces one row per HTTP request or WebSocket connection, keyed by the
# CDP requestId. Reads `rowid`, `method` and the JSON `event` column from
# the `events` table.
_HAR_ENTRIES_SQL = """
CREATE OR REPLACE VIEW har_entries AS
WITH
-- Paused Fetch events (unresolved)
paused_fetch AS (
    SELECT
        json_extract_string(event, '$.params.networkId') as network_id,
        rowid as paused_id,
        json_extract_string(event, '$.params.responseStatusCode') as fetch_status,
        json_extract(event, '$.params.responseHeaders') as fetch_response_headers,
        CASE
            WHEN json_extract_string(event, '$.params.responseStatusCode') IS NOT NULL
            THEN 'Response'
            ELSE 'Request'
        END as pause_stage,
        json_extract_string(event, '$.params.requestId') as fetch_request_id
    FROM events
    WHERE method = 'Fetch.requestPaused'
),

-- Requests whose loading finished or failed at the Network layer;
-- a pause recorded for one of these is treated as resolved
resolved_fetch AS (
    SELECT DISTINCT json_extract_string(event, '$.params.requestId') as network_id
    FROM events
    WHERE method IN ('Network.loadingFinished', 'Network.loadingFailed')
),

-- Only unresolved paused events (latest per networkId)
active_paused AS (
    SELECT pf.*
    FROM paused_fetch pf
    WHERE pf.network_id IS NOT NULL
      AND pf.network_id NOT IN (SELECT network_id FROM resolved_fetch WHERE network_id IS NOT NULL)
    QUALIFY ROW_NUMBER() OVER (PARTITION BY pf.network_id ORDER BY pf.paused_id DESC) = 1
),

-- HTTP Request: extract from requestWillBeSent
-- Timestamps are cast to DOUBLE before aggregation so that MIN/MAX compare
-- numerically instead of lexicographically on the JSON text
http_requests AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MIN(rowid) as first_rowid,
        'http' as protocol,
        MAX(json_extract_string(event, '$.params.wallTime')) as started_datetime,
        MAX(CAST(json_extract_string(event, '$.params.timestamp') AS DOUBLE)) as started_timestamp,
        MAX(json_extract_string(event, '$.params.request.method')) as method,
        MAX(json_extract_string(event, '$.params.request.url')) as url,
        MAX(json_extract(event, '$.params.request.headers')) as request_headers,
        MAX(json_extract_string(event, '$.params.request.postData')) as post_data,
        MAX(json_extract_string(event, '$.params.type')) as resource_type
    FROM events
    WHERE method = 'Network.requestWillBeSent'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Response: extract from responseReceived
http_responses AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.response.status')) as status,
        MAX(json_extract_string(event, '$.params.response.statusText')) as status_text,
        MAX(json_extract(event, '$.params.response.headers')) as response_headers,
        MAX(json_extract_string(event, '$.params.response.mimeType')) as mime_type,
        MAX(json_extract(event, '$.params.response.timing')) as timing
    FROM events
    WHERE method = 'Network.responseReceived'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Finished: timing and size
http_finished AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(CAST(json_extract_string(event, '$.params.timestamp') AS DOUBLE)) as finished_timestamp,
        MAX(json_extract_string(event, '$.params.encodedDataLength')) as final_size
    FROM events
    WHERE method = 'Network.loadingFinished'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- HTTP Failed: error info
http_failed AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.errorText')) as error_text
    FROM events
    WHERE method = 'Network.loadingFailed'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Request ExtraInfo: raw headers with cookies (before browser sanitization)
request_extra AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract(event, '$.params.headers')) as raw_headers,
        MAX(json_extract(event, '$.params.associatedCookies')) as cookies
    FROM events
    WHERE method = 'Network.requestWillBeSentExtraInfo'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Response ExtraInfo: Set-Cookie headers and true status
response_extra AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract(event, '$.params.headers')) as raw_headers,
        MAX(json_extract_string(event, '$.params.statusCode')) as true_status
    FROM events
    WHERE method = 'Network.responseReceivedExtraInfo'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Created
ws_created AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MIN(rowid) as first_rowid,
        'websocket' as protocol,
        MAX(json_extract_string(event, '$.params.url')) as url
    FROM events
    WHERE method = 'Network.webSocketCreated'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Handshake
-- This CTE groups the handshake request and response events together, so
-- the start time is the earliest timestamp of the pair, hence MIN
ws_handshake AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(json_extract_string(event, '$.params.wallTime')) as started_datetime,
        MIN(CAST(json_extract_string(event, '$.params.timestamp') AS DOUBLE)) as started_timestamp,
        MAX(json_extract(event, '$.params.request.headers')) as request_headers,
        MAX(json_extract_string(event, '$.params.response.status')) as status,
        MAX(json_extract(event, '$.params.response.headers')) as response_headers
    FROM events
    WHERE method IN ('Network.webSocketWillSendHandshakeRequest', 'Network.webSocketHandshakeResponseReceived')
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Frame Stats (aggregated)
ws_frames AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        SUM(CASE WHEN method = 'Network.webSocketFrameSent' THEN 1 ELSE 0 END) as frames_sent,
        SUM(CASE WHEN method = 'Network.webSocketFrameReceived' THEN 1 ELSE 0 END) as frames_received,
        SUM(LENGTH(COALESCE(json_extract_string(event, '$.params.response.payloadData'), ''))) as total_bytes
    FROM events
    WHERE method IN ('Network.webSocketFrameSent', 'Network.webSocketFrameReceived')
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- WebSocket Closed
ws_closed AS (
    SELECT
        json_extract_string(event, '$.params.requestId') as request_id,
        MAX(CAST(json_extract_string(event, '$.params.timestamp') AS DOUBLE)) as closed_timestamp
    FROM events
    WHERE method = 'Network.webSocketClosed'
    GROUP BY json_extract_string(event, '$.params.requestId')
),

-- Combine HTTP entries
http_entries AS (
    SELECT
        req.first_rowid as id,
        req.request_id,
        req.protocol,
        req.method,
        req.url,
        -- Use Fetch status if paused, then ExtraInfo true status, then Network status
        CAST(COALESCE(ap.fetch_status, respx.true_status, resp.status, '0') AS INTEGER) as status,
        resp.status_text,
        req.resource_type as type,
        CAST(COALESCE(fin.final_size, '0') AS INTEGER) as size,
        -- Both timestamps are already DOUBLE seconds; convert the delta to milliseconds
        CASE
            WHEN fin.finished_timestamp IS NOT NULL
            THEN CAST((fin.finished_timestamp - req.started_timestamp) * 1000 AS INTEGER)
            ELSE NULL
        END as time_ms,
        -- State priority: paused > failed > complete > loading > pending
        CASE
            WHEN ap.paused_id IS NOT NULL THEN 'paused'
            WHEN fail.error_text IS NOT NULL THEN 'failed'
            WHEN fin.finished_timestamp IS NOT NULL THEN 'complete'
            WHEN resp.status IS NOT NULL THEN 'loading'
            ELSE 'pending'
        END as state,
        ap.pause_stage,
        ap.paused_id,
        -- Prefer raw headers from ExtraInfo (includes Cookie header)
        COALESCE(reqx.raw_headers, req.request_headers) as request_headers,
        req.post_data,
        -- Prefer raw headers from ExtraInfo (includes Set-Cookie), then Fetch headers
        COALESCE(respx.raw_headers, ap.fetch_response_headers, resp.response_headers) as response_headers,
        resp.mime_type,
        resp.timing,
        fail.error_text,
        -- Cookie details from ExtraInfo (httpOnly, Secure, SameSite attributes)
        reqx.cookies as request_cookies,
        CAST(NULL AS BIGINT) as frames_sent,
        CAST(NULL AS BIGINT) as frames_received,
        CAST(NULL AS BIGINT) as ws_total_bytes
    FROM http_requests req
    LEFT JOIN request_extra reqx ON req.request_id = reqx.request_id
    LEFT JOIN http_responses resp ON req.request_id = resp.request_id
    LEFT JOIN response_extra respx ON req.request_id = respx.request_id
    LEFT JOIN http_finished fin ON req.request_id = fin.request_id
    LEFT JOIN http_failed fail ON req.request_id = fail.request_id
    LEFT JOIN active_paused ap ON req.request_id = ap.network_id
),

-- Combine WebSocket entries
websocket_entries AS (
    SELECT
        ws.first_rowid as id,
        ws.request_id,
        ws.protocol,
        'WS' as method,
        ws.url,
        CAST(COALESCE(hs.status, '101') AS INTEGER) as status,
        CAST(NULL AS VARCHAR) as status_text,
        'WebSocket' as type,
        CAST(COALESCE(wf.total_bytes, 0) AS INTEGER) as size,
        -- Connection lifetime in milliseconds, from handshake request to close
        CASE
            WHEN wc.closed_timestamp IS NOT NULL
            THEN CAST((wc.closed_timestamp - hs.started_timestamp) * 1000 AS INTEGER)
            ELSE NULL
        END as time_ms,
        CASE
            WHEN wc.closed_timestamp IS NOT NULL THEN 'closed'
            WHEN hs.status IS NOT NULL THEN 'open'
            ELSE 'connecting'
        END as state,
        CAST(NULL AS VARCHAR) as pause_stage,
        CAST(NULL AS BIGINT) as paused_id,
        hs.request_headers,
        CAST(NULL AS VARCHAR) as post_data,
        hs.response_headers,
        'websocket' as mime_type,
        CAST(NULL AS JSON) as timing,
        CAST(NULL AS VARCHAR) as error_text,
        CAST(NULL AS JSON) as request_cookies,
        wf.frames_sent,
        wf.frames_received,
        wf.total_bytes as ws_total_bytes
    FROM ws_created ws
    LEFT JOIN ws_handshake hs ON ws.request_id = hs.request_id
    LEFT JOIN ws_frames wf ON ws.request_id = wf.request_id
    LEFT JOIN ws_closed wc ON ws.request_id = wc.request_id
)

SELECT * FROM http_entries
UNION ALL
SELECT * FROM websocket_entries
ORDER BY id DESC
"""
266
+
267
# HAR summary view - slim projection of har_entries used as the lightweight
# list for the network() command. The tuple order is the view's column order.
_HAR_SUMMARY_COLUMNS = (
    "id",
    "request_id",
    "protocol",
    "method",
    "status",
    "url",
    "type",
    "size",
    "time_ms",
    "state",
    "pause_stage",
    "paused_id",
    "frames_sent",
    "frames_received",
)
_HAR_SUMMARY_SQL = (
    "\nCREATE OR REPLACE VIEW har_summary AS\nSELECT\n"
    + ",\n".join(_HAR_SUMMARY_COLUMNS)
    + "\nFROM har_entries\n"
)
287
+
288
+
289
def create_har_views(db_execute) -> None:
    """Install the HAR aggregation views in DuckDB.

    Runs the statements that define the har_entries and har_summary views,
    each with ``wait_result=True``, then logs a debug message.

    Args:
        db_execute: Function to execute SQL (session._db_execute)
    """
    # Order matters: har_summary selects from har_entries, so it goes second.
    for view_sql in (_HAR_ENTRIES_SQL, _HAR_SUMMARY_SQL):
        db_execute(view_sql, wait_result=True)
    logger.debug("HAR views created")
300
+
301
+
302
+ __all__ = ["create_har_views"]
@@ -0,0 +1,41 @@
1
+ # Chrome DevTools Protocol Schema
2
+
3
+ This directory contains the CDP protocol schema and version information fetched from Chrome.
4
+
5
+ ## Files
6
+
7
+ - `cdp_protocol.json` - Full CDP protocol schema with all domains, commands, events, and types
8
+ - `cdp_version.json` - Chrome version and protocol version information
9
+
10
+ ## Fetching Latest Schema
11
+
12
+ To update these files with the latest protocol from your Chrome instance:
13
+
14
+ ```bash
15
+ # Ensure Chrome is running with debugging enabled:
16
+ # google-chrome --remote-debugging-port=9222
17
+
18
+ # Fetch protocol schema
19
+ curl -s http://localhost:9222/json/protocol > cdp_protocol.json
20
+
21
+ # Fetch version info
22
+ curl -s http://localhost:9222/json/version | jq '.' > cdp_version.json
23
+ ```
24
+
25
+ ## Using the Schema
26
+
27
+ These files are useful for:
28
+ - Understanding available CDP commands and their parameters
29
+ - Debugging protocol issues
30
+ - Validating command usage
31
+ - Discovering new CDP features
32
+
33
+ ## Example: Finding Fetch Commands
34
+
35
+ ```bash
36
+ # List all Fetch domain commands
37
+ cat cdp_protocol.json | jq '.domains[] | select(.domain == "Fetch") | .commands[].name'
38
+
39
+ # Get details for a specific command
40
+ cat cdp_protocol.json | jq '.domains[] | select(.domain == "Fetch") | .commands[] | select(.name == "continueResponse")'
41
+ ```