webtap-tool 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webtap/VISION.md +246 -0
- webtap/__init__.py +84 -0
- webtap/__main__.py +6 -0
- webtap/api/__init__.py +9 -0
- webtap/api/app.py +26 -0
- webtap/api/models.py +69 -0
- webtap/api/server.py +111 -0
- webtap/api/sse.py +182 -0
- webtap/api/state.py +89 -0
- webtap/app.py +79 -0
- webtap/cdp/README.md +275 -0
- webtap/cdp/__init__.py +12 -0
- webtap/cdp/har.py +302 -0
- webtap/cdp/schema/README.md +41 -0
- webtap/cdp/schema/cdp_protocol.json +32785 -0
- webtap/cdp/schema/cdp_version.json +8 -0
- webtap/cdp/session.py +667 -0
- webtap/client.py +81 -0
- webtap/commands/DEVELOPER_GUIDE.md +401 -0
- webtap/commands/TIPS.md +269 -0
- webtap/commands/__init__.py +29 -0
- webtap/commands/_builders.py +331 -0
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/_tips.py +147 -0
- webtap/commands/_utils.py +273 -0
- webtap/commands/connection.py +220 -0
- webtap/commands/console.py +87 -0
- webtap/commands/fetch.py +310 -0
- webtap/commands/filters.py +116 -0
- webtap/commands/javascript.py +73 -0
- webtap/commands/js_export.py +73 -0
- webtap/commands/launch.py +72 -0
- webtap/commands/navigation.py +197 -0
- webtap/commands/network.py +136 -0
- webtap/commands/quicktype.py +306 -0
- webtap/commands/request.py +93 -0
- webtap/commands/selections.py +138 -0
- webtap/commands/setup.py +219 -0
- webtap/commands/to_model.py +163 -0
- webtap/daemon.py +185 -0
- webtap/daemon_state.py +53 -0
- webtap/filters.py +219 -0
- webtap/rpc/__init__.py +14 -0
- webtap/rpc/errors.py +49 -0
- webtap/rpc/framework.py +223 -0
- webtap/rpc/handlers.py +625 -0
- webtap/rpc/machine.py +84 -0
- webtap/services/README.md +83 -0
- webtap/services/__init__.py +15 -0
- webtap/services/console.py +124 -0
- webtap/services/dom.py +547 -0
- webtap/services/fetch.py +415 -0
- webtap/services/main.py +392 -0
- webtap/services/network.py +401 -0
- webtap/services/setup/__init__.py +185 -0
- webtap/services/setup/chrome.py +233 -0
- webtap/services/setup/desktop.py +255 -0
- webtap/services/setup/extension.py +147 -0
- webtap/services/setup/platform.py +162 -0
- webtap/services/state_snapshot.py +86 -0
- webtap_tool-0.11.0.dist-info/METADATA +535 -0
- webtap_tool-0.11.0.dist-info/RECORD +64 -0
- webtap_tool-0.11.0.dist-info/WHEEL +4 -0
- webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,535 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: webtap-tool
|
|
3
|
+
Version: 0.11.0
|
|
4
|
+
Summary: Terminal-based web page inspector for AI debugging sessions
|
|
5
|
+
Author-email: Fredrik Angelsen <fredrikangelsen@gmail.com>
|
|
6
|
+
Classifier: Development Status :: 3 - Alpha
|
|
7
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
8
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
|
|
11
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
12
|
+
Requires-Python: >=3.12
|
|
13
|
+
Requires-Dist: beautifulsoup4>=4.13.5
|
|
14
|
+
Requires-Dist: cryptography>=45.0.6
|
|
15
|
+
Requires-Dist: datamodel-code-generator>=0.35.0
|
|
16
|
+
Requires-Dist: duckdb>=1.3.2
|
|
17
|
+
Requires-Dist: fastapi>=0.116.1
|
|
18
|
+
Requires-Dist: httpx>=0.28.1
|
|
19
|
+
Requires-Dist: lxml>=6.0.1
|
|
20
|
+
Requires-Dist: msgpack-python>=0.5.6
|
|
21
|
+
Requires-Dist: platformdirs>=4.4.0
|
|
22
|
+
Requires-Dist: protobuf>=6.32.0
|
|
23
|
+
Requires-Dist: pyjwt>=2.10.1
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
25
|
+
Requires-Dist: replkit2[all]>=0.12.0
|
|
26
|
+
Requires-Dist: requests>=2.32.4
|
|
27
|
+
Requires-Dist: transitions>=0.9.3
|
|
28
|
+
Requires-Dist: uvicorn>=0.35.0
|
|
29
|
+
Requires-Dist: websocket-client>=1.8.0
|
|
30
|
+
Requires-Dist: websockets>=15.0.1
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# webtap
|
|
34
|
+
|
|
35
|
+
Browser debugging via Chrome DevTools Protocol with native event storage and dynamic querying.
|
|
36
|
+
|
|
37
|
+
## ✨ Features
|
|
38
|
+
|
|
39
|
+
- 🔍 **Native CDP Storage** - Events stored exactly as received in DuckDB
|
|
40
|
+
- 🎯 **Dynamic Field Discovery** - Automatically indexes all field paths from events
|
|
41
|
+
- 🚫 **Smart Filtering** - Built-in filters for ads, tracking, analytics noise
|
|
42
|
+
- 📊 **SQL Querying** - Direct DuckDB access for complex analysis
|
|
43
|
+
- 🔌 **MCP Ready** - Tools and resources for Claude/LLMs
|
|
44
|
+
- 🎨 **Rich Display** - Tables, alerts, and formatted output
|
|
45
|
+
- 🐍 **Python Inspection** - Full Python environment for data exploration
|
|
46
|
+
|
|
47
|
+
## 📋 Prerequisites
|
|
48
|
+
|
|
49
|
+
Required system dependencies:
|
|
50
|
+
- **google-chrome-stable** or **chromium** - Browser with DevTools Protocol support
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# macOS
|
|
54
|
+
brew install --cask google-chrome
|
|
55
|
+
|
|
56
|
+
# Ubuntu/Debian
|
|
57
|
+
wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
|
|
58
|
+
sudo sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
|
|
59
|
+
sudo apt update
|
|
60
|
+
sudo apt install google-chrome-stable
|
|
61
|
+
|
|
62
|
+
# Arch Linux
|
|
63
|
+
yay -S google-chrome # or google-chrome-stable from AUR
|
|
64
|
+
|
|
65
|
+
# Fedora
|
|
66
|
+
sudo dnf install google-chrome-stable
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## 📦 Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Install via uv tool (recommended)
|
|
73
|
+
uv tool install webtap-tool
|
|
74
|
+
|
|
75
|
+
# Or with pipx
|
|
76
|
+
pipx install webtap-tool
|
|
77
|
+
|
|
78
|
+
# Update to latest
|
|
79
|
+
uv tool upgrade webtap-tool
|
|
80
|
+
|
|
81
|
+
# Uninstall
|
|
82
|
+
uv tool uninstall webtap-tool
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## 🚀 Quick Start
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# 1. Install webtap
|
|
89
|
+
uv tool install webtap-tool
|
|
90
|
+
|
|
91
|
+
# 2. Optional: Setup helpers (first time only)
|
|
92
|
+
webtap --cli setup-filters # Download default filter configurations
|
|
93
|
+
webtap --cli setup-extension # Download Chrome extension files
|
|
94
|
+
webtap --cli setup-chrome # Install Chrome wrapper for debugging
|
|
95
|
+
|
|
96
|
+
# 3. Launch Chrome with debugging
|
|
97
|
+
webtap --cli run-chrome # Or manually: google-chrome-stable --remote-debugging-port=9222
|
|
98
|
+
|
|
99
|
+
# 4. Start webtap REPL (auto-starts daemon)
|
|
100
|
+
webtap
|
|
101
|
+
|
|
102
|
+
# 5. Connect and explore
|
|
103
|
+
>>> pages() # List available Chrome pages
|
|
104
|
+
>>> connect(0) # Connect to first page
|
|
105
|
+
>>> network() # View network requests (filtered)
|
|
106
|
+
>>> network(url="*api*") # Filter by URL pattern
|
|
107
|
+
>>> request(123, ["response.content"]) # Get response body by row ID
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 🔌 MCP Setup for Claude
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
# Quick setup with Claude CLI
|
|
114
|
+
claude mcp add webtap -- webtap --mcp
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Or manually configure Claude Desktop (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
118
|
+
```json
|
|
119
|
+
{
|
|
120
|
+
"mcpServers": {
|
|
121
|
+
"webtap": {
|
|
122
|
+
"command": "webtap",
|
|
123
|
+
"args": ["--mcp"]
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## 🎮 Usage
|
|
130
|
+
|
|
131
|
+
### Interactive REPL
|
|
132
|
+
```bash
|
|
133
|
+
webtap # Start REPL
|
|
134
|
+
webtap --mcp # Start as MCP server
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### CLI Commands
|
|
138
|
+
```bash
|
|
139
|
+
webtap --cli setup-filters # Download filter configurations
|
|
140
|
+
webtap --cli setup-extension # Download Chrome extension
|
|
141
|
+
webtap --cli setup-chrome # Install Chrome wrapper script
|
|
142
|
+
webtap --cli run-chrome # Launch Chrome with debugging
|
|
143
|
+
webtap --cli --help # Show all CLI commands
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Commands
|
|
147
|
+
```python
|
|
148
|
+
>>> pages() # List available Chrome pages
|
|
149
|
+
>>> connect(0) # Connect to first page
|
|
150
|
+
>>> network() # View network requests (filtered)
|
|
151
|
+
>>> network(status=404, url="*api*") # Filter by status and URL
|
|
152
|
+
>>> console() # View console messages
|
|
153
|
+
>>> request(123, ["response.content"]) # Get response body by row ID
|
|
154
|
+
>>> request(123, ["*"]) # Get full HAR entry
|
|
155
|
+
>>> js("document.title") # Execute JavaScript
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Command Reference
|
|
159
|
+
|
|
160
|
+
| Command | Description |
|
|
161
|
+
|---------|------------|
|
|
162
|
+
| `pages()` | List available Chrome pages |
|
|
163
|
+
| `connect(page=0)` | Connect to page by index |
|
|
164
|
+
| `disconnect()` | Disconnect from current page |
|
|
165
|
+
| `navigate(url)` | Navigate to URL |
|
|
166
|
+
| `network(status, url, type, method)` | View network requests with filters |
|
|
167
|
+
| `console(level, limit)` | View console messages |
|
|
168
|
+
| `request(id, fields, expr)` | Get HAR request details with field selection |
|
|
169
|
+
| `js(code, await_promise, persist)` | Execute JavaScript |
|
|
170
|
+
| `filters(add, remove, enable, disable)` | Manage noise filters |
|
|
171
|
+
| `fetch(action, options)` | Control request interception |
|
|
172
|
+
| `to_model(id, output, model_name)` | Generate Pydantic models from responses |
|
|
173
|
+
| `quicktype(id, output, type_name)` | Generate TypeScript/Go/Rust types |
|
|
174
|
+
| `clear(events, console)` | Clear events/console |
|
|
175
|
+
|
|
176
|
+
## Core Commands
|
|
177
|
+
|
|
178
|
+
### Connection & Navigation
|
|
179
|
+
```python
|
|
180
|
+
pages() # List Chrome pages
|
|
181
|
+
connect(0) # Connect by index (shorthand)
|
|
182
|
+
connect(page=1) # Connect by index (explicit)
|
|
183
|
+
connect(page_id="xyz") # Connect by page ID
|
|
184
|
+
disconnect() # Disconnect from current page
|
|
185
|
+
navigate("https://...") # Navigate to URL
|
|
186
|
+
reload(ignore_cache=False) # Reload page
|
|
187
|
+
back() / forward() # Navigate history
|
|
188
|
+
page() # Show current page info
|
|
189
|
+
status() # Show connection and daemon status
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Network Monitoring
|
|
193
|
+
```python
|
|
194
|
+
network() # Filtered network requests (default)
|
|
195
|
+
network(all=True) # Show everything (bypass filters)
|
|
196
|
+
network(status=404) # Filter by HTTP status
|
|
197
|
+
network(method="POST") # Filter by HTTP method
|
|
198
|
+
network(type="xhr") # Filter by resource type
|
|
199
|
+
network(url="*api*") # Filter by URL pattern
|
|
200
|
+
network(status=200, url="*graphql*") # Combine filters
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Request Inspection
|
|
204
|
+
```python
|
|
205
|
+
# Get HAR request details by row ID from network() output
|
|
206
|
+
request(123) # Minimal view (method, url, status)
|
|
207
|
+
request(123, ["*"]) # Full HAR entry
|
|
208
|
+
request(123, ["request.headers.*"]) # Request headers only
|
|
209
|
+
request(123, ["response.content"]) # Fetch response body
|
|
210
|
+
request(123, ["request.postData", "response.content"]) # Both bodies
|
|
211
|
+
|
|
212
|
+
# With Python expression evaluation
|
|
213
|
+
request(123, ["response.content"], expr="json.loads(data['response']['content']['text'])")
|
|
214
|
+
request(123, ["response.content"], expr="BeautifulSoup(data['response']['content']['text'], 'html.parser').title")
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Code Generation
|
|
218
|
+
```python
|
|
219
|
+
# Generate Pydantic models from response bodies
|
|
220
|
+
to_model(123, "models/user.py", "User")
|
|
221
|
+
to_model(123, "models/user.py", "User", json_path="data[0]") # Extract nested
|
|
222
|
+
|
|
223
|
+
# Generate TypeScript/Go/Rust/etc types
|
|
224
|
+
quicktype(123, "types/user.ts", "User")
|
|
225
|
+
quicktype(123, "api.go", "ApiResponse")
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Filter Management
|
|
229
|
+
```python
|
|
230
|
+
filters() # Show all filter groups
|
|
231
|
+
filters(add="myfilter", hide={"urls": ["*ads*"]}) # Create filter group
|
|
232
|
+
filters(enable="myfilter") # Enable group
|
|
233
|
+
filters(disable="myfilter") # Disable group
|
|
234
|
+
filters(remove="myfilter") # Delete group
|
|
235
|
+
|
|
236
|
+
# Built-in groups: ads, tracking, analytics, telemetry, cdn, fonts, images
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Request Interception
|
|
240
|
+
```python
|
|
241
|
+
fetch("status") # Check interception status
|
|
242
|
+
fetch("enable") # Enable request interception
|
|
243
|
+
fetch("enable", {"response": True}) # Intercept responses too
|
|
244
|
+
fetch("disable") # Disable interception
|
|
245
|
+
requests() # Show paused requests
|
|
246
|
+
resume(123) # Continue paused request
|
|
247
|
+
resume(123, modifications={"url": "..."}) # Modify and continue
|
|
248
|
+
fail(123) # Block the request
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Console & JavaScript
|
|
252
|
+
```python
|
|
253
|
+
console() # View console messages
|
|
254
|
+
console(level="error") # Filter by level
|
|
255
|
+
js("document.title") # Evaluate JavaScript (returns value)
|
|
256
|
+
js("fetch('/api').then(r=>r.json())", await_promise=True) # Async operations
|
|
257
|
+
js("var x = 1; x + 1", persist=True) # Multi-statement (global scope)
|
|
258
|
+
js("element.offsetWidth", selection=1) # Use browser-selected element
|
|
259
|
+
clear() # Clear events (default)
|
|
260
|
+
clear(console=True) # Clear browser console
|
|
261
|
+
clear(events=True, console=True) # Clear everything
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
## Architecture
|
|
265
|
+
|
|
266
|
+
### Daemon-Based Architecture
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
REPL / MCP Client (webtap)
|
|
270
|
+
↓ JSON-RPC 2.0 (localhost:8765/rpc)
|
|
271
|
+
WebTap Daemon (background process)
|
|
272
|
+
├── FastAPI Server + RPCFramework
|
|
273
|
+
│ └── Single /rpc endpoint (22 methods)
|
|
274
|
+
├── ConnectionMachine (state lifecycle)
|
|
275
|
+
↓
|
|
276
|
+
Service Layer (WebTapService)
|
|
277
|
+
├── NetworkService - Request filtering
|
|
278
|
+
├── ConsoleService - Message handling
|
|
279
|
+
├── FetchService - Request interception
|
|
280
|
+
└── DOMService - Element selection
|
|
281
|
+
↓
|
|
282
|
+
CDPSession + DuckDB
|
|
283
|
+
├── events table (method-indexed)
|
|
284
|
+
└── HAR views (pre-aggregated)
|
|
285
|
+
↓ WebSocket
|
|
286
|
+
Chrome Browser (--remote-debugging-port=9222)
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### How It Works
|
|
290
|
+
|
|
291
|
+
1. **Daemon manages CDP** - Background process holds WebSocket connection
|
|
292
|
+
2. **Events stored as-is** - No transformation, full CDP data preserved in DuckDB
|
|
293
|
+
3. **HAR views pre-aggregated** - Network requests correlated for fast querying
|
|
294
|
+
4. **Method-indexed events** - O(1) filtering by CDP event type
|
|
295
|
+
5. **On-demand body fetching** - Response bodies fetched only when requested
|
|
296
|
+
6. **Clients are stateless** - REPL and MCP clients communicate with the daemon over HTTP
|
|
297
|
+
|
|
298
|
+
## Advanced Usage
|
|
299
|
+
|
|
300
|
+
### Daemon Management
|
|
301
|
+
```bash
|
|
302
|
+
webtap --daemon # Start daemon in foreground (for debugging)
|
|
303
|
+
webtap --daemon status # Show daemon status (PID, connected page, events)
|
|
304
|
+
webtap --daemon stop # Stop running daemon
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
### Expression Evaluation
|
|
308
|
+
The `request()` command supports Python expressions with pre-imported libraries:
|
|
309
|
+
```python
|
|
310
|
+
# Libraries available: json, re, bs4/BeautifulSoup, lxml, jwt, yaml, httpx, etc.
|
|
311
|
+
request(123, ["response.content"], expr="json.loads(data['response']['content']['text'])")
|
|
312
|
+
request(123, ["response.content"], expr="BeautifulSoup(data['response']['content']['text'], 'html.parser').find_all('a')")
|
|
313
|
+
request(123, ["response.content"], expr="jwt.decode(data['response']['content']['text'], options={'verify_signature': False})")
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Browser Element Selection
|
|
317
|
+
Use the Chrome extension to select DOM elements, then access them:
|
|
318
|
+
```python
|
|
319
|
+
selections() # View all selected elements
|
|
320
|
+
selections(expr="data['selections']['1']") # Get element #1 data
|
|
321
|
+
js("element.offsetWidth", selection=1) # Run JS on selected element
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Direct CDP Commands via JavaScript
|
|
325
|
+
```python
|
|
326
|
+
# js() evaluates JavaScript in the page, e.g. call an API and await the JSON result
|
|
327
|
+
js("await fetch('/api/data').then(r => r.json())", await_promise=True)
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### Chrome Extension
|
|
331
|
+
|
|
332
|
+
Install the extension from `packages/webtap/extension/`:
|
|
333
|
+
1. Open `chrome://extensions/`
|
|
334
|
+
2. Enable Developer mode
|
|
335
|
+
3. Load unpacked → Select extension folder
|
|
336
|
+
4. Click extension icon to connect to pages
|
|
337
|
+
|
|
338
|
+
## Examples
|
|
339
|
+
|
|
340
|
+
### List and Connect to Pages
|
|
341
|
+
```python
|
|
342
|
+
>>> pages()
|
|
343
|
+
## Chrome Pages
|
|
344
|
+
|
|
345
|
+
| Index | Title | URL | ID | Connected |
|
|
346
|
+
|:------|:---------------------|:-------------------------------|:-------|:----------|
|
|
347
|
+
| 0 | Messenger | https://www.m...1743198803269/ | DC8... | No |
|
|
348
|
+
| 1 | GitHub - replkit2 | https://githu...elsen/replkit2 | DD4... | No |
|
|
349
|
+
| 2 | YouTube Music | https://music.youtube.com/ | F83... | No |
|
|
350
|
+
|
|
351
|
+
_3 pages available_
|
|
352
|
+
|
|
353
|
+
>>> connect(1)
|
|
354
|
+
## Connection Established
|
|
355
|
+
|
|
356
|
+
**Page:** GitHub - angelsen/replkit2
|
|
357
|
+
**URL:** https://github.com/angelsen/replkit2
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
### Monitor Network Traffic
|
|
361
|
+
```python
|
|
362
|
+
>>> network()
|
|
363
|
+
## Network Requests
|
|
364
|
+
|
|
365
|
+
| ID | Method | Status | URL | Type | Size |
|
|
366
|
+
|:-----|:-------|:-------|:------------------------------------------------|:---------|:-----|
|
|
367
|
+
| 3264 | GET | 200 | https://api.github.com/graphql | Fetch | 22KB |
|
|
368
|
+
| 2315 | GET | 200 | https://api.github.com/repos/angelsen/replkit2 | Fetch | 16KB |
|
|
369
|
+
| 359 | GET | 200 | https://github.githubassets.com/assets/app.js | Script | 21KB |
|
|
370
|
+
|
|
371
|
+
_3 requests_
|
|
372
|
+
|
|
373
|
+
>>> # Filter by URL pattern
|
|
374
|
+
>>> network(url="*api*")
|
|
375
|
+
|
|
376
|
+
>>> # Filter by status code
|
|
377
|
+
>>> network(status=404)
|
|
378
|
+
|
|
379
|
+
>>> # Combine filters
|
|
380
|
+
>>> network(method="POST", url="*graphql*")
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
### Inspect Request Details
|
|
384
|
+
```python
|
|
385
|
+
>>> # Get response body
|
|
386
|
+
>>> request(3264, ["response.content"])
|
|
387
|
+
|
|
388
|
+
>>> # Parse JSON response
|
|
389
|
+
>>> request(3264, ["response.content"], expr="json.loads(data['response']['content']['text'])")
|
|
390
|
+
{'viewer': {'login': 'octocat', 'name': 'The Octocat'}}
|
|
391
|
+
|
|
392
|
+
>>> # Get full HAR entry
|
|
393
|
+
>>> request(3264, ["*"])
|
|
394
|
+
|
|
395
|
+
>>> # Get just headers
|
|
396
|
+
>>> request(3264, ["request.headers.*", "response.headers.*"])
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Generate Types from API Responses
|
|
400
|
+
```python
|
|
401
|
+
>>> # Generate Pydantic model
|
|
402
|
+
>>> to_model(3264, "models/github_response.py", "GitHubResponse")
|
|
403
|
+
Model written to models/github_response.py
|
|
404
|
+
|
|
405
|
+
>>> # Generate TypeScript types
|
|
406
|
+
>>> quicktype(3264, "types/github.ts", "GitHubResponse")
|
|
407
|
+
Types written to types/github.ts
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
### View Console Messages
|
|
411
|
+
```python
|
|
412
|
+
>>> console()
|
|
413
|
+
## Console Messages
|
|
414
|
+
|
|
415
|
+
| ID | Level | Source | Message | Time |
|
|
416
|
+
|:-----|:-----------|:---------|:----------------------------------------------------------------|:---------|
|
|
417
|
+
| 5939 | WARNING | security | An iframe which has both allow-scripts and allow-same-origin... | 11:42:46 |
|
|
418
|
+
| 2319 | LOG | console | API request completed | 11:42:40 |
|
|
419
|
+
| 32 | ERROR | network | Failed to load resource: the server responded with a status... | 12:47:41 |
|
|
420
|
+
|
|
421
|
+
_3 messages_
|
|
422
|
+
|
|
423
|
+
>>> # Filter by level
|
|
424
|
+
>>> console(level="error")
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Intercept and Modify Requests
|
|
428
|
+
```python
|
|
429
|
+
>>> fetch("enable")
|
|
430
|
+
## Fetch Interception Enabled
|
|
431
|
+
|
|
432
|
+
>>> # Make a request in the browser - it will pause
|
|
433
|
+
>>> requests()
|
|
434
|
+
## Paused Requests
|
|
435
|
+
|
|
436
|
+
| ID | Stage | Method | URL |
|
|
437
|
+
|:----|:--------|:-------|:-----------------------|
|
|
438
|
+
| 47 | Request | GET | https://api.example.com |
|
|
439
|
+
|
|
440
|
+
>>> # Resume normally
|
|
441
|
+
>>> resume(47)
|
|
442
|
+
|
|
443
|
+
>>> # Or modify the request
|
|
444
|
+
>>> resume(47, modifications={"url": "https://api.example.com/v2"})
|
|
445
|
+
|
|
446
|
+
>>> # Or block it
|
|
447
|
+
>>> fail(47)
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
## Filter Configuration
|
|
451
|
+
|
|
452
|
+
WebTap includes aggressive default filters to reduce noise. Customize them in `.webtap/filters.json`:
|
|
453
|
+
|
|
454
|
+
```json
|
|
455
|
+
{
|
|
456
|
+
"ads": {
|
|
457
|
+
"domains": ["*doubleclick*", "*googlesyndication*", "*adsystem*"],
|
|
458
|
+
"types": ["Ping", "Beacon"]
|
|
459
|
+
},
|
|
460
|
+
"tracking": {
|
|
461
|
+
"domains": ["*google-analytics*", "*segment*", "*mixpanel*"],
|
|
462
|
+
"types": ["Image", "Script"]
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
## Design Principles
|
|
468
|
+
|
|
469
|
+
1. **Store AS-IS** - No transformation of CDP events
|
|
470
|
+
2. **Query On-Demand** - Extract only what's needed
|
|
471
|
+
3. **Daemon Architecture** - Background process manages CDP connection
|
|
472
|
+
4. **HAR-First** - Pre-aggregated views for fast network queries
|
|
473
|
+
5. **Minimal Memory** - Store only CDP data
|
|
474
|
+
|
|
475
|
+
## Requirements
|
|
476
|
+
|
|
477
|
+
- Chrome/Chromium with debugging enabled (`--remote-debugging-port=9222`)
|
|
478
|
+
- Python 3.12+
|
|
479
|
+
- Key dependencies: websocket-client, duckdb, replkit2, fastapi, uvicorn, beautifulsoup4 (see the package metadata for the full list)
|
|
480
|
+
|
|
481
|
+
## 📚 Documentation
|
|
482
|
+
|
|
483
|
+
- [Vision](src/webtap/VISION.md) - Design philosophy
|
|
484
|
+
- [CDP Module](src/webtap/cdp/README.md) - CDP integration details
|
|
485
|
+
- [Commands Guide](src/webtap/commands/DEVELOPER_GUIDE.md) - Command development
|
|
486
|
+
- [Tips](src/webtap/commands/TIPS.md) - Command documentation and examples
|
|
487
|
+
|
|
488
|
+
## 🛠️ Development
|
|
489
|
+
|
|
490
|
+
```bash
|
|
491
|
+
# Clone repository
|
|
492
|
+
git clone https://github.com/angelsen/tap-tools
|
|
493
|
+
cd tap-tools
|
|
494
|
+
|
|
495
|
+
# Install for development
|
|
496
|
+
uv sync --package webtap
|
|
497
|
+
|
|
498
|
+
# Run development version
|
|
499
|
+
uv run --package webtap webtap
|
|
500
|
+
|
|
501
|
+
# Run checks (type check, format, lint)
|
|
502
|
+
make check # Type check
|
|
503
|
+
make format # Format code
|
|
504
|
+
make lint # Fix linting
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
## Daemon & API
|
|
508
|
+
|
|
509
|
+
WebTap uses a daemon architecture. The daemon auto-starts when you run `webtap` and manages:
|
|
510
|
+
|
|
511
|
+
- CDP WebSocket connection to Chrome
|
|
512
|
+
- DuckDB event storage
|
|
513
|
+
- FastAPI server on port 8765
|
|
514
|
+
|
|
515
|
+
### Daemon Commands
|
|
516
|
+
```bash
|
|
517
|
+
webtap --daemon # Start in foreground (debugging)
|
|
518
|
+
webtap --daemon status # Show status
|
|
519
|
+
webtap --daemon stop # Stop daemon
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
### API Endpoint (JSON-RPC 2.0)
|
|
523
|
+
Single endpoint: `POST /rpc`
|
|
524
|
+
|
|
525
|
+
```json
|
|
526
|
+
{"jsonrpc": "2.0", "method": "connect", "params": {"page": 0}, "id": 1}
|
|
527
|
+
{"jsonrpc": "2.0", "method": "network", "params": {"limit": 50}, "id": 2}
|
|
528
|
+
{"jsonrpc": "2.0", "method": "request", "params": {"id": 123}, "id": 3}
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
Methods: `connect`, `disconnect`, `pages`, `status`, `network`, `request`, `console`, `js`, `navigate`, `reload`, `fetch.enable`, `fetch.disable`, `fetch.resume`, `filters.*`, etc.
|
|
532
|
+
|
|
533
|
+
## 📄 License
|
|
534
|
+
|
|
535
|
+
MIT - see [LICENSE](../../LICENSE) for details.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
webtap/VISION.md,sha256=v5BGE569TgNhVk0eT2r72VDl2v2H1ftvZIq3NGvXvx8,8484
|
|
2
|
+
webtap/__init__.py,sha256=Elo3kLyXaQHtqyHePoa9WqCkjGd9pDN-xSHN4ksRQNo,2573
|
|
3
|
+
webtap/__main__.py,sha256=rRO8H19Vw-7VYuadghGNDBWu9G9Ya76nCmws8qeNcPA,104
|
|
4
|
+
webtap/app.py,sha256=Cxo3PUBnI6XrBp9y3Z09AqWIM_cGfoZDkW0_ELDZ4Bw,2607
|
|
5
|
+
webtap/client.py,sha256=C5kH9ZSR6NskOkoE_JCoPdIhdkRj4AevLJ2aulM7f9U,2480
|
|
6
|
+
webtap/daemon.py,sha256=YXIjTx5FZgdH513HhARrCibYXJLlEwpI48lAwYroDhk,5157
|
|
7
|
+
webtap/daemon_state.py,sha256=t6SC679r8N4f-kRjBX5pdwVXA4YApfGSzg-GSOLB1fs,1491
|
|
8
|
+
webtap/filters.py,sha256=4fNIbco9EWvIEKr2WU5nHIYdLJVWyfJeKK1pyVB3xq4,7090
|
|
9
|
+
webtap/api/__init__.py,sha256=IAriW_o6cyq1_naFZxifqJ_vqlGKO3o4kkW_17a16z4,174
|
|
10
|
+
webtap/api/app.py,sha256=SH7bDbCtycOHBb80nqioAcl2dcEArp-9k7B7RTKV8eE,660
|
|
11
|
+
webtap/api/models.py,sha256=XKbtPEuQcUKYRQc3xbRx0LxO0ZEuUtA-9c1hI70z2sA,1660
|
|
12
|
+
webtap/api/server.py,sha256=UgzseFcSr4g5VZV6abahlWyQAeiL1R8NNKeyQxYA5XQ,3491
|
|
13
|
+
webtap/api/sse.py,sha256=wHZn33OxOXjLNAlEc5uJ7TxpmgERuaA4pnOfRsC_78o,5968
|
|
14
|
+
webtap/api/state.py,sha256=IW4oHAaJF-KdhupKf24BJ2UNIwsKZtSFHp22O8W-tRc,3464
|
|
15
|
+
webtap/cdp/README.md,sha256=rLNZkacaFSC_-OxQTeV3MYS3JkmRuEzycFmVcvY-qEY,6198
|
|
16
|
+
webtap/cdp/__init__.py,sha256=nM1kEcVkWR5S3nOttUJSiIY7TfixOYvbW5OcJlzdBrc,337
|
|
17
|
+
webtap/cdp/har.py,sha256=viomAcv0Ynz1COXBszDUteiwAuoI5hs9gS8Ji-n59ko,11406
|
|
18
|
+
webtap/cdp/session.py,sha256=0MZ5kqDaYkL6WspJlEP6Nt0XHs_oYe2CuqDnd5727d8,24176
|
|
19
|
+
webtap/cdp/schema/README.md,sha256=hnWCzbXYcYtWaZb_SgjVaFBiG81S9b9Y3x-euQFwQDo,1222
|
|
20
|
+
webtap/cdp/schema/cdp_protocol.json,sha256=dp9_OLYLuVsQb1oV5r6MZfMzURscBLyAXUckdaPWyv4,1488452
|
|
21
|
+
webtap/cdp/schema/cdp_version.json,sha256=OhGy1qpfQjSe3Z7OqL6KynBFlDFBXxKGPZCY-ZN_lVU,399
|
|
22
|
+
webtap/commands/DEVELOPER_GUIDE.md,sha256=cvJjuKeokJndf9uh6U6jYyQ1qNq7BXUbregbZC5pZyc,11950
|
|
23
|
+
webtap/commands/TIPS.md,sha256=6ilCGKGvAiVWXCHm-SzDxnZ15rrpwRsWJmp5BpMPY0U,12133
|
|
24
|
+
webtap/commands/__init__.py,sha256=nkUqitBfJhyohKftyUoSjkCdzkgGw2xtuojlg6Qz_UU,1280
|
|
25
|
+
webtap/commands/_builders.py,sha256=hQM2TcICI6iQX76BfYesbHBVrYSRloeobVJ8vLb2elM,10021
|
|
26
|
+
webtap/commands/_code_generation.py,sha256=uBpusbIGCWqm6HwaWSuhFnRu_J5jm4rJ7qmx-esogf4,3288
|
|
27
|
+
webtap/commands/_tips.py,sha256=XTCKrGhafqghpyl_iqPSSBsQpQiVzy6-8rugWt1aXwQ,4847
|
|
28
|
+
webtap/commands/_utils.py,sha256=PyeezX2ifWddDCJxwQ5LcQ4397L7Oiy_kgDElABjjjw,8393
|
|
29
|
+
webtap/commands/connection.py,sha256=p-0bF5T33WlYXHmrotfzgmpsgSew6ZjwJt6oROmzZP0,7346
|
|
30
|
+
webtap/commands/console.py,sha256=JGIoMTxVkT7m7gvFVaABNUzygFCQyhRhTfMCUK_HcUo,2770
|
|
31
|
+
webtap/commands/fetch.py,sha256=0DsjWwjDYtNFBVm00t2bvj7ioKGr1bs34srWoW4kg-8,11876
|
|
32
|
+
webtap/commands/filters.py,sha256=_1seXgHaLOfWsCyL88vNNcppqg3lp8jBqSMsx47JdPc,4418
|
|
33
|
+
webtap/commands/javascript.py,sha256=L9hEFCvYxQB4bTVNxlnNylp-YLD5EoD21JEPPRQlxGc,2843
|
|
34
|
+
webtap/commands/js_export.py,sha256=mRwUQi2nQs-Do3mi_Q3to5aqX79cOqPmb2G_xTTk-Os,2310
|
|
35
|
+
webtap/commands/launch.py,sha256=iZDLundKlxKRLKf3Vz5at42-tp2f-Uj5wZf7fbhBfA0,2202
|
|
36
|
+
webtap/commands/navigation.py,sha256=kEcZPq1pj-oXEMLioADNEF9X09dQjaD9_59JqvZSok0,5534
|
|
37
|
+
webtap/commands/network.py,sha256=6MWBQ_tZi9ZGkKdx3x9tSVj3WjWu_ORSHex02lVxeuQ,4784
|
|
38
|
+
webtap/commands/quicktype.py,sha256=vw0W3sWB2Rq4ii0Wzl3QL9OUXdg-VVlkEBXXCkNbqe8,9805
|
|
39
|
+
webtap/commands/request.py,sha256=LwNi-el2t17_Kbpj1zGcocMViBdLVJPL6nrwYcP3S0A,3555
|
|
40
|
+
webtap/commands/selections.py,sha256=RdqCs-a4GqL1cKRxFjkpprLUi5JriVtaVt05kuJI-6Q,5241
|
|
41
|
+
webtap/commands/setup.py,sha256=MMyvN9R5Q2g9gJHYc82BcerKWRwY09xjrG4Al9m7v6s,8663
|
|
42
|
+
webtap/commands/to_model.py,sha256=YZCCqJxsFxK2l98CHeuNpHjdrPEtzPKekcrLi2YqK7Q,5696
|
|
43
|
+
webtap/rpc/__init__.py,sha256=F2kCjfZWs0gdOZ9g5tcSS4BYt8LZMCHEo_YoqPbjivE,455
|
|
44
|
+
webtap/rpc/errors.py,sha256=SrlAh_MMgYuSxREv9kfX2HxVx2XZ0iti4u8VLozkU1c,1376
|
|
45
|
+
webtap/rpc/framework.py,sha256=BWm6CBoQdwtvCSMQwZsgVuSUJuMZA-hL4t31K6z5p8U,8340
|
|
46
|
+
webtap/rpc/handlers.py,sha256=tixlOt6QewCujZuk23lM8nAyAwzjY-3flzg8DT9tvio,21369
|
|
47
|
+
webtap/rpc/machine.py,sha256=2u9sJ9mB55nyML5tVQmrzqj4FLyQigPwldBE0Td41sU,2886
|
|
48
|
+
webtap/services/README.md,sha256=mOwZhLGwLVwMDSbHNLc_XM_3yTp8IlkPTO5MEW6qcl0,2540
|
|
49
|
+
webtap/services/__init__.py,sha256=IjFqu0Ak6D-r18aokcQMtenDV3fbelvfjTCejGv6CZ0,570
|
|
50
|
+
webtap/services/console.py,sha256=GaH7dCNA9tgjXd3tLSS8dDXgfrxXfbpUHLImC2Fhpm0,3896
|
|
51
|
+
webtap/services/dom.py,sha256=3xMw3HXrs21JaOa8TfY_6Tq6342g8Olh_FP8xhe4dnU,20891
|
|
52
|
+
webtap/services/fetch.py,sha256=42v9xkLc_lXAnjhB5rmRX_wqub3I3q4AxcMsjXWz3Ac,14341
|
|
53
|
+
webtap/services/main.py,sha256=IRjxrUmT_5OLsK9fOSo0hXIwPJK09fx95lmRBfJ8E-U,14354
|
|
54
|
+
webtap/services/network.py,sha256=6koa3fcwhzzpvPeHzpmjSPMcHZDetJH_1vmCZ5_EO9g,11929
|
|
55
|
+
webtap/services/state_snapshot.py,sha256=4y3AlxSvGE5SXHapMBc0KQIhs8yi7Z6KXvoqWeKxJbw,2684
|
|
56
|
+
webtap/services/setup/__init__.py,sha256=kjOhKm-0ZDlrDOZNlQf_u1Qze1biFpYGxfd-E80C5iI,6746
|
|
57
|
+
webtap/services/setup/chrome.py,sha256=A2XKtcw3lOaFal_q-YyAaskVYUuPqjlT8R5Sp_qWlfo,7260
|
|
58
|
+
webtap/services/setup/desktop.py,sha256=ErtmMNZDtsYXVnoxEkCgX5eiJimESKtdoaTmrd9mNxQ,8099
|
|
59
|
+
webtap/services/setup/extension.py,sha256=xhazmWHbkgQItSrqMW9vQy55bA83N12rpHAgWf6sdv4,4660
|
|
60
|
+
webtap/services/setup/platform.py,sha256=zcHWuicqaAevKDa4Rie5P37GV5nACzITrbVsgx8Ef9I,4746
|
|
61
|
+
webtap_tool-0.11.0.dist-info/METADATA,sha256=WMJtjh2ISnKWl2biwsHYQjVcHLWXMeYFs-4gg93jRGA,17933
|
|
62
|
+
webtap_tool-0.11.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
63
|
+
webtap_tool-0.11.0.dist-info/entry_points.txt,sha256=iFe575I0CIb1MbfPt0oX2VYyY5gSU_dA551PKVR83TU,39
|
|
64
|
+
webtap_tool-0.11.0.dist-info/RECORD,,
|