webtap-tool 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webtap-tool might be problematic. Click here for more details.

Files changed (50) hide show
  1. webtap_tool-0.1.1/.gitignore +3 -0
  2. webtap_tool-0.1.1/ARCHITECTURE.md +150 -0
  3. webtap_tool-0.1.1/CHANGELOG.md +69 -0
  4. webtap_tool-0.1.1/PKG-INFO +427 -0
  5. webtap_tool-0.1.1/README.md +400 -0
  6. webtap_tool-0.1.1/data/filters.json +92 -0
  7. webtap_tool-0.1.1/extension/manifest.json +12 -0
  8. webtap_tool-0.1.1/extension/popup.html +181 -0
  9. webtap_tool-0.1.1/extension/popup.js +298 -0
  10. webtap_tool-0.1.1/llms.txt +310 -0
  11. webtap_tool-0.1.1/pyproject.toml +50 -0
  12. webtap_tool-0.1.1/src/webtap/VISION.md +234 -0
  13. webtap_tool-0.1.1/src/webtap/__init__.py +56 -0
  14. webtap_tool-0.1.1/src/webtap/api.py +222 -0
  15. webtap_tool-0.1.1/src/webtap/app.py +76 -0
  16. webtap_tool-0.1.1/src/webtap/cdp/README.md +268 -0
  17. webtap_tool-0.1.1/src/webtap/cdp/__init__.py +14 -0
  18. webtap_tool-0.1.1/src/webtap/cdp/query.py +107 -0
  19. webtap_tool-0.1.1/src/webtap/cdp/schema/README.md +41 -0
  20. webtap_tool-0.1.1/src/webtap/cdp/schema/cdp_protocol.json +32785 -0
  21. webtap_tool-0.1.1/src/webtap/cdp/schema/cdp_version.json +8 -0
  22. webtap_tool-0.1.1/src/webtap/cdp/session.py +365 -0
  23. webtap_tool-0.1.1/src/webtap/commands/DEVELOPER_GUIDE.md +314 -0
  24. webtap_tool-0.1.1/src/webtap/commands/TIPS.md +153 -0
  25. webtap_tool-0.1.1/src/webtap/commands/__init__.py +7 -0
  26. webtap_tool-0.1.1/src/webtap/commands/_builders.py +127 -0
  27. webtap_tool-0.1.1/src/webtap/commands/_errors.py +108 -0
  28. webtap_tool-0.1.1/src/webtap/commands/_tips.py +147 -0
  29. webtap_tool-0.1.1/src/webtap/commands/_utils.py +227 -0
  30. webtap_tool-0.1.1/src/webtap/commands/body.py +161 -0
  31. webtap_tool-0.1.1/src/webtap/commands/connection.py +168 -0
  32. webtap_tool-0.1.1/src/webtap/commands/console.py +69 -0
  33. webtap_tool-0.1.1/src/webtap/commands/events.py +109 -0
  34. webtap_tool-0.1.1/src/webtap/commands/fetch.py +219 -0
  35. webtap_tool-0.1.1/src/webtap/commands/filters.py +224 -0
  36. webtap_tool-0.1.1/src/webtap/commands/inspect.py +146 -0
  37. webtap_tool-0.1.1/src/webtap/commands/javascript.py +87 -0
  38. webtap_tool-0.1.1/src/webtap/commands/launch.py +86 -0
  39. webtap_tool-0.1.1/src/webtap/commands/navigation.py +199 -0
  40. webtap_tool-0.1.1/src/webtap/commands/network.py +85 -0
  41. webtap_tool-0.1.1/src/webtap/commands/setup.py +127 -0
  42. webtap_tool-0.1.1/src/webtap/filters.py +289 -0
  43. webtap_tool-0.1.1/src/webtap/services/README.md +83 -0
  44. webtap_tool-0.1.1/src/webtap/services/__init__.py +15 -0
  45. webtap_tool-0.1.1/src/webtap/services/body.py +113 -0
  46. webtap_tool-0.1.1/src/webtap/services/console.py +116 -0
  47. webtap_tool-0.1.1/src/webtap/services/fetch.py +397 -0
  48. webtap_tool-0.1.1/src/webtap/services/main.py +175 -0
  49. webtap_tool-0.1.1/src/webtap/services/network.py +105 -0
  50. webtap_tool-0.1.1/src/webtap/services/setup.py +219 -0
@@ -0,0 +1,3 @@
1
+ # Ignore previous version files and directories
2
+ *.v[0-9]
3
+ *.v[0-9][0-9]
@@ -0,0 +1,150 @@
1
+ # WebTap Architecture
2
+
3
+ Implementation guide for WebTap commands following the VISION.
4
+
5
+ ## Core Components
6
+
7
+ ### CDPSession (cdp/session.py)
8
+ - WebSocket connection to Chrome
9
+ - DuckDB in-memory storage: `CREATE TABLE events (event JSON)`
10
+ - Events stored AS-IS: `INSERT INTO events VALUES (?)`
11
+ - Query interface: `query(sql)` - returns result rows
12
+ - Body fetching: `fetch_body(request_id)` - CDP call on-demand
13
+
14
+ ### Command Pattern
15
+
16
+ Commands query DuckDB and return data for Replkit2 display.
17
+
18
+ ```python
19
+ @app.command
20
+ def network(state, query: dict | None = None):
21
+ """Query network events with flexible filtering."""
22
+
23
+ # Default query
24
+ default = {
25
+ 'limit': 20,
26
+ 'exclude_static': True, # Skip images/fonts
27
+ 'exclude_tracking': True # Skip analytics
28
+ }
29
+ q = {**default, **(query or {})}
30
+
31
+ # Build SQL from query dict
32
+ sql = build_network_sql(q)
33
+
34
+ # Return for Replkit2 display
35
+ return state.cdp.query(sql)
36
+ ```
37
+
38
+ ## Command Implementation Guide
39
+
40
+ ### network(query: dict)
41
+ ```python
42
+ # Query dict can contain:
43
+ # - id: Single request detail
44
+ # - status: Filter by status code
45
+ # - method: Filter by HTTP method
46
+ # - url_contains: Substring match
47
+ # - limit: Result limit
48
+ # - exclude_static: Hide images/css/fonts
49
+ # - exclude_tracking: Hide analytics
50
+
51
+ # Build SQL:
52
+ SELECT
53
+ json_extract_string(event, '$.params.requestId') as id,
54
+ json_extract_string(event, '$.params.response.status') as status,
55
+ json_extract_string(event, '$.params.response.url') as url
56
+ FROM events
57
+ WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'
58
+ AND [additional filters from query dict]
59
+ LIMIT 20
60
+ ```
61
+
62
+ ### console(query: dict)
63
+ ```python
64
+ # Query dict can contain:
65
+ # - level: 'error', 'warn', 'log'
66
+ # - source: 'console', 'network', 'security'
67
+ # - contains: Text search in message
68
+ # - limit: Result limit
69
+
70
+ # Build SQL:
71
+ SELECT
72
+ json_extract_string(event, '$.params.type') as level,
73
+ json_extract_string(event, '$.params.args[0].value') as message,
74
+ json_extract_string(event, '$.params.timestamp') as time
75
+ FROM events
76
+ WHERE json_extract_string(event, '$.method') IN ('Runtime.consoleAPICalled', 'Log.entryAdded')
77
+ AND [additional filters]
78
+ ```
79
+
80
+ ### body(id: str, expr: str = None)
81
+ ```python
82
+ # Fetch body on-demand
83
+ result = state.cdp.fetch_body(id)
84
+
85
+ if not expr:
86
+ return result['body'] # Raw body
87
+
88
+ # Evaluate Python expression on body (like inspect command)
89
+ context = {
90
+ 'data': result['body'],
91
+ 'json': json.loads(result['body']) if parseable else None,
92
+ 're': __import__('re')
93
+ }
94
+ return eval(expr, {}, context)
95
+ ```
96
+
97
+ ### inspect(query: dict, expr: str)
98
+ ```python
99
+ # Query events then apply Python expression
100
+ events = state.cdp.query(build_sql(query))
101
+
102
+ # Apply expression to each event
103
+ results = []
104
+ for event in events:
105
+ context = {'event': json.loads(event[0])}
106
+ results.append(eval(expr, {}, context))
107
+ return results
108
+ ```
109
+
110
+ ## SQL Patterns
111
+
112
+ ### Fuzzy field matching
113
+ ```sql
114
+ -- Match a status value at any of several candidate field paths
115
+ SELECT * FROM events
116
+ WHERE json_extract_string(event, '$.params.response.status') = '404'
117
+ OR json_extract_string(event, '$.params.status') = '404'
118
+ ```
119
+
120
+ ### Correlation by requestId
121
+ ```sql
122
+ -- Get all events for a request
123
+ SELECT event FROM events
124
+ WHERE json_extract_string(event, '$.params.requestId') = ?
125
+ ORDER BY rowid
126
+ ```
127
+
128
+ ### Exclude noise
129
+ ```sql
130
+ -- Skip tracking/analytics
131
+ WHERE json_extract_string(event, '$.params.request.url') NOT LIKE '%google-analytics%'
132
+ AND json_extract_string(event, '$.params.request.url') NOT LIKE '%doubleclick%'
133
+ AND json_extract_string(event, '$.params.type') NOT IN ('Image', 'Font', 'Stylesheet')
134
+ ```
135
+
136
+ ## Display Strategy
137
+
138
+ - **Lists**: Return list of dicts for Replkit2 table display
139
+ - **Details**: Return single dict for box display
140
+ - **Raw**: Return JSON strings for inspect/debug
141
+
142
+ Commands should NOT format output - let Replkit2 handle display based on `@app.command(display="table"|"markdown"|"raw")`.
143
+
144
+ ## Future Commands
145
+
146
+ - `storage()` - Query cookies/localStorage via CDP
147
+ - `api()` - Discover API endpoints from traffic
148
+ - `har()` - Export to HAR format
149
+ - `intercept()` - Modify requests (requires Fetch domain)
150
+ - `timeline()` - Request/response correlation view
@@ -0,0 +1,69 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+
12
+ ### Changed
13
+
14
+ ### Fixed
15
+
16
+ ### Removed
17
+
18
+ ## [0.1.1] - 2025-09-05
19
+
20
+ ### Added
21
+
22
+ ### Changed
23
+
24
+ ### Fixed
25
+
26
+ ### Removed
27
+
28
+ ## [0.1.0] - 2025-09-05
29
+
30
+ ### Added
31
+ - Chrome DevTools Protocol (CDP) integration for browser debugging
32
+ - Native CDP Storage architecture using DuckDB for event storage
33
+ - Dynamic field discovery with fuzzy matching across all CDP events
34
+ - Network request/response monitoring with on-demand body fetching
35
+ - Console message capture with error tracking
36
+ - JavaScript execution in browser context via `js()` command
37
+ - Request interception and modification via `fetch()` command
38
+ - Chrome extension for visual page selection and debugging
39
+ - Bootstrap commands for downloading filters and extension (`setup-filters`, `setup-extension`)
40
+ - Chrome launcher command (`launch-chrome`) for debugging-enabled browser startup
41
+ - FastAPI server on port 8765 for Chrome extension integration
42
+ - Comprehensive filter system (ads, tracking, analytics, CDN, consent, monitoring)
43
+ - Events query system for flexible CDP event exploration
44
+ - Inspect command with Python environment for data analysis
45
+ - Svelte Debug Protocol (SDP) experimental support for Svelte app debugging
46
+ - Service layer architecture with clean dependency injection
47
+ - Markdown-based output formatting for all commands
48
+ - MCP (Model Context Protocol) support via ReplKit2
49
+ - CLI mode with Typer integration
50
+
51
+ ### Changed
52
+ - **BREAKING**: Removed single `bootstrap` command, replaced with separate setup commands
53
+ - **BREAKING**: `eval()` and `exec()` commands replaced by unified `js()` command
54
+ - **BREAKING**: All commands now return markdown dictionaries instead of plain text
55
+ - Aligned with ReplKit2 v0.11.0 API changes (`typer_config` instead of `cli_config`)
56
+ - Store CDP events as-is without transformation (Native CDP Storage philosophy)
57
+ - Connection errors return error responses instead of raising exceptions
58
+ - Standardized command pattern with unified builders and error handling
59
+
60
+ ### Fixed
61
+ - CLI mode parameter handling for dict/list types
62
+ - Type checking errors with proper null checks
63
+ - Import order issues in CLI mode
64
+ - Shell completion options properly hidden in CLI mode
65
+
66
+ <!--
67
+ When you run 'relkit bump', the [Unreleased] section will automatically
68
+ become the new version section. Make sure to add your changes above!
69
+ -->
@@ -0,0 +1,427 @@
1
+ Metadata-Version: 2.4
2
+ Name: webtap-tool
3
+ Version: 0.1.1
4
+ Summary: Terminal-based web page inspector for AI debugging sessions
5
+ Author-email: Fredrik Angelsen <fredrikangelsen@gmail.com>
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Programming Language :: Python :: 3.12
8
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
9
+ Classifier: Topic :: Software Development :: Debuggers
10
+ Requires-Python: >=3.12
11
+ Requires-Dist: beautifulsoup4>=4.13.5
12
+ Requires-Dist: cryptography>=45.0.6
13
+ Requires-Dist: duckdb>=1.3.2
14
+ Requires-Dist: fastapi>=0.116.1
15
+ Requires-Dist: httpx>=0.28.1
16
+ Requires-Dist: lxml>=6.0.1
17
+ Requires-Dist: msgpack-python>=0.5.6
18
+ Requires-Dist: protobuf>=6.32.0
19
+ Requires-Dist: pyjwt>=2.10.1
20
+ Requires-Dist: pyyaml>=6.0.2
21
+ Requires-Dist: replkit2[all]>=0.11.0
22
+ Requires-Dist: requests>=2.32.4
23
+ Requires-Dist: uvicorn>=0.35.0
24
+ Requires-Dist: websocket-client>=1.8.0
25
+ Requires-Dist: websockets>=15.0.1
26
+ Description-Content-Type: text/markdown
27
+
28
+ # WebTap
29
+
30
+ Browser debugging via Chrome DevTools Protocol with native event storage and dynamic querying.
31
+
32
+ ## Overview
33
+
34
+ WebTap connects to Chrome's debugging protocol and stores CDP events as-is in DuckDB, enabling powerful SQL queries and dynamic field discovery without complex transformations.
35
+
36
+ ## Key Features
37
+
38
+ - **Native CDP Storage** - Events stored exactly as received in DuckDB
39
+ - **Dynamic Field Discovery** - Automatically indexes all field paths from events
40
+ - **Smart Filtering** - Built-in filters for ads, tracking, analytics noise
41
+ - **SQL Querying** - Direct DuckDB access for complex analysis
42
+ - **Chrome Extension** - Visual page selector and connection management
43
+ - **Python Inspection** - Full Python environment for data exploration
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ # Install with uv
49
+ uv tool install webtap-tool
50
+
51
+ # Or from source
52
+ cd packages/webtap
53
+ uv sync
54
+ ```
55
+
56
+ ## Quick Start
57
+
58
+ 1. **Start Chrome with debugging**
59
+ ```bash
60
+ # macOS
61
+ /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222
62
+
63
+ # Linux
64
+ google-chrome --remote-debugging-port=9222
65
+
66
+ # Windows
67
+ chrome.exe --remote-debugging-port=9222
68
+ ```
69
+
70
+ 2. **Launch WebTap**
71
+ ```bash
72
+ webtap
73
+
74
+ # You'll see:
75
+ ================================================================================
76
+ WebTap - Chrome DevTools Protocol REPL
77
+ --------------------------------------------------------------------------------
78
+ Type help() for available commands
79
+ >>>
80
+ ```
81
+
82
+ 3. **Connect and explore**
83
+ ```python
84
+ >>> pages() # List available Chrome pages
85
+ >>> connect(0) # Connect to first page
86
+ >>> network() # View network requests (filtered)
87
+ >>> console() # View console messages
88
+ >>> events({"url": "*api*"}) # Query any CDP field dynamically
89
+ ```
90
+
91
+ ## Core Commands
92
+
93
+ ### Connection & Navigation
94
+ ```python
95
+ pages() # List Chrome pages
96
+ connect(0) # Connect by index (shorthand)
97
+ connect(page=1) # Connect by index (explicit)
98
+ connect(page_id="xyz") # Connect by page ID
99
+ disconnect() # Disconnect from current page
100
+ navigate("https://...") # Navigate to URL
101
+ reload(ignore_cache=False) # Reload page
102
+ back() / forward() # Navigate history
103
+ page() # Show current page info
104
+ ```
105
+
106
+ ### Dynamic Event Querying
107
+ ```python
108
+ # Query ANY field across ALL event types using dict filters
109
+ events({"url": "*github*"}) # Find GitHub requests
110
+ events({"status": 404}) # Find all 404s
111
+ events({"type": "xhr", "method": "POST"}) # Find AJAX POSTs
112
+ events({"headers": "*"}) # Extract all headers
113
+
114
+ # Field names are fuzzy-matched and case-insensitive
115
+ events({"URL": "*api*"}) # Works! Finds 'url', 'URL', 'documentURL'
116
+ events({"err": "*"}) # Finds 'error', 'errorText', 'err'
117
+ ```
118
+
119
+ ### Network Monitoring
120
+ ```python
121
+ network() # Filtered network requests (default)
122
+ network(no_filters=True) # Show everything (noisy!)
123
+ network(filters=["ads", "tracking"]) # Specific filter categories
124
+ ```
125
+
126
+ ### Filter Management
127
+ ```python
128
+ # Manage noise filters
129
+ filters() # Show current filters (default action="list")
130
+ filters(action="load") # Load from .webtap/filters.json
131
+ filters(action="add", config={"domain": "*doubleclick*", "category": "ads"})
132
+ filters(action="save") # Persist to disk
133
+ filters(action="toggle", config={"category": "ads"}) # Toggle category
134
+
135
+ # Built-in categories: ads, tracking, analytics, telemetry, cdn, fonts, images
136
+ ```
137
+
138
+ ### Data Inspection
139
+ ```python
140
+ # Inspect events by rowid
141
+ inspect(49) # View event details by rowid
142
+ inspect(50, expr="data['params']['response']['headers']") # Extract field
143
+
144
+ # Response body inspection with Python expressions
145
+ body(49) # Get response body
146
+ body(49, expr="import json; json.loads(body)") # Parse JSON
147
+ body(49, expr="len(body)") # Check size
148
+
149
+ # Request interception
150
+ fetch("enable") # Enable request interception
151
+ fetch("disable") # Disable request interception
152
+ requests() # Show paused requests
153
+ resume(123) # Continue paused request by ID
154
+ fail(123) # Fail paused request by ID
155
+ ```
156
+
157
+ ### Console & JavaScript
158
+ ```python
159
+ console() # View console messages
160
+ js("document.title") # Evaluate JavaScript (returns value)
161
+ js("console.log('Hello')", wait_return=False) # Execute without waiting
162
+ clear() # Clear events (default)
163
+ clear(console=True) # Clear browser console
164
+ clear(events=True, console=True, cache=True) # Clear everything
165
+ ```
166
+
167
+ ## Architecture
168
+
169
+ ### Native CDP Storage Philosophy
170
+
171
+ ```
172
+ Chrome Tab
173
+ ↓ CDP Events (WebSocket)
174
+ DuckDB Storage (events table)
175
+ ↓ SQL Queries + Field Discovery
176
+ Service Layer (WebTapService)
177
+ ├── NetworkService - Request filtering
178
+ ├── ConsoleService - Message handling
179
+ ├── FetchService - Request interception
180
+ └── BodyService - Response caching
181
+
182
+ Commands (Thin Wrappers)
183
+ ├── events() - Query any field
184
+ ├── network() - Filtered requests
185
+ ├── console() - Messages
186
+ ├── body() - Response bodies
187
+ └── js() - JavaScript execution
188
+
189
+ API Server (FastAPI on :8765)
190
+ └── Chrome Extension Integration
191
+ ```
192
+
193
+ ### How It Works
194
+
195
+ 1. **Events stored as-is** - No transformation, full CDP data preserved
196
+ 2. **Field paths indexed** - Every unique path like `params.response.status` tracked
197
+ 3. **Dynamic discovery** - Fuzzy matching finds fields without schemas
198
+ 4. **SQL generation** - User queries converted to DuckDB JSON queries
199
+ 5. **On-demand fetching** - Bodies, cookies fetched only when needed
200
+
201
+ ## Advanced Usage
202
+
203
+ ### Direct SQL Queries
204
+ ```python
205
+ # Access DuckDB directly
206
+ sql = """
207
+ SELECT json_extract_string(event, '$.params.response.url') as url,
208
+ json_extract_string(event, '$.params.response.status') as status
209
+ FROM events
210
+ WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'
211
+ """
212
+ results = state.cdp.query(sql)
213
+ ```
214
+
215
+ ### Field Discovery
216
+ ```python
217
+ # See what fields are available
218
+ state.cdp.field_paths.keys() # All discovered field names
219
+
220
+ # Find all paths for a field
221
+ state.cdp.discover_field_paths("url")
222
+ # Returns: ['params.request.url', 'params.response.url', 'params.documentURL', ...]
223
+ ```
224
+
225
+ ### Direct CDP Access
226
+ ```python
227
+ # Send CDP commands directly
228
+ state.cdp.execute("Network.getResponseBody", {"requestId": "123"})
229
+ state.cdp.execute("Storage.getCookies", {})
230
+ state.cdp.execute("Runtime.evaluate", {"expression": "window.location.href"})
231
+ ```
232
+
233
+ ### Chrome Extension
234
+
235
+ Install the extension from `packages/webtap/extension/`:
236
+ 1. Open `chrome://extensions/`
237
+ 2. Enable Developer mode
238
+ 3. Load unpacked → Select extension folder
239
+ 4. Click extension icon to connect to pages
240
+
241
+ ## Examples
242
+
243
+ ### List and Connect to Pages
244
+ ```python
245
+ >>> pages()
246
+ ## Chrome Pages
247
+
248
+ | Index | Title | URL | ID | Connected |
249
+ |:------|:---------------------|:-------------------------------|:-------|:----------|
250
+ | 0 | Messenger | https://www.m...1743198803269/ | DC8... | No |
251
+ | 1 | GitHub - replkit2 | https://githu...elsen/replkit2 | DD4... | No |
252
+ | 2 | YouTube Music | https://music.youtube.com/ | F83... | No |
253
+
254
+ _3 pages available_
255
+ <pages: 1 fields>
256
+
257
+ >>> connect(1)
258
+ ## Connection Established
259
+
260
+ **Page:** GitHub - angelsen/replkit2
261
+
262
+ **URL:** https://github.com/angelsen/replkit2
263
+ <connect: 1 fields>
264
+ ```
265
+
266
+ ### Monitor Network Traffic
267
+ ```python
268
+ >>> network()
269
+ ## Network Requests
270
+
271
+ | ID | ReqID | Method | Status | URL | Type | Size |
272
+ |:-----|:-------------|:-------|:-------|:------------------------------------------------|:---------|:-----|
273
+ | 3264 | 682214.9033 | GET | 200 | https://api.github.com/graphql | Fetch | 22KB |
274
+ | 2315 | 682214.8985 | GET | 200 | https://api.github.com/repos/angelsen/replkit2 | Fetch | 16KB |
275
+ | 359 | 682214.8638 | GET | 200 | https://github.githubassets.com/assets/app.js | Script | 21KB |
276
+
277
+ _3 requests_
278
+
279
+ ### Next Steps
280
+
281
+ - **Analyze responses:** `body(3264)` - fetch response body
282
+ - **Parse HTML:** `body(3264, "bs4(body, 'html.parser').find('title').text")`
283
+ - **Extract JSON:** `body(3264, "json.loads(body)['data']")`
284
+ - **Find patterns:** `body(3264, "re.findall(r'/api/\\w+', body)")`
285
+ - **Decode JWT:** `body(3264, "jwt.decode(body, options={'verify_signature': False})")`
286
+ - **Search events:** `events({'url': '*api*'})` - find all API calls
287
+ - **Intercept traffic:** `fetch('enable')` then `requests()` - pause and modify
288
+ <network: 1 fields>
289
+ ```
290
+
291
+ ### View Console Messages
292
+ ```python
293
+ >>> console()
294
+ ## Console Messages
295
+
296
+ | ID | Level | Source | Message | Time |
297
+ |:-----|:-----------|:---------|:----------------------------------------------------------------|:---------|
298
+ | 5939 | WARNING | security | An iframe which has both allow-scripts and allow-same-origin... | 11:42:46 |
299
+ | 2319 | LOG | console | API request completed | 11:42:40 |
300
+ | 32 | ERROR | network | Failed to load resource: the server responded with a status... | 12:47:41 |
301
+
302
+ _3 messages_
303
+
304
+ ### Next Steps
305
+
306
+ - **Inspect error:** `inspect(32)` - view full stack trace
307
+ - **Find all errors:** `events({'level': 'error'})` - filter console errors
308
+ - **Extract stack:** `inspect(32, "data.get('stackTrace', {})")`
309
+ - **Search messages:** `events({'message': '*failed*'})` - pattern match
310
+ - **Check network:** `network()` - may show failed requests causing errors
311
+ <console: 1 fields>
312
+ ```
313
+
314
+ ### Find and Analyze API Calls
315
+ ```python
316
+ >>> events({"url": "*api*", "method": "POST"})
317
+ ## Query Results
318
+
319
+ | RowID | Method | URL | Status |
320
+ |:------|:----------------------------|:--------------------------------|:-------|
321
+ | 49 | Network.requestWillBeSent | https://api.github.com/graphql | - |
322
+ | 50 | Network.responseReceived | https://api.github.com/graphql | 200 |
323
+
324
+ _2 events_
325
+ <events: 1 fields>
326
+
327
+ >>> body(50, expr="import json; json.loads(body)['data']")
328
+ {'viewer': {'login': 'octocat', 'name': 'The Octocat'}}
329
+
330
+ >>> inspect(49) # View full request details
331
+ ```
332
+
333
+ ### Debug Failed Requests
334
+ ```python
335
+ >>> events({"status": 404})
336
+ ## Query Results
337
+
338
+ | RowID | Method | URL | Status |
339
+ |:------|:-------------------------|:----------------------------------|:-------|
340
+ | 32 | Network.responseReceived | https://api.example.com/missing | 404 |
341
+ | 29 | Network.responseReceived | https://api.example.com/notfound | 404 |
342
+
343
+ _2 events_
344
+ <events: 1 fields>
345
+
346
+ >>> events({"errorText": "*"}) # Find network errors
347
+ >>> events({"type": "Failed"}) # Find failed resources
348
+ ```
349
+
350
+ ### Monitor Specific Domains
351
+ ```python
352
+ >>> events({"url": "*myapi.com*"}) # Your API
353
+ >>> events({"url": "*localhost*"}) # Local development
354
+ >>> events({"url": "*stripe*"}) # Payment APIs
355
+ ```
356
+
357
+ ### Extract Headers and Cookies
358
+ ```python
359
+ >>> events({"headers": "*authorization*"}) # Find auth headers
360
+ >>> state.cdp.execute("Storage.getCookies", {}) # Get all cookies
361
+ >>> events({"setCookie": "*"}) # Find Set-Cookie headers
362
+ ```
363
+
364
+ ## Filter Configuration
365
+
366
+ WebTap includes aggressive default filters to reduce noise. Customize in `.webtap/filters.json`:
367
+
368
+ ```json
369
+ {
370
+ "ads": {
371
+ "domains": ["*doubleclick*", "*googlesyndication*", "*adsystem*"],
372
+ "types": ["Ping", "Beacon"]
373
+ },
374
+ "tracking": {
375
+ "domains": ["*google-analytics*", "*segment*", "*mixpanel*"],
376
+ "types": ["Image", "Script"]
377
+ }
378
+ }
379
+ ```
380
+
381
+ ## Design Principles
382
+
383
+ 1. **Store AS-IS** - No transformation of CDP events
384
+ 2. **Query On-Demand** - Extract only what's needed
385
+ 3. **Dynamic Discovery** - No predefined schemas
386
+ 4. **SQL-First** - Leverage DuckDB's JSON capabilities
387
+ 5. **Minimal Memory** - Store only CDP data
388
+
389
+ ## Requirements
390
+
391
+ - Chrome/Chromium with debugging enabled
392
+ - Python 3.12+
393
+ - Dependencies: websocket-client, duckdb, replkit2, fastapi, uvicorn, beautifulsoup4
394
+
395
+ ## Development
396
+
397
+ ```bash
398
+ # Run from source
399
+ cd packages/webtap
400
+ uv run webtap
401
+
402
+ # API server starts automatically on port 8765
403
+ # Chrome extension connects to http://localhost:8765
404
+
405
+ # Type checking and linting
406
+ basedpyright packages/webtap/src/webtap
407
+ ruff check --fix packages/webtap/src/webtap
408
+ ruff format packages/webtap/src/webtap
409
+ ```
410
+
411
+ ## API Server
412
+
413
+ WebTap automatically starts a FastAPI server on port 8765 for Chrome extension integration:
414
+
415
+ - `GET /status` - Connection status
416
+ - `GET /pages` - List available Chrome pages
417
+ - `POST /connect` - Connect to a page
418
+ - `POST /disconnect` - Disconnect from current page
419
+ - `POST /clear` - Clear events/console/cache
420
+ - `GET /fetch/paused` - Get paused requests
421
+ - `POST /filters/toggle/{category}` - Toggle filter categories
422
+
423
+ The API server runs in a background thread and doesn't block the REPL.
424
+
425
+ ## License
426
+
427
+ MIT - See [LICENSE](../../LICENSE) for details.