webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/VISION.md ADDED
@@ -0,0 +1,246 @@
1
+ # WebTap Vision: Work WITH Chrome DevTools Protocol
2
+
3
+ ## Core Philosophy
4
+
5
+ **Store CDP events as-is. Transform minimally. Query on-demand.**
6
+
7
+ Instead of transforming CDP's complex nested structures into our own models, we embrace CDP's native format. We store events mostly unchanged, extract minimal data for tables, and query additional data on-demand.
8
+
9
+ ## The Problem We're Solving
10
+
11
+ CDP sends rich, nested event data. Previous approaches tried to:
12
+ 1. Transform everything into flat models
13
+ 2. Create abstraction layers over CDP
14
+ 3. Build complex query engines
15
+ 4. Format data for display
16
+
17
+ This led to:
18
+ - Loss of CDP's rich information
19
+ - Complex transformation logic
20
+ - Over-engineered abstractions
21
+ - Unnecessary memory usage
22
+
23
+ ## The Solution: Native CDP Storage
24
+
25
+ ### 1. Store Events As-Is
26
+
27
+ ```python
28
+ # CDP gives us this - we keep it!
29
+ {
30
+ "method": "Network.responseReceived",
31
+ "params": {
32
+ "requestId": "123.456",
33
+ "response": {
34
+ "status": 200,
35
+ "headers": {...},
36
+ "mimeType": "application/json",
37
+ "timing": {...}
38
+ }
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### 2. Minimal Summaries for Tables
44
+
45
+ Extract only what's needed for table display:
46
+ ```python
47
+ NetworkSummary(
48
+ id="123.456",
49
+ method="GET",
50
+ status=200,
51
+ url="https://api.example.com/data",
52
+ type="json",
53
+ size=1234
54
+ )
55
+ ```
56
+
57
+ Keep the full CDP events attached for detail views.
58
+
59
+ ### 3. On-Demand Queries
60
+
61
+ Some data isn't in the event stream:
62
+ - Response bodies: `Network.getResponseBody`
63
+ - Cookies: `Storage.getCookies`
64
+ - LocalStorage: `DOMStorage.getDOMStorageItems`
65
+ - JavaScript evaluation: `Runtime.evaluate`
66
+
67
+ Query these when needed, not preemptively.
68
+
69
+ ## Architecture
70
+
71
+ ```
72
+ ┌─────────────────┐
73
+ │ Chrome Tab │
74
+ └────────┬────────┘
75
+ │ CDP Events
76
+ ┌────────▼────────┐
77
+ │ WebSocket │
78
+ │ (WebSocketApp) │
79
+ └────────┬────────┘
80
+ │ Raw Events
81
+ ┌────────▼────────┐
82
+ │ DuckDB Storage │
83
+ │ (events table) │
84
+ └────────┬────────┘
85
+ │ SQL Queries
86
+ ┌────────────┼────────────┐
87
+ │ │ │
88
+ ┌───────▼──────┐ ┌───▼───┐ ┌──────▼──────┐
89
+ │ Commands │ │ Tables│ │Detail Views │
90
+ │network() │ │ │ │ │
91
+ │console() │ │Minimal│ │Full CDP Data│
92
+ │storage() │ │Summary│ │+ On-Demand │
93
+ └──────────────┘ └───────┘ └─────────────┘
94
+ ```
95
+
96
+ ## Data Flow Examples
97
+
98
+ ### Network Request Lifecycle
99
+
100
+ ```python
101
+ # 1. Request sent - store as-is in DuckDB
102
+ db.execute("INSERT INTO events VALUES (?)", [json.dumps({
103
+ "method": "Network.requestWillBeSent",
104
+ "params": {...} # Full CDP data
105
+ })])
106
+
107
+ # 2. Response received - store as-is
108
+ db.execute("INSERT INTO events VALUES (?)", [json.dumps({
109
+ "method": "Network.responseReceived",
110
+ "params": {...} # Full CDP data
111
+ })])
112
+
113
+ # 3. Query for table view - SQL on JSON
114
+ db.execute("""
115
+ SELECT
116
+ json_extract_string(event, '$.params.requestId') as id,
117
+ json_extract_string(event, '$.params.response.status') as status,
118
+ json_extract_string(event, '$.params.response.url') as url
119
+ FROM events
120
+ WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'
121
+ """)
122
+
123
+ # 4. Detail view - get all events for request
124
+ db.execute("""
125
+ SELECT event FROM events
126
+ WHERE json_extract_string(event, '$.params.requestId') = ?
127
+ """, [request_id])
128
+
129
+ # 5. Body fetch - on-demand CDP call
130
+ cdp.execute("Network.getResponseBody", {"requestId": "123.456"})
131
+ ```
132
+
133
+ ### Console Message
134
+
135
+ ```python
136
+ # 1. Store as-is
137
+ console_events.append({
138
+ "method": "Runtime.consoleAPICalled",
139
+ "params": {
140
+ "type": "error",
141
+ "args": [{"type": "string", "value": "Failed to fetch"}],
142
+ "stackTrace": {...}
143
+ }
144
+ })
145
+
146
+ # 2. Table view - minimal summary
147
+ ConsoleSummary(
148
+ id="console-123",
149
+ level="error",
150
+ message="Failed to fetch",
151
+ source="console"
152
+ )
153
+
154
+ # 3. Detail view - full CDP data
155
+ {
156
+ "summary": summary,
157
+ "raw": console_events[i], # Full CDP event with stack trace
158
+ }
159
+ ```
160
+
161
+ ## Benefits
162
+
163
+ 1. **No Information Loss** - Full CDP data always available
164
+ 2. **Minimal Memory** - Only store what CDP sends
165
+ 3. **Simple Code** - No complex transformations
166
+ 4. **Fast Tables** - Minimal summaries render quickly
167
+ 5. **Rich Details** - Full CDP data for debugging
168
+ 6. **On-Demand Loading** - Expensive operations only when needed
169
+ 7. **Future Proof** - New CDP features automatically available
170
+
171
+ ## Implementation Principles
172
+
173
+ ### DO:
174
+ - Store CDP events as-is
175
+ - Build minimal summaries for tables
176
+ - Query additional data on-demand
177
+ - Group events by correlation ID (requestId)
178
+ - Let ReplKit2 handle display
179
+
180
+ ### DON'T:
181
+ - Transform CDP structure unnecessarily
182
+ - Fetch data preemptively
183
+ - Create abstraction layers
184
+ - Build complex query engines
185
+ - Format data for display
186
+
187
+ ## File Structure
188
+
189
+ ```
190
+ webtap/
191
+ ├── VISION.md # This file
192
+ ├── __init__.py # Module initialization
193
+ ├── app.py # REPL app with WebTapState
194
+ ├── client.py # RPCClient for JSON-RPC communication
195
+ ├── daemon.py # Background daemon process
196
+ ├── filters.py # Filter management system
197
+ ├── api/ # FastAPI server (runs in daemon)
198
+ │ ├── __init__.py
199
+ │ ├── server.py # Server initialization & RPC wiring
200
+ │ ├── state.py # State snapshot for SSE broadcasts
201
+ │ └── sse.py # Server-sent events for live updates
202
+ ├── rpc/ # JSON-RPC 2.0 framework
203
+ │ ├── __init__.py
204
+ │ ├── framework.py # RPCFramework class & method registration
205
+ │ ├── machine.py # ConnectionMachine (state transitions)
206
+ │ ├── handlers.py # All RPC method handlers
207
+ │ └── errors.py # RPCError and error codes
208
+ ├── cdp/
209
+ │ ├── __init__.py
210
+ │ ├── session.py # CDPSession with DuckDB storage
211
+ │ ├── har.py # HAR view aggregation
212
+ │ └── schema/ # CDP protocol reference
213
+ │ └── README.md
214
+ ├── services/ # Service layer (business logic)
215
+ │ ├── __init__.py
216
+ │ ├── main.py # WebTapService orchestrator
217
+ │ ├── network.py # Network request handling
218
+ │ ├── console.py # Console message handling
219
+ │ ├── fetch.py # Request interception
220
+ │ └── dom.py # DOM inspection & selection
221
+ └── commands/ # Thin command wrappers
222
+ ├── __init__.py
223
+ ├── _builders.py # Response builders & validators
224
+ ├── _utils.py # Shared utilities & expression eval
225
+ ├── _code_generation.py # JSON/code generation helpers
226
+ ├── _tips.py # Documentation parser (TIPS.md)
227
+ ├── connection.py # connect, disconnect, pages, clear
228
+ ├── navigation.py # navigate, reload, back, forward
229
+ ├── network.py # network() command
230
+ ├── console.py # console() command
231
+ ├── request.py # request() field selection + expr
232
+ ├── javascript.py # js() execution
233
+ ├── fetch.py # fetch(), requests(), resume(), fail()
234
+ ├── filters.py # filters() management
235
+ ├── selections.py # selections/browser() element selection
236
+ ├── to_model.py # to_model() Pydantic generation
237
+ └── quicktype.py # quicktype() type generation
238
+ ```
239
+
240
+ ## Success Metrics
241
+
242
+ - **Lines of Code**: < 500 (excluding commands)
243
+ - **Transformation Logic**: < 100 lines
244
+ - **Memory Usage**: Only what CDP sends
245
+ - **Response Time**: Instant for tables, < 100ms for details
246
+ - **CDP Coverage**: 100% of CDP data accessible
webtap/__init__.py ADDED
@@ -0,0 +1,84 @@
1
+ """WebTap - Chrome DevTools Protocol REPL.
2
+
3
+ Main entry point for WebTap browser debugging tool. Provides both REPL and MCP
4
+ functionality for Chrome DevTools Protocol interaction with native CDP event
5
+ storage and on-demand querying.
6
+
7
+ PUBLIC API:
8
+ - app: Main ReplKit2 App instance
9
+ - main: Entry point function for CLI
10
+ """
11
+
12
+ import atexit
13
+ import sys
14
+
15
+ from webtap.app import app
16
+
17
def _cleanup_app_state():
    """Call ``app.state.cleanup()`` when the app exposes a truthy ``state``."""
    state = getattr(app, "state", None)
    if state:
        state.cleanup()


# Register cleanup on exit to shut down the DB thread
atexit.register(_cleanup_app_state)
19
+
20
+
21
def main():
    """Entry point for WebTap.

    Selects one of four modes from flags found anywhere in ``sys.argv``:
    - Daemon mode (``--daemon``), which always returns afterwards:
        - ``stop``: stop the running daemon; on ``RuntimeError`` print the
          error and exit with status 1
        - ``status``: print the daemon status
        - neither: start the daemon in the foreground
    - CLI mode (``--cli``): remove the flag from ``sys.argv``, run ``app.cli()``
    - MCP mode (``--mcp``): run the Model Context Protocol server
    - REPL mode (default): run the interactive shell

    MCP and REPL modes call ``ensure_daemon()`` first; CLI mode skips it.
    """
    if "--daemon" in sys.argv:
        from webtap.daemon import start_daemon, stop_daemon, daemon_status

        if "stop" in sys.argv:
            try:
                stop_daemon()
                print("Daemon stopped")
            except RuntimeError as e:
                print(f"Error: {e}")
                sys.exit(1)
            return

        if "status" not in sys.argv:
            # Bare --daemon: run the daemon in the foreground
            start_daemon()
            return

        status = daemon_status()
        if not status["running"]:
            print("Daemon not running")
            if status.get("error"):
                print(f"Error: {status['error']}")
            return

        print(f"Daemon running (pid: {status['pid']})")
        if status.get("connected"):
            print(f"Connected to: {status.get('page_title', 'Unknown')}")
            print(f"Events: {status.get('event_count', 0)}")
        else:
            print("Not connected to any page")
        return

    # CLI mode runs without the daemon; the flag is stripped before app.cli()
    if "--cli" in sys.argv:
        sys.argv.remove("--cli")
        app.cli()
        return

    # Everything below (MCP and REPL) needs the daemon
    from webtap.daemon import ensure_daemon

    ensure_daemon()

    if "--mcp" not in sys.argv:
        # Default: interactive REPL
        app.run(title="WebTap - Chrome DevTools Protocol REPL")
    else:
        app.mcp.run()
82
+
83
+
84
+ __all__ = ["app", "main"]
webtap/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m webtap."""
2
+
3
+ from webtap import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
webtap/api/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """WebTap API package.
2
+
3
+ PUBLIC API:
4
+ - run_daemon_server: Run daemon server (blocking)
5
+ """
6
+
7
+ from webtap.api.server import run_daemon_server
8
+
9
+ __all__ = ["run_daemon_server"]
webtap/api/app.py ADDED
@@ -0,0 +1,26 @@
1
+ """FastAPI application and shared state.
2
+
3
+ PUBLIC API:
4
+ - api: FastAPI application instance
5
+ - app_state: Global reference to DaemonState (set by server.py on startup)
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from fastapi import FastAPI
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+
13
# Module-level slot for the shared WebTap state; starts empty and is
# assigned by server.py on startup
app_state: Any | None = None

# The FastAPI application instance
api = FastAPI(title="WebTap API", version="0.1.0")

# CORS is opened to every origin because Chrome extensions have unique origins.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive - confirm this is intended.
api.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
webtap/api/models.py ADDED
@@ -0,0 +1,69 @@
1
+ """Pydantic request models for API endpoints.
2
+
3
+ PUBLIC API:
4
+ - ConnectRequest: Chrome page connection parameters
5
+ - FetchRequest: Fetch interception configuration
6
+ - CDPRequest: CDP command relay parameters
7
+ - ResumeRequest: Resume paused request parameters
8
+ - FailRequest: Fail paused request parameters
9
+ - FulfillRequest: Fulfill paused request with custom response
+ - ClearRequest: Flags selecting which data stores to clear
10
+ """
11
+
12
+ from typing import Any, Dict
13
+
14
+ from pydantic import BaseModel
15
+
16
+
17
class ConnectRequest(BaseModel):
    """Parameters for connecting to a Chrome page.

    The target is named either by ``page`` (index, for REPL/MCP) or by
    ``page_id`` (for the extension); both default to ``None``.
    """

    # Page index (0-based)
    page: int | None = None
    # Page ID from Chrome
    page_id: str | None = None
25
+
26
+
27
class FetchRequest(BaseModel):
    """Payload for switching fetch interception on or off."""

    # Required on/off switch
    enabled: bool
    # Off by default; presumably also intercepts at the response stage -
    # TODO confirm against the fetch service
    response_stage: bool = False
32
+
33
+
34
class CDPRequest(BaseModel):
    """Payload for relaying a CDP command."""

    # Name of the CDP method to relay
    method: str
    # Parameters for the command; an empty mapping when omitted
    params: dict[str, Any] = {}
39
+
40
+
41
class ResumeRequest(BaseModel):
    """Payload for resuming a paused request."""

    # Row id identifying the paused request
    rowid: int
    # Empty mapping when omitted; presumably overrides applied on resume -
    # TODO confirm with the handler
    modifications: dict[str, Any] = {}
    # Defaults to 0.5; presumably a wait time in seconds - TODO confirm
    wait: float = 0.5
47
+
48
+
49
class FailRequest(BaseModel):
    """Payload for failing a paused request."""

    # Row id identifying the paused request
    rowid: int
    # Failure reason; "BlockedByClient" unless the caller supplies another
    reason: str = "BlockedByClient"
54
+
55
+
56
class FulfillRequest(BaseModel):
    """Payload for answering a paused request with a custom response."""

    # Row id identifying the paused request
    rowid: int
    # Status code of the custom response (200 by default)
    response_code: int = 200
    # Headers as a list of string-to-string dicts; empty when omitted.
    # NOTE(review): the expected keys of each dict are not visible here - confirm
    response_headers: list[dict[str, str]] = []
    # Response body text; empty by default
    body: str = ""
63
+
64
+
65
class ClearRequest(BaseModel):
    """Payload selecting which data stores to clear."""

    # Events flag; on by default
    events: bool = True
    # Console flag; off by default
    console: bool = False
webtap/api/server.py ADDED
@@ -0,0 +1,111 @@
1
+ """Daemon server lifecycle management."""
2
+
3
+ import asyncio
4
+ import logging
5
+
6
+ import uvicorn
7
+
8
+ from webtap.api.app import api
9
+ from webtap.api.sse import broadcast_processor, get_broadcast_queue, set_broadcast_ready_event, router as sse_router
10
+ from webtap.daemon_state import DaemonState
11
+ from webtap.rpc import RPCFramework
12
+ from webtap.rpc.handlers import register_handlers
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def run_daemon_server(host: str = "127.0.0.1", port: int = 8765):
    """Run daemon server in foreground (blocking).

    Creates the ``DaemonState``, wires an ``RPCFramework`` to its service,
    registers the ``/rpc`` and ``/health`` endpoints plus the SSE router on
    the shared FastAPI app, then serves with uvicorn until shutdown. The
    daemon state is cleaned up on the way out, whether the server exited
    normally, was interrupted, or failed.

    Args:
        host: Host to bind to
        port: Port to bind to
    """
    import os
    import webtap.api.app as app_module
    from fastapi import Request

    # Initialize daemon state
    app_module.app_state = DaemonState()
    logger.info("Daemon initialized with CDPSession and WebTapService")

    # Initialize RPC framework and register handlers
    rpc = RPCFramework(app_module.app_state.service)
    register_handlers(rpc)
    app_module.app_state.service.rpc = rpc
    # NOTE(review): the handler count in this message is hard-coded - confirm
    # it still matches what register_handlers() registers
    logger.info("RPC framework initialized with 22 handlers")

    # Add single RPC endpoint
    @api.post("/rpc")
    async def handle_rpc(request: Request) -> dict:
        """Handle JSON-RPC 2.0 requests."""
        try:
            body = await request.json()
        except ValueError:
            # Malformed JSON (JSONDecodeError is a ValueError): reply with the
            # JSON-RPC 2.0 "Parse error" object instead of an unhandled 500
            return {
                "jsonrpc": "2.0",
                "error": {"code": -32700, "message": "Parse error"},
                "id": None,
            }
        return await rpc.handle(body)

    # Add health check endpoint
    @api.get("/health")
    async def health_check() -> dict:
        """Quick health check endpoint for extension."""
        return {"status": "ok", "pid": os.getpid()}

    # Include SSE endpoint
    api.include_router(sse_router)

    async def run():
        """Run server with proper shutdown handling."""
        config = uvicorn.Config(
            api,
            host=host,
            port=port,
            log_level="warning",
            access_log=False,
        )
        server = uvicorn.Server(config)

        # Create event for broadcast processor ready signal
        ready_event = asyncio.Event()
        set_broadcast_ready_event(ready_event)

        # Start broadcast processor in background
        broadcast_task = asyncio.create_task(broadcast_processor())

        # Wait for processor to be ready; give up without serving on timeout
        try:
            await asyncio.wait_for(ready_event.wait(), timeout=5.0)
        except asyncio.TimeoutError:
            logger.error("Broadcast processor failed to start")
            broadcast_task.cancel()
            return

        # Wire broadcast queue to service
        queue = get_broadcast_queue()
        if queue and app_module.app_state:
            app_module.app_state.service.set_broadcast_queue(queue)
            logger.debug("Broadcast queue wired to WebTapService")

        try:
            await server.serve()
        except (SystemExit, KeyboardInterrupt):
            pass
        finally:
            # Stop the background processor and wait for the cancellation
            # to finish before leaving the event loop
            if not broadcast_task.done():
                broadcast_task.cancel()
                try:
                    await broadcast_task
                except asyncio.CancelledError:
                    pass

    try:
        asyncio.run(run())
    except (SystemExit, KeyboardInterrupt):
        pass
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped
        logger.exception("Daemon server failed: %s", e)
    finally:
        if app_module.app_state:
            app_module.app_state.cleanup()
            logger.info("Daemon cleanup complete")