webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/api/sse.py ADDED
@@ -0,0 +1,182 @@
1
+ """SSE streaming and broadcast management.
2
+
3
+ PUBLIC API:
4
+ - router: FastAPI router with SSE endpoints
5
+ - get_broadcast_queue: Get broadcast queue for service
6
+ - set_broadcast_ready_event: Set ready event signal
7
+ - broadcast_processor: Background task for coalesced broadcasts
8
+ """
9
+
10
+ import asyncio
11
+ import json as json_module
12
+ import logging
13
+ from typing import Any, Dict
14
+
15
+ from fastapi import APIRouter
16
+ from fastapi.responses import StreamingResponse
17
+
18
+ import webtap.api.app as app_module
19
+ from webtap.api.state import get_full_state
20
+
21
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router on which the SSE endpoint in this module is registered.
router = APIRouter()

# SSE client management
# One queue per connected SSE client; broadcast_state() pushes state dicts
# into each, and broadcast_processor() pushes None as a shutdown sentinel.
_sse_clients: set[asyncio.Queue] = set()
# Held while adding to, copying, or removing from _sse_clients.
_sse_clients_lock = asyncio.Lock()
# Assigned by broadcast_processor() when it starts; None until then.
_broadcast_queue: asyncio.Queue[Dict[str, Any]] | None = None
# Optional event that broadcast_processor() sets once its queue exists.
_broadcast_ready_event: asyncio.Event | None = None
30
+
31
+
32
def set_broadcast_ready_event(event: asyncio.Event) -> None:
    """Set the event to signal when broadcast processor is ready.

    Args:
        event: Event stored at module level. broadcast_processor() calls
            ``set()`` on it right after it has created the broadcast queue.
    """
    global _broadcast_ready_event
    _broadcast_ready_event = event
36
+
37
+
38
@router.get("/events/stream")
async def stream_events():
    """Server-Sent Events stream for real-time WebTap state updates.

    Sends the full state object once on connect and again each time
    broadcast_state() enqueues a new state. The payload produced by
    get_full_state() covers:
    - Connection status
    - Event counts
    - Fetch interception status
    - Filter status
    - Element selection state (inspect_active, selections)

    Returns:
        StreamingResponse with text/event-stream content type
    """

    async def event_generator():
        """Yield SSE frames: initial state, then updates or keepalive comments."""
        # None is the shutdown sentinel pushed by broadcast_processor().
        queue: asyncio.Queue[Dict[str, Any] | None] = asyncio.Queue(maxsize=100)

        async with _sse_clients_lock:
            _sse_clients.add(queue)

        try:
            # Send initial state on connect
            yield f"data: {json_module.dumps(get_full_state())}\n\n"

            # Stream state updates with keepalive
            while True:
                try:
                    state = await asyncio.wait_for(queue.get(), timeout=30.0)
                except asyncio.TimeoutError:
                    # Nothing to send for 30s: emit an SSE comment so the
                    # connection is not treated as idle.
                    yield ": keepalive\n\n"
                    continue

                if state is None:  # Shutdown signal
                    break
                yield f"data: {json_module.dumps(state)}\n\n"

        except asyncio.CancelledError:
            # Cancellation must reach the task that is driving this generator;
            # the finally clause below still unregisters the queue first.
            raise
        except Exception as e:
            logger.debug("SSE stream error: %s", e)
        finally:
            async with _sse_clients_lock:
                _sse_clients.discard(queue)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
            "Connection": "keep-alive",
        },
    )
94
+
95
+
96
async def broadcast_state():
    """Push the current full state onto every registered SSE client queue.

    A client whose queue is full has its oldest pending state dropped so the
    newest one fits. A queue that still cannot accept the state, or that
    fails for any other reason, is removed from the registry.
    """
    # Work on a copy so the lock is not held while enqueuing.
    async with _sse_clients_lock:
        targets = list(_sse_clients)
    if not targets:
        return

    payload = get_full_state()
    stale = set()

    for queue in targets:
        try:
            queue.put_nowait(payload)
        except asyncio.QueueFull:
            # Slow consumer: make room by dropping the oldest entry, then retry.
            logger.warning(f"SSE client queue full ({queue.qsize()}/{queue.maxsize}), discarding oldest state")
            try:
                queue.get_nowait()
                queue.put_nowait(payload)
            except Exception as retry_err:
                logger.debug(f"Failed to recover full queue: {retry_err}")
                stale.add(queue)
        except Exception as e:
            logger.debug(f"Failed to broadcast to client: {e}")
            stale.add(queue)

    if not stale:
        return

    # Unregister queues that could not be written to.
    async with _sse_clients_lock:
        _sse_clients.difference_update(stale)
129
+
130
+
131
async def broadcast_processor():
    """Background task that turns broadcast signals into SSE broadcasts.

    Creates the module-level broadcast queue, sets the ready event (if one
    was registered), then loops: wait for a signal, broadcast the current
    state to all SSE clients, and clear the service's "broadcast pending"
    flag. Per the original author's note, this runs in the FastAPI event
    loop and the signals originate from the WebSocket thread.

    On exit (including cancellation) every registered SSE client queue is
    sent a ``None`` shutdown sentinel and the client registry is emptied.

    Raises:
        asyncio.CancelledError: Propagated when the task is cancelled.
    """
    global _broadcast_queue
    signals: asyncio.Queue[Dict[str, Any]] = asyncio.Queue()
    _broadcast_queue = signals

    # Signal that processor is ready
    if _broadcast_ready_event:
        _broadcast_ready_event.set()

    logger.debug("Broadcast processor started")

    try:
        while True:
            try:
                # Wake up at least once a second even when idle.
                signal = await asyncio.wait_for(signals.get(), timeout=1.0)
            except asyncio.TimeoutError:
                continue  # Normal timeout, continue loop

            logger.debug("Broadcast signal received: %s", signal)

            try:
                try:
                    # Broadcast to all SSE clients
                    await broadcast_state()
                finally:
                    # Always clear the pending flag (service owns coalescing).
                    # If it stayed set after a failed broadcast, the service
                    # would presumably never request another one.
                    app_state = app_module.app_state
                    if app_state and app_state.service:
                        app_state.service.clear_broadcast_pending()
            except asyncio.CancelledError:
                raise  # Propagate cancellation
            except Exception:
                # logger.exception records the traceback, not just the message.
                logger.exception("Error in broadcast processor")
    finally:
        # Graceful shutdown: close all SSE clients
        async with _sse_clients_lock:
            for queue in list(_sse_clients):
                try:
                    queue.put_nowait(None)  # Non-blocking shutdown signal
                except asyncio.QueueFull:
                    pass  # Client is hung, skip
                except Exception as e:
                    logger.debug("Failed to send shutdown signal to SSE client: %s", e)
            _sse_clients.clear()

        logger.debug("Broadcast processor stopped")
178
+
179
+
180
def get_broadcast_queue() -> asyncio.Queue | None:
    """Get broadcast queue for wiring to service.

    Returns:
        The queue created by broadcast_processor(), or None if the processor
        has not started yet.
    """
    return _broadcast_queue
webtap/api/state.py ADDED
@@ -0,0 +1,89 @@
1
+ """State management for SSE broadcasting."""
2
+
3
+ import hashlib
4
+ from typing import Any, Dict
5
+
6
+ import webtap.api.app as app_module
7
+
8
+
9
+ def _stable_hash(data: str) -> str:
10
+ """Generate deterministic hash for frontend change detection.
11
+
12
+ Uses MD5 for speed (not security). Returns 16-char hex digest.
13
+ Ensures hashes remain stable across process restarts (unlike Python's hash()).
14
+ """
15
+ return hashlib.md5(data.encode()).hexdigest()[:16]
16
+
17
+
18
def get_full_state() -> Dict[str, Any]:
    """Get complete WebTap state for broadcasting.

    Reads the service's state snapshot and the RPC machine's connection
    state, then reshapes them into the dictionary sent to SSE clients.
    According to the original author's notes the snapshot is immutable and
    cached, so this is intended to be a lock-free, non-blocking read.

    When the application state or its service is not available yet, a fixed
    "disconnected" payload is returned instead.

    Returns:
        Dictionary with all state information for SSE clients
    """
    app_state = app_module.app_state
    service = app_state.service if app_state else None
    if not service:
        # Nothing to snapshot yet: report a disconnected, empty state.
        return {
            "connectionState": "disconnected",
            "epoch": 0,
            "connected": False,
            "events": {"total": 0},
            "fetch": {"enabled": False, "paused_count": 0},
            "filters": {"enabled": [], "disabled": []},
            "browser": {"inspect_active": False, "selections": {}, "prompt": "", "pending_count": 0},
            "error": None,
        }

    snapshot = service.get_state_snapshot()

    # Get connection state and epoch from RPC machine
    machine = service.rpc.machine if service.rpc else None
    connection_state = machine.state if machine else "disconnected"
    epoch = machine.epoch if machine else 0

    # Content hashes let the frontend detect which sections changed.
    # Note: selections_hash covers only the selection keys, not their values.
    selections_hash = _stable_hash(str(sorted(snapshot.selections.keys())))
    filters_hash = _stable_hash(f"{sorted(snapshot.enabled_filters)}")
    fetch_hash = _stable_hash(f"{snapshot.fetch_enabled}:{snapshot.response_stage}:{snapshot.paused_count}")
    page_hash = _stable_hash(f"{snapshot.connected}:{snapshot.page_id}")
    error_hash = _stable_hash(snapshot.error_message) if snapshot.error_message else ""

    # Page details are only reported while connected.
    page = None
    if snapshot.connected:
        page = {
            "id": snapshot.page_id,
            "title": snapshot.page_title,
            "url": snapshot.page_url,
        }

    error = None
    if snapshot.error_message:
        error = {"message": snapshot.error_message, "timestamp": snapshot.error_timestamp}

    # Convert snapshot to frontend format
    return {
        "connectionState": connection_state,
        "epoch": epoch,
        "connected": snapshot.connected,
        "page": page,
        "events": {"total": snapshot.event_count},
        "fetch": {
            "enabled": snapshot.fetch_enabled,
            "response_stage": snapshot.response_stage,
            "paused_count": snapshot.paused_count,
        },
        "filters": {"enabled": list(snapshot.enabled_filters), "disabled": list(snapshot.disabled_filters)},
        "browser": {
            "inspect_active": snapshot.inspect_active,
            "selections": snapshot.selections,
            "prompt": snapshot.prompt,
            "pending_count": snapshot.pending_count,
        },
        "error": error,
        # Content hashes for efficient change detection
        "selections_hash": selections_hash,
        "filters_hash": filters_hash,
        "fetch_hash": fetch_hash,
        "page_hash": page_hash,
        "error_hash": error_hash,
    }
webtap/app.py ADDED
@@ -0,0 +1,79 @@
1
+ """Main application entry point for WebTap browser debugger.
2
+
3
+ PUBLIC API:
4
+ - WebTapState: Application state class with daemon client
5
+ - app: Main ReplKit2 App instance (imported by commands and __init__)
6
+ """
7
+
8
+ import sys
9
+ from dataclasses import dataclass, field
10
+
11
+ from replkit2 import App
12
+
13
+ from webtap.client import RPCClient
14
+
15
+
16
@dataclass
class WebTapState:
    """Client-side application state for WebTap.

    Holds the RPC client used to talk to the daemon. Per the module's design
    notes, CDP operations and data storage live in the daemon, not here.

    Attributes:
        client: RPCClient for JSON-RPC communication with daemon
    """

    # Excluded from __init__; assigned in __post_init__.
    client: RPCClient = field(init=False)

    def __post_init__(self):
        """Create the RPC client once the dataclass fields are set up."""
        self.client = RPCClient()

    def cleanup(self):
        """Close the RPC client if one exists."""
        rpc_client = getattr(self, "client", None)
        if rpc_client:
            rpc_client.close()
37
+
38
+
39
# The App instance has to exist before any command module is imported,
# because those modules register themselves through its decorators.
app = App(
    "webtap",
    WebTapState,
    mcp_config={
        "uri_scheme": "webtap",
        "instructions": "Chrome DevTools Protocol debugger",
    },
    typer_config={
        "add_completion": False,  # Hide shell completion options
        "help": "WebTap - Chrome DevTools Protocol CLI",
    },
)

# Importing a command module runs its @app.command registrations.
# REPL/MCP mode loads every command; CLI mode ("--cli") skips the ones that
# take dict/list parameters and loads only setup and launch.
if "--cli" not in sys.argv:
    from webtap.commands import (  # noqa: E402, F401
        connection,
        navigation,
        javascript,
        network,
        request,
        console,
        filters,
        fetch,
        to_model,
        quicktype,
        js_export,
        selections,
    )

# CLI-compatible commands are loaded in both modes, after the others.
from webtap.commands import setup, launch  # noqa: E402, F401


# Entry point is in __init__.py:main() as specified in pyproject.toml


__all__ = ["WebTapState", "app"]
webtap/cdp/README.md ADDED
@@ -0,0 +1,275 @@
1
+ # Chrome DevTools Protocol (CDP) Integration
2
+
3
+ This module handles the core CDP connection and event management for WebTap.
4
+
5
+ ## Overview
6
+
7
+ The CDP module provides:
8
+ - WebSocket connection to Chrome's debugging port
9
+ - Event capture and storage in DuckDB
10
+ - Dynamic field discovery for flexible querying
11
+ - Native event storage (no transformation)
12
+
13
+ ## Architecture
14
+
15
+ ```
16
+ Chrome Browser
17
+ ↓ (WebSocket)
18
+ CDPSession (session.py)
19
+ ├── WebSocketApp (connection management)
20
+ ├── DuckDB (event storage + HAR views)
21
+ └── Method-indexed events for O(1) filtering
22
+
23
+ WebTap Commands (via RPCClient JSON-RPC 2.0)
24
+ ```
25
+
26
+ ## Core Components
27
+
28
+ ### session.py
29
+ The main CDP session manager:
30
+ - Establishes WebSocket connection
31
+ - Stores events as-is in DuckDB
32
+ - Discovers field paths dynamically
33
+ - Handles CDP command execution
34
+
35
+ ### har.py
36
+ HAR view aggregation:
37
+ - Pre-aggregated network request views (`har_entries`, `har_summary`)
38
+ - Efficient querying without JSON extraction at query time
39
+ - Request/response correlation by requestId
40
+
41
+ ### schema/
42
+ CDP protocol reference:
43
+ - Protocol version information
44
+ - Domain definitions (future)
45
+
46
+ ## Philosophy: Native Storage
47
+
48
+ We store CDP events exactly as received:
49
+
50
+ ```python
51
+ # CDP sends this
52
+ {
53
+ "method": "Network.responseReceived",
54
+ "params": {
55
+ "requestId": "123.456",
56
+ "response": {
57
+ "status": 200,
58
+ "headers": {...}
59
+ }
60
+ }
61
+ }
62
+
63
+ # We store it as-is in DuckDB
64
+ # No transformation, no data loss
65
+ ```
66
+
67
+ ## Event Domains
68
+
69
+ Currently capturing events from:
70
+
71
+ ### Network Domain
72
+ - `Network.requestWillBeSent`
73
+ - `Network.responseReceived`
74
+ - `Network.loadingFinished`
75
+ - `Network.loadingFailed`
76
+
77
+ ### Page Domain
78
+ - `Page.frameNavigated`
79
+ - `Page.domContentEventFired`
80
+ - `Page.loadEventFired`
81
+
82
+ ### Runtime Domain
83
+ - `Runtime.consoleAPICalled`
84
+ - `Runtime.exceptionThrown`
85
+
86
+ ### Fetch Domain
87
+ - `Fetch.requestPaused`
88
+ - `Fetch.authRequired`
89
+
90
+ ### Storage Domain
91
+ - `Storage.cookiesChanged`
92
+ - `Storage.cacheStorageContentUpdated`
93
+
94
+ ## Database Schema
95
+
96
+ ### events table
97
+ ```sql
98
+ CREATE TABLE events (
99
+ rowid INTEGER PRIMARY KEY,
100
+ method VARCHAR, -- Indexed for O(1) event type filtering
101
+ event JSON,
102
+ timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
103
+ )
104
+ CREATE INDEX idx_events_method ON events(method)
105
+ ```
106
+
107
+ ### HAR views (pre-aggregated)
108
+ ```sql
109
+ -- har_entries: Full request/response pairs
110
+ -- har_summary: Minimal data for table display (id, method, url, status, type, size)
111
+ ```
112
+
113
+ ### Query Examples
114
+
115
+ ```sql
116
+ -- Find all 404 responses
117
+ SELECT * FROM events
118
+ WHERE json_extract_string(event, '$.params.response.status') = '404'
119
+
120
+ -- Get request/response pairs
121
+ SELECT
122
+ e1.rowid as request_row,
123
+ e2.rowid as response_row,
124
+ json_extract_string(e1.event, '$.params.request.url') as url
125
+ FROM events e1
126
+ JOIN events e2 ON
127
+ json_extract_string(e1.event, '$.params.requestId') =
128
+ json_extract_string(e2.event, '$.params.requestId')
129
+ WHERE
130
+ json_extract_string(e1.event, '$.method') = 'Network.requestWillBeSent'
131
+ AND json_extract_string(e2.event, '$.method') = 'Network.responseReceived'
132
+ ```
133
+
134
+ ## Field Discovery
135
+
136
+ The system automatically discovers all field paths:
137
+
138
+ ```python
139
+ # When we see this event:
140
+ {
141
+ "method": "Network.responseReceived",
142
+ "params": {
143
+ "response": {
144
+ "status": 200,
145
+ "url": "https://example.com"
146
+ }
147
+ }
148
+ }
149
+
150
+ # We discover these paths:
151
+ # - method
152
+ # - params.response.status
153
+ # - params.response.url
154
+
155
+ # Users can query via commands:
156
+ network(status=200) # Filter by HTTP status
157
+ network(url="*example*") # Filter by URL pattern
158
+ request(123, ["response.*"]) # Get specific request details
159
+ ```
160
+
161
+ ## Connection Management
162
+
163
+ ### Initialization
164
+ ```python
165
+ cdp = CDPSession()
166
+ await cdp.connect("localhost", 9222, page_id)
167
+ ```
168
+
169
+ ### Event Flow
170
+ 1. Chrome sends event over WebSocket
171
+ 2. CDPSession receives in `on_message()`
172
+ 3. Event stored in DuckDB immediately
173
+ 4. Field paths extracted for discovery
174
+ 5. Event available for querying
175
+
176
+ ## CDP Command Execution
177
+
178
+ Direct command execution:
179
+ ```python
180
+ # Get response body
181
+ result = cdp.execute("Network.getResponseBody", {
182
+ "requestId": "123.456"
183
+ })
184
+
185
+ # Evaluate JavaScript
186
+ result = cdp.execute("Runtime.evaluate", {
187
+ "expression": "document.title"
188
+ })
189
+ ```
190
+
191
+ ## Performance Considerations
192
+
193
+ - **Minimal Processing**: Events stored as-is
194
+ - **Lazy Evaluation**: Field discovery on-demand
195
+ - **Efficient Storage**: DuckDB's columnar format
196
+ - **Fast Queries**: JSON functions optimized in DuckDB
197
+
198
+ ## Extension Points
199
+
200
+ ### Adding New Domains
201
+ To capture events from additional CDP domains:
202
+
203
+ 1. Enable the domain:
204
+ ```python
205
+ cdp.execute("DOMStorage.enable")
206
+ ```
207
+
208
+ 2. Events are then captured automatically and stored with the indexed `method` column
209
+
210
+ 3. Query via SQL or implement a new command:
211
+ ```sql
212
+ -- Events are stored with method for fast filtering
213
+ SELECT * FROM events WHERE method LIKE 'DOMStorage.%'
214
+ ```
215
+
216
+ ### Custom Event Processing
217
+ While we store events as-is, you can add custom processors:
218
+
219
+ ```python
220
+ def process_network_event(event):
221
+ # Custom logic here
222
+ pass
223
+
224
+ # Register processor
225
+ cdp.register_processor("Network.*", process_network_event)
226
+ ```
227
+
228
+ ## Integration with SDP
229
+
230
+ The CDP module will work alongside the future SDP (Svelte Debug Protocol) module:
231
+
232
+ ```
233
+ CDP Events (Network, DOM, Console)
234
+ +
235
+ SDP Events (State, Components, Reactivity)
236
+
237
+ Unified Event Stream in DuckDB
238
+
239
+ Correlated Analysis
240
+ ```
241
+
242
+ ## Best Practices
243
+
244
+ 1. **Don't Transform**: Store CDP data as-is
245
+ 2. **Query Don't Parse**: Use SQL for extraction
246
+ 3. **Discover Don't Define**: Let field paths emerge
247
+ 4. **Correlate Don't Duplicate**: Link events by IDs
248
+
249
+ ## Debugging
250
+
251
+ ### Enable verbose logging
252
+ ```python
253
+ import logging
254
+ logging.basicConfig(level=logging.DEBUG)
255
+ ```
256
+
257
+ ### Check connection
258
+ ```python
259
+ cdp.connected # Should be True
260
+ cdp.ws.sock.connected # WebSocket status
261
+ ```
262
+
263
+ ### Inspect stored events
264
+ ```python
265
+ cdp.query("SELECT COUNT(*) FROM events")
266
+ cdp.query("SELECT * FROM events ORDER BY rowid DESC LIMIT 5")
267
+ ```
268
+
269
+ ## Future Enhancements
270
+
271
+ - [ ] Event compression for long sessions
272
+ - [ ] Streaming to external storage
273
+ - [ ] Real-time event subscriptions
274
+ - [ ] Custom domain definitions
275
+ - [ ] Event replay functionality
webtap/cdp/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ """Chrome DevTools Protocol client with native event storage.
2
+
3
+ Native CDP approach - store events as-is, query on-demand.
4
+ Built on WebSocketApp + DuckDB for minimal overhead.
5
+
6
+ PUBLIC API:
7
+ - CDPSession: Main CDP client with WebSocket connection and event storage
8
+ """
9
+
10
+ from webtap.cdp.session import CDPSession
11
+
12
+ __all__ = ["CDPSession"]