webtap-tool 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webtap/VISION.md +246 -0
- webtap/__init__.py +84 -0
- webtap/__main__.py +6 -0
- webtap/api/__init__.py +9 -0
- webtap/api/app.py +26 -0
- webtap/api/models.py +69 -0
- webtap/api/server.py +111 -0
- webtap/api/sse.py +182 -0
- webtap/api/state.py +89 -0
- webtap/app.py +79 -0
- webtap/cdp/README.md +275 -0
- webtap/cdp/__init__.py +12 -0
- webtap/cdp/har.py +302 -0
- webtap/cdp/schema/README.md +41 -0
- webtap/cdp/schema/cdp_protocol.json +32785 -0
- webtap/cdp/schema/cdp_version.json +8 -0
- webtap/cdp/session.py +667 -0
- webtap/client.py +81 -0
- webtap/commands/DEVELOPER_GUIDE.md +401 -0
- webtap/commands/TIPS.md +269 -0
- webtap/commands/__init__.py +29 -0
- webtap/commands/_builders.py +331 -0
- webtap/commands/_code_generation.py +110 -0
- webtap/commands/_tips.py +147 -0
- webtap/commands/_utils.py +273 -0
- webtap/commands/connection.py +220 -0
- webtap/commands/console.py +87 -0
- webtap/commands/fetch.py +310 -0
- webtap/commands/filters.py +116 -0
- webtap/commands/javascript.py +73 -0
- webtap/commands/js_export.py +73 -0
- webtap/commands/launch.py +72 -0
- webtap/commands/navigation.py +197 -0
- webtap/commands/network.py +136 -0
- webtap/commands/quicktype.py +306 -0
- webtap/commands/request.py +93 -0
- webtap/commands/selections.py +138 -0
- webtap/commands/setup.py +219 -0
- webtap/commands/to_model.py +163 -0
- webtap/daemon.py +185 -0
- webtap/daemon_state.py +53 -0
- webtap/filters.py +219 -0
- webtap/rpc/__init__.py +14 -0
- webtap/rpc/errors.py +49 -0
- webtap/rpc/framework.py +223 -0
- webtap/rpc/handlers.py +625 -0
- webtap/rpc/machine.py +84 -0
- webtap/services/README.md +83 -0
- webtap/services/__init__.py +15 -0
- webtap/services/console.py +124 -0
- webtap/services/dom.py +547 -0
- webtap/services/fetch.py +415 -0
- webtap/services/main.py +392 -0
- webtap/services/network.py +401 -0
- webtap/services/setup/__init__.py +185 -0
- webtap/services/setup/chrome.py +233 -0
- webtap/services/setup/desktop.py +255 -0
- webtap/services/setup/extension.py +147 -0
- webtap/services/setup/platform.py +162 -0
- webtap/services/state_snapshot.py +86 -0
- webtap_tool-0.11.0.dist-info/METADATA +535 -0
- webtap_tool-0.11.0.dist-info/RECORD +64 -0
- webtap_tool-0.11.0.dist-info/WHEEL +4 -0
- webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
webtap/api/sse.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""SSE streaming and broadcast management.
|
|
2
|
+
|
|
3
|
+
PUBLIC API:
|
|
4
|
+
- router: FastAPI router with SSE endpoints
|
|
5
|
+
- get_broadcast_queue: Get broadcast queue for service
|
|
6
|
+
- set_broadcast_ready_event: Set ready event signal
|
|
7
|
+
- broadcast_processor: Background task for coalesced broadcasts
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json as json_module
|
|
12
|
+
import logging
|
|
13
|
+
from typing import Any, Dict
|
|
14
|
+
|
|
15
|
+
from fastapi import APIRouter
|
|
16
|
+
from fastapi.responses import StreamingResponse
|
|
17
|
+
|
|
18
|
+
import webtap.api.app as app_module
|
|
19
|
+
from webtap.api.state import get_full_state
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
router = APIRouter()
|
|
24
|
+
|
|
25
|
+
# SSE client management
|
|
26
|
+
_sse_clients: set[asyncio.Queue] = set()
|
|
27
|
+
_sse_clients_lock = asyncio.Lock()
|
|
28
|
+
_broadcast_queue: asyncio.Queue[Dict[str, Any]] | None = None
|
|
29
|
+
_broadcast_ready_event: asyncio.Event | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def set_broadcast_ready_event(event: asyncio.Event) -> None:
    """Register the event used to announce that the broadcast processor is up.

    The event is stored in the module-level ``_broadcast_ready_event`` slot,
    replacing any previously registered event. ``broadcast_processor`` sets
    it right after creating its broadcast queue.

    Args:
        event: Event to be set once the processor is ready.
    """
    global _broadcast_ready_event
    _broadcast_ready_event = event
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.get("/events/stream")
async def stream_events():
    """Open a Server-Sent Events stream of WebTap state.

    A full state object is sent immediately on connect and again every time
    a new state is pushed onto this client's queue. The state object (built
    by ``get_full_state``) carries connection status, event counts, fetch
    interception status, filter status and element selection state.

    Returns:
        StreamingResponse with ``text/event-stream`` content type.
    """

    def _as_sse_frame(payload: Dict[str, Any]) -> str:
        # One SSE "data:" frame; the blank line terminates the event.
        return f"data: {json_module.dumps(payload)}\n\n"

    async def event_generator():
        """Yield SSE frames for one client until shutdown or disconnect."""
        # Bounded per-client queue; broadcast_state() drops the oldest entry
        # when it is full.
        client_queue: asyncio.Queue[Dict[str, Any]] = asyncio.Queue(maxsize=100)

        async with _sse_clients_lock:
            _sse_clients.add(client_queue)

        try:
            # The client gets the current state right away, before any change.
            yield _as_sse_frame(get_full_state())

            while True:
                try:
                    update = await asyncio.wait_for(client_queue.get(), timeout=30.0)
                    if update is None:
                        # None is the shutdown sentinel pushed by
                        # broadcast_processor on exit.
                        break
                    yield _as_sse_frame(update)
                except asyncio.TimeoutError:
                    # Nothing to send for 30s: emit an SSE comment line as a
                    # keepalive.
                    yield ": keepalive\n\n"

        except asyncio.CancelledError:
            # Expected during shutdown
            pass
        except Exception as e:
            logger.debug(f"SSE stream error: {e}")
        finally:
            # Always unregister so broadcasts stop targeting this queue.
            async with _sse_clients_lock:
                _sse_clients.discard(client_queue)

    sse_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",  # Disable nginx buffering
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def broadcast_state():
    """Push the current full state onto every registered SSE client queue.

    The client set is copied under the lock and the puts happen outside it.
    When a client's queue is full, the oldest queued state is dropped and the
    put is retried once with the latest state. Queues that still cannot
    accept the state are removed from the client set.
    """
    async with _sse_clients_lock:
        if not _sse_clients:
            return
        targets = list(_sse_clients)

    state = get_full_state()
    stale = set()

    for client in targets:
        try:
            client.put_nowait(state)
        except asyncio.QueueFull:
            # Client is falling behind - discard oldest state and retry with latest
            logger.warning(f"SSE client queue full ({client.qsize()}/{client.maxsize}), discarding oldest state")
            try:
                client.get_nowait()  # Discard oldest
                client.put_nowait(state)  # Retry with latest
            except Exception as retry_err:
                logger.debug(f"Failed to recover full queue: {retry_err}")
                stale.add(client)
        except Exception as e:
            logger.debug(f"Failed to broadcast to client: {e}")
            stale.add(client)

    if not stale:
        return

    # Drop queues that could not be written to; mutates the shared set in place.
    async with _sse_clients_lock:
        _sse_clients.difference_update(stale)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
async def broadcast_processor():
    """Background task that turns broadcast signals into SSE broadcasts.

    Creates the module-level broadcast queue, sets the ready event (if one
    was registered via ``set_broadcast_ready_event``), then loops: wait for a
    signal on the queue, call ``broadcast_state()``, and clear the service's
    "broadcast pending" flag.

    The pending flag is cleared in a ``finally`` so that a failed broadcast
    cannot leave it set. Per the original comment the service owns coalescing
    and relies on the flag being cleared to allow the next broadcast
    (presumably it does not enqueue new signals while the flag is set —
    the service code is not visible from this module).

    On exit (normally via task cancellation) every registered SSE client
    queue receives a ``None`` shutdown sentinel and the client set is cleared.

    Raises:
        asyncio.CancelledError: Propagated when the task is cancelled.
    """
    global _broadcast_queue
    _broadcast_queue = asyncio.Queue()

    # Signal that processor is ready
    if _broadcast_ready_event:
        _broadcast_ready_event.set()

    logger.debug("Broadcast processor started")

    try:
        while True:
            try:
                # Wait for a broadcast signal; the 1s timeout only makes the
                # loop wake up periodically, nothing is sent on timeout.
                signal = await asyncio.wait_for(_broadcast_queue.get(), timeout=1.0)
                logger.debug(f"Broadcast signal received: {signal}")

                try:
                    # Broadcast to all SSE clients
                    await broadcast_state()
                finally:
                    # Clear pending flag to allow next broadcast (service owns
                    # coalescing) - also when the broadcast itself failed.
                    if app_module.app_state and app_module.app_state.service:
                        app_module.app_state.service.clear_broadcast_pending()
            except asyncio.TimeoutError:
                continue  # Normal timeout, continue loop
            except asyncio.CancelledError:
                raise  # Propagate cancellation
            except Exception as e:
                # Keep the processor alive; include the traceback for diagnosis.
                logger.error(f"Error in broadcast processor: {e}", exc_info=True)
    finally:
        # Graceful shutdown: close all SSE clients
        async with _sse_clients_lock:
            for queue in list(_sse_clients):
                try:
                    queue.put_nowait(None)  # Non-blocking shutdown signal
                except asyncio.QueueFull:
                    pass  # Client is hung, skip
                except Exception:
                    pass
            _sse_clients.clear()

        logger.debug("Broadcast processor stopped")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def get_broadcast_queue() -> asyncio.Queue | None:
    """Return the module-level broadcast queue so it can be wired to the service.

    Returns:
        The queue created by ``broadcast_processor``, or ``None`` if the
        processor has not created one yet.
    """
    return _broadcast_queue
|
webtap/api/state.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""State management for SSE broadcasting."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
import webtap.api.app as app_module
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _stable_hash(data: str) -> str:
    """Generate a deterministic hash for frontend change detection.

    Uses MD5 for speed, not security, and keeps the first 16 hex characters
    of the digest. Unlike Python's built-in ``hash()``, the result is stable
    across process restarts.

    Args:
        data: Text to fingerprint; encoded with the default UTF-8 codec.

    Returns:
        16-character lowercase hex string.
    """
    # usedforsecurity=False declares the non-cryptographic use, so the call
    # also works on Python builds that restrict MD5 (e.g. FIPS mode).
    digest = hashlib.md5(data.encode(), usedforsecurity=False).hexdigest()
    return digest[:16]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_full_state() -> Dict[str, Any]:
    """Build the complete WebTap state payload sent to SSE clients.

    Reads the service's state snapshot and the RPC machine's connection
    state/epoch, and adds short content hashes the frontend can compare to
    detect changes. The original notes describe the snapshot as an immutable,
    cached object that needs no locking — that is a property of
    ``get_state_snapshot()`` and cannot be verified from this module.

    When the application state or its service is not available yet, a fixed
    "disconnected" payload is returned instead of raising.

    Returns:
        Dictionary with all state information for SSE clients.
    """
    app_state = app_module.app_state
    # The service may be unset even when app_state exists (the broadcast
    # processor guards for the same case), so treat both as "disconnected".
    service = app_state.service if app_state else None
    if not service:
        return {
            "connectionState": "disconnected",
            "epoch": 0,
            "connected": False,
            "events": {"total": 0},
            "fetch": {"enabled": False, "paused_count": 0},
            "filters": {"enabled": [], "disabled": []},
            "browser": {"inspect_active": False, "selections": {}, "prompt": "", "pending_count": 0},
            "error": None,
        }

    snapshot = service.get_state_snapshot()

    # Get connection state and epoch from RPC machine
    machine = service.rpc.machine if service.rpc else None
    connection_state = machine.state if machine else "disconnected"
    epoch = machine.epoch if machine else 0

    # Compute content hashes for frontend change detection.
    # Only computed here when building SSE response (not on every state change).
    # Note: selections_hash covers the selection keys only, not their values.
    selections_hash = _stable_hash(str(sorted(snapshot.selections.keys())))
    filters_hash = _stable_hash(f"{sorted(snapshot.enabled_filters)}")
    fetch_hash = _stable_hash(f"{snapshot.fetch_enabled}:{snapshot.response_stage}:{snapshot.paused_count}")
    page_hash = _stable_hash(f"{snapshot.connected}:{snapshot.page_id}")
    error_hash = _stable_hash(snapshot.error_message) if snapshot.error_message else ""

    # Convert snapshot to frontend format
    return {
        "connectionState": connection_state,
        "epoch": epoch,
        "connected": snapshot.connected,
        # Page details are only reported while connected.
        "page": {
            "id": snapshot.page_id,
            "title": snapshot.page_title,
            "url": snapshot.page_url,
        }
        if snapshot.connected
        else None,
        "events": {"total": snapshot.event_count},
        "fetch": {
            "enabled": snapshot.fetch_enabled,
            "response_stage": snapshot.response_stage,
            "paused_count": snapshot.paused_count,
        },
        "filters": {"enabled": list(snapshot.enabled_filters), "disabled": list(snapshot.disabled_filters)},
        "browser": {
            "inspect_active": snapshot.inspect_active,
            "selections": snapshot.selections,
            "prompt": snapshot.prompt,
            "pending_count": snapshot.pending_count,
        },
        "error": {"message": snapshot.error_message, "timestamp": snapshot.error_timestamp}
        if snapshot.error_message
        else None,
        # Content hashes for efficient change detection
        "selections_hash": selections_hash,
        "filters_hash": filters_hash,
        "fetch_hash": fetch_hash,
        "page_hash": page_hash,
        "error_hash": error_hash,
    }
|
webtap/app.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Main application entry point for WebTap browser debugger.
|
|
2
|
+
|
|
3
|
+
PUBLIC API:
|
|
4
|
+
- WebTapState: Application state class with daemon client
|
|
5
|
+
- app: Main ReplKit2 App instance (imported by commands and __init__)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
|
|
11
|
+
from replkit2 import App
|
|
12
|
+
|
|
13
|
+
from webtap.client import RPCClient
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class WebTapState:
    """Client-side application state for WebTap browser debugging.

    The state only holds the RPC client used to talk to the daemon; per the
    package's design notes, CDP operations and data storage live in the
    daemon rather than in this process.

    Attributes:
        client: RPCClient for JSON-RPC communication with daemon
    """

    # Excluded from the generated __init__; created in __post_init__ instead.
    client: RPCClient = field(init=False)

    def __post_init__(self):
        """Create the RPC client once the dataclass fields are set up."""
        self.client = RPCClient()

    def cleanup(self):
        """Close the RPC client, if one exists, when the app exits."""
        rpc_client = getattr(self, "client", None)
        if rpc_client:
            rpc_client.close()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Must be created before command imports for decorator registration:
# the command modules imported below use the @app.command decorator, so the
# App instance has to exist at the time they are imported.
app = App(
    "webtap",
    WebTapState,
    mcp_config={
        "uri_scheme": "webtap",
        "instructions": "Chrome DevTools Protocol debugger",
    },
    typer_config={
        "add_completion": False,  # Hide shell completion options
        "help": "WebTap - Chrome DevTools Protocol CLI",
    },
)

# Command imports trigger @app.command decorator registration.
# The mode is decided at import time by looking for "--cli" in sys.argv.
if "--cli" in sys.argv:
    # Only import CLI-compatible commands (no dict/list parameters)
    from webtap.commands import setup  # noqa: E402, F401
    from webtap.commands import launch  # noqa: E402, F401
else:
    # Import all commands for REPL/MCP mode
    # (setup and launch are imported in both modes)
    from webtap.commands import connection  # noqa: E402, F401
    from webtap.commands import navigation  # noqa: E402, F401
    from webtap.commands import javascript  # noqa: E402, F401
    from webtap.commands import network  # noqa: E402, F401
    from webtap.commands import request  # noqa: E402, F401
    from webtap.commands import console  # noqa: E402, F401
    from webtap.commands import filters  # noqa: E402, F401
    from webtap.commands import fetch  # noqa: E402, F401
    from webtap.commands import to_model  # noqa: E402, F401
    from webtap.commands import quicktype  # noqa: E402, F401
    from webtap.commands import js_export  # noqa: E402, F401
    from webtap.commands import selections  # noqa: E402, F401
    from webtap.commands import setup  # noqa: E402, F401
    from webtap.commands import launch  # noqa: E402, F401
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Entry point is in __init__.py:main() as specified in pyproject.toml
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
__all__ = ["WebTapState", "app"]
|
webtap/cdp/README.md
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# Chrome DevTools Protocol (CDP) Integration
|
|
2
|
+
|
|
3
|
+
This module handles the core CDP connection and event management for WebTap.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The CDP module provides:
|
|
8
|
+
- WebSocket connection to Chrome's debugging port
|
|
9
|
+
- Event capture and storage in DuckDB
|
|
10
|
+
- Dynamic field discovery for flexible querying
|
|
11
|
+
- Native event storage (no transformation)
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
Chrome Browser
|
|
17
|
+
↓ (WebSocket)
|
|
18
|
+
CDPSession (session.py)
|
|
19
|
+
├── WebSocketApp (connection management)
|
|
20
|
+
├── DuckDB (event storage + HAR views)
|
|
21
|
+
└── Method-indexed events for O(1) filtering
|
|
22
|
+
↓
|
|
23
|
+
WebTap Commands (via RPCClient JSON-RPC 2.0)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Core Components
|
|
27
|
+
|
|
28
|
+
### session.py
|
|
29
|
+
The main CDP session manager:
|
|
30
|
+
- Establishes WebSocket connection
|
|
31
|
+
- Stores events as-is in DuckDB
|
|
32
|
+
- Discovers field paths dynamically
|
|
33
|
+
- Handles CDP command execution
|
|
34
|
+
|
|
35
|
+
### har.py
|
|
36
|
+
HAR view aggregation:
|
|
37
|
+
- Pre-aggregated network request views (`har_entries`, `har_summary`)
|
|
38
|
+
- Efficient querying without JSON extraction at query time
|
|
39
|
+
- Request/response correlation by requestId
|
|
40
|
+
|
|
41
|
+
### schema/
|
|
42
|
+
CDP protocol reference:
|
|
43
|
+
- Protocol version information
|
|
44
|
+
- Domain definitions (future)
|
|
45
|
+
|
|
46
|
+
## Philosophy: Native Storage
|
|
47
|
+
|
|
48
|
+
We store CDP events exactly as received:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# CDP sends this
|
|
52
|
+
{
|
|
53
|
+
"method": "Network.responseReceived",
|
|
54
|
+
"params": {
|
|
55
|
+
"requestId": "123.456",
|
|
56
|
+
"response": {
|
|
57
|
+
"status": 200,
|
|
58
|
+
"headers": {...}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
# We store it as-is in DuckDB
|
|
64
|
+
# No transformation, no data loss
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Event Domains
|
|
68
|
+
|
|
69
|
+
Currently capturing events from:
|
|
70
|
+
|
|
71
|
+
### Network Domain
|
|
72
|
+
- `Network.requestWillBeSent`
|
|
73
|
+
- `Network.responseReceived`
|
|
74
|
+
- `Network.loadingFinished`
|
|
75
|
+
- `Network.loadingFailed`
|
|
76
|
+
|
|
77
|
+
### Page Domain
|
|
78
|
+
- `Page.frameNavigated`
|
|
79
|
+
- `Page.domContentEventFired`
|
|
80
|
+
- `Page.loadEventFired`
|
|
81
|
+
|
|
82
|
+
### Runtime Domain
|
|
83
|
+
- `Runtime.consoleAPICalled`
|
|
84
|
+
- `Runtime.exceptionThrown`
|
|
85
|
+
|
|
86
|
+
### Fetch Domain
|
|
87
|
+
- `Fetch.requestPaused`
|
|
88
|
+
- `Fetch.authRequired`
|
|
89
|
+
|
|
90
|
+
### Storage Domain
|
|
91
|
+
- `Storage.cookiesChanged`
|
|
92
|
+
- `Storage.cacheStorageContentUpdated`
|
|
93
|
+
|
|
94
|
+
## Database Schema
|
|
95
|
+
|
|
96
|
+
### events table
|
|
97
|
+
```sql
|
|
98
|
+
CREATE TABLE events (
|
|
99
|
+
rowid INTEGER PRIMARY KEY,
|
|
100
|
+
method VARCHAR, -- Indexed for O(1) event type filtering
|
|
101
|
+
event JSON,
|
|
102
|
+
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
103
|
+
)
|
|
104
|
+
CREATE INDEX idx_events_method ON events(method)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### HAR views (pre-aggregated)
|
|
108
|
+
```sql
|
|
109
|
+
-- har_entries: Full request/response pairs
|
|
110
|
+
-- har_summary: Minimal data for table display (id, method, url, status, type, size)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Query Examples
|
|
114
|
+
|
|
115
|
+
```sql
|
|
116
|
+
-- Find all 404 responses
|
|
117
|
+
SELECT * FROM events
|
|
118
|
+
WHERE json_extract_string(event, '$.params.response.status') = '404'
|
|
119
|
+
|
|
120
|
+
-- Get request/response pairs
|
|
121
|
+
SELECT
|
|
122
|
+
e1.rowid as request_row,
|
|
123
|
+
e2.rowid as response_row,
|
|
124
|
+
json_extract_string(e1.event, '$.params.request.url') as url
|
|
125
|
+
FROM events e1
|
|
126
|
+
JOIN events e2 ON
|
|
127
|
+
json_extract_string(e1.event, '$.params.requestId') =
|
|
128
|
+
json_extract_string(e2.event, '$.params.requestId')
|
|
129
|
+
WHERE
|
|
130
|
+
json_extract_string(e1.event, '$.method') = 'Network.requestWillBeSent'
|
|
131
|
+
AND json_extract_string(e2.event, '$.method') = 'Network.responseReceived'
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Field Discovery
|
|
135
|
+
|
|
136
|
+
The system automatically discovers all field paths:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
# When we see this event:
|
|
140
|
+
{
|
|
141
|
+
"method": "Network.responseReceived",
|
|
142
|
+
"params": {
|
|
143
|
+
"response": {
|
|
144
|
+
"status": 200,
|
|
145
|
+
"url": "https://example.com"
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
# We discover these paths:
|
|
151
|
+
# - method
|
|
152
|
+
# - params.response.status
|
|
153
|
+
# - params.response.url
|
|
154
|
+
|
|
155
|
+
# Users can query via commands:
|
|
156
|
+
network(status=200) # Filter by HTTP status
|
|
157
|
+
network(url="*example*") # Filter by URL pattern
|
|
158
|
+
request(123, ["response.*"]) # Get specific request details
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Connection Management
|
|
162
|
+
|
|
163
|
+
### Initialization
|
|
164
|
+
```python
|
|
165
|
+
cdp = CDPSession()
|
|
166
|
+
await cdp.connect("localhost", 9222, page_id)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Event Flow
|
|
170
|
+
1. Chrome sends event over WebSocket
|
|
171
|
+
2. CDPSession receives in `on_message()`
|
|
172
|
+
3. Event stored in DuckDB immediately
|
|
173
|
+
4. Field paths extracted for discovery
|
|
174
|
+
5. Event available for querying
|
|
175
|
+
|
|
176
|
+
## CDP Command Execution
|
|
177
|
+
|
|
178
|
+
Direct command execution:
|
|
179
|
+
```python
|
|
180
|
+
# Get response body
|
|
181
|
+
result = cdp.execute("Network.getResponseBody", {
|
|
182
|
+
"requestId": "123.456"
|
|
183
|
+
})
|
|
184
|
+
|
|
185
|
+
# Evaluate JavaScript
|
|
186
|
+
result = cdp.execute("Runtime.evaluate", {
|
|
187
|
+
"expression": "document.title"
|
|
188
|
+
})
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Performance Considerations
|
|
192
|
+
|
|
193
|
+
- **Minimal Processing**: Events stored as-is
|
|
194
|
+
- **Lazy Evaluation**: Field discovery on-demand
|
|
195
|
+
- **Efficient Storage**: DuckDB's columnar format
|
|
196
|
+
- **Fast Queries**: JSON functions optimized in DuckDB
|
|
197
|
+
|
|
198
|
+
## Extension Points
|
|
199
|
+
|
|
200
|
+
### Adding New Domains
|
|
201
|
+
To capture events from additional CDP domains:
|
|
202
|
+
|
|
203
|
+
1. Enable the domain:
|
|
204
|
+
```python
|
|
205
|
+
cdp.execute("DOMStorage.enable")
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
2. Events automatically captured and stored with indexed `method` column
|
|
209
|
+
|
|
210
|
+
3. Query via SQL or implement a new command:
|
|
211
|
+
```sql
|
|
212
|
+
-- Events are stored with method for fast filtering
|
|
213
|
+
SELECT * FROM events WHERE method LIKE 'DOMStorage.%'
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Custom Event Processing
|
|
217
|
+
While we store events as-is, you can add custom processors:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
def process_network_event(event):
|
|
221
|
+
# Custom logic here
|
|
222
|
+
pass
|
|
223
|
+
|
|
224
|
+
# Register processor
|
|
225
|
+
cdp.register_processor("Network.*", process_network_event)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Integration with SDP
|
|
229
|
+
|
|
230
|
+
The CDP module will work alongside the future SDP (Svelte Debug Protocol) module:
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
CDP Events (Network, DOM, Console)
|
|
234
|
+
+
|
|
235
|
+
SDP Events (State, Components, Reactivity)
|
|
236
|
+
↓
|
|
237
|
+
Unified Event Stream in DuckDB
|
|
238
|
+
↓
|
|
239
|
+
Correlated Analysis
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Best Practices
|
|
243
|
+
|
|
244
|
+
1. **Don't Transform**: Store CDP data as-is
|
|
245
|
+
2. **Query Don't Parse**: Use SQL for extraction
|
|
246
|
+
3. **Discover Don't Define**: Let field paths emerge
|
|
247
|
+
4. **Correlate Don't Duplicate**: Link events by IDs
|
|
248
|
+
|
|
249
|
+
## Debugging
|
|
250
|
+
|
|
251
|
+
### Enable verbose logging
|
|
252
|
+
```python
|
|
253
|
+
import logging
|
|
254
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Check connection
|
|
258
|
+
```python
|
|
259
|
+
cdp.connected # Should be True
|
|
260
|
+
cdp.ws.sock.connected # WebSocket status
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Inspect stored events
|
|
264
|
+
```python
|
|
265
|
+
cdp.query("SELECT COUNT(*) FROM events")
|
|
266
|
+
cdp.query("SELECT * FROM events ORDER BY rowid DESC LIMIT 5")
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## Future Enhancements
|
|
270
|
+
|
|
271
|
+
- [ ] Event compression for long sessions
|
|
272
|
+
- [ ] Streaming to external storage
|
|
273
|
+
- [ ] Real-time event subscriptions
|
|
274
|
+
- [ ] Custom domain definitions
|
|
275
|
+
- [ ] Event replay functionality
|
webtap/cdp/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Chrome DevTools Protocol client with native event storage.
|
|
2
|
+
|
|
3
|
+
Native CDP approach - store events as-is, query on-demand.
|
|
4
|
+
Built on WebSocketApp + DuckDB for minimal overhead.
|
|
5
|
+
|
|
6
|
+
PUBLIC API:
|
|
7
|
+
- CDPSession: Main CDP client with WebSocket connection and event storage
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from webtap.cdp.session import CDPSession
|
|
11
|
+
|
|
12
|
+
__all__ = ["CDPSession"]
|