webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,401 @@
1
+ """Network monitoring service using HAR views."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from webtap.cdp import CDPSession
9
+ from webtap.filters import FilterManager
10
+
11
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
12
+
13
+
14
+ class NetworkService:
15
+ """Network event queries using HAR views."""
16
+
17
+ def __init__(self):
18
+ """Initialize network service."""
19
+ self.cdp: CDPSession | None = None
20
+ self.filters: FilterManager | None = None
21
+
22
+ @property
23
+ def request_count(self) -> int:
24
+ """Count of all network requests."""
25
+ if not self.cdp:
26
+ return 0
27
+ result = self.cdp.query("SELECT COUNT(*) FROM har_summary")
28
+ return result[0][0] if result else 0
29
+
30
+ def get_requests(
31
+ self,
32
+ limit: int = 20,
33
+ status: int | None = None,
34
+ method: str | None = None,
35
+ type_filter: str | None = None,
36
+ url: str | None = None,
37
+ state: str | None = None,
38
+ apply_groups: bool = True,
39
+ order: str = "desc",
40
+ ) -> list[dict]:
41
+ """Get network requests from HAR summary view.
42
+
43
+ Args:
44
+ limit: Maximum results.
45
+ status: Filter by HTTP status code.
46
+ method: Filter by HTTP method.
47
+ type_filter: Filter by resource type.
48
+ url: Filter by URL pattern (supports * wildcard).
49
+ state: Filter by state (pending, loading, complete, failed, paused).
50
+ apply_groups: Apply enabled filter groups.
51
+ order: Sort order - "desc" (newest first) or "asc" (oldest first).
52
+
53
+ Returns:
54
+ List of request summary dicts.
55
+ """
56
+ if not self.cdp:
57
+ return []
58
+
59
+ # Build SQL query
60
+ sql = """
61
+ SELECT
62
+ id,
63
+ request_id,
64
+ protocol,
65
+ method,
66
+ status,
67
+ url,
68
+ type,
69
+ size,
70
+ time_ms,
71
+ state,
72
+ pause_stage,
73
+ paused_id,
74
+ frames_sent,
75
+ frames_received
76
+ FROM har_summary
77
+ """
78
+
79
+ # Build filter conditions
80
+ conditions = ""
81
+ if self.filters:
82
+ conditions = self.filters.build_filter_sql(
83
+ status=status,
84
+ method=method,
85
+ type_filter=type_filter,
86
+ url=url,
87
+ apply_groups=apply_groups,
88
+ )
89
+
90
+ # Add state filter
91
+ state_conditions = []
92
+ if state:
93
+ state_conditions.append(f"state = '{state}'")
94
+
95
+ # Combine conditions
96
+ all_conditions = []
97
+ if conditions:
98
+ all_conditions.append(conditions)
99
+ if state_conditions:
100
+ all_conditions.append(" AND ".join(state_conditions))
101
+
102
+ if all_conditions:
103
+ sql += f" WHERE {' AND '.join(all_conditions)}"
104
+
105
+ sort_dir = "ASC" if order.lower() == "asc" else "DESC"
106
+ sql += f" ORDER BY id {sort_dir} LIMIT {limit}"
107
+
108
+ # Execute query and convert to dicts
109
+ rows = self.cdp.query(sql)
110
+ columns = [
111
+ "id",
112
+ "request_id",
113
+ "protocol",
114
+ "method",
115
+ "status",
116
+ "url",
117
+ "type",
118
+ "size",
119
+ "time_ms",
120
+ "state",
121
+ "pause_stage",
122
+ "paused_id",
123
+ "frames_sent",
124
+ "frames_received",
125
+ ]
126
+
127
+ return [dict(zip(columns, row)) for row in rows]
128
+
129
+ def get_request_details(self, row_id: int) -> dict | None:
130
+ """Get HAR entry with proper nested structure.
131
+
132
+ Args:
133
+ row_id: Row ID from har_summary.
134
+
135
+ Returns:
136
+ HAR-structured dict or None if not found.
137
+
138
+ Structure matches HAR spec:
139
+ {
140
+ "id": 123,
141
+ "request": {"method", "url", "headers", "postData"},
142
+ "response": {"status", "statusText", "headers", "content"},
143
+ "time": 150,
144
+ "state": "complete",
145
+ "pause_stage": "Response", # If paused
146
+ ...
147
+ }
148
+ """
149
+ if not self.cdp:
150
+ return None
151
+
152
+ sql = """
153
+ SELECT
154
+ id,
155
+ request_id,
156
+ protocol,
157
+ method,
158
+ url,
159
+ status,
160
+ status_text,
161
+ type,
162
+ size,
163
+ time_ms,
164
+ state,
165
+ pause_stage,
166
+ paused_id,
167
+ request_headers,
168
+ post_data,
169
+ response_headers,
170
+ mime_type,
171
+ timing,
172
+ error_text,
173
+ frames_sent,
174
+ frames_received,
175
+ ws_total_bytes
176
+ FROM har_entries
177
+ WHERE id = ?
178
+ """
179
+
180
+ rows = self.cdp.query(sql, [row_id])
181
+ if not rows:
182
+ return None
183
+
184
+ row = rows[0]
185
+ columns = [
186
+ "id",
187
+ "request_id",
188
+ "protocol",
189
+ "method",
190
+ "url",
191
+ "status",
192
+ "status_text",
193
+ "type",
194
+ "size",
195
+ "time_ms",
196
+ "state",
197
+ "pause_stage",
198
+ "paused_id",
199
+ "request_headers",
200
+ "post_data",
201
+ "response_headers",
202
+ "mime_type",
203
+ "timing",
204
+ "error_text",
205
+ "frames_sent",
206
+ "frames_received",
207
+ "ws_total_bytes",
208
+ ]
209
+ flat = dict(zip(columns, row))
210
+
211
+ # Parse JSON fields
212
+ def parse_json(val):
213
+ if val and isinstance(val, str):
214
+ try:
215
+ return json.loads(val)
216
+ except json.JSONDecodeError:
217
+ return val
218
+ return val
219
+
220
+ # Build HAR-nested structure
221
+ har: dict = {
222
+ "id": flat["id"],
223
+ "request_id": flat["request_id"],
224
+ "protocol": flat["protocol"],
225
+ "type": flat["type"],
226
+ "time": flat["time_ms"],
227
+ "state": flat["state"],
228
+ "request": {
229
+ "method": flat["method"],
230
+ "url": flat["url"],
231
+ "headers": parse_json(flat["request_headers"]) or {},
232
+ "postData": flat["post_data"],
233
+ },
234
+ "response": {
235
+ "status": flat["status"],
236
+ "statusText": flat["status_text"],
237
+ "headers": parse_json(flat["response_headers"]) or {},
238
+ "content": {
239
+ "size": flat["size"],
240
+ "mimeType": flat["mime_type"],
241
+ },
242
+ },
243
+ "timings": parse_json(flat["timing"]),
244
+ }
245
+
246
+ # Add pause info if paused
247
+ if flat["pause_stage"]:
248
+ har["pause_stage"] = flat["pause_stage"]
249
+
250
+ # Add error if failed
251
+ if flat["error_text"]:
252
+ har["error"] = flat["error_text"]
253
+
254
+ # Add WebSocket stats if applicable
255
+ if flat["protocol"] == "websocket":
256
+ har["websocket"] = {
257
+ "framesSent": flat["frames_sent"],
258
+ "framesReceived": flat["frames_received"],
259
+ "totalBytes": flat["ws_total_bytes"],
260
+ }
261
+
262
+ return har
263
+
264
+ def fetch_body(self, request_id: str) -> dict | None:
265
+ """Fetch response body for a request.
266
+
267
+ Args:
268
+ request_id: CDP request ID.
269
+
270
+ Returns:
271
+ Dict with 'body' and 'base64Encoded' keys, or None.
272
+ """
273
+ if not self.cdp:
274
+ return None
275
+ return self.cdp.fetch_body(request_id)
276
+
277
+ def get_request_by_row_id(self, row_id: int) -> str | None:
278
+ """Get request_id for a row ID.
279
+
280
+ Args:
281
+ row_id: Row ID from har_summary.
282
+
283
+ Returns:
284
+ CDP request ID or None.
285
+ """
286
+ if not self.cdp:
287
+ return None
288
+
289
+ result = self.cdp.query("SELECT request_id FROM har_summary WHERE id = ?", [row_id])
290
+ return result[0][0] if result else None
291
+
292
+ def get_request_id(self, row_id: int) -> str | None:
293
+ """Get CDP request_id for a row ID.
294
+
295
+ Args:
296
+ row_id: Row ID from network table.
297
+
298
+ Returns:
299
+ CDP request ID or None.
300
+ """
301
+ return self.get_request_by_row_id(row_id)
302
+
303
+ def select_fields(self, har_entry: dict, patterns: list[str] | None) -> dict:
304
+ """Apply ES-style field selection to HAR entry.
305
+
306
+ Args:
307
+ har_entry: Full HAR entry with nested structure.
308
+ patterns: Field patterns or None for minimal.
309
+
310
+ Patterns:
311
+ - None: minimal default fields
312
+ - ["*"]: all fields
313
+ - ["request.*"]: all request fields
314
+ - ["request.headers.*"]: all request headers
315
+ - ["request.headers.content-type"]: specific header
316
+ - ["response.content"]: fetch response body on-demand
317
+
318
+ Returns:
319
+ HAR entry with only selected fields.
320
+ """
321
+ # Minimal fields for default view
322
+ minimal_fields = ["request.method", "request.url", "response.status", "time", "state"]
323
+
324
+ if patterns is None:
325
+ # Minimal default - extract specific paths
326
+ result: dict = {}
327
+ for pattern in minimal_fields:
328
+ parts = pattern.split(".")
329
+ value = _get_nested(har_entry, parts)
330
+ if value is not None:
331
+ _set_nested(result, parts, value)
332
+ return result
333
+
334
+ if patterns == ["*"]:
335
+ return har_entry
336
+
337
+ result = {}
338
+ for pattern in patterns:
339
+ if pattern == "*":
340
+ return har_entry
341
+
342
+ parts = pattern.split(".")
343
+
344
+ # Special case: response.content triggers body fetch
345
+ if pattern == "response.content" or pattern.startswith("response.content."):
346
+ request_id = har_entry.get("request_id")
347
+ if request_id:
348
+ body_result = self.fetch_body(request_id)
349
+ if body_result:
350
+ content = har_entry.get("response", {}).get("content", {}).copy()
351
+ content["text"] = body_result.get("body")
352
+ content["encoding"] = "base64" if body_result.get("base64Encoded") else None
353
+ _set_nested(result, ["response", "content"], content)
354
+ else:
355
+ _set_nested(result, ["response", "content"], {"text": None})
356
+ continue
357
+
358
+ # Wildcard: "request.headers.*" -> get all under that path
359
+ if pattern.endswith(".*"):
360
+ prefix = pattern[:-2]
361
+ prefix_parts = prefix.split(".")
362
+ obj = _get_nested(har_entry, prefix_parts)
363
+ if obj is not None:
364
+ _set_nested(result, prefix_parts, obj)
365
+ else:
366
+ # Specific path
367
+ value = _get_nested(har_entry, parts)
368
+ if value is not None:
369
+ _set_nested(result, parts, value)
370
+
371
+ return result
372
+
373
+
374
+ def _get_nested(obj: dict | None, path: list[str]):
375
+ """Get nested value by path, case-insensitive for headers."""
376
+ for key in path:
377
+ if obj is None:
378
+ return None
379
+ if isinstance(obj, dict):
380
+ # Case-insensitive lookup
381
+ matching_key = next((k for k in obj.keys() if k.lower() == key.lower()), None)
382
+ if matching_key:
383
+ obj = obj.get(matching_key)
384
+ else:
385
+ return None
386
+ else:
387
+ return None
388
+ return obj
389
+
390
+
391
def _set_nested(result: dict, path: list[str], value) -> None:
    """Store ``value`` in ``result`` at ``path``, building missing levels.

    Every segment except the last names a container; containers that do not
    exist yet are created as empty dicts. The last segment is the key that
    receives ``value``, overwriting whatever was there.
    """
    cursor = result
    for segment in path[:-1]:
        if segment in cursor:
            cursor = cursor[segment]
            continue
        child = {}
        cursor[segment] = child
        cursor = child
    cursor[path[-1]] = value
399
+
400
+
401
# Public API of this module: only the service class is exported.
__all__ = ["NetworkService"]
@@ -0,0 +1,185 @@
1
+ """Setup service for installing WebTap components (cross-platform).
2
+
3
+ PUBLIC API:
4
+ - SetupService: Main service class for all setup operations
5
+ """
6
+
7
+ from typing import Dict, Any
8
+
9
+ from .extension import ExtensionSetupService
10
+ from .chrome import ChromeSetupService
11
+ from .desktop import DesktopSetupService
12
+ from .platform import get_platform_info, ensure_directories, APP_NAME
13
+
14
# Old installation paths to clean up.
# All four are relative paths; cleanup_old_installations() joins each one
# onto the user's home directory.
OLD_EXTENSION_PATH = ".config/webtap/extension"
OLD_WRAPPER_PATH = ".local/bin/wrappers/google-chrome-stable"
OLD_DESKTOP_PATH = ".local/share/applications/google-chrome.desktop"
OLD_DEBUG_DIR = ".config/google-chrome-debug"

# Path components.
# Joined as "wrappers/google-chrome-stable" and searched for inside the old
# desktop entry to recognise a file that points at the old wrapper.
WRAPPERS_DIR = "wrappers"
GOOGLE_CHROME_STABLE = "google-chrome-stable"

# Size formatting constants.
# Byte totals are divided by KB_SIZE and rendered with SIZE_FORMAT_KB;
# a total of zero bytes is reported as SIZE_FORMAT_EMPTY instead.
KB_SIZE = 1024
SIZE_FORMAT_KB = "{:.1f} KB"
SIZE_FORMAT_EMPTY = "empty"

# Mount point command.
# Run as `mountpoint -q <dir>`; an exit status of 0 is treated as
# "the directory is a mount point".
MOUNTPOINT_CMD = "mountpoint"
MOUNTPOINT_CHECK_FLAG = "-q"
32
+
33
+
34
class SetupService:
    """Orchestrator service for installing WebTap components.

    Delegates to specialized service classes for each component type, and
    additionally knows how to find and remove files left behind at the
    locations used by older installations.
    """

    def __init__(self):
        """Initialize setup service with platform information."""
        self.info = get_platform_info()
        ensure_directories()

        # Initialize component services
        self.extension_service = ExtensionSetupService()
        self.chrome_service = ChromeSetupService()
        self.desktop_service = DesktopSetupService()

    def install_extension(self, force: bool = False) -> Dict[str, Any]:
        """Install Chrome extension files.

        Args:
            force: Overwrite existing files

        Returns:
            Dict with success, message, path, details
        """
        return self.extension_service.install_extension(force=force)

    def install_chrome_wrapper(self, force: bool = False, bindfs: bool = False) -> Dict[str, Any]:
        """Install Chrome wrapper script.

        Args:
            force: Overwrite existing script
            bindfs: Use bindfs to mount real Chrome profile (Linux only)

        Returns:
            Dict with success, message, path, details
        """
        return self.chrome_service.install_wrapper(force=force, bindfs=bindfs)

    def install_desktop_entry(self, force: bool = False) -> Dict[str, Any]:
        """Install desktop entry or app bundle for GUI integration.

        On Linux: Creates .desktop file
        On macOS: Creates .app bundle

        Args:
            force: Overwrite existing entry

        Returns:
            Dict with success, message, path, details
        """
        return self.desktop_service.install_launcher(force=force)

    def get_platform_info(self) -> Dict[str, Any]:
        """Get platform information for debugging.

        Returns:
            The platform information captured when the service was created.
        """
        return self.info

    def cleanup_old_installations(self, dry_run: bool = True) -> Dict[str, Any]:
        """Clean up old WebTap installations.

        Checks locations that webtap previously wrote to:
        - ~/.config/webtap/extension/ (old extension location)
        - ~/.local/bin/wrappers/google-chrome-stable (old wrapper location)
        - ~/.local/share/applications/google-chrome.desktop (old desktop entry)
        - ~/.config/google-chrome-debug (bindfs mount)

        Args:
            dry_run: If True, only report what would be done

        Returns:
            Dict with one key per item that was found:
            - "old_extension": {"path", "size", "removed"[, "error"]}
            - "old_wrapper": {"path", "removed"[, "error"]}
            - "old_desktop": {"path", "removed"[, "error"]}
            - "bindfs_mount": path string (reported only, never unmounted here)
            Items that do not exist are omitted. Removal failures are recorded
            under "error" for that item rather than raised.
        """
        import shutil
        import subprocess
        from pathlib import Path

        home = Path.home()
        result: Dict[str, Any] = {}

        # --- Old extension directory -------------------------------------
        old_extension_path = home / OLD_EXTENSION_PATH
        if old_extension_path.exists():
            # Total size of all regular files below the directory.
            size = sum(f.stat().st_size for f in old_extension_path.rglob("*") if f.is_file())
            size_str = SIZE_FORMAT_KB.format(size / KB_SIZE) if size > 0 else SIZE_FORMAT_EMPTY

            result["old_extension"] = {"path": str(old_extension_path), "size": size_str, "removed": False}

            if not dry_run:
                try:
                    shutil.rmtree(old_extension_path)
                    result["old_extension"]["removed"] = True
                    # Also try to remove parent if empty
                    parent = old_extension_path.parent
                    if parent.exists() and not any(parent.iterdir()):
                        parent.rmdir()
                except Exception as e:
                    result["old_extension"]["error"] = str(e)

        # --- Old Chrome wrapper script -----------------------------------
        old_wrapper_path = home / OLD_WRAPPER_PATH
        if old_wrapper_path.exists():
            result["old_wrapper"] = {"path": str(old_wrapper_path), "removed": False}

            if not dry_run:
                try:
                    old_wrapper_path.unlink()
                    result["old_wrapper"]["removed"] = True
                    # Try to remove wrappers dir if empty (but keep it if other wrappers exist)
                    wrappers_dir = old_wrapper_path.parent
                    if wrappers_dir.exists() and not any(wrappers_dir.iterdir()):
                        wrappers_dir.rmdir()
                except Exception as e:
                    result["old_wrapper"]["error"] = str(e)

        # --- Old desktop entry -------------------------------------------
        old_desktop_path = home / OLD_DESKTOP_PATH
        if old_desktop_path.exists():
            # Only the read is allowed to fail silently: a file we cannot read
            # cannot be identified as ours, so it is left alone. Undecodable
            # bytes are replaced rather than treated as a failure, since the
            # markers searched for below are plain ASCII.
            try:
                content = old_desktop_path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                content = None

            if content is not None:
                # The entry is considered ours when it references the old
                # wrapper path or mentions the app name.
                # NOTE(review): the app-name test lower-cases only the file
                # content, so it assumes APP_NAME is itself lower-case - confirm.
                wrapper_ref = f"{WRAPPERS_DIR}/{GOOGLE_CHROME_STABLE}"
                if wrapper_ref in content or APP_NAME in content.lower():
                    result["old_desktop"] = {"path": str(old_desktop_path), "removed": False}

                    if not dry_run:
                        try:
                            old_desktop_path.unlink()
                            result["old_desktop"]["removed"] = True
                        except Exception as e:
                            result["old_desktop"]["error"] = str(e)

        # --- Leftover bindfs mount ---------------------------------------
        # Reported only; nothing is unmounted or deleted, even when
        # dry_run is False.
        debug_dir = home / OLD_DEBUG_DIR
        if debug_dir.exists():
            try:
                output = subprocess.run([MOUNTPOINT_CMD, MOUNTPOINT_CHECK_FLAG, str(debug_dir)], capture_output=True)
                if output.returncode == 0:
                    result["bindfs_mount"] = str(debug_dir)
            except OSError:
                pass  # mountpoint command might not exist (FileNotFoundError is an OSError)

        return result
183
+
184
+
185
# Public API of this package module: only the orchestrator class is exported.
__all__ = ["SetupService"]