webtap-tool 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. webtap/VISION.md +246 -0
  2. webtap/__init__.py +84 -0
  3. webtap/__main__.py +6 -0
  4. webtap/api/__init__.py +9 -0
  5. webtap/api/app.py +26 -0
  6. webtap/api/models.py +69 -0
  7. webtap/api/server.py +111 -0
  8. webtap/api/sse.py +182 -0
  9. webtap/api/state.py +89 -0
  10. webtap/app.py +79 -0
  11. webtap/cdp/README.md +275 -0
  12. webtap/cdp/__init__.py +12 -0
  13. webtap/cdp/har.py +302 -0
  14. webtap/cdp/schema/README.md +41 -0
  15. webtap/cdp/schema/cdp_protocol.json +32785 -0
  16. webtap/cdp/schema/cdp_version.json +8 -0
  17. webtap/cdp/session.py +667 -0
  18. webtap/client.py +81 -0
  19. webtap/commands/DEVELOPER_GUIDE.md +401 -0
  20. webtap/commands/TIPS.md +269 -0
  21. webtap/commands/__init__.py +29 -0
  22. webtap/commands/_builders.py +331 -0
  23. webtap/commands/_code_generation.py +110 -0
  24. webtap/commands/_tips.py +147 -0
  25. webtap/commands/_utils.py +273 -0
  26. webtap/commands/connection.py +220 -0
  27. webtap/commands/console.py +87 -0
  28. webtap/commands/fetch.py +310 -0
  29. webtap/commands/filters.py +116 -0
  30. webtap/commands/javascript.py +73 -0
  31. webtap/commands/js_export.py +73 -0
  32. webtap/commands/launch.py +72 -0
  33. webtap/commands/navigation.py +197 -0
  34. webtap/commands/network.py +136 -0
  35. webtap/commands/quicktype.py +306 -0
  36. webtap/commands/request.py +93 -0
  37. webtap/commands/selections.py +138 -0
  38. webtap/commands/setup.py +219 -0
  39. webtap/commands/to_model.py +163 -0
  40. webtap/daemon.py +185 -0
  41. webtap/daemon_state.py +53 -0
  42. webtap/filters.py +219 -0
  43. webtap/rpc/__init__.py +14 -0
  44. webtap/rpc/errors.py +49 -0
  45. webtap/rpc/framework.py +223 -0
  46. webtap/rpc/handlers.py +625 -0
  47. webtap/rpc/machine.py +84 -0
  48. webtap/services/README.md +83 -0
  49. webtap/services/__init__.py +15 -0
  50. webtap/services/console.py +124 -0
  51. webtap/services/dom.py +547 -0
  52. webtap/services/fetch.py +415 -0
  53. webtap/services/main.py +392 -0
  54. webtap/services/network.py +401 -0
  55. webtap/services/setup/__init__.py +185 -0
  56. webtap/services/setup/chrome.py +233 -0
  57. webtap/services/setup/desktop.py +255 -0
  58. webtap/services/setup/extension.py +147 -0
  59. webtap/services/setup/platform.py +162 -0
  60. webtap/services/state_snapshot.py +86 -0
  61. webtap_tool-0.11.0.dist-info/METADATA +535 -0
  62. webtap_tool-0.11.0.dist-info/RECORD +64 -0
  63. webtap_tool-0.11.0.dist-info/WHEEL +4 -0
  64. webtap_tool-0.11.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,110 @@
1
+ """Code generation utilities for transforming HTTP bodies into code.
2
+
3
+ Pure transformation functions with no dependencies on services or state.
4
+ Used by to_model(), quicktype(), and future code generation commands.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
+ def parse_json(content: str) -> tuple[Any, str | None]:
13
+ """Parse JSON string into Python object.
14
+
15
+ Args:
16
+ content: JSON string to parse.
17
+
18
+ Returns:
19
+ Tuple of (parsed_data, error_message).
20
+ On success: (data, None)
21
+ On failure: (None, error_string)
22
+
23
+ Examples:
24
+ data, error = parse_json('{"key": "value"}')
25
+ if error:
26
+ return error_response(error)
27
+ """
28
+ try:
29
+ return json.loads(content), None
30
+ except json.JSONDecodeError as e:
31
+ return None, f"Invalid JSON: {e}"
32
+
33
+
34
+ def extract_json_path(data: Any, path: str) -> tuple[Any, str | None]:
35
+ """Extract nested data using simple bracket notation.
36
+
37
+ Supports paths like "data[0]", "results.users", or "data[0].items".
38
+
39
+ Args:
40
+ data: Dict or list to extract from.
41
+ path: Path using dot and bracket notation.
42
+
43
+ Returns:
44
+ Tuple of (extracted_data, error_message).
45
+ On success: (data, None)
46
+ On failure: (None, error_string)
47
+
48
+ Examples:
49
+ result, err = extract_json_path({"data": [1,2,3]}, "data[0]")
50
+ # result = 1, err = None
51
+
52
+ result, err = extract_json_path({"user": {"name": "Bob"}}, "user.name")
53
+ # result = "Bob", err = None
54
+ """
55
+ try:
56
+ parts = path.replace("[", ".").replace("]", "").split(".")
57
+ result = data
58
+ for part in parts:
59
+ if part:
60
+ if part.isdigit():
61
+ result = result[int(part)]
62
+ else:
63
+ result = result[part]
64
+ return result, None
65
+ except (KeyError, IndexError, TypeError) as e:
66
+ return None, f"JSON path '{path}' not found: {e}"
67
+
68
+
69
+ def validate_generation_data(data: Any) -> tuple[bool, str | None]:
70
+ """Validate data structure for code generation.
71
+
72
+ Code generators (Pydantic, quicktype) require dict or list structures.
73
+
74
+ Args:
75
+ data: Data to validate.
76
+
77
+ Returns:
78
+ Tuple of (is_valid, error_message).
79
+ On success: (True, None)
80
+ On failure: (False, error_string)
81
+
82
+ Examples:
83
+ is_valid, error = validate_generation_data({"key": "value"})
84
+ # is_valid = True, error = None
85
+
86
+ is_valid, error = validate_generation_data("string")
87
+ # is_valid = False, error = "Data is str, not dict or list"
88
+ """
89
+ if not isinstance(data, (dict, list)):
90
+ return False, f"Data is {type(data).__name__}, not dict or list"
91
+ return True, None
92
+
93
+
94
def ensure_output_directory(output: str) -> Path:
    """Resolve an output file path and make sure its parent directory exists.

    Args:
        output: Output file path (may be relative or start with ``~``).

    Returns:
        The expanded, absolute ``Path`` of the output file. The file itself
        is not created; only its parent directories are.

    Examples:
        path = ensure_output_directory("~/models/user.py")
        # Creates ~/models/ if it doesn't exist
        # Returns Path("/home/user/models/user.py")
    """
    expanded = Path(output).expanduser()
    target = expanded.resolve()
    # parents=True builds any missing ancestors; exist_ok=True keeps this idempotent.
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
@@ -0,0 +1,147 @@
1
+ """Parser for TIPS.md documentation.
2
+
3
+ This module reads TIPS.md and provides:
4
+ - MCP descriptions for commands
5
+ - Developer tips for command responses
6
+ - Pre-imported libraries documentation
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional
12
+
13
+
14
class TipsParser:
    """Parse TIPS.md for command documentation.

    The parser looks for a ``## Libraries`` section and for one
    ``### <command>`` section per command. Inside a command section it
    recognises ``#### Examples`` (a fenced ``python`` block) and
    ``#### Tips`` (a ``- `` bullet list).
    """

    def __init__(self):
        # TIPS.md is in the same directory as this file
        self.tips_path = Path(__file__).parent / "TIPS.md"
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's default text encoding. A missing file yields empty content.
        self.content = self.tips_path.read_text(encoding="utf-8") if self.tips_path.exists() else ""
        self._cache: Dict[str, str] = {}

    def _get_libraries(self) -> str:
        """Extract the libraries section (cached after the first call).

        Returns an empty string when the section is absent.
        """
        if "libraries" not in self._cache:
            # ``|\Z`` lets the section be found even when it is the last one in the file.
            match = re.search(r"## Libraries\n(.*?)(?=\n##|\Z)", self.content, re.DOTALL)
            self._cache["libraries"] = match.group(1).strip() if match else ""
        return self._cache["libraries"]

    def _get_command_section(self, command: str) -> Optional[str]:
        """Get the full section for a command, or None if there is no such heading."""
        # Anchor the heading at the start of a line so that "### name" is not
        # matched inside a "#### name" sub-heading. The negative lookahead
        # ends the section at the next "###" heading but not at "####".
        pattern = rf"^### {re.escape(command)}\n(.*?)(?=\n###(?!#)|\Z)"
        match = re.search(pattern, self.content, re.DOTALL | re.MULTILINE)
        return match.group(1).strip() if match else None

    def _get_description(self, command: str) -> Optional[str]:
        """Get command description (text before #### sections)."""
        section = self._get_command_section(command)
        if not section:
            return None

        # Extract text before first #### heading
        match = re.match(r"(.*?)(?=\n####|\Z)", section, re.DOTALL)
        return match.group(1).strip() if match else ""

    def _get_examples(self, command: str) -> Optional[str]:
        """Get the body of the fenced python block under "#### Examples"."""
        section = self._get_command_section(command)
        if not section:
            return None

        # Extract Examples section
        match = re.search(r"#### Examples\n```python\n(.*?)\n```", section, re.DOTALL)
        return match.group(1).strip() if match else None

    def _get_tips(self, command: str, context: Optional[Dict] = None) -> Optional[List[str]]:
        """Get tips list for a command.

        Args:
            command: Name of the command.
            context: Optional mapping; every ``{key}`` placeholder in a tip is
                replaced by ``str(value)``.

        Returns:
            The bullet texts of the "#### Tips" section, or None when the
            command or its Tips section is missing.
        """
        section = self._get_command_section(command)
        if not section:
            return None

        # Extract Tips section
        match = re.search(r"#### Tips\n(.*?)(?=\n###|\n##|\Z)", section, re.DOTALL)
        if not match:
            return None

        tips_text = match.group(1)
        # Parse bullet points
        tips = re.findall(r"^- (.+)$", tips_text, re.MULTILINE)

        # Apply context substitutions
        if context and tips:
            formatted_tips = []
            for tip in tips:
                for key, value in context.items():
                    tip = tip.replace(f"{{{key}}}", str(value))
                formatted_tips.append(tip)
            return formatted_tips

        return tips

    def _get_mcp_description(self, command: str) -> Optional[str]:
        """Build MCP description from markdown.

        Combines the command description, the libraries section (only for
        commands that accept Python expressions) and the indented examples.
        Returns None when the command has no description.
        """
        description = self._get_description(command)
        if not description:
            return None

        # Build complete MCP description
        parts = [description]

        # Add libraries section for commands with Python expression support
        if command in ["request", "to_model", "quicktype", "selections"]:
            parts.append("")
            parts.append(self._get_libraries())

        # Add examples if available
        examples = self._get_examples(command)
        if examples:
            parts.append("")
            parts.append("Examples:")
            # Indent examples
            for line in examples.split("\n"):
                parts.append(f" {line}" if line else "")

        return "\n".join(parts)
108
+
109
+
110
# Global parser instance.
# Constructed once when this module is imported; TipsParser.__init__ reads
# TIPS.md from this file's directory (or uses empty content if it is missing).
parser = TipsParser()
112
+
113
+
114
+ # Public API
115
def get_mcp_description(command: str) -> Optional[str]:
    """Return the MCP description built from TIPS.md for ``command``.

    Delegates to the module-level ``parser`` instance.

    Args:
        command: Name of the command.
    """
    description = parser._get_mcp_description(command)
    return description
122
+
123
+
124
def get_tips(command: str, context: Optional[Dict] = None) -> Optional[List[str]]:
    """Return the developer tips listed in TIPS.md for ``command``.

    Delegates to the module-level ``parser`` instance.

    Args:
        command: Name of the command.
        context: Optional context for variable substitution.
    """
    tips = parser._get_tips(command, context)
    return tips
132
+
133
+
134
def get_all_tips() -> Dict[str, List[str]]:
    """Get all available tips from TIPS.md.

    Returns:
        Mapping of command name to its list of tips. Commands without a
        non-empty Tips section are omitted.
    """
    # Find all command sections. The heading is anchored at the start of a
    # line so that "#### Tips" / "#### Examples" sub-headings (which contain
    # the text "### ") are not mistaken for command names.
    commands = re.findall(r"^### ([^\n]+)\n", parser.content, re.MULTILINE)

    all_tips: Dict[str, List[str]] = {}
    for command in commands:
        tips = parser._get_tips(command)
        if tips:
            all_tips[command] = tips

    return all_tips
@@ -0,0 +1,273 @@
1
+ """Shared utilities for WebTap command modules."""
2
+
3
+ import ast
4
+ import base64
5
+ import json
6
+ import sys
7
+ from io import StringIO
8
+ from typing import Any, Tuple
9
+
10
+
11
def evaluate_expression(expr: str, namespace: dict) -> Tuple[Any, str]:
    """Execute Python code and capture both stdout and the last expression result.

    The code is parsed once. If its final statement is a bare expression,
    everything before it is executed and that expression is evaluated
    separately so its value can be returned; otherwise the whole snippet is
    executed and the result is None.

    NOTE(security): ``expr`` is run with ``exec``/``eval`` and has full access
    to the interpreter. Only pass code the user is entitled to run locally.

    Args:
        expr: Python code to execute.
        namespace: Dict of variables available to the code. It is updated in
            place with the pre-imported libraries and with any names the code
            defines.

    Returns:
        Tuple of (result, output): the value of the trailing expression (or
        None) and everything the code wrote to stdout.

    Raises:
        SyntaxError: If ``expr`` is not valid Python.
        Exception: Whatever the executed code itself raises.
    """
    import contextlib

    # Standard libraries - always available
    import re
    import base64
    import hashlib
    import html
    import urllib.parse
    import datetime
    import collections
    import itertools
    import pprint
    import textwrap
    import difflib
    import xml.etree.ElementTree as ElementTree

    # Web scraping & parsing
    from bs4 import BeautifulSoup
    import lxml.etree
    import lxml.html

    # Reverse engineering essentials
    import jwt
    import yaml
    import httpx
    import cryptography.fernet
    import cryptography.hazmat
    from google.protobuf import json_format as protobuf_json
    from google.protobuf import text_format as protobuf_text
    import msgpack

    # Update namespace with ALL libraries
    namespace.update(
        {
            # Standard
            "re": re,
            "json": json,  # Already imported at module level
            "base64": base64,
            "hashlib": hashlib,
            "html": html,
            "urllib": urllib,
            "datetime": datetime,
            "collections": collections,
            "itertools": itertools,
            "pprint": pprint,
            "textwrap": textwrap,
            "difflib": difflib,
            "ast": ast,  # Already imported at module level
            "ElementTree": ElementTree,
            "ET": ElementTree,  # Common alias
            # Web scraping
            "BeautifulSoup": BeautifulSoup,
            "bs4": BeautifulSoup,  # Alias
            "lxml": lxml,
            # Reverse engineering
            "jwt": jwt,
            "yaml": yaml,
            "httpx": httpx,
            "cryptography": cryptography,
            "protobuf_json": protobuf_json,
            "protobuf_text": protobuf_text,
            "msgpack": msgpack,
        }
    )

    # Parse up front and let a SyntaxError propagate. Re-running the raw
    # source after a SyntaxError (as a fallback) could only raise again for a
    # genuine parse error, and would execute the snippet a second time when
    # the SyntaxError came from the user's own code at runtime.
    tree = ast.parse(expr)

    result = None
    captured_output = StringIO()

    # redirect_stdout restores sys.stdout on exit, including when the code raises.
    with contextlib.redirect_stdout(captured_output):
        if tree.body:
            *leading, last = tree.body
            if isinstance(last, ast.Expr):
                # Execute all but the last node
                if leading:
                    exec_tree = ast.Module(body=leading, type_ignores=[])
                    exec(compile(exec_tree, "<string>", "exec"), namespace)
                # Evaluate the last expression so its value can be returned
                result = eval(compile(ast.Expression(body=last.value), "<string>", "eval"), namespace)
            else:
                # All statements, just exec everything
                exec(compile(tree, "<string>", "exec"), namespace)

    return result, captured_output.getvalue()
111
+
112
+
113
def format_expression_result(result: Any, output: str, max_length: int = 2000) -> str:
    """Format the result of an expression evaluation for display.

    Captured stdout (if any) comes first, followed by the rendered result.
    Dicts and lists are rendered as indented JSON when possible and fall back
    to ``str()`` when they contain values JSON cannot represent. Any rendered
    result longer than ``max_length`` is cut and marked as truncated.

    Args:
        result: The evaluation result. ``None`` means "no result".
        output: Any stdout output captured.
        max_length: Maximum length of the rendered result before truncation.

    Returns:
        The display text, or "(no output)" when there is neither stdout
        output nor a result.
    """
    parts = []

    if output:
        parts.append(output.rstrip())

    if result is not None:
        if isinstance(result, (dict, list)):
            try:
                formatted = json.dumps(result, indent=2)
            except (TypeError, ValueError):
                # Not JSON-serializable (bytes, sets, non-string keys, circular
                # references, ...): show the plain Python representation
                # instead of failing the whole command.
                formatted = str(result)
        else:
            formatted = str(result)

        # Apply the limit uniformly so large non-str results are bounded too.
        if len(formatted) > max_length:
            formatted = formatted[:max_length] + f"\n... [truncated, {len(formatted)} chars total]"
        parts.append(formatted)

    return "\n".join(parts) if parts else "(no output)"
139
+
140
+
141
+ # ============= MCP Dict Parameter Utilities =============
142
+
143
+
144
+ def parse_options(options: dict | None = None, defaults: dict | None = None) -> dict:
145
+ """Parse options dict with defaults.
146
+
147
+ Args:
148
+ options: User-provided options dict.
149
+ defaults: Default values dict.
150
+ """
151
+ if defaults is None:
152
+ defaults = {}
153
+ if options is None:
154
+ return defaults.copy()
155
+
156
+ result = defaults.copy()
157
+ result.update(options)
158
+ return result
159
+
160
+
161
+ def extract_option(options: dict | None, key: str, default: object = None, required: bool = False) -> object:
162
+ """Extract single option from dict with validation.
163
+
164
+ Args:
165
+ options: Options dict to extract from.
166
+ key: Key to extract.
167
+ default: Default value if not found.
168
+ required: Whether the key is required.
169
+ """
170
+ if options is None:
171
+ if required:
172
+ raise ValueError(f"Required option '{key}' not provided")
173
+ return default
174
+
175
+ if required and key not in options:
176
+ raise ValueError(f"Required option '{key}' not provided")
177
+
178
+ return options.get(key, default)
179
+
180
+
181
+ def validate_dict_keys(options: dict | None, allowed: set, required: set | None = None) -> dict:
182
+ """Validate dict has only allowed keys and all required keys.
183
+
184
+ Args:
185
+ options: Dict to validate.
186
+ allowed: Set of allowed keys.
187
+ required: Optional set of required keys.
188
+ """
189
+ if options is None:
190
+ options = {}
191
+
192
+ # Check for unknown keys
193
+ unknown = set(options.keys()) - allowed
194
+ if unknown:
195
+ raise ValueError(f"Unknown options: {', '.join(sorted(unknown))}")
196
+
197
+ # Check for required keys
198
+ if required:
199
+ missing = required - set(options.keys())
200
+ if missing:
201
+ raise ValueError(f"Missing required options: {', '.join(sorted(missing))}")
202
+
203
+ return options
204
+
205
+
206
+ def extract_nested(options: dict | None, path: str, default: object = None) -> object:
207
+ """Extract nested value from dict using dot notation.
208
+
209
+ Args:
210
+ options: Dict to extract from.
211
+ path: Dot-separated path.
212
+ default: Default value if path not found.
213
+ """
214
+ if options is None:
215
+ return default
216
+
217
+ current = options
218
+ for key in path.split("."):
219
+ if not isinstance(current, dict):
220
+ return default
221
+ current = current.get(key)
222
+ if current is None:
223
+ return default
224
+
225
+ return current
226
+
227
+
228
+ # ============= Body Content Utilities =============
229
+
230
+
231
+ def fetch_body_content(state, har_entry: dict, field: str) -> tuple[str | None, str | None]:
232
+ """Fetch body content based on field selector.
233
+
234
+ Args:
235
+ state: WebTap state with client (RPC client).
236
+ har_entry: HAR entry from request_details().
237
+ field: Field selector ("response.content" or "request.postData").
238
+
239
+ Returns:
240
+ Tuple of (body_content, error_message).
241
+ """
242
+ if field == "response.content":
243
+ request_id = har_entry.get("request_id")
244
+ if not request_id:
245
+ return None, "No request_id in HAR entry"
246
+
247
+ try:
248
+ cdp_result = state.client.call("cdp", command="Network.getResponseBody", params={"requestId": request_id})
249
+ result = cdp_result.get("result", {})
250
+ except Exception as e:
251
+ return None, f"Failed to fetch response body: {e}"
252
+
253
+ if not result:
254
+ return None, "Failed to fetch response body"
255
+
256
+ body = result.get("body", "")
257
+ if result.get("base64Encoded"):
258
+ try:
259
+ body = base64.b64decode(body).decode("utf-8")
260
+ except Exception as e:
261
+ return None, f"Failed to decode base64 body: {e}"
262
+
263
+ return body, None
264
+
265
+ elif field == "request.postData":
266
+ post_data = har_entry.get("request", {}).get("postData", {})
267
+ text = post_data.get("text")
268
+ if not text:
269
+ return None, "No POST data in request"
270
+ return text, None
271
+
272
+ else:
273
+ return None, f"Unknown field: {field}. Use 'response.content' or 'request.postData'"