PyPI - webtap-tool - Versions diffs - 0.1.1__py3-none-any.whl - Mend

webtap-tool 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webtap-tool might be problematic. Click here for more details.

Files changed (43) hide show

webtap/VISION.md +234 -0
webtap/__init__.py +56 -0
webtap/api.py +222 -0
webtap/app.py +76 -0
webtap/cdp/README.md +268 -0
webtap/cdp/__init__.py +14 -0
webtap/cdp/query.py +107 -0
webtap/cdp/schema/README.md +41 -0
webtap/cdp/schema/cdp_protocol.json +32785 -0
webtap/cdp/schema/cdp_version.json +8 -0
webtap/cdp/session.py +365 -0
webtap/commands/DEVELOPER_GUIDE.md +314 -0
webtap/commands/TIPS.md +153 -0
webtap/commands/__init__.py +7 -0
webtap/commands/_builders.py +127 -0
webtap/commands/_errors.py +108 -0
webtap/commands/_tips.py +147 -0
webtap/commands/_utils.py +227 -0
webtap/commands/body.py +161 -0
webtap/commands/connection.py +168 -0
webtap/commands/console.py +69 -0
webtap/commands/events.py +109 -0
webtap/commands/fetch.py +219 -0
webtap/commands/filters.py +224 -0
webtap/commands/inspect.py +146 -0
webtap/commands/javascript.py +87 -0
webtap/commands/launch.py +86 -0
webtap/commands/navigation.py +199 -0
webtap/commands/network.py +85 -0
webtap/commands/setup.py +127 -0
webtap/filters.py +289 -0
webtap/services/README.md +83 -0
webtap/services/__init__.py +15 -0
webtap/services/body.py +113 -0
webtap/services/console.py +116 -0
webtap/services/fetch.py +397 -0
webtap/services/main.py +175 -0
webtap/services/network.py +105 -0
webtap/services/setup.py +219 -0
webtap_tool-0.1.1.dist-info/METADATA +427 -0
webtap_tool-0.1.1.dist-info/RECORD +43 -0
webtap_tool-0.1.1.dist-info/WHEEL +4 -0
webtap_tool-0.1.1.dist-info/entry_points.txt +2 -0

webtap/commands/_utils.py ADDED Viewed

@@ -0,0 +1,227 @@
+"""Shared utilities for WebTap command modules."""
+import ast
+import json
+import sys
+from io import StringIO
+from typing import Any, Tuple
+def evaluate_expression(expr: str, namespace: dict) -> Tuple[Any, str]:
+    """Execute Python code and capture both stdout and the last expression result.
+    The snippet is parsed once. When its final statement is a bare expression,
+    everything before it is run with ``exec`` and the expression itself with
+    ``eval`` so that its value can be returned; otherwise the whole snippet is
+    run with ``exec`` and the result is None.
+    Args:
+        expr: Python code to execute.
+        namespace: Dict of variables available to the code. It is updated in place
+            with the helper libraries below and with any names the code binds.
+    Returns:
+        Tuple of (value of the trailing expression or None, text written to stdout).
+    Raises:
+        SyntaxError: If ``expr`` is not valid Python.
+        Exception: Whatever the executed code raises propagates to the caller.
+    """
+    from contextlib import redirect_stdout
+    # Standard libraries - always available
+    import re
+    import base64
+    import hashlib
+    import html
+    import urllib.parse
+    import datetime
+    import collections
+    import itertools
+    import pprint
+    import textwrap
+    import difflib
+    import xml.etree.ElementTree as ElementTree
+    # Web scraping & parsing
+    from bs4 import BeautifulSoup
+    import lxml.etree
+    import lxml.html
+    # Reverse engineering essentials
+    import jwt
+    import yaml
+    import httpx
+    import cryptography.fernet
+    import cryptography.hazmat
+    from google.protobuf import json_format as protobuf_json
+    from google.protobuf import text_format as protobuf_text
+    import msgpack
+    # Update namespace with ALL libraries
+    namespace.update(
+        {
+            # Standard
+            "re": re,
+            "json": json,  # Already imported at module level
+            "base64": base64,
+            "hashlib": hashlib,
+            "html": html,
+            "urllib": urllib,
+            "datetime": datetime,
+            "collections": collections,
+            "itertools": itertools,
+            "pprint": pprint,
+            "textwrap": textwrap,
+            "difflib": difflib,
+            "ast": ast,  # Already imported at module level
+            "ElementTree": ElementTree,
+            "ET": ElementTree,  # Common alias
+            # Web scraping
+            "BeautifulSoup": BeautifulSoup,
+            "bs4": BeautifulSoup,  # Alias
+            "lxml": lxml,
+            # Reverse engineering
+            "jwt": jwt,
+            "yaml": yaml,
+            "httpx": httpx,
+            "cryptography": cryptography,
+            "protobuf_json": protobuf_json,
+            "protobuf_text": protobuf_text,
+            "msgpack": msgpack,
+        }
+    )
+    # Parse outside the execution block: a SyntaxError handler wrapped around
+    # exec/eval would also catch a SyntaxError raised *by the running code*
+    # (e.g. a nested compile()) and re-run the snippet, duplicating its side
+    # effects. "<string>" is the filename exec() reports for source strings.
+    tree = ast.parse(expr, "<string>")
+    last_node = tree.body[-1] if tree.body else None
+    result = None
+    captured_output = StringIO()
+    # SECURITY: exec/eval run with full interpreter privileges; expr must come
+    # from a trusted operator, never from page or network content.
+    with redirect_stdout(captured_output):
+        if isinstance(last_node, ast.Expr):
+            # Execute all but the last node, then evaluate the last expression
+            if len(tree.body) > 1:
+                exec_tree = ast.Module(body=tree.body[:-1], type_ignores=[])
+                exec(compile(exec_tree, "<string>", "exec"), namespace)
+            result = eval(compile(ast.Expression(body=last_node.value), "<string>", "eval"), namespace)
+        elif last_node is not None:
+            # All statements, just exec everything
+            exec(compile(tree, "<string>", "exec"), namespace)
+    return result, captured_output.getvalue()
+def format_expression_result(result: Any, output: str, max_length: int = 2000) -> str:
+    """Format the result of an expression evaluation for display.
+    Captured stdout comes first (trailing whitespace stripped), followed by the
+    rendered result. Dicts and lists are rendered as indented JSON when they are
+    JSON-serializable and with ``str()`` otherwise; every other value is rendered
+    with ``str()``. The rendered result is truncated to ``max_length`` characters.
+    Args:
+        result: The evaluation result. None contributes nothing.
+        output: Any stdout output captured.
+        max_length: Maximum length of the rendered result before truncation.
+    Returns:
+        The combined text, or "(no output)" when there is nothing to show.
+    """
+    parts = []
+    if output:
+        parts.append(output.rstrip())
+    if result is not None:
+        if isinstance(result, (dict, list)):
+            try:
+                rendered = json.dumps(result, indent=2)
+            except (TypeError, ValueError):
+                # bytes, sets, custom objects (TypeError) or circular references
+                # (ValueError) cannot be JSON-encoded; show the plain repr instead
+                # of failing the whole command.
+                rendered = str(result)
+        else:
+            rendered = str(result)
+        if len(rendered) > max_length:
+            rendered = rendered[:max_length] + f"\n... [truncated, {len(rendered)} chars total]"
+        parts.append(rendered)
+    return "\n".join(parts) if parts else "(no output)"
+# ============= MCP Dict Parameter Utilities =============
+def parse_options(options: dict = None, defaults: dict = None) -> dict:  # pyright: ignore[reportArgumentType]
+    """Merge user options over a set of defaults.
+    Args:
+        options: User-provided options dict; None means "no overrides".
+        defaults: Default values dict; None means "no defaults".
+    Returns:
+        A new dict holding the defaults with the user options applied on top.
+        Neither input dict is modified.
+    """
+    # Start from a shallow copy so the caller's defaults are never touched.
+    merged = {} if defaults is None else defaults.copy()
+    if options is not None:
+        merged.update(options)
+    return merged
+def extract_option(options: dict, key: str, default=None, required: bool = False):
+    """Look up one option, optionally insisting that it is present.
+    Args:
+        options: Options dict to extract from (None is treated as empty).
+        key: Key to extract.
+        default: Value returned when the key is absent and not required.
+        required: Whether a missing key is an error.
+    Returns:
+        The stored value, or ``default`` when the key is absent.
+    Raises:
+        ValueError: If ``required`` is true and the key is not in ``options``.
+    """
+    is_present = options is not None and key in options
+    if required and not is_present:
+        raise ValueError(f"Required option '{key}' not provided")
+    if options is None:
+        return default
+    return options.get(key, default)
+def validate_dict_keys(options: dict, allowed: set, required: set = None) -> dict:  # pyright: ignore[reportArgumentType]
+    """Check that a dict uses only permitted keys and includes every mandatory one.
+    Args:
+        options: Dict to validate (None is treated as an empty dict).
+        allowed: Set of allowed keys.
+        required: Optional set of required keys.
+    Returns:
+        The validated dict (a fresh empty dict when ``options`` was None).
+    Raises:
+        ValueError: If an unknown key is present or a required key is missing.
+            Unknown keys are reported before missing ones.
+    """
+    checked = {} if options is None else options
+    provided = set(checked.keys())
+    # Anything outside the allow-list is rejected first
+    extras = provided - allowed
+    if extras:
+        raise ValueError(f"Unknown options: {', '.join(sorted(extras))}")
+    # Then make sure nothing mandatory was left out
+    absent = (required - provided) if required else None
+    if absent:
+        raise ValueError(f"Missing required options: {', '.join(sorted(absent))}")
+    return checked
+def extract_nested(options: dict, path: str, default=None):
+    """Walk a dot-separated path through nested dicts.
+    Args:
+        options: Dict to extract from.
+        path: Dot-separated path, e.g. "a.b.c".
+        default: Value returned when the path cannot be followed.
+    Returns:
+        The value at the end of the path. ``default`` is returned when
+        ``options`` is None, when a step lands on a non-dict, when a key is
+        missing, or when the value found at any step is None.
+    """
+    if options is None:
+        return default
+    node = options
+    for segment in path.split("."):
+        # A non-dict cannot be descended into; treat it like a missing key.
+        node = node.get(segment) if isinstance(node, dict) else None
+        if node is None:
+            return default
+    return node

webtap/commands/body.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""HTTP response body inspection and analysis commands."""
+import json
+from webtap.app import app
+from webtap.commands._utils import evaluate_expression, format_expression_result
+from webtap.commands._errors import check_connection
+from webtap.commands._builders import info_response, error_response
+from webtap.commands._tips import get_mcp_description
+mcp_desc = get_mcp_description("body")
+def _append_code_block(elements: list, content: str, language: str, limit: int) -> None:
+    """Append ``content`` as a code block, cut to ``limit`` chars plus a truncation note."""
+    if len(content) > limit:
+        elements.append({"type": "code_block", "content": content[:limit], "language": language})
+        elements.append({"type": "text", "content": f"_[truncated at {limit} chars, {len(content)} total]_"})
+    else:
+        elements.append({"type": "code_block", "content": content, "language": language})
+def _expression_error_suggestions(exc: Exception) -> list:
+    """Return hints for a failed body expression, chosen by the exception's type."""
+    suggestions = ["The body is available as 'body' variable"]
+    # Match on the exception type: the messages of JSONDecodeError and KeyError
+    # do not contain their class names, so message matching never selects them.
+    if isinstance(exc, NameError):
+        suggestions.extend(
+            [
+                "Common libraries are pre-imported: re, json, bs4, jwt, httpx",
+                "Example: bs4(body, 'html.parser').find('title')",
+            ]
+        )
+    elif isinstance(exc, json.JSONDecodeError):
+        suggestions.extend(
+            [
+                "Body might not be valid JSON. Try: type(body) to check",
+                "For HTML, use: bs4(body, 'html.parser')",
+            ]
+        )
+    elif isinstance(exc, KeyError):
+        suggestions.extend(
+            [
+                "Key not found. Try: json.loads(body).keys() to see available keys",
+                "Use .get() for safe access: data.get('key', 'default')",
+            ]
+        )
+    return suggestions
+@app.command(display="markdown", fastmcp={"type": "tool", "description": mcp_desc} if mcp_desc else {"type": "tool"})
+def body(state, response: int, expr: str = None, decode: bool = True, cache: bool = True) -> dict:  # pyright: ignore[reportArgumentType]
+    """Fetch and analyze response body with Python expressions.
+    Args:
+        response: Response row ID from network() or requests()
+        expr: Optional Python expression with 'body' variable
+        decode: Auto-decode base64 (default: True)
+        cache: Use cached body (default: True)
+    Returns:
+        Body content or expression result
+    """
+    if error := check_connection(state):
+        return error
+    # Get body from service (with optional caching)
+    body_service = state.service.body
+    result = body_service.get_response_body(response, use_cache=cache)
+    if "error" in result:
+        return error_response(result["error"])
+    body_content = result.get("body", "")
+    is_base64 = result.get("base64Encoded", False)
+    # Handle base64 decoding if requested
+    if is_base64 and decode:
+        decoded = body_service.decode_body(body_content, is_base64)
+        if isinstance(decoded, bytes) and not expr:
+            # Binary content - can't show directly, report both sizes instead
+            return info_response(
+                title="Response Body",
+                fields={
+                    "Type": "Binary content",
+                    "Size (base64)": f"{len(body_content)} bytes",
+                    "Size (decoded)": f"{len(decoded)} bytes",
+                },
+            )
+        # Decoded text, or raw bytes handed to the expression
+        body_content = decoded
+    # No expression - return the body directly
+    if not expr:
+        if isinstance(body_content, bytes):
+            return info_response(
+                title="Response Body", fields={"Type": "Binary content", "Size": f"{len(body_content)} bytes"}
+            )
+        # DATA-LEVEL TRUNCATION for memory/performance (as per refactor plan)
+        MAX_BODY_SIZE = 5000  # Keep data-level truncation for large bodies
+        elements = [{"type": "heading", "content": "Response Body", "level": 2}]
+        # Sniff the first 100 characters to pick a display language
+        preview = body_content[:100].strip()
+        shown, language = body_content, ""
+        if preview.startswith(("{", "[")):
+            # Likely JSON: pretty-print when it parses, otherwise show as plain text
+            try:
+                pretty = json.dumps(json.loads(body_content), indent=2)
+            except ValueError:  # includes json.JSONDecodeError
+                pretty = None
+            if pretty is not None:
+                shown, language = pretty, "json"
+        elif preview.startswith("<"):
+            # Likely HTML/XML
+            language = "html"
+        _append_code_block(elements, shown, language, MAX_BODY_SIZE)
+        elements.append({"type": "text", "content": f"\n**Size:** {len(body_content)} characters"})
+        return {"elements": elements}
+    # Evaluate expression with body available
+    try:
+        namespace = {"body": body_content}
+        result, output = evaluate_expression(expr, namespace)
+        formatted_result = format_expression_result(result, output)
+        # Build markdown response
+        return {
+            "elements": [
+                {"type": "heading", "content": "Expression Result", "level": 2},
+                {"type": "code_block", "content": expr, "language": "python"},
+                {"type": "text", "content": "**Result:**"},
+                {"type": "code_block", "content": formatted_result, "language": ""},
+            ]
+        }
+    except Exception as e:
+        # Command boundary: report the failure with type-specific hints
+        return error_response(f"{type(e).__name__}: {e}", suggestions=_expression_error_suggestions(e))

webtap/commands/connection.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""Chrome browser connection management commands."""
+from webtap.app import app
+from webtap.commands._errors import check_connection
+from webtap.commands._builders import info_response, table_response, error_response
+@app.command(display="markdown", fastmcp={"type": "tool"})
+def connect(state, page: int = None, page_id: str = None) -> dict:  # pyright: ignore[reportArgumentType]
+    """Connect to a Chrome page and enable all required domains.
+    Args:
+        page: Connect by page index (0-based)
+        page_id: Connect by page ID
+    Note: With neither argument the first available page is used.
+          Passing both page and page_id is rejected.
+    Examples:
+        connect()                    # First page
+        connect(page=2)             # Third page (0-indexed)
+        connect(page_id="xyz")      # Specific page ID
+    Returns:
+        Connection status in markdown
+    """
+    both_given = page is not None and page_id is not None
+    if both_given:
+        return error_response("Cannot specify both 'page' and 'page_id'. Use one or the other.")
+    outcome = state.service.connect_to_page(page_index=page, page_id=page_id)
+    if "error" in outcome:
+        return error_response(outcome["error"])
+    # Connected: report the page title together with its untruncated URL
+    fields = {"Page": outcome["title"], "URL": outcome["url"]}
+    return info_response(title="Connection Established", fields=fields)
+@app.command(display="markdown", fastmcp={"type": "tool"})
+def disconnect(state) -> dict:
+    """Disconnect from Chrome and report whether a connection existed."""
+    outcome = state.service.disconnect()
+    label = "Disconnected" if outcome["was_connected"] else "Not connected"
+    return info_response(title="Disconnect Status", fields={"Status": label})
+@app.command(display="markdown", fastmcp={"type": "tool"})
+def clear(state, events: bool = True, console: bool = False, cache: bool = False) -> dict:
+    """Clear various data stores.
+    Args:
+        events: Clear CDP events (default: True)
+        console: Clear console messages (default: False)
+        cache: Clear body cache (default: False)
+    Examples:
+        clear()                                    # Clear events only
+        clear(events=True, console=True)          # Clear events and console
+        clear(cache=True)                          # Clear cache only
+        clear(events=False, console=True)         # Console only
+        clear(events=True, console=True, cache=True)  # Clear everything
+    Returns:
+        Summary of what was cleared
+    """
+    cleared = []
+    # Clear CDP events
+    if events:
+        state.service.clear_events()
+        cleared.append("events")
+    # Clear browser console
+    if console:
+        if not (state.cdp and state.cdp.is_connected):
+            cleared.append("console (not connected)")
+        elif state.service.console.clear_browser_console():
+            cleared.append("console")
+        else:
+            # Report the failed attempt; dropping it silently could end in the
+            # misleading "Nothing to clear" reply below.
+            cleared.append("console (failed)")
+    # Clear body cache
+    if cache:
+        body_service = getattr(state.service, "body", None)
+        if body_service:
+            count = body_service.clear_cache()
+            cleared.append(f"cache ({count} bodies)")
+        else:
+            cleared.append("cache (0 bodies)")
+    # Return summary
+    if not cleared:
+        return info_response(
+            title="Clear Status",
+            fields={"Result": "Nothing to clear (specify events=True, console=True, or cache=True)"},
+        )
+    return info_response(title="Clear Status", fields={"Cleared": ", ".join(cleared)})
+@app.command(
+    display="markdown",
+    truncate={
+        "Title": {"max": 20, "mode": "end"},
+        "URL": {"max": 30, "mode": "middle"},
+        "ID": {"max": 6, "mode": "end"},
+    },
+    fastmcp={"type": "resource", "mime_type": "application/json"},
+)
+def pages(state) -> dict:
+    """List the Chrome pages the service can see.
+    Returns:
+        Table of available pages in markdown
+    """
+    listing = state.service.list_pages()
+    pages_list = listing.get("pages", [])
+    # Rows carry the untruncated values; shortening is left to the decorator's
+    # ``truncate`` settings.
+    rows = []
+    for index, entry in enumerate(pages_list):
+        rows.append(
+            {
+                "Index": str(index),
+                "Title": entry.get("title", "Untitled"),
+                "URL": entry.get("url", ""),
+                "ID": entry.get("id", ""),
+                "Connected": "Yes" if entry.get("is_connected") else "No",
+            }
+        )
+    total = len(pages_list)
+    plural = "s" if total != 1 else ""
+    return table_response(
+        title="Chrome Pages",
+        headers=["Index", "Title", "URL", "ID", "Connected"],
+        rows=rows,
+        summary=f"{total} page{plural} available",
+    )
+@app.command(display="markdown", fastmcp={"type": "resource", "mime_type": "application/json"})
+def status(state) -> dict:
+    """Report the state of the current connection.
+    Returns:
+        Status information in markdown, or the error dict produced by
+        check_connection() when it reports a problem.
+    """
+    error = check_connection(state)
+    if error:
+        return error
+    snapshot = state.service.get_status()
+    # Title and URL fall back to placeholders; the remaining keys are read directly.
+    fields = {
+        "Page": snapshot.get("title", "Unknown"),
+        "URL": snapshot.get("url", ""),
+        "Events": f"{snapshot['events']} stored",
+        "Fetch": "Enabled" if snapshot["fetch_enabled"] else "Disabled",
+        "Domains": ", ".join(snapshot["enabled_domains"]),
+    }
+    return info_response(title="Connection Status", fields=fields)

webtap/commands/console.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Browser console message monitoring and display commands."""
+from webtap.app import app
+from webtap.commands._builders import table_response
+from webtap.commands._errors import check_connection
+from webtap.commands._tips import get_tips
+@app.command(
+    display="markdown",
+    truncate={"Message": {"max": 80, "mode": "end"}},
+    transforms={"Time": "format_timestamp"},
+    fastmcp={"type": "resource", "mime_type": "application/json"},
+)
+def console(state, limit: int = 50) -> dict:
+    """Show console messages with full data.
+    Args:
+        limit: Max results (default: 50)
+    Examples:
+        console()           # Recent console messages
+        console(limit=100)  # Show more messages
+    Returns:
+        Table of console messages with full data
+    """
+    error = check_connection(state)
+    if error:
+        return error
+    results = state.service.console.get_recent_messages(limit=limit)
+    # One table row per message; values stay untruncated and the timestamp stays
+    # raw because the decorator's truncate/transforms settings handle display.
+    rows = [
+        {
+            "ID": str(rowid),
+            "Level": (level or "LOG").upper(),
+            "Source": source or "console",
+            "Message": message or "",
+            "Time": timestamp or 0,
+        }
+        for rowid, level, source, message, timestamp in results
+    ]
+    # A full page of results suggests there may be more than was fetched
+    warnings = []
+    if limit and len(results) == limit:
+        warnings.append(f"Showing first {limit} messages (use limit parameter to see more)")
+    # Contextual tips: point the example at the first error/warning row when
+    # there is one, otherwise at the first row.
+    tips = None
+    if rows:
+        problem_levels = ["ERROR", "WARN", "WARNING"]
+        example_id = next(
+            (r["ID"] for r in rows if r.get("Level", "").upper() in problem_levels),
+            rows[0]["ID"],
+        )
+        tips = get_tips("console", context={"id": example_id})
+    return table_response(
+        title="Console Messages",
+        headers=["ID", "Level", "Source", "Message", "Time"],
+        rows=rows,
+        summary=f"{len(rows)} messages",
+        warnings=warnings,
+        tips=tips,
+    )