PyPI - webtap-tool - Versions diffs - 0.1.1__py3-none-any.whl - Mend

webtap-tool 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webtap-tool might be problematic. Click here for more details.

Files changed (43) hide show

webtap/VISION.md +234 -0
webtap/__init__.py +56 -0
webtap/api.py +222 -0
webtap/app.py +76 -0
webtap/cdp/README.md +268 -0
webtap/cdp/__init__.py +14 -0
webtap/cdp/query.py +107 -0
webtap/cdp/schema/README.md +41 -0
webtap/cdp/schema/cdp_protocol.json +32785 -0
webtap/cdp/schema/cdp_version.json +8 -0
webtap/cdp/session.py +365 -0
webtap/commands/DEVELOPER_GUIDE.md +314 -0
webtap/commands/TIPS.md +153 -0
webtap/commands/__init__.py +7 -0
webtap/commands/_builders.py +127 -0
webtap/commands/_errors.py +108 -0
webtap/commands/_tips.py +147 -0
webtap/commands/_utils.py +227 -0
webtap/commands/body.py +161 -0
webtap/commands/connection.py +168 -0
webtap/commands/console.py +69 -0
webtap/commands/events.py +109 -0
webtap/commands/fetch.py +219 -0
webtap/commands/filters.py +224 -0
webtap/commands/inspect.py +146 -0
webtap/commands/javascript.py +87 -0
webtap/commands/launch.py +86 -0
webtap/commands/navigation.py +199 -0
webtap/commands/network.py +85 -0
webtap/commands/setup.py +127 -0
webtap/filters.py +289 -0
webtap/services/README.md +83 -0
webtap/services/__init__.py +15 -0
webtap/services/body.py +113 -0
webtap/services/console.py +116 -0
webtap/services/fetch.py +397 -0
webtap/services/main.py +175 -0
webtap/services/network.py +105 -0
webtap/services/setup.py +219 -0
webtap_tool-0.1.1.dist-info/METADATA +427 -0
webtap_tool-0.1.1.dist-info/RECORD +43 -0
webtap_tool-0.1.1.dist-info/WHEEL +4 -0
webtap_tool-0.1.1.dist-info/entry_points.txt +2 -0

webtap/commands/setup.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""Setup commands for WebTap components."""
+from webtap.app import app
+from webtap.services.setup import SetupService
+@app.command(
+    display="markdown",
+    typer={"name": "setup-filters", "help": "Download filter configuration from GitHub"},
+    fastmcp={"enabled": False},
+)
+def setup_filters(state, force: bool = False) -> dict:
+    """Download filter configuration to ./.webtap/filters.json.
+    Args:
+        force: Overwrite existing file (default: False)
+    Returns:
+        Markdown-formatted result with success/error messages
+    """
+    service = SetupService()
+    result = service.install_filters(force=force)
+    return _format_setup_result(result, "filters")
+@app.command(
+    display="markdown",
+    typer={"name": "setup-extension", "help": "Download Chrome extension from GitHub"},
+    fastmcp={"enabled": False},
+)
+def setup_extension(state, force: bool = False) -> dict:
+    """Download Chrome extension to ~/.config/webtap/extension/.
+    Args:
+        force: Overwrite existing files (default: False)
+    Returns:
+        Markdown-formatted result with success/error messages
+    """
+    service = SetupService()
+    result = service.install_extension(force=force)
+    return _format_setup_result(result, "extension")
+@app.command(
+    display="markdown",
+    typer={"name": "setup-chrome", "help": "Install Chrome wrapper script for debugging"},
+    fastmcp={"enabled": False},
+)
+def setup_chrome(state, force: bool = False) -> dict:
+    """Install Chrome wrapper to ~/.local/bin/wrappers/google-chrome-stable.
+    Args:
+        force: Overwrite existing script (default: False)
+    Returns:
+        Markdown-formatted result with success/error messages
+    """
+    service = SetupService()
+    result = service.install_chrome_wrapper(force=force)
+    return _format_setup_result(result, "chrome")
+def _format_setup_result(result: dict, component: str) -> dict:
+    """Format setup result as markdown."""
+    elements = []
+    # Main message as alert (using "message" key for consistency)
+    level = "success" if result["success"] else "error"
+    elements.append({"type": "alert", "message": result["message"], "level": level})
+    # Add details if present
+    if result.get("path"):
+        elements.append({"type": "text", "content": f"**Location:** `{result['path']}`"})
+    if result.get("details"):
+        elements.append({"type": "text", "content": f"**Details:** {result['details']}"})
+    # Component-specific next steps
+    if result["success"]:
+        if component == "filters":
+            elements.append({"type": "text", "content": "\n**Next steps:**"})
+            elements.append(
+                {
+                    "type": "list",
+                    "items": [
+                        "Run `filters('load')` to load the filters",
+                        "Run `filters()` to see loaded categories",
+                    ],
+                }
+            )
+        elif component == "extension":
+            elements.append({"type": "text", "content": "\n**To install in Chrome:**"})
+            elements.append(
+                {
+                    "type": "list",
+                    "items": [
+                        "Open chrome://extensions/",
+                        "Enable Developer mode",
+                        "Click 'Load unpacked'",
+                        f"Select {result['path']}",
+                    ],
+                }
+            )
+        elif component == "chrome":
+            if "Add to PATH" in result.get("details", ""):
+                elements.append({"type": "text", "content": "\n**Setup PATH:**"})
+                elements.append(
+                    {
+                        "type": "code_block",
+                        "language": "bash",
+                        "content": 'export PATH="$HOME/.local/bin/wrappers:$PATH"',
+                    }
+                )
+                elements.append({"type": "text", "content": "Add to ~/.bashrc to make permanent"})
+            else:
+                elements.append({"type": "text", "content": "\n**Usage:**"})
+                elements.append(
+                    {
+                        "type": "list",
+                        "items": [
+                            "Run `google-chrome-stable` to start Chrome with debugging",
+                            "Or use `run-chrome` command for direct launch",
+                        ],
+                    }
+                )
+    return {"elements": elements}

webtap/filters.py ADDED Viewed

@@ -0,0 +1,289 @@
+"""Network request filter management for WebTap.
+PUBLIC API:
+  - FilterManager: Main filter management class
+"""
+import json
+import logging
+from pathlib import Path
+from typing import Dict, List, Any
+logger = logging.getLogger(__name__)
+class FilterManager:
+    """Manages network request filters for noise reduction.
+    Provides filtering of CDP network events based on domain patterns and resource
+    types. Filters are organized into categories that can be enabled/disabled
+    independently. Supports wildcard patterns and generates SQL WHERE clauses
+    for efficient event filtering.
+    Attributes:
+        filter_path: Path to the filters.json file.
+        filters: Dict mapping category names to filter patterns.
+        enabled_categories: Set of currently enabled filter categories.
+    """
+    def __init__(self, filter_path: Path | None = None):
+        """Initialize filter manager.
+        Args:
+            filter_path: Path to filters.json file. Defaults to .webtap/filters.json.
+        """
+        self.filter_path = filter_path or (Path.cwd() / ".webtap" / "filters.json")
+        self.filters: Dict[str, Dict[str, List[str]]] = {}
+        self.enabled_categories: set[str] = set()
+    def load(self) -> bool:
+        """Load filters from disk.
+        Loads filter configuration from the JSON file and enables all categories
+        by default. Creates empty filter dict if file doesn't exist or fails to load.
+        Returns:
+            True if loaded successfully, False otherwise.
+        """
+        if self.filter_path.exists():
+            try:
+                with open(self.filter_path) as f:
+                    self.filters = json.load(f)
+                    # Enable all categories by default
+                    self.enabled_categories = set(self.filters.keys())
+                    logger.info(f"Loaded {len(self.filters)} filter categories from {self.filter_path}")
+                    return True
+            except Exception as e:
+                logger.error(f"Failed to load filters: {e}")
+                self.filters = {}
+                return False
+        else:
+            logger.info(f"No filters found at {self.filter_path}")
+            self.filters = {}
+            return False
+    def save(self) -> bool:
+        """Save current filters to disk.
+        Creates the parent directory if it doesn't exist and writes the filter
+        configuration as JSON with indentation.
+        Returns:
+            True if saved successfully, False on error.
+        """
+        try:
+            self.filter_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(self.filter_path, "w") as f:
+                json.dump(self.filters, f, indent=2)
+            logger.info(f"Saved filters to {self.filter_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save filters: {e}")
+            return False
+    def add_pattern(self, pattern: str, category: str, pattern_type: str = "domain") -> bool:
+        """Add a filter pattern to a category.
+        Creates the category if it doesn't exist and enables it. Supports wildcard
+        patterns using * for matching. Patterns are deduplicated within categories.
+        Args:
+            pattern: Pattern to add (e.g., "*ads*", "googletagmanager.com").
+            category: Category name (e.g., "ads", "tracking").
+            pattern_type: "domain" or "type". Defaults to "domain".
+        Returns:
+            True if pattern was added, False if it already existed.
+        """
+        if category not in self.filters:
+            self.filters[category] = {"domains": [], "types": []}
+            self.enabled_categories.add(category)
+        key = "domains" if pattern_type == "domain" else "types"
+        if pattern not in self.filters[category][key]:
+            self.filters[category][key].append(pattern)
+            return True
+        return False
+    def remove_pattern(self, pattern: str, pattern_type: str = "domain") -> str:
+        """Remove a pattern from all categories.
+        Searches through all categories to find and remove the specified pattern.
+        Only removes the first occurrence found.
+        Args:
+            pattern: Pattern to remove.
+            pattern_type: "domain" or "type". Defaults to "domain".
+        Returns:
+            Category name it was removed from, or empty string if not found.
+        """
+        key = "domains" if pattern_type == "domain" else "types"
+        for category, filters in self.filters.items():
+            if pattern in filters.get(key, []):
+                filters[key].remove(pattern)
+                return category
+        return ""
+    def update_category(self, category: str, domains: List[str] | None = None, types: List[str] | None = None):
+        """Update or create a category with new patterns.
+        Creates the category if it doesn't exist and enables it. If patterns are
+        provided, they completely replace the existing patterns for that type.
+        Args:
+            category: Category name.
+            domains: List of domain patterns. None leaves existing unchanged.
+            types: List of type patterns. None leaves existing unchanged.
+        """
+        if category not in self.filters:
+            self.filters[category] = {"domains": [], "types": []}
+        if domains is not None:
+            self.filters[category]["domains"] = domains
+        if types is not None:
+            self.filters[category]["types"] = types
+        self.enabled_categories.add(category)
+    def delete_category(self, category: str) -> bool:
+        """Delete a filter category.
+        Removes the category and all its patterns. Also removes it from the
+        enabled categories set.
+        Args:
+            category: Category name to delete.
+        Returns:
+            True if category was deleted, False if it didn't exist.
+        """
+        if category in self.filters:
+            del self.filters[category]
+            self.enabled_categories.discard(category)
+            return True
+        return False
+    def set_enabled_categories(self, categories: List[str] | None = None):
+        """Set which categories are enabled for filtering.
+        Only enabled categories are used when generating SQL filter clauses.
+        Invalid category names are silently ignored.
+        Args:
+            categories: List of category names to enable. None enables all categories.
+        """
+        if categories is None:
+            self.enabled_categories = set(self.filters.keys())
+        else:
+            self.enabled_categories = set(categories) & set(self.filters.keys())
+    def get_filter_sql(self, use_all: bool = True, categories: List[str] | None = None) -> str:
+        """Generate SQL WHERE clause for filtering CDP events.
+        Creates SQL conditions to exclude network requests matching the filter
+        patterns. Handles wildcard patterns by converting them to SQL LIKE patterns
+        and properly escapes SQL strings.
+        Args:
+            use_all: Use all enabled categories. Defaults to True.
+            categories: Specific categories to use (overrides use_all).
+        Returns:
+            SQL WHERE clause string, or empty string if no filters apply.
+        """
+        if not self.filters:
+            return ""
+        # Determine which categories to use
+        if categories:
+            active_categories = set(categories) & set(self.filters.keys())
+        elif use_all:
+            active_categories = self.enabled_categories
+        else:
+            return ""
+        if not active_categories:
+            return ""
+        # Collect all patterns
+        all_domains = []
+        all_types = []
+        for category in active_categories:
+            all_domains.extend(self.filters[category].get("domains", []))
+            all_types.extend(self.filters[category].get("types", []))
+        # Build filter conditions - exclude matching items
+        exclude_conditions = []
+        # Domain filtering - exclude URLs matching these patterns
+        if all_domains:
+            for pattern in all_domains:
+                # Convert wildcard to SQL LIKE pattern, escape single quotes for SQL safety
+                sql_pattern = pattern.replace("'", "''").replace("*", "%")
+                # For Network.responseReceived events - filter on what's actually there
+                exclude_conditions.append(
+                    f"json_extract_string(event, '$.params.response.url') NOT LIKE '{sql_pattern}'"
+                )
+        # Type filtering - exclude these types
+        if all_types:
+            # Escape single quotes in types for SQL safety
+            escaped_types = [t.replace("'", "''") for t in all_types]
+            type_list = ", ".join(f"'{t}'" for t in escaped_types)
+            # Use COALESCE to handle NULL types properly, exclude matching types
+            exclude_conditions.append(
+                f"(COALESCE(json_extract_string(event, '$.params.type'), '') NOT IN ({type_list}) OR "
+                f"json_extract_string(event, '$.params.type') IS NULL)"
+            )
+        if exclude_conditions:
+            # Use AND to ensure ALL conditions are met (item doesn't match ANY filter)
+            return f"({' AND '.join(exclude_conditions)})"
+        return ""
+    def get_status(self) -> Dict[str, Any]:
+        """Get current filter status and statistics.
+        Provides comprehensive information about loaded filters including
+        category counts, enabled status, and file path.
+        Returns:
+            Dict with filter information including loaded status, categories,
+            enabled categories, pattern counts, and file path.
+        """
+        return {
+            "loaded": bool(self.filters),
+            "categories": list(self.filters.keys()),
+            "enabled": list(self.enabled_categories),
+            "total_domains": sum(len(f.get("domains", [])) for f in self.filters.values()),
+            "total_types": sum(len(f.get("types", [])) for f in self.filters.values()),
+            "path": str(self.filter_path),
+        }
+    def get_display_info(self) -> str:
+        """Get formatted filter information for display.
+        Creates a human-readable summary of all filter categories with their
+        enabled status and pattern counts.
+        Returns:
+            Formatted multiline string with filter details.
+        """
+        if not self.filters:
+            return f"No filters loaded (would load from {self.filter_path})"
+        lines = [f"Loaded filters from {self.filter_path}:"]
+        for category in sorted(self.filters.keys()):
+            filters = self.filters[category]
+            enabled = "✓" if category in self.enabled_categories else "✗"
+            domains = len(filters.get("domains", []))
+            types = len(filters.get("types", []))
+            lines.append(f"  {enabled} {category}: {domains} domains, {types} types")
+        return "\n".join(lines)
+__all__ = ["FilterManager"]

webtap/services/README.md ADDED Viewed

@@ -0,0 +1,83 @@
+# WebTap Services Layer
+The services layer provides clean, reusable interfaces for querying and managing CDP events stored in DuckDB.
+## Architecture
+```
+commands/ → services/ → cdp/session → DuckDB
+    ↓          ↓
+   API    Properties/Methods
+```
+## Services
+### WebTapService (`main.py`)
+Main orchestrator that manages all domain-specific services and CDP connection.
+**Key Properties:**
+- `event_count` - Total CDP events stored
+**Key Methods:**
+- `connect_to_page()` - Connect and enable CDP domains
+- `disconnect()` - Clean disconnection
+- `get_status()` - Comprehensive status with metrics from all services
+### FetchService (`fetch.py`)
+Manages HTTP request/response interception.
+**Key Properties:**
+- `paused_count` - Number of paused requests
+**Key Methods:**
+- `get_paused_rowids()` - List of paused request rowids
+- `enable()` / `disable()` - Control interception
+- `continue_request()` / `fail_request()` - Process paused requests
+### NetworkService (`network.py`)
+Queries network events (requests/responses).
+**Key Properties:**
+- `request_count` - Total network requests
+**Key Methods:**
+- `get_recent_requests()` - Network events with filter support
+- `get_failed_requests()` - 4xx/5xx errors
+- `get_request_by_id()` - All events for a request
+### ConsoleService (`console.py`)
+Queries console messages and browser logs.
+**Key Properties:**
+- `message_count` - Total console messages
+- `error_count` - Console errors only
+**Key Methods:**
+- `get_recent_messages()` - Console events with level filter
+- `get_errors()` / `get_warnings()` - Filtered queries
+- `clear_browser_console()` - CDP command to clear console
+## Design Principles
+1. **Rowid-Native**: All queries return rowid as primary identifier
+2. **Direct Queries**: No caching, query DuckDB on-demand
+3. **Properties for Counts**: Common counts exposed as properties
+4. **Methods for Queries**: Complex queries as methods with parameters
+5. **Service Isolation**: Each service manages its domain independently
+## Usage
+Services are accessed through the WebTapState:
+```python
+# In commands
+@app.command()
+def network(state):
+    results = state.service.network.get_recent_requests(limit=20)
+    count = state.service.network.request_count
+# In API
+@api.get("/status")
+async def status():
+    return app_state.service.get_status()
+```

webtap/services/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""WebTap service layer for managing CDP state and operations.
+The service layer provides a clean interface between REPL commands/API endpoints
+and the underlying CDP session. Services encapsulate domain-specific queries and
+operations, making them reusable across different interfaces.
+PUBLIC API:
+  - WebTapService: Main service orchestrating all domain services
+  - SetupService: Service for installing WebTap components
+"""
+from webtap.services.main import WebTapService
+from webtap.services.setup import SetupService
+__all__ = ["WebTapService", "SetupService"]

webtap/services/body.py ADDED Viewed

@@ -0,0 +1,113 @@
+"""Body fetching service for response content."""
+import base64
+import json
+import logging
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from webtap.cdp import CDPSession
+logger = logging.getLogger(__name__)
+class BodyService:
+    """Internal service for response body fetching and caching."""
+    def __init__(self):
+        """Initialize body service."""
+        self.cdp: CDPSession | None = None
+        self._body_cache: dict[str, dict] = {}
+    def get_response_body(self, rowid: int, use_cache: bool = True) -> dict:
+        """Fetch response body for a response.
+        Args:
+            rowid: Row ID from events table (Network or Fetch response)
+            use_cache: Whether to use cached body if available
+        """
+        if not self.cdp:
+            return {"error": "No CDP session"}
+        # Get event from DB to extract requestId
+        result = self.cdp.query("SELECT event FROM events WHERE rowid = ?", [rowid])
+        if not result:
+            return {"error": f"Event with rowid {rowid} not found"}
+        try:
+            event_data = json.loads(result[0][0])
+        except json.JSONDecodeError:
+            return {"error": "Failed to parse event data"}
+        method = event_data.get("method", "")
+        params = event_data.get("params", {})
+        # Handle both Fetch and Network events
+        if method == "Fetch.requestPaused":
+            # Fetch interception - verify it's response stage
+            if "responseStatusCode" not in params:
+                return {"error": "Not a response stage event (no responseStatusCode)"}
+            request_id = params.get("requestId")
+            domain = "Fetch"
+        elif method == "Network.responseReceived":
+            # Regular network response
+            request_id = params.get("requestId")
+            domain = "Network"
+        else:
+            return {"error": f"Not a response event (method: {method})"}
+        if not request_id:
+            return {"error": "No requestId in event"}
+        # Check cache
+        if use_cache and request_id in self._body_cache:
+            logger.debug(f"Using cached body for {request_id}")
+            return self._body_cache[request_id]
+        try:
+            # Fetch body from CDP using appropriate domain
+            logger.debug(f"Fetching body for {request_id} using {domain}.getResponseBody")
+            result = self.cdp.execute(f"{domain}.getResponseBody", {"requestId": request_id})
+            body_data = {"body": result.get("body", ""), "base64Encoded": result.get("base64Encoded", False)}
+            # Cache it for this request
+            if use_cache:
+                self._body_cache[request_id] = body_data
+                logger.debug(f"Cached body for {request_id}")
+            return body_data
+        except Exception as e:
+            logger.error(f"Failed to fetch body for {request_id}: {e}")
+            return {"error": str(e)}
+    def clear_cache(self):
+        """Clear all cached bodies."""
+        count = len(self._body_cache)
+        self._body_cache.clear()
+        logger.info(f"Cleared {count} cached bodies")
+        return count
+    def decode_body(self, body_content: str, is_base64: bool) -> str | bytes:
+        """Decode body content if base64 encoded.
+        Args:
+            body_content: The body content (possibly base64)
+            is_base64: Whether the content is base64 encoded
+        """
+        if not is_base64:
+            return body_content
+        try:
+            decoded = base64.b64decode(body_content)
+            # Try to decode as UTF-8 text
+            try:
+                return decoded.decode("utf-8")
+            except UnicodeDecodeError:
+                # Return as bytes for binary content
+                return decoded
+        except Exception as e:
+            logger.error(f"Failed to decode base64 body: {e}")
+            return body_content  # Return original if decode fails