PyPI - webtap-tool - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

webtap-tool 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webtap-tool might be problematic. Click here for more details.

Files changed (24) hide show

webtap/api.py +318 -9
webtap/app.py +12 -5
webtap/cdp/session.py +101 -1
webtap/commands/DEVELOPER_GUIDE.md +108 -22
webtap/commands/TIPS.md +24 -1
webtap/commands/_builders.py +139 -1
webtap/commands/body.py +1 -2
webtap/commands/connection.py +1 -2
webtap/commands/console.py +1 -2
webtap/commands/events.py +1 -2
webtap/commands/fetch.py +1 -2
webtap/commands/inspect.py +1 -2
webtap/commands/javascript.py +61 -28
webtap/commands/navigation.py +1 -2
webtap/commands/network.py +17 -35
webtap/commands/selections.py +129 -0
webtap/commands/server.py +1 -0
webtap/services/dom.py +512 -0
webtap/services/main.py +14 -0
{webtap_tool-0.4.0.dist-info → webtap_tool-0.5.1.dist-info}/METADATA +1 -1
{webtap_tool-0.4.0.dist-info → webtap_tool-0.5.1.dist-info}/RECORD +23 -22
webtap/commands/_errors.py +0 -108
{webtap_tool-0.4.0.dist-info → webtap_tool-0.5.1.dist-info}/WHEEL +0 -0
{webtap_tool-0.4.0.dist-info → webtap_tool-0.5.1.dist-info}/entry_points.txt +0 -0

webtap/commands/selections.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Browser element selection and prompt analysis commands.
+PUBLIC API:
+  - selections: Analyze browser element selections with prompt
+"""
+from webtap.app import app
+from webtap.commands._utils import evaluate_expression, format_expression_result
+from webtap.commands._builders import error_response
+from webtap.commands._tips import get_tips
+@app.command(
+    display="markdown",
+    fastmcp=[{"type": "resource", "mime_type": "application/json"}, {"type": "tool"}],
+)
+def selections(state, expr: str = None) -> dict:  # pyright: ignore[reportArgumentType]
+    """Browser element selections with prompt and analysis.
+    As Resource (no parameters):
+        selections             # Returns current prompt and all selections
+    As Tool (with parameters):
+        selections(expr="data['prompt']")                          # Get prompt text
+        selections(expr="data['selections']['1']['styles']")       # Get styles for #1
+        selections(expr="len(data['selections'])")                 # Count selections
+        selections(expr="{k: v['selector'] for k, v in data['selections'].items()}")  # All selectors
+    Args:
+        state: Application state; its ``browser_data`` attribute (if present) holds prompt and selections
+        expr: Python expression with 'data' variable containing prompt and selections
+    Returns:
+        Formatted browser data or expression result; an error response when no
+        selections exist or when evaluating the expression raises
+    """
+    # A single getattr covers both "attribute missing" and "attribute empty"
+    data = getattr(state, "browser_data", None)
+    if not data:
+        return error_response(
+            "No browser selections available",
+            suggestions=[
+                "Use the Chrome extension to select elements",
+                "Click 'Start Selection Mode' in the extension popup",
+                "Select elements on the page and submit a prompt",
+            ],
+        )
+    # No expression - RESOURCE MODE: Return formatted view
+    if not expr:
+        return _format_browser_data(data)
+    # TOOL MODE: Evaluate expression
+    # NOTE(security): expr is caller-supplied and handed to evaluate_expression, which
+    # presumably executes it as Python - confirm this is only reachable by trusted callers.
+    try:
+        namespace = {"data": data}
+        result, output = evaluate_expression(expr, namespace)
+        formatted_result = format_expression_result(result, output)
+        # Build markdown response
+        return {
+            "elements": [
+                {"type": "heading", "content": "Expression Result", "level": 2},
+                {"type": "code_block", "content": expr, "language": "python"},
+                {"type": "text", "content": "**Result:**"},
+                {"type": "code_block", "content": formatted_result, "language": ""},
+            ]
+        }
+    except Exception as e:
+        # Provide helpful suggestions
+        suggestions = [
+            "The data is available as 'data' variable",
+            "Access prompt: data['prompt']",
+            "Access selections: data['selections']",
+            "Access specific element: data['selections']['1']",
+            "Available fields: outerHTML, selector, jsPath, styles, xpath, fullXpath, preview",
+        ]
+        # isinstance also matches KeyError subclasses and never matches unrelated
+        # exception classes that merely have "KeyError" in their name
+        if isinstance(e, KeyError):
+            suggestions.extend(
+                [
+                    "Check available selection IDs: list(data['selections'].keys())",
+                    "Check available fields: data['selections']['1'].keys()",
+                ]
+            )
+        return error_response(f"{type(e).__name__}: {e}", suggestions=suggestions)
+def _format_browser_data(data: dict) -> dict:
+    """Render the stored prompt and selections as markdown elements.
+    Args:
+        data: Mapping with an optional "prompt" string and an optional "selections"
+            mapping of selection ID -> element data.
+    Returns:
+        {"elements": [...]} containing the prompt, the selection count and, when at
+        least one selection exists, one preview line plus selector per element
+        followed by any tips returned by get_tips("selections").
+    """
+    chosen = data.get("selections", {})
+    total = len(chosen)
+    blocks = [
+        {"type": "heading", "content": "Browser Prompt", "level": 2},
+        {"type": "text", "content": data.get("prompt", "")},
+        {"type": "text", "content": f"\n**Selected Elements:** {total}"},
+    ]
+    # Nothing selected: the prompt and the zero count are the whole view
+    if total == 0:
+        return {"elements": blocks}
+    blocks.append({"type": "heading", "content": "Element Selections", "level": 3})
+    # IDs are numeric strings, so order them numerically rather than lexically
+    for key in sorted(chosen, key=int):
+        entry = chosen[key]
+        summary = entry.get("preview", {})
+        line = [f"**#{key}:**", summary.get("tag", "unknown")]
+        if summary.get("id"):
+            line.append(f"#{summary['id']}")
+        if summary.get("classes"):
+            # Only the first class is shown to keep the preview short
+            line.append(f".{summary['classes'][0]}")
+        blocks.append({"type": "text", "content": " ".join(line)})
+        blocks.append({"type": "code_block", "content": entry.get("selector", ""), "language": "css"})
+    # Usage tips are only appended when there is something to act on
+    hints = get_tips("selections")
+    if hints:
+        blocks.append({"type": "heading", "content": "Next Steps", "level": 3})
+        blocks.append({"type": "list", "items": hints})
+    return {"elements": blocks}
+__all__ = ["selections"]

webtap/commands/server.py CHANGED Viewed

@@ -20,6 +20,7 @@ API_PORT = 8765
 def _check_port() -> bool:
     """Check if API port is in use."""
     with socket.socket() as s:
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
         try:
             s.bind(("127.0.0.1", API_PORT))
             return False  # Port is free

webtap/services/dom.py ADDED Viewed

@@ -0,0 +1,512 @@
+"""DOM inspection service using Chrome DevTools Protocol.
+PUBLIC API:
+  - DOMService: Manages element inspection and selection via CDP Overlay domain
+"""
+import logging
+import re
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, TYPE_CHECKING
+if TYPE_CHECKING:
+    from webtap.cdp.session import CDPSession
+    from webtap.app import WebTapState
+logger = logging.getLogger(__name__)
+class DOMService:
+    """Manages element inspection and selection via CDP Overlay domain.
+    Uses CDP's native inspect mode (Overlay.setInspectMode) which provides:
+    - Native Chrome highlight on hover (no custom overlay needed)
+    - Click events via Overlay.inspectNodeRequested
+    - Accurate element data via DOM.describeNode, CSS.getComputedStyleForNode
+    Selections are stored in state.browser_data (not DuckDB) as they are
+    ephemeral session data cleared after prompt submission.
+    Attributes:
+        cdp: CDP session for executing commands
+        state: WebTap state for storing selections
+        _inspection_active: Whether inspect mode is currently active
+        _next_id: Counter for assigning selection IDs
+    """
+    def __init__(self, cdp: "CDPSession | None" = None, state: "WebTapState | None" = None):
+        """Create the service; both collaborators may be supplied later.
+        Args:
+            cdp: CDPSession instance, or None until set_cdp() is called.
+            state: WebTapState instance, or None until set_state() is called.
+        """
+        # Collaborators
+        self.state = state
+        self.cdp = cdp
+        # Set later through set_broadcast_queue(); None means broadcasts are skipped
+        self._broadcast_queue: "Any | None" = None  # asyncio.Queue for thread-safe broadcasts
+        # Selection bookkeeping, guarded by _state_lock
+        self._state_lock = threading.Lock()
+        self._next_id = 1
+        self._pending_selections = 0
+        self._inspection_active = False
+        # Small pool that runs the blocking per-selection work
+        self._executor = ThreadPoolExecutor(thread_name_prefix="dom-worker", max_workers=2)
+    def set_cdp(self, cdp: "CDPSession") -> None:
+        """Set CDP session after initialization.
+        Args:
+            cdp: Session stored on ``self.cdp``; the other methods call ``execute`` on it.
+        """
+        self.cdp = cdp
+    def set_state(self, state: "WebTapState") -> None:
+        """Set state after initialization.
+        Args:
+            state: State object stored on ``self.state``; selections are kept in its ``browser_data``.
+        """
+        self.state = state
+    def set_broadcast_queue(self, queue: "Any") -> None:
+        """Set queue for broadcasting state changes.
+        The queue only needs a ``put_nowait`` method; that is the sole call made on it.
+        Args:
+            queue: asyncio.Queue for thread-safe signaling
+        """
+        self._broadcast_queue = queue
+    def start_inspect(self) -> dict[str, Any]:
+        """Turn on CDP element inspection for the connected page.
+        Issues DOM.enable, DOM.getDocument, CSS.enable and Overlay.enable, then
+        Overlay.setInspectMode in "searchForNode" mode so Chrome highlights elements
+        on hover and fires Overlay.inspectNodeRequested on click.
+        Returns:
+            {"success": True, "inspect_active": True} on success, otherwise a
+            dictionary holding a single "error" message.
+        """
+        session = self.cdp
+        if not (session and session.ws_app):
+            return {"error": "Not connected to page"}
+        if self._inspection_active:
+            return {"error": "Inspection already active"}
+        # Colors for the native Chrome highlight overlay
+        highlight = {
+            "showInfo": True,
+            "showStyles": True,
+            "contentColor": {"r": 111, "g": 168, "b": 220, "a": 0.66},
+            "paddingColor": {"r": 147, "g": 196, "b": 125, "a": 0.55},
+            "borderColor": {"r": 255, "g": 229, "b": 153, "a": 0.66},
+            "marginColor": {"r": 246, "g": 178, "b": 107, "a": 0.66},
+        }
+        # Order matters: Overlay depends on DOM, and BackendNodeIds only work after
+        # getDocument() has been called; CSS is needed for computed styles.
+        setup_calls = (
+            ("DOM.enable",),
+            ("DOM.getDocument", {"depth": -1}),
+            ("CSS.enable",),
+            ("Overlay.enable",),
+            ("Overlay.setInspectMode", {"mode": "searchForNode", "highlightConfig": highlight}),
+        )
+        try:
+            for call_args in setup_calls:
+                session.execute(*call_args)
+            self._inspection_active = True
+            logger.info("Element inspection mode enabled")
+            return {"success": True, "inspect_active": True}
+        except Exception as exc:
+            logger.error(f"Failed to enable inspection mode: {exc}")
+            return {"error": str(exc)}
+    def stop_inspect(self) -> dict[str, Any]:
+        """Turn off CDP element inspection.
+        Returns:
+            {"success": True, "inspect_active": False} when inspection is (now) off,
+            otherwise a dictionary holding a single "error" message.
+        """
+        session = self.cdp
+        if not (session and session.ws_app):
+            return {"error": "Not connected to page"}
+        # Already off: report success without talking to the browser
+        if not self._inspection_active:
+            return {"success": True, "inspect_active": False}
+        # NOTE: highlightConfig required even for mode=none, otherwise CDP throws:
+        # "Internal error: highlight configuration parameter is missing"
+        disable_params = {"mode": "none", "highlightConfig": {}}
+        try:
+            session.execute("Overlay.setInspectMode", disable_params)
+            self._inspection_active = False
+            logger.info("Element inspection mode disabled")
+        except Exception as exc:
+            logger.error(f"Failed to disable inspection mode: {exc}")
+            return {"error": str(exc)}
+        return {"success": True, "inspect_active": False}
+    def handle_inspect_node_requested(self, event: dict) -> None:
+        """Handle Overlay.inspectNodeRequested event (user clicked element).
+        CRITICAL: Called from WebSocket thread - MUST NOT make blocking CDP calls!
+        Offload to background thread to avoid deadlock.
+        If the work cannot be scheduled (the executor raises RuntimeError, e.g. after
+        cleanup() shut it down), the pending counter is rolled back and the failure is
+        logged instead of propagating into the WebSocket thread.
+        Args:
+            event: CDP event with method and params
+        """
+        if not self.cdp or not self.state:
+            logger.error("DOMService not properly initialized (missing cdp or state)")
+            return
+        params = event.get("params", {})
+        backend_node_id = params.get("backendNodeId")
+        if not backend_node_id:
+            logger.warning("inspectNodeRequested event missing backendNodeId")
+            return
+        # Increment pending counter (thread-safe)
+        with self._state_lock:
+            self._pending_selections += 1
+        self._trigger_broadcast()
+        # Submit to background thread - returns immediately, no blocking
+        try:
+            self._executor.submit(self._process_node_selection, backend_node_id)
+        except RuntimeError as e:
+            # The worker will never run, so its finally-block decrement never happens;
+            # undo the increment here to keep the pending count accurate.
+            with self._state_lock:
+                self._pending_selections -= 1
+            logger.error(f"Could not schedule node selection: {e}")
+            self._trigger_broadcast()
+    def handle_frame_navigated(self, event: dict) -> None:
+        """React to a Page.frameNavigated event.
+        When the navigated frame has no parent (the main frame), all selections are
+        cleared and a broadcast is triggered; frames with a parent are ignored.
+        Called from WebSocket thread - must be non-blocking.
+        Args:
+            event: CDP event with method and params
+        """
+        frame_info = event.get("params", {}).get("frame", {})
+        # A frame without a parentId is the main frame; iframes carry one
+        is_main_frame = not frame_info.get("parentId")
+        if is_main_frame:
+            logger.info("Main frame navigated - clearing selections")
+            self.clear_selections()
+            self._trigger_broadcast()
+    def _process_node_selection(self, backend_node_id: int) -> None:
+        """Process node selection in background thread.
+        Safe to make blocking CDP calls here - we're not in WebSocket thread.
+        Extracts the element data, stores it in ``state.browser_data["selections"]``
+        under the next sequential ID, and - whether that succeeded or failed -
+        decrements the pending counter and triggers a broadcast.
+        Args:
+            backend_node_id: CDP backend node ID from inspectNodeRequested event
+        """
+        try:
+            # Make blocking CDP calls (OK in background thread)
+            # Done before taking the lock so the lock is never held across CDP calls
+            data = self._extract_node_data(backend_node_id)
+            # Thread-safe state update
+            with self._state_lock:
+                if not self.state:
+                    logger.error("DOMService state not initialized")
+                    return
+                # Selection IDs are stored as strings: "1", "2", ...
+                selection_id = str(self._next_id)
+                self._next_id += 1
+                # Lazily create the browser_data container and its "selections" map
+                if not self.state.browser_data:
+                    self.state.browser_data = {"selections": {}, "prompt": ""}
+                if "selections" not in self.state.browser_data:
+                    self.state.browser_data["selections"] = {}
+                self.state.browser_data["selections"][selection_id] = data
+            logger.info(f"Element selected: {selection_id} - {data.get('preview', {}).get('tag', 'unknown')}")
+        except Exception as e:
+            logger.error(f"Failed to process node selection: {e}")
+            # Set error state for UI display
+            # NOTE(review): error_state is written without holding _state_lock - confirm that is intended
+            if self.state:
+                import time
+                error_msg = str(e)
+                # Provide user-friendly message for common errors
+                if "timed out" in error_msg.lower() or isinstance(e, TimeoutError):
+                    error_msg = "Element selection timed out - page may be unresponsive"
+                self.state.error_state = {"message": error_msg, "timestamp": time.time()}
+        finally:
+            # Decrement pending counter (thread-safe)
+            # Runs on every exit path, including the early return above
+            with self._state_lock:
+                self._pending_selections -= 1
+            self._trigger_broadcast()
+    def _trigger_broadcast(self) -> None:
+        """Queue a "dom_update" message for the SSE broadcaster, if a queue is set.
+        Failures while queueing are logged at debug level and otherwise ignored.
+        """
+        target = self._broadcast_queue
+        if not target:
+            return
+        # NOTE(review): if this really is an asyncio.Queue, put_nowait from a non-loop
+        # thread is not documented as thread-safe - confirm how the consumer is woken.
+        try:
+            target.put_nowait({"type": "dom_update"})
+        except Exception as exc:
+            logger.debug(f"Failed to queue broadcast: {exc}")
+    def _extract_node_data(self, backend_node_id: int) -> dict[str, Any]:
+        """Extract complete element data via CDP.
+        Calls DOM.describeNode, DOM.getOuterHTML, CSS.getComputedStyleForNode and
+        DOM.getBoxModel, then derives a CSS selector, an XPath and a preview.
+        Args:
+            backend_node_id: CDP backend node ID from inspectNodeRequested event
+        Returns:
+            Dictionary with element data compatible with browser_data schema
+        Raises:
+            RuntimeError: If CDP is not connected, the node cannot be described, or a
+                CDP command times out (the TimeoutError is chained as the cause)
+        """
+        if not self.cdp:
+            raise RuntimeError("CDP session not initialized")
+        # Use 15s timeout for interactive operations (balanced between responsiveness and heavy pages)
+        # Still shorter than default 30s to provide faster failure feedback
+        timeout = 15.0
+        try:
+            # Describe node directly with backendNodeId (no need for resolveNode first!)
+            describe_result = self.cdp.execute("DOM.describeNode", {"backendNodeId": backend_node_id}, timeout=timeout)
+            if "node" not in describe_result:
+                raise RuntimeError(f"Failed to describe node {backend_node_id}")
+            node = describe_result["node"]
+            node_id = node["nodeId"]
+            # Get outer HTML
+            html_result = self.cdp.execute("DOM.getOuterHTML", {"nodeId": node_id}, timeout=timeout)
+            outer_html = html_result.get("outerHTML", "")
+            # Get computed styles and flatten [{name, value}, ...] into {name: value}
+            styles_result = self.cdp.execute("CSS.getComputedStyleForNode", {"nodeId": node_id}, timeout=timeout)
+            styles = {prop["name"]: prop["value"] for prop in styles_result.get("computedStyle", [])}
+            # Get box model for badge positioning
+            try:
+                box_result = self.cdp.execute("DOM.getBoxModel", {"nodeId": node_id}, timeout=timeout)
+                # Use top-left corner of content box
+                content_box = box_result["model"]["content"]
+                badge_x = int(content_box[0])  # Top-left x
+                badge_y = int(content_box[1])  # Top-left y
+            except Exception:
+                # Fallback if element has no box model (display: none, etc.)
+                badge_x = 0
+                badge_y = 0
+        except TimeoutError as e:
+            logger.warning(f"Timeout extracting node {backend_node_id}: {e}")
+            raise RuntimeError("Element selection timed out - page may be busy or unresponsive") from e
+        # Generate CSS selector
+        css_selector = self._generate_css_selector(node)
+        # Generate XPath
+        xpath = self._generate_xpath(node)
+        # Generate jsPath (for js() command integration)
+        # Backslashes and single quotes are escaped so the selector stays a valid
+        # single-quoted JavaScript string literal.
+        js_selector = css_selector.replace("\\", "\\\\").replace("'", "\\'")
+        js_path = f"document.querySelector('{js_selector}')"
+        # Build preview (attribute parsing shared with the selector/XPath generators)
+        tag = node.get("nodeName", "").lower()
+        attrs_dict = self._parse_node_attributes(node)
+        preview = {
+            "tag": tag,
+            "id": attrs_dict.get("id", ""),
+            "classes": (attrs_dict.get("class") or "").split(),
+            "text": self._get_node_text(outer_html)[:100],  # First 100 chars
+        }
+        # Build complete data structure (compatible with existing schema)
+        return {
+            "outerHTML": outer_html,
+            "selector": css_selector,
+            "jsPath": js_path,
+            "styles": styles,
+            "xpath": xpath,
+            "fullXpath": xpath,  # CDP doesn't distinguish, use same
+            "preview": preview,
+            "badge": {"x": badge_x, "y": badge_y},
+            "nodeId": node_id,
+            "backendNodeId": backend_node_id,
+        }
+    def _generate_css_selector(self, node: dict) -> str:
+        """Generate a CSS selector for node.
+        Strategies, in order:
+        1. ID if available: ``#id`` when the ID is a plain CSS identifier, otherwise
+           an ``[id="..."]`` attribute selector (``#`` followed by e.g. a digit or a
+           value containing ``:`` or ``.`` would not select the element)
+        2. Tag + first two classes + ``:nth-child(n)`` when the position among the
+           parent's element children can be determined
+        Args:
+            node: CDP node description
+        Returns:
+            CSS selector string
+        """
+        # Parse attributes
+        attrs_dict = self._parse_node_attributes(node)
+        # Strategy 1: ID selector (unique by definition)
+        element_id = attrs_dict.get("id")
+        if element_id:
+            if re.fullmatch(r"-?[A-Za-z_][\w-]*", element_id):
+                return f"#{element_id}"
+            # Quote IDs that are not valid bare identifiers instead of emitting a broken #id
+            quoted_id = element_id.replace("\\", "\\\\").replace('"', '\\"')
+            return f'[id="{quoted_id}"]'
+        # Strategy 2: Build selector with tag + classes + nth-child
+        tag = node.get("nodeName", "").lower()
+        selector = tag
+        # Add first 2 classes for specificity without being too brittle
+        classes = (attrs_dict.get("class") or "").split()[:2]
+        if classes:
+            selector += "." + ".".join(classes)
+        # Add nth-child for uniqueness within parent
+        # This is key to distinguishing elements with same tag/class
+        parent_id = node.get("parentId")
+        if parent_id and self.cdp:
+            try:
+                # Get parent node to count children
+                parent_result = self.cdp.execute("DOM.describeNode", {"nodeId": parent_id}, timeout=5.0)
+                if "node" in parent_result:
+                    parent_node = parent_result["node"]
+                    children = parent_node.get("children")
+                    if children is not None:
+                        # CDP returns child nodes under "children"; :nth-child counts
+                        # element nodes only (nodeType 1), so text/comment nodes are skipped
+                        sibling_ids = [child.get("nodeId") for child in children if child.get("nodeType") == 1]
+                    else:
+                        # Kept for backward compatibility with responses that carry a flat ID list
+                        sibling_ids = parent_node.get("childNodeIds", [])
+                    # Find our position among siblings
+                    node_id = node.get("nodeId")
+                    if node_id and node_id in sibling_ids:
+                        nth = sibling_ids.index(node_id) + 1
+                        selector += f":nth-child({nth})"
+            except Exception as e:
+                logger.debug(f"Could not add nth-child to selector: {e}")
+        return selector
+    def _parse_node_attributes(self, node: dict) -> dict:
+        """Turn CDP's flat attribute list into a name -> value mapping.
+        Args:
+            node: CDP node whose "attributes" entry is [name1, value1, name2, value2, ...]
+        Returns:
+            Dictionary of {name: value}; a trailing name without a value is dropped
+        """
+        flat = node.get("attributes", [])
+        # Even positions are names, odd positions are their values; zip stops at the
+        # shorter slice, which discards a dangling final name.
+        return dict(zip(flat[::2], flat[1::2]))
+    def _generate_xpath(self, node: dict) -> str:
+        """Generate XPath for node.
+        Prefers an ``@id`` match, then an exact match on the full ``@class`` attribute
+        value, and finally falls back to the bare tag name. Attribute values are
+        emitted as valid XPath string literals even when they contain quotes.
+        Args:
+            node: CDP node description
+        Returns:
+            XPath string
+        """
+        def _literal(value: str) -> str:
+            """Quote value as an XPath 1.0 string literal (which has no escape syntax)."""
+            if "'" not in value:
+                return f"'{value}'"
+            if '"' not in value:
+                return f'"{value}"'
+            # Both quote kinds present: stitch single-quoted pieces together with concat()
+            pieces = ", \"'\", ".join(f"'{part}'" for part in value.split("'"))
+            return f"concat({pieces})"
+        tag = node.get("nodeName", "").lower()
+        attrs_dict = self._parse_node_attributes(node)
+        # Prefer ID (unique)
+        element_id = attrs_dict.get("id")
+        if element_id:
+            return f"//{tag}[@id={_literal(element_id)}]"
+        # Use class attribute if available: exact match on the whole attribute value.
+        # A whitespace-only class attribute is treated as absent.
+        class_attr = attrs_dict.get("class")
+        if class_attr and class_attr.split():
+            return f"//{tag}[@class={_literal(class_attr)}]"
+        # Fallback to tag only
+        return f"//{tag}"
+    def _get_node_text(self, html: str) -> str:
+        """Return the text left after removing anything that looks like a tag.
+        This is a plain regex strip, not an HTML parser: entities are not decoded
+        and every ``<...>`` run is removed.
+        Args:
+            html: Outer HTML string
+        Returns:
+            Remaining text with leading/trailing whitespace removed
+        """
+        tag_pattern = r"<[^>]+>"
+        return re.sub(tag_pattern, "", html).strip()
+    def get_state(self) -> dict[str, Any]:
+        """Get current DOM service state (thread-safe).
+        The whole snapshot is taken while holding the state lock, and selections are
+        deep-copied so callers can use the result without affecting stored state.
+        Returns:
+            State dictionary with inspect_active, selections, prompt and pending count
+        """
+        import copy
+        # Thread-safe read: protect against concurrent writes from other threads
+        with self._state_lock:
+            selections = {}
+            prompt = ""
+            if self.state is not None and self.state.browser_data:
+                # Deep copy to prevent mutations during SSE broadcast (a plain dict()
+                # copy would still share the nested per-selection dictionaries)
+                selections = copy.deepcopy(self.state.browser_data.get("selections", {}))
+                prompt = self.state.browser_data.get("prompt", "")
+            # Read the counters under the same lock so the snapshot is consistent
+            inspect_active = self._inspection_active
+            pending_count = self._pending_selections
+        return {
+            "inspect_active": inspect_active,
+            "selections": selections,
+            "prompt": prompt,
+            "pending_count": pending_count,  # For progress indicator
+        }
+    def clear_selections(self) -> None:
+        """Drop every stored selection and restart ID numbering at 1 (thread-safe)."""
+        with self._state_lock:
+            store = None if self.state is None else self.state.browser_data
+            # Only touch browser_data when it exists and is non-empty
+            if store:
+                store["selections"] = {}
+            self._next_id = 1
+        logger.info("Selections cleared")
+    def cleanup(self) -> None:
+        """Cleanup resources (executor, callbacks).
+        Call this before disconnect or app exit.
+        Shuts the worker pool down without waiting, then attempts to turn inspect
+        mode off if it is still flagged active. Both steps are best-effort:
+        exceptions are logged at debug level and not raised.
+        """
+        # Shutdown executor - wait=False to avoid blocking on stuck tasks
+        # cancel_futures=True prevents hanging on incomplete selections (Python 3.9+)
+        # NOTE(review): a cancelled queued task never runs _process_node_selection, so its
+        # finally-block decrement is skipped and _pending_selections may stay above zero -
+        # confirm that is acceptable at shutdown.
+        if hasattr(self, "_executor"):
+            try:
+                self._executor.shutdown(wait=False, cancel_futures=True)
+                logger.info("ThreadPoolExecutor shut down")
+            except Exception as e:
+                logger.debug(f"Executor shutdown error (non-fatal): {e}")
+        # Clear inspection state
+        # stop_inspect() reports most failures via its return value, which is ignored here
+        if self._inspection_active:
+            try:
+                self.stop_inspect()
+            except Exception as e:
+                logger.debug(f"Failed to stop inspect on cleanup: {e}")
+__all__ = ["DOMService"]

webtap-tool 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

webtap-tool 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl