thordata-sdk 1.7.0__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thordata_sdk-1.7.0/src/thordata_sdk.egg-info → thordata_sdk-1.8.0}/PKG-INFO +4 -2
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/README.md +1 -1
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/pyproject.toml +4 -1
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/__init__.py +1 -1
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/async_client.py +33 -4
- thordata_sdk-1.8.0/src/thordata/browser/__init__.py +16 -0
- thordata_sdk-1.8.0/src/thordata/browser/exceptions.py +23 -0
- thordata_sdk-1.8.0/src/thordata/browser/session.py +469 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/client.py +3 -1
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/serp.py +31 -3
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0/src/thordata_sdk.egg-info}/PKG-INFO +4 -2
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/SOURCES.txt +4 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/requires.txt +3 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_async_client.py +3 -1
- thordata_sdk-1.8.0/tests/test_browser.py +104 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/LICENSE +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/setup.cfg +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/_utils.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/async_unlimited.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/core/__init__.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/core/async_http_client.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/core/http_client.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/core/tunnel.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/enums.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/exceptions.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/models.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/retry.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/serp_engines.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/__init__.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/base.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/code.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/ecommerce.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/professional.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/search.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/social.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/travel.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/tools/video.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/__init__.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/common.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/proxy.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/task.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/types/universal.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata/unlimited.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_async_client_errors.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_batch_creation.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_client.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_client_errors.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_enums.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_examples.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_integration_proxy_protocols.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_retry.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_spec_parity.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_task_status_and_wait.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_tools.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_tools_coverage.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_unlimited.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_user_agent.py +0 -0
- {thordata_sdk-1.7.0 → thordata_sdk-1.8.0}/tests/test_utils.py +0 -0
```diff
--- thordata_sdk-1.7.0/src/thordata_sdk.egg-info/PKG-INFO
+++ thordata_sdk-1.8.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thordata-sdk
-Version: 1.7.0
+Version: 1.8.0
 Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
 Author-email: Thordata Developer Team <support@thordata.com>
 License: MIT
@@ -40,6 +40,8 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
 Requires-Dist: types-requests>=2.28.0; extra == "dev"
 Requires-Dist: aioresponses>=0.7.6; extra == "dev"
+Provides-Extra: browser
+Requires-Dist: playwright>=1.40.0; extra == "browser"
 Dynamic: license-file
 
 # Thordata Python SDK
@@ -63,7 +65,7 @@ Dynamic: license-file
 
 ## 📖 Introduction
 
-The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
+The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
 
 **Why v1.6.0?**
 * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
```
```diff
--- thordata_sdk-1.7.0/README.md
+++ thordata_sdk-1.8.0/README.md
@@ -19,7 +19,7 @@
 
 ## 📖 Introduction
 
-The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
+The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
 
 **Why v1.6.0?**
 * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
```
```diff
--- thordata_sdk-1.7.0/pyproject.toml
+++ thordata_sdk-1.8.0/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "thordata-sdk"
-version = "1.7.0"
+version = "1.8.0"
 description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -61,6 +61,9 @@ dev = [
     "types-requests>=2.28.0",
     "aioresponses>=0.7.6",
 ]
+browser = [
+    "playwright>=1.40.0",
+]
 
 [project.urls]
 "Homepage" = "https://www.thordata.com"
```
```diff
--- thordata_sdk-1.7.0/src/thordata/__init__.py
+++ thordata_sdk-1.8.0/src/thordata/__init__.py
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
 Universal Scraping API (Web Unlocker), and Web Scraper API.
 """
 
-__version__ = "1.7.0"
+__version__ = "1.8.0"
 __author__ = "Thordata Developer Team/Kael Odin"
 __email__ = "support@thordata.com"
 
```
```diff
--- thordata_sdk-1.7.0/src/thordata/async_client.py
+++ thordata_sdk-1.8.0/src/thordata/async_client.py
@@ -244,6 +244,7 @@ class AsyncThordataClient:
         render_js: bool | None = None,
         no_cache: bool | None = None,
         output_format: str = "json",
+        ai_overview: bool = False,
         **kwargs: Any,
     ) -> dict[str, Any]:
         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
@@ -258,13 +259,14 @@
             render_js=render_js,
             no_cache=no_cache,
             output_format=output_format,
+            ai_overview=ai_overview,
             extra_params=kwargs,
         )
         return await self.serp_search_advanced(request)
 
     async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token required")
+            raise ThordataConfigError("scraper_token is required for SERP API")
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
         logger.info(f"Async SERP: {request.engine} - {request.query}")
@@ -324,7 +326,7 @@
         self, request: UniversalScrapeRequest
     ) -> str | bytes | dict[str, str | bytes]:
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token required")
+            raise ThordataConfigError("scraper_token is required for Universal API")
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
@@ -448,7 +450,7 @@
     async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
         self._require_public_credentials()
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token required")
+            raise ThordataConfigError("scraper_token is required for Task Builder")
         payload = config.to_payload()
         headers = build_builder_headers(
             self.scraper_token, str(self.public_token), str(self.public_key)
@@ -486,7 +488,9 @@
     async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
         self._require_public_credentials()
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token required")
+            raise ThordataConfigError(
+                "scraper_token is required for Video Task Builder"
+            )
         payload = config.to_payload()
         headers = build_builder_headers(
             self.scraper_token, str(self.public_token), str(self.public_key)
@@ -1104,3 +1108,28 @@
         safe_user = quote(final_user, safe="")
         safe_pass = quote(pwd, safe="")
         return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
+
+    @property
+    def browser(self):
+        """Get a browser session for automation.
+
+        Requires playwright: pip install thordata[browser]
+
+        Returns:
+            BrowserSession instance
+
+        Example:
+            async with AsyncThordataClient() as client:
+                session = client.browser
+                await session.navigate("https://example.com")
+                snapshot = await session.snapshot()
+        """
+        try:
+            from .browser import BrowserSession
+
+            return BrowserSession(self)
+        except ImportError as e:
+            raise ImportError(
+                "Playwright is required for browser automation. "
+                "Install it with: pip install thordata[browser]"
+            ) from e
```
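Taken together, the async client gains two user-facing features in 1.8.0: an `ai_overview` pass-through on `serp_search` and a lazily imported `browser` property. A minimal sketch of both, with placeholder credentials; the parts of the `serp_search` signature not shown in the hunks above are assumptions:

```python
import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(scraper_token="YOUR_TOKEN") as client:
        # New in 1.8.0: request Google's AI Overview block with the results.
        # Only valid for engine="google"; other engines raise ValueError
        # when the SerpRequest payload is built.
        results = await client.serp_search(
            "thordata sdk", engine="google", ai_overview=True
        )
        print(type(results))  # dict

        # New in 1.8.0: client.browser returns a BrowserSession, raising
        # ImportError with an install hint if playwright is missing.
        session = client.browser
        await session.navigate("https://example.com")
        snap = await session.snapshot()
        print(snap["title"])
        await session.close()


asyncio.run(main())
```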
```diff
--- /dev/null
+++ thordata_sdk-1.8.0/src/thordata/browser/__init__.py
@@ -0,0 +1,16 @@
+"""Browser automation module for Thordata Scraping Browser.
+
+This module provides high-level browser automation capabilities using Playwright.
+Requires optional dependency: pip install thordata[browser]
+"""
+
+from __future__ import annotations
+
+try:
+    from .exceptions import BrowserConnectionError, BrowserError
+    from .session import BrowserSession
+
+    __all__ = ["BrowserSession", "BrowserError", "BrowserConnectionError"]
+except ImportError:
+    # Playwright not installed
+    __all__ = []
```
```diff
--- /dev/null
+++ thordata_sdk-1.8.0/src/thordata/browser/exceptions.py
@@ -0,0 +1,23 @@
+"""Browser automation exceptions."""
+
+from __future__ import annotations
+
+from ..exceptions import ThordataError
+
+
+class BrowserError(ThordataError):
+    """Base exception for browser automation errors."""
+
+    pass
+
+
+class BrowserConnectionError(BrowserError):
+    """Raised when browser connection fails."""
+
+    pass
+
+
+class BrowserSessionError(BrowserError):
+    """Raised when browser session operations fail."""
+
+    pass
```
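Since all three classes derive from `ThordataError`, callers can catch browser failures at whatever granularity they need. A small illustrative sketch:

```python
# Illustrative only: BrowserConnectionError and BrowserSessionError both
# subclass BrowserError, which subclasses the SDK-wide ThordataError.
from thordata.browser import BrowserConnectionError, BrowserError
from thordata.exceptions import ThordataError

try:
    raise BrowserConnectionError("Connection failed")
except BrowserError as e:  # catches both browser-specific subclasses
    assert isinstance(e, ThordataError)  # still an SDK-level error
    print(e)  # Connection failed
```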
```diff
--- /dev/null
+++ thordata_sdk-1.8.0/src/thordata/browser/session.py
@@ -0,0 +1,469 @@
+"""Browser session management for Thordata Scraping Browser.
+
+This module provides a high-level wrapper around Playwright connected to
+Thordata's Scraping Browser.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+from urllib.parse import urlparse
+
+try:
+    from playwright.async_api import Browser, Page, Playwright, async_playwright
+except ImportError as e:
+    raise ImportError(
+        "Playwright is required for browser automation. "
+        "Install it with: pip install thordata[browser]"
+    ) from e
+
+from ..async_client import AsyncThordataClient
+from .exceptions import BrowserConnectionError, BrowserSessionError
+
+logger = logging.getLogger(__name__)
+
+
+class BrowserSession:
+    """Domain-aware browser session wrapper for Thordata Scraping Browser."""
+
+    def __init__(
+        self,
+        client: AsyncThordataClient,
+        username: str | None = None,
+        password: str | None = None,
+    ) -> None:
+        """Initialize browser session.
+
+        Args:
+            client: AsyncThordataClient instance
+            username: Browser username (optional, can use env var)
+            password: Browser password (optional, can use env var)
+        """
+        self._client = client
+        self._username = username
+        self._password = password
+        self._playwright: Playwright | None = None
+        self._browsers: dict[str, Browser] = {}
+        self._pages: dict[str, Page] = {}
+        self._current_domain: str = "default"
+
+    @staticmethod
+    def _get_domain(url: str) -> str:
+        """Extract domain from URL."""
+        try:
+            parsed = urlparse(url)
+            return parsed.hostname or "default"
+        except Exception:
+            return "default"
+
+    async def _ensure_playwright(self) -> Playwright:
+        """Ensure Playwright is started."""
+        if self._playwright is None:
+            self._playwright = await async_playwright().start()
+        return self._playwright
+
+    async def get_browser(self, domain: str = "default") -> Browser:
+        """Get or create a browser instance for a given domain."""
+        existing = self._browsers.get(domain)
+        if existing and existing.is_connected():
+            return existing
+
+        # Clean up stale browser/page
+        if existing is not None:
+            logger.info("Browser for domain %s disconnected, recreating", domain)
+            self._browsers.pop(domain, None)
+            self._pages.pop(domain, None)
+
+        playwright = await self._ensure_playwright()
+
+        logger.info("Connecting to Thordata Scraping Browser for domain %s", domain)
+
+        # Get browser credentials
+        import os
+
+        user = self._username or os.getenv("THORDATA_BROWSER_USERNAME")
+        pwd = self._password or os.getenv("THORDATA_BROWSER_PASSWORD")
+
+        if not user or not pwd:
+            raise BrowserConnectionError(
+                "Missing browser credentials. Set THORDATA_BROWSER_USERNAME and "
+                "THORDATA_BROWSER_PASSWORD or pass them to BrowserSession."
+            )
+
+        # Retry logic with exponential backoff
+        max_retries = 3
+        last_error = None
+
+        for attempt in range(max_retries):
+            try:
+                ws_url = self._client.get_browser_connection_url(
+                    username=user, password=pwd
+                )
+                logger.debug(
+                    "Attempt %d/%d: Connecting to %s...",
+                    attempt + 1,
+                    max_retries,
+                    ws_url[:50],
+                )
+                browser = await playwright.chromium.connect_over_cdp(ws_url)
+                logger.info("Successfully connected to browser for domain %s", domain)
+                self._browsers[domain] = browser
+                return browser
+            except Exception as e:
+                last_error = e
+                logger.warning(
+                    "Browser connection attempt %d/%d failed: %s",
+                    attempt + 1,
+                    max_retries,
+                    e,
+                )
+
+                if attempt < max_retries - 1:
+                    import asyncio
+
+                    wait_time = 2**attempt  # Exponential backoff: 1s, 2s, 4s
+                    logger.info("Retrying in %d seconds...", wait_time)
+                    await asyncio.sleep(wait_time)
+
+        # If all retries failed, raise the last error
+        raise BrowserConnectionError(
+            f"Failed to connect to Thordata Scraping Browser after {max_retries} attempts. "
+            f"Last error: {last_error}"
+        ) from last_error
+
+    async def get_page(self, url: str | None = None) -> Page:
+        """Get or create a page for the current (or provided) domain."""
+        if url:
+            self._current_domain = self._get_domain(url)
+        domain = self._current_domain
+
+        existing = self._pages.get(domain)
+        if existing and not existing.is_closed():
+            return existing
+
+        browser = await self.get_browser(domain)
+        contexts = browser.contexts
+        if not contexts:
+            context = await browser.new_context()
+        else:
+            context = contexts[0]
+
+        pages = context.pages
+        if pages:
+            page = pages[0]
+        else:
+            page = await context.new_page()
+
+        self._pages[domain] = page
+        return page
+
+    async def navigate(self, url: str, timeout: int = 120000) -> dict[str, Any]:
+        """Navigate to a URL.
+
+        Args:
+            url: Target URL
+            timeout: Navigation timeout in milliseconds
+
+        Returns:
+            Dictionary with url and title
+        """
+        page = await self.get_page(url)
+        if page.url != url:
+            await page.goto(url, timeout=timeout)
+        title = await page.title()
+        return {"url": page.url, "title": title}
+
+    async def snapshot(
+        self, filtered: bool = True, max_items: int = 80
+    ) -> dict[str, Any]:
+        """Capture an ARIA-like snapshot of the current page.
+
+        Args:
+            filtered: Whether to filter to interactive elements only
+            max_items: Maximum number of elements to include
+
+        Returns:
+            Dictionary with url, title, and aria_snapshot
+        """
+        page = await self.get_page()
+        full_snapshot = await self._get_interactive_snapshot(page)
+
+        if not filtered:
+            return {
+                "url": page.url,
+                "title": await page.title(),
+                "aria_snapshot": full_snapshot,
+            }
+
+        # Filter and limit
+        filtered_snapshot = self._filter_snapshot(full_snapshot)
+        filtered_snapshot = self._limit_snapshot_items(
+            filtered_snapshot, max_items=max_items
+        )
+
+        return {
+            "url": page.url,
+            "title": await page.title(),
+            "aria_snapshot": filtered_snapshot,
+        }
+
+    async def click_ref(
+        self, ref: str, wait_for_navigation_ms: int | None = None
+    ) -> dict[str, Any]:
+        """Click an element by its ref ID.
+
+        Args:
+            ref: The ref ID from snapshot (e.g., "1" or "dom-1")
+            wait_for_navigation_ms: Optional wait time in ms to detect navigation
+
+        Returns:
+            Dictionary with click result information
+        """
+        page = await self.get_page()
+        url_before = page.url
+
+        try:
+            locator = page.locator(f'[data-fastmcp-ref="{ref}"]').first
+            await locator.click(timeout=5000)
+
+            # Check for navigation if requested
+            did_navigate = False
+            url_after = url_before
+            if wait_for_navigation_ms and wait_for_navigation_ms > 0:
+                import asyncio
+
+                await asyncio.sleep(wait_for_navigation_ms / 1000)
+                url_after = page.url
+                did_navigate = url_after != url_before
+
+            return {
+                "message": "Successfully clicked element",
+                "ref": ref,
+                "url_before": url_before,
+                "url_after": url_after,
+                "did_navigate": did_navigate,
+            }
+        except Exception as e:
+            raise BrowserSessionError(f"Failed to click element: {e}") from e
+
+    async def type_ref(
+        self, ref: str, text: str, submit: bool = False
+    ) -> dict[str, Any]:
+        """Type text into an element by its ref ID.
+
+        Args:
+            ref: The ref ID from snapshot
+            text: Text to type
+            submit: Whether to press Enter after typing
+
+        Returns:
+            Dictionary with type result information
+        """
+        page = await self.get_page()
+        url_before = page.url
+
+        try:
+            locator = page.locator(f'[data-fastmcp-ref="{ref}"]').first
+            await locator.fill(text)
+            if submit:
+                await locator.press("Enter")
+
+            return {
+                "message": "Typed into element" + (" and submitted" if submit else ""),
+                "ref": ref,
+                "url_before": url_before,
+                "url_after": page.url,
+            }
+        except Exception as e:
+            raise BrowserSessionError(f"Failed to type into element: {e}") from e
+
+    async def screenshot_page(self, full_page: bool = False) -> bytes:
+        """Take a screenshot of the current page.
+
+        Args:
+            full_page: Whether to capture full page or viewport only
+
+        Returns:
+            Screenshot as bytes (PNG format)
+        """
+        page = await self.get_page()
+        return await page.screenshot(full_page=full_page)
+
+    async def get_html(self, full_page: bool = False) -> str:
+        """Get the HTML content of the current page.
+
+        Args:
+            full_page: Whether to get full page HTML or body only
+
+        Returns:
+            HTML content as string
+        """
+        page = await self.get_page()
+        if full_page:
+            return await page.content()
+        else:
+            try:
+                return await page.evaluate("document.body.innerHTML")
+            except Exception:
+                return await page.content()
+
+    async def scroll(self) -> dict[str, Any]:
+        """Scroll to the bottom of the page.
+
+        Returns:
+            Dictionary with scroll result
+        """
+        page = await self.get_page()
+        await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+        return {"message": "Scrolled to bottom"}
+
+    async def go_back(self) -> dict[str, Any]:
+        """Navigate back in browser history.
+
+        Returns:
+            Dictionary with new URL
+        """
+        page = await self.get_page()
+        await page.go_back()
+        return {"url": page.url}
+
+    async def _get_interactive_snapshot(self, page: Page) -> str:
+        """Generate a text snapshot of interactive elements with refs."""
+        script = """
+        () => {
+            function getSnapshot() {
+                const lines = [];
+                let refCounter = 0;
+
+                function normalizeRole(tag, explicitRole) {
+                    const role = (explicitRole || '').toLowerCase();
+                    const t = (tag || '').toLowerCase();
+                    if (role) return role;
+                    if (t === 'a') return 'link';
+                    if (t === 'button') return 'button';
+                    if (t === 'input') return 'textbox';
+                    if (t === 'select') return 'combobox';
+                    if (t === 'textarea') return 'textbox';
+                    return t;
+                }
+
+                function traverse(node) {
+                    if (node.nodeType === Node.ELEMENT_NODE) {
+                        const tag = node.tagName.toLowerCase();
+                        const interactiveTag = ['a', 'button', 'input', 'select', 'textarea'].includes(tag);
+                        const role = normalizeRole(tag, node.getAttribute('role'));
+                        const interactiveRole = ['button', 'link', 'textbox', 'searchbox', 'combobox', 'checkbox', 'radio', 'switch', 'tab', 'menuitem', 'option'].includes(role);
+
+                        if (interactiveTag || interactiveRole) {
+                            if (!node.dataset.fastmcpRef) {
+                                node.dataset.fastmcpRef = (++refCounter).toString();
+                            }
+                            let name = node.innerText || node.getAttribute('aria-label') || '';
+                            name = (name || '').replace(/\\s+/g, ' ').trim().substring(0, 80);
+
+                            lines.push(`- ${role} "${name}" [ref=${node.dataset.fastmcpRef}]`);
+                            if (node.href) {
+                                lines.push(`  /url: "${node.href}"`);
+                            }
+                        }
+                    }
+
+                    node.childNodes.forEach(child => traverse(child));
+                }
+
+                traverse(document.body);
+                return lines.join('\\n');
+            }
+            return getSnapshot();
+        }
+        """
+        return await page.evaluate(script)
+
+    @staticmethod
+    def _filter_snapshot(snapshot_text: str) -> str:
+        """Filter snapshot to interactive elements only."""
+        import re
+
+        lines = snapshot_text.split("\n")
+        filtered = []
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            trimmed = line.strip()
+
+            if not trimmed or not trimmed.startswith("-"):
+                i += 1
+                continue
+
+            # Extract role
+            role_match = re.match(r"^-\s+([a-zA-Z]+)", trimmed)
+            if not role_match:
+                i += 1
+                continue
+
+            role = role_match.group(1).lower()
+            interactive_roles = {
+                "button",
+                "link",
+                "textbox",
+                "searchbox",
+                "combobox",
+                "checkbox",
+                "radio",
+                "switch",
+                "tab",
+                "menuitem",
+                "option",
+            }
+
+            if role in interactive_roles:
+                filtered.append(line)
+                # Include next line if it's a URL
+                if i + 1 < len(lines) and "/url:" in lines[i + 1]:
+                    filtered.append(lines[i + 1])
+                    i += 1
+
+            i += 1
+
+        return "\n".join(filtered)
+
+    @staticmethod
+    def _limit_snapshot_items(text: str, *, max_items: int) -> str:
+        """Limit snapshot to the first N interactive element blocks."""
+        if max_items <= 0:
+            return ""
+        if not text:
+            return text
+
+        lines = text.splitlines()
+        out: list[str] = []
+        items = 0
+        for line in lines:
+            if line.startswith("- ") or line.startswith("["):
+                if items >= max_items:
+                    break
+                items += 1
+            if items > 0:
+                out.append(line)
+        return "\n".join(out).strip()
+
+    async def close(self) -> None:
+        """Cleanly close all pages, browsers, and Playwright."""
+        import contextlib
+
+        for page in list(self._pages.values()):
+            with contextlib.suppress(Exception):
+                await page.close()
+        self._pages.clear()
+
+        for browser in list(self._browsers.values()):
+            with contextlib.suppress(Exception):
+                await browser.close()
+        self._browsers.clear()
+
+        if self._playwright is not None:
+            try:
+                await self._playwright.stop()
+            finally:
+                self._playwright = None
```
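For orientation, here is how the pieces above compose in practice: `navigate()` resolves a domain-keyed browser and page, `snapshot()` tags interactive elements with `data-fastmcp-ref` IDs and returns a filtered listing, and `click_ref()`/`type_ref()` act on those IDs. A sketch, assuming `THORDATA_BROWSER_USERNAME`/`THORDATA_BROWSER_PASSWORD` are set and using a hypothetical target page:

```python
import asyncio

from thordata import AsyncThordataClient


async def run() -> None:
    async with AsyncThordataClient(scraper_token="YOUR_TOKEN") as client:
        session = client.browser
        await session.navigate("https://example.com")

        # Listing lines look like:  - link "More information" [ref=1]
        #                             /url: "https://..."
        snap = await session.snapshot(filtered=True, max_items=40)
        print(snap["aria_snapshot"])

        # Interact by ref instead of CSS selector; the optional wait lets
        # click_ref report whether the click caused a navigation.
        result = await session.click_ref("1", wait_for_navigation_ms=2000)
        if result["did_navigate"]:
            print("navigated to", result["url_after"])

        await session.close()


asyncio.run(run())
```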
```diff
--- thordata_sdk-1.7.0/src/thordata/client.py
+++ thordata_sdk-1.8.0/src/thordata/client.py
@@ -309,6 +309,7 @@ class ThordataClient:
         render_js: bool | None = None,
         no_cache: bool | None = None,
         output_format: str = "json",
+        ai_overview: bool = False,
         **kwargs: Any,
     ) -> dict[str, Any]:
         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
@@ -324,6 +325,7 @@ class ThordataClient:
             render_js=render_js,
             no_cache=no_cache,
             output_format=output_format,
+            ai_overview=ai_overview,
             extra_params=kwargs,
         )
         return self.serp_search_advanced(request)
@@ -396,7 +398,7 @@ class ThordataClient:
         self, request: UniversalScrapeRequest
     ) -> str | bytes | dict[str, str | bytes]:
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token required")
+            raise ThordataConfigError("scraper_token is required for Universal API")
 
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
```
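The synchronous client mirrors the async change. A one-call sketch with placeholder credentials, assuming `ThordataClient` is exported from the package root the same way `AsyncThordataClient` is:

```python
from thordata import ThordataClient

client = ThordataClient(scraper_token="YOUR_TOKEN")
# Same ai_overview pass-through as AsyncThordataClient.serp_search,
# forwarded into SerpRequest and validated at payload-build time.
results = client.serp_search("thordata sdk", engine="google", ai_overview=True)
```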
```diff
--- thordata_sdk-1.7.0/src/thordata/types/serp.py
+++ thordata_sdk-1.8.0/src/thordata/types/serp.py
@@ -14,6 +14,7 @@ from .common import ThordataBaseConfig
 class Engine(str, Enum):
     # Google
     GOOGLE = "google"
+    GOOGLE_AI_MODE = "google_ai_mode"
     GOOGLE_NEWS = "google_news"
     GOOGLE_SHOPPING = "google_shopping"
     GOOGLE_VIDEOS = "google_videos"
@@ -21,10 +22,18 @@ class Engine(str, Enum):
     GOOGLE_MAPS = "google_maps"
     GOOGLE_JOBS = "google_jobs"
     GOOGLE_PLAY = "google_play"
+    GOOGLE_PLAY_PRODUCT = "google_play_product"
+    GOOGLE_PLAY_GAMES = "google_play_games"
+    GOOGLE_PLAY_MOVIES = "google_play_movies"
+    GOOGLE_PLAY_BOOKS = "google_play_books"
     GOOGLE_TRENDS = "google_trends"
     GOOGLE_SCHOLAR = "google_scholar"
+    GOOGLE_SCHOLAR_CITE = "google_scholar_cite"
+    GOOGLE_SCHOLAR_AUTHOR = "google_scholar_author"
     GOOGLE_PATENTS = "google_patents"
+    GOOGLE_PATENTS_DETAILS = "google_patents_details"
     GOOGLE_FINANCE = "google_finance"
+    GOOGLE_FINANCE_MARKETS = "google_finance_markets"
     GOOGLE_FLIGHTS = "google_flights"
     GOOGLE_LENS = "google_lens"
     GOOGLE_HOTELS = "google_hotels"
@@ -40,7 +49,7 @@ class Engine(str, Enum):
     # Others
     YANDEX = "yandex"
     DUCKDUCKGO = "duckduckgo"
-    BAIDU = "baidu"
+    BAIDU = "baidu"  # Deprecated: Not supported by Dashboard
 
     # Legacy / Compatibility Aliases
     GOOGLE_SEARCH = "google_search"
@@ -117,12 +126,14 @@ class SerpRequest(ThordataBaseConfig):
     render_js: bool | None = None
     no_cache: bool | None = None
 
-    # Output format: "json" (json=1), "html" (json=3), "light_json" (json=4)
+    # Output format: "json" (json=1), "html" (json=3), "light_json" (json=4)
+    # Note: "both" (json=2) format is not supported by Dashboard
     output_format: str = "json"
 
     # Advanced Google
     ludocid: str | None = None
     kgmid: str | None = None
+    ai_overview: bool = False  # Only supported for engine=google
 
     # Pass-through for any other param
     extra_params: dict[str, Any] = field(default_factory=dict)
@@ -155,7 +166,8 @@ class SerpRequest(ThordataBaseConfig):
         }
 
         # JSON output handling
-        # Dashboard mapping: json=1 (json), json=3 (html), json=4 (light json)
+        # Dashboard mapping: json=1 (json), json=3 (html), json=4 (light json)
+        # Note: json=2 (both) format is not supported by Dashboard
         fmt = self.output_format.lower()
         if fmt == "json":
             payload["json"] = "1"
@@ -164,6 +176,14 @@ class SerpRequest(ThordataBaseConfig):
         elif fmt in ("light_json", "light-json", "lightjson"):
             payload["json"] = "4"
         elif fmt in ("2", "both", "json+html"):
+            import warnings
+
+            warnings.warn(
+                "The 'both' output format (json=2) is not supported by Dashboard. "
+                "Use 'json' or 'html' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             payload["json"] = "2"
         # If no json param is set, default to HTML (legacy behavior)
 
@@ -223,6 +243,14 @@ class SerpRequest(ThordataBaseConfig):
         if self.kgmid:
             payload["kgmid"] = self.kgmid
 
+        # AI Overview (only for Google engine)
+        if self.ai_overview:
+            if engine != "google":
+                raise ValueError(
+                    "ai_overview parameter is only supported for engine=google"
+                )
+            payload["ai_overview"] = "true"
+
         # Merge extras
         payload.update(self.extra_params)
         return payload
```
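To make the new validation concrete, a sketch of `to_payload()` behavior for the added fields; constructor usage is inferred from how the clients build `SerpRequest`, so treat field names beyond the hunks as assumptions:

```python
import warnings

from thordata.types.serp import SerpRequest

# ai_overview=True is forwarded as a string flag for Google...
ok = SerpRequest(query="solar eclipse", engine="google", ai_overview=True)
assert ok.to_payload()["ai_overview"] == "true"

# ...and rejected for any other engine.
bad = SerpRequest(query="solar eclipse", engine="bing", ai_overview=True)
try:
    bad.to_payload()
except ValueError as e:
    print(e)  # ai_overview parameter is only supported for engine=google

# The legacy "both" format still maps to json=2 but now warns.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    SerpRequest(query="q", engine="google", output_format="both").to_payload()
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```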
```diff
--- thordata_sdk-1.7.0/PKG-INFO
+++ thordata_sdk-1.8.0/src/thordata_sdk.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thordata-sdk
-Version: 1.7.0
+Version: 1.8.0
 Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
 Author-email: Thordata Developer Team <support@thordata.com>
 License: MIT
@@ -40,6 +40,8 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
 Requires-Dist: types-requests>=2.28.0; extra == "dev"
 Requires-Dist: aioresponses>=0.7.6; extra == "dev"
+Provides-Extra: browser
+Requires-Dist: playwright>=1.40.0; extra == "browser"
 Dynamic: license-file
 
 # Thordata Python SDK
@@ -63,7 +65,7 @@ Dynamic: license-file
 
 ## 📖 Introduction
 
-The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
+The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
 
 **Why v1.6.0?**
 * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
```
```diff
--- thordata_sdk-1.7.0/src/thordata_sdk.egg-info/SOURCES.txt
+++ thordata_sdk-1.8.0/src/thordata_sdk.egg-info/SOURCES.txt
@@ -12,6 +12,9 @@ src/thordata/models.py
 src/thordata/retry.py
 src/thordata/serp_engines.py
 src/thordata/unlimited.py
+src/thordata/browser/__init__.py
+src/thordata/browser/exceptions.py
+src/thordata/browser/session.py
 src/thordata/core/__init__.py
 src/thordata/core/async_http_client.py
 src/thordata/core/http_client.py
@@ -39,6 +42,7 @@ src/thordata_sdk.egg-info/top_level.txt
 tests/test_async_client.py
 tests/test_async_client_errors.py
 tests/test_batch_creation.py
+tests/test_browser.py
 tests/test_client.py
 tests/test_client_errors.py
 tests/test_enums.py
```
```diff
--- thordata_sdk-1.7.0/tests/test_async_client.py
+++ thordata_sdk-1.8.0/tests/test_async_client.py
@@ -127,7 +127,9 @@ async def test_async_missing_scraper_token():
     async with client:
         # 3. Method call should fail
         # Updated match string to match actual code in async_client.py
-        with pytest.raises(ThordataConfigError, match="scraper_token required"):
+        with pytest.raises(
+            ThordataConfigError, match="scraper_token is required for SERP API"
+        ):
             await client.serp_search("test")
 
 
```
```diff
--- /dev/null
+++ thordata_sdk-1.8.0/tests/test_browser.py
@@ -0,0 +1,104 @@
+"""Tests for browser automation module."""
+
+from __future__ import annotations
+
+import pytest
+
+try:
+    import playwright.async_api  # noqa: F401
+
+    PLAYWRIGHT_AVAILABLE = True
+except ImportError:
+    PLAYWRIGHT_AVAILABLE = False
+
+from thordata import AsyncThordataClient
+from thordata.browser import BrowserConnectionError, BrowserError, BrowserSession
+
+
+@pytest.mark.skipif(not PLAYWRIGHT_AVAILABLE, reason="Playwright not installed")
+class TestBrowserSession:
+    """Tests for BrowserSession class."""
+
+    @pytest.fixture
+    def client(self):
+        """Create a test client."""
+        return AsyncThordataClient(scraper_token="test_token")
+
+    def test_browser_session_init(self, client):
+        """Test BrowserSession initialization."""
+        session = BrowserSession(client)
+        assert session._client == client
+        assert session._playwright is None
+
+    def test_browser_session_with_credentials(self, client):
+        """Test BrowserSession with credentials."""
+        session = BrowserSession(client, username="test_user", password="test_pass")
+        assert session._username == "test_user"
+        assert session._password == "test_pass"
+
+    def test_get_domain(self):
+        """Test domain extraction."""
+        assert BrowserSession._get_domain("https://example.com/page") == "example.com"
+        assert BrowserSession._get_domain("http://test.org") == "test.org"
+        assert BrowserSession._get_domain("invalid") == "default"
+
+    def test_filter_snapshot(self):
+        """Test snapshot filtering."""
+        snapshot = """
+- button "Click me" [ref=1]
+  /url: "https://example.com"
+- div "Not interactive" [ref=2]
+- link "Go here" [ref=3]
+  /url: "https://example.com/page"
+"""
+        filtered = BrowserSession._filter_snapshot(snapshot)
+        assert "button" in filtered
+        assert "link" in filtered
+        assert "div" not in filtered
+
+    def test_limit_snapshot_items(self):
+        """Test snapshot item limiting."""
+        snapshot = '- button "1" [ref=1]\n- button "2" [ref=2]\n- button "3" [ref=3]'
+        limited = BrowserSession._limit_snapshot_items(snapshot, max_items=2)
+        assert 'button "1"' in limited
+        assert 'button "2"' in limited
+        assert 'button "3"' not in limited
+
+
+@pytest.mark.skipif(not PLAYWRIGHT_AVAILABLE, reason="Playwright not installed")
+class TestBrowserClientIntegration:
+    """Tests for browser integration with AsyncThordataClient."""
+
+    @pytest.fixture
+    def client(self):
+        """Create a test client."""
+        return AsyncThordataClient(scraper_token="test_token")
+
+    def test_browser_property(self, client):
+        """Test browser property access."""
+        session = client.browser
+        assert isinstance(session, BrowserSession)
+        assert session._client == client
+
+    def test_browser_property_import_error(self, monkeypatch):
+        """Test browser property raises ImportError when playwright is not available."""
+        # This test verifies the error message, but since playwright might be installed
+        # in the test environment, we'll just verify the property exists
+        # The actual import error will be raised at runtime when playwright is missing
+        pass
+
+
+class TestBrowserExceptions:
+    """Tests for browser exceptions."""
+
+    def test_browser_error(self):
+        """Test BrowserError exception."""
+        error = BrowserError("Test error")
+        assert str(error) == "Test error"
+        assert isinstance(error, Exception)
+
+    def test_browser_connection_error(self):
+        """Test BrowserConnectionError exception."""
+        error = BrowserConnectionError("Connection failed")
+        assert str(error) == "Connection failed"
+        assert isinstance(error, BrowserError)
```