thordata-sdk 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
 Universal Scraping API (Web Unlocker), and Web Scraper API.
 """
 
-__version__ = "1.5.0"
+__version__ = "1.6.0"
 __author__ = "Thordata Developer Team/Kael Odin"
 __email__ = "support@thordata.com"
 
thordata/async_client.py CHANGED
@@ -124,10 +124,10 @@ class AsyncThordataClient:
         ).rstrip("/")
 
         self._gateway_base_url = os.getenv(
-            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
+            "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
         )
         self._child_base_url = os.getenv(
-            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
+            "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
         )
 
         # URL Construction
@@ -145,7 +145,7 @@ class AsyncThordataClient:
         self._proxy_users_url = f"{shared_api_base}/proxy-users"
 
         whitelist_base = os.getenv(
-            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
+            "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
         )
         self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
 
@@ -352,7 +352,7 @@ class AsyncThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
     ) -> str:
         config = ScraperTaskConfig(
@@ -434,7 +434,7 @@ class AsyncThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         common_settings: CommonSettings,
     ) -> str:
         config = VideoTaskConfig(
@@ -550,7 +550,7 @@ class AsyncThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
         *,
         max_wait: float = 600.0,
@@ -971,7 +971,12 @@ class AsyncThordataClient:
         if port:
            params["port"] = str(port)
 
-        username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
+        if product == "unlimited":
+            username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
+                "THORDATA_RESIDENTIAL_USERNAME"
+            )
+        else:
+            username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
         if username:
             params["td-customer"] = username
 
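Note: `parameters` on the task-creation methods now also accepts a list of dicts, so one task can batch several inputs. A minimal sketch, assuming the hunks above belong to a method named `create_scraper_task` (the method name sits outside the diff context) and using a spider id defined later in this diff:

    import asyncio
    from thordata import AsyncThordataClient

    async def main() -> None:
        client = AsyncThordataClient()  # credentials assumed to come from env vars
        task_id = await client.create_scraper_task(
            file_name="asin_batch",
            spider_id="amazon_product_by-asin",
            spider_name="amazon.com",
            # New in 1.6.0: a list of parameter dicts instead of a single dict
            parameters=[{"asin": "B000000001"}, {"asin": "B000000002"}],
        )
        print(task_id)

    asyncio.run(main())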
thordata/client.py CHANGED
@@ -159,10 +159,10 @@ class ThordataClient:
         ).rstrip("/")
 
         self._gateway_base_url = os.getenv(
-            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
+            "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
         )
         self._child_base_url = os.getenv(
-            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
+            "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
         )
 
         # URL Construction
@@ -183,7 +183,7 @@ class ThordataClient:
         self._proxy_users_url = f"{shared_api_base}/proxy-users"
 
         whitelist_base = os.getenv(
-            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
+            "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
         )
         self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
 
@@ -405,7 +405,7 @@ class ThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
     ) -> str:
         config = ScraperTaskConfig(
@@ -490,7 +490,7 @@ class ThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         common_settings: CommonSettings,
     ) -> str:
         config = VideoTaskConfig(
@@ -639,7 +639,7 @@ class ThordataClient:
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
         *,
         max_wait: float = 600.0,
@@ -862,7 +862,12 @@ class ThordataClient:
         if port:
            params["port"] = str(port)
 
-        username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
+        if product == "unlimited":
+            username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
+                "THORDATA_RESIDENTIAL_USERNAME"
+            )
+        else:
+            username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
         if username:
             params["td-customer"] = username
 
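The synchronous client mirrors the async changes: the default endpoints move from api.thordata.com to openapi.thordata.com, and the proxy-username lookup gains a product-specific branch. The observable configuration surface is environment variables; a sketch using only names taken from this diff (values are placeholders):

    import os

    # Optional overrides; the new defaults point at openapi.thordata.com
    os.environ["THORDATA_GATEWAY_BASE_URL"] = "https://openapi.thordata.com/api/gateway"
    os.environ["THORDATA_CHILD_BASE_URL"] = "https://openapi.thordata.com/api/child"
    os.environ["THORDATA_WHITELIST_BASE_URL"] = "https://openapi.thordata.com/api"

    # New in 1.6.0: when product == "unlimited", THORDATA_UNLIMITED_USERNAME
    # is checked first; THORDATA_RESIDENTIAL_USERNAME remains the fallback
    # used for the td-customer proxy parameter.
    os.environ["THORDATA_UNLIMITED_USERNAME"] = "my-unlimited-user"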
thordata/enums.py CHANGED
@@ -1,6 +1,6 @@
 """
 Enumerations for the Thordata Python SDK.
-Moved to thordata.types in v1.5.0.
+Moved to thordata.types in v1.6.0.
 This file is kept for backward compatibility.
 """
 
@@ -21,7 +21,7 @@ from .types import (
     SessionType,
     TaskStatus,
     TimeRange,
-    normalize_enum_value,  # 新增
+    normalize_enum_value,
 )
 
 __all__ = [
thordata/exceptions.py CHANGED
@@ -15,6 +15,7 @@ Exception Hierarchy:
 
 from __future__ import annotations
 
+from collections.abc import Mapping
 from typing import Any
 
 # =============================================================================
@@ -235,6 +236,46 @@ class ThordataNotCollectedError(ThordataAPIError):
 # =============================================================================
 
 
+def _extract_request_id(payload: Any) -> str | None:
+    if isinstance(payload, Mapping):
+        for key in ("request_id", "requestId", "x_request_id", "x-request-id"):
+            val = payload.get(key)
+            if val is not None:
+                return str(val)
+    return None
+
+
+def _extract_retry_after(payload: Any) -> int | None:
+    if isinstance(payload, Mapping):
+        for key in ("retry_after", "retryAfter", "retry-after"):
+            val = payload.get(key)
+            if isinstance(val, int):
+                return val
+            if isinstance(val, str) and val.isdigit():
+                return int(val)
+    return None
+
+
+def _build_error_message(
+    message: str,
+    *,
+    status_code: int | None,
+    code: int | None,
+    request_id: str | None,
+) -> str:
+    parts: list[str] = [message]
+    meta: list[str] = []
+    if status_code is not None:
+        meta.append(f"http={status_code}")
+    if code is not None and code != status_code:
+        meta.append(f"code={code}")
+    if request_id:
+        meta.append(f"request_id={request_id}")
+    if meta:
+        parts.append("(" + ", ".join(meta) + ")")
+    return " ".join(parts)
+
+
 def raise_for_code(
     message: str,
     *,
@@ -266,49 +307,59 @@ def raise_for_code(
     # Determine the effective error code.
     # Prefer payload `code` when present and not success (200),
     # otherwise fall back to HTTP status when it indicates an error.
+    # Determine the effective error code for routing.
     effective_code: int | None = None
-
     if code is not None and code != 200:
         effective_code = code
-    elif status_code is not None and status_code != 200:
+    elif status_code is not None and status_code >= 400:
         effective_code = status_code
     else:
         effective_code = code if code is not None else status_code
 
+    # Extract additional context from payload
+    final_request_id = request_id or _extract_request_id(payload)
+
+    # Build a consistent, informative error message
+    final_message = _build_error_message(
+        message,
+        status_code=status_code,
+        code=code,
+        request_id=final_request_id,
+    )
+
+    # Prepare common arguments for exception constructors
     kwargs = {
         "status_code": status_code,
         "code": code,
         "payload": payload,
-        "request_id": request_id,
+        "request_id": final_request_id,
     }
 
+    # --- Route to the correct exception class ---
+
     # Not collected (API payload code 300, often retryable, not billed)
-    # Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
     if effective_code in ThordataNotCollectedError.API_CODES:
-        raise ThordataNotCollectedError(message, **kwargs)
+        raise ThordataNotCollectedError(final_message, **kwargs)
 
-    # Auth errors
+    # Auth errors (401, 403)
     if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
-        raise ThordataAuthError(message, **kwargs)
+        raise ThordataAuthError(final_message, **kwargs)
 
-    # Rate limit errors
+    # Rate limit errors (429, 402)
     if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
-        # Try to extract retry_after from payload
-        retry_after = None
-        if isinstance(payload, dict):
-            retry_after = payload.get("retry_after")
-        raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
+        retry_after = _extract_retry_after(payload)
+        raise ThordataRateLimitError(final_message, retry_after=retry_after, **kwargs)
 
-    # Server errors
+    # Server errors (5xx)
     if effective_code is not None and 500 <= effective_code < 600:
-        raise ThordataServerError(message, **kwargs)
+        raise ThordataServerError(final_message, **kwargs)
 
-    # Validation errors
+    # Validation errors (400, 422)
    if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
-        raise ThordataValidationError(message, **kwargs)
+        raise ThordataValidationError(final_message, **kwargs)
 
-    # Generic API error
-    raise ThordataAPIError(message, **kwargs)
+    # Fallback to generic API error if no specific match
+    raise ThordataAPIError(final_message, **kwargs)
 
 
 # =============================================================================
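The net effect of the new helpers: every exception raised by raise_for_code now carries an enriched message, and retry_after / request_id are recovered from several common payload spellings. A small sketch exercising the 429 path with keys the helpers actually check:

    from thordata.exceptions import ThordataRateLimitError, raise_for_code

    payload = {"retryAfter": "5", "requestId": "req-abc123"}
    try:
        raise_for_code("Too many requests", status_code=429, code=429, payload=payload)
    except ThordataRateLimitError as exc:
        # "5" is coerced to int 5 by _extract_retry_after; the message reads
        # "Too many requests (http=429, request_id=req-abc123)"
        print(exc.retry_after, exc)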
thordata/models.py CHANGED
@@ -1,6 +1,6 @@
 """
 Data models for the Thordata Python SDK.
-Moved to thordata.types in v1.5.0.
+Moved to thordata.types in v1.6.0.
 This file is kept for backward compatibility.
 """
 
thordata/retry.py CHANGED
@@ -186,7 +186,7 @@ def with_retry(
            if isinstance(e, ThordataRateLimitError) and e.retry_after:
                delay = max(delay, e.retry_after)
 
-            logger.warning(
+            logger.info(
                f"Retry attempt {attempt + 1}/{config.max_retries} "
                f"after {delay:.2f}s due to: {e}"
            )
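Retry attempts are now logged at INFO rather than WARNING, so they disappear under the default logging setup. To keep seeing them, raise the log level explicitly, e.g.:

    import logging

    # Retry messages moved from WARNING to INFO in 1.6.0
    logging.basicConfig(level=logging.INFO)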
thordata/tools/__init__.py CHANGED
@@ -5,15 +5,19 @@ High-level abstractions for specific scraping targets.
 
 from .base import ToolRequest, VideoToolRequest
 from .code import GitHub
-from .ecommerce import Amazon
+from .ecommerce import Amazon, Walmart, eBay
+from .professional import Crunchbase, Glassdoor, Indeed
 from .search import GoogleMaps, GooglePlay, GoogleShopping
 from .social import Facebook, Instagram, LinkedIn, Reddit, TikTok, Twitter
+from .travel import Airbnb, Booking, Zillow
 from .video import YouTube
 
 __all__ = [
     "ToolRequest",
     "VideoToolRequest",
     "Amazon",
+    "eBay",
+    "Walmart",
     "GoogleMaps",
     "GoogleShopping",
     "GooglePlay",
@@ -25,4 +29,10 @@ __all__ = [
     "Reddit",
     "YouTube",
     "GitHub",
+    "Indeed",
+    "Glassdoor",
+    "Crunchbase",
+    "Booking",
+    "Zillow",
+    "Airbnb",
 ]
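All the new namespaces are re-exported from thordata.tools. A quick construction sketch using only fields visible in this diff (ToolRequest may define further base fields not shown here):

    from thordata.tools import Walmart, eBay

    walmart_req = Walmart.ProductBySku(sku="123456789")
    ebay_req = eBay.ProductByKeywords(keywords="mechanical keyboard", count="20")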
thordata/tools/code.py CHANGED
@@ -14,13 +14,26 @@ class GitHub:
 
     @dataclass
     class Repository(ToolRequest):
-        """Github Repository Scraper"""
+        """Github Repository Scraper by Repo URL"""
 
         SPIDER_ID = "github_repository_by-repo-url"
         SPIDER_NAME = "github.com"
-
         repo_url: str
-        search_url: str | None = None
-        url: str | None = None  # The generic URL param
+
+    @dataclass
+    class RepositoryBySearchUrl(ToolRequest):
+        """Github Repository Scraper by Search URL"""
+
+        SPIDER_ID = "github_repository_by-search-url"
+        SPIDER_NAME = "github.com"
+        search_url: str
         page_turning: int | None = None
         max_num: int | None = None
+
+    @dataclass
+    class RepositoryByUrl(ToolRequest):
+        """Github Repository Scraper by URL"""
+
+        SPIDER_ID = "github_repository_by-url"
+        SPIDER_NAME = "github.com"
+        url: str
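The catch-all Repository class is split into one request class per GitHub spider, each carrying only the fields that spider accepts. For example:

    from thordata.tools import GitHub

    by_repo = GitHub.Repository(repo_url="https://github.com/psf/requests")
    by_search = GitHub.RepositoryBySearchUrl(
        search_url="https://github.com/search?q=http+client",
        page_turning=1,
        max_num=50,
    )
    by_url = GitHub.RepositoryByUrl(url="https://github.com/psf/requests")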
thordata/tools/ecommerce.py CHANGED
@@ -12,9 +12,10 @@ from .base import ToolRequest
 class Amazon:
     """Namespace for Amazon tools."""
 
+    # --- Product Details (5 methods) ---
     @dataclass
-    class Product(ToolRequest):
-        """Amazon Product Details Scraper"""
+    class ProductByAsin(ToolRequest):
+        """Amazon Product Details Scraper by ASIN."""
 
         SPIDER_ID = "amazon_product_by-asin"
         SPIDER_NAME = "amazon.com"
@@ -22,16 +23,112 @@ class Amazon:
         asin: str
         domain: str = "amazon.com"
 
+    # Backward compatible alias
+    Product = ProductByAsin
+
     @dataclass
-    class GlobalProduct(ToolRequest):
-        """Amazon Global Product Details Scraper"""
+    class ProductByUrl(ToolRequest):
+        """Amazon Product Details Scraper by URL."""
 
-        SPIDER_ID = "amazon_global-product_by-url"
+        SPIDER_ID = "amazon_product_by-url"
         SPIDER_NAME = "amazon.com"
 
         url: str
         zip_code: str | None = None
 
+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """Amazon Product Details Scraper by Keywords."""
+
+        SPIDER_ID = "amazon_product_by-keywords"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        page_turning: int | None = None
+        lowest_price: float | None = None
+        highest_price: float | None = None
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """Amazon Product Details Scraper by Category URL."""
+
+        SPIDER_ID = "amazon_product_by-category-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        sort_by: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductByBestSellers(ToolRequest):
+        """Amazon Product Details Scraper by Best Sellers URL."""
+
+        SPIDER_ID = "amazon_product_by-best-sellers"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        page_turning: int | None = None
+
+    # --- Other Amazon Tools ---
+
+    @dataclass
+    class GlobalProductByUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by URL"""
+
+        SPIDER_ID = "amazon_global-product_by-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+
+    # Backward compatible alias
+    GlobalProduct = GlobalProductByUrl
+
+    @dataclass
+    class GlobalProductByCategoryUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by Category URL"""
+
+        SPIDER_ID = "amazon_global-product_by-category-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        sort_by: str | None = None
+        get_sponsored: str | None = None
+        maximum: int | None = None
+
+    @dataclass
+    class GlobalProductBySellerUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by Seller URL"""
+
+        SPIDER_ID = "amazon_global-product_by-seller-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        maximum: int | None = None
+
+    @dataclass
+    class GlobalProductByKeywords(ToolRequest):
+        """Amazon Global Product Details Scraper by Keywords"""
+
+        SPIDER_ID = "amazon_global-product_by-keywords"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        domain: str = "https://www.amazon.com"
+        lowest_price: str | None = None
+        highest_price: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class GlobalProductByKeywordsBrand(ToolRequest):
+        """Amazon Global Product Details Scraper by Keywords and Brand"""
+
+        SPIDER_ID = "amazon_global-product_by-keywords-brand"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        brands: str
+        page_turning: int | None = None
+
     @dataclass
     class Review(ToolRequest):
         """Amazon Product Review Scraper"""
@@ -59,9 +156,96 @@ class Amazon:
         SPIDER_NAME = "amazon.com"
 
         keyword: str
-        domain: str = "amazon.com"
+        domain: str = "https://www.amazon.com/"
         page_turning: int = 1
-        sort_by: str | None = None  # Best Sellers, Newest Arrivals, etc.
-        min_price: float | None = None
-        max_price: float | None = None
-        get_sponsored: bool | None = None
+
+
+class eBay:
+    """Namespace for eBay tools."""
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """eBay Information Scraper by URL"""
+
+        SPIDER_ID = "ebay_ebay_by-url"
+        SPIDER_NAME = "ebay.com"
+        url: str
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """eBay Information Scraper by Category URL"""
+
+        SPIDER_ID = "ebay_ebay_by-category-url"
+        SPIDER_NAME = "ebay.com"
+        url: str
+        count: str | None = None
+
+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """eBay Information Scraper by Keywords"""
+
+        SPIDER_ID = "ebay_ebay_by-keywords"
+        SPIDER_NAME = "ebay.com"
+        keywords: str
+        count: str | None = None
+
+    @dataclass
+    class ProductByListUrl(ToolRequest):
+        """eBay Information Scraper by List URL"""
+
+        SPIDER_ID = "ebay_ebay_by-listurl"
+        SPIDER_NAME = "ebay.com"
+        url: str
+        count: str | None = None
+
+
+class Walmart:
+    """Namespace for Walmart tools."""
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Walmart Product Information Scraper by URL"""
+
+        SPIDER_ID = "walmart_product_by-url"
+        SPIDER_NAME = "walmart.com"
+        url: str
+        all_variations: str | None = None
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """Walmart Product Information Scraper by Category URL"""
+
+        SPIDER_ID = "walmart_product_by-category-url"
+        SPIDER_NAME = "walmart.com"
+        category_url: str
+        all_variations: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductBySku(ToolRequest):
+        """Walmart Product Information Scraper by SKU"""
+
+        SPIDER_ID = "walmart_product_by-sku"
+        SPIDER_NAME = "walmart.com"
+        sku: str
+        all_variations: str | None = None
+
+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """Walmart Product Information Scraper by Keywords"""
+
+        SPIDER_ID = "walmart_product_by-keywords"
+        SPIDER_NAME = "walmart.com"
+        keyword: str
+        domain: str = "https://www.walmart.com/"
+        all_variations: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductByZipcodes(ToolRequest):
+        """Walmart Product Information Scraper by Zipcodes"""
+
+        SPIDER_ID = "walmart_product_by-zipcodes"
+        SPIDER_NAME = "walmart.com"
+        url: str
+        zip_code: str | None = None
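Amazon.Product and Amazon.GlobalProduct survive as class aliases, so 1.5.x call sites keep working while new code can name the target spider explicitly:

    from thordata.tools import Amazon

    assert Amazon.Product is Amazon.ProductByAsin  # backward-compatible alias
    req = Amazon.ProductByAsin(asin="B0EXAMPLE")   # domain defaults to "amazon.com"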