thordata-sdk 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ # src/thordata/__init__.py
2
+
3
+ from .client import ThordataClient
4
+ from .async_client import AsyncThordataClient
5
+ from .enums import Engine, GoogleSearchType
6
+
7
+ # Package version
8
+ __version__ = "0.3.0"
9
+
10
+ # Explicitly export classes to simplify user imports
11
+ __all__ = [
12
+ "ThordataClient",
13
+ "AsyncThordataClient",
14
+ "Engine",
15
+ "GoogleSearchType"
16
+ ]
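The top-level package is renamed from `thordata_sdk` to `thordata`, and the enums now ship in `__all__`. A minimal migration sketch (assuming 0.3.0 is installed; the printed values follow directly from the file above):

```python
# 0.2.4: from thordata_sdk import ThordataClient
# 0.3.0: the package root is now `thordata`
from thordata import ThordataClient, AsyncThordataClient, Engine, GoogleSearchType
import thordata

print(thordata.__version__)  # "0.3.0"
print(Engine.GOOGLE.value)   # "google"
```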
thordata/async_client.py CHANGED
@@ -4,7 +4,7 @@ import json
4
4
  import base64
5
5
  from typing import Optional, Dict, Any, Union
6
6
 
7
- # Reuse the logic and enums we just wrote
7
+ # Import shared logic
8
8
  from .enums import Engine
9
9
  from .parameters import normalize_serp_params
10
10
 
@@ -13,7 +13,8 @@ logger = logging.getLogger(__name__)
13
13
 
14
14
  class AsyncThordataClient:
15
15
  """
16
- Thordata Asynchronous Client (built on aiohttp).
16
+ The official Asynchronous Python client for Thordata (built on aiohttp).
17
+ Designed for high-concurrency AI agents and data pipelines.
17
18
  """
18
19
 
19
20
  def __init__(
@@ -24,13 +25,18 @@ class AsyncThordataClient:
24
25
  proxy_host: str = "gate.thordata.com",
25
26
  proxy_port: int = 22225
26
27
  ):
28
+ """
29
+ Initialize the Async Client.
30
+ """
27
31
  self.scraper_token = scraper_token
28
32
  self.public_token = public_token
29
33
  self.public_key = public_key
30
34
 
35
+ # Pre-calculate proxy auth for performance
31
36
  self.proxy_auth = aiohttp.BasicAuth(login=scraper_token, password='')
32
37
  self.proxy_url = f"http://{proxy_host}:{proxy_port}"
33
38
 
39
+ # API Endpoints
34
40
  self.base_url = "https://scraperapi.thordata.com"
35
41
  self.universal_url = "https://universalapi.thordata.com"
36
42
  self.api_url = "https://api.thordata.com/api/web-scraper-api"
@@ -41,6 +47,7 @@ class AsyncThordataClient:
41
47
  self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
42
48
  self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"
43
49
 
50
+ # Session is initialized lazily or via context manager
44
51
  self._session: Optional[aiohttp.ClientSession] = None
45
52
 
46
53
  async def __aenter__(self):
@@ -52,16 +59,27 @@ class AsyncThordataClient:
52
59
  await self.close()
53
60
 
54
61
  async def close(self):
62
+ """Close the underlying aiohttp session."""
55
63
  if self._session and not self._session.closed:
56
64
  await self._session.close()
57
65
  self._session = None
58
66
 
59
- # --- Proxy (Unchanged) ---
67
+ def _get_session(self) -> aiohttp.ClientSession:
68
+ """Internal helper to ensure session exists."""
69
+ if self._session is None or self._session.closed:
70
+ raise RuntimeError(
71
+ "Client session not initialized. Use 'async with ThordataClient(...) as client:'"
72
+ )
73
+ return self._session
74
+
60
75
  async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
61
- if self._session is None:
62
- raise RuntimeError("Client session not initialized.")
76
+ """
77
+ Send an async GET request through the Proxy Network.
78
+ """
79
+ session = self._get_session()
63
80
  try:
64
- return await self._session.get(
81
+ logger.debug(f"Async Proxy Request: {url}")
82
+ return await session.get(
65
83
  url,
66
84
  proxy=self.proxy_url,
67
85
  proxy_auth=self.proxy_auth,
@@ -71,7 +89,6 @@ class AsyncThordataClient:
71
89
  logger.error(f"Async Request failed: {e}")
72
90
  raise
73
91
 
74
- # --- SERP (Optimized) ---
75
92
  async def serp_search(
76
93
  self,
77
94
  query: str,
@@ -82,13 +99,12 @@ class AsyncThordataClient:
82
99
  """
83
100
  Execute a real-time SERP search (Async).
84
101
  """
85
- if self._session is None:
86
- raise RuntimeError("Client session not initialized.")
102
+ session = self._get_session()
87
103
 
88
- # 1. Convert the enum
104
+ # 1. Handle Enum conversion
89
105
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
90
106
 
91
- # 2. Call the shared logic in parameters.py (Don't Repeat Yourself!)
107
+ # 2. Normalize parameters
92
108
  payload = normalize_serp_params(engine_str, query, num=num, **kwargs)
93
109
 
94
110
  headers = {
@@ -96,30 +112,34 @@ class AsyncThordataClient:
96
112
  "Content-Type": "application/x-www-form-urlencoded"
97
113
  }
98
114
 
99
- # 3. Send the request
100
- async with self._session.post(
115
+ # 3. Execute Request
116
+ logger.info(f"Async SERP Search: {engine_str} - {query}")
117
+ async with session.post(
101
118
  self.SERP_API_URL, data=payload, headers=headers
102
119
  ) as response:
103
120
  response.raise_for_status()
121
+
104
122
  data = await response.json()
123
+ # Handle double-encoded JSON strings if they occur
105
124
  if isinstance(data, str):
106
125
  try:
107
126
  data = json.loads(data)
108
- except Exception:
127
+ except json.JSONDecodeError:
109
128
  pass
110
129
  return data
111
130
 
112
- # --- Universal (Unchanged) ---
113
131
  async def universal_scrape(
114
132
  self,
115
133
  url: str,
116
134
  js_render: bool = False,
117
135
  output_format: str = "HTML",
118
- country: str = None,
136
+ country: Optional[str] = None,
119
137
  block_resources: bool = False
120
138
  ) -> Union[str, bytes]:
121
- if self._session is None:
122
- raise RuntimeError("Client session not initialized.")
139
+ """
140
+ Async Universal Scraping (Bypass Cloudflare/CAPTCHA).
141
+ """
142
+ session = self._get_session()
123
143
 
124
144
  headers = {
125
145
  "Authorization": f"Bearer {self.scraper_token}",
@@ -135,18 +155,21 @@ class AsyncThordataClient:
135
155
  if country:
136
156
  payload["country"] = country
137
157
 
138
- async with self._session.post(
158
+ logger.info(f"Async Universal Scrape: {url}")
159
+ async with session.post(
139
160
  self.UNIVERSAL_API_URL, data=payload, headers=headers
140
161
  ) as response:
141
162
  response.raise_for_status()
142
163
 
143
164
  try:
144
165
  resp_json = await response.json()
145
- except Exception:
166
+ except json.JSONDecodeError:
167
+ # Fallback for raw content
146
168
  if output_format.upper() == "PNG":
147
169
  return await response.read()
148
170
  return await response.text()
149
171
 
172
+ # Check API error codes
150
173
  if isinstance(resp_json, dict) and resp_json.get("code") \
151
174
  and resp_json.get("code") != 200:
152
175
  raise Exception(f"Universal API Error: {resp_json}")
@@ -159,39 +182,38 @@ class AsyncThordataClient:
159
182
  if not png_str:
160
183
  raise Exception("API returned empty PNG data")
161
184
 
162
- # 🛠️ FIX: strip the Data URI Scheme prefix
185
+ # Clean Data URI Scheme
163
186
  if "," in png_str:
164
187
  png_str = png_str.split(",", 1)[1]
165
188
 
189
+ # Fix Base64 Padding
166
190
  png_str = png_str.replace("\n", "").replace("\r", "")
167
191
  missing_padding = len(png_str) % 4
168
192
  if missing_padding:
169
193
  png_str += '=' * (4 - missing_padding)
194
+
170
195
  return base64.b64decode(png_str)
171
196
 
172
197
  return str(resp_json)
173
198
 
174
- # --- Web Scraper (Optimized) ---
175
199
  async def create_scraper_task(
176
200
  self,
177
201
  file_name: str,
178
202
  spider_id: str,
179
203
  spider_name: str,
180
204
  individual_params: Dict[str, Any],
181
- universal_params: Dict[str, Any] = None
205
+ universal_params: Optional[Dict[str, Any]] = None
182
206
  ) -> str:
183
207
  """
184
208
  Create an Asynchronous Web Scraper Task.
185
209
  """
186
- if self._session is None:
187
- raise RuntimeError("Client session not initialized.")
210
+ session = self._get_session()
188
211
 
189
212
  headers = {
190
213
  "Authorization": f"Bearer {self.scraper_token}",
191
214
  "Content-Type": "application/x-www-form-urlencoded"
192
215
  }
193
216
 
194
- # Simplify payload construction and drop unnecessary checks
195
217
  payload = {
196
218
  "file_name": file_name,
197
219
  "spider_id": spider_id,
@@ -202,17 +224,23 @@ class AsyncThordataClient:
202
224
  if universal_params:
203
225
  payload["spider_universal"] = json.dumps(universal_params)
204
226
 
205
- async with self._session.post(
227
+ logger.info(f"Async Task Creation: {spider_name}")
228
+ async with session.post(
206
229
  self.SCRAPER_BUILDER_URL, data=payload, headers=headers
207
230
  ) as response:
208
231
  response.raise_for_status()
209
232
  data = await response.json()
233
+
210
234
  if data.get("code") != 200:
211
235
  raise Exception(f"Creation failed: {data}")
212
236
  return data["data"]["task_id"]
213
237
 
214
- # --- Status & Result (Unchanged) ---
215
238
  async def get_task_status(self, task_id: str) -> str:
239
+ """
240
+ Check task status.
241
+ """
242
+ session = self._get_session()
243
+
216
244
  headers = {
217
245
  "token": self.public_token,
218
246
  "key": self.public_key,
@@ -220,28 +248,34 @@ class AsyncThordataClient:
220
248
  }
221
249
  payload = {"tasks_ids": task_id}
222
250
 
223
- async with self._session.post(
251
+ async with session.post(
224
252
  self.SCRAPER_STATUS_URL, data=payload, headers=headers
225
253
  ) as response:
226
254
  data = await response.json()
227
255
  if data.get("code") == 200 and data.get("data"):
228
256
  for item in data["data"]:
229
- if str(item["task_id"]) == str(task_id):
257
+ if str(item.get("task_id")) == str(task_id):
230
258
  return item["status"]
231
259
  return "Unknown"
232
260
 
233
261
  async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
262
+ """
263
+ Get the download URL for a finished task.
264
+ """
265
+ session = self._get_session()
266
+
234
267
  headers = {
235
268
  "token": self.public_token,
236
269
  "key": self.public_key,
237
270
  "Content-Type": "application/x-www-form-urlencoded"
238
271
  }
239
- payload = {"tasks_id": task_id, "type": "json"}
272
+ # Fixed: Use the file_type argument instead of hardcoding "json"
273
+ payload = {"tasks_id": task_id, "type": file_type}
240
274
 
241
- async with self._session.post(
275
+ async with session.post(
242
276
  self.SCRAPER_DOWNLOAD_URL, data=payload, headers=headers
243
277
  ) as response:
244
278
  data = await response.json()
245
- if data.get("code") == 200:
279
+ if data.get("code") == 200 and data.get("data"):
246
280
  return data["data"]["download"]
247
281
  raise Exception(f"Result Error: {data}")
thordata/client.py CHANGED
@@ -7,7 +7,7 @@ from typing import Dict, Any, Union, Optional
7
7
  from .enums import Engine
8
8
  from .parameters import normalize_serp_params
9
9
 
10
- # Configure a library-specific logger
10
+ # Configure a library-specific logger to avoid interfering with user's logging
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
13
 
@@ -15,11 +15,11 @@ class ThordataClient:
15
15
  """
16
16
  The official synchronous Python client for Thordata.
17
17
 
18
- Handles authentication for:
19
- 1. Proxy Network (HTTP/HTTPS)
20
- 2. SERP API (Real-time Search)
21
- 3. Universal Scraping API (Single Page)
22
- 4. Web Scraper API (Async Task Management)
18
+ This client handles authentication and communication with:
19
+ 1. Proxy Network (Residential/Datacenter via HTTP/HTTPS)
20
+ 2. SERP API (Real-time Search Engine Results)
21
+ 3. Universal Scraping API (Single Page Rendering & Extraction)
22
+ 4. Web Scraper API (Async Task Management for large scale jobs)
23
23
  """
24
24
 
25
25
  def __init__(
@@ -34,11 +34,11 @@ class ThordataClient:
34
34
  Initialize the Thordata Client.
35
35
 
36
36
  Args:
37
- scraper_token (str): Token from Dashboard bottom.
38
- public_token (str): Token from Public API section.
39
- public_key (str): Key from Public API section.
40
- proxy_host (str): Proxy gateway host.
41
- proxy_port (int): Proxy gateway port.
37
+ scraper_token (str): The secret token found at the bottom of the Dashboard.
38
+ public_token (str): The token from the Public API section.
39
+ public_key (str): The key from the Public API section.
40
+ proxy_host (str): The proxy gateway host (default: gate.thordata.com).
41
+ proxy_port (int): The proxy gateway port (default: 22225).
42
42
  """
43
43
  self.scraper_token = scraper_token
44
44
  self.public_token = public_token
@@ -49,7 +49,7 @@ class ThordataClient:
49
49
  f"http://{self.scraper_token}:@{proxy_host}:{proxy_port}"
50
50
  )
51
51
 
52
- # API Endpoints
52
+ # API Endpoints Definition
53
53
  self.base_url = "https://scraperapi.thordata.com"
54
54
  self.universal_url = "https://universalapi.thordata.com"
55
55
  self.api_url = "https://api.thordata.com/api/web-scraper-api"
@@ -60,6 +60,7 @@ class ThordataClient:
60
60
  self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
61
61
  self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"
62
62
 
63
+ # Initialize Session with Proxy settings
63
64
  self.session = requests.Session()
64
65
  self.session.proxies = {
65
66
  "http": self.proxy_url,
@@ -68,7 +69,14 @@ class ThordataClient:
68
69
 
69
70
  def get(self, url: str, **kwargs) -> requests.Response:
70
71
  """
71
- Send a GET request through the Thordata Proxy Network.
72
+ Send a standard GET request through the Thordata Residential Proxy Network.
73
+
74
+ Args:
75
+ url (str): The target URL.
76
+ **kwargs: Arguments to pass to requests.get().
77
+
78
+ Returns:
79
+ requests.Response: The response object.
72
80
  """
73
81
  logger.debug(f"Proxy Request: {url}")
74
82
  kwargs.setdefault("timeout", 30)
@@ -77,23 +85,26 @@ class ThordataClient:
77
85
  def serp_search(
78
86
  self,
79
87
  query: str,
80
- engine: Union[Engine, str] = Engine.GOOGLE, # Accepts either an enum or a string
88
+ engine: Union[Engine, str] = Engine.GOOGLE,
81
89
  num: int = 10,
82
- **kwargs # Receives all extra parameters (e.g., type="maps")
90
+ **kwargs
83
91
  ) -> Dict[str, Any]:
84
92
  """
85
- Execute a real-time SERP search.
93
+ Execute a real-time SERP (Search Engine Results Page) search.
86
94
 
87
95
  Args:
88
- query: Keywords
89
- engine: 'google', 'bing', 'yandex' etc.
90
- num: Number of results (default 10)
91
- **kwargs: Extra parameters (e.g., type="shopping", location="London")
96
+ query (str): The search keywords.
97
+ engine (Union[Engine, str]): The search engine (e.g., 'google', 'bing').
98
+ num (int): Number of results to retrieve (default 10).
99
+ **kwargs: Additional parameters (e.g., type="shopping", location="London").
100
+
101
+ Returns:
102
+ Dict[str, Any]: The parsed JSON result from the search engine.
92
103
  """
93
- # Compatibility: if the user passed an enum, take its value; if a string, lowercase it
104
+ # Handle Enum or String input for engine
94
105
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
95
106
 
96
- # Call the logic in parameters.py
107
+ # Normalize parameters via internal helper
97
108
  payload = normalize_serp_params(engine_str, query, num=num, **kwargs)
98
109
 
99
110
  headers = {
@@ -112,25 +123,38 @@ class ThordataClient:
112
123
  response.raise_for_status()
113
124
 
114
125
  data = response.json()
126
+ # Handle cases where the API returns a stringified JSON
115
127
  if isinstance(data, str):
116
- try: data = json.loads(data)
117
- except: pass
128
+ try:
129
+ data = json.loads(data)
130
+ except json.JSONDecodeError:
131
+ pass
118
132
  return data
119
133
  except Exception as e:
120
134
  logger.error(f"SERP Request Failed: {e}")
121
135
  raise
122
136
 
123
-
124
137
  def universal_scrape(
125
138
  self,
126
139
  url: str,
127
140
  js_render: bool = False,
128
141
  output_format: str = "HTML",
129
- country: str = None,
142
+ country: Optional[str] = None,
130
143
  block_resources: bool = False
131
144
  ) -> Union[str, bytes]:
132
145
  """
133
146
  Unlock target pages via the Universal Scraping API.
147
+ Bypasses Cloudflare, CAPTCHAs, and antibot systems automatically.
148
+
149
+ Args:
150
+ url (str): Target URL.
151
+ js_render (bool): Whether to render JavaScript (Headless Browser).
152
+ output_format (str): "HTML" or "PNG" (screenshot).
153
+ country (Optional[str]): Geo-targeting country code (e.g., 'us').
154
+ block_resources (bool): Block images/css to speed up loading.
155
+
156
+ Returns:
157
+ Union[str, bytes]: HTML string or PNG bytes.
134
158
  """
135
159
  headers = {
136
160
  "Authorization": f"Bearer {self.scraper_token}",
@@ -146,7 +170,7 @@ class ThordataClient:
146
170
  if country:
147
171
  payload["country"] = country
148
172
 
149
- logger.info(f"Universal Scrape: {url}")
173
+ logger.info(f"Universal Scrape: {url} (Format: {output_format})")
150
174
 
151
175
  try:
152
176
  response = self.session.post(
@@ -157,35 +181,35 @@ class ThordataClient:
157
181
  )
158
182
  response.raise_for_status()
159
183
 
160
- # Parse JSON wrapper
184
+ # Attempt to parse JSON wrapper
161
185
  try:
162
186
  resp_json = response.json()
163
187
  except json.JSONDecodeError:
164
- # Fallback for raw response
188
+ # Fallback: if the API returns raw content directly
165
189
  if output_format.upper() == "PNG":
166
190
  return response.content
167
191
  return response.text
168
192
 
169
- # Check API errors
193
+ # Check for API-level errors inside the JSON
170
194
  if isinstance(resp_json, dict) and resp_json.get("code") \
171
195
  and resp_json.get("code") != 200:
172
196
  raise Exception(f"Universal API Error: {resp_json}")
173
197
 
174
- # Extract HTML
198
+ # Case 1: Return HTML
175
199
  if "html" in resp_json:
176
200
  return resp_json["html"]
177
201
 
178
- # Extract PNG
202
+ # Case 2: Return PNG Image
179
203
  if "png" in resp_json:
180
204
  png_str = resp_json["png"]
181
205
  if not png_str:
182
206
  raise Exception("API returned empty PNG data")
183
207
 
184
- # 🛠️ FIX: strip the Data URI Scheme prefix (data:image/png;base64,)
208
+ # Clean Data URI Scheme if present (e.g., data:image/png;base64,...)
185
209
  if "," in png_str:
186
210
  png_str = png_str.split(",", 1)[1]
187
211
 
188
- # Base64 decode (handle padding)
212
+ # Fix Base64 Padding
189
213
  png_str = png_str.replace("\n", "").replace("\r", "")
190
214
  missing_padding = len(png_str) % 4
191
215
  if missing_padding:
@@ -193,6 +217,7 @@ class ThordataClient:
193
217
 
194
218
  return base64.b64decode(png_str)
195
219
 
220
+ # Fallback
196
221
  return str(resp_json)
197
222
 
198
223
  except Exception as e:
@@ -202,22 +227,33 @@ class ThordataClient:
202
227
  def create_scraper_task(
203
228
  self,
204
229
  file_name: str,
205
- spider_id: str, # Required; obtained from the Dashboard
206
- spider_name: str, # Required; e.g. "youtube.com"
207
- individual_params: Dict[str, Any], # The user packs the spider-specific parameters into this dict
208
- universal_params: Dict[str, Any] = None
230
+ spider_id: str,
231
+ spider_name: str,
232
+ individual_params: Dict[str, Any],
233
+ universal_params: Optional[Dict[str, Any]] = None
209
234
  ) -> str:
210
235
  """
211
- Create a generic Web Scraper Task.
236
+ Create a generic Web Scraper Task (Async).
212
237
 
213
- Note: Check the Thordata Dashboard to get the correct 'spider_id' and 'spider_name'.
238
+ IMPORTANT: You must retrieve the correct 'spider_id' and 'spider_name'
239
+ from the Thordata Dashboard before calling this method.
240
+
241
+ Args:
242
+ file_name (str): Name for the output file.
243
+ spider_id (str): The ID of the spider (from Dashboard).
244
+ spider_name (str): The name of the spider (e.g., "youtube.com").
245
+ individual_params (Dict): Parameters specific to the spider.
246
+ universal_params (Optional[Dict]): Global settings for the scraper.
247
+
248
+ Returns:
249
+ str: The created task_id.
214
250
  """
215
251
  headers = {
216
252
  "Authorization": f"Bearer {self.scraper_token}",
217
253
  "Content-Type": "application/x-www-form-urlencoded"
218
254
  }
219
255
 
220
- # Send the payload as-is; do not perform complex validation on the user's behalf, to keep compatibility
256
+ # Payload construction
221
257
  payload = {
222
258
  "spider_name": spider_name,
223
259
  "spider_id": spider_id,
@@ -247,7 +283,13 @@ class ThordataClient:
247
283
 
248
284
  def get_task_status(self, task_id: str) -> str:
249
285
  """
250
- Check the status of a task.
286
+ Check the status of an asynchronous scraping task.
287
+
288
+ Args:
289
+ task_id (str): The ID returned by create_scraper_task.
290
+
291
+ Returns:
292
+ str: The status string (e.g., "finished", "running", "error").
251
293
  """
252
294
  headers = {
253
295
  "token": self.public_token,
@@ -277,6 +319,13 @@ class ThordataClient:
277
319
  def get_task_result(self, task_id: str, file_type: str = "json") -> str:
278
320
  """
279
321
  Retrieve the download URL for a completed task.
322
+
323
+ Args:
324
+ task_id (str): The task ID.
325
+ file_type (str): Format required (default "json").
326
+
327
+ Returns:
328
+ str: The URL to download the result file.
280
329
  """
281
330
  headers = {
282
331
  "token": self.public_token,
@@ -285,7 +334,7 @@ class ThordataClient:
285
334
  }
286
335
  payload = {"tasks_id": task_id, "type": file_type}
287
336
 
288
- logger.info(f"Getting result URL: {task_id}")
337
+ logger.info(f"Getting result URL for Task: {task_id}")
289
338
  try:
290
339
  response = self.session.post(
291
340
  self.SCRAPER_DOWNLOAD_URL,
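Because the PNG branch above strips the Data-URI prefix and fixes base64 padding before decoding, `universal_scrape` hands back raw image bytes that can be written straight to disk. A short sketch (placeholder tokens; the target URL is arbitrary):

```python
from thordata import ThordataClient

client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")

# output_format="PNG" returns decoded bytes; "HTML" returns a string.
png_bytes = client.universal_scrape(
    url="https://example.com",
    js_render=True,
    output_format="PNG",
)

with open("screenshot.png", "wb") as f:
    f.write(png_bytes)
```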
thordata/enums.py ADDED
@@ -0,0 +1,25 @@
1
+ # src/thordata/enums.py
2
+
3
+ from enum import Enum
4
+
5
+ class Engine(str, Enum):
6
+ """
7
+ Supported Search Engines for SERP API.
8
+ """
9
+ GOOGLE = "google"
10
+ BING = "bing"
11
+ YANDEX = "yandex"
12
+ DUCKDUCKGO = "duckduckgo"
13
+ BAIDU = "baidu"
14
+
15
+ class GoogleSearchType(str, Enum):
16
+ """
17
+ Specific search types for Google Engine.
18
+ """
19
+ SEARCH = "search" # Default web search
20
+ MAPS = "maps" # Google Maps
21
+ SHOPPING = "shopping" # Google Shopping
22
+ NEWS = "news" # Google News
23
+ IMAGES = "images" # Google Images
24
+ VIDEOS = "videos" # Google Videos
25
+ # Users can pass other strings manually if needed
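Both enums subclass `str`, so existing string-based call sites keep working while typed constants become available. A hedged sketch of combining them with `serp_search`, where `type` is simply forwarded through `**kwargs`:

```python
from thordata import ThordataClient, Engine, GoogleSearchType

client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")

results = client.serp_search(
    query="coffee near me",
    engine=Engine.GOOGLE,
    type=GoogleSearchType.MAPS.value,  # forwarded to the API as type="maps"
)
```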
thordata/parameters.py ADDED
@@ -0,0 +1,52 @@
1
+ # src/thordata/parameters.py
2
+
3
+ from typing import Dict, Any, Optional
4
+
5
+ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
6
+ """
7
+ Normalizes parameters across different search engines to ensure a unified API surface.
8
+
9
+ Args:
10
+ engine (str): The search engine to use (e.g., 'google', 'yandex').
11
+ query (str): The search query string.
12
+ **kwargs: Additional parameters to pass to the API.
13
+
14
+ Returns:
15
+ Dict[str, Any]: The constructed payload for the API request.
16
+ """
17
+ # 1. Base parameters
18
+ payload = {
19
+ "num": str(kwargs.get("num", 10)), # Default to 10 results
20
+ "json": "1", # Force JSON response
21
+ "engine": engine,
22
+ }
23
+
24
+ # 2. Handle Query Parameter Differences (Yandex uses 'text', others use 'q')
25
+ if engine == "yandex":
26
+ payload["text"] = query
27
+ # Set default URL for Yandex if not provided
28
+ if "url" not in kwargs:
29
+ payload["url"] = "yandex.com"
30
+ else:
31
+ payload["q"] = query
32
+
33
+ # 3. Handle Default URLs for other engines
34
+ if "url" not in kwargs:
35
+ defaults = {
36
+ "google": "google.com",
37
+ "bing": "bing.com",
38
+ "duckduckgo": "duckduckgo.com",
39
+ "baidu": "baidu.com"
40
+ }
41
+ if engine in defaults:
42
+ payload["url"] = defaults[engine]
43
+
44
+ # 4. Passthrough for all other user-provided arguments
45
+ # This allows support for engine-specific parameters (e.g., tbm, uule, gl)
46
+ # without explicitly defining them all.
47
+ protected_keys = {"num", "engine", "q", "text"}
48
+ for key, value in kwargs.items():
49
+ if key not in protected_keys:
50
+ payload[key] = value
51
+
52
+ return payload
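For illustration, this is roughly what the helper builds for a Yandex query; `page` here is just an arbitrary passthrough kwarg, not a documented parameter:

```python
from thordata.parameters import normalize_serp_params

payload = normalize_serp_params("yandex", "rust async runtime", num=20, page=2)
print(payload)
# {'num': '20', 'json': '1', 'engine': 'yandex',
#  'text': 'rust async runtime', 'url': 'yandex.com', 'page': 2}
```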
thordata_sdk-0.3.0.dist-info/METADATA ADDED
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 0.3.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Keywords: web scraping,proxy,ai,llm,data-mining,serp,thordata
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Classifier: Topic :: Internet :: WWW/HTTP
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: License :: OSI Approved :: Apache Software License
22
+ Classifier: Operating System :: OS Independent
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: requests>=2.25.0
27
+ Requires-Dist: aiohttp>=3.8.0
28
+ Dynamic: license-file
29
+
30
+ # Thordata Python SDK
31
+
32
+ <h4 align="center">
33
+ Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
34
+ <br>
35
+ <i>Async-ready, built for AI agents and large-scale data collection.</i>
36
+ </h4>
37
+
38
+ <p align="center">
39
+ <a href="https://pypi.org/project/thordata-sdk/">
40
+ <img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
41
+ </a>
42
+ <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
43
+ <img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
44
+ </a>
45
+ <a href="https://python.org">
46
+ <img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
47
+ </a>
48
+ </p>
49
+
50
+ ---
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install thordata-sdk
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ All examples below use the unified client:
61
+
62
+ ```python
63
+ from thordata import ThordataClient, AsyncThordataClient
64
+ ```
65
+
66
+ You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
67
+
68
+ ### 1. Proxy Network (Simple GET)
69
+
70
+ ```python
71
+ import os
72
+ from dotenv import load_dotenv
73
+ from thordata import ThordataClient
74
+
75
+ load_dotenv()
76
+
77
+ client = ThordataClient(
78
+ scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
79
+ public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
80
+ public_key=os.getenv("THORDATA_PUBLIC_KEY"),
81
+ )
82
+
83
+ resp = client.get("http://httpbin.org/ip")
84
+ print(resp.json())
85
+ ```
86
+
87
+ ### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
88
+
89
+ ```python
90
+ from thordata import ThordataClient, Engine
91
+
92
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
93
+
94
+ results = client.serp_search(
95
+ query="Thordata technology",
96
+ engine=Engine.GOOGLE,
97
+ num=10,
98
+ # Any engine-specific parameters are passed via **kwargs
99
+ # e.g. type="shopping", location="United States"
100
+ )
101
+
102
+ print(len(results.get("organic", [])))
103
+ ```
104
+
105
+ ### 3. Universal Scraping API
106
+
107
+ ```python
108
+ from thordata import ThordataClient
109
+
110
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
111
+
112
+ html = client.universal_scrape(
113
+ url="https://www.google.com",
114
+ js_render=True,
115
+ output_format="HTML",
116
+ )
117
+ print(html[:200])
118
+ ```
119
+
120
+ ### 4. Web Scraper API (Task-based)
121
+
122
+ ```python
123
+ import time
124
+ from thordata import ThordataClient
125
+
126
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
127
+
128
+ task_id = client.create_scraper_task(
129
+ file_name="demo_youtube_data",
130
+ spider_id="youtube_video-post_by-url",
131
+ spider_name="youtube.com",
132
+ individual_params={
133
+ "url": "https://www.youtube.com/@stephcurry/videos",
134
+ "order_by": "",
135
+ "num_of_posts": ""
136
+ },
137
+ )
138
+
139
+ for _ in range(10):
140
+ status = client.get_task_status(task_id)
141
+ print("Status:", status)
142
+ if status in ["Ready", "Success"]:
143
+ break
144
+ if status == "Failed":
145
+ raise RuntimeError("Task failed")
146
+ time.sleep(3)
147
+
148
+ download_url = client.get_task_result(task_id)
149
+ print("Download URL:", download_url)
150
+ ```
151
+
152
+ ### 5. Asynchronous Usage (High Concurrency)
153
+
154
+ ```python
155
+ import asyncio
156
+ from thordata import AsyncThordataClient
157
+
158
+ async def main():
159
+ async with AsyncThordataClient(
160
+ scraper_token="SCRAPER_TOKEN",
161
+ public_token="PUBLIC_TOKEN",
162
+ public_key="PUBLIC_KEY",
163
+ ) as client:
164
+ resp = await client.get("http://httpbin.org/ip")
165
+ print(await resp.json())
166
+
167
+ asyncio.run(main())
168
+ ```
169
+
170
+ More examples are available in the `examples/` directory.
171
+
172
+ ---
173
+
174
+ ## Features
175
+
176
+ | Feature | Status | Description |
177
+ |---------|--------|-------------|
178
+ | Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
179
+ | SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
180
+ | Universal Scraping API | Stable | JS rendering, HTML / PNG output, antibot bypass. |
181
+ | Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
182
+ | Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
183
+
184
+ ---
185
+
186
+ ## Development & Contributing
187
+
188
+ See `CONTRIBUTING.md` for local development and contribution guidelines.
189
+
190
+ ## License
191
+
192
+ This project is licensed under the Apache License 2.0.
193
+
194
+ ## Support
195
+
196
+ For technical support, please contact support@thordata.com
197
+ or verify your tokens and quotas in the Thordata Dashboard.
thordata_sdk-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
1
+ thordata/__init__.py,sha256=HVb6cHBsYRFoA1Sf_y_WSZ88vGV3DsT67rCdbZSuUYE,365
2
+ thordata/async_client.py,sha256=cpBtRIzr8oH6GuZs8gTh505tGYYV1aRFBUzbtmFOfEg,9717
3
+ thordata/client.py,sha256=w_EXs6CLM2qFtFPNU-x_Li66LEH1j7pQb2ca2MDKqyA,12432
4
+ thordata/enums.py,sha256=PGUCQX3jw5a9mX8_JfhuyoR1WriWjWQpAgibVP_bpdM,679
5
+ thordata/parameters.py,sha256=1lNx_BSS8ztBKEj_MXZMaIQQ9_W3EAlS-VFiBqSWb9E,1841
6
+ thordata_sdk-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
7
+ thordata_sdk-0.3.0.dist-info/METADATA,sha256=Yj6W3vSLkkUhSXTj6AK4AaMfdlJvGOVaK6cFI2MNqV8,5697
8
+ thordata_sdk-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ thordata_sdk-0.3.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
10
+ thordata_sdk-0.3.0.dist-info/RECORD,,
thordata_sdk-0.3.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
thordata_sdk-0.3.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ thordata
thordata_sdk/__init__.py DELETED
@@ -1,9 +0,0 @@
1
- # Expose main clients
2
- from .client import ThordataClient
3
- from .async_client import AsyncThordataClient
4
- from .enums import Engine, GoogleSearchType
5
-
6
- # Version of the thordata-sdk package
7
- __version__ = "0.2.4"
8
-
9
- __all__ = ["ThordataClient", "AsyncThordataClient"]
thordata_sdk/enums.py DELETED
@@ -1,20 +0,0 @@
1
- # thordata_sdk/enums.py
2
- from enum import Enum
3
-
4
- class Engine(str, Enum):
5
- """SERP 核心支持的四大引擎"""
6
- GOOGLE = "google"
7
- BING = "bing"
8
- YANDEX = "yandex"
9
- DUCKDUCKGO = "duckduckgo"
10
- BAIDU = "baidu"
11
-
12
- class GoogleSearchType(str, Enum):
13
- """Google 搜索的常见子类型 (参考你的截图)"""
14
- SEARCH = "search" # 默认网页搜索
15
- MAPS = "maps" # 地图
16
- SHOPPING = "shopping" # 购物
17
- NEWS = "news" # 新闻
18
- IMAGES = "images" # 图片
19
- VIDEOS = "videos" # 视频
20
- # 其他冷门的先不写,用户可以通过字符串传参
thordata_sdk/parameters.py DELETED
@@ -1,41 +0,0 @@
1
- # thordata_sdk/parameters.py
2
- from typing import Dict, Any
3
-
4
- def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
5
- """
6
- 统一不同搜索引擎的参数差异。
7
- """
8
- # 1. 基础参数
9
- payload = {
10
- "num": str(kwargs.get("num", 10)),
11
- "json": "1",
12
- "engine": engine,
13
- }
14
-
15
- # 2. 处理查询关键词 (Yandex 用 text,其他用 q)
16
- if engine == "yandex":
17
- payload["text"] = query
18
- # 如果用户没传 url,给个默认的
19
- if "url" not in kwargs:
20
- payload["url"] = "yandex.com"
21
- else:
22
- payload["q"] = query
23
-
24
- # 3. 处理默认 URL (如果用户没传)
25
- if "url" not in kwargs:
26
- defaults = {
27
- "google": "google.com",
28
- "bing": "bing.com",
29
- "duckduckgo": "duckduckgo.com",
30
- "baidu": "baidu.com"
31
- }
32
- if engine in defaults:
33
- payload["url"] = defaults[engine]
34
-
35
- # 4. 把用户传入的其他所有参数(比如 type="shopping", google_domain="google.co.uk")都透传进去
36
- # 这样你就不用去定义那几十种类型了,用户传啥就是啥
37
- for k, v in kwargs.items():
38
- if k not in ["num", "engine", "q", "text"]: # 避免覆盖
39
- payload[k] = v
40
-
41
- return payload
thordata_sdk-0.2.4.dist-info/METADATA DELETED
@@ -1,113 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: thordata-sdk
3
- Version: 0.2.4
4
- Summary: The official Python SDK for Thordata Proxy & Scraper Infrastructure.
5
- Home-page: https://github.com/Thordata/thordata-python-sdk
6
- Author: Thordata Developer Team
7
- Author-email: support@thordata.com
8
- License: Apache License 2.0
9
- Project-URL: Bug Tracker, https://github.com/Thordata/thordata-python-sdk/issues
10
- Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
11
- Classifier: Development Status :: 4 - Beta
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Programming Language :: Python :: 3
14
- Classifier: Programming Language :: Python :: 3.8
15
- Classifier: Programming Language :: Python :: 3.9
16
- Classifier: Programming Language :: Python :: 3.10
17
- Classifier: Programming Language :: Python :: 3.11
18
- Classifier: License :: OSI Approved :: Apache Software License
19
- Classifier: Operating System :: OS Independent
20
- Classifier: Topic :: Internet :: WWW/HTTP
21
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
- Requires-Python: >=3.8
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE
25
- Requires-Dist: requests>=2.25.0
26
- Requires-Dist: aiohttp>=3.8.0
27
-
28
- # Thordata Python SDK
29
-
30
- <h4 align="center">
31
- The Official Python Client for the Thordata Proxy Network & Web Scraper API.
32
- <br>
33
- <i>High-performance, async-ready, designed for AI Agents and large-scale data collection.</i>
34
- </h4>
35
-
36
- <p align="center">
37
- <a href="https://pypi.org/project/thordata-sdk/"><img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version"></a>
38
- <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License"></a>
39
- <a href="https://python.org"><img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions"></a>
40
- </p>
41
-
42
- ---
43
-
44
- ## 🛠 Installation
45
-
46
- Install via pip:
47
-
48
- ```bash
49
- pip install thordata-sdk
50
- ```
51
-
52
- ## ⚡ Quick Start
53
-
54
- ### 1. Proxy Usage (Simple GET Request)
55
-
56
- **Python**
57
-
58
- ```python
59
- from thordata_sdk import ThordataClient
60
-
61
- # Initialize with your credentials from the Thordata Dashboard
62
- client = ThordataClient(
63
- scraper_token="YOUR_SCRAPER_TOKEN", # From "Scraping Tool Token"
64
- public_token="YOUR_PUBLIC_TOKEN", # From "Public API"
65
- public_key="YOUR_PUBLIC_KEY" # From "Public API"
66
- )
67
-
68
- # Send a request through the proxy
69
- response = client.get("http://httpbin.org/ip")
70
- print(response.json())
71
- ```
72
-
73
- ### 2. Real-time SERP Search
74
-
75
- **Python**
76
-
77
- ```python
78
- results = client.serp_search("Thordata technology", engine="google")
79
- print(f"Results found: {len(results.get('organic', []))}")
80
- ```
81
-
82
- ### 3. Asynchronous Usage (High Concurrency)
83
-
84
- **Python**
85
-
86
- ```python
87
- import asyncio
88
- from thordata_sdk import AsyncThordataClient
89
-
90
- async def main():
91
- async with AsyncThordataClient(scraper_token="...", public_token="...", public_key="...") as client:
92
- response = await client.get("http://httpbin.org/ip")
93
- print(await response.json())
94
-
95
- asyncio.run(main())
96
- ```
97
-
98
- ## ⚙️ Features Status
99
-
100
- | Feature | Status | Description |
101
- |---------|--------|-------------|
102
- | Proxy Network | ✅ Stable | Synchronous & Asynchronous support via aiohttp. |
103
- | SERP API | ✅ Stable | Real-time Google/Bing/Yandex search results. |
104
- | Web Scraper | ✅ Stable | Async task management for scraping complex sites (e.g., YouTube). |
105
- | Authentication | ✅ Secure | Dual-token system for enhanced security. |
106
-
107
- ## 📄 License
108
-
109
- This project is licensed under the Apache License 2.0.
110
-
111
- ## 📞 Support
112
-
113
- For technical assistance, please contact support@thordata.com or verify your tokens in the Thordata Dashboard.
thordata_sdk-0.2.4.dist-info/RECORD DELETED
@@ -1,10 +0,0 @@
1
- thordata_sdk/__init__.py,sha256=TpVRMWiWSkxq6MUoX1LCkfmuZTH9FWC65JbaALeVoVY,268
2
- thordata_sdk/async_client.py,sha256=YIIKddghCzGAvrx2Bqy8XkGcgFLbCPgzkQw-jcq2WH8,8612
3
- thordata_sdk/client.py,sha256=UyRLjRFKep2SLOWExjAJ5EB0ED0BUiBlfWGwts3sykw,10372
4
- thordata_sdk/enums.py,sha256=gKpaqV-_OO7w1LCg9PTuSUiJJq_q4ad5k6f88UlTPQw,639
5
- thordata_sdk/parameters.py,sha256=3ck0XP0lZaUYs4eEZoLLo6zDTClRRrLO9TlggesMmwI,1384
6
- thordata_sdk-0.2.4.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
7
- thordata_sdk-0.2.4.dist-info/METADATA,sha256=mluyngNHvMXlRfAgA4F7JHC6Sc1f0z4cuut3CI42yow,3734
8
- thordata_sdk-0.2.4.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
9
- thordata_sdk-0.2.4.dist-info/top_level.txt,sha256=0b2NvIf8zEcLXLF0alJAeurAEeB-2e9qh72bLukM6zI,13
10
- thordata_sdk-0.2.4.dist-info/RECORD,,
thordata_sdk-0.2.4.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
1
- thordata_sdk