thordata-sdk 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py ADDED
@@ -0,0 +1,16 @@
+ # src/thordata/__init__.py
+
+ from .client import ThordataClient
+ from .async_client import AsyncThordataClient
+ from .enums import Engine, GoogleSearchType
+
+ # Package version
+ __version__ = "0.3.0"
+
+ # Explicitly export classes to simplify user imports
+ __all__ = [
+     "ThordataClient",
+     "AsyncThordataClient",
+     "Engine",
+     "GoogleSearchType"
+ ]
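
For orientation while reading the rest of the diff, a minimal sketch (not part of the package) of the 0.3.0 import surface declared above, assuming the new wheel is installed:

```python
# Hypothetical usage of the exports added in thordata/__init__.py (0.3.0).
import thordata
from thordata import ThordataClient, AsyncThordataClient, Engine, GoogleSearchType

print(thordata.__version__)         # "0.3.0"
print(Engine.GOOGLE.value)          # "google"
print(GoogleSearchType.NEWS.value)  # "news"
```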
thordata/async_client.py RENAMED
@@ -4,12 +4,17 @@ import json
  import base64
  from typing import Optional, Dict, Any, Union

+ # Import shared logic
+ from .enums import Engine
+ from .parameters import normalize_serp_params
+
  logger = logging.getLogger(__name__)


  class AsyncThordataClient:
  """
- Thordata Asynchronous Client (built on aiohttp).
+ The official Asynchronous Python client for Thordata (built on aiohttp).
+ Designed for high-concurrency AI agents and data pipelines.
  """

  def __init__(
@@ -20,13 +25,18 @@ class AsyncThordataClient:
  proxy_host: str = "gate.thordata.com",
  proxy_port: int = 22225
  ):
+ """
+ Initialize the Async Client.
+ """
  self.scraper_token = scraper_token
  self.public_token = public_token
  self.public_key = public_key

+ # Pre-calculate proxy auth for performance
  self.proxy_auth = aiohttp.BasicAuth(login=scraper_token, password='')
  self.proxy_url = f"http://{proxy_host}:{proxy_port}"

+ # API Endpoints
  self.base_url = "https://scraperapi.thordata.com"
  self.universal_url = "https://universalapi.thordata.com"
  self.api_url = "https://api.thordata.com/api/web-scraper-api"
@@ -37,6 +47,7 @@ class AsyncThordataClient:
  self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
  self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"

+ # Session is initialized lazily or via context manager
  self._session: Optional[aiohttp.ClientSession] = None

  async def __aenter__(self):
@@ -48,16 +59,27 @@ class AsyncThordataClient:
  await self.close()

  async def close(self):
+ """Close the underlying aiohttp session."""
  if self._session and not self._session.closed:
  await self._session.close()
  self._session = None

- # --- Proxy ---
+ def _get_session(self) -> aiohttp.ClientSession:
+ """Internal helper to ensure session exists."""
+ if self._session is None or self._session.closed:
+ raise RuntimeError(
+ "Client session not initialized. Use 'async with ThordataClient(...) as client:'"
+ )
+ return self._session
+
  async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
- if self._session is None:
- raise RuntimeError("Client session not initialized.")
+ """
+ Send an async GET request through the Proxy Network.
+ """
+ session = self._get_session()
  try:
- return await self._session.get(
+ logger.debug(f"Async Proxy Request: {url}")
+ return await session.get(
  url,
  proxy=self.proxy_url,
  proxy_auth=self.proxy_auth,
@@ -67,55 +89,57 @@ class AsyncThordataClient:
  logger.error(f"Async Request failed: {e}")
  raise

- # --- SERP ---
  async def serp_search(
- self, query: str, engine: str = "google", num: int = 10, **kwargs
+ self,
+ query: str,
+ engine: Union[Engine, str] = Engine.GOOGLE,
+ num: int = 10,
+ **kwargs
  ) -> Dict[str, Any]:
- if self._session is None:
- raise RuntimeError("Client session not initialized.")
+ """
+ Execute a real-time SERP search (Async).
+ """
+ session = self._get_session()

- payload = {
- "q": query, "num": str(num), "json": "1",
- "engine": engine.lower(), **kwargs
- }
- if engine.lower() == 'yandex':
- payload['text'] = payload.pop('q')
- if 'url' not in payload:
- payload['url'] = "yandex.com"
- elif 'url' not in payload:
- if engine == 'google':
- payload['url'] = "google.com"
- elif engine == 'bing':
- payload['url'] = "bing.com"
+ # 1. Handle Enum conversion
+ engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
+
+ # 2. Normalize parameters
+ payload = normalize_serp_params(engine_str, query, num=num, **kwargs)

  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
  "Content-Type": "application/x-www-form-urlencoded"
  }

- async with self._session.post(
+ # 3. Execute Request
+ logger.info(f"Async SERP Search: {engine_str} - {query}")
+ async with session.post(
  self.SERP_API_URL, data=payload, headers=headers
  ) as response:
  response.raise_for_status()
+
  data = await response.json()
+ # Handle double-encoded JSON strings if they occur
  if isinstance(data, str):
  try:
  data = json.loads(data)
- except Exception:
+ except json.JSONDecodeError:
  pass
  return data

- # --- Universal ---
  async def universal_scrape(
  self,
  url: str,
  js_render: bool = False,
  output_format: str = "HTML",
- country: str = None,
+ country: Optional[str] = None,
  block_resources: bool = False
  ) -> Union[str, bytes]:
- if self._session is None:
- raise RuntimeError("Client session not initialized.")
+ """
+ Async Universal Scraping (Bypass Cloudflare/CAPTCHA).
+ """
+ session = self._get_session()

  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
@@ -131,18 +155,21 @@ class AsyncThordataClient:
  if country:
  payload["country"] = country

- async with self._session.post(
+ logger.info(f"Async Universal Scrape: {url}")
+ async with session.post(
  self.UNIVERSAL_API_URL, data=payload, headers=headers
  ) as response:
  response.raise_for_status()

  try:
  resp_json = await response.json()
- except Exception:
+ except json.JSONDecodeError:
+ # Fallback for raw content
  if output_format.upper() == "PNG":
  return await response.read()
  return await response.text()

+ # Check API error codes
  if isinstance(resp_json, dict) and resp_json.get("code") \
  and resp_json.get("code") != 200:
  raise Exception(f"Universal API Error: {resp_json}")
@@ -155,25 +182,32 @@ class AsyncThordataClient:
  if not png_str:
  raise Exception("API returned empty PNG data")

+ # Clean Data URI Scheme
+ if "," in png_str:
+ png_str = png_str.split(",", 1)[1]
+
+ # Fix Base64 Padding
  png_str = png_str.replace("\n", "").replace("\r", "")
  missing_padding = len(png_str) % 4
  if missing_padding:
  png_str += '=' * (4 - missing_padding)
+
  return base64.b64decode(png_str)

  return str(resp_json)

- # --- Web Scraper ---
  async def create_scraper_task(
  self,
  file_name: str,
  spider_id: str,
+ spider_name: str,
  individual_params: Dict[str, Any],
- spider_name: str = "youtube.com",
- universal_params: Dict[str, Any] = None
+ universal_params: Optional[Dict[str, Any]] = None
  ) -> str:
- if self._session is None:
- raise RuntimeError("Client session not initialized.")
+ """
+ Create an Asynchronous Web Scraper Task.
+ """
+ session = self._get_session()

  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
@@ -190,16 +224,23 @@ class AsyncThordataClient:
  if universal_params:
  payload["spider_universal"] = json.dumps(universal_params)

- async with self._session.post(
+ logger.info(f"Async Task Creation: {spider_name}")
+ async with session.post(
  self.SCRAPER_BUILDER_URL, data=payload, headers=headers
  ) as response:
  response.raise_for_status()
  data = await response.json()
+
  if data.get("code") != 200:
  raise Exception(f"Creation failed: {data}")
  return data["data"]["task_id"]

  async def get_task_status(self, task_id: str) -> str:
+ """
+ Check task status.
+ """
+ session = self._get_session()
+
  headers = {
  "token": self.public_token,
  "key": self.public_key,
@@ -207,28 +248,34 @@ class AsyncThordataClient:
  }
  payload = {"tasks_ids": task_id}

- async with self._session.post(
+ async with session.post(
  self.SCRAPER_STATUS_URL, data=payload, headers=headers
  ) as response:
  data = await response.json()
  if data.get("code") == 200 and data.get("data"):
  for item in data["data"]:
- if str(item["task_id"]) == str(task_id):
+ if str(item.get("task_id")) == str(task_id):
  return item["status"]
  return "Unknown"

  async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
+ """
+ Get the download URL for a finished task.
+ """
+ session = self._get_session()
+
  headers = {
  "token": self.public_token,
  "key": self.public_key,
  "Content-Type": "application/x-www-form-urlencoded"
  }
- payload = {"tasks_id": task_id, "type": "json"}
+ # Fixed: Use the file_type argument instead of hardcoding "json"
+ payload = {"tasks_id": task_id, "type": file_type}

- async with self._session.post(
+ async with session.post(
  self.SCRAPER_DOWNLOAD_URL, data=payload, headers=headers
  ) as response:
  data = await response.json()
- if data.get("code") == 200:
+ if data.get("code") == 200 and data.get("data"):
  return data["data"]["download"]
  raise Exception(f"Result Error: {data}")
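
A brief, hypothetical usage sketch of the reworked async client (token values are placeholders): in 0.3.0 the aiohttp session is only created by the context manager, so calling a request method on an un-entered client raises the RuntimeError from `_get_session()` shown above.

```python
import asyncio
from thordata import AsyncThordataClient, Engine

async def main():
    # Placeholder credentials; real tokens come from the Thordata Dashboard.
    async with AsyncThordataClient(
        scraper_token="SCRAPER_TOKEN",
        public_token="PUBLIC_TOKEN",
        public_key="PUBLIC_KEY",
    ) as client:
        # engine now accepts the Engine enum or a plain string.
        results = await client.serp_search("thordata", engine=Engine.BING, num=5)
        print(type(results))

asyncio.run(main())
```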
thordata/client.py RENAMED
@@ -2,9 +2,12 @@ import requests
  import logging
  import json
  import base64
- from typing import Dict, Any, Union
+ from typing import Dict, Any, Union, Optional

- # Configure a library-specific logger
+ from .enums import Engine
+ from .parameters import normalize_serp_params
+
+ # Configure a library-specific logger to avoid interfering with user's logging
  logger = logging.getLogger(__name__)


@@ -12,11 +15,11 @@ class ThordataClient:
  """
  The official synchronous Python client for Thordata.

- Handles authentication for:
- 1. Proxy Network (HTTP/HTTPS)
- 2. SERP API (Real-time Search)
- 3. Universal Scraping API (Single Page)
- 4. Web Scraper API (Async Task Management)
+ This client handles authentication and communication with:
+ 1. Proxy Network (Residential/Datacenter via HTTP/HTTPS)
+ 2. SERP API (Real-time Search Engine Results)
+ 3. Universal Scraping API (Single Page Rendering & Extraction)
+ 4. Web Scraper API (Async Task Management for large scale jobs)
  """

  def __init__(
@@ -31,11 +34,11 @@ class ThordataClient:
  Initialize the Thordata Client.

  Args:
- scraper_token (str): Token from Dashboard bottom.
- public_token (str): Token from Public API section.
- public_key (str): Key from Public API section.
- proxy_host (str): Proxy gateway host.
- proxy_port (int): Proxy gateway port.
+ scraper_token (str): The secret token found at the bottom of the Dashboard.
+ public_token (str): The token from the Public API section.
+ public_key (str): The key from the Public API section.
+ proxy_host (str): The proxy gateway host (default: gate.thordata.com).
+ proxy_port (int): The proxy gateway port (default: 22225).
  """
  self.scraper_token = scraper_token
  self.public_token = public_token
@@ -46,7 +49,7 @@ class ThordataClient:
  f"http://{self.scraper_token}:@{proxy_host}:{proxy_port}"
  )

- # API Endpoints
+ # API Endpoints Definition
  self.base_url = "https://scraperapi.thordata.com"
  self.universal_url = "https://universalapi.thordata.com"
  self.api_url = "https://api.thordata.com/api/web-scraper-api"
@@ -57,6 +60,7 @@ class ThordataClient:
  self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
  self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"

+ # Initialize Session with Proxy settings
  self.session = requests.Session()
  self.session.proxies = {
  "http": self.proxy_url,
@@ -65,44 +69,50 @@ class ThordataClient:

  def get(self, url: str, **kwargs) -> requests.Response:
  """
- Send a GET request through the Thordata Proxy Network.
+ Send a standard GET request through the Thordata Residential Proxy Network.
+
+ Args:
+ url (str): The target URL.
+ **kwargs: Arguments to pass to requests.get().
+
+ Returns:
+ requests.Response: The response object.
  """
  logger.debug(f"Proxy Request: {url}")
  kwargs.setdefault("timeout", 30)
  return self.session.get(url, **kwargs)

  def serp_search(
- self, query: str, engine: str = "google", num: int = 10, **kwargs
+ self,
+ query: str,
+ engine: Union[Engine, str] = Engine.GOOGLE,
+ num: int = 10,
+ **kwargs
  ) -> Dict[str, Any]:
  """
- Execute a real-time SERP search.
+ Execute a real-time SERP (Search Engine Results Page) search.
+
+ Args:
+ query (str): The search keywords.
+ engine (Union[Engine, str]): The search engine (e.g., 'google', 'bing').
+ num (int): Number of results to retrieve (default 10).
+ **kwargs: Additional parameters (e.g., type="shopping", location="London").
+
+ Returns:
+ Dict[str, Any]: The parsed JSON result from the search engine.
  """
- payload = {
- "q": query,
- "num": str(num),
- "json": "1",
- "engine": engine.lower(),
- **kwargs
- }
+ # Handle Enum or String input for engine
+ engine_str = engine.value if isinstance(engine, Engine) else engine.lower()

- if engine.lower() == 'yandex':
- payload['text'] = payload.pop('q')
- if 'url' not in payload:
- payload['url'] = "yandex.com"
- elif 'url' not in payload:
- if engine == 'google':
- payload['url'] = "google.com"
- elif engine == 'bing':
- payload['url'] = "bing.com"
- elif engine == 'duckduckgo':
- payload['url'] = "duckduckgo.com"
+ # Normalize parameters via internal helper
+ payload = normalize_serp_params(engine_str, query, num=num, **kwargs)

  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
  "Content-Type": "application/x-www-form-urlencoded"
  }

- logger.info(f"SERP Search: {engine} - {query}")
+ logger.info(f"SERP Search: {engine_str} - {query}")
  try:
  response = self.session.post(
  self.SERP_API_URL,
@@ -111,12 +121,13 @@ class ThordataClient:
  timeout=60
  )
  response.raise_for_status()
+
  data = response.json()
-
+ # Handle cases where the API returns a stringified JSON
  if isinstance(data, str):
- try:
+ try:
  data = json.loads(data)
- except json.JSONDecodeError:
+ except json.JSONDecodeError:
  pass
  return data
  except Exception as e:
@@ -128,11 +139,22 @@ class ThordataClient:
  url: str,
  js_render: bool = False,
  output_format: str = "HTML",
- country: str = None,
+ country: Optional[str] = None,
  block_resources: bool = False
  ) -> Union[str, bytes]:
  """
  Unlock target pages via the Universal Scraping API.
+ Bypasses Cloudflare, CAPTCHAs, and antibot systems automatically.
+
+ Args:
+ url (str): Target URL.
+ js_render (bool): Whether to render JavaScript (Headless Browser).
+ output_format (str): "HTML" or "PNG" (screenshot).
+ country (Optional[str]): Geo-targeting country code (e.g., 'us').
+ block_resources (bool): Block images/css to speed up loading.
+
+ Returns:
+ Union[str, bytes]: HTML string or PNG bytes.
  """
  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
@@ -148,7 +170,7 @@ class ThordataClient:
  if country:
  payload["country"] = country

- logger.info(f"Universal Scrape: {url}")
+ logger.info(f"Universal Scrape: {url} (Format: {output_format})")

  try:
  response = self.session.post(
@@ -159,30 +181,35 @@ class ThordataClient:
  )
  response.raise_for_status()

- # Parse JSON wrapper
+ # Attempt to parse JSON wrapper
  try:
  resp_json = response.json()
  except json.JSONDecodeError:
- # Fallback for raw response
+ # Fallback: if the API returns raw content directly
  if output_format.upper() == "PNG":
  return response.content
  return response.text

- # Check API errors
+ # Check for API-level errors inside the JSON
  if isinstance(resp_json, dict) and resp_json.get("code") \
  and resp_json.get("code") != 200:
  raise Exception(f"Universal API Error: {resp_json}")

- # Extract HTML
+ # Case 1: Return HTML
  if "html" in resp_json:
  return resp_json["html"]

- # Extract PNG (Base64 decoding with padding fix)
+ # Case 2: Return PNG Image
  if "png" in resp_json:
  png_str = resp_json["png"]
  if not png_str:
  raise Exception("API returned empty PNG data")

+ # Clean Data URI Scheme if present (e.g., data:image/png;base64,...)
+ if "," in png_str:
+ png_str = png_str.split(",", 1)[1]
+
+ # Fix Base64 Padding
  png_str = png_str.replace("\n", "").replace("\r", "")
  missing_padding = len(png_str) % 4
  if missing_padding:
@@ -190,6 +217,7 @@ class ThordataClient:

  return base64.b64decode(png_str)

+ # Fallback
  return str(resp_json)

  except Exception as e:
@@ -200,18 +228,32 @@ class ThordataClient:
  self,
  file_name: str,
  spider_id: str,
+ spider_name: str,
  individual_params: Dict[str, Any],
- spider_name: str = "youtube.com",
- universal_params: Dict[str, Any] = None
+ universal_params: Optional[Dict[str, Any]] = None
  ) -> str:
  """
- Create an Asynchronous Web Scraper Task.
+ Create a generic Web Scraper Task (Async).
+
+ IMPORTANT: You must retrieve the correct 'spider_id' and 'spider_name'
+ from the Thordata Dashboard before calling this method.
+
+ Args:
+ file_name (str): Name for the output file.
+ spider_id (str): The ID of the spider (from Dashboard).
+ spider_name (str): The name of the spider (e.g., "youtube.com").
+ individual_params (Dict): Parameters specific to the spider.
+ universal_params (Optional[Dict]): Global settings for the scraper.
+
+ Returns:
+ str: The created task_id.
  """
  headers = {
  "Authorization": f"Bearer {self.scraper_token}",
  "Content-Type": "application/x-www-form-urlencoded"
  }

+ # Payload construction
  payload = {
  "spider_name": spider_name,
  "spider_id": spider_id,
@@ -222,7 +264,7 @@ class ThordataClient:
  if universal_params:
  payload["spider_universal"] = json.dumps(universal_params)

- logger.info(f"Creating Scraper Task: {spider_id}")
+ logger.info(f"Creating Scraper Task: {spider_name} (ID: {spider_id})")
  try:
  response = self.session.post(
  self.SCRAPER_BUILDER_URL,
@@ -241,7 +283,13 @@ class ThordataClient:

  def get_task_status(self, task_id: str) -> str:
  """
- Check the status of a task.
+ Check the status of an asynchronous scraping task.
+
+ Args:
+ task_id (str): The ID returned by create_scraper_task.
+
+ Returns:
+ str: The status string (e.g., "finished", "running", "error").
  """
  headers = {
  "token": self.public_token,
@@ -271,6 +319,13 @@ class ThordataClient:
  def get_task_result(self, task_id: str, file_type: str = "json") -> str:
  """
  Retrieve the download URL for a completed task.
+
+ Args:
+ task_id (str): The task ID.
+ file_type (str): Format required (default "json").
+
+ Returns:
+ str: The URL to download the result file.
  """
  headers = {
  "token": self.public_token,
@@ -279,7 +334,7 @@ class ThordataClient:
  }
  payload = {"tasks_id": task_id, "type": file_type}

- logger.info(f"Getting result URL: {task_id}")
+ logger.info(f"Getting result URL for Task: {task_id}")
  try:
  response = self.session.post(
  self.SCRAPER_DOWNLOAD_URL,
thordata/enums.py ADDED
@@ -0,0 +1,25 @@
+ # src/thordata/enums.py
+
+ from enum import Enum
+
+ class Engine(str, Enum):
+     """
+     Supported Search Engines for SERP API.
+     """
+     GOOGLE = "google"
+     BING = "bing"
+     YANDEX = "yandex"
+     DUCKDUCKGO = "duckduckgo"
+     BAIDU = "baidu"
+
+ class GoogleSearchType(str, Enum):
+     """
+     Specific search types for Google Engine.
+     """
+     SEARCH = "search"      # Default web search
+     MAPS = "maps"          # Google Maps
+     SHOPPING = "shopping"  # Google Shopping
+     NEWS = "news"          # Google News
+     IMAGES = "images"      # Google Images
+     VIDEOS = "videos"      # Google Videos
+     # Users can pass other strings manually if needed
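
Because both enums above subclass `str`, their members compare equal to their literal values, which is why the clients accept either form. A small illustrative check (not part of the package):

```python
from thordata import Engine, GoogleSearchType

# Members are simultaneously Enum members and plain strings.
assert isinstance(Engine.GOOGLE, str)
assert Engine.GOOGLE == "google"
assert Engine.YANDEX.value == "yandex"

# GoogleSearchType behaves the same; unlisted types can still be passed as raw strings.
assert GoogleSearchType.SHOPPING == "shopping"
```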
thordata/parameters.py ADDED
@@ -0,0 +1,52 @@
+ # src/thordata/parameters.py
+
+ from typing import Dict, Any, Optional
+
+ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
+     """
+     Normalizes parameters across different search engines to ensure a unified API surface.
+
+     Args:
+         engine (str): The search engine to use (e.g., 'google', 'yandex').
+         query (str): The search query string.
+         **kwargs: Additional parameters to pass to the API.
+
+     Returns:
+         Dict[str, Any]: The constructed payload for the API request.
+     """
+     # 1. Base parameters
+     payload = {
+         "num": str(kwargs.get("num", 10)),  # Default to 10 results
+         "json": "1",                        # Force JSON response
+         "engine": engine,
+     }
+
+     # 2. Handle Query Parameter Differences (Yandex uses 'text', others use 'q')
+     if engine == "yandex":
+         payload["text"] = query
+         # Set default URL for Yandex if not provided
+         if "url" not in kwargs:
+             payload["url"] = "yandex.com"
+     else:
+         payload["q"] = query
+
+     # 3. Handle Default URLs for other engines
+     if "url" not in kwargs:
+         defaults = {
+             "google": "google.com",
+             "bing": "bing.com",
+             "duckduckgo": "duckduckgo.com",
+             "baidu": "baidu.com"
+         }
+         if engine in defaults:
+             payload["url"] = defaults[engine]
+
+     # 4. Passthrough for all other user-provided arguments
+     # This allows support for engine-specific parameters (e.g., tbm, uule, gl)
+     # without explicitly defining them all.
+     protected_keys = {"num", "engine", "q", "text"}
+     for key, value in kwargs.items():
+         if key not in protected_keys:
+             payload[key] = value
+
+     return payload
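
To make the normalization rules above concrete, here is a small sketch of what the helper produces for two engines; the outputs are derived by reading the function, and key order is illustrative only:

```python
from thordata.parameters import normalize_serp_params

# Google: the query goes in "q" and a default "url" is filled in; extra kwargs pass through.
print(normalize_serp_params("google", "pizza", num=5, gl="us"))
# {'num': '5', 'json': '1', 'engine': 'google', 'q': 'pizza', 'url': 'google.com', 'gl': 'us'}

# Yandex: the query key becomes "text" and the default URL is yandex.com.
print(normalize_serp_params("yandex", "pizza"))
# {'num': '10', 'json': '1', 'engine': 'yandex', 'text': 'pizza', 'url': 'yandex.com'}
```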
thordata_sdk-0.3.0.dist-info/METADATA ADDED
@@ -0,0 +1,197 @@
+ Metadata-Version: 2.4
+ Name: thordata-sdk
+ Version: 0.3.0
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
+ Author-email: Thordata Developer Team <support@thordata.com>
+ License: Apache-2.0
+ Project-URL: Homepage, https://www.thordata.com
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
+ Keywords: web scraping,proxy,ai,llm,data-mining,serp,thordata
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: requests>=2.25.0
+ Requires-Dist: aiohttp>=3.8.0
+ Dynamic: license-file
+
+ # Thordata Python SDK
+
+ <h4 align="center">
+ Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
+ <br>
+ <i>Async-ready, built for AI agents and large-scale data collection.</i>
+ </h4>
+
+ <p align="center">
+ <a href="https://pypi.org/project/thordata-sdk/">
+ <img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
+ </a>
+ <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
+ <img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
+ </a>
+ <a href="https://python.org">
+ <img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
+ </a>
+ </p>
+
+ ---
+
+ ## Installation
+
+ ```bash
+ pip install thordata-sdk
+ ```
+
+ ## Quick Start
+
+ All examples below use the unified client:
+
+ ```python
+ from thordata import ThordataClient, AsyncThordataClient
+ ```
+
+ You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
+
+ ### 1. Proxy Network (Simple GET)
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+ from thordata import ThordataClient
+
+ load_dotenv()
+
+ client = ThordataClient(
+     scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
+     public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
+     public_key=os.getenv("THORDATA_PUBLIC_KEY"),
+ )
+
+ resp = client.get("http://httpbin.org/ip")
+ print(resp.json())
+ ```
+
+ ### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
+
+ ```python
+ from thordata import ThordataClient, Engine
+
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
+
+ results = client.serp_search(
+     query="Thordata technology",
+     engine=Engine.GOOGLE,
+     num=10,
+     # Any engine-specific parameters are passed via **kwargs
+     # e.g. type="shopping", location="United States"
+ )
+
+ print(len(results.get("organic", [])))
+ ```
+
+ ### 3. Universal Scraping API
+
+ ```python
+ from thordata import ThordataClient
+
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
+
+ html = client.universal_scrape(
+     url="https://www.google.com",
+     js_render=True,
+     output_format="HTML",
+ )
+ print(html[:200])
+ ```
+
+ ### 4. Web Scraper API (Task-based)
+
+ ```python
+ import time
+ from thordata import ThordataClient
+
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
+
+ task_id = client.create_scraper_task(
+     file_name="demo_youtube_data",
+     spider_id="youtube_video-post_by-url",
+     spider_name="youtube.com",
+     individual_params={
+         "url": "https://www.youtube.com/@stephcurry/videos",
+         "order_by": "",
+         "num_of_posts": ""
+     },
+ )
+
+ for _ in range(10):
+     status = client.get_task_status(task_id)
+     print("Status:", status)
+     if status in ["Ready", "Success"]:
+         break
+     if status == "Failed":
+         raise RuntimeError("Task failed")
+     time.sleep(3)
+
+ download_url = client.get_task_result(task_id)
+ print("Download URL:", download_url)
+ ```
+
+ ### 5. Asynchronous Usage (High Concurrency)
+
+ ```python
+ import asyncio
+ from thordata import AsyncThordataClient
+
+ async def main():
+     async with AsyncThordataClient(
+         scraper_token="SCRAPER_TOKEN",
+         public_token="PUBLIC_TOKEN",
+         public_key="PUBLIC_KEY",
+     ) as client:
+         resp = await client.get("http://httpbin.org/ip")
+         print(await resp.json())
+
+ asyncio.run(main())
+ ```
+
+ More examples are available in the `examples/` directory.
+
+ ---
+
+ ## Features
+
+ | Feature | Status | Description |
+ |---------|--------|-------------|
+ | Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
+ | SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
+ | Universal Scraping API | Stable | JS rendering, HTML / PNG output, antibot bypass. |
+ | Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
+ | Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
+
+ ---
+
+ ## Development & Contributing
+
+ See `CONTRIBUTING.md` for local development and contribution guidelines.
+
+ ## License
+
+ This project is licensed under the Apache License 2.0.
+
+ ## Support
+
+ For technical support, please contact support@thordata.com
+ or verify your tokens and quotas in the Thordata Dashboard.
thordata_sdk-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ thordata/__init__.py,sha256=HVb6cHBsYRFoA1Sf_y_WSZ88vGV3DsT67rCdbZSuUYE,365
+ thordata/async_client.py,sha256=cpBtRIzr8oH6GuZs8gTh505tGYYV1aRFBUzbtmFOfEg,9717
+ thordata/client.py,sha256=w_EXs6CLM2qFtFPNU-x_Li66LEH1j7pQb2ca2MDKqyA,12432
+ thordata/enums.py,sha256=PGUCQX3jw5a9mX8_JfhuyoR1WriWjWQpAgibVP_bpdM,679
+ thordata/parameters.py,sha256=1lNx_BSS8ztBKEj_MXZMaIQQ9_W3EAlS-VFiBqSWb9E,1841
+ thordata_sdk-0.3.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ thordata_sdk-0.3.0.dist-info/METADATA,sha256=Yj6W3vSLkkUhSXTj6AK4AaMfdlJvGOVaK6cFI2MNqV8,5697
+ thordata_sdk-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ thordata_sdk-0.3.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
+ thordata_sdk-0.3.0.dist-info/RECORD,,
thordata_sdk-0.3.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ thordata
thordata_sdk/__init__.py DELETED
@@ -1,8 +0,0 @@
- # Expose main clients
- from .client import ThordataClient
- from .async_client import AsyncThordataClient
-
- # Version of the thordata-sdk package
- __version__ = "0.2.3"
-
- __all__ = ["ThordataClient", "AsyncThordataClient"]
thordata_sdk-0.2.3.dist-info/METADATA DELETED
@@ -1,125 +0,0 @@
- Metadata-Version: 2.4
- Name: thordata_sdk
- Version: 0.2.3
- Summary: The official Python SDK for Thordata Proxy & Scraper Infrastructure.
- Home-page: https://github.com/Thordata/thordata-python-sdk
- Author: Thordata Developer Team
- Author-email: support@thordata.com
- License: Apache License 2.0
- Project-URL: Bug Tracker, https://github.com/Thordata/thordata-python-sdk/issues
- Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
- Classifier: Development Status :: 4 - Beta
- Classifier: Intended Audience :: Developers
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: License :: OSI Approved :: Apache Software License
- Classifier: Operating System :: OS Independent
- Classifier: Topic :: Internet :: WWW/HTTP
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: requests>=2.25.0
- Requires-Dist: aiohttp>=3.8.0
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: license
- Dynamic: license-file
- Dynamic: project-url
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
-
- # Thordata Python SDK
-
- <h4 align="center">
- The Official Python Client for the Thordata Proxy Network & Web Scraper API.
- <br>
- <i>High-performance, async-ready, designed for AI Agents and large-scale data collection.</i>
- </h4>
-
- <p align="center">
- <a href="https://pypi.org/project/thordata-sdk/"><img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version"></a>
- <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License"></a>
- <a href="https://python.org"><img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions"></a>
- </p>
-
- ---
-
- ## 🛠 Installation
-
- Install via pip:
-
- ```bash
- pip install thordata-sdk
- ```
-
- ## ⚡ Quick Start
-
- ### 1. Proxy Usage (Simple GET Request)
-
- **Python**
-
- ```python
- from thordata_sdk import ThordataClient
-
- # Initialize with your credentials from the Thordata Dashboard
- client = ThordataClient(
-     scraper_token="YOUR_SCRAPER_TOKEN",  # From "Scraping Tool Token"
-     public_token="YOUR_PUBLIC_TOKEN",    # From "Public API"
-     public_key="YOUR_PUBLIC_KEY"         # From "Public API"
- )
-
- # Send a request through the proxy
- response = client.get("http://httpbin.org/ip")
- print(response.json())
- ```
-
- ### 2. Real-time SERP Search
-
- **Python**
-
- ```python
- results = client.serp_search("Thordata technology", engine="google")
- print(f"Results found: {len(results.get('organic', []))}")
- ```
-
- ### 3. Asynchronous Usage (High Concurrency)
-
- **Python**
-
- ```python
- import asyncio
- from thordata_sdk import AsyncThordataClient
-
- async def main():
-     async with AsyncThordataClient(scraper_token="...", public_token="...", public_key="...") as client:
-         response = await client.get("http://httpbin.org/ip")
-         print(await response.json())
-
- asyncio.run(main())
- ```
-
- ## ⚙️ Features Status
-
- | Feature | Status | Description |
- |---------|--------|-------------|
- | Proxy Network | ✅ Stable | Synchronous & Asynchronous support via aiohttp. |
- | SERP API | ✅ Stable | Real-time Google/Bing/Yandex search results. |
- | Web Scraper | ✅ Stable | Async task management for scraping complex sites (e.g., YouTube). |
- | Authentication | ✅ Secure | Dual-token system for enhanced security. |
-
- ## 📄 License
-
- This project is licensed under the Apache License 2.0.
-
- ## 📞 Support
-
- For technical assistance, please contact support@thordata.com or verify your tokens in the Thordata Dashboard.
thordata_sdk-0.2.3.dist-info/RECORD DELETED
@@ -1,8 +0,0 @@
- thordata_sdk/__init__.py,sha256=aZ2P8F15HJlnnuMRYA1R-ENcZRVQ7eo0r1SD4a_1UbI,223
- thordata_sdk/async_client.py,sha256=fwoDSQA2GdikkNHrbKAoLwjqmn-zafEoe2HGf-j8bp8,8202
- thordata_sdk/client.py,sha256=drlhRHCCUoYiwmaJHLsYQZrfj7rB5wsK2P2yn2DkhqQ,9732
- thordata_sdk-0.2.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- thordata_sdk-0.2.3.dist-info/METADATA,sha256=X_b16_FfyQmV7VS9Wy_QRtgXp8JVYhxSatt0HpAA9QU,4003
- thordata_sdk-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- thordata_sdk-0.2.3.dist-info/top_level.txt,sha256=0b2NvIf8zEcLXLF0alJAeurAEeB-2e9qh72bLukM6zI,13
- thordata_sdk-0.2.3.dist-info/RECORD,,
thordata_sdk-0.2.3.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
- thordata_sdk