thordata-sdk 1.0.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. thordata_sdk-1.2.0/PKG-INFO +208 -0
  2. thordata_sdk-1.2.0/README.md +164 -0
  3. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml +8 -3
  4. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/__init__.py +1 -1
  5. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_example_utils.py +3 -2
  6. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_utils.py +4 -4
  7. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/async_client.py +106 -86
  8. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/client.py +782 -118
  9. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/demo.py +1 -3
  10. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/exceptions.py +12 -12
  11. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/models.py +102 -89
  12. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/retry.py +13 -13
  13. thordata_sdk-1.2.0/src/thordata/serp_engines.py +166 -0
  14. thordata_sdk-1.2.0/src/thordata_sdk.egg-info/PKG-INFO +208 -0
  15. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/SOURCES.txt +2 -0
  16. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/requires.txt +1 -0
  17. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_async_client.py +14 -2
  18. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_async_client_errors.py +5 -9
  19. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_client.py +9 -2
  20. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_client_errors.py +24 -22
  21. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_examples.py +0 -1
  22. thordata_sdk-1.2.0/tests/test_integration_proxy_protocols.py +113 -0
  23. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_task_status_and_wait.py +1 -5
  24. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_user_agent.py +0 -2
  25. thordata_sdk-1.0.1/PKG-INFO +0 -208
  26. thordata_sdk-1.0.1/README.md +0 -165
  27. thordata_sdk-1.0.1/src/thordata_sdk.egg-info/PKG-INFO +0 -208
  28. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/LICENSE +0 -0
  29. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/setup.cfg +0 -0
  30. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/enums.py +0 -0
  31. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  32. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  33. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_enums.py +0 -0
  34. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_exceptions.py +0 -0
  35. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_models.py +0 -0
  36. {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_spec_parity.py +0 -0
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 1.2.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
12
+ Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Internet :: Proxy Servers
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: License :: OSI Approved :: MIT License
24
+ Classifier: Operating System :: OS Independent
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: requests>=2.25.0
30
+ Requires-Dist: aiohttp>=3.9.0
31
+ Requires-Dist: PySocks>=1.7.1
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
34
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
36
+ Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
37
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
38
+ Requires-Dist: black>=23.0.0; extra == "dev"
39
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
40
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
41
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
42
+ Requires-Dist: aioresponses>=0.7.6; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # Thordata Python SDK
46
+
47
+ <div align="center">
48
+
49
+ <img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
50
+
51
+ **The Official Python Client for Thordata APIs**
52
+
53
+ *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
54
+
55
+ [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
56
+ [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
57
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
58
+ [![CI Status](https://img.shields.io/github/actions/workflow/status/Thordata/thordata-python-sdk/ci.yml?branch=main&style=flat-square)](https://github.com/Thordata/thordata-python-sdk/actions)
59
+
60
+ </div>
61
+
62
+ ---
63
+
64
+ ## 📖 Introduction
65
+
66
+ This SDK provides a robust, high-performance interface to Thordata's AI data infrastructure. It is designed for high-concurrency scraping, reliable proxy tunneling, and seamless data extraction.
67
+
68
+ **Key Features:**
69
+ * **🚀 Production Ready:** Built on `urllib3` connection pooling for low-latency proxy requests.
70
+ * **⚡ Async Support:** Native `aiohttp` client for high-concurrency SERP/Universal scraping.
71
+ * **🛡️ Robust:** Handles TLS-in-TLS tunneling, retries, and error parsing automatically.
72
+ * **✨ Developer Experience:** Fully typed (`mypy` compatible) with intuitive IDE autocomplete.
73
+ * **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
74
+
75
+ ---
76
+
77
+ ## 📦 Installation
78
+
79
+ ```bash
80
+ pip install thordata-sdk
81
+ ```
82
+
83
+ ---
84
+
85
+ ## 🔐 Configuration
86
+
87
+ Set environment variables to avoid hardcoding credentials. You only need to set the variables for the features you use.
88
+
89
+ ```bash
90
+ # [Required for SERP & Web Unlocker]
91
+ export THORDATA_SCRAPER_TOKEN="your_token_here"
92
+
93
+ # [Required for Proxy Network]
94
+ export THORDATA_RESIDENTIAL_USERNAME="your_username"
95
+ export THORDATA_RESIDENTIAL_PASSWORD="your_password"
96
+ export THORDATA_PROXY_HOST="vpnXXXX.pr.thordata.net"
97
+
98
+ # [Required for Task Management]
99
+ export THORDATA_PUBLIC_TOKEN="public_token"
100
+ export THORDATA_PUBLIC_KEY="public_key"
101
+ ```
102
+
103
+ ---
104
+
105
+ ## 🚀 Quick Start
106
+
107
+ ### 1. SERP Search (Google/Bing/Yandex)
108
+
109
+ ```python
110
+ from thordata import ThordataClient, Engine
111
+
112
+ client = ThordataClient() # Loads THORDATA_SCRAPER_TOKEN from env
113
+
114
+ # Simple Search
115
+ print("Searching...")
116
+ results = client.serp_search("latest AI trends", engine=Engine.GOOGLE_NEWS)
117
+
118
+ for news in results.get("news_results", [])[:3]:
119
+ print(f"- {news['title']} ({news['source']})")
120
+ ```
121
+
122
+ ### 2. Universal Scrape (Web Unlocker)
123
+
124
+ Bypass Cloudflare/Akamai and render JavaScript automatically.
125
+
126
+ ```python
127
+ html = client.universal_scrape(
128
+ url="https://example.com/protected-page",
129
+ js_render=True,
130
+ wait_for=".content-loaded",
131
+ country="us"
132
+ )
133
+ print(f"Scraped {len(html)} bytes")
134
+ ```
135
+
136
+ ### 3. High-Performance Proxy
137
+
138
+ Use Thordata's residential IPs with automatic connection pooling.
139
+
140
+ ```python
141
+ from thordata import ProxyConfig, ProxyProduct
142
+
143
+ # Config is optional if env vars are set, but allows granular control
144
+ proxy = ProxyConfig(
145
+ product=ProxyProduct.RESIDENTIAL,
146
+ country="jp",
147
+ city="tokyo",
148
+ session_id="session-001",
149
+ session_duration=10 # Sticky IP for 10 mins
150
+ )
151
+
152
+ # Use the client to make requests (Reuses TCP connections)
153
+ response = client.get("https://httpbin.org/ip", proxy_config=proxy)
154
+ print(response.json())
155
+ ```
156
+
157
+ ---
158
+
159
+ ## ⚙️ Advanced Usage
160
+
161
+ ### Async Client (High Concurrency)
162
+
163
+ For building AI agents or high-throughput spiders.
164
+
165
+ ```python
166
+ import asyncio
167
+ from thordata import AsyncThordataClient
168
+
169
+ async def main():
170
+ async with AsyncThordataClient() as client:
171
+ # Fire off multiple requests in parallel
172
+ tasks = [
173
+ client.serp_search(f"query {i}")
174
+ for i in range(5)
175
+ ]
176
+ results = await asyncio.gather(*tasks)
177
+ print(f"Completed {len(results)} searches")
178
+
179
+ asyncio.run(main())
180
+ ```
181
+
182
+ ### Web Scraper API (Task Management)
183
+
184
+ Create and manage large-scale scraping tasks asynchronously.
185
+
186
+ ```python
187
+ # 1. Create a task
188
+ task_id = client.create_scraper_task(
189
+ file_name="daily_scrape",
190
+ spider_id="universal",
191
+ spider_name="universal",
192
+ parameters={"url": "https://example.com"}
193
+ )
194
+
195
+ # 2. Wait for completion (Polling)
196
+ status = client.wait_for_task(task_id)
197
+
198
+ # 3. Get results
199
+ if status == "ready":
200
+ url = client.get_task_result(task_id)
201
+ print(f"Download Data: {url}")
202
+ ```
203
+
204
+ ---
205
+
206
+ ## 📄 License
207
+
208
+ MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,164 @@
1
+ # Thordata Python SDK
2
+
3
+ <div align="center">
4
+
5
+ <img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
6
+
7
+ **The Official Python Client for Thordata APIs**
8
+
9
+ *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
10
+
11
+ [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
12
+ [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
13
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
14
+ [![CI Status](https://img.shields.io/github/actions/workflow/status/Thordata/thordata-python-sdk/ci.yml?branch=main&style=flat-square)](https://github.com/Thordata/thordata-python-sdk/actions)
15
+
16
+ </div>
17
+
18
+ ---
19
+
20
+ ## 📖 Introduction
21
+
22
+ This SDK provides a robust, high-performance interface to Thordata's AI data infrastructure. It is designed for high-concurrency scraping, reliable proxy tunneling, and seamless data extraction.
23
+
24
+ **Key Features:**
25
+ * **🚀 Production Ready:** Built on `urllib3` connection pooling for low-latency proxy requests.
26
+ * **⚡ Async Support:** Native `aiohttp` client for high-concurrency SERP/Universal scraping.
27
+ * **🛡️ Robust:** Handles TLS-in-TLS tunneling, retries, and error parsing automatically.
28
+ * **✨ Developer Experience:** Fully typed (`mypy` compatible) with intuitive IDE autocomplete.
29
+ * **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
30
+
31
+ ---
32
+
33
+ ## 📦 Installation
34
+
35
+ ```bash
36
+ pip install thordata-sdk
37
+ ```
38
+
39
+ ---
40
+
41
+ ## 🔐 Configuration
42
+
43
+ Set environment variables to avoid hardcoding credentials. You only need to set the variables for the features you use.
44
+
45
+ ```bash
46
+ # [Required for SERP & Web Unlocker]
47
+ export THORDATA_SCRAPER_TOKEN="your_token_here"
48
+
49
+ # [Required for Proxy Network]
50
+ export THORDATA_RESIDENTIAL_USERNAME="your_username"
51
+ export THORDATA_RESIDENTIAL_PASSWORD="your_password"
52
+ export THORDATA_PROXY_HOST="vpnXXXX.pr.thordata.net"
53
+
54
+ # [Required for Task Management]
55
+ export THORDATA_PUBLIC_TOKEN="public_token"
56
+ export THORDATA_PUBLIC_KEY="public_key"
57
+ ```
58
+
59
+ ---
60
+
61
+ ## 🚀 Quick Start
62
+
63
+ ### 1. SERP Search (Google/Bing/Yandex)
64
+
65
+ ```python
66
+ from thordata import ThordataClient, Engine
67
+
68
+ client = ThordataClient() # Loads THORDATA_SCRAPER_TOKEN from env
69
+
70
+ # Simple Search
71
+ print("Searching...")
72
+ results = client.serp_search("latest AI trends", engine=Engine.GOOGLE_NEWS)
73
+
74
+ for news in results.get("news_results", [])[:3]:
75
+ print(f"- {news['title']} ({news['source']})")
76
+ ```
77
+
78
+ ### 2. Universal Scrape (Web Unlocker)
79
+
80
+ Bypass Cloudflare/Akamai and render JavaScript automatically.
81
+
82
+ ```python
83
+ html = client.universal_scrape(
84
+ url="https://example.com/protected-page",
85
+ js_render=True,
86
+ wait_for=".content-loaded",
87
+ country="us"
88
+ )
89
+ print(f"Scraped {len(html)} bytes")
90
+ ```
91
+
92
+ ### 3. High-Performance Proxy
93
+
94
+ Use Thordata's residential IPs with automatic connection pooling.
95
+
96
+ ```python
97
+ from thordata import ProxyConfig, ProxyProduct
98
+
99
+ # Config is optional if env vars are set, but allows granular control
100
+ proxy = ProxyConfig(
101
+ product=ProxyProduct.RESIDENTIAL,
102
+ country="jp",
103
+ city="tokyo",
104
+ session_id="session-001",
105
+ session_duration=10 # Sticky IP for 10 mins
106
+ )
107
+
108
+ # Use the client to make requests (Reuses TCP connections)
109
+ response = client.get("https://httpbin.org/ip", proxy_config=proxy)
110
+ print(response.json())
111
+ ```
112
+
113
+ ---
114
+
115
+ ## ⚙️ Advanced Usage
116
+
117
+ ### Async Client (High Concurrency)
118
+
119
+ For building AI agents or high-throughput spiders.
120
+
121
+ ```python
122
+ import asyncio
123
+ from thordata import AsyncThordataClient
124
+
125
+ async def main():
126
+ async with AsyncThordataClient() as client:
127
+ # Fire off multiple requests in parallel
128
+ tasks = [
129
+ client.serp_search(f"query {i}")
130
+ for i in range(5)
131
+ ]
132
+ results = await asyncio.gather(*tasks)
133
+ print(f"Completed {len(results)} searches")
134
+
135
+ asyncio.run(main())
136
+ ```
137
+
138
+ ### Web Scraper API (Task Management)
139
+
140
+ Create and manage large-scale scraping tasks asynchronously.
141
+
142
+ ```python
143
+ # 1. Create a task
144
+ task_id = client.create_scraper_task(
145
+ file_name="daily_scrape",
146
+ spider_id="universal",
147
+ spider_name="universal",
148
+ parameters={"url": "https://example.com"}
149
+ )
150
+
151
+ # 2. Wait for completion (Polling)
152
+ status = client.wait_for_task(task_id)
153
+
154
+ # 3. Get results
155
+ if status == "ready":
156
+ url = client.get_task_result(task_id)
157
+ print(f"Download Data: {url}")
158
+ ```
159
+
160
+ ---
161
+
162
+ ## 📄 License
163
+
164
+ MIT License. See [LICENSE](LICENSE) for details.
@@ -1,10 +1,11 @@
1
+ # thordata-python-sdk/pyproject.toml
1
2
  [build-system]
2
3
  requires = ["setuptools>=61.0", "wheel"]
3
4
  build-backend = "setuptools.build_meta"
4
5
 
5
6
  [project]
6
7
  name = "thordata-sdk"
7
- version = "1.0.1"
8
+ version = "1.2.0"
8
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
9
10
  readme = "README.md"
10
11
  requires-python = ">=3.9"
@@ -44,6 +45,7 @@ classifiers = [
44
45
  dependencies = [
45
46
  "requests>=2.25.0",
46
47
  "aiohttp>=3.9.0",
48
+ "PySocks>=1.7.1",
47
49
  ]
48
50
 
49
51
  [project.optional-dependencies]
@@ -82,6 +84,7 @@ include = '\.pyi?$'
82
84
  [tool.ruff]
83
85
  line-length = 88
84
86
  target-version = "py39"
87
+ extend-exclude = ["sdk-spec"]
85
88
 
86
89
  [tool.ruff.lint]
87
90
  select = [
@@ -90,11 +93,12 @@ select = [
90
93
  "F", # pyflakes
91
94
  "I", # isort (import sorting)
92
95
  "B", # flake8-bugbear
96
+ "UP", # pyupgrade
97
+ "SIM", # flake8-simplify
93
98
  ]
94
99
  ignore = [
95
100
  "E501", # line too long (handled by black)
96
101
  "E731", # do not assign a lambda expression
97
- "F401", # imported but unused (we have some intentional re-exports)
98
102
  ]
99
103
 
100
104
  [tool.ruff.lint.isort]
@@ -121,6 +125,7 @@ ignore_missing_imports = true
121
125
  testpaths = ["tests"]
122
126
  asyncio_mode = "auto"
123
127
  addopts = "-v --cov=thordata --cov-report=term-missing"
128
+ markers = ["integration: live tests that require real credentials"]
124
129
 
125
130
  # Coverage setup
126
131
  [tool.coverage.run]
@@ -133,4 +138,4 @@ exclude_lines = [
133
138
  "def __repr__",
134
139
  "raise NotImplementedError",
135
140
  "if TYPE_CHECKING:",
136
- ]
141
+ ]
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "1.0.1"
38
+ __version__ = "1.2.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import os
5
+ from collections.abc import Iterable
5
6
  from pathlib import Path
6
- from typing import Any, Iterable, Optional
7
+ from typing import Any
7
8
 
8
9
  try:
9
10
  from dotenv import load_dotenv
@@ -23,7 +24,7 @@ def env(name: str) -> str:
23
24
  return (os.getenv(name) or "").strip()
24
25
 
25
26
 
26
- def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
27
+ def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
27
28
  missing = [k for k in required if not env(k)]
28
29
  if not missing:
29
30
  return False
@@ -10,7 +10,7 @@ import base64
10
10
  import json
11
11
  import logging
12
12
  import platform
13
- from typing import Any, Dict
13
+ from typing import Any
14
14
 
15
15
  logger = logging.getLogger(__name__)
16
16
 
@@ -71,7 +71,7 @@ def decode_base64_image(png_str: str) -> bytes:
71
71
  raise ValueError(f"Failed to decode base64 image: {e}") from e
72
72
 
73
73
 
74
- def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
74
+ def build_auth_headers(token: str, mode: str = "bearer") -> dict[str, str]:
75
75
  """
76
76
  Build authorization headers for API requests.
77
77
 
@@ -105,7 +105,7 @@ def build_builder_headers(
105
105
  scraper_token: str,
106
106
  public_token: str,
107
107
  public_key: str,
108
- ) -> Dict[str, str]:
108
+ ) -> dict[str, str]:
109
109
  """
110
110
  Build headers for Web Scraper builder API.
111
111
 
@@ -130,7 +130,7 @@ def build_builder_headers(
130
130
  }
131
131
 
132
132
 
133
- def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
133
+ def build_public_api_headers(public_token: str, public_key: str) -> dict[str, str]:
134
134
  """
135
135
  Build headers for public API requests (task status, locations, etc.)
136
136