thordata-sdk 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 0.8.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
12
+ Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Internet :: Proxy Servers
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: License :: OSI Approved :: MIT License
24
+ Classifier: Operating System :: OS Independent
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: requests>=2.25.0
30
+ Requires-Dist: aiohttp>=3.9.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
34
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
35
+ Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
36
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
37
+ Requires-Dist: black>=23.0.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
40
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
41
+ Requires-Dist: aioresponses>=0.7.6; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Thordata Python SDK
45
+
46
+ <div align="center">
47
+
48
+ **Official Python client for Thordata's Proxy Network, SERP API, Web Unlocker, and Web Scraper API.**
49
+
50
+ *Async-ready, type-safe, built for AI agents and large-scale data collection.*
51
+
52
+ [![PyPI](https://img.shields.io/pypi/v/thordata-sdk?color=blue)](https://pypi.org/project/thordata-sdk/)
53
+ [![Python](https://img.shields.io/badge/python-3.9+-blue)](https://python.org)
54
+ [![License](https://img.shields.io/badge/license-MIT-green)](https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE)
55
+
56
+ [Documentation](https://doc.thordata.com) • [Dashboard](https://www.thordata.com) • [Examples](https://github.com/Thordata/thordata-python-sdk/tree/main/examples)
57
+
58
+ </div>
59
+
60
+ ---
61
+
62
+ ## ✨ Features
63
+
64
+ - 🌐 **Proxy Network**: Residential, Mobile, Datacenter, ISP proxies with geo-targeting
65
+ - 🔍 **SERP API**: Google, Bing, Yandex, DuckDuckGo search results
66
+ - 🔓 **Web Unlocker**: Bypass Cloudflare, CAPTCHAs, anti-bot systems
67
+ - 🕷️ **Web Scraper API**: Async task-based scraping (Text & Video/Audio)
68
+ - 📊 **Account Management**: Usage stats, sub-users, IP whitelist
69
+ - ⚡ **Async Support**: Full async/await support with aiohttp
70
+ - 🔄 **Auto Retry**: Configurable retry with exponential backoff
71
+
72
+ ---
73
+
74
+ ## 📦 Installation
75
+
76
+ ```bash
77
+ pip install thordata-sdk
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 🔐 Configuration
83
+
84
+ Set environment variables:
85
+
86
+ ```bash
87
+ # Required for Scraper APIs (SERP, Universal, Tasks)
88
+ export THORDATA_SCRAPER_TOKEN=your_token
89
+
90
+ # Required for Public/Location APIs (Dashboard -> My Account)
91
+ export THORDATA_PUBLIC_TOKEN=your_public_token
92
+ export THORDATA_PUBLIC_KEY=your_public_key
93
+
94
+ # Optional: credentials for Public API NEW (Dashboard -> Public API NEW)
95
+ # If not set, SDK falls back to PUBLIC_TOKEN/KEY
96
+ export THORDATA_SIGN=your_sign
97
+ export THORDATA_API_KEY=your_api_key
98
+ ```
99
+
100
+ ---
101
+
102
+ ## 🚀 Quick Start
103
+
104
+ ```python
105
+ from thordata import ThordataClient, Engine
106
+
107
+ # Initialize (credentials shown inline; in practice, load them from the env vars above)
108
+ client = ThordataClient(
109
+ scraper_token="your_token",
110
+ public_token="pub_token",
111
+ public_key="pub_key"
112
+ )
113
+
114
+ # SERP Search
115
+ results = client.serp_search("python tutorial", engine=Engine.GOOGLE)
116
+ print(f"Found {len(results.get('organic', []))} results")
117
+
118
+ # Universal Scrape
119
+ html = client.universal_scrape("https://httpbin.org/html")
120
+ print(html[:100])
121
+ ```
122
+
123
+ ---
124
+
125
+ ## 📖 Feature Guide
126
+
127
+ ### SERP API
128
+
129
+ ```python
130
+ from thordata import SerpRequest
131
+
132
+ # Advanced search
133
+ results = client.serp_search_advanced(SerpRequest(
134
+ query="pizza",
135
+ engine="google_local",
136
+ country="us",
137
+ location="New York",
138
+ num=10
139
+ ))
140
+ ```
141
+
142
+ ### Web Scraper API (Async Tasks)
143
+
144
+ **Create Task:**
145
+ ```python
146
+ task_id = client.create_scraper_task(
147
+ file_name="my_task",
148
+ spider_id="universal",
149
+ spider_name="universal",
150
+ parameters={"url": "https://example.com"}
151
+ )
152
+ ```
153
+
154
+ **Video Download (New):**
155
+ ```python
156
+ from thordata import CommonSettings
157
+
158
+ task_id = client.create_video_task(
159
+ file_name="{{VideoID}}",
160
+ spider_id="youtube_video_by-url",
161
+ spider_name="youtube.com",
162
+ parameters={"url": "https://youtube.com/watch?v=..."},
163
+ common_settings=CommonSettings(resolution="1080p")
164
+ )
165
+ ```
166
+
167
+ **Wait & Download:**
168
+ ```python
169
+ status = client.wait_for_task(task_id)
170
+ if status == "ready":
171
+ url = client.get_task_result(task_id)
172
+ print(url)
173
+ ```
174
+
175
+ ### Account Management
176
+
177
+ ```python
178
+ # Usage Statistics
179
+ stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
180
+ print(f"Balance: {stats.balance_gb():.2f} GB")
181
+
182
+ # Proxy Users
183
+ users = client.list_proxy_users()
184
+ print(f"Sub-users: {users.user_count}")
185
+
186
+ # Whitelist IP
187
+ client.add_whitelist_ip("1.2.3.4")
188
+ ```
189
+
190
+ ### Proxy Network
191
+
192
+ ```python
193
+ from thordata import ProxyConfig
194
+
195
+ # Generate Proxy URL
196
+ proxy_url = client.build_proxy_url(
197
+ username="proxy_user",
198
+ password="proxy_pass",
199
+ country="us",
200
+ city="ny"
201
+ )
202
+
203
+ # Use with requests
204
+ import requests
205
+ requests.get("https://httpbin.org/ip", proxies={"http": proxy_url, "https": proxy_url})
206
+ ```
207
+
208
+ ---
209
+
210
+ ## 📄 License
211
+
212
+ MIT License
@@ -0,0 +1,14 @@
1
+ thordata/__init__.py,sha256=yaIxW1T_nsCeiPE6iIHunjRzPrtbiN0BciveICgL4dM,3195
2
+ thordata/_utils.py,sha256=epF-ewHyk7McdejlhHNAfxhIQ8sN3TlIjUJ9H4HOaUE,5254
3
+ thordata/async_client.py,sha256=tC9y1wmcO6RsXCysBo0a0GNRZR3QQjJlCmEwG5HVukQ,53169
4
+ thordata/client.py,sha256=VN5Jm3er7fdZDfT2G9g4siBSYNo0ZWj4WOi6TAiAZcE,59638
5
+ thordata/demo.py,sha256=zmG4I4cHXnbmQfbr063SeRK7_9IXrfof9QFoGqGTVm8,3806
6
+ thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
7
+ thordata/exceptions.py,sha256=IgMsFuh49cPxU5YofsKP1UhP5A_snhtuN6xD1yZWLiI,10018
8
+ thordata/models.py,sha256=NG4wn1bq4-FC4Aex8vwBOldiHovwg0JzhdtBsI1mL_8,36118
9
+ thordata/retry.py,sha256=nkh17ca2TIEcTc-uNo-xcNdJPuxZ_VGlMbC70X6p-_Q,11518
10
+ thordata_sdk-0.8.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
11
+ thordata_sdk-0.8.0.dist-info/METADATA,sha256=IgL554I6mzya9FdbqCxKdvO3r-bywiHJjZi1xdk8W48,5850
12
+ thordata_sdk-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ thordata_sdk-0.8.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
14
+ thordata_sdk-0.8.0.dist-info/RECORD,,
thordata/parameters.py DELETED
@@ -1,53 +0,0 @@
1
- # src/thordata/parameters.py
2
-
3
- from typing import Any, Dict
4
-
5
-
6
- def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
7
- """
8
- Normalizes parameters across different search engines to ensure a unified API surface.
9
-
10
- Args:
11
- engine (str): The search engine to use (e.g., 'google', 'yandex').
12
- query (str): The search query string.
13
- **kwargs: Additional parameters to pass to the API.
14
-
15
- Returns:
16
- Dict[str, Any]: The constructed payload for the API request.
17
- """
18
- # 1. Base parameters
19
- payload = {
20
- "num": str(kwargs.get("num", 10)), # Default to 10 results
21
- "json": "1", # Force JSON response
22
- "engine": engine,
23
- }
24
-
25
- # 2. Handle Query Parameter Differences (Yandex uses 'text', others use 'q')
26
- if engine == "yandex":
27
- payload["text"] = query
28
- # Set default URL for Yandex if not provided
29
- if "url" not in kwargs:
30
- payload["url"] = "yandex.com"
31
- else:
32
- payload["q"] = query
33
-
34
- # 3. Handle Default URLs for other engines
35
- if "url" not in kwargs:
36
- defaults = {
37
- "google": "google.com",
38
- "bing": "bing.com",
39
- "duckduckgo": "duckduckgo.com",
40
- "baidu": "baidu.com",
41
- }
42
- if engine in defaults:
43
- payload["url"] = defaults[engine]
44
-
45
- # 4. Passthrough for all other user-provided arguments
46
- # This allows support for engine-specific parameters (e.g., tbm, uule, gl)
47
- # without explicitly defining them all.
48
- protected_keys = {"num", "engine", "q", "text"}
49
- for key, value in kwargs.items():
50
- if key not in protected_keys:
51
- payload[key] = value
52
-
53
- return payload