thordata-sdk 1.4.0__tar.gz → 1.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_sdk-1.6.0/PKG-INFO +287 -0
- thordata_sdk-1.6.0/README.md +243 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/pyproject.toml +3 -3
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/__init__.py +4 -40
- thordata_sdk-1.6.0/src/thordata/async_client.py +1069 -0
- thordata_sdk-1.6.0/src/thordata/client.py +1601 -0
- thordata_sdk-1.6.0/src/thordata/core/__init__.py +23 -0
- thordata_sdk-1.6.0/src/thordata/core/async_http_client.py +91 -0
- thordata_sdk-1.6.0/src/thordata/core/http_client.py +79 -0
- thordata_sdk-1.6.0/src/thordata/core/tunnel.py +287 -0
- thordata_sdk-1.6.0/src/thordata/enums.py +45 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/exceptions.py +70 -19
- thordata_sdk-1.6.0/src/thordata/models.py +41 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/retry.py +1 -1
- thordata_sdk-1.6.0/src/thordata/tools/__init__.py +38 -0
- thordata_sdk-1.6.0/src/thordata/tools/base.py +42 -0
- thordata_sdk-1.6.0/src/thordata/tools/code.py +39 -0
- thordata_sdk-1.6.0/src/thordata/tools/ecommerce.py +251 -0
- thordata_sdk-1.6.0/src/thordata/tools/professional.py +155 -0
- thordata_sdk-1.6.0/src/thordata/tools/search.py +115 -0
- thordata_sdk-1.6.0/src/thordata/tools/social.py +374 -0
- thordata_sdk-1.6.0/src/thordata/tools/travel.py +100 -0
- thordata_sdk-1.6.0/src/thordata/tools/video.py +154 -0
- thordata_sdk-1.6.0/src/thordata/types/__init__.py +77 -0
- thordata_sdk-1.6.0/src/thordata/types/common.py +141 -0
- thordata_sdk-1.6.0/src/thordata/types/proxy.py +340 -0
- thordata_sdk-1.6.0/src/thordata/types/serp.py +224 -0
- thordata_sdk-1.6.0/src/thordata/types/task.py +156 -0
- thordata_sdk-1.6.0/src/thordata/types/universal.py +66 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/unlimited.py +67 -0
- thordata_sdk-1.6.0/src/thordata_sdk.egg-info/PKG-INFO +287 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/SOURCES.txt +26 -3
- thordata_sdk-1.6.0/tests/test_async_client.py +424 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_async_client_errors.py +26 -9
- thordata_sdk-1.6.0/tests/test_batch_creation.py +116 -0
- thordata_sdk-1.6.0/tests/test_client.py +606 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_enums.py +1 -1
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_examples.py +4 -1
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_integration_proxy_protocols.py +2 -3
- thordata_sdk-1.6.0/tests/test_retry.py +317 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_spec_parity.py +36 -2
- thordata_sdk-1.6.0/tests/test_tools.py +24 -0
- thordata_sdk-1.6.0/tests/test_tools_coverage.py +102 -0
- thordata_sdk-1.6.0/tests/test_unlimited.py +184 -0
- thordata_sdk-1.6.0/tests/test_utils.py +126 -0
- thordata_sdk-1.4.0/PKG-INFO +0 -208
- thordata_sdk-1.4.0/README.md +0 -164
- thordata_sdk-1.4.0/src/thordata/_example_utils.py +0 -77
- thordata_sdk-1.4.0/src/thordata/async_client.py +0 -2362
- thordata_sdk-1.4.0/src/thordata/client.py +0 -2479
- thordata_sdk-1.4.0/src/thordata/demo.py +0 -138
- thordata_sdk-1.4.0/src/thordata/enums.py +0 -384
- thordata_sdk-1.4.0/src/thordata/models.py +0 -1197
- thordata_sdk-1.4.0/src/thordata_sdk.egg-info/PKG-INFO +0 -208
- thordata_sdk-1.4.0/tests/test_async_client.py +0 -95
- thordata_sdk-1.4.0/tests/test_client.py +0 -121
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/LICENSE +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/setup.cfg +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/_utils.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/async_unlimited.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata/serp_engines.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_client_errors.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_task_status_and_wait.py +0 -0
- {thordata_sdk-1.4.0 → thordata_sdk-1.6.0}/tests/test_user_agent.py +0 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thordata-sdk
|
|
3
|
+
Version: 1.6.0
|
|
4
|
+
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
|
+
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.thordata.com
|
|
8
|
+
Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
|
|
9
|
+
Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
|
|
10
|
+
Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Internet :: Proxy Servers
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
24
|
+
Classifier: Operating System :: OS Independent
|
|
25
|
+
Classifier: Typing :: Typed
|
|
26
|
+
Requires-Python: >=3.9
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: requests>=2.25.0
|
|
30
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
31
|
+
Requires-Dist: PySocks>=1.7.1
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: black>=25.11.0; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
42
|
+
Requires-Dist: aioresponses>=0.7.6; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# Thordata Python SDK
|
|
46
|
+
|
|
47
|
+
<div align="center">
|
|
48
|
+
|
|
49
|
+
<img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
|
|
50
|
+
|
|
51
|
+
**The Official Python Client for Thordata APIs**
|
|
52
|
+
|
|
53
|
+
*Proxy Network • SERP API • Web Unlocker • Web Scraper API*
|
|
54
|
+
|
|
55
|
+
[PyPI version](https://pypi.org/project/thordata-sdk/)
|
|
56
|
+
[Python versions](https://pypi.org/project/thordata-sdk/)
|
|
57
|
+
[License](LICENSE)
|
|
58
|
+
[CI status](https://github.com/Thordata/thordata-python-sdk/actions)
|
|
59
|
+
|
|
60
|
+
</div>
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## 📖 Introduction
|
|
65
|
+
|
|
66
|
+
The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
67
|
+
|
|
68
|
+
**Why v1.6.0?**
|
|
69
|
+
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
70
|
+
* **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
|
|
71
|
+
* **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
|
|
72
|
+
* **🤖 Type Safe**: Fully typed (`mypy` strict) for excellent IDE autocompletion and error checking.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## 📦 Installation
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install thordata-sdk
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## 🔐 Configuration
|
|
85
|
+
|
|
86
|
+
Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# [Scraping APIs]
|
|
90
|
+
export THORDATA_SCRAPER_TOKEN="your_scraper_token"
|
|
91
|
+
|
|
92
|
+
# [Management APIs]
|
|
93
|
+
export THORDATA_PUBLIC_TOKEN="your_public_token"
|
|
94
|
+
export THORDATA_PUBLIC_KEY="your_public_key"
|
|
95
|
+
|
|
96
|
+
# [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
|
|
97
|
+
export THORDATA_RESIDENTIAL_USERNAME="your_username"
|
|
98
|
+
export THORDATA_RESIDENTIAL_PASSWORD="your_password"
|
|
99
|
+
# Optional: Unlimited (high-bandwidth) if your plan has separate credentials
|
|
100
|
+
# export THORDATA_UNLIMITED_USERNAME="..."
|
|
101
|
+
# export THORDATA_UNLIMITED_PASSWORD="..."
|
|
102
|
+
|
|
103
|
+
# Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
|
|
104
|
+
# export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## 🚀 Quick Start
|
|
112
|
+
|
|
113
|
+
### 1. SERP Search (Google/Bing)
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from thordata import ThordataClient, Engine
|
|
117
|
+
|
|
118
|
+
client = ThordataClient()
|
|
119
|
+
|
|
120
|
+
# Search Google
|
|
121
|
+
results = client.serp_search(
|
|
122
|
+
query="latest AI trends",
|
|
123
|
+
engine=Engine.GOOGLE,
|
|
124
|
+
num=10,
|
|
125
|
+
location="United States"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
for item in results.get("organic", []):
|
|
129
|
+
print(f"{item['title']} - {item['link']}")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### 2. Universal Scrape (Web Unlocker)
|
|
133
|
+
|
|
134
|
+
Automatically handles JS rendering, CAPTCHAs, and fingerprinting.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
html = client.universal_scrape(
|
|
138
|
+
url="https://example.com",
|
|
139
|
+
js_render=True,
|
|
140
|
+
country="us",
|
|
141
|
+
wait_for=".content-loaded" # Smart waiting
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### 3. High-Performance Proxy Tunneling
|
|
146
|
+
|
|
147
|
+
Use Thordata's residential IPs directly with `requests` (Sync) or `aiohttp` (Async). The SDK handles the complex authentication and rotation logic.
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from thordata import ProxyConfig, ProxyProduct
|
|
151
|
+
|
|
152
|
+
# Config is optional if env vars are set
|
|
153
|
+
proxy = ProxyConfig(
|
|
154
|
+
product=ProxyProduct.RESIDENTIAL,
|
|
155
|
+
country="jp",
|
|
156
|
+
session_duration=10 # Sticky IP for 10 mins
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# The client automatically routes this through Thordata's network
|
|
160
|
+
response = client.get("https://httpbin.org/ip", proxy_config=proxy)
|
|
161
|
+
print(response.json())
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## ⚙️ Advanced Usage
|
|
167
|
+
|
|
168
|
+
### Async High-Concurrency
|
|
169
|
+
|
|
170
|
+
Perfect for building high-throughput AI agents.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
import asyncio
|
|
174
|
+
from thordata import AsyncThordataClient
|
|
175
|
+
|
|
176
|
+
async def main():
|
|
177
|
+
async with AsyncThordataClient() as client:
|
|
178
|
+
# Fire off 10 requests in parallel
|
|
179
|
+
tasks = [client.serp_search(f"query {i}") for i in range(10)]
|
|
180
|
+
results = await asyncio.gather(*tasks)
|
|
181
|
+
print(f"Completed {len(results)} searches")
|
|
182
|
+
|
|
183
|
+
asyncio.run(main())
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Task Management (Batch Scraping)
|
|
187
|
+
|
|
188
|
+
Handle large-scale scraping jobs asynchronously.
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# 1. Create a task
|
|
192
|
+
task_id = client.create_scraper_task(
|
|
193
|
+
file_name="daily_scrape",
|
|
194
|
+
spider_id="universal",
|
|
195
|
+
spider_name="universal",
|
|
196
|
+
parameters={"url": "https://example.com"}
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# 2. Poll for completion (Helper method)
|
|
200
|
+
status = client.wait_for_task(task_id, max_wait=600)
|
|
201
|
+
|
|
202
|
+
# 3. Download results
|
|
203
|
+
if status == "finished":
|
|
204
|
+
data_url = client.get_task_result(task_id)
|
|
205
|
+
print(f"Download: {data_url}")
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Web Scraper Tools (120+ Pre-built Tools)
|
|
209
|
+
|
|
210
|
+
Use pre-built tools for popular platforms. See the [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for the full list.
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from thordata import ThordataClient
|
|
214
|
+
from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
|
|
215
|
+
|
|
216
|
+
client = ThordataClient()
|
|
217
|
+
|
|
218
|
+
# Amazon Product by ASIN
|
|
219
|
+
task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
|
|
220
|
+
|
|
221
|
+
# Google Maps by Place ID
|
|
222
|
+
task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
|
|
223
|
+
|
|
224
|
+
# YouTube Video Download
|
|
225
|
+
from thordata import CommonSettings
|
|
226
|
+
settings = CommonSettings(resolution="<=360p", video_codec="vp9")
|
|
227
|
+
task_id = client.run_tool(YouTube.VideoDownload(
|
|
228
|
+
url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
|
|
229
|
+
common_settings=settings
|
|
230
|
+
))
|
|
231
|
+
|
|
232
|
+
# Wait and get results
|
|
233
|
+
status = client.wait_for_task(task_id, max_wait=300)
|
|
234
|
+
if status == "ready":
|
|
235
|
+
download_url = client.get_task_result(task_id)
|
|
236
|
+
print(f"Results: {download_url}")
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
**Available Platforms:**
|
|
240
|
+
- **E-Commerce**: Amazon, eBay, Walmart
|
|
241
|
+
- **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
|
|
242
|
+
- **Search**: Google Maps, Google Shopping, Google Play
|
|
243
|
+
- **Video**: YouTube (download, info, subtitles)
|
|
244
|
+
- **Code**: GitHub
|
|
245
|
+
- **Professional**: Indeed, Glassdoor, Crunchbase
|
|
246
|
+
- **Travel/Real Estate**: Booking, Airbnb, Zillow
|
|
247
|
+
|
|
248
|
+
See `examples/tools/` for more examples.
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## 🛠️ Management APIs
|
|
253
|
+
|
|
254
|
+
Manage your infrastructure programmatically.
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
# Check Balance
|
|
258
|
+
balance = client.get_traffic_balance()
|
|
259
|
+
|
|
260
|
+
# Manage Whitelist
|
|
261
|
+
client.add_whitelist_ip("1.2.3.4")
|
|
262
|
+
|
|
263
|
+
# Create Sub-users
|
|
264
|
+
client.create_proxy_user("new_user", "pass123", traffic_limit=500)
|
|
265
|
+
|
|
266
|
+
# Monitor Unlimited Proxies
|
|
267
|
+
monitor = client.unlimited.get_server_monitor(
|
|
268
|
+
ins_id="ins-123",
|
|
269
|
+
region="us",
|
|
270
|
+
start_time=1700000000,
|
|
271
|
+
end_time=1700003600
|
|
272
|
+
)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## 🧪 Development & Testing
|
|
278
|
+
|
|
279
|
+
- **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
|
|
280
|
+
- **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
|
|
281
|
+
- **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## 📄 License
|
|
286
|
+
|
|
287
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# Thordata Python SDK
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
<img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
|
|
6
|
+
|
|
7
|
+
**The Official Python Client for Thordata APIs**
|
|
8
|
+
|
|
9
|
+
*Proxy Network • SERP API • Web Unlocker • Web Scraper API*
|
|
10
|
+
|
|
11
|
+
[PyPI version](https://pypi.org/project/thordata-sdk/)
|
|
12
|
+
[Python versions](https://pypi.org/project/thordata-sdk/)
|
|
13
|
+
[License](LICENSE)
|
|
14
|
+
[CI status](https://github.com/Thordata/thordata-python-sdk/actions)
|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## 📖 Introduction
|
|
21
|
+
|
|
22
|
+
The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
23
|
+
|
|
24
|
+
**Why v1.6.0?**
|
|
25
|
+
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
26
|
+
* **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
|
|
27
|
+
* **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
|
|
28
|
+
* **🤖 Type Safe**: Fully typed (`mypy` strict) for excellent IDE autocompletion and error checking.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 📦 Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install thordata-sdk
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 🔐 Configuration
|
|
41
|
+
|
|
42
|
+
Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# [Scraping APIs]
|
|
46
|
+
export THORDATA_SCRAPER_TOKEN="your_scraper_token"
|
|
47
|
+
|
|
48
|
+
# [Management APIs]
|
|
49
|
+
export THORDATA_PUBLIC_TOKEN="your_public_token"
|
|
50
|
+
export THORDATA_PUBLIC_KEY="your_public_key"
|
|
51
|
+
|
|
52
|
+
# [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
|
|
53
|
+
export THORDATA_RESIDENTIAL_USERNAME="your_username"
|
|
54
|
+
export THORDATA_RESIDENTIAL_PASSWORD="your_password"
|
|
55
|
+
# Optional: Unlimited (high-bandwidth) if your plan has separate credentials
|
|
56
|
+
# export THORDATA_UNLIMITED_USERNAME="..."
|
|
57
|
+
# export THORDATA_UNLIMITED_PASSWORD="..."
|
|
58
|
+
|
|
59
|
+
# Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
|
|
60
|
+
# export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## 🚀 Quick Start
|
|
68
|
+
|
|
69
|
+
### 1. SERP Search (Google/Bing)
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from thordata import ThordataClient, Engine
|
|
73
|
+
|
|
74
|
+
client = ThordataClient()
|
|
75
|
+
|
|
76
|
+
# Search Google
|
|
77
|
+
results = client.serp_search(
|
|
78
|
+
query="latest AI trends",
|
|
79
|
+
engine=Engine.GOOGLE,
|
|
80
|
+
num=10,
|
|
81
|
+
location="United States"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
for item in results.get("organic", []):
|
|
85
|
+
print(f"{item['title']} - {item['link']}")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 2. Universal Scrape (Web Unlocker)
|
|
89
|
+
|
|
90
|
+
Automatically handles JS rendering, CAPTCHAs, and fingerprinting.
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
html = client.universal_scrape(
|
|
94
|
+
url="https://example.com",
|
|
95
|
+
js_render=True,
|
|
96
|
+
country="us",
|
|
97
|
+
wait_for=".content-loaded" # Smart waiting
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### 3. High-Performance Proxy Tunneling
|
|
102
|
+
|
|
103
|
+
Use Thordata's residential IPs directly with `requests` (Sync) or `aiohttp` (Async). The SDK handles the complex authentication and rotation logic.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from thordata import ProxyConfig, ProxyProduct
|
|
107
|
+
|
|
108
|
+
# Config is optional if env vars are set
|
|
109
|
+
proxy = ProxyConfig(
|
|
110
|
+
product=ProxyProduct.RESIDENTIAL,
|
|
111
|
+
country="jp",
|
|
112
|
+
session_duration=10 # Sticky IP for 10 mins
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# The client automatically routes this through Thordata's network
|
|
116
|
+
response = client.get("https://httpbin.org/ip", proxy_config=proxy)
|
|
117
|
+
print(response.json())
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## ⚙️ Advanced Usage
|
|
123
|
+
|
|
124
|
+
### Async High-Concurrency
|
|
125
|
+
|
|
126
|
+
Perfect for building high-throughput AI agents.
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import asyncio
|
|
130
|
+
from thordata import AsyncThordataClient
|
|
131
|
+
|
|
132
|
+
async def main():
|
|
133
|
+
async with AsyncThordataClient() as client:
|
|
134
|
+
# Fire off 10 requests in parallel
|
|
135
|
+
tasks = [client.serp_search(f"query {i}") for i in range(10)]
|
|
136
|
+
results = await asyncio.gather(*tasks)
|
|
137
|
+
print(f"Completed {len(results)} searches")
|
|
138
|
+
|
|
139
|
+
asyncio.run(main())
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Task Management (Batch Scraping)
|
|
143
|
+
|
|
144
|
+
Handle large-scale scraping jobs asynchronously.
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
# 1. Create a task
|
|
148
|
+
task_id = client.create_scraper_task(
|
|
149
|
+
file_name="daily_scrape",
|
|
150
|
+
spider_id="universal",
|
|
151
|
+
spider_name="universal",
|
|
152
|
+
parameters={"url": "https://example.com"}
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# 2. Poll for completion (Helper method)
|
|
156
|
+
status = client.wait_for_task(task_id, max_wait=600)
|
|
157
|
+
|
|
158
|
+
# 3. Download results
|
|
159
|
+
if status == "finished":
|
|
160
|
+
data_url = client.get_task_result(task_id)
|
|
161
|
+
print(f"Download: {data_url}")
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Web Scraper Tools (120+ Pre-built Tools)
|
|
165
|
+
|
|
166
|
+
Use pre-built tools for popular platforms. See the [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for the full list.
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from thordata import ThordataClient
|
|
170
|
+
from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
|
|
171
|
+
|
|
172
|
+
client = ThordataClient()
|
|
173
|
+
|
|
174
|
+
# Amazon Product by ASIN
|
|
175
|
+
task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
|
|
176
|
+
|
|
177
|
+
# Google Maps by Place ID
|
|
178
|
+
task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
|
|
179
|
+
|
|
180
|
+
# YouTube Video Download
|
|
181
|
+
from thordata import CommonSettings
|
|
182
|
+
settings = CommonSettings(resolution="<=360p", video_codec="vp9")
|
|
183
|
+
task_id = client.run_tool(YouTube.VideoDownload(
|
|
184
|
+
url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
|
|
185
|
+
common_settings=settings
|
|
186
|
+
))
|
|
187
|
+
|
|
188
|
+
# Wait and get results
|
|
189
|
+
status = client.wait_for_task(task_id, max_wait=300)
|
|
190
|
+
if status == "ready":
|
|
191
|
+
download_url = client.get_task_result(task_id)
|
|
192
|
+
print(f"Results: {download_url}")
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
**Available Platforms:**
|
|
196
|
+
- **E-Commerce**: Amazon, eBay, Walmart
|
|
197
|
+
- **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
|
|
198
|
+
- **Search**: Google Maps, Google Shopping, Google Play
|
|
199
|
+
- **Video**: YouTube (download, info, subtitles)
|
|
200
|
+
- **Code**: GitHub
|
|
201
|
+
- **Professional**: Indeed, Glassdoor, Crunchbase
|
|
202
|
+
- **Travel/Real Estate**: Booking, Airbnb, Zillow
|
|
203
|
+
|
|
204
|
+
See `examples/tools/` for more examples.
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## 🛠️ Management APIs
|
|
209
|
+
|
|
210
|
+
Manage your infrastructure programmatically.
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
# Check Balance
|
|
214
|
+
balance = client.get_traffic_balance()
|
|
215
|
+
|
|
216
|
+
# Manage Whitelist
|
|
217
|
+
client.add_whitelist_ip("1.2.3.4")
|
|
218
|
+
|
|
219
|
+
# Create Sub-users
|
|
220
|
+
client.create_proxy_user("new_user", "pass123", traffic_limit=500)
|
|
221
|
+
|
|
222
|
+
# Monitor Unlimited Proxies
|
|
223
|
+
monitor = client.unlimited.get_server_monitor(
|
|
224
|
+
ins_id="ins-123",
|
|
225
|
+
region="us",
|
|
226
|
+
start_time=1700000000,
|
|
227
|
+
end_time=1700003600
|
|
228
|
+
)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## 🧪 Development & Testing
|
|
234
|
+
|
|
235
|
+
- **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
|
|
236
|
+
- **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
|
|
237
|
+
- **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## 📄 License
|
|
242
|
+
|
|
243
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "thordata-sdk"
|
|
8
|
-
version = "1.4.0"
|
|
8
|
+
version = "1.6.0"
|
|
9
9
|
description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
@@ -120,11 +120,11 @@ ignore_missing_imports = true
|
|
|
120
120
|
module = ["aiohttp.*", "requests.*"]
|
|
121
121
|
ignore_missing_imports = true
|
|
122
122
|
|
|
123
|
-
# Pytest setup
|
|
123
|
+
# Pytest setup (coverage is run separately via coverage CLI for reliability)
|
|
124
124
|
[tool.pytest.ini_options]
|
|
125
125
|
testpaths = ["tests"]
|
|
126
126
|
asyncio_mode = "auto"
|
|
127
|
-
addopts = "-v
|
|
127
|
+
addopts = "-v"
|
|
128
128
|
markers = ["integration: live tests that require real credentials"]
|
|
129
129
|
|
|
130
130
|
# Coverage setup
|
|
@@ -3,47 +3,17 @@ Thordata Python SDK
|
|
|
3
3
|
|
|
4
4
|
Official Python client for Thordata's Proxy Network, SERP API,
|
|
5
5
|
Universal Scraping API (Web Unlocker), and Web Scraper API.
|
|
6
|
-
|
|
7
|
-
Basic Usage:
|
|
8
|
-
>>> from thordata import ThordataClient
|
|
9
|
-
>>>
|
|
10
|
-
>>> client = ThordataClient(
|
|
11
|
-
... scraper_token="your_token",
|
|
12
|
-
... public_token="your_public_token",
|
|
13
|
-
... public_key="your_public_key"
|
|
14
|
-
... )
|
|
15
|
-
>>>
|
|
16
|
-
>>> # Proxy request
|
|
17
|
-
>>> response = client.get("https://httpbin.org/ip")
|
|
18
|
-
>>>
|
|
19
|
-
>>> # SERP search
|
|
20
|
-
>>> results = client.serp_search("python tutorial", engine="google")
|
|
21
|
-
>>>
|
|
22
|
-
>>> # Universal scrape
|
|
23
|
-
>>> html = client.universal_scrape("https://example.com", js_render=True)
|
|
24
|
-
|
|
25
|
-
Async Usage:
|
|
26
|
-
>>> from thordata import AsyncThordataClient
|
|
27
|
-
>>> import asyncio
|
|
28
|
-
>>>
|
|
29
|
-
>>> async def main():
|
|
30
|
-
... async with AsyncThordataClient(
|
|
31
|
-
... scraper_token="your_token"
|
|
32
|
-
... ) as client:
|
|
33
|
-
... response = await client.get("https://httpbin.org/ip")
|
|
34
|
-
>>>
|
|
35
|
-
>>> asyncio.run(main())
|
|
36
6
|
"""
|
|
37
7
|
|
|
38
|
-
__version__ = "1.4.0"
|
|
39
|
-
__author__ = "Thordata Developer Team"
|
|
8
|
+
__version__ = "1.6.0"
|
|
9
|
+
__author__ = "Thordata Developer Team/Kael Odin"
|
|
40
10
|
__email__ = "support@thordata.com"
|
|
41
11
|
|
|
42
12
|
# Main clients
|
|
43
13
|
from .async_client import AsyncThordataClient
|
|
44
14
|
from .client import ThordataClient
|
|
45
15
|
|
|
46
|
-
# Enums
|
|
16
|
+
# Enums (Legacy Import Path)
|
|
47
17
|
from .enums import (
|
|
48
18
|
BingSearchType,
|
|
49
19
|
Continent,
|
|
@@ -76,7 +46,7 @@ from .exceptions import (
|
|
|
76
46
|
ThordataValidationError,
|
|
77
47
|
)
|
|
78
48
|
|
|
79
|
-
# Models
|
|
49
|
+
# Models (Legacy Import Path)
|
|
80
50
|
from .models import (
|
|
81
51
|
CommonSettings,
|
|
82
52
|
ProxyConfig,
|
|
@@ -99,12 +69,9 @@ from .retry import RetryConfig
|
|
|
99
69
|
|
|
100
70
|
# Public API
|
|
101
71
|
__all__ = [
|
|
102
|
-
# Version
|
|
103
72
|
"__version__",
|
|
104
|
-
# Clients
|
|
105
73
|
"ThordataClient",
|
|
106
74
|
"AsyncThordataClient",
|
|
107
|
-
# Enums
|
|
108
75
|
"Engine",
|
|
109
76
|
"GoogleSearchType",
|
|
110
77
|
"BingSearchType",
|
|
@@ -120,7 +87,6 @@ __all__ = [
|
|
|
120
87
|
"ProxyHost",
|
|
121
88
|
"ProxyPort",
|
|
122
89
|
"GoogleTbm",
|
|
123
|
-
# Models
|
|
124
90
|
"ProxyConfig",
|
|
125
91
|
"ProxyProduct",
|
|
126
92
|
"ProxyServer",
|
|
@@ -135,7 +101,6 @@ __all__ = [
|
|
|
135
101
|
"CommonSettings",
|
|
136
102
|
"VideoTaskConfig",
|
|
137
103
|
"TaskStatusResponse",
|
|
138
|
-
# Exceptions
|
|
139
104
|
"ThordataError",
|
|
140
105
|
"ThordataConfigError",
|
|
141
106
|
"ThordataNetworkError",
|
|
@@ -146,6 +111,5 @@ __all__ = [
|
|
|
146
111
|
"ThordataServerError",
|
|
147
112
|
"ThordataValidationError",
|
|
148
113
|
"ThordataNotCollectedError",
|
|
149
|
-
# Retry
|
|
150
114
|
"RetryConfig",
|
|
151
115
|
]
|