thordata-sdk 0.3.1__tar.gz → 0.4.0__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- thordata_sdk-0.4.0/LICENSE +21 -0
- thordata_sdk-0.4.0/PKG-INFO +678 -0
- thordata_sdk-0.4.0/README.md +637 -0
- thordata_sdk-0.4.0/pyproject.toml +136 -0
- thordata_sdk-0.4.0/src/thordata/__init__.py +138 -0
- thordata_sdk-0.4.0/src/thordata/_utils.py +126 -0
- thordata_sdk-0.4.0/src/thordata/async_client.py +730 -0
- thordata_sdk-0.4.0/src/thordata/client.py +936 -0
- thordata_sdk-0.4.0/src/thordata/enums.py +289 -0
- thordata_sdk-0.4.0/src/thordata/exceptions.py +315 -0
- thordata_sdk-0.4.0/src/thordata/models.py +698 -0
- thordata_sdk-0.4.0/src/thordata/retry.py +382 -0
- thordata_sdk-0.4.0/src/thordata_sdk.egg-info/PKG-INFO +678 -0
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/src/thordata_sdk.egg-info/SOURCES.txt +10 -1
- thordata_sdk-0.4.0/src/thordata_sdk.egg-info/requires.txt +11 -0
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/tests/test_async_client.py +25 -12
- thordata_sdk-0.4.0/tests/test_async_client_errors.py +109 -0
- thordata_sdk-0.4.0/tests/test_client.py +81 -0
- thordata_sdk-0.4.0/tests/test_client_errors.py +96 -0
- thordata_sdk-0.4.0/tests/test_enums.py +125 -0
- thordata_sdk-0.4.0/tests/test_exceptions.py +165 -0
- thordata_sdk-0.4.0/tests/test_models.py +328 -0
- thordata_sdk-0.3.1/LICENSE +0 -201
- thordata_sdk-0.3.1/PKG-INFO +0 -200
- thordata_sdk-0.3.1/README.md +0 -171
- thordata_sdk-0.3.1/pyproject.toml +0 -42
- thordata_sdk-0.3.1/src/thordata/__init__.py +0 -16
- thordata_sdk-0.3.1/src/thordata/async_client.py +0 -281
- thordata_sdk-0.3.1/src/thordata/client.py +0 -486
- thordata_sdk-0.3.1/src/thordata/enums.py +0 -25
- thordata_sdk-0.3.1/src/thordata_sdk.egg-info/PKG-INFO +0 -200
- thordata_sdk-0.3.1/src/thordata_sdk.egg-info/requires.txt +0 -2
- thordata_sdk-0.3.1/tests/test_client.py +0 -53
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/setup.cfg +0 -0
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/src/thordata/parameters.py +0 -0
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-0.3.1 → thordata_sdk-0.4.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Thordata · AI Proxy & Web Data
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thordata-sdk
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
|
+
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.thordata.com
|
|
8
|
+
Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
|
|
9
|
+
Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
|
|
10
|
+
Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Internet :: Proxy Servers
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
25
|
+
Classifier: Operating System :: OS Independent
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.8
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: requests>=2.25.0
|
|
31
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
38
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
# Thordata Python SDK
|
|
43
|
+
|
|
44
|
+
<div align="center">
|
|
45
|
+
|
|
46
|
+
**Official Python client for Thordata's Proxy Network, SERP API, Web Unlocker, and Web Scraper API.**
|
|
47
|
+
|
|
48
|
+
*Async-ready, type-safe, built for AI agents and large-scale data collection.*
|
|
49
|
+
|
|
50
|
+
[CI](https://github.com/Thordata/thordata-python-sdk/actions/workflows/ci.yml)
|
|
51
|
+
[PyPI version](https://pypi.org/project/thordata-sdk/)
|
|
52
|
+
[Python 3.8+](https://python.org)
|
|
53
|
+
[License: MIT](LICENSE)
|
|
54
|
+
[GitHub repository](https://github.com/Thordata/thordata-python-sdk)
|
|
55
|
+
|
|
56
|
+
[Documentation](https://doc.thordata.com) • [Dashboard](https://www.thordata.com) • [Examples](examples/) • [Changelog](CHANGELOG.md)
|
|
57
|
+
|
|
58
|
+
</div>
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## ✨ Features
|
|
63
|
+
|
|
64
|
+
| Feature | Description |
|
|
65
|
+
|---------|-------------|
|
|
66
|
+
| 🌐 **Proxy Network** | Residential, Mobile, Datacenter, ISP proxies with geo-targeting |
|
|
67
|
+
| 🔍 **SERP API** | Google, Bing, Yandex, DuckDuckGo, Baidu search results |
|
|
68
|
+
| 🔓 **Web Unlocker** | Bypass Cloudflare, CAPTCHAs, anti-bot systems automatically |
|
|
69
|
+
| 🕷️ **Web Scraper** | Async task-based scraping for complex sites |
|
|
70
|
+
| ⚡ **Async Support** | Full async/await support with aiohttp |
|
|
71
|
+
| 🔄 **Auto Retry** | Configurable retry with exponential backoff |
|
|
72
|
+
| 📝 **Type Safe** | Full type annotations for IDE autocomplete |
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## 📦 Installation
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install thordata-sdk
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
For development:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install "thordata-sdk[dev]"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## 🚀 Quick Start
|
|
91
|
+
|
|
92
|
+
### Get Your Credentials
|
|
93
|
+
|
|
94
|
+
1. Sign up at [thordata.com](https://www.thordata.com)
|
|
95
|
+
2. Navigate to your Dashboard
|
|
96
|
+
3. Copy your Scraper Token, Public Token, and Public Key
|
|
97
|
+
|
|
98
|
+
### Basic Usage
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from thordata import ThordataClient
|
|
102
|
+
|
|
103
|
+
# Initialize the client
|
|
104
|
+
client = ThordataClient(
|
|
105
|
+
scraper_token="your_scraper_token",
|
|
106
|
+
public_token="your_public_token", # Optional, for task APIs
|
|
107
|
+
public_key="your_public_key" # Optional, for task APIs
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Make a request through the proxy network
|
|
111
|
+
response = client.get("https://httpbin.org/ip")
|
|
112
|
+
print(response.json())
|
|
113
|
+
# {'origin': '123.45.67.89'} # Residential IP
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Environment Variables
|
|
117
|
+
|
|
118
|
+
Create a `.env` file:
|
|
119
|
+
|
|
120
|
+
```env
|
|
121
|
+
THORDATA_SCRAPER_TOKEN=your_scraper_token
|
|
122
|
+
THORDATA_PUBLIC_TOKEN=your_public_token
|
|
123
|
+
THORDATA_PUBLIC_KEY=your_public_key
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Then load it with python-dotenv (not installed with this SDK; install it separately with `pip install python-dotenv`):
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import os
|
|
130
|
+
from dotenv import load_dotenv
|
|
131
|
+
from thordata import ThordataClient
|
|
132
|
+
|
|
133
|
+
load_dotenv()
|
|
134
|
+
|
|
135
|
+
client = ThordataClient(
|
|
136
|
+
scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
|
|
137
|
+
public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
|
|
138
|
+
public_key=os.getenv("THORDATA_PUBLIC_KEY"),
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 📖 Usage Guide
|
|
145
|
+
|
|
146
|
+
### 1. Proxy Network
|
|
147
|
+
|
|
148
|
+
#### Basic Proxy Request
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from thordata import ThordataClient
|
|
152
|
+
|
|
153
|
+
client = ThordataClient(scraper_token="your_token")
|
|
154
|
+
|
|
155
|
+
# GET request through proxy
|
|
156
|
+
response = client.get("https://example.com")
|
|
157
|
+
print(response.text)
|
|
158
|
+
|
|
159
|
+
# POST request through proxy
|
|
160
|
+
response = client.post("https://httpbin.org/post", json={"key": "value"})
|
|
161
|
+
print(response.json())
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
#### Geo-Targeting
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from thordata import ThordataClient, ProxyConfig
|
|
168
|
+
|
|
169
|
+
client = ThordataClient(scraper_token="your_token")
|
|
170
|
+
|
|
171
|
+
# Create a proxy config with geo-targeting
|
|
172
|
+
config = ProxyConfig(
|
|
173
|
+
username="your_username",
|
|
174
|
+
password="your_password",
|
|
175
|
+
country="us", # Target country
|
|
176
|
+
state="california", # Target state
|
|
177
|
+
city="los_angeles", # Target city
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
response = client.get("https://httpbin.org/ip", proxy_config=config)
|
|
181
|
+
print(response.json())
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
#### Sticky Sessions
|
|
185
|
+
|
|
186
|
+
Keep the same IP for multiple requests:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from thordata import ThordataClient, StickySession
|
|
190
|
+
|
|
191
|
+
client = ThordataClient(scraper_token="your_token")
|
|
192
|
+
|
|
193
|
+
# Create a sticky session (same IP for 10 minutes)
|
|
194
|
+
session = StickySession(
|
|
195
|
+
username="your_username",
|
|
196
|
+
password="your_password",
|
|
197
|
+
country="gb",
|
|
198
|
+
duration_minutes=10,
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# All requests use the same IP
|
|
202
|
+
for i in range(5):
|
|
203
|
+
response = client.get("https://httpbin.org/ip", proxy_config=session)
|
|
204
|
+
print(f"Request {i+1}: {response.json()['origin']}")
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
#### Different Proxy Products
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
from thordata import ProxyConfig, ProxyProduct
|
|
211
|
+
|
|
212
|
+
# Residential proxy (default, port 9999)
|
|
213
|
+
residential = ProxyConfig(
|
|
214
|
+
username="user", password="pass",
|
|
215
|
+
product=ProxyProduct.RESIDENTIAL
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
# Mobile proxy (port 5555)
|
|
219
|
+
mobile = ProxyConfig(
|
|
220
|
+
username="user", password="pass",
|
|
221
|
+
product=ProxyProduct.MOBILE
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
# Datacenter proxy (port 7777)
|
|
225
|
+
datacenter = ProxyConfig(
|
|
226
|
+
username="user", password="pass",
|
|
227
|
+
product=ProxyProduct.DATACENTER
|
|
228
|
+
)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### 2. SERP API (Search Engine Results)
|
|
232
|
+
|
|
233
|
+
#### Basic Search
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
from thordata import ThordataClient, Engine
|
|
237
|
+
|
|
238
|
+
client = ThordataClient(scraper_token="your_token")
|
|
239
|
+
|
|
240
|
+
# Google search
|
|
241
|
+
results = client.serp_search(
|
|
242
|
+
query="python programming",
|
|
243
|
+
engine=Engine.GOOGLE,
|
|
244
|
+
num=10
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
# Print organic results
|
|
248
|
+
for result in results.get("organic", []):
|
|
249
|
+
print(f"{result['title']}: {result['link']}")
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
#### Advanced Search Options
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from thordata import ThordataClient, SerpRequest
|
|
256
|
+
|
|
257
|
+
client = ThordataClient(scraper_token="your_token")
|
|
258
|
+
|
|
259
|
+
# Create a detailed search request
|
|
260
|
+
request = SerpRequest(
|
|
261
|
+
query="best laptops 2024",
|
|
262
|
+
engine="google",
|
|
263
|
+
num=20,
|
|
264
|
+
country="us",
|
|
265
|
+
language="en",
|
|
266
|
+
search_type="shopping", # shopping, news, images, videos
|
|
267
|
+
time_filter="month", # hour, day, week, month, year
|
|
268
|
+
safe_search=True,
|
|
269
|
+
device="mobile", # desktop, mobile, tablet
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
results = client.serp_search_advanced(request)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
#### Multiple Search Engines
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
from thordata import ThordataClient, Engine
|
|
279
|
+
|
|
280
|
+
client = ThordataClient(scraper_token="your_token")
|
|
281
|
+
|
|
282
|
+
# Google
|
|
283
|
+
google_results = client.serp_search("AI news", engine=Engine.GOOGLE)
|
|
284
|
+
|
|
285
|
+
# Bing
|
|
286
|
+
bing_results = client.serp_search("AI news", engine=Engine.BING)
|
|
287
|
+
|
|
288
|
+
# Yandex (Russian search engine)
|
|
289
|
+
yandex_results = client.serp_search("AI news", engine=Engine.YANDEX)
|
|
290
|
+
|
|
291
|
+
# DuckDuckGo
|
|
292
|
+
ddg_results = client.serp_search("AI news", engine=Engine.DUCKDUCKGO)
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### 3. Web Unlocker (Universal Scraping API)
|
|
296
|
+
|
|
297
|
+
Automatically bypass anti-bot protections:
|
|
298
|
+
|
|
299
|
+
#### Basic Usage
|
|
300
|
+
|
|
301
|
+
```python
|
|
302
|
+
from thordata import ThordataClient
|
|
303
|
+
|
|
304
|
+
client = ThordataClient(scraper_token="your_token")
|
|
305
|
+
|
|
306
|
+
# Get HTML content
|
|
307
|
+
html = client.universal_scrape(
|
|
308
|
+
url="https://example.com",
|
|
309
|
+
js_render=True, # Enable JavaScript rendering
|
|
310
|
+
)
|
|
311
|
+
print(html[:500])
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
#### Advanced Options
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
from thordata import ThordataClient, UniversalScrapeRequest
|
|
318
|
+
|
|
319
|
+
client = ThordataClient(scraper_token="your_token")
|
|
320
|
+
|
|
321
|
+
request = UniversalScrapeRequest(
|
|
322
|
+
url="https://example.com",
|
|
323
|
+
js_render=True,
|
|
324
|
+
output_format="html",
|
|
325
|
+
country="us",
|
|
326
|
+
block_resources="image,font", # Speed up by blocking resources
|
|
327
|
+
clean_content="js,css", # Remove JS/CSS from output
|
|
328
|
+
wait=5000, # Wait 5 seconds after load
|
|
329
|
+
wait_for=".content-loaded", # Wait for CSS selector
|
|
330
|
+
headers=[
|
|
331
|
+
{"name": "Accept-Language", "value": "en-US"}
|
|
332
|
+
],
|
|
333
|
+
cookies=[
|
|
334
|
+
{"name": "session", "value": "abc123"}
|
|
335
|
+
],
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
html = client.universal_scrape_advanced(request)
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
#### Take Screenshots
|
|
342
|
+
|
|
343
|
+
```python
|
|
344
|
+
from thordata import ThordataClient
|
|
345
|
+
|
|
346
|
+
client = ThordataClient(scraper_token="your_token")
|
|
347
|
+
|
|
348
|
+
# Get PNG screenshot
|
|
349
|
+
png_bytes = client.universal_scrape(
|
|
350
|
+
url="https://example.com",
|
|
351
|
+
js_render=True,
|
|
352
|
+
output_format="png",
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
# Save to file
|
|
356
|
+
with open("screenshot.png", "wb") as f:
|
|
357
|
+
f.write(png_bytes)
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
### 4. Web Scraper API (Async Tasks)
|
|
361
|
+
|
|
362
|
+
For complex scraping jobs that run asynchronously:
|
|
363
|
+
|
|
364
|
+
```python
|
|
365
|
+
from thordata import ThordataClient
|
|
366
|
+
|
|
367
|
+
client = ThordataClient(
|
|
368
|
+
scraper_token="your_token",
|
|
369
|
+
public_token="your_public_token",
|
|
370
|
+
public_key="your_public_key",
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# Create a scraping task
|
|
374
|
+
task_id = client.create_scraper_task(
|
|
375
|
+
file_name="youtube_channel_data",
|
|
376
|
+
spider_id="youtube_video-post_by-url", # From Dashboard
|
|
377
|
+
spider_name="youtube.com",
|
|
378
|
+
parameters={
|
|
379
|
+
"url": "https://www.youtube.com/@PewDiePie/videos",
|
|
380
|
+
"num_of_posts": "50"
|
|
381
|
+
}
|
|
382
|
+
)
|
|
383
|
+
print(f"Task created: {task_id}")
|
|
384
|
+
|
|
385
|
+
# Wait for completion (with timeout)
|
|
386
|
+
status = client.wait_for_task(task_id, max_wait=300)
|
|
387
|
+
print(f"Task status: {status}")
|
|
388
|
+
|
|
389
|
+
# Get results
|
|
390
|
+
if status in ("ready", "success"):
|
|
391
|
+
download_url = client.get_task_result(task_id)
|
|
392
|
+
print(f"Download: {download_url}")
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
### 5. Async Client (High Concurrency)
|
|
396
|
+
|
|
397
|
+
For maximum performance with concurrent requests:
|
|
398
|
+
|
|
399
|
+
```python
|
|
400
|
+
import asyncio
|
|
401
|
+
from thordata import AsyncThordataClient
|
|
402
|
+
|
|
403
|
+
async def main():
|
|
404
|
+
async with AsyncThordataClient(
|
|
405
|
+
scraper_token="your_token",
|
|
406
|
+
public_token="your_public_token",
|
|
407
|
+
public_key="your_public_key",
|
|
408
|
+
) as client:
|
|
409
|
+
|
|
410
|
+
# Concurrent proxy requests
|
|
411
|
+
urls = [
|
|
412
|
+
"https://httpbin.org/ip",
|
|
413
|
+
"https://httpbin.org/headers",
|
|
414
|
+
"https://httpbin.org/user-agent",
|
|
415
|
+
]
|
|
416
|
+
|
|
417
|
+
tasks = [client.get(url) for url in urls]
|
|
418
|
+
responses = await asyncio.gather(*tasks)
|
|
419
|
+
|
|
420
|
+
for resp in responses:
|
|
421
|
+
print(await resp.json())
|
|
422
|
+
|
|
423
|
+
asyncio.run(main())
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
#### Async SERP Search
|
|
427
|
+
|
|
428
|
+
```python
|
|
429
|
+
import asyncio
|
|
430
|
+
from thordata import AsyncThordataClient, Engine
|
|
431
|
+
|
|
432
|
+
async def search_multiple():
|
|
433
|
+
async with AsyncThordataClient(scraper_token="your_token") as client:
|
|
434
|
+
queries = ["python", "javascript", "rust", "go"]
|
|
435
|
+
|
|
436
|
+
tasks = [
|
|
437
|
+
client.serp_search(q, engine=Engine.GOOGLE)
|
|
438
|
+
for q in queries
|
|
439
|
+
]
|
|
440
|
+
|
|
441
|
+
results = await asyncio.gather(*tasks)
|
|
442
|
+
|
|
443
|
+
for query, result in zip(queries, results):
|
|
444
|
+
count = len(result.get("organic", []))
|
|
445
|
+
print(f"{query}: {count} results")
|
|
446
|
+
|
|
447
|
+
asyncio.run(search_multiple())
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
### 6. Location APIs
|
|
451
|
+
|
|
452
|
+
Discover available geo-targeting options:
|
|
453
|
+
|
|
454
|
+
```python
|
|
455
|
+
from thordata import ThordataClient, ProxyType
|
|
456
|
+
|
|
457
|
+
client = ThordataClient(
|
|
458
|
+
scraper_token="your_token",
|
|
459
|
+
public_token="your_public_token",
|
|
460
|
+
public_key="your_public_key",
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
# List all supported countries
|
|
464
|
+
countries = client.list_countries(proxy_type=ProxyType.RESIDENTIAL)
|
|
465
|
+
print(f"Supported countries: {len(countries)}")
|
|
466
|
+
|
|
467
|
+
# List states for a country
|
|
468
|
+
states = client.list_states("US")
|
|
469
|
+
for state in states[:5]:
|
|
470
|
+
print(f" {state['state_code']}: {state['state_name']}")
|
|
471
|
+
|
|
472
|
+
# List cities
|
|
473
|
+
cities = client.list_cities("US", state_code="california")
|
|
474
|
+
print(f"Cities in California: {len(cities)}")
|
|
475
|
+
|
|
476
|
+
# List ASNs (for ISP targeting)
|
|
477
|
+
asns = client.list_asn("US")
|
|
478
|
+
for asn in asns[:5]:
|
|
479
|
+
print(f" {asn['asn_code']}: {asn['asn_name']}")
|
|
480
|
+
```
|
|
481
|
+
|
|
482
|
+
### 7. Error Handling
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
from thordata import (
|
|
486
|
+
ThordataClient,
|
|
487
|
+
ThordataError,
|
|
488
|
+
ThordataAuthError,
|
|
489
|
+
ThordataRateLimitError,
|
|
490
|
+
ThordataNetworkError,
|
|
491
|
+
ThordataTimeoutError,
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
client = ThordataClient(scraper_token="your_token")
|
|
495
|
+
|
|
496
|
+
try:
|
|
497
|
+
result = client.serp_search("test query")
|
|
498
|
+
except ThordataAuthError as e:
|
|
499
|
+
print(f"Authentication failed: {e}")
|
|
500
|
+
print(f"Check your token. Status code: {e.status_code}")
|
|
501
|
+
except ThordataRateLimitError as e:
|
|
502
|
+
print(f"Rate limited: {e}")
|
|
503
|
+
if e.retry_after:
|
|
504
|
+
print(f"Retry after {e.retry_after} seconds")
|
|
505
|
+
except ThordataTimeoutError as e:
|
|
506
|
+
print(f"Request timed out: {e}")
|
|
507
|
+
except ThordataNetworkError as e:
|
|
508
|
+
print(f"Network error: {e}")
|
|
509
|
+
except ThordataError as e:
|
|
510
|
+
print(f"General error: {e}")
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
### 8. Retry Configuration
|
|
514
|
+
|
|
515
|
+
Customize automatic retry behavior:
|
|
516
|
+
|
|
517
|
+
```python
|
|
518
|
+
from thordata import ThordataClient, RetryConfig
|
|
519
|
+
|
|
520
|
+
# Custom retry configuration
|
|
521
|
+
retry_config = RetryConfig(
|
|
522
|
+
max_retries=5, # Maximum retry attempts
|
|
523
|
+
backoff_factor=2.0, # Exponential backoff multiplier
|
|
524
|
+
max_backoff=120.0, # Maximum wait between retries
|
|
525
|
+
jitter=True, # Add randomness to prevent thundering herd
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
client = ThordataClient(
|
|
529
|
+
scraper_token="your_token",
|
|
530
|
+
retry_config=retry_config,
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
# Requests will automatically retry on transient failures
|
|
534
|
+
response = client.get("https://example.com")
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
---
|
|
538
|
+
|
|
539
|
+
## 🔧 Configuration Reference
|
|
540
|
+
|
|
541
|
+
### ThordataClient Parameters
|
|
542
|
+
|
|
543
|
+
| Parameter | Type | Default | Description |
|
|
544
|
+
|-----------|------|---------|-------------|
|
|
545
|
+
| scraper_token | str | required | API token from Dashboard |
|
|
546
|
+
| public_token | str | None | Public API token (for tasks/locations) |
|
|
547
|
+
| public_key | str | None | Public API key |
|
|
548
|
+
| proxy_host | str | "pr.thordata.net" | Proxy gateway host |
|
|
549
|
+
| proxy_port | int | 9999 | Proxy gateway port |
|
|
550
|
+
| timeout | int | 30 | Default request timeout (seconds) |
|
|
551
|
+
| retry_config | RetryConfig | None | Retry configuration |
|
|
552
|
+
|
|
553
|
+
### ProxyConfig Parameters
|
|
554
|
+
|
|
555
|
+
| Parameter | Type | Default | Description |
|
|
556
|
+
|-----------|------|---------|-------------|
|
|
557
|
+
| username | str | required | Proxy username |
|
|
558
|
+
| password | str | required | Proxy password |
|
|
559
|
+
| product | ProxyProduct | RESIDENTIAL | Proxy type |
|
|
560
|
+
| country | str | None | ISO 3166-1 alpha-2 code |
|
|
561
|
+
| state | str | None | State name (lowercase) |
|
|
562
|
+
| city | str | None | City name (lowercase) |
|
|
563
|
+
| continent | str | None | Continent code (af/an/as/eu/na/oc/sa) |
|
|
564
|
+
| asn | str | None | ASN code (requires country) |
|
|
565
|
+
| session_id | str | None | Session ID for sticky sessions |
|
|
566
|
+
| session_duration | int | None | Session duration (1-90 minutes) |
|
|
567
|
+
|
|
568
|
+
### Proxy Products & Ports
|
|
569
|
+
|
|
570
|
+
| Product | Port | Description |
|
|
571
|
+
|---------|------|-------------|
|
|
572
|
+
| RESIDENTIAL | 9999 | Rotating residential IPs |
|
|
573
|
+
| MOBILE | 5555 | Mobile carrier IPs |
|
|
574
|
+
| DATACENTER | 7777 | Datacenter IPs |
|
|
575
|
+
| ISP | 6666 | Static ISP IPs |
|
|
576
|
+
|
|
577
|
+
---
|
|
578
|
+
|
|
579
|
+
## 📁 Project Structure
|
|
580
|
+
|
|
581
|
+
```
|
|
582
|
+
thordata-python-sdk/
|
|
583
|
+
├── src/thordata/
|
|
584
|
+
│ ├── __init__.py # Public API exports
|
|
585
|
+
│ ├── client.py # Sync client
|
|
586
|
+
│ ├── async_client.py # Async client
|
|
587
|
+
│ ├── models.py # Data models (ProxyConfig, SerpRequest, etc.)
|
|
588
|
+
│ ├── enums.py # Enumerations
|
|
589
|
+
│ ├── exceptions.py # Exception hierarchy
|
|
590
|
+
│ ├── retry.py # Retry mechanism
|
|
591
|
+
│ └── _utils.py # Internal utilities
|
|
592
|
+
├── tests/ # Test suite
|
|
593
|
+
├── examples/ # Usage examples
|
|
594
|
+
├── pyproject.toml # Package configuration
|
|
595
|
+
└── README.md
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
---
|
|
599
|
+
|
|
600
|
+
## 🧪 Development
|
|
601
|
+
|
|
602
|
+
### Setup
|
|
603
|
+
|
|
604
|
+
```bash
|
|
605
|
+
# Clone the repository
|
|
606
|
+
git clone https://github.com/Thordata/thordata-python-sdk.git
|
|
607
|
+
cd thordata-python-sdk
|
|
608
|
+
|
|
609
|
+
# Create virtual environment
|
|
610
|
+
python -m venv venv
|
|
611
|
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
|
612
|
+
|
|
613
|
+
# Install with dev dependencies
|
|
614
|
+
pip install -e ".[dev]"
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
### Run Tests
|
|
618
|
+
|
|
619
|
+
```bash
|
|
620
|
+
# Run all tests
|
|
621
|
+
pytest
|
|
622
|
+
|
|
623
|
+
# Run with coverage
|
|
624
|
+
pytest --cov=thordata --cov-report=html
|
|
625
|
+
|
|
626
|
+
# Run specific test file
|
|
627
|
+
pytest tests/test_client.py -v
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
### Code Quality
|
|
631
|
+
|
|
632
|
+
```bash
|
|
633
|
+
# Format code
|
|
634
|
+
black src tests
|
|
635
|
+
|
|
636
|
+
# Lint
|
|
637
|
+
ruff check src tests
|
|
638
|
+
|
|
639
|
+
# Type check
|
|
640
|
+
mypy src
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
---
|
|
644
|
+
|
|
645
|
+
## 📝 Changelog
|
|
646
|
+
|
|
647
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
648
|
+
|
|
649
|
+
---
|
|
650
|
+
|
|
651
|
+
## 🤝 Contributing
|
|
652
|
+
|
|
653
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
654
|
+
|
|
655
|
+
1. Fork the repository
|
|
656
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
657
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
658
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
659
|
+
5. Open a Pull Request
|
|
660
|
+
|
|
661
|
+
---
|
|
662
|
+
|
|
663
|
+
## 📄 License
|
|
664
|
+
|
|
665
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
666
|
+
|
|
667
|
+
---
|
|
668
|
+
|
|
669
|
+
## 🆘 Support
|
|
670
|
+
|
|
671
|
+
- 📧 **Email**: support@thordata.com
|
|
672
|
+
- 📚 **Documentation**: [doc.thordata.com](https://doc.thordata.com)
|
|
673
|
+
- 🐛 **Issues**: [GitHub Issues](https://github.com/Thordata/thordata-python-sdk/issues)
|
|
674
|
+
- 💬 **Dashboard**: [thordata.com](https://www.thordata.com)
|
|
675
|
+
|
|
676
|
+
<div align="center">
|
|
677
|
+
<sub>Built with ❤️ by the Thordata Team</sub>
|
|
678
|
+
</div>
|