thordata-sdk 0.6.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_sdk-0.8.0/PKG-INFO +212 -0
- thordata_sdk-0.8.0/README.md +169 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/pyproject.toml +2 -1
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/__init__.py +15 -1
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/_utils.py +66 -3
- thordata_sdk-0.8.0/src/thordata/async_client.py +1594 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/client.py +851 -33
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/enums.py +85 -16
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/exceptions.py +16 -5
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/models.py +294 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/retry.py +4 -1
- thordata_sdk-0.8.0/src/thordata_sdk.egg-info/PKG-INFO +212 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata_sdk.egg-info/SOURCES.txt +2 -6
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata_sdk.egg-info/requires.txt +1 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_async_client_errors.py +23 -27
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_client.py +34 -2
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_client_errors.py +18 -20
- thordata_sdk-0.8.0/tests/test_examples.py +168 -0
- thordata_sdk-0.8.0/tests/test_spec_parity.py +58 -0
- thordata_sdk-0.6.0/PKG-INFO +0 -1053
- thordata_sdk-0.6.0/README.md +0 -1011
- thordata_sdk-0.6.0/src/thordata/async_client.py +0 -815
- thordata_sdk-0.6.0/src/thordata/parameters.py +0 -53
- thordata_sdk-0.6.0/src/thordata_sdk.egg-info/PKG-INFO +0 -1053
- thordata_sdk-0.6.0/tests/test_demo_entrypoint.py +0 -34
- thordata_sdk-0.6.0/tests/test_examples_async_high_concurrency.py +0 -42
- thordata_sdk-0.6.0/tests/test_examples_demo_serp_api.py +0 -69
- thordata_sdk-0.6.0/tests/test_examples_demo_universal.py +0 -61
- thordata_sdk-0.6.0/tests/test_examples_demo_web_scraper_api.py +0 -54
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/LICENSE +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/setup.cfg +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata/demo.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_async_client.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_enums.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_models.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_task_status_and_wait.py +0 -0
- {thordata_sdk-0.6.0 → thordata_sdk-0.8.0}/tests/test_user_agent.py +0 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thordata-sdk
|
|
3
|
+
Version: 0.8.0
|
|
4
|
+
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
|
+
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.thordata.com
|
|
8
|
+
Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
|
|
9
|
+
Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
|
|
10
|
+
Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Internet :: Proxy Servers
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
24
|
+
Classifier: Operating System :: OS Independent
|
|
25
|
+
Classifier: Typing :: Typed
|
|
26
|
+
Requires-Python: >=3.9
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: requests>=2.25.0
|
|
30
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
39
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
41
|
+
Requires-Dist: aioresponses>=0.7.6; extra == "dev"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# Thordata Python SDK
|
|
45
|
+
|
|
46
|
+
<div align="center">
|
|
47
|
+
|
|
48
|
+
**Official Python client for Thordata's Proxy Network, SERP API, Web Unlocker, and Web Scraper API.**
|
|
49
|
+
|
|
50
|
+
*Async-ready, type-safe, built for AI agents and large-scale data collection.*
|
|
51
|
+
|
|
52
|
+
[](https://pypi.org/project/thordata-sdk/)
|
|
53
|
+
[](https://python.org)
|
|
54
|
+
[](LICENSE)
|
|
55
|
+
|
|
56
|
+
[Documentation](https://doc.thordata.com) • [Dashboard](https://www.thordata.com) • [Examples](examples/)
|
|
57
|
+
|
|
58
|
+
</div>
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## ✨ Features
|
|
63
|
+
|
|
64
|
+
- 🌐 **Proxy Network**: Residential, Mobile, Datacenter, ISP proxies with geo-targeting
|
|
65
|
+
- 🔍 **SERP API**: Google, Bing, Yandex, DuckDuckGo search results
|
|
66
|
+
- 🔓 **Web Unlocker**: Bypass Cloudflare, CAPTCHAs, anti-bot systems
|
|
67
|
+
- 🕷️ **Web Scraper API**: Async task-based scraping (Text & Video/Audio)
|
|
68
|
+
- 📊 **Account Management**: Usage stats, sub-users, IP whitelist
|
|
69
|
+
- ⚡ **Async Support**: Full async/await support with aiohttp
|
|
70
|
+
- 🔄 **Auto Retry**: Configurable retry with exponential backoff
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## 📦 Installation
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install thordata-sdk
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## 🔐 Configuration
|
|
83
|
+
|
|
84
|
+
Set environment variables:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# Required for Scraper APIs (SERP, Universal, Tasks)
|
|
88
|
+
export THORDATA_SCRAPER_TOKEN=your_token
|
|
89
|
+
|
|
90
|
+
# Required for Public/Location APIs (Dashboard -> My Account)
|
|
91
|
+
export THORDATA_PUBLIC_TOKEN=your_public_token
|
|
92
|
+
export THORDATA_PUBLIC_KEY=your_public_key
|
|
93
|
+
|
|
94
|
+
# Required for Public API NEW (Dashboard -> Public API NEW)
|
|
95
|
+
# If not set, SDK falls back to PUBLIC_TOKEN/KEY
|
|
96
|
+
export THORDATA_SIGN=your_sign
|
|
97
|
+
export THORDATA_API_KEY=your_api_key
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 🚀 Quick Start
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from thordata import ThordataClient, Engine
|
|
106
|
+
|
|
107
|
+
# Initialize (reads from env vars)
|
|
108
|
+
client = ThordataClient(
|
|
109
|
+
scraper_token="your_token",
|
|
110
|
+
public_token="pub_token",
|
|
111
|
+
public_key="pub_key"
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
# SERP Search
|
|
115
|
+
results = client.serp_search("python tutorial", engine=Engine.GOOGLE)
|
|
116
|
+
print(f"Found {len(results.get('organic', []))} results")
|
|
117
|
+
|
|
118
|
+
# Universal Scrape
|
|
119
|
+
html = client.universal_scrape("https://httpbin.org/html")
|
|
120
|
+
print(html[:100])
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 📖 Feature Guide
|
|
126
|
+
|
|
127
|
+
### SERP API
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from thordata import SerpRequest
|
|
131
|
+
|
|
132
|
+
# Advanced search
|
|
133
|
+
results = client.serp_search_advanced(SerpRequest(
|
|
134
|
+
query="pizza",
|
|
135
|
+
engine="google_local",
|
|
136
|
+
country="us",
|
|
137
|
+
location="New York",
|
|
138
|
+
num=10
|
|
139
|
+
))
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Web Scraper API (Async Tasks)
|
|
143
|
+
|
|
144
|
+
**Create Task:**
|
|
145
|
+
```python
|
|
146
|
+
task_id = client.create_scraper_task(
|
|
147
|
+
file_name="my_task",
|
|
148
|
+
spider_id="universal",
|
|
149
|
+
spider_name="universal",
|
|
150
|
+
parameters={"url": "https://example.com"}
|
|
151
|
+
)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Video Download (New):**
|
|
155
|
+
```python
|
|
156
|
+
from thordata import CommonSettings
|
|
157
|
+
|
|
158
|
+
task_id = client.create_video_task(
|
|
159
|
+
file_name="{{VideoID}}",
|
|
160
|
+
spider_id="youtube_video_by-url",
|
|
161
|
+
spider_name="youtube.com",
|
|
162
|
+
parameters={"url": "https://youtube.com/watch?v=..."},
|
|
163
|
+
common_settings=CommonSettings(resolution="1080p")
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
**Wait & Download:**
|
|
168
|
+
```python
|
|
169
|
+
status = client.wait_for_task(task_id)
|
|
170
|
+
if status == "ready":
|
|
171
|
+
url = client.get_task_result(task_id)
|
|
172
|
+
print(url)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Account Management
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
# Usage Statistics
|
|
179
|
+
stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
|
|
180
|
+
print(f"Balance: {stats.balance_gb():.2f} GB")
|
|
181
|
+
|
|
182
|
+
# Proxy Users
|
|
183
|
+
users = client.list_proxy_users()
|
|
184
|
+
print(f"Sub-users: {users.user_count}")
|
|
185
|
+
|
|
186
|
+
# Whitelist IP
|
|
187
|
+
client.add_whitelist_ip("1.2.3.4")
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Proxy Network
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
from thordata import ProxyConfig
|
|
194
|
+
|
|
195
|
+
# Generate Proxy URL
|
|
196
|
+
proxy_url = client.build_proxy_url(
|
|
197
|
+
username="proxy_user",
|
|
198
|
+
password="proxy_pass",
|
|
199
|
+
country="us",
|
|
200
|
+
city="ny"
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# Use with requests
|
|
204
|
+
import requests
|
|
205
|
+
requests.get("https://httpbin.org/ip", proxies={"http": proxy_url, "https": proxy_url})
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## 📄 License
|
|
211
|
+
|
|
212
|
+
MIT License
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Thordata Python SDK
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
**Official Python client for Thordata's Proxy Network, SERP API, Web Unlocker, and Web Scraper API.**
|
|
6
|
+
|
|
7
|
+
*Async-ready, type-safe, built for AI agents and large-scale data collection.*
|
|
8
|
+
|
|
9
|
+
[](https://pypi.org/project/thordata-sdk/)
|
|
10
|
+
[](https://python.org)
|
|
11
|
+
[](LICENSE)
|
|
12
|
+
|
|
13
|
+
[Documentation](https://doc.thordata.com) • [Dashboard](https://www.thordata.com) • [Examples](examples/)
|
|
14
|
+
|
|
15
|
+
</div>
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## ✨ Features
|
|
20
|
+
|
|
21
|
+
- 🌐 **Proxy Network**: Residential, Mobile, Datacenter, ISP proxies with geo-targeting
|
|
22
|
+
- 🔍 **SERP API**: Google, Bing, Yandex, DuckDuckGo search results
|
|
23
|
+
- 🔓 **Web Unlocker**: Bypass Cloudflare, CAPTCHAs, anti-bot systems
|
|
24
|
+
- 🕷️ **Web Scraper API**: Async task-based scraping (Text & Video/Audio)
|
|
25
|
+
- 📊 **Account Management**: Usage stats, sub-users, IP whitelist
|
|
26
|
+
- ⚡ **Async Support**: Full async/await support with aiohttp
|
|
27
|
+
- 🔄 **Auto Retry**: Configurable retry with exponential backoff
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 📦 Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install thordata-sdk
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 🔐 Configuration
|
|
40
|
+
|
|
41
|
+
Set environment variables:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# Required for Scraper APIs (SERP, Universal, Tasks)
|
|
45
|
+
export THORDATA_SCRAPER_TOKEN=your_token
|
|
46
|
+
|
|
47
|
+
# Required for Public/Location APIs (Dashboard -> My Account)
|
|
48
|
+
export THORDATA_PUBLIC_TOKEN=your_public_token
|
|
49
|
+
export THORDATA_PUBLIC_KEY=your_public_key
|
|
50
|
+
|
|
51
|
+
# Required for Public API NEW (Dashboard -> Public API NEW)
|
|
52
|
+
# If not set, SDK falls back to PUBLIC_TOKEN/KEY
|
|
53
|
+
export THORDATA_SIGN=your_sign
|
|
54
|
+
export THORDATA_API_KEY=your_api_key
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## 🚀 Quick Start
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from thordata import ThordataClient, Engine
|
|
63
|
+
|
|
64
|
+
# Initialize (reads from env vars)
|
|
65
|
+
client = ThordataClient(
|
|
66
|
+
scraper_token="your_token",
|
|
67
|
+
public_token="pub_token",
|
|
68
|
+
public_key="pub_key"
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# SERP Search
|
|
72
|
+
results = client.serp_search("python tutorial", engine=Engine.GOOGLE)
|
|
73
|
+
print(f"Found {len(results.get('organic', []))} results")
|
|
74
|
+
|
|
75
|
+
# Universal Scrape
|
|
76
|
+
html = client.universal_scrape("https://httpbin.org/html")
|
|
77
|
+
print(html[:100])
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## 📖 Feature Guide
|
|
83
|
+
|
|
84
|
+
### SERP API
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from thordata import SerpRequest
|
|
88
|
+
|
|
89
|
+
# Advanced search
|
|
90
|
+
results = client.serp_search_advanced(SerpRequest(
|
|
91
|
+
query="pizza",
|
|
92
|
+
engine="google_local",
|
|
93
|
+
country="us",
|
|
94
|
+
location="New York",
|
|
95
|
+
num=10
|
|
96
|
+
))
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Web Scraper API (Async Tasks)
|
|
100
|
+
|
|
101
|
+
**Create Task:**
|
|
102
|
+
```python
|
|
103
|
+
task_id = client.create_scraper_task(
|
|
104
|
+
file_name="my_task",
|
|
105
|
+
spider_id="universal",
|
|
106
|
+
spider_name="universal",
|
|
107
|
+
parameters={"url": "https://example.com"}
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Video Download (New):**
|
|
112
|
+
```python
|
|
113
|
+
from thordata import CommonSettings
|
|
114
|
+
|
|
115
|
+
task_id = client.create_video_task(
|
|
116
|
+
file_name="{{VideoID}}",
|
|
117
|
+
spider_id="youtube_video_by-url",
|
|
118
|
+
spider_name="youtube.com",
|
|
119
|
+
parameters={"url": "https://youtube.com/watch?v=..."},
|
|
120
|
+
common_settings=CommonSettings(resolution="1080p")
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Wait & Download:**
|
|
125
|
+
```python
|
|
126
|
+
status = client.wait_for_task(task_id)
|
|
127
|
+
if status == "ready":
|
|
128
|
+
url = client.get_task_result(task_id)
|
|
129
|
+
print(url)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Account Management
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
# Usage Statistics
|
|
136
|
+
stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
|
|
137
|
+
print(f"Balance: {stats.balance_gb():.2f} GB")
|
|
138
|
+
|
|
139
|
+
# Proxy Users
|
|
140
|
+
users = client.list_proxy_users()
|
|
141
|
+
print(f"Sub-users: {users.user_count}")
|
|
142
|
+
|
|
143
|
+
# Whitelist IP
|
|
144
|
+
client.add_whitelist_ip("1.2.3.4")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Proxy Network
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from thordata import ProxyConfig
|
|
151
|
+
|
|
152
|
+
# Generate Proxy URL
|
|
153
|
+
proxy_url = client.build_proxy_url(
|
|
154
|
+
username="proxy_user",
|
|
155
|
+
password="proxy_pass",
|
|
156
|
+
country="us",
|
|
157
|
+
city="ny"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# Use with requests
|
|
161
|
+
import requests
|
|
162
|
+
requests.get("https://httpbin.org/ip", proxies={"http": proxy_url, "https": proxy_url})
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## 📄 License
|
|
168
|
+
|
|
169
|
+
MIT License
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "thordata-sdk"
|
|
7
|
-
version = "0.6.0"
|
|
7
|
+
version = "0.8.0"
|
|
8
8
|
description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -57,6 +57,7 @@ dev = [
|
|
|
57
57
|
"ruff>=0.1.0",
|
|
58
58
|
"mypy>=1.0.0",
|
|
59
59
|
"types-requests>=2.28.0",
|
|
60
|
+
"aioresponses>=0.7.6",
|
|
60
61
|
]
|
|
61
62
|
|
|
62
63
|
[project.urls]
|
|
@@ -35,7 +35,7 @@ Async Usage:
|
|
|
35
35
|
>>> asyncio.run(main())
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
-
__version__ = "0.6.0"
|
|
38
|
+
__version__ = "0.8.0"
|
|
39
39
|
__author__ = "Thordata Developer Team"
|
|
40
40
|
__email__ = "support@thordata.com"
|
|
41
41
|
|
|
@@ -52,6 +52,7 @@ from .enums import (
|
|
|
52
52
|
Device,
|
|
53
53
|
Engine,
|
|
54
54
|
GoogleSearchType,
|
|
55
|
+
GoogleTbm,
|
|
55
56
|
OutputFormat,
|
|
56
57
|
ProxyHost,
|
|
57
58
|
ProxyPort,
|
|
@@ -77,14 +78,20 @@ from .exceptions import (
|
|
|
77
78
|
|
|
78
79
|
# Models
|
|
79
80
|
from .models import (
|
|
81
|
+
CommonSettings,
|
|
80
82
|
ProxyConfig,
|
|
81
83
|
ProxyProduct,
|
|
84
|
+
ProxyServer,
|
|
85
|
+
ProxyUser,
|
|
86
|
+
ProxyUserList,
|
|
82
87
|
ScraperTaskConfig,
|
|
83
88
|
SerpRequest,
|
|
84
89
|
StaticISPProxy,
|
|
85
90
|
StickySession,
|
|
86
91
|
TaskStatusResponse,
|
|
87
92
|
UniversalScrapeRequest,
|
|
93
|
+
UsageStatistics,
|
|
94
|
+
VideoTaskConfig,
|
|
88
95
|
)
|
|
89
96
|
|
|
90
97
|
# Retry utilities
|
|
@@ -112,14 +119,21 @@ __all__ = [
|
|
|
112
119
|
"TimeRange",
|
|
113
120
|
"ProxyHost",
|
|
114
121
|
"ProxyPort",
|
|
122
|
+
"GoogleTbm",
|
|
115
123
|
# Models
|
|
116
124
|
"ProxyConfig",
|
|
117
125
|
"ProxyProduct",
|
|
126
|
+
"ProxyServer",
|
|
127
|
+
"ProxyUser",
|
|
128
|
+
"ProxyUserList",
|
|
129
|
+
"UsageStatistics",
|
|
118
130
|
"StaticISPProxy",
|
|
119
131
|
"StickySession",
|
|
120
132
|
"SerpRequest",
|
|
121
133
|
"UniversalScrapeRequest",
|
|
122
134
|
"ScraperTaskConfig",
|
|
135
|
+
"CommonSettings",
|
|
136
|
+
"VideoTaskConfig",
|
|
123
137
|
"TaskStatusResponse",
|
|
124
138
|
# Exceptions
|
|
125
139
|
"ThordataError",
|
|
@@ -70,18 +70,61 @@ def decode_base64_image(png_str: str) -> bytes:
|
|
|
70
70
|
raise ValueError(f"Failed to decode base64 image: {e}") from e
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def build_auth_headers(token: str) -> Dict[str, str]:
|
|
73
|
+
def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
    """
    Build authorization headers for API requests.

    Supports two modes:
    - bearer: ``Authorization: Bearer <token>`` (Thordata Docs examples)
    - header_token: ``token: <token>`` (Interface documentation)

    Args:
        token: The scraper token.
        mode: Authentication mode ("bearer" or "header_token").

    Returns:
        Headers dict with Authorization/token and Content-Type.
    """
    # Only "header_token" selects the plain token header; any other value
    # (including unknown modes) falls back to Bearer for compatibility.
    if mode == "header_token":
        return {
            "Content-Type": "application/x-www-form-urlencoded",
            "token": token,
        }
    return {
        "Content-Type": "application/x-www-form-urlencoded",
        "Authorization": f"Bearer {token}",
    }
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build_builder_headers(
    scraper_token: str,
    public_token: str,
    public_key: str,
) -> Dict[str, str]:
    """
    Build headers for Web Scraper builder API.

    Builder requires THREE auth headers per official docs:
    - token: public token
    - key: public key
    - Authorization: Bearer scraper_token

    Args:
        scraper_token: The scraper API token.
        public_token: The public API token.
        public_key: The public API key.

    Returns:
        Headers dict with all required auth headers.
    """
    headers: Dict[str, str] = {
        "token": public_token,
        "key": public_key,
    }
    headers["Authorization"] = "Bearer " + scraper_token
    headers["Content-Type"] = "application/x-www-form-urlencoded"
    return headers
|
|
87
130
|
|
|
@@ -142,3 +185,23 @@ def build_user_agent(sdk_version: str, http_client: str) -> str:
|
|
|
142
185
|
py = platform.python_version()
|
|
143
186
|
system = platform.system()
|
|
144
187
|
return f"thordata-python-sdk/{sdk_version} (python {py}; {system}; {http_client})"
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def build_sign_headers(sign: str, api_key: str) -> Dict[str, str]:
    """
    Build headers for Public API NEW (sign + apiKey authentication).

    This is a different authentication system from token+key.

    Args:
        sign: The sign value from Dashboard (immutable).
        api_key: The apiKey value from Dashboard (can be changed).

    Returns:
        Headers dict with sign, apiKey, and Content-Type.
    """
    headers: Dict[str, str] = {"sign": sign, "apiKey": api_key}
    headers["Content-Type"] = "application/x-www-form-urlencoded"
    return headers
|