thordata-sdk 1.1.0.tar.gz → 1.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_sdk-1.2.0/PKG-INFO +208 -0
- thordata_sdk-1.2.0/README.md +164 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/pyproject.toml +5 -2
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/__init__.py +1 -1
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/async_client.py +26 -7
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/client.py +715 -48
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/models.py +35 -19
- thordata_sdk-1.2.0/src/thordata/serp_engines.py +166 -0
- thordata_sdk-1.2.0/src/thordata_sdk.egg-info/PKG-INFO +208 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/SOURCES.txt +2 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/requires.txt +1 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_async_client.py +14 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_async_client_errors.py +1 -5
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_client.py +9 -2
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_client_errors.py +5 -7
- thordata_sdk-1.2.0/tests/test_integration_proxy_protocols.py +113 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_task_status_and_wait.py +1 -5
- thordata_sdk-1.1.0/PKG-INFO +0 -271
- thordata_sdk-1.1.0/README.md +0 -228
- thordata_sdk-1.1.0/src/thordata_sdk.egg-info/PKG-INFO +0 -271
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/LICENSE +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/setup.cfg +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/_example_utils.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/_utils.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/demo.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/enums.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/exceptions.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata/retry.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_enums.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_examples.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_spec_parity.py +0 -0
- {thordata_sdk-1.1.0 → thordata_sdk-1.2.0}/tests/test_user_agent.py +0 -0
--- /dev/null
+++ thordata_sdk-1.2.0/PKG-INFO
@@ -0,0 +1,208 @@
+Metadata-Version: 2.4
+Name: thordata-sdk
+Version: 1.2.0
+Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
+Author-email: Thordata Developer Team <support@thordata.com>
+License: MIT
+Project-URL: Homepage, https://www.thordata.com
+Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
+Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
+Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
+Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
+Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Internet :: Proxy Servers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.25.0
+Requires-Dist: aiohttp>=3.9.0
+Requires-Dist: PySocks>=1.7.1
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
+Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: types-requests>=2.28.0; extra == "dev"
+Requires-Dist: aioresponses>=0.7.6; extra == "dev"
+Dynamic: license-file
+
+# Thordata Python SDK
+
+<div align="center">
+
+<img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
+
+**The Official Python Client for Thordata APIs**
+
+*Proxy Network • SERP API • Web Unlocker • Web Scraper API*
+
+[](https://pypi.org/project/thordata-sdk/)
+[](https://pypi.org/project/thordata-sdk/)
+[](LICENSE)
+[](https://github.com/Thordata/thordata-python-sdk/actions)
+
+</div>
+
+---
+
+## 📖 Introduction
+
+This SDK provides a robust, high-performance interface to Thordata's AI data infrastructure. It is designed for high-concurrency scraping, reliable proxy tunneling, and seamless data extraction.
+
+**Key Features:**
+* **🚀 Production Ready:** Built on `urllib3` connection pooling for low-latency proxy requests.
+* **⚡ Async Support:** Native `aiohttp` client for high-concurrency SERP/Universal scraping.
+* **🛡️ Robust:** Handles TLS-in-TLS tunneling, retries, and error parsing automatically.
+* **✨ Developer Experience:** Fully typed (`mypy` compatible) with intuitive IDE autocomplete.
+* **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
+
+---
+
+## 📦 Installation
+
+```bash
+pip install thordata-sdk
+```
+
+---
+
+## 🔐 Configuration
+
+Set environment variables to avoid hardcoding credentials. You only need to set the variables for the features you use.
+
+```bash
+# [Required for SERP & Web Unlocker]
+export THORDATA_SCRAPER_TOKEN="your_token_here"
+
+# [Required for Proxy Network]
+export THORDATA_RESIDENTIAL_USERNAME="your_username"
+export THORDATA_RESIDENTIAL_PASSWORD="your_password"
+export THORDATA_PROXY_HOST="vpnXXXX.pr.thordata.net"
+
+# [Required for Task Management]
+export THORDATA_PUBLIC_TOKEN="public_token"
+export THORDATA_PUBLIC_KEY="public_key"
+```
+
+---
+
+## 🚀 Quick Start
+
+### 1. SERP Search (Google/Bing/Yandex)
+
+```python
+from thordata import ThordataClient, Engine
+
+client = ThordataClient()  # Loads THORDATA_SCRAPER_TOKEN from env
+
+# Simple Search
+print("Searching...")
+results = client.serp_search("latest AI trends", engine=Engine.GOOGLE_NEWS)
+
+for news in results.get("news_results", [])[:3]:
+    print(f"- {news['title']} ({news['source']})")
+```
+
+### 2. Universal Scrape (Web Unlocker)
+
+Bypass Cloudflare/Akamai and render JavaScript automatically.
+
+```python
+html = client.universal_scrape(
+    url="https://example.com/protected-page",
+    js_render=True,
+    wait_for=".content-loaded",
+    country="us"
+)
+print(f"Scraped {len(html)} bytes")
+```
+
+### 3. High-Performance Proxy
+
+Use Thordata's residential IPs with automatic connection pooling.
+
+```python
+from thordata import ProxyConfig, ProxyProduct
+
+# Config is optional if env vars are set, but allows granular control
+proxy = ProxyConfig(
+    product=ProxyProduct.RESIDENTIAL,
+    country="jp",
+    city="tokyo",
+    session_id="session-001",
+    session_duration=10  # Sticky IP for 10 mins
+)
+
+# Use the client to make requests (Reuses TCP connections)
+response = client.get("https://httpbin.org/ip", proxy_config=proxy)
+print(response.json())
+```
+
+---
+
+## ⚙️ Advanced Usage
+
+### Async Client (High Concurrency)
+
+For building AI agents or high-throughput spiders.
+
+```python
+import asyncio
+from thordata import AsyncThordataClient
+
+async def main():
+    async with AsyncThordataClient() as client:
+        # Fire off multiple requests in parallel
+        tasks = [
+            client.serp_search(f"query {i}")
+            for i in range(5)
+        ]
+        results = await asyncio.gather(*tasks)
+        print(f"Completed {len(results)} searches")
+
+asyncio.run(main())
+```
+
+### Web Scraper API (Task Management)
+
+Create and manage large-scale scraping tasks asynchronously.
+
+```python
+# 1. Create a task
+task_id = client.create_scraper_task(
+    file_name="daily_scrape",
+    spider_id="universal",
+    spider_name="universal",
+    parameters={"url": "https://example.com"}
+)
+
+# 2. Wait for completion (Polling)
+status = client.wait_for_task(task_id)
+
+# 3. Get results
+if status == "ready":
+    url = client.get_task_result(task_id)
+    print(f"Download Data: {url}")
+```
+
+---
+
+## 📄 License
+
+MIT License. See [LICENSE](LICENSE) for details.
--- /dev/null
+++ thordata_sdk-1.2.0/README.md
@@ -0,0 +1,164 @@
(The 164 added lines are identical to the README body embedded in the PKG-INFO above, from "# Thordata Python SDK" through the License section.)
--- thordata_sdk-1.1.0/pyproject.toml
+++ thordata_sdk-1.2.0/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "thordata-sdk"
-version = "1.1.0"
+version = "1.2.0"
 description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -45,6 +45,7 @@ classifiers = [
 dependencies = [
     "requests>=2.25.0",
     "aiohttp>=3.9.0",
+    "PySocks>=1.7.1",
 ]
 
 [project.optional-dependencies]
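The new `PySocks` runtime dependency is what lets `requests`/`urllib3` tunnel through `socks5://` proxy URLs, which the new `test_integration_proxy_protocols.py` exercises. A minimal sketch of what it enables; the proxy host, port, and credentials below are placeholders, not real Thordata endpoints:

```python
import requests  # socks5:// proxy URLs require PySocks to be installed

# Placeholder credentials and endpoint; substitute real proxy details.
proxy_url = "socks5://user:pass@proxy.example.com:1080"

response = requests.get(
    "https://httpbin.org/ip",
    proxies={"http": proxy_url, "https": proxy_url},
    timeout=30,
)
print(response.json())  # Shows the proxy's exit IP
```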
@@ -83,6 +84,7 @@ include = '\.pyi?$'
 [tool.ruff]
 line-length = 88
 target-version = "py39"
+extend-exclude = ["sdk-spec"]
 
 [tool.ruff.lint]
 select = [
@@ -123,6 +125,7 @@ ignore_missing_imports = true
 testpaths = ["tests"]
 asyncio_mode = "auto"
 addopts = "-v --cov=thordata --cov-report=term-missing"
+markers = ["integration: live tests that require real credentials"]
 
 # Coverage setup
 [tool.coverage.run]
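Registering the `integration` marker lets live tests (such as the new `test_integration_proxy_protocols.py`) be deselected when no credentials are available. A sketch of typical usage; the test body and skip condition are illustrative only:

```python
import os

import pytest


@pytest.mark.integration
def test_live_proxy_roundtrip():
    # Skip unless real credentials are present in the environment.
    if not os.getenv("THORDATA_RESIDENTIAL_USERNAME"):
        pytest.skip("requires real Thordata credentials")
    # ... exercise a live proxy request here ...


# Deselect live tests: pytest -m "not integration"
# Run only live tests: pytest -m integration
```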
@@ -135,4 +138,4 @@ exclude_lines = [
     "def __repr__",
     "raise NotImplementedError",
     "if TYPE_CHECKING:",
-]
+]
--- thordata_sdk-1.1.0/src/thordata/async_client.py
+++ thordata_sdk-1.2.0/src/thordata/async_client.py
@@ -60,6 +60,7 @@ from .models import (
     VideoTaskConfig,
 )
 from .retry import RetryConfig
+from .serp_engines import AsyncSerpNamespace
 
 logger = logging.getLogger(__name__)
 
@@ -85,7 +86,10 @@ class AsyncThordataClient:
         ...     public_token="pub_token",
         ...     public_key="pub_key"
         ... ) as client:
+        ...     # Old style
         ...     results = await client.serp_search("python")
+        ...     # New style (Namespaced)
+        ...     maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
     """
 
     # API Endpoints (same as sync client)
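The docstring above is the only usage the diff shows for the new namespaced access; a sketch extrapolated from it (the `google.maps` signature is assumed from that example, not from full API docs):

```python
import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(scraper_token="your_token") as client:
        # Old style still works:
        results = await client.serp_search("python")
        # New namespaced style backed by serp_engines.py:
        maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
        print(len(results), len(maps_results))


asyncio.run(main())
```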
@@ -96,7 +100,7 @@ class AsyncThordataClient:
 
     def __init__(
         self,
-        scraper_token: str,
+        scraper_token: str | None = None,  # Change: Optional
         public_token: str | None = None,
         public_key: str | None = None,
         proxy_host: str = "pr.thordata.net",
@@ -111,8 +115,6 @@ class AsyncThordataClient:
         locations_base_url: str | None = None,
     ) -> None:
         """Initialize the Async Thordata Client."""
-        if not scraper_token:
-            raise ThordataConfigError("scraper_token is required")
 
         self.scraper_token = scraper_token
         self.public_token = public_token
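Together, these two hunks implement the "lazy validation" feature the README advertises: constructing a client without a scraper token no longer raises, and the error surfaces only when a token-gated call is made. A sketch of the new behavior (the `thordata.exceptions` import path is assumed from the package's `exceptions.py`):

```python
import asyncio

from thordata import AsyncThordataClient
from thordata.exceptions import ThordataConfigError  # import path assumed


async def main() -> None:
    # No scraper_token: fine in 1.2.0 (raised at init in 1.1.0).
    async with AsyncThordataClient() as client:
        try:
            await client.serp_search("python")
        except ThordataConfigError as exc:
            print(exc)  # "scraper_token is required for SERP API"


asyncio.run(main())
```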
@@ -195,7 +197,7 @@ class AsyncThordataClient:
         self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
 
         proxy_api_base = os.getenv(
-            "THORDATA_PROXY_API_BASE_URL", "https://
+            "THORDATA_PROXY_API_BASE_URL", "https://openapi.thordata.com/api"
         )
         self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
         self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
@@ -203,6 +205,9 @@ class AsyncThordataClient:
         # Session initialized lazily
         self._session: aiohttp.ClientSession | None = None
 
+        # Namespaced Access (e.g. client.serp.google.maps(...))
+        self.serp = AsyncSerpNamespace(self)
+
     async def __aenter__(self) -> AsyncThordataClient:
         """Async context manager entry."""
         if self._session is None or self._session.closed:
@@ -386,6 +391,9 @@ class AsyncThordataClient:
         Returns:
             Parsed JSON results or dict with 'html' key.
         """
+        if not self.scraper_token:
+            raise ThordataConfigError("scraper_token is required for SERP API")
+
         session = self._get_session()
 
         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
@@ -405,7 +413,8 @@ class AsyncThordataClient:
         )
 
         payload = request.to_payload()
-
+        token = self.scraper_token or ""
+        headers = build_auth_headers(token, mode=self._auth_mode)
 
         logger.info(f"Async SERP Search: {engine_str} - {query}")
 
@@ -451,6 +460,8 @@ class AsyncThordataClient:
         Execute an async SERP search using a SerpRequest object.
         """
         session = self._get_session()
+        if not self.scraper_token:
+            raise ThordataConfigError("scraper_token is required for SERP API")
 
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
@@ -545,6 +556,8 @@ class AsyncThordataClient:
         Async scrape using a UniversalScrapeRequest object.
         """
         session = self._get_session()
+        if not self.scraper_token:
+            raise ThordataConfigError("scraper_token is required for Universal API")
 
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
@@ -621,6 +634,8 @@ class AsyncThordataClient:
         """
         self._require_public_credentials()
         session = self._get_session()
+        if not self.scraper_token:
+            raise ThordataConfigError("scraper_token is required for Task Builder")
 
         payload = config.to_payload()
         # Builder needs 3 headers: token, key, Authorization Bearer
@@ -682,6 +697,10 @@ class AsyncThordataClient:
 
         self._require_public_credentials()
         session = self._get_session()
+        if not self.scraper_token:
+            raise ThordataConfigError(
+                "scraper_token is required for Video Task Builder"
+            )
 
         payload = config.to_payload()
         headers = build_builder_headers(
@@ -1527,8 +1546,8 @@ class AsyncThordataClient:
         self._require_public_credentials()
 
         params = {
-            "token": self.public_token,
-            "key": self.public_key,
+            "token": self.public_token or "",
+            "key": self.public_key or "",
         }
 
         for key, value in kwargs.items():
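The `or ""` coercions here (like the `token = self.scraper_token or ""` hunk earlier) follow from making the credentials Optional: a params dict typed as `dict[str, str]` cannot hold `None`. A minimal illustration of the pattern, with hypothetical names:

```python
from typing import Optional

public_token: Optional[str] = None

# Without the coercion this would be dict[str, Optional[str]] and fail
# strict type checking where dict[str, str] is expected.
params: dict[str, str] = {"token": public_token or ""}
```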