thordata-sdk 1.0.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_sdk-1.2.0/PKG-INFO +208 -0
- thordata_sdk-1.2.0/README.md +164 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml +8 -3
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/__init__.py +1 -1
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_example_utils.py +3 -2
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_utils.py +4 -4
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/async_client.py +106 -86
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/client.py +782 -118
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/demo.py +1 -3
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/exceptions.py +12 -12
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/models.py +102 -89
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/retry.py +13 -13
- thordata_sdk-1.2.0/src/thordata/serp_engines.py +166 -0
- thordata_sdk-1.2.0/src/thordata_sdk.egg-info/PKG-INFO +208 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/SOURCES.txt +2 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/requires.txt +1 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_async_client.py +14 -2
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_async_client_errors.py +5 -9
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_client.py +9 -2
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_client_errors.py +24 -22
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_examples.py +0 -1
- thordata_sdk-1.2.0/tests/test_integration_proxy_protocols.py +113 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_task_status_and_wait.py +1 -5
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_user_agent.py +0 -2
- thordata_sdk-1.0.1/PKG-INFO +0 -208
- thordata_sdk-1.0.1/README.md +0 -165
- thordata_sdk-1.0.1/src/thordata_sdk.egg-info/PKG-INFO +0 -208
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/LICENSE +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/setup.cfg +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/enums.py +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_enums.py +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/tests/test_spec_parity.py +0 -0
thordata_sdk-1.2.0/PKG-INFO (new file)

````diff
@@ -0,0 +1,208 @@
+Metadata-Version: 2.4
+Name: thordata-sdk
+Version: 1.2.0
+Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
+Author-email: Thordata Developer Team <support@thordata.com>
+License: MIT
+Project-URL: Homepage, https://www.thordata.com
+Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
+Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
+Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
+Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
+Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Internet :: Proxy Servers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.25.0
+Requires-Dist: aiohttp>=3.9.0
+Requires-Dist: PySocks>=1.7.1
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
+Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: types-requests>=2.28.0; extra == "dev"
+Requires-Dist: aioresponses>=0.7.6; extra == "dev"
+Dynamic: license-file
+
+# Thordata Python SDK
+
+<div align="center">
+
+<img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
+
+**The Official Python Client for Thordata APIs**
+
+*Proxy Network • SERP API • Web Unlocker • Web Scraper API*
+
+[](https://pypi.org/project/thordata-sdk/)
+[](https://pypi.org/project/thordata-sdk/)
+[](LICENSE)
+[](https://github.com/Thordata/thordata-python-sdk/actions)
+
+</div>
+
+---
+
+## 📖 Introduction
+
+This SDK provides a robust, high-performance interface to Thordata's AI data infrastructure. It is designed for high-concurrency scraping, reliable proxy tunneling, and seamless data extraction.
+
+**Key Features:**
+* **🚀 Production Ready:** Built on `urllib3` connection pooling for low-latency proxy requests.
+* **⚡ Async Support:** Native `aiohttp` client for high-concurrency SERP/Universal scraping.
+* **🛡️ Robust:** Handles TLS-in-TLS tunneling, retries, and error parsing automatically.
+* **✨ Developer Experience:** Fully typed (`mypy` compatible) with intuitive IDE autocomplete.
+* **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
+
+---
+
+## 📦 Installation
+
+```bash
+pip install thordata-sdk
+```
+
+---
+
+## 🔐 Configuration
+
+Set environment variables to avoid hardcoding credentials. You only need to set the variables for the features you use.
+
+```bash
+# [Required for SERP & Web Unlocker]
+export THORDATA_SCRAPER_TOKEN="your_token_here"
+
+# [Required for Proxy Network]
+export THORDATA_RESIDENTIAL_USERNAME="your_username"
+export THORDATA_RESIDENTIAL_PASSWORD="your_password"
+export THORDATA_PROXY_HOST="vpnXXXX.pr.thordata.net"
+
+# [Required for Task Management]
+export THORDATA_PUBLIC_TOKEN="public_token"
+export THORDATA_PUBLIC_KEY="public_key"
+```
+
+---
+
+## 🚀 Quick Start
+
+### 1. SERP Search (Google/Bing/Yandex)
+
+```python
+from thordata import ThordataClient, Engine
+
+client = ThordataClient()  # Loads THORDATA_SCRAPER_TOKEN from env
+
+# Simple Search
+print("Searching...")
+results = client.serp_search("latest AI trends", engine=Engine.GOOGLE_NEWS)
+
+for news in results.get("news_results", [])[:3]:
+    print(f"- {news['title']} ({news['source']})")
+```
+
+### 2. Universal Scrape (Web Unlocker)
+
+Bypass Cloudflare/Akamai and render JavaScript automatically.
+
+```python
+html = client.universal_scrape(
+    url="https://example.com/protected-page",
+    js_render=True,
+    wait_for=".content-loaded",
+    country="us"
+)
+print(f"Scraped {len(html)} bytes")
+```
+
+### 3. High-Performance Proxy
+
+Use Thordata's residential IPs with automatic connection pooling.
+
+```python
+from thordata import ProxyConfig, ProxyProduct
+
+# Config is optional if env vars are set, but allows granular control
+proxy = ProxyConfig(
+    product=ProxyProduct.RESIDENTIAL,
+    country="jp",
+    city="tokyo",
+    session_id="session-001",
+    session_duration=10  # Sticky IP for 10 mins
+)
+
+# Use the client to make requests (Reuses TCP connections)
+response = client.get("https://httpbin.org/ip", proxy_config=proxy)
+print(response.json())
+```
+
+---
+
+## ⚙️ Advanced Usage
+
+### Async Client (High Concurrency)
+
+For building AI agents or high-throughput spiders.
+
+```python
+import asyncio
+from thordata import AsyncThordataClient
+
+async def main():
+    async with AsyncThordataClient() as client:
+        # Fire off multiple requests in parallel
+        tasks = [
+            client.serp_search(f"query {i}")
+            for i in range(5)
+        ]
+        results = await asyncio.gather(*tasks)
+        print(f"Completed {len(results)} searches")
+
+asyncio.run(main())
+```
+
+### Web Scraper API (Task Management)
+
+Create and manage large-scale scraping tasks asynchronously.
+
+```python
+# 1. Create a task
+task_id = client.create_scraper_task(
+    file_name="daily_scrape",
+    spider_id="universal",
+    spider_name="universal",
+    parameters={"url": "https://example.com"}
+)
+
+# 2. Wait for completion (Polling)
+status = client.wait_for_task(task_id)
+
+# 3. Get results
+if status == "ready":
+    url = client.get_task_result(task_id)
+    print(f"Download Data: {url}")
+```
+
+---
+
+## 📄 License
+
+MIT License. See [LICENSE](LICENSE) for details.
````
thordata_sdk-1.2.0/README.md (new file)

The 164 lines added as README.md are identical, byte for byte, to the README portion embedded in PKG-INFO above (everything from `# Thordata Python SDK` through the License section), so the `@@ -0,0 +1,164 @@` hunk duplicates content already shown.
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml

````diff
@@ -1,10 +1,11 @@
+# thordata-python-sdk/pyproject.toml
 [build-system]
 requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "thordata-sdk"
-version = "1.0.1"
+version = "1.2.0"
 description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -44,6 +45,7 @@ classifiers = [
 dependencies = [
     "requests>=2.25.0",
     "aiohttp>=3.9.0",
+    "PySocks>=1.7.1",
 ]
 
 [project.optional-dependencies]
````
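The one new runtime dependency is `PySocks`, the backend `requests` relies on for `socks5://` proxy URLs; it presumably supports the new `tests/test_integration_proxy_protocols.py` suite listed above. A minimal sketch of what the dependency enables, independent of this SDK (host, port, and credentials are placeholders):

```python
# With PySocks installed, plain `requests` accepts socks5:// proxy URLs.
# Every endpoint value below is a placeholder, not a real Thordata host.
import requests

proxies = {
    "http": "socks5://user:pass@proxy.example.com:1080",
    "https": "socks5://user:pass@proxy.example.com:1080",
}

resp = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=30)
print(resp.json())  # reports the proxy's exit IP
```

Using `socks5h://` instead would resolve DNS on the proxy side rather than locally.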
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml

````diff
@@ -82,6 +84,7 @@ include = '\.pyi?$'
 [tool.ruff]
 line-length = 88
 target-version = "py39"
+extend-exclude = ["sdk-spec"]
 
 [tool.ruff.lint]
 select = [
@@ -90,11 +93,12 @@ select = [
     "F",   # pyflakes
     "I",   # isort (import sorting)
     "B",   # flake8-bugbear
+    "UP",  # pyupgrade
+    "SIM", # flake8-simplify
 ]
 ignore = [
     "E501", # line too long (handled by black)
     "E731", # do not assign a lambda expression
-    "F401", # imported but unused (we have some intentional re-exports)
 ]
 
 [tool.ruff.lint.isort]
````
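The two newly enabled rule families account for most of the mechanical churn in the hunks below (`Optional[X]` → `X | None`, branch-and-return simplifications, and similar). An illustrative before/after, not taken from the SDK source:

```python
from __future__ import annotations  # lets the PEP 604 syntax run on Python 3.9

from typing import Optional

def old_style(x: Optional[str]) -> None: ...  # pyupgrade (UP) rewrites this...
def new_style(x: str | None) -> None: ...     # ...to the PEP 604 spelling

def verbose(flag: bool) -> bool:
    if flag:            # flake8-simplify (SIM) flags branch-and-return-bool...
        return True
    else:
        return False

def simple(flag: bool) -> bool:
    return flag         # ...in favor of returning the condition directly
```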
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml

````diff
@@ -121,6 +125,7 @@ ignore_missing_imports = true
 testpaths = ["tests"]
 asyncio_mode = "auto"
 addopts = "-v --cov=thordata --cov-report=term-missing"
+markers = ["integration: live tests that require real credentials"]
 
 # Coverage setup
 [tool.coverage.run]
````
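Registering the marker keeps pytest from warning about an unknown mark and makes the live suite selectable from the command line. A sketch of the intended usage (the test itself is hypothetical; only the marker name comes from the diff):

```python
import pytest

@pytest.mark.integration
def test_live_proxy_roundtrip():
    """Would hit the real proxy network, so it needs real credentials."""
    ...
```

Run `pytest -m "not integration"` to skip the live tests (e.g., in CI without credentials), or `pytest -m integration` to run only them.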
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/pyproject.toml

````diff
@@ -133,4 +138,4 @@ exclude_lines = [
     "def __repr__",
     "raise NotImplementedError",
     "if TYPE_CHECKING:",
-]
+]
````
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_example_utils.py

````diff
@@ -2,8 +2,9 @@ from __future__ import annotations
 
 import json
 import os
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 try:
     from dotenv import load_dotenv
@@ -23,7 +24,7 @@ def env(name: str) -> str:
     return (os.getenv(name) or "").strip()
 
 
-def skip_if_missing(required: Iterable[str], *, tip:
+def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
     missing = [k for k in required if not env(k)]
     if not missing:
         return False
````
{thordata_sdk-1.0.1 → thordata_sdk-1.2.0}/src/thordata/_utils.py

````diff
@@ -10,7 +10,7 @@ import base64
 import json
 import logging
 import platform
-from typing import Any
+from typing import Any
 
 logger = logging.getLogger(__name__)
 
@@ -71,7 +71,7 @@ def decode_base64_image(png_str: str) -> bytes:
         raise ValueError(f"Failed to decode base64 image: {e}") from e
 
 
-def build_auth_headers(token: str, mode: str = "bearer") ->
+def build_auth_headers(token: str, mode: str = "bearer") -> dict[str, str]:
     """
     Build authorization headers for API requests.
 
@@ -105,7 +105,7 @@ def build_builder_headers(
     scraper_token: str,
     public_token: str,
     public_key: str,
-) ->
+) -> dict[str, str]:
     """
     Build headers for Web Scraper builder API.
 
@@ -130,7 +130,7 @@ def build_builder_headers(
     }
 
 
-def build_public_api_headers(public_token: str, public_key: str) ->
+def build_public_api_headers(public_token: str, public_key: str) -> dict[str, str]:
     """
     Build headers for public API requests (task status, locations, etc.)
 
````
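A note on the annotation changes above relative to the declared `requires-python = ">=3.9"`: built-in generics such as `dict[str, str]` are valid at runtime from Python 3.9 (PEP 585), while the `str | None` union seen in `_example_utils.py` only evaluates at runtime on 3.10+; it is safe there because that module starts with `from __future__ import annotations` (visible in the hunk header), which stores annotations as strings instead of evaluating them. A sketch of the mechanism; only the signature comes from the diff, the body is illustrative:

```python
from __future__ import annotations  # annotations are stored, never evaluated

def build_auth_headers(token: str, mode: str = "bearer") -> dict[str, str]:
    # Hypothetical body; the SDK's real implementation is not shown in this diff.
    if mode == "bearer":
        return {"Authorization": f"Bearer {token}"}
    return {"Authorization": token}
```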