thordata-sdk 0.8.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/_example_utils.py +76 -0
- thordata/_utils.py +14 -31
- thordata/async_client.py +153 -92
- thordata/client.py +278 -1156
- thordata/models.py +57 -7
- thordata/retry.py +2 -3
- {thordata_sdk-0.8.0.dist-info → thordata_sdk-1.0.1.dist-info}/METADATA +2 -6
- thordata_sdk-1.0.1.dist-info/RECORD +15 -0
- thordata_sdk-0.8.0.dist-info/RECORD +0 -14
- {thordata_sdk-0.8.0.dist-info → thordata_sdk-1.0.1.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.8.0.dist-info → thordata_sdk-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.8.0.dist-info → thordata_sdk-1.0.1.dist-info}/top_level.txt +0 -0
thordata/client.py
CHANGED
|
@@ -25,17 +25,19 @@ from __future__ import annotations
|
|
|
25
25
|
|
|
26
26
|
import logging
|
|
27
27
|
import os
|
|
28
|
-
|
|
28
|
+
import ssl
|
|
29
|
+
from datetime import date
|
|
29
30
|
from typing import Any, Dict, List, Optional, Union
|
|
31
|
+
from urllib.parse import urlencode
|
|
30
32
|
|
|
31
33
|
import requests
|
|
34
|
+
import urllib3
|
|
32
35
|
|
|
33
36
|
from . import __version__ as _sdk_version
|
|
34
37
|
from ._utils import (
|
|
35
38
|
build_auth_headers,
|
|
36
39
|
build_builder_headers,
|
|
37
40
|
build_public_api_headers,
|
|
38
|
-
build_sign_headers,
|
|
39
41
|
build_user_agent,
|
|
40
42
|
decode_base64_image,
|
|
41
43
|
extract_error_message,
|
|
@@ -53,13 +55,13 @@ from .models import (
|
|
|
53
55
|
ProxyConfig,
|
|
54
56
|
ProxyProduct,
|
|
55
57
|
ProxyServer,
|
|
56
|
-
ProxyUser,
|
|
57
58
|
ProxyUserList,
|
|
58
59
|
ScraperTaskConfig,
|
|
59
60
|
SerpRequest,
|
|
60
61
|
UniversalScrapeRequest,
|
|
61
62
|
UsageStatistics,
|
|
62
63
|
VideoTaskConfig,
|
|
64
|
+
WhitelistProxyConfig,
|
|
63
65
|
)
|
|
64
66
|
from .retry import RetryConfig, with_retry
|
|
65
67
|
|
|
@@ -67,32 +69,6 @@ logger = logging.getLogger(__name__)
|
|
|
67
69
|
|
|
68
70
|
|
|
69
71
|
class ThordataClient:
|
|
70
|
-
"""
|
|
71
|
-
The official synchronous Python client for Thordata.
|
|
72
|
-
|
|
73
|
-
This client handles authentication and communication with:
|
|
74
|
-
- Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
|
|
75
|
-
- SERP API (Real-time Search Engine Results)
|
|
76
|
-
- Universal Scraping API (Web Unlocker - Single Page Rendering)
|
|
77
|
-
- Web Scraper API (Async Task Management)
|
|
78
|
-
|
|
79
|
-
Args:
|
|
80
|
-
scraper_token: The API token from your Dashboard.
|
|
81
|
-
public_token: The public API token (for task status, locations).
|
|
82
|
-
public_key: The public API key.
|
|
83
|
-
proxy_host: Custom proxy gateway host (optional).
|
|
84
|
-
proxy_port: Custom proxy gateway port (optional).
|
|
85
|
-
timeout: Default request timeout in seconds (default: 30).
|
|
86
|
-
retry_config: Configuration for automatic retries (optional).
|
|
87
|
-
|
|
88
|
-
Example:
|
|
89
|
-
>>> client = ThordataClient(
|
|
90
|
-
... scraper_token="your_scraper_token",
|
|
91
|
-
... public_token="your_public_token",
|
|
92
|
-
... public_key="your_public_key"
|
|
93
|
-
... )
|
|
94
|
-
"""
|
|
95
|
-
|
|
96
72
|
# API Endpoints
|
|
97
73
|
BASE_URL = "https://scraperapi.thordata.com"
|
|
98
74
|
UNIVERSAL_URL = "https://universalapi.thordata.com"
|
|
@@ -104,8 +80,6 @@ class ThordataClient:
|
|
|
104
80
|
scraper_token: str,
|
|
105
81
|
public_token: Optional[str] = None,
|
|
106
82
|
public_key: Optional[str] = None,
|
|
107
|
-
sign: Optional[str] = None,
|
|
108
|
-
api_key: Optional[str] = None,
|
|
109
83
|
proxy_host: str = "pr.thordata.net",
|
|
110
84
|
proxy_port: int = 9999,
|
|
111
85
|
timeout: int = 30,
|
|
@@ -121,22 +95,14 @@ class ThordataClient:
|
|
|
121
95
|
if not scraper_token:
|
|
122
96
|
raise ThordataConfigError("scraper_token is required")
|
|
123
97
|
|
|
98
|
+
# Core credentials
|
|
124
99
|
self.scraper_token = scraper_token
|
|
125
100
|
self.public_token = public_token
|
|
126
101
|
self.public_key = public_key
|
|
127
102
|
|
|
128
|
-
# Automatic Fallback Logic: If sign/api_key is not provided, try using public_token/key
|
|
129
|
-
self.sign = sign or os.getenv("THORDATA_SIGN") or self.public_token
|
|
130
|
-
self.api_key = api_key or os.getenv("THORDATA_API_KEY") or self.public_key
|
|
131
|
-
|
|
132
|
-
# Public API authentication
|
|
133
|
-
self.sign = sign or os.getenv("THORDATA_SIGN")
|
|
134
|
-
self.api_key = api_key or os.getenv("THORDATA_API_KEY")
|
|
135
|
-
|
|
136
103
|
# Proxy configuration
|
|
137
104
|
self._proxy_host = proxy_host
|
|
138
105
|
self._proxy_port = proxy_port
|
|
139
|
-
self._default_timeout = timeout
|
|
140
106
|
|
|
141
107
|
# Timeout configuration
|
|
142
108
|
self._default_timeout = timeout
|
|
@@ -145,39 +111,28 @@ class ThordataClient:
|
|
|
145
111
|
# Retry configuration
|
|
146
112
|
self._retry_config = retry_config or RetryConfig()
|
|
147
113
|
|
|
148
|
-
# Authentication mode
|
|
114
|
+
# Authentication mode (for scraping APIs)
|
|
149
115
|
self._auth_mode = auth_mode.lower()
|
|
150
116
|
if self._auth_mode not in ("bearer", "header_token"):
|
|
151
117
|
raise ThordataConfigError(
|
|
152
118
|
f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
|
|
153
119
|
)
|
|
154
120
|
|
|
155
|
-
#
|
|
156
|
-
self._default_proxy_url = (
|
|
157
|
-
f"http://td-customer-{self.scraper_token}:@{proxy_host}:{proxy_port}"
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
# Sessions:
|
|
161
|
-
# - _proxy_session: used for proxy network traffic to target sites
|
|
162
|
-
# - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
|
|
163
|
-
#
|
|
164
|
-
# We intentionally do NOT set session-level proxies for _api_session,
|
|
165
|
-
# so developers can rely on system proxy settings (e.g., Clash) via env vars.
|
|
121
|
+
# HTTP Sessions
|
|
166
122
|
self._proxy_session = requests.Session()
|
|
167
123
|
self._proxy_session.trust_env = False
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
}
|
|
124
|
+
|
|
125
|
+
# Cache for ProxyManagers (Connection Pooling Fix)
|
|
126
|
+
# Key: proxy_url (str), Value: urllib3.ProxyManager
|
|
127
|
+
self._proxy_managers: Dict[str, urllib3.ProxyManager] = {}
|
|
172
128
|
|
|
173
129
|
self._api_session = requests.Session()
|
|
174
130
|
self._api_session.trust_env = True
|
|
175
|
-
|
|
176
131
|
self._api_session.headers.update(
|
|
177
132
|
{"User-Agent": build_user_agent(_sdk_version, "requests")}
|
|
178
133
|
)
|
|
179
134
|
|
|
180
|
-
# Base URLs
|
|
135
|
+
# Base URLs
|
|
181
136
|
scraperapi_base = (
|
|
182
137
|
scraperapi_base_url
|
|
183
138
|
or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
|
|
@@ -205,39 +160,42 @@ class ThordataClient:
|
|
|
205
160
|
gateway_base = os.getenv(
|
|
206
161
|
"THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
|
|
207
162
|
)
|
|
208
|
-
|
|
163
|
+
self._gateway_base_url = gateway_base
|
|
164
|
+
self._child_base_url = os.getenv(
|
|
209
165
|
"THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
|
|
210
166
|
)
|
|
211
167
|
|
|
212
|
-
self._gateway_base_url = gateway_base
|
|
213
|
-
self._child_base_url = child_base
|
|
214
|
-
|
|
215
168
|
self._serp_url = f"{scraperapi_base}/request"
|
|
216
169
|
self._builder_url = f"{scraperapi_base}/builder"
|
|
217
170
|
self._video_builder_url = f"{scraperapi_base}/video_builder"
|
|
218
171
|
self._universal_url = f"{universalapi_base}/request"
|
|
172
|
+
|
|
219
173
|
self._status_url = f"{web_scraper_api_base}/tasks-status"
|
|
220
174
|
self._download_url = f"{web_scraper_api_base}/tasks-download"
|
|
175
|
+
self._list_url = f"{web_scraper_api_base}/tasks-list"
|
|
176
|
+
|
|
221
177
|
self._locations_base_url = locations_base
|
|
178
|
+
|
|
222
179
|
self._usage_stats_url = (
|
|
223
180
|
f"{locations_base.replace('/locations', '')}/account/usage-statistics"
|
|
224
181
|
)
|
|
225
182
|
self._proxy_users_url = (
|
|
226
183
|
f"{locations_base.replace('/locations', '')}/proxy-users"
|
|
227
184
|
)
|
|
185
|
+
|
|
228
186
|
whitelist_base = os.getenv(
|
|
229
187
|
"THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
|
|
230
188
|
)
|
|
231
189
|
self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
|
|
190
|
+
|
|
232
191
|
proxy_api_base = os.getenv(
|
|
233
192
|
"THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
|
|
234
193
|
)
|
|
235
194
|
self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
|
|
236
195
|
self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
|
|
237
|
-
self._list_url = f"{web_scraper_api_base}/tasks-list"
|
|
238
196
|
|
|
239
197
|
# =========================================================================
|
|
240
|
-
# Proxy Network Methods
|
|
198
|
+
# Proxy Network Methods
|
|
241
199
|
# =========================================================================
|
|
242
200
|
def get(
|
|
243
201
|
self,
|
|
@@ -247,41 +205,8 @@ class ThordataClient:
|
|
|
247
205
|
timeout: Optional[int] = None,
|
|
248
206
|
**kwargs: Any,
|
|
249
207
|
) -> requests.Response:
|
|
250
|
-
"""
|
|
251
|
-
Send a GET request through the Thordata Proxy Network.
|
|
252
|
-
|
|
253
|
-
Args:
|
|
254
|
-
url: The target URL.
|
|
255
|
-
proxy_config: Custom proxy configuration for geo-targeting/sessions.
|
|
256
|
-
timeout: Request timeout in seconds.
|
|
257
|
-
**kwargs: Additional arguments to pass to requests.get().
|
|
258
|
-
|
|
259
|
-
Returns:
|
|
260
|
-
The response object.
|
|
261
|
-
|
|
262
|
-
Example:
|
|
263
|
-
>>> # Basic request
|
|
264
|
-
>>> response = client.get("https://httpbin.org/ip")
|
|
265
|
-
>>>
|
|
266
|
-
>>> # With geo-targeting
|
|
267
|
-
>>> from thordata.models import ProxyConfig
|
|
268
|
-
>>> config = ProxyConfig(
|
|
269
|
-
... username="myuser",
|
|
270
|
-
... password="mypass",
|
|
271
|
-
... country="us",
|
|
272
|
-
... city="seattle"
|
|
273
|
-
... )
|
|
274
|
-
>>> response = client.get("https://httpbin.org/ip", proxy_config=config)
|
|
275
|
-
"""
|
|
276
208
|
logger.debug(f"Proxy GET request: {url}")
|
|
277
|
-
|
|
278
|
-
timeout = timeout or self._default_timeout
|
|
279
|
-
|
|
280
|
-
if proxy_config:
|
|
281
|
-
proxies = proxy_config.to_proxies_dict()
|
|
282
|
-
kwargs["proxies"] = proxies
|
|
283
|
-
|
|
284
|
-
return self._request_with_retry("GET", url, timeout=timeout, **kwargs)
|
|
209
|
+
return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
|
|
285
210
|
|
|
286
211
|
def post(
|
|
287
212
|
self,
|
|
@@ -291,32 +216,58 @@ class ThordataClient:
|
|
|
291
216
|
timeout: Optional[int] = None,
|
|
292
217
|
**kwargs: Any,
|
|
293
218
|
) -> requests.Response:
|
|
294
|
-
"""
|
|
295
|
-
Send a POST request through the Thordata Proxy Network.
|
|
296
|
-
|
|
297
|
-
Args:
|
|
298
|
-
url: The target URL.
|
|
299
|
-
proxy_config: Custom proxy configuration.
|
|
300
|
-
timeout: Request timeout in seconds.
|
|
301
|
-
**kwargs: Additional arguments to pass to requests.post().
|
|
302
|
-
|
|
303
|
-
Returns:
|
|
304
|
-
The response object.
|
|
305
|
-
"""
|
|
306
219
|
logger.debug(f"Proxy POST request: {url}")
|
|
220
|
+
return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
|
|
307
221
|
|
|
222
|
+
def _proxy_verb(
|
|
223
|
+
self,
|
|
224
|
+
method: str,
|
|
225
|
+
url: str,
|
|
226
|
+
proxy_config: Optional[ProxyConfig],
|
|
227
|
+
timeout: Optional[int],
|
|
228
|
+
**kwargs: Any,
|
|
229
|
+
) -> requests.Response:
|
|
308
230
|
timeout = timeout or self._default_timeout
|
|
309
231
|
|
|
310
|
-
if proxy_config:
|
|
311
|
-
|
|
312
|
-
|
|
232
|
+
if proxy_config is None:
|
|
233
|
+
proxy_config = self._get_default_proxy_config_from_env()
|
|
234
|
+
|
|
235
|
+
if proxy_config is None:
|
|
236
|
+
raise ThordataConfigError(
|
|
237
|
+
"Proxy credentials are missing. "
|
|
238
|
+
"Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# For requests/urllib3, we don't need 'proxies' dict in kwargs
|
|
242
|
+
# because we use ProxyManager directly.
|
|
243
|
+
# But we remove it if user accidentally passed it to avoid confusion.
|
|
244
|
+
kwargs.pop("proxies", None)
|
|
245
|
+
|
|
246
|
+
@with_retry(self._retry_config)
|
|
247
|
+
def _do() -> requests.Response:
|
|
248
|
+
return self._proxy_request_with_proxy_manager(
|
|
249
|
+
method,
|
|
250
|
+
url,
|
|
251
|
+
proxy_config=proxy_config, # type: ignore
|
|
252
|
+
timeout=timeout, # type: ignore
|
|
253
|
+
headers=kwargs.pop("headers", None),
|
|
254
|
+
params=kwargs.pop("params", None),
|
|
255
|
+
data=kwargs.pop("data", None),
|
|
256
|
+
)
|
|
313
257
|
|
|
314
|
-
|
|
258
|
+
try:
|
|
259
|
+
return _do()
|
|
260
|
+
except requests.Timeout as e:
|
|
261
|
+
raise ThordataTimeoutError(
|
|
262
|
+
f"Request timed out: {e}", original_error=e
|
|
263
|
+
) from e
|
|
264
|
+
except Exception as e:
|
|
265
|
+
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
315
266
|
|
|
316
267
|
def build_proxy_url(
|
|
317
268
|
self,
|
|
318
|
-
username: str,
|
|
319
|
-
password: str,
|
|
269
|
+
username: str,
|
|
270
|
+
password: str,
|
|
320
271
|
*,
|
|
321
272
|
country: Optional[str] = None,
|
|
322
273
|
state: Optional[str] = None,
|
|
@@ -325,28 +276,6 @@ class ThordataClient:
|
|
|
325
276
|
session_duration: Optional[int] = None,
|
|
326
277
|
product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
|
|
327
278
|
) -> str:
|
|
328
|
-
"""
|
|
329
|
-
Build a proxy URL with custom targeting options.
|
|
330
|
-
|
|
331
|
-
This is a convenience method for creating proxy URLs without
|
|
332
|
-
manually constructing a ProxyConfig.
|
|
333
|
-
|
|
334
|
-
Args:
|
|
335
|
-
country: Target country code (e.g., 'us', 'gb').
|
|
336
|
-
state: Target state (e.g., 'california').
|
|
337
|
-
city: Target city (e.g., 'seattle').
|
|
338
|
-
session_id: Session ID for sticky sessions.
|
|
339
|
-
session_duration: Session duration in minutes (1-90).
|
|
340
|
-
product: Proxy product type.
|
|
341
|
-
|
|
342
|
-
Returns:
|
|
343
|
-
The proxy URL string.
|
|
344
|
-
|
|
345
|
-
Example:
|
|
346
|
-
>>> url = client.build_proxy_url(country="us", city="seattle")
|
|
347
|
-
>>> proxies = {"http": url, "https": url}
|
|
348
|
-
>>> requests.get("https://example.com", proxies=proxies)
|
|
349
|
-
"""
|
|
350
279
|
config = ProxyConfig(
|
|
351
280
|
username=username,
|
|
352
281
|
password=password,
|
|
@@ -362,7 +291,7 @@ class ThordataClient:
|
|
|
362
291
|
return config.build_proxy_url()
|
|
363
292
|
|
|
364
293
|
# =========================================================================
|
|
365
|
-
# Internal
|
|
294
|
+
# Internal Request Helpers
|
|
366
295
|
# =========================================================================
|
|
367
296
|
def _api_request_with_retry(
|
|
368
297
|
self,
|
|
@@ -373,8 +302,6 @@ class ThordataClient:
|
|
|
373
302
|
headers: Optional[Dict[str, str]] = None,
|
|
374
303
|
params: Optional[Dict[str, Any]] = None,
|
|
375
304
|
) -> requests.Response:
|
|
376
|
-
"""Make an API request with automatic retry on transient failures."""
|
|
377
|
-
|
|
378
305
|
@with_retry(self._retry_config)
|
|
379
306
|
def _do_request() -> requests.Response:
|
|
380
307
|
return self._api_session.request(
|
|
@@ -397,8 +324,83 @@ class ThordataClient:
|
|
|
397
324
|
f"API request failed: {e}", original_error=e
|
|
398
325
|
) from e
|
|
399
326
|
|
|
327
|
+
def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
|
|
328
|
+
"""Get or create a ProxyManager for the given proxy URL (Pooled)."""
|
|
329
|
+
if proxy_url not in self._proxy_managers:
|
|
330
|
+
# Create a new manager if not cached
|
|
331
|
+
proxy_ssl_context = None
|
|
332
|
+
if proxy_url.startswith("https://"):
|
|
333
|
+
proxy_ssl_context = ssl.create_default_context()
|
|
334
|
+
|
|
335
|
+
self._proxy_managers[proxy_url] = urllib3.ProxyManager(
|
|
336
|
+
proxy_url,
|
|
337
|
+
proxy_ssl_context=proxy_ssl_context,
|
|
338
|
+
num_pools=10, # Allow concurrency
|
|
339
|
+
maxsize=10,
|
|
340
|
+
)
|
|
341
|
+
return self._proxy_managers[proxy_url]
|
|
342
|
+
|
|
343
|
+
def _proxy_request_with_proxy_manager(
|
|
344
|
+
self,
|
|
345
|
+
method: str,
|
|
346
|
+
url: str,
|
|
347
|
+
*,
|
|
348
|
+
proxy_config: ProxyConfig,
|
|
349
|
+
timeout: int,
|
|
350
|
+
headers: Optional[Dict[str, str]] = None,
|
|
351
|
+
params: Optional[Dict[str, Any]] = None,
|
|
352
|
+
data: Any = None,
|
|
353
|
+
) -> requests.Response:
|
|
354
|
+
# 1. Prepare URL and Body
|
|
355
|
+
req = requests.Request(method=method.upper(), url=url, params=params)
|
|
356
|
+
prepped = self._proxy_session.prepare_request(req)
|
|
357
|
+
final_url = prepped.url or url
|
|
358
|
+
|
|
359
|
+
# 2. Get Proxy Configuration
|
|
360
|
+
proxy_url = proxy_config.build_proxy_endpoint()
|
|
361
|
+
proxy_headers = urllib3.make_headers(
|
|
362
|
+
proxy_basic_auth=proxy_config.build_proxy_basic_auth()
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
# 3. Get Cached Proxy Manager
|
|
366
|
+
pm = self._get_proxy_manager(proxy_url)
|
|
367
|
+
|
|
368
|
+
# 4. Prepare Request Headers/Body
|
|
369
|
+
req_headers = dict(headers or {})
|
|
370
|
+
body = None
|
|
371
|
+
if data is not None:
|
|
372
|
+
if isinstance(data, dict):
|
|
373
|
+
body = urlencode({k: str(v) for k, v in data.items()})
|
|
374
|
+
req_headers.setdefault(
|
|
375
|
+
"Content-Type", "application/x-www-form-urlencoded"
|
|
376
|
+
)
|
|
377
|
+
else:
|
|
378
|
+
body = data
|
|
379
|
+
|
|
380
|
+
# 5. Execute Request via urllib3
|
|
381
|
+
http_resp = pm.request(
|
|
382
|
+
method.upper(),
|
|
383
|
+
final_url,
|
|
384
|
+
body=body,
|
|
385
|
+
headers=req_headers or None,
|
|
386
|
+
proxy_headers=proxy_headers, # Attach Auth here
|
|
387
|
+
timeout=urllib3.Timeout(connect=timeout, read=timeout),
|
|
388
|
+
retries=False, # We handle retries in _proxy_verb
|
|
389
|
+
preload_content=True,
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# 6. Convert back to requests.Response
|
|
393
|
+
r = requests.Response()
|
|
394
|
+
r.status_code = int(getattr(http_resp, "status", 0) or 0)
|
|
395
|
+
r._content = http_resp.data or b""
|
|
396
|
+
r.url = final_url
|
|
397
|
+
r.headers = requests.structures.CaseInsensitiveDict(
|
|
398
|
+
dict(http_resp.headers or {})
|
|
399
|
+
)
|
|
400
|
+
return r
|
|
401
|
+
|
|
400
402
|
# =========================================================================
|
|
401
|
-
# SERP API Methods
|
|
403
|
+
# SERP API Methods
|
|
402
404
|
# =========================================================================
|
|
403
405
|
def serp_search(
|
|
404
406
|
self,
|
|
@@ -415,46 +417,8 @@ class ThordataClient:
|
|
|
415
417
|
output_format: str = "json",
|
|
416
418
|
**kwargs: Any,
|
|
417
419
|
) -> Dict[str, Any]:
|
|
418
|
-
"""
|
|
419
|
-
Execute a real-time SERP (Search Engine Results Page) search.
|
|
420
|
-
|
|
421
|
-
Args:
|
|
422
|
-
query: The search keywords.
|
|
423
|
-
engine: Search engine (google, bing, yandex, duckduckgo, baidu).
|
|
424
|
-
num: Number of results to retrieve (default: 10).
|
|
425
|
-
country: Country code for localized results (e.g., 'us').
|
|
426
|
-
language: Language code for interface (e.g., 'en').
|
|
427
|
-
search_type: Type of search (images, news, shopping, videos, etc.).
|
|
428
|
-
device: Device type ('desktop', 'mobile', 'tablet').
|
|
429
|
-
render_js: Enable JavaScript rendering in SERP (render_js=True).
|
|
430
|
-
no_cache: Disable internal caching (no_cache=True).
|
|
431
|
-
output_format: 'json' to return parsed JSON (default),
|
|
432
|
-
'html' to return HTML wrapped in {'html': ...}.
|
|
433
|
-
**kwargs: Additional engine-specific parameters.
|
|
434
|
-
|
|
435
|
-
Returns:
|
|
436
|
-
Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
|
|
437
|
-
|
|
438
|
-
Example:
|
|
439
|
-
>>> # Basic search
|
|
440
|
-
>>> results = client.serp_search("python tutorial")
|
|
441
|
-
>>>
|
|
442
|
-
>>> # With options
|
|
443
|
-
>>> results = client.serp_search(
|
|
444
|
-
... "laptop reviews",
|
|
445
|
-
... engine="google",
|
|
446
|
-
... num=20,
|
|
447
|
-
... country="us",
|
|
448
|
-
... search_type="shopping",
|
|
449
|
-
... device="mobile",
|
|
450
|
-
... render_js=True,
|
|
451
|
-
... no_cache=True,
|
|
452
|
-
... )
|
|
453
|
-
"""
|
|
454
|
-
# Normalize engine
|
|
455
420
|
engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
|
|
456
421
|
|
|
457
|
-
# Build request using model
|
|
458
422
|
request = SerpRequest(
|
|
459
423
|
query=query,
|
|
460
424
|
engine=engine_str,
|
|
@@ -469,84 +433,13 @@ class ThordataClient:
|
|
|
469
433
|
extra_params=kwargs,
|
|
470
434
|
)
|
|
471
435
|
|
|
472
|
-
|
|
473
|
-
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
474
|
-
|
|
475
|
-
logger.info(
|
|
476
|
-
f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
|
|
477
|
-
)
|
|
478
|
-
|
|
479
|
-
try:
|
|
480
|
-
response = self._api_request_with_retry(
|
|
481
|
-
"POST",
|
|
482
|
-
self._serp_url,
|
|
483
|
-
data=payload,
|
|
484
|
-
headers=headers,
|
|
485
|
-
)
|
|
486
|
-
response.raise_for_status()
|
|
487
|
-
|
|
488
|
-
# JSON mode (default)
|
|
489
|
-
if output_format.lower() == "json":
|
|
490
|
-
data = response.json()
|
|
491
|
-
|
|
492
|
-
if isinstance(data, dict):
|
|
493
|
-
code = data.get("code")
|
|
494
|
-
if code is not None and code != 200:
|
|
495
|
-
msg = extract_error_message(data)
|
|
496
|
-
raise_for_code(
|
|
497
|
-
f"SERP API Error: {msg}",
|
|
498
|
-
code=code,
|
|
499
|
-
payload=data,
|
|
500
|
-
)
|
|
501
|
-
|
|
502
|
-
return parse_json_response(data)
|
|
503
|
-
|
|
504
|
-
# HTML mode: wrap as dict to keep return type stable
|
|
505
|
-
return {"html": response.text}
|
|
506
|
-
|
|
507
|
-
except requests.Timeout as e:
|
|
508
|
-
raise ThordataTimeoutError(
|
|
509
|
-
f"SERP request timed out: {e}",
|
|
510
|
-
original_error=e,
|
|
511
|
-
) from e
|
|
512
|
-
except requests.RequestException as e:
|
|
513
|
-
raise ThordataNetworkError(
|
|
514
|
-
f"SERP request failed: {e}",
|
|
515
|
-
original_error=e,
|
|
516
|
-
) from e
|
|
436
|
+
return self.serp_search_advanced(request)
|
|
517
437
|
|
|
518
438
|
def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
|
|
519
|
-
"""
|
|
520
|
-
Execute a SERP search using a SerpRequest object.
|
|
521
|
-
|
|
522
|
-
This method provides full control over all search parameters.
|
|
523
|
-
|
|
524
|
-
Args:
|
|
525
|
-
request: A SerpRequest object with all parameters configured.
|
|
526
|
-
|
|
527
|
-
Returns:
|
|
528
|
-
Dict[str, Any]: Parsed JSON results or dict with 'html' key.
|
|
529
|
-
|
|
530
|
-
Example:
|
|
531
|
-
>>> from thordata.models import SerpRequest
|
|
532
|
-
>>> request = SerpRequest(
|
|
533
|
-
... query="python programming",
|
|
534
|
-
... engine="google",
|
|
535
|
-
... num=50,
|
|
536
|
-
... country="us",
|
|
537
|
-
... language="en",
|
|
538
|
-
... search_type="news",
|
|
539
|
-
... time_filter="week",
|
|
540
|
-
... safe_search=True
|
|
541
|
-
... )
|
|
542
|
-
>>> results = client.serp_search_advanced(request)
|
|
543
|
-
"""
|
|
544
439
|
payload = request.to_payload()
|
|
545
440
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
546
441
|
|
|
547
|
-
logger.info(
|
|
548
|
-
f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
|
|
549
|
-
)
|
|
442
|
+
logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
|
|
550
443
|
|
|
551
444
|
try:
|
|
552
445
|
response = self._api_request_with_retry(
|
|
@@ -559,34 +452,22 @@ class ThordataClient:
|
|
|
559
452
|
|
|
560
453
|
if request.output_format.lower() == "json":
|
|
561
454
|
data = response.json()
|
|
562
|
-
|
|
563
455
|
if isinstance(data, dict):
|
|
564
456
|
code = data.get("code")
|
|
565
457
|
if code is not None and code != 200:
|
|
566
458
|
msg = extract_error_message(data)
|
|
567
|
-
raise_for_code(
|
|
568
|
-
f"SERP API Error: {msg}",
|
|
569
|
-
code=code,
|
|
570
|
-
payload=data,
|
|
571
|
-
)
|
|
572
|
-
|
|
459
|
+
raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
|
|
573
460
|
return parse_json_response(data)
|
|
574
461
|
|
|
575
462
|
return {"html": response.text}
|
|
576
463
|
|
|
577
464
|
except requests.Timeout as e:
|
|
578
|
-
raise ThordataTimeoutError(
|
|
579
|
-
f"SERP request timed out: {e}",
|
|
580
|
-
original_error=e,
|
|
581
|
-
) from e
|
|
465
|
+
raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
|
|
582
466
|
except requests.RequestException as e:
|
|
583
|
-
raise ThordataNetworkError(
|
|
584
|
-
f"SERP request failed: {e}",
|
|
585
|
-
original_error=e,
|
|
586
|
-
) from e
|
|
467
|
+
raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
|
|
587
468
|
|
|
588
469
|
# =========================================================================
|
|
589
|
-
# Universal Scraping API
|
|
470
|
+
# Universal Scraping API
|
|
590
471
|
# =========================================================================
|
|
591
472
|
def universal_scrape(
|
|
592
473
|
self,
|
|
@@ -600,37 +481,6 @@ class ThordataClient:
|
|
|
600
481
|
wait_for: Optional[str] = None,
|
|
601
482
|
**kwargs: Any,
|
|
602
483
|
) -> Union[str, bytes]:
|
|
603
|
-
"""
|
|
604
|
-
Scrape a URL using the Universal Scraping API (Web Unlocker).
|
|
605
|
-
|
|
606
|
-
Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
|
|
607
|
-
|
|
608
|
-
Args:
|
|
609
|
-
url: Target URL.
|
|
610
|
-
js_render: Enable JavaScript rendering (headless browser).
|
|
611
|
-
output_format: "html" or "png" (screenshot).
|
|
612
|
-
country: Geo-targeting country code.
|
|
613
|
-
block_resources: Resources to block (e.g., 'script,image').
|
|
614
|
-
wait: Wait time in milliseconds after page load.
|
|
615
|
-
wait_for: CSS selector to wait for.
|
|
616
|
-
**kwargs: Additional parameters.
|
|
617
|
-
|
|
618
|
-
Returns:
|
|
619
|
-
HTML string or PNG bytes depending on output_format.
|
|
620
|
-
|
|
621
|
-
Example:
|
|
622
|
-
>>> # Get HTML
|
|
623
|
-
>>> html = client.universal_scrape("https://example.com", js_render=True)
|
|
624
|
-
>>>
|
|
625
|
-
>>> # Get screenshot
|
|
626
|
-
>>> png = client.universal_scrape(
|
|
627
|
-
... "https://example.com",
|
|
628
|
-
... js_render=True,
|
|
629
|
-
... output_format="png"
|
|
630
|
-
... )
|
|
631
|
-
>>> with open("screenshot.png", "wb") as f:
|
|
632
|
-
... f.write(png)
|
|
633
|
-
"""
|
|
634
484
|
request = UniversalScrapeRequest(
|
|
635
485
|
url=url,
|
|
636
486
|
js_render=js_render,
|
|
@@ -641,27 +491,15 @@ class ThordataClient:
|
|
|
641
491
|
wait_for=wait_for,
|
|
642
492
|
extra_params=kwargs,
|
|
643
493
|
)
|
|
644
|
-
|
|
645
494
|
return self.universal_scrape_advanced(request)
|
|
646
495
|
|
|
647
496
|
def universal_scrape_advanced(
|
|
648
497
|
self, request: UniversalScrapeRequest
|
|
649
498
|
) -> Union[str, bytes]:
|
|
650
|
-
"""
|
|
651
|
-
Scrape using a UniversalScrapeRequest object for full control.
|
|
652
|
-
|
|
653
|
-
Args:
|
|
654
|
-
request: A UniversalScrapeRequest with all parameters.
|
|
655
|
-
|
|
656
|
-
Returns:
|
|
657
|
-
HTML string or PNG bytes.
|
|
658
|
-
"""
|
|
659
499
|
payload = request.to_payload()
|
|
660
500
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
661
501
|
|
|
662
|
-
logger.info(
|
|
663
|
-
f"Universal Scrape: {request.url} (format: {request.output_format})"
|
|
664
|
-
)
|
|
502
|
+
logger.info(f"Universal Scrape: {request.url}")
|
|
665
503
|
|
|
666
504
|
try:
|
|
667
505
|
response = self._api_request_with_retry(
|
|
@@ -671,53 +509,40 @@ class ThordataClient:
|
|
|
671
509
|
headers=headers,
|
|
672
510
|
)
|
|
673
511
|
response.raise_for_status()
|
|
674
|
-
|
|
675
512
|
return self._process_universal_response(response, request.output_format)
|
|
676
513
|
|
|
677
514
|
except requests.Timeout as e:
|
|
678
515
|
raise ThordataTimeoutError(
|
|
679
|
-
f"Universal
|
|
516
|
+
f"Universal timeout: {e}", original_error=e
|
|
680
517
|
) from e
|
|
681
518
|
except requests.RequestException as e:
|
|
682
519
|
raise ThordataNetworkError(
|
|
683
|
-
f"Universal
|
|
520
|
+
f"Universal failed: {e}", original_error=e
|
|
684
521
|
) from e
|
|
685
522
|
|
|
686
523
|
def _process_universal_response(
|
|
687
524
|
self, response: requests.Response, output_format: str
|
|
688
525
|
) -> Union[str, bytes]:
|
|
689
|
-
"""Process the response from Universal API."""
|
|
690
|
-
# Try to parse as JSON
|
|
691
526
|
try:
|
|
692
527
|
resp_json = response.json()
|
|
693
528
|
except ValueError:
|
|
694
|
-
|
|
695
|
-
if output_format.lower() == "png":
|
|
696
|
-
return response.content
|
|
697
|
-
return response.text
|
|
529
|
+
return response.content if output_format.lower() == "png" else response.text
|
|
698
530
|
|
|
699
|
-
# Check for API-level errors
|
|
700
531
|
if isinstance(resp_json, dict):
|
|
701
532
|
code = resp_json.get("code")
|
|
702
533
|
if code is not None and code != 200:
|
|
703
534
|
msg = extract_error_message(resp_json)
|
|
704
|
-
raise_for_code(
|
|
705
|
-
f"Universal API Error: {msg}", code=code, payload=resp_json
|
|
706
|
-
)
|
|
535
|
+
raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
|
|
707
536
|
|
|
708
|
-
# Extract HTML
|
|
709
537
|
if "html" in resp_json:
|
|
710
538
|
return resp_json["html"]
|
|
711
|
-
|
|
712
|
-
# Extract PNG
|
|
713
539
|
if "png" in resp_json:
|
|
714
540
|
return decode_base64_image(resp_json["png"])
|
|
715
541
|
|
|
716
|
-
# Fallback
|
|
717
542
|
return str(resp_json)
|
|
718
543
|
|
|
719
544
|
# =========================================================================
|
|
720
|
-
# Web Scraper API
|
|
545
|
+
# Web Scraper API (Tasks)
|
|
721
546
|
# =========================================================================
|
|
722
547
|
def create_scraper_task(
|
|
723
548
|
self,
|
|
@@ -727,29 +552,6 @@ class ThordataClient:
|
|
|
727
552
|
parameters: Dict[str, Any],
|
|
728
553
|
universal_params: Optional[Dict[str, Any]] = None,
|
|
729
554
|
) -> str:
|
|
730
|
-
"""
|
|
731
|
-
Create an asynchronous Web Scraper task.
|
|
732
|
-
|
|
733
|
-
Note: Get spider_id and spider_name from the Thordata Dashboard.
|
|
734
|
-
|
|
735
|
-
Args:
|
|
736
|
-
file_name: Name for the output file.
|
|
737
|
-
spider_id: Spider identifier from Dashboard.
|
|
738
|
-
spider_name: Spider name (e.g., "youtube.com").
|
|
739
|
-
parameters: Spider-specific parameters.
|
|
740
|
-
universal_params: Global spider settings.
|
|
741
|
-
|
|
742
|
-
Returns:
|
|
743
|
-
The created task_id.
|
|
744
|
-
|
|
745
|
-
Example:
|
|
746
|
-
>>> task_id = client.create_scraper_task(
|
|
747
|
-
... file_name="youtube_data",
|
|
748
|
-
... spider_id="youtube_video-post_by-url",
|
|
749
|
-
... spider_name="youtube.com",
|
|
750
|
-
... parameters={"url": "https://youtube.com/@channel/videos"}
|
|
751
|
-
... )
|
|
752
|
-
"""
|
|
753
555
|
config = ScraperTaskConfig(
|
|
754
556
|
file_name=file_name,
|
|
755
557
|
spider_id=spider_id,
|
|
@@ -757,50 +559,26 @@ class ThordataClient:
|
|
|
757
559
|
parameters=parameters,
|
|
758
560
|
universal_params=universal_params,
|
|
759
561
|
)
|
|
760
|
-
|
|
761
562
|
return self.create_scraper_task_advanced(config)
|
|
762
563
|
|
|
763
564
|
def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
764
|
-
"""
|
|
765
|
-
Create a scraper task using a ScraperTaskConfig object.
|
|
766
|
-
|
|
767
|
-
Args:
|
|
768
|
-
config: Task configuration.
|
|
769
|
-
|
|
770
|
-
Returns:
|
|
771
|
-
The created task_id.
|
|
772
|
-
"""
|
|
773
565
|
self._require_public_credentials()
|
|
774
|
-
|
|
775
566
|
payload = config.to_payload()
|
|
776
|
-
|
|
777
|
-
# Builder needs 3 headers: token, key, Authorization Bearer
|
|
778
567
|
headers = build_builder_headers(
|
|
779
|
-
self.scraper_token,
|
|
780
|
-
self.public_token or "",
|
|
781
|
-
self.public_key or "",
|
|
568
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
782
569
|
)
|
|
783
570
|
|
|
784
|
-
logger.info(f"Creating Scraper Task: {config.spider_name}")
|
|
785
|
-
|
|
786
571
|
try:
|
|
787
572
|
response = self._api_request_with_retry(
|
|
788
|
-
"POST",
|
|
789
|
-
self._builder_url,
|
|
790
|
-
data=payload,
|
|
791
|
-
headers=headers,
|
|
573
|
+
"POST", self._builder_url, data=payload, headers=headers
|
|
792
574
|
)
|
|
793
575
|
response.raise_for_status()
|
|
794
|
-
|
|
795
576
|
data = response.json()
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
|
|
801
|
-
|
|
577
|
+
if data.get("code") != 200:
|
|
578
|
+
raise_for_code(
|
|
579
|
+
"Task creation failed", code=data.get("code"), payload=data
|
|
580
|
+
)
|
|
802
581
|
return data["data"]["task_id"]
|
|
803
|
-
|
|
804
582
|
except requests.RequestException as e:
|
|
805
583
|
raise ThordataNetworkError(
|
|
806
584
|
f"Task creation failed: {e}", original_error=e
|
|
@@ -814,35 +592,6 @@ class ThordataClient:
|
|
|
814
592
|
parameters: Dict[str, Any],
|
|
815
593
|
common_settings: "CommonSettings",
|
|
816
594
|
) -> str:
|
|
817
|
-
"""
|
|
818
|
-
Create a YouTube video/audio download task.
|
|
819
|
-
|
|
820
|
-
Uses the /video_builder endpoint.
|
|
821
|
-
|
|
822
|
-
Args:
|
|
823
|
-
file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
|
|
824
|
-
spider_id: Spider identifier (e.g., "youtube_video_by-url").
|
|
825
|
-
spider_name: Spider name (typically "youtube.com").
|
|
826
|
-
parameters: Spider parameters (e.g., {"url": "..."}).
|
|
827
|
-
common_settings: Video/audio settings.
|
|
828
|
-
|
|
829
|
-
Returns:
|
|
830
|
-
The created task_id.
|
|
831
|
-
|
|
832
|
-
Example:
|
|
833
|
-
>>> from thordata import CommonSettings
|
|
834
|
-
>>> task_id = client.create_video_task(
|
|
835
|
-
... file_name="{{VideoID}}",
|
|
836
|
-
... spider_id="youtube_video_by-url",
|
|
837
|
-
... spider_name="youtube.com",
|
|
838
|
-
... parameters={"url": "https://youtube.com/watch?v=xxx"},
|
|
839
|
-
... common_settings=CommonSettings(
|
|
840
|
-
... resolution="1080p",
|
|
841
|
-
... is_subtitles="true"
|
|
842
|
-
... )
|
|
843
|
-
... )
|
|
844
|
-
"""
|
|
845
|
-
|
|
846
595
|
config = VideoTaskConfig(
|
|
847
596
|
file_name=file_name,
|
|
848
597
|
spider_id=spider_id,
|
|
@@ -850,210 +599,97 @@ class ThordataClient:
|
|
|
850
599
|
parameters=parameters,
|
|
851
600
|
common_settings=common_settings,
|
|
852
601
|
)
|
|
853
|
-
|
|
854
602
|
return self.create_video_task_advanced(config)
|
|
855
603
|
|
|
856
604
|
def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
857
|
-
"""
|
|
858
|
-
Create a video task using VideoTaskConfig object.
|
|
859
|
-
|
|
860
|
-
Args:
|
|
861
|
-
config: Video task configuration.
|
|
862
|
-
|
|
863
|
-
Returns:
|
|
864
|
-
The created task_id.
|
|
865
|
-
"""
|
|
866
|
-
|
|
867
605
|
self._require_public_credentials()
|
|
868
|
-
|
|
869
606
|
payload = config.to_payload()
|
|
870
607
|
headers = build_builder_headers(
|
|
871
|
-
self.scraper_token,
|
|
872
|
-
self.public_token or "",
|
|
873
|
-
self.public_key or "",
|
|
608
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
874
609
|
)
|
|
875
610
|
|
|
876
|
-
logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
|
|
877
|
-
|
|
878
611
|
response = self._api_request_with_retry(
|
|
879
|
-
"POST",
|
|
880
|
-
self._video_builder_url,
|
|
881
|
-
data=payload,
|
|
882
|
-
headers=headers,
|
|
612
|
+
"POST", self._video_builder_url, data=payload, headers=headers
|
|
883
613
|
)
|
|
884
614
|
response.raise_for_status()
|
|
885
|
-
|
|
886
615
|
data = response.json()
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
if code != 200:
|
|
890
|
-
msg = extract_error_message(data)
|
|
616
|
+
if data.get("code") != 200:
|
|
891
617
|
raise_for_code(
|
|
892
|
-
|
|
618
|
+
"Video task creation failed", code=data.get("code"), payload=data
|
|
893
619
|
)
|
|
894
|
-
|
|
895
620
|
return data["data"]["task_id"]
|
|
896
621
|
|
|
897
622
|
def get_task_status(self, task_id: str) -> str:
|
|
898
|
-
"""
|
|
899
|
-
Check the status of an asynchronous scraping task.
|
|
900
|
-
|
|
901
|
-
Returns:
|
|
902
|
-
Status string (e.g., "running", "ready", "failed").
|
|
903
|
-
|
|
904
|
-
Raises:
|
|
905
|
-
ThordataConfigError: If public credentials are missing.
|
|
906
|
-
ThordataAPIError: If API returns a non-200 code in JSON payload.
|
|
907
|
-
ThordataNetworkError: If network/HTTP request fails.
|
|
908
|
-
"""
|
|
909
623
|
self._require_public_credentials()
|
|
910
|
-
|
|
911
624
|
headers = build_public_api_headers(
|
|
912
625
|
self.public_token or "", self.public_key or ""
|
|
913
626
|
)
|
|
914
|
-
payload = {"tasks_ids": task_id}
|
|
915
|
-
|
|
916
627
|
try:
|
|
917
628
|
response = self._api_request_with_retry(
|
|
918
629
|
"POST",
|
|
919
630
|
self._status_url,
|
|
920
|
-
data=
|
|
631
|
+
data={"tasks_ids": task_id},
|
|
921
632
|
headers=headers,
|
|
922
633
|
)
|
|
923
634
|
response.raise_for_status()
|
|
924
635
|
data = response.json()
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
payload=data,
|
|
934
|
-
)
|
|
935
|
-
|
|
936
|
-
items = data.get("data") or []
|
|
937
|
-
for item in items:
|
|
938
|
-
if str(item.get("task_id")) == str(task_id):
|
|
939
|
-
return item.get("status", "unknown")
|
|
940
|
-
|
|
941
|
-
return "unknown"
|
|
942
|
-
|
|
943
|
-
# Unexpected payload type
|
|
944
|
-
raise ThordataNetworkError(
|
|
945
|
-
f"Unexpected task status response type: {type(data).__name__}",
|
|
946
|
-
original_error=None,
|
|
947
|
-
)
|
|
948
|
-
|
|
949
|
-
except requests.Timeout as e:
|
|
950
|
-
raise ThordataTimeoutError(
|
|
951
|
-
f"Status check timed out: {e}", original_error=e
|
|
952
|
-
) from e
|
|
636
|
+
if data.get("code") != 200:
|
|
637
|
+
raise_for_code("Task status error", code=data.get("code"), payload=data)
|
|
638
|
+
|
|
639
|
+
items = data.get("data") or []
|
|
640
|
+
for item in items:
|
|
641
|
+
if str(item.get("task_id")) == str(task_id):
|
|
642
|
+
return item.get("status", "unknown")
|
|
643
|
+
return "unknown"
|
|
953
644
|
except requests.RequestException as e:
|
|
954
645
|
raise ThordataNetworkError(
|
|
955
646
|
f"Status check failed: {e}", original_error=e
|
|
956
647
|
) from e
|
|
957
648
|
|
|
958
649
|
def safe_get_task_status(self, task_id: str) -> str:
|
|
959
|
-
"""
|
|
960
|
-
Backward-compatible status check.
|
|
961
|
-
|
|
962
|
-
Returns:
|
|
963
|
-
Status string, or "error" on any exception.
|
|
964
|
-
"""
|
|
965
650
|
try:
|
|
966
651
|
return self.get_task_status(task_id)
|
|
967
652
|
except Exception:
|
|
968
653
|
return "error"
|
|
969
654
|
|
|
970
655
|
def get_task_result(self, task_id: str, file_type: str = "json") -> str:
|
|
971
|
-
"""
|
|
972
|
-
Get the download URL for a completed task.
|
|
973
|
-
"""
|
|
974
656
|
self._require_public_credentials()
|
|
975
|
-
|
|
976
657
|
headers = build_public_api_headers(
|
|
977
658
|
self.public_token or "", self.public_key or ""
|
|
978
659
|
)
|
|
979
|
-
payload = {"tasks_id": task_id, "type": file_type}
|
|
980
|
-
|
|
981
|
-
logger.info(f"Getting result URL for Task: {task_id}")
|
|
982
|
-
|
|
983
660
|
try:
|
|
984
661
|
response = self._api_request_with_retry(
|
|
985
662
|
"POST",
|
|
986
663
|
self._download_url,
|
|
987
|
-
data=
|
|
664
|
+
data={"tasks_id": task_id, "type": file_type},
|
|
988
665
|
headers=headers,
|
|
989
666
|
)
|
|
990
667
|
response.raise_for_status()
|
|
991
|
-
|
|
992
668
|
data = response.json()
|
|
993
|
-
code
|
|
994
|
-
|
|
995
|
-
if code == 200 and data.get("data"):
|
|
669
|
+
if data.get("code") == 200 and data.get("data"):
|
|
996
670
|
return data["data"]["download"]
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
|
|
1000
|
-
# This line won't be reached, but satisfies mypy
|
|
1001
|
-
raise RuntimeError("Unexpected state")
|
|
1002
|
-
|
|
671
|
+
raise_for_code("Get result failed", code=data.get("code"), payload=data)
|
|
672
|
+
return ""
|
|
1003
673
|
except requests.RequestException as e:
|
|
1004
674
|
raise ThordataNetworkError(
|
|
1005
675
|
f"Get result failed: {e}", original_error=e
|
|
1006
676
|
) from e
|
|
1007
677
|
|
|
1008
|
-
def list_tasks(
|
|
1009
|
-
self,
|
|
1010
|
-
page: int = 1,
|
|
1011
|
-
size: int = 20,
|
|
1012
|
-
) -> Dict[str, Any]:
|
|
1013
|
-
"""
|
|
1014
|
-
List all Web Scraper tasks.
|
|
1015
|
-
|
|
1016
|
-
Args:
|
|
1017
|
-
page: Page number (starts from 1).
|
|
1018
|
-
size: Number of tasks per page.
|
|
1019
|
-
|
|
1020
|
-
Returns:
|
|
1021
|
-
Dict containing 'count' and 'list' of tasks.
|
|
1022
|
-
|
|
1023
|
-
Example:
|
|
1024
|
-
>>> result = client.list_tasks(page=1, size=10)
|
|
1025
|
-
>>> print(f"Total tasks: {result['count']}")
|
|
1026
|
-
>>> for task in result['list']:
|
|
1027
|
-
... print(f"Task {task['task_id']}: {task['status']}")
|
|
1028
|
-
"""
|
|
678
|
+
def list_tasks(self, page: int = 1, size: int = 20) -> Dict[str, Any]:
|
|
1029
679
|
self._require_public_credentials()
|
|
1030
|
-
|
|
1031
680
|
headers = build_public_api_headers(
|
|
1032
681
|
self.public_token or "", self.public_key or ""
|
|
1033
682
|
)
|
|
1034
|
-
payload: Dict[str, Any] = {}
|
|
1035
|
-
if page:
|
|
1036
|
-
payload["page"] = str(page)
|
|
1037
|
-
if size:
|
|
1038
|
-
payload["size"] = str(size)
|
|
1039
|
-
|
|
1040
|
-
logger.info(f"Listing tasks: page={page}, size={size}")
|
|
1041
|
-
|
|
1042
683
|
response = self._api_request_with_retry(
|
|
1043
684
|
"POST",
|
|
1044
685
|
self._list_url,
|
|
1045
|
-
data=
|
|
686
|
+
data={"page": str(page), "size": str(size)},
|
|
1046
687
|
headers=headers,
|
|
1047
688
|
)
|
|
1048
689
|
response.raise_for_status()
|
|
1049
|
-
|
|
1050
690
|
data = response.json()
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
if code != 200:
|
|
1054
|
-
msg = extract_error_message(data)
|
|
1055
|
-
raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
|
|
1056
|
-
|
|
691
|
+
if data.get("code") != 200:
|
|
692
|
+
raise_for_code("List tasks failed", code=data.get("code"), payload=data)
|
|
1057
693
|
return data.get("data", {"count": 0, "list": []})
|
|
1058
694
|
|
|
1059
695
|
def wait_for_task(
|
|
@@ -1063,84 +699,32 @@ class ThordataClient:
|
|
|
1063
699
|
poll_interval: float = 5.0,
|
|
1064
700
|
max_wait: float = 600.0,
|
|
1065
701
|
) -> str:
|
|
1066
|
-
"""
|
|
1067
|
-
Wait for a task to complete.
|
|
1068
|
-
|
|
1069
|
-
Args:
|
|
1070
|
-
task_id: The task ID to wait for.
|
|
1071
|
-
poll_interval: Seconds between status checks.
|
|
1072
|
-
max_wait: Maximum seconds to wait.
|
|
1073
|
-
|
|
1074
|
-
Returns:
|
|
1075
|
-
Final task status.
|
|
1076
|
-
|
|
1077
|
-
Raises:
|
|
1078
|
-
TimeoutError: If max_wait is exceeded.
|
|
1079
|
-
|
|
1080
|
-
Example:
|
|
1081
|
-
>>> task_id = client.create_scraper_task(...)
|
|
1082
|
-
>>> status = client.wait_for_task(task_id, max_wait=300)
|
|
1083
|
-
>>> if status in ("ready", "success"):
|
|
1084
|
-
... url = client.get_task_result(task_id)
|
|
1085
|
-
"""
|
|
1086
702
|
import time
|
|
1087
703
|
|
|
1088
704
|
start = time.monotonic()
|
|
1089
|
-
|
|
1090
705
|
while (time.monotonic() - start) < max_wait:
|
|
1091
706
|
status = self.get_task_status(task_id)
|
|
1092
|
-
|
|
1093
|
-
logger.debug(f"Task {task_id} status: {status}")
|
|
1094
|
-
|
|
1095
|
-
terminal_statuses = {
|
|
707
|
+
if status.lower() in {
|
|
1096
708
|
"ready",
|
|
1097
709
|
"success",
|
|
1098
710
|
"finished",
|
|
1099
711
|
"failed",
|
|
1100
712
|
"error",
|
|
1101
713
|
"cancelled",
|
|
1102
|
-
}
|
|
1103
|
-
|
|
1104
|
-
if status.lower() in terminal_statuses:
|
|
714
|
+
}:
|
|
1105
715
|
return status
|
|
1106
|
-
|
|
1107
716
|
time.sleep(poll_interval)
|
|
1108
|
-
|
|
1109
|
-
raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
|
|
717
|
+
raise TimeoutError(f"Task {task_id} timeout")
|
|
1110
718
|
|
|
1111
719
|
# =========================================================================
|
|
1112
|
-
#
|
|
720
|
+
# Account / Locations / Utils
|
|
1113
721
|
# =========================================================================
|
|
1114
722
|
def get_usage_statistics(
|
|
1115
723
|
self,
|
|
1116
724
|
from_date: Union[str, date],
|
|
1117
725
|
to_date: Union[str, date],
|
|
1118
726
|
) -> UsageStatistics:
|
|
1119
|
-
"""
|
|
1120
|
-
Get account usage statistics for a date range.
|
|
1121
|
-
|
|
1122
|
-
Args:
|
|
1123
|
-
from_date: Start date (YYYY-MM-DD string or date object).
|
|
1124
|
-
to_date: End date (YYYY-MM-DD string or date object).
|
|
1125
|
-
|
|
1126
|
-
Returns:
|
|
1127
|
-
UsageStatistics object with traffic data.
|
|
1128
|
-
|
|
1129
|
-
Raises:
|
|
1130
|
-
ValueError: If date range exceeds 180 days.
|
|
1131
|
-
|
|
1132
|
-
Example:
|
|
1133
|
-
>>> from datetime import date, timedelta
|
|
1134
|
-
>>> today = date.today()
|
|
1135
|
-
>>> week_ago = today - timedelta(days=7)
|
|
1136
|
-
>>> stats = client.get_usage_statistics(week_ago, today)
|
|
1137
|
-
>>> print(f"Used: {stats.range_usage_gb():.2f} GB")
|
|
1138
|
-
>>> print(f"Balance: {stats.balance_gb():.2f} GB")
|
|
1139
|
-
"""
|
|
1140
|
-
|
|
1141
727
|
self._require_public_credentials()
|
|
1142
|
-
|
|
1143
|
-
# Convert dates to strings
|
|
1144
728
|
if isinstance(from_date, date):
|
|
1145
729
|
from_date = from_date.strftime("%Y-%m-%d")
|
|
1146
730
|
if isinstance(to_date, date):
|
|
@@ -1152,185 +736,33 @@ class ThordataClient:
|
|
|
1152
736
|
"from_date": from_date,
|
|
1153
737
|
"to_date": to_date,
|
|
1154
738
|
}
|
|
1155
|
-
|
|
1156
|
-
logger.info(f"Getting usage statistics: {from_date} to {to_date}")
|
|
1157
|
-
|
|
1158
739
|
response = self._api_request_with_retry(
|
|
1159
|
-
"GET",
|
|
1160
|
-
self._usage_stats_url,
|
|
1161
|
-
params=params,
|
|
740
|
+
"GET", self._usage_stats_url, params=params
|
|
1162
741
|
)
|
|
1163
742
|
response.raise_for_status()
|
|
1164
|
-
|
|
1165
743
|
data = response.json()
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
if code is not None and code != 200:
|
|
1170
|
-
msg = extract_error_message(data)
|
|
1171
|
-
raise_for_code(
|
|
1172
|
-
f"Usage statistics error: {msg}",
|
|
1173
|
-
code=code,
|
|
1174
|
-
payload=data,
|
|
1175
|
-
)
|
|
1176
|
-
|
|
1177
|
-
# Extract data field
|
|
1178
|
-
usage_data = data.get("data", data)
|
|
1179
|
-
return UsageStatistics.from_dict(usage_data)
|
|
1180
|
-
|
|
1181
|
-
raise ThordataNetworkError(
|
|
1182
|
-
f"Unexpected usage statistics response: {type(data).__name__}",
|
|
1183
|
-
original_error=None,
|
|
1184
|
-
)
|
|
1185
|
-
|
|
1186
|
-
def get_residential_balance(self) -> Dict[str, Any]:
|
|
1187
|
-
"""
|
|
1188
|
-
Get residential proxy balance (Public API NEW).
|
|
1189
|
-
|
|
1190
|
-
Requires sign and apiKey credentials.
|
|
1191
|
-
|
|
1192
|
-
Returns:
|
|
1193
|
-
Dict with 'balance' (bytes) and 'expire_time' (timestamp).
|
|
1194
|
-
|
|
1195
|
-
Example:
|
|
1196
|
-
>>> result = client.get_residential_balance()
|
|
1197
|
-
>>> balance_gb = result['balance'] / (1024**3)
|
|
1198
|
-
>>> print(f"Balance: {balance_gb:.2f} GB")
|
|
1199
|
-
"""
|
|
1200
|
-
if not self.sign or not self.api_key:
|
|
1201
|
-
raise ThordataConfigError(
|
|
1202
|
-
"sign and api_key are required for Public API NEW. "
|
|
1203
|
-
"Set THORDATA_SIGN and THORDATA_API_KEY environment variables."
|
|
1204
|
-
)
|
|
1205
|
-
|
|
1206
|
-
headers = build_sign_headers(self.sign, self.api_key)
|
|
1207
|
-
|
|
1208
|
-
logger.info("Getting residential proxy balance (API NEW)")
|
|
1209
|
-
|
|
1210
|
-
response = self._api_request_with_retry(
|
|
1211
|
-
"POST",
|
|
1212
|
-
f"{self._gateway_base_url}/getFlowBalance",
|
|
1213
|
-
headers=headers,
|
|
1214
|
-
data={},
|
|
1215
|
-
)
|
|
1216
|
-
response.raise_for_status()
|
|
1217
|
-
|
|
1218
|
-
data = response.json()
|
|
1219
|
-
code = data.get("code")
|
|
1220
|
-
|
|
1221
|
-
if code != 200:
|
|
1222
|
-
msg = extract_error_message(data)
|
|
1223
|
-
raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
|
|
1224
|
-
|
|
1225
|
-
return data.get("data", {})
|
|
1226
|
-
|
|
1227
|
-
def get_residential_usage(
|
|
1228
|
-
self,
|
|
1229
|
-
start_time: Union[str, int],
|
|
1230
|
-
end_time: Union[str, int],
|
|
1231
|
-
) -> Dict[str, Any]:
|
|
1232
|
-
"""
|
|
1233
|
-
Get residential proxy usage records (Public API NEW).
|
|
1234
|
-
|
|
1235
|
-
Args:
|
|
1236
|
-
start_time: Start timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
|
|
1237
|
-
end_time: End timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
|
|
1238
|
-
|
|
1239
|
-
Returns:
|
|
1240
|
-
Dict with usage data including 'all_flow', 'all_used_flow', 'data' list.
|
|
1241
|
-
|
|
1242
|
-
Example:
|
|
1243
|
-
>>> import time
|
|
1244
|
-
>>> end = int(time.time())
|
|
1245
|
-
>>> start = end - 7*24*3600 # Last 7 days
|
|
1246
|
-
>>> usage = client.get_residential_usage(start, end)
|
|
1247
|
-
>>> print(f"Total used: {usage['all_used_flow'] / (1024**3):.2f} GB")
|
|
1248
|
-
"""
|
|
1249
|
-
if not self.sign or not self.api_key:
|
|
1250
|
-
raise ThordataConfigError(
|
|
1251
|
-
"sign and api_key are required for Public API NEW."
|
|
1252
|
-
)
|
|
1253
|
-
|
|
1254
|
-
headers = build_sign_headers(self.sign, self.api_key)
|
|
1255
|
-
payload = {
|
|
1256
|
-
"start_time": str(start_time),
|
|
1257
|
-
"end_time": str(end_time),
|
|
1258
|
-
}
|
|
1259
|
-
|
|
1260
|
-
logger.info(f"Getting residential usage: {start_time} to {end_time}")
|
|
1261
|
-
|
|
1262
|
-
response = self._api_request_with_retry(
|
|
1263
|
-
"POST",
|
|
1264
|
-
f"{self._gateway_base_url}/usageRecord",
|
|
1265
|
-
headers=headers,
|
|
1266
|
-
data=payload,
|
|
1267
|
-
)
|
|
1268
|
-
response.raise_for_status()
|
|
1269
|
-
|
|
1270
|
-
data = response.json()
|
|
1271
|
-
code = data.get("code")
|
|
1272
|
-
|
|
1273
|
-
if code != 200:
|
|
1274
|
-
msg = extract_error_message(data)
|
|
1275
|
-
raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
|
|
1276
|
-
|
|
1277
|
-
return data.get("data", {})
|
|
744
|
+
if data.get("code") != 200:
|
|
745
|
+
raise_for_code("Usage stats error", code=data.get("code"), payload=data)
|
|
746
|
+
return UsageStatistics.from_dict(data.get("data", data))
|
|
1278
747
|
|
|
1279
748
|
def list_proxy_users(
|
|
1280
749
|
self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
|
|
1281
750
|
) -> ProxyUserList:
|
|
1282
|
-
"""
|
|
1283
|
-
List all proxy users (sub-accounts).
|
|
1284
|
-
|
|
1285
|
-
Args:
|
|
1286
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1287
|
-
|
|
1288
|
-
Returns:
|
|
1289
|
-
ProxyUserList with user details.
|
|
1290
|
-
|
|
1291
|
-
Example:
|
|
1292
|
-
>>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
|
|
1293
|
-
>>> print(f"Total users: {users.user_count}")
|
|
1294
|
-
>>> for user in users.users:
|
|
1295
|
-
... print(f"{user.username}: {user.usage_gb():.2f} GB used")
|
|
1296
|
-
"""
|
|
1297
|
-
|
|
1298
751
|
self._require_public_credentials()
|
|
1299
|
-
|
|
752
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1300
753
|
params = {
|
|
1301
754
|
"token": self.public_token,
|
|
1302
755
|
"key": self.public_key,
|
|
1303
|
-
"proxy_type": str(
|
|
1304
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1305
|
-
),
|
|
756
|
+
"proxy_type": str(pt),
|
|
1306
757
|
}
|
|
1307
|
-
|
|
1308
|
-
logger.info(f"Listing proxy users: type={params['proxy_type']}")
|
|
1309
|
-
|
|
1310
758
|
response = self._api_request_with_retry(
|
|
1311
|
-
"GET",
|
|
1312
|
-
f"{self._proxy_users_url}/user-list",
|
|
1313
|
-
params=params,
|
|
759
|
+
"GET", f"{self._proxy_users_url}/user-list", params=params
|
|
1314
760
|
)
|
|
1315
761
|
response.raise_for_status()
|
|
1316
|
-
|
|
1317
762
|
data = response.json()
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
if code is not None and code != 200:
|
|
1322
|
-
msg = extract_error_message(data)
|
|
1323
|
-
raise_for_code(
|
|
1324
|
-
f"List proxy users error: {msg}", code=code, payload=data
|
|
1325
|
-
)
|
|
1326
|
-
|
|
1327
|
-
user_data = data.get("data", data)
|
|
1328
|
-
return ProxyUserList.from_dict(user_data)
|
|
1329
|
-
|
|
1330
|
-
raise ThordataNetworkError(
|
|
1331
|
-
f"Unexpected proxy users response: {type(data).__name__}",
|
|
1332
|
-
original_error=None,
|
|
1333
|
-
)
|
|
763
|
+
if data.get("code") != 200:
|
|
764
|
+
raise_for_code("List users error", code=data.get("code"), payload=data)
|
|
765
|
+
return ProxyUserList.from_dict(data.get("data", data))
|
|
1334
766
|
|
|
1335
767
|
def create_proxy_user(
|
|
1336
768
|
self,
|
|
@@ -1340,45 +772,18 @@ class ThordataClient:
|
|
|
1340
772
|
traffic_limit: int = 0,
|
|
1341
773
|
status: bool = True,
|
|
1342
774
|
) -> Dict[str, Any]:
|
|
1343
|
-
"""
|
|
1344
|
-
Create a new proxy user (sub-account).
|
|
1345
|
-
|
|
1346
|
-
Args:
|
|
1347
|
-
username: Username for the new user.
|
|
1348
|
-
password: Password for the new user.
|
|
1349
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1350
|
-
traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
|
|
1351
|
-
status: Enable/disable user (True/False).
|
|
1352
|
-
|
|
1353
|
-
Returns:
|
|
1354
|
-
API response data.
|
|
1355
|
-
|
|
1356
|
-
Example:
|
|
1357
|
-
>>> result = client.create_proxy_user(
|
|
1358
|
-
... username="subuser1",
|
|
1359
|
-
... password="securepass",
|
|
1360
|
-
... traffic_limit=5120, # 5GB
|
|
1361
|
-
... status=True
|
|
1362
|
-
... )
|
|
1363
|
-
"""
|
|
1364
775
|
self._require_public_credentials()
|
|
1365
|
-
|
|
776
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1366
777
|
headers = build_public_api_headers(
|
|
1367
778
|
self.public_token or "", self.public_key or ""
|
|
1368
779
|
)
|
|
1369
|
-
|
|
1370
780
|
payload = {
|
|
1371
|
-
"proxy_type": str(
|
|
1372
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1373
|
-
),
|
|
781
|
+
"proxy_type": str(pt),
|
|
1374
782
|
"username": username,
|
|
1375
783
|
"password": password,
|
|
1376
784
|
"traffic_limit": str(traffic_limit),
|
|
1377
785
|
"status": "true" if status else "false",
|
|
1378
786
|
}
|
|
1379
|
-
|
|
1380
|
-
logger.info(f"Creating proxy user: {username}")
|
|
1381
|
-
|
|
1382
787
|
response = self._api_request_with_retry(
|
|
1383
788
|
"POST",
|
|
1384
789
|
f"{self._proxy_users_url}/create-user",
|
|
@@ -1386,14 +791,9 @@ class ThordataClient:
|
|
|
1386
791
|
headers=headers,
|
|
1387
792
|
)
|
|
1388
793
|
response.raise_for_status()
|
|
1389
|
-
|
|
1390
794
|
data = response.json()
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
if code != 200:
|
|
1394
|
-
msg = extract_error_message(data)
|
|
1395
|
-
raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
|
|
1396
|
-
|
|
795
|
+
if data.get("code") != 200:
|
|
796
|
+
raise_for_code("Create user failed", code=data.get("code"), payload=data)
|
|
1397
797
|
return data.get("data", {})
|
|
1398
798
|
|
|
1399
799
|
def add_whitelist_ip(
|
|
@@ -1402,328 +802,86 @@ class ThordataClient:
|
|
|
1402
802
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1403
803
|
status: bool = True,
|
|
1404
804
|
) -> Dict[str, Any]:
|
|
1405
|
-
"""
|
|
1406
|
-
Add an IP to the whitelist for IP authentication.
|
|
1407
|
-
|
|
1408
|
-
Args:
|
|
1409
|
-
ip: IP address to whitelist.
|
|
1410
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
|
|
1411
|
-
status: Enable/disable the IP (True/False).
|
|
1412
|
-
|
|
1413
|
-
Returns:
|
|
1414
|
-
API response data.
|
|
1415
|
-
|
|
1416
|
-
Example:
|
|
1417
|
-
>>> result = client.add_whitelist_ip(
|
|
1418
|
-
... ip="123.45.67.89",
|
|
1419
|
-
... proxy_type=ProxyType.RESIDENTIAL,
|
|
1420
|
-
... status=True
|
|
1421
|
-
... )
|
|
1422
|
-
"""
|
|
1423
805
|
self._require_public_credentials()
|
|
1424
|
-
|
|
806
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1425
807
|
headers = build_public_api_headers(
|
|
1426
808
|
self.public_token or "", self.public_key or ""
|
|
1427
809
|
)
|
|
1428
|
-
|
|
1429
|
-
# Convert ProxyType to int
|
|
1430
|
-
proxy_type_int = (
|
|
1431
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1432
|
-
)
|
|
1433
|
-
|
|
1434
810
|
payload = {
|
|
1435
|
-
"proxy_type": str(
|
|
811
|
+
"proxy_type": str(pt),
|
|
1436
812
|
"ip": ip,
|
|
1437
813
|
"status": "true" if status else "false",
|
|
1438
814
|
}
|
|
1439
|
-
|
|
1440
|
-
logger.info(f"Adding whitelist IP: {ip}")
|
|
1441
|
-
|
|
1442
815
|
response = self._api_request_with_retry(
|
|
1443
|
-
"POST",
|
|
1444
|
-
f"{self._whitelist_url}/add-ip",
|
|
1445
|
-
data=payload,
|
|
1446
|
-
headers=headers,
|
|
816
|
+
"POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
|
|
1447
817
|
)
|
|
1448
818
|
response.raise_for_status()
|
|
1449
|
-
|
|
1450
819
|
data = response.json()
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
|
|
1456
|
-
|
|
820
|
+
if data.get("code") != 200:
|
|
821
|
+
raise_for_code(
|
|
822
|
+
"Add whitelist IP failed", code=data.get("code"), payload=data
|
|
823
|
+
)
|
|
1457
824
|
return data.get("data", {})
|
|
1458
825
|
|
|
1459
|
-
def list_proxy_servers(
|
|
1460
|
-
self,
|
|
1461
|
-
proxy_type: int,
|
|
1462
|
-
) -> List[ProxyServer]:
|
|
1463
|
-
"""
|
|
1464
|
-
List ISP or Datacenter proxy servers.
|
|
1465
|
-
|
|
1466
|
-
Args:
|
|
1467
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1468
|
-
|
|
1469
|
-
Returns:
|
|
1470
|
-
List of ProxyServer objects.
|
|
1471
|
-
|
|
1472
|
-
Example:
|
|
1473
|
-
>>> servers = client.list_proxy_servers(proxy_type=1) # ISP proxies
|
|
1474
|
-
>>> for server in servers:
|
|
1475
|
-
... print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
|
|
1476
|
-
"""
|
|
1477
|
-
|
|
826
|
+
def list_proxy_servers(self, proxy_type: int) -> List[ProxyServer]:
|
|
1478
827
|
self._require_public_credentials()
|
|
1479
|
-
|
|
1480
828
|
params = {
|
|
1481
829
|
"token": self.public_token,
|
|
1482
830
|
"key": self.public_key,
|
|
1483
831
|
"proxy_type": str(proxy_type),
|
|
1484
832
|
}
|
|
1485
|
-
|
|
1486
|
-
logger.info(f"Listing proxy servers: type={proxy_type}")
|
|
1487
|
-
|
|
1488
833
|
response = self._api_request_with_retry(
|
|
1489
|
-
"GET",
|
|
1490
|
-
self._proxy_list_url,
|
|
1491
|
-
params=params,
|
|
834
|
+
"GET", self._proxy_list_url, params=params
|
|
1492
835
|
)
|
|
1493
836
|
response.raise_for_status()
|
|
1494
|
-
|
|
1495
837
|
data = response.json()
|
|
838
|
+
if data.get("code") != 200:
|
|
839
|
+
raise_for_code(
|
|
840
|
+
"List proxy servers error", code=data.get("code"), payload=data
|
|
841
|
+
)
|
|
1496
842
|
|
|
843
|
+
server_list = []
|
|
1497
844
|
if isinstance(data, dict):
|
|
1498
|
-
code = data.get("code")
|
|
1499
|
-
if code is not None and code != 200:
|
|
1500
|
-
msg = extract_error_message(data)
|
|
1501
|
-
raise_for_code(
|
|
1502
|
-
f"List proxy servers error: {msg}", code=code, payload=data
|
|
1503
|
-
)
|
|
1504
|
-
|
|
1505
|
-
# Extract list from data field
|
|
1506
845
|
server_list = data.get("data", data.get("list", []))
|
|
1507
846
|
elif isinstance(data, list):
|
|
1508
847
|
server_list = data
|
|
1509
|
-
else:
|
|
1510
|
-
raise ThordataNetworkError(
|
|
1511
|
-
f"Unexpected proxy list response: {type(data).__name__}",
|
|
1512
|
-
original_error=None,
|
|
1513
|
-
)
|
|
1514
848
|
|
|
1515
849
|
return [ProxyServer.from_dict(s) for s in server_list]
|
|
1516
850
|
|
|
1517
|
-
def get_isp_regions(self) -> List[Dict[str, Any]]:
|
|
1518
|
-
"""
|
|
1519
|
-
Get available ISP proxy regions (Public API NEW).
|
|
1520
|
-
|
|
1521
|
-
Returns:
|
|
1522
|
-
List of regions with id, continent, country, city, num, pricing.
|
|
1523
|
-
|
|
1524
|
-
Example:
|
|
1525
|
-
>>> regions = client.get_isp_regions()
|
|
1526
|
-
>>> for region in regions:
|
|
1527
|
-
... print(f"{region['country']}/{region['city']}: {region['num']} IPs")
|
|
1528
|
-
"""
|
|
1529
|
-
if not self.sign or not self.api_key:
|
|
1530
|
-
raise ThordataConfigError(
|
|
1531
|
-
"sign and api_key are required for Public API NEW."
|
|
1532
|
-
)
|
|
1533
|
-
|
|
1534
|
-
headers = build_sign_headers(self.sign, self.api_key)
|
|
1535
|
-
|
|
1536
|
-
logger.info("Getting ISP regions (API NEW)")
|
|
1537
|
-
|
|
1538
|
-
response = self._api_request_with_retry(
|
|
1539
|
-
"POST",
|
|
1540
|
-
f"{self._gateway_base_url}/getRegionIsp",
|
|
1541
|
-
headers=headers,
|
|
1542
|
-
data={},
|
|
1543
|
-
)
|
|
1544
|
-
response.raise_for_status()
|
|
1545
|
-
|
|
1546
|
-
data = response.json()
|
|
1547
|
-
code = data.get("code")
|
|
1548
|
-
|
|
1549
|
-
if code != 200:
|
|
1550
|
-
msg = extract_error_message(data)
|
|
1551
|
-
raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
|
|
1552
|
-
|
|
1553
|
-
return data.get("data", [])
|
|
1554
|
-
|
|
1555
|
-
def list_isp_proxies(self) -> List[Dict[str, Any]]:
|
|
1556
|
-
"""
|
|
1557
|
-
List ISP proxies (Public API NEW).
|
|
1558
|
-
|
|
1559
|
-
Returns:
|
|
1560
|
-
List of ISP proxies with ip, port, user, pwd, startTime, expireTime.
|
|
1561
|
-
|
|
1562
|
-
Example:
|
|
1563
|
-
>>> proxies = client.list_isp_proxies()
|
|
1564
|
-
>>> for proxy in proxies:
|
|
1565
|
-
... print(f"{proxy['ip']}:{proxy['port']} - expires: {proxy['expireTime']}")
|
|
1566
|
-
"""
|
|
1567
|
-
if not self.sign or not self.api_key:
|
|
1568
|
-
raise ThordataConfigError(
|
|
1569
|
-
"sign and api_key are required for Public API NEW."
|
|
1570
|
-
)
|
|
1571
|
-
|
|
1572
|
-
headers = build_sign_headers(self.sign, self.api_key)
|
|
1573
|
-
|
|
1574
|
-
logger.info("Listing ISP proxies (API NEW)")
|
|
1575
|
-
|
|
1576
|
-
response = self._api_request_with_retry(
|
|
1577
|
-
"POST",
|
|
1578
|
-
f"{self._gateway_base_url}/queryListIsp",
|
|
1579
|
-
headers=headers,
|
|
1580
|
-
data={},
|
|
1581
|
-
)
|
|
1582
|
-
response.raise_for_status()
|
|
1583
|
-
|
|
1584
|
-
data = response.json()
|
|
1585
|
-
code = data.get("code")
|
|
1586
|
-
|
|
1587
|
-
if code != 200:
|
|
1588
|
-
msg = extract_error_message(data)
|
|
1589
|
-
raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
|
|
1590
|
-
|
|
1591
|
-
return data.get("data", [])
|
|
1592
|
-
|
|
1593
|
-
def get_wallet_balance(self) -> Dict[str, Any]:
|
|
1594
|
-
"""
|
|
1595
|
-
Get wallet balance for ISP proxies (Public API NEW).
|
|
1596
|
-
|
|
1597
|
-
Returns:
|
|
1598
|
-
Dict with 'walletBalance'.
|
|
1599
|
-
|
|
1600
|
-
Example:
|
|
1601
|
-
>>> result = client.get_wallet_balance()
|
|
1602
|
-
>>> print(f"Wallet: ${result['walletBalance']}")
|
|
1603
|
-
"""
|
|
1604
|
-
if not self.sign or not self.api_key:
|
|
1605
|
-
raise ThordataConfigError(
|
|
1606
|
-
"sign and api_key are required for Public API NEW."
|
|
1607
|
-
)
|
|
1608
|
-
|
|
1609
|
-
headers = build_sign_headers(self.sign, self.api_key)
|
|
1610
|
-
|
|
1611
|
-
logger.info("Getting wallet balance (API NEW)")
|
|
1612
|
-
|
|
1613
|
-
response = self._api_request_with_retry(
|
|
1614
|
-
"POST",
|
|
1615
|
-
f"{self._gateway_base_url}/getBalance",
|
|
1616
|
-
headers=headers,
|
|
1617
|
-
data={},
|
|
1618
|
-
)
|
|
1619
|
-
response.raise_for_status()
|
|
1620
|
-
|
|
1621
|
-
data = response.json()
|
|
1622
|
-
code = data.get("code")
|
|
1623
|
-
|
|
1624
|
-
if code != 200:
|
|
1625
|
-
msg = extract_error_message(data)
|
|
1626
|
-
raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
|
|
1627
|
-
|
|
1628
|
-
return data.get("data", {})
|
|
1629
|
-
|
|
1630
851
|
def get_proxy_expiration(
|
|
1631
|
-
self,
|
|
1632
|
-
ips: Union[str, List[str]],
|
|
1633
|
-
proxy_type: int,
|
|
852
|
+
self, ips: Union[str, List[str]], proxy_type: int
|
|
1634
853
|
) -> Dict[str, Any]:
|
|
1635
|
-
"""
|
|
1636
|
-
Get expiration time for specific proxy IPs.
|
|
1637
|
-
|
|
1638
|
-
Args:
|
|
1639
|
-
ips: Single IP or list of IPs to check.
|
|
1640
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1641
|
-
|
|
1642
|
-
Returns:
|
|
1643
|
-
Dict with expiration information.
|
|
1644
|
-
|
|
1645
|
-
Example:
|
|
1646
|
-
>>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
|
|
1647
|
-
>>> print(result)
|
|
1648
|
-
"""
|
|
1649
854
|
self._require_public_credentials()
|
|
1650
|
-
|
|
1651
|
-
# Convert list to comma-separated string
|
|
1652
855
|
if isinstance(ips, list):
|
|
1653
856
|
ips = ",".join(ips)
|
|
1654
|
-
|
|
1655
857
|
params = {
|
|
1656
858
|
"token": self.public_token,
|
|
1657
859
|
"key": self.public_key,
|
|
1658
860
|
"proxy_type": str(proxy_type),
|
|
1659
861
|
"ips": ips,
|
|
1660
862
|
}
|
|
1661
|
-
|
|
1662
|
-
logger.info(f"Getting proxy expiration: {ips}")
|
|
1663
|
-
|
|
1664
863
|
response = self._api_request_with_retry(
|
|
1665
|
-
"GET",
|
|
1666
|
-
self._proxy_expiration_url,
|
|
1667
|
-
params=params,
|
|
864
|
+
"GET", self._proxy_expiration_url, params=params
|
|
1668
865
|
)
|
|
1669
866
|
response.raise_for_status()
|
|
1670
|
-
|
|
1671
867
|
data = response.json()
|
|
868
|
+
if data.get("code") != 200:
|
|
869
|
+
raise_for_code("Get expiration error", code=data.get("code"), payload=data)
|
|
870
|
+
return data.get("data", data)
|
|
1672
871
|
|
|
1673
|
-
if isinstance(data, dict):
|
|
1674
|
-
code = data.get("code")
|
|
1675
|
-
if code is not None and code != 200:
|
|
1676
|
-
msg = extract_error_message(data)
|
|
1677
|
-
raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
|
|
1678
|
-
|
|
1679
|
-
return data.get("data", data)
|
|
1680
|
-
|
|
1681
|
-
return data
|
|
1682
|
-
|
|
1683
|
-
# =========================================================================
|
|
1684
|
-
# Location API Methods (Country/State/City/ASN functions)
|
|
1685
|
-
# =========================================================================
|
|
1686
872
|
def list_countries(
|
|
1687
873
|
self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
|
|
1688
874
|
) -> List[Dict[str, Any]]:
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
Args:
|
|
1693
|
-
proxy_type: 1 for residential, 2 for unlimited.
|
|
1694
|
-
|
|
1695
|
-
Returns:
|
|
1696
|
-
List of country records with 'country_code' and 'country_name'.
|
|
1697
|
-
"""
|
|
1698
|
-
return self._get_locations(
|
|
1699
|
-
"countries",
|
|
1700
|
-
proxy_type=(
|
|
1701
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1702
|
-
),
|
|
1703
|
-
)
|
|
875
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
876
|
+
return self._get_locations("countries", proxy_type=pt)
|
|
1704
877
|
|
|
1705
878
|
def list_states(
|
|
1706
879
|
self,
|
|
1707
880
|
country_code: str,
|
|
1708
881
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1709
882
|
) -> List[Dict[str, Any]]:
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
Args:
|
|
1714
|
-
country_code: Country code (e.g., 'US').
|
|
1715
|
-
proxy_type: Proxy type.
|
|
1716
|
-
|
|
1717
|
-
Returns:
|
|
1718
|
-
List of state records.
|
|
1719
|
-
"""
|
|
1720
|
-
return self._get_locations(
|
|
1721
|
-
"states",
|
|
1722
|
-
proxy_type=(
|
|
1723
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1724
|
-
),
|
|
1725
|
-
country_code=country_code,
|
|
1726
|
-
)
|
|
883
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
884
|
+
return self._get_locations("states", proxy_type=pt, country_code=country_code)
|
|
1727
885
|
|
|
1728
886
|
def list_cities(
|
|
1729
887
|
self,
|
|
@@ -1731,26 +889,10 @@ class ThordataClient:
|
|
|
1731
889
|
state_code: Optional[str] = None,
|
|
1732
890
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1733
891
|
) -> List[Dict[str, Any]]:
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
Args:
|
|
1738
|
-
country_code: Country code.
|
|
1739
|
-
state_code: Optional state code.
|
|
1740
|
-
proxy_type: Proxy type.
|
|
1741
|
-
|
|
1742
|
-
Returns:
|
|
1743
|
-
List of city records.
|
|
1744
|
-
"""
|
|
1745
|
-
kwargs = {
|
|
1746
|
-
"proxy_type": (
|
|
1747
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1748
|
-
),
|
|
1749
|
-
"country_code": country_code,
|
|
1750
|
-
}
|
|
892
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
893
|
+
kwargs = {"proxy_type": pt, "country_code": country_code}
|
|
1751
894
|
if state_code:
|
|
1752
895
|
kwargs["state_code"] = state_code
|
|
1753
|
-
|
|
1754
896
|
return self._get_locations("cities", **kwargs)
|
|
1755
897
|
|
|
1756
898
|
def list_asn(
|
|
@@ -1758,98 +900,78 @@ class ThordataClient:
|
|
|
1758
900
|
country_code: str,
|
|
1759
901
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1760
902
|
) -> List[Dict[str, Any]]:
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
Args:
|
|
1765
|
-
country_code: Country code.
|
|
1766
|
-
proxy_type: Proxy type.
|
|
1767
|
-
|
|
1768
|
-
Returns:
|
|
1769
|
-
List of ASN records.
|
|
1770
|
-
"""
|
|
1771
|
-
return self._get_locations(
|
|
1772
|
-
"asn",
|
|
1773
|
-
proxy_type=(
|
|
1774
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1775
|
-
),
|
|
1776
|
-
country_code=country_code,
|
|
1777
|
-
)
|
|
903
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
904
|
+
return self._get_locations("asn", proxy_type=pt, country_code=country_code)
|
|
1778
905
|
|
|
1779
906
|
def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
|
|
1780
|
-
"""Internal method to call locations API."""
|
|
1781
907
|
self._require_public_credentials()
|
|
908
|
+
params = {"token": self.public_token, "key": self.public_key}
|
|
909
|
+
for k, v in kwargs.items():
|
|
910
|
+
params[k] = str(v)
|
|
1782
911
|
|
|
1783
|
-
params = {
|
|
1784
|
-
"token": self.public_token,
|
|
1785
|
-
"key": self.public_key,
|
|
1786
|
-
}
|
|
1787
|
-
|
|
1788
|
-
for key, value in kwargs.items():
|
|
1789
|
-
params[key] = str(value)
|
|
1790
|
-
|
|
1791
|
-
url = f"{self._locations_base_url}/{endpoint}"
|
|
1792
|
-
|
|
1793
|
-
logger.debug(f"Locations API request: {url}")
|
|
1794
|
-
|
|
1795
|
-
# Use requests.get directly (no proxy needed for this API)
|
|
1796
912
|
response = self._api_request_with_retry(
|
|
1797
|
-
"GET",
|
|
1798
|
-
url,
|
|
1799
|
-
params=params,
|
|
913
|
+
"GET", f"{self._locations_base_url}/{endpoint}", params=params
|
|
1800
914
|
)
|
|
1801
915
|
response.raise_for_status()
|
|
1802
|
-
|
|
1803
916
|
data = response.json()
|
|
1804
|
-
|
|
1805
917
|
if isinstance(data, dict):
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
msg = data.get("msg", "")
|
|
1809
|
-
raise RuntimeError(
|
|
1810
|
-
f"Locations API error ({endpoint}): code={code}, msg={msg}"
|
|
1811
|
-
)
|
|
918
|
+
if data.get("code") != 200:
|
|
919
|
+
raise RuntimeError(f"Locations error: {data.get('msg')}")
|
|
1812
920
|
return data.get("data") or []
|
|
921
|
+
return data if isinstance(data, list) else []
|
|
1813
922
|
|
|
1814
|
-
if isinstance(data, list):
|
|
1815
|
-
return data
|
|
1816
|
-
|
|
1817
|
-
return []
|
|
1818
|
-
|
|
1819
|
-
# =========================================================================
|
|
1820
|
-
# Helper Methods (Internal utility functions)
|
|
1821
|
-
# =========================================================================
|
|
1822
923
|
def _require_public_credentials(self) -> None:
|
|
1823
|
-
"""Ensure public API credentials are available."""
|
|
1824
924
|
if not self.public_token or not self.public_key:
|
|
1825
925
|
raise ThordataConfigError(
|
|
1826
|
-
"public_token and public_key are required for this operation.
|
|
1827
|
-
"Please provide them when initializing ThordataClient."
|
|
926
|
+
"public_token and public_key are required for this operation."
|
|
1828
927
|
)
|
|
1829
928
|
|
|
1830
|
-
def
|
|
1831
|
-
self,
|
|
1832
|
-
) ->
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
929
|
+
def _get_proxy_endpoint_overrides(
|
|
930
|
+
self, product: ProxyProduct
|
|
931
|
+
) -> tuple[Optional[str], Optional[int], str]:
|
|
932
|
+
prefix = product.value.upper()
|
|
933
|
+
host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
|
|
934
|
+
"THORDATA_PROXY_HOST"
|
|
935
|
+
)
|
|
936
|
+
port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
|
|
937
|
+
"THORDATA_PROXY_PORT"
|
|
938
|
+
)
|
|
939
|
+
protocol = (
|
|
940
|
+
os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
|
|
941
|
+
or os.getenv("THORDATA_PROXY_PROTOCOL")
|
|
942
|
+
or "http"
|
|
943
|
+
)
|
|
944
|
+
port = int(port_raw) if port_raw and port_raw.isdigit() else None
|
|
945
|
+
return host or None, port, protocol
|
|
946
|
+
|
|
947
|
+
def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
|
|
948
|
+
for prod in [
|
|
949
|
+
ProxyProduct.RESIDENTIAL,
|
|
950
|
+
ProxyProduct.DATACENTER,
|
|
951
|
+
ProxyProduct.MOBILE,
|
|
952
|
+
]:
|
|
953
|
+
prefix = prod.value.upper()
|
|
954
|
+
u = os.getenv(f"THORDATA_{prefix}_USERNAME")
|
|
955
|
+
p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
|
|
956
|
+
if u and p:
|
|
957
|
+
h, port, proto = self._get_proxy_endpoint_overrides(prod)
|
|
958
|
+
return ProxyConfig(
|
|
959
|
+
username=u,
|
|
960
|
+
password=p,
|
|
961
|
+
product=prod,
|
|
962
|
+
host=h,
|
|
963
|
+
port=port,
|
|
964
|
+
protocol=proto,
|
|
965
|
+
)
|
|
966
|
+
return None
|
|
1848
967
|
|
|
1849
968
|
def close(self) -> None:
|
|
1850
|
-
"""Close the underlying session."""
|
|
1851
969
|
self._proxy_session.close()
|
|
1852
970
|
self._api_session.close()
|
|
971
|
+
# Clean up connection pools
|
|
972
|
+
for pm in self._proxy_managers.values():
|
|
973
|
+
pm.clear()
|
|
974
|
+
self._proxy_managers.clear()
|
|
1853
975
|
|
|
1854
976
|
def __enter__(self) -> ThordataClient:
|
|
1855
977
|
return self
|