thordata-sdk 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/_example_utils.py +3 -2
- thordata/_utils.py +20 -17
- thordata/async_client.py +80 -79
- thordata/client.py +283 -1318
- thordata/demo.py +1 -3
- thordata/exceptions.py +12 -12
- thordata/models.py +67 -70
- thordata/retry.py +13 -13
- thordata_sdk-1.1.0.dist-info/METADATA +271 -0
- thordata_sdk-1.1.0.dist-info/RECORD +15 -0
- thordata_sdk-1.0.0.dist-info/METADATA +0 -208
- thordata_sdk-1.0.0.dist-info/RECORD +0 -15
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.1.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.1.0.dist-info}/top_level.txt +0 -0
thordata/client.py
CHANGED
|
@@ -27,7 +27,7 @@ import logging
|
|
|
27
27
|
import os
|
|
28
28
|
import ssl
|
|
29
29
|
from datetime import date
|
|
30
|
-
from typing import Any
|
|
30
|
+
from typing import Any
|
|
31
31
|
from urllib.parse import urlencode
|
|
32
32
|
|
|
33
33
|
import requests
|
|
@@ -61,7 +61,6 @@ from .models import (
|
|
|
61
61
|
UniversalScrapeRequest,
|
|
62
62
|
UsageStatistics,
|
|
63
63
|
VideoTaskConfig,
|
|
64
|
-
WhitelistProxyConfig,
|
|
65
64
|
)
|
|
66
65
|
from .retry import RetryConfig, with_retry
|
|
67
66
|
|
|
@@ -69,32 +68,6 @@ logger = logging.getLogger(__name__)
|
|
|
69
68
|
|
|
70
69
|
|
|
71
70
|
class ThordataClient:
|
|
72
|
-
"""
|
|
73
|
-
The official synchronous Python client for Thordata.
|
|
74
|
-
|
|
75
|
-
This client handles authentication and communication with:
|
|
76
|
-
- Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
|
|
77
|
-
- SERP API (Real-time Search Engine Results)
|
|
78
|
-
- Universal Scraping API (Web Unlocker - Single Page Rendering)
|
|
79
|
-
- Web Scraper API (Async Task Management)
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
scraper_token: The API token from your Dashboard.
|
|
83
|
-
public_token: The public API token (for task status, locations).
|
|
84
|
-
public_key: The public API key.
|
|
85
|
-
proxy_host: Custom proxy gateway host (optional).
|
|
86
|
-
proxy_port: Custom proxy gateway port (optional).
|
|
87
|
-
timeout: Default request timeout in seconds (default: 30).
|
|
88
|
-
retry_config: Configuration for automatic retries (optional).
|
|
89
|
-
|
|
90
|
-
Example:
|
|
91
|
-
>>> client = ThordataClient(
|
|
92
|
-
... scraper_token="your_scraper_token",
|
|
93
|
-
... public_token="your_public_token",
|
|
94
|
-
... public_key="your_public_key"
|
|
95
|
-
... )
|
|
96
|
-
"""
|
|
97
|
-
|
|
98
71
|
# API Endpoints
|
|
99
72
|
BASE_URL = "https://scraperapi.thordata.com"
|
|
100
73
|
UNIVERSAL_URL = "https://universalapi.thordata.com"
|
|
@@ -104,18 +77,18 @@ class ThordataClient:
|
|
|
104
77
|
def __init__(
|
|
105
78
|
self,
|
|
106
79
|
scraper_token: str,
|
|
107
|
-
public_token:
|
|
108
|
-
public_key:
|
|
80
|
+
public_token: str | None = None,
|
|
81
|
+
public_key: str | None = None,
|
|
109
82
|
proxy_host: str = "pr.thordata.net",
|
|
110
83
|
proxy_port: int = 9999,
|
|
111
84
|
timeout: int = 30,
|
|
112
85
|
api_timeout: int = 60,
|
|
113
|
-
retry_config:
|
|
86
|
+
retry_config: RetryConfig | None = None,
|
|
114
87
|
auth_mode: str = "bearer",
|
|
115
|
-
scraperapi_base_url:
|
|
116
|
-
universalapi_base_url:
|
|
117
|
-
web_scraper_api_base_url:
|
|
118
|
-
locations_base_url:
|
|
88
|
+
scraperapi_base_url: str | None = None,
|
|
89
|
+
universalapi_base_url: str | None = None,
|
|
90
|
+
web_scraper_api_base_url: str | None = None,
|
|
91
|
+
locations_base_url: str | None = None,
|
|
119
92
|
) -> None:
|
|
120
93
|
"""Initialize the Thordata Client."""
|
|
121
94
|
if not scraper_token:
|
|
@@ -144,22 +117,21 @@ class ThordataClient:
|
|
|
144
117
|
f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
|
|
145
118
|
)
|
|
146
119
|
|
|
147
|
-
#
|
|
148
|
-
# - _proxy_session: used for proxy network traffic to target sites
|
|
149
|
-
# - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
|
|
150
|
-
#
|
|
151
|
-
# We intentionally do NOT set session-level proxies for _api_session,
|
|
152
|
-
# so developers can rely on system proxy settings (e.g., Clash) via env vars.
|
|
120
|
+
# HTTP Sessions
|
|
153
121
|
self._proxy_session = requests.Session()
|
|
154
122
|
self._proxy_session.trust_env = False
|
|
155
123
|
|
|
124
|
+
# Cache for ProxyManagers (Connection Pooling Fix)
|
|
125
|
+
# Key: proxy_url (str), Value: urllib3.ProxyManager
|
|
126
|
+
self._proxy_managers: dict[str, urllib3.ProxyManager] = {}
|
|
127
|
+
|
|
156
128
|
self._api_session = requests.Session()
|
|
157
129
|
self._api_session.trust_env = True
|
|
158
130
|
self._api_session.headers.update(
|
|
159
131
|
{"User-Agent": build_user_agent(_sdk_version, "requests")}
|
|
160
132
|
)
|
|
161
133
|
|
|
162
|
-
# Base URLs
|
|
134
|
+
# Base URLs
|
|
163
135
|
scraperapi_base = (
|
|
164
136
|
scraperapi_base_url
|
|
165
137
|
or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
|
|
@@ -184,15 +156,13 @@ class ThordataClient:
|
|
|
184
156
|
or self.LOCATIONS_URL
|
|
185
157
|
).rstrip("/")
|
|
186
158
|
|
|
187
|
-
# These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
|
|
188
159
|
gateway_base = os.getenv(
|
|
189
160
|
"THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
|
|
190
161
|
)
|
|
191
|
-
|
|
162
|
+
self._gateway_base_url = gateway_base
|
|
163
|
+
self._child_base_url = os.getenv(
|
|
192
164
|
"THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
|
|
193
165
|
)
|
|
194
|
-
self._gateway_base_url = gateway_base
|
|
195
|
-
self._child_base_url = child_base
|
|
196
166
|
|
|
197
167
|
self._serp_url = f"{scraperapi_base}/request"
|
|
198
168
|
self._builder_url = f"{scraperapi_base}/builder"
|
|
@@ -205,7 +175,6 @@ class ThordataClient:
|
|
|
205
175
|
|
|
206
176
|
self._locations_base_url = locations_base
|
|
207
177
|
|
|
208
|
-
# These 2 lines keep your existing behavior (derive account endpoints from locations_base)
|
|
209
178
|
self._usage_stats_url = (
|
|
210
179
|
f"{locations_base.replace('/locations', '')}/account/usage-statistics"
|
|
211
180
|
)
|
|
@@ -225,100 +194,38 @@ class ThordataClient:
|
|
|
225
194
|
self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
|
|
226
195
|
|
|
227
196
|
# =========================================================================
|
|
228
|
-
# Proxy Network Methods
|
|
197
|
+
# Proxy Network Methods
|
|
229
198
|
# =========================================================================
|
|
230
199
|
def get(
|
|
231
200
|
self,
|
|
232
201
|
url: str,
|
|
233
202
|
*,
|
|
234
|
-
proxy_config:
|
|
235
|
-
timeout:
|
|
203
|
+
proxy_config: ProxyConfig | None = None,
|
|
204
|
+
timeout: int | None = None,
|
|
236
205
|
**kwargs: Any,
|
|
237
206
|
) -> requests.Response:
|
|
238
|
-
"""
|
|
239
|
-
Send a GET request through the Thordata Proxy Network.
|
|
240
|
-
|
|
241
|
-
Args:
|
|
242
|
-
url: The target URL.
|
|
243
|
-
proxy_config: Custom proxy configuration for geo-targeting/sessions.
|
|
244
|
-
timeout: Request timeout in seconds.
|
|
245
|
-
**kwargs: Additional arguments to pass to requests.get().
|
|
246
|
-
|
|
247
|
-
Returns:
|
|
248
|
-
The response object.
|
|
249
|
-
|
|
250
|
-
Example:
|
|
251
|
-
>>> # Basic request
|
|
252
|
-
>>> response = client.get("https://httpbin.org/ip")
|
|
253
|
-
>>>
|
|
254
|
-
>>> # With geo-targeting
|
|
255
|
-
>>> from thordata.models import ProxyConfig
|
|
256
|
-
>>> config = ProxyConfig(
|
|
257
|
-
... username="myuser",
|
|
258
|
-
... password="mypass",
|
|
259
|
-
... country="us",
|
|
260
|
-
... city="seattle"
|
|
261
|
-
... )
|
|
262
|
-
>>> response = client.get("https://httpbin.org/ip", proxy_config=config)
|
|
263
|
-
"""
|
|
264
207
|
logger.debug(f"Proxy GET request: {url}")
|
|
265
|
-
|
|
266
|
-
timeout = timeout or self._default_timeout
|
|
267
|
-
|
|
268
|
-
if proxy_config is None:
|
|
269
|
-
proxy_config = self._get_default_proxy_config_from_env()
|
|
270
|
-
|
|
271
|
-
if proxy_config is None:
|
|
272
|
-
raise ThordataConfigError(
|
|
273
|
-
"Proxy credentials are missing. "
|
|
274
|
-
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
275
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
kwargs["proxies"] = proxy_config.to_proxies_dict()
|
|
279
|
-
|
|
280
|
-
@with_retry(self._retry_config)
|
|
281
|
-
def _do() -> requests.Response:
|
|
282
|
-
return self._proxy_request_with_proxy_manager(
|
|
283
|
-
"GET",
|
|
284
|
-
url,
|
|
285
|
-
proxy_config=proxy_config,
|
|
286
|
-
timeout=timeout,
|
|
287
|
-
headers=kwargs.pop("headers", None),
|
|
288
|
-
params=kwargs.pop("params", None),
|
|
289
|
-
)
|
|
290
|
-
|
|
291
|
-
try:
|
|
292
|
-
return _do()
|
|
293
|
-
except requests.Timeout as e:
|
|
294
|
-
raise ThordataTimeoutError(
|
|
295
|
-
f"Request timed out: {e}", original_error=e
|
|
296
|
-
) from e
|
|
297
|
-
except Exception as e:
|
|
298
|
-
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
208
|
+
return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
|
|
299
209
|
|
|
300
210
|
def post(
|
|
301
211
|
self,
|
|
302
212
|
url: str,
|
|
303
213
|
*,
|
|
304
|
-
proxy_config:
|
|
305
|
-
timeout:
|
|
214
|
+
proxy_config: ProxyConfig | None = None,
|
|
215
|
+
timeout: int | None = None,
|
|
306
216
|
**kwargs: Any,
|
|
307
217
|
) -> requests.Response:
|
|
308
|
-
"""
|
|
309
|
-
Send a POST request through the Thordata Proxy Network.
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
url: The target URL.
|
|
313
|
-
proxy_config: Custom proxy configuration.
|
|
314
|
-
timeout: Request timeout in seconds.
|
|
315
|
-
**kwargs: Additional arguments to pass to requests.post().
|
|
316
|
-
|
|
317
|
-
Returns:
|
|
318
|
-
The response object.
|
|
319
|
-
"""
|
|
320
218
|
logger.debug(f"Proxy POST request: {url}")
|
|
219
|
+
return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
|
|
321
220
|
|
|
221
|
+
def _proxy_verb(
|
|
222
|
+
self,
|
|
223
|
+
method: str,
|
|
224
|
+
url: str,
|
|
225
|
+
proxy_config: ProxyConfig | None,
|
|
226
|
+
timeout: int | None,
|
|
227
|
+
**kwargs: Any,
|
|
228
|
+
) -> requests.Response:
|
|
322
229
|
timeout = timeout or self._default_timeout
|
|
323
230
|
|
|
324
231
|
if proxy_config is None:
|
|
@@ -327,19 +234,21 @@ class ThordataClient:
|
|
|
327
234
|
if proxy_config is None:
|
|
328
235
|
raise ThordataConfigError(
|
|
329
236
|
"Proxy credentials are missing. "
|
|
330
|
-
"Pass proxy_config
|
|
331
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
237
|
+
"Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
|
|
332
238
|
)
|
|
333
239
|
|
|
334
|
-
|
|
240
|
+
# For requests/urllib3, we don't need 'proxies' dict in kwargs
|
|
241
|
+
# because we use ProxyManager directly.
|
|
242
|
+
# But we remove it if user accidentally passed it to avoid confusion.
|
|
243
|
+
kwargs.pop("proxies", None)
|
|
335
244
|
|
|
336
245
|
@with_retry(self._retry_config)
|
|
337
246
|
def _do() -> requests.Response:
|
|
338
247
|
return self._proxy_request_with_proxy_manager(
|
|
339
|
-
|
|
248
|
+
method,
|
|
340
249
|
url,
|
|
341
|
-
proxy_config=proxy_config,
|
|
342
|
-
timeout=timeout,
|
|
250
|
+
proxy_config=proxy_config, # type: ignore
|
|
251
|
+
timeout=timeout, # type: ignore
|
|
343
252
|
headers=kwargs.pop("headers", None),
|
|
344
253
|
params=kwargs.pop("params", None),
|
|
345
254
|
data=kwargs.pop("data", None),
|
|
@@ -356,38 +265,16 @@ class ThordataClient:
|
|
|
356
265
|
|
|
357
266
|
def build_proxy_url(
|
|
358
267
|
self,
|
|
359
|
-
username: str,
|
|
360
|
-
password: str,
|
|
268
|
+
username: str,
|
|
269
|
+
password: str,
|
|
361
270
|
*,
|
|
362
|
-
country:
|
|
363
|
-
state:
|
|
364
|
-
city:
|
|
365
|
-
session_id:
|
|
366
|
-
session_duration:
|
|
367
|
-
product:
|
|
271
|
+
country: str | None = None,
|
|
272
|
+
state: str | None = None,
|
|
273
|
+
city: str | None = None,
|
|
274
|
+
session_id: str | None = None,
|
|
275
|
+
session_duration: int | None = None,
|
|
276
|
+
product: ProxyProduct | str = ProxyProduct.RESIDENTIAL,
|
|
368
277
|
) -> str:
|
|
369
|
-
"""
|
|
370
|
-
Build a proxy URL with custom targeting options.
|
|
371
|
-
|
|
372
|
-
This is a convenience method for creating proxy URLs without
|
|
373
|
-
manually constructing a ProxyConfig.
|
|
374
|
-
|
|
375
|
-
Args:
|
|
376
|
-
country: Target country code (e.g., 'us', 'gb').
|
|
377
|
-
state: Target state (e.g., 'california').
|
|
378
|
-
city: Target city (e.g., 'seattle').
|
|
379
|
-
session_id: Session ID for sticky sessions.
|
|
380
|
-
session_duration: Session duration in minutes (1-90).
|
|
381
|
-
product: Proxy product type.
|
|
382
|
-
|
|
383
|
-
Returns:
|
|
384
|
-
The proxy URL string.
|
|
385
|
-
|
|
386
|
-
Example:
|
|
387
|
-
>>> url = client.build_proxy_url(country="us", city="seattle")
|
|
388
|
-
>>> proxies = {"http": url, "https": url}
|
|
389
|
-
>>> requests.get("https://example.com", proxies=proxies)
|
|
390
|
-
"""
|
|
391
278
|
config = ProxyConfig(
|
|
392
279
|
username=username,
|
|
393
280
|
password=password,
|
|
@@ -403,19 +290,17 @@ class ThordataClient:
|
|
|
403
290
|
return config.build_proxy_url()
|
|
404
291
|
|
|
405
292
|
# =========================================================================
|
|
406
|
-
# Internal
|
|
293
|
+
# Internal Request Helpers
|
|
407
294
|
# =========================================================================
|
|
408
295
|
def _api_request_with_retry(
|
|
409
296
|
self,
|
|
410
297
|
method: str,
|
|
411
298
|
url: str,
|
|
412
299
|
*,
|
|
413
|
-
data:
|
|
414
|
-
headers:
|
|
415
|
-
params:
|
|
300
|
+
data: dict[str, Any] | None = None,
|
|
301
|
+
headers: dict[str, str] | None = None,
|
|
302
|
+
params: dict[str, Any] | None = None,
|
|
416
303
|
) -> requests.Response:
|
|
417
|
-
"""Make an API request with automatic retry on transient failures."""
|
|
418
|
-
|
|
419
304
|
@with_retry(self._retry_config)
|
|
420
305
|
def _do_request() -> requests.Response:
|
|
421
306
|
return self._api_session.request(
|
|
@@ -438,64 +323,101 @@ class ThordataClient:
|
|
|
438
323
|
f"API request failed: {e}", original_error=e
|
|
439
324
|
) from e
|
|
440
325
|
|
|
326
|
+
def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
|
|
327
|
+
"""Get or create a ProxyManager for the given proxy URL (Pooled)."""
|
|
328
|
+
if proxy_url not in self._proxy_managers:
|
|
329
|
+
# Create a new manager if not cached
|
|
330
|
+
proxy_ssl_context = None
|
|
331
|
+
if proxy_url.startswith("https://"):
|
|
332
|
+
proxy_ssl_context = ssl.create_default_context()
|
|
333
|
+
|
|
334
|
+
self._proxy_managers[proxy_url] = urllib3.ProxyManager(
|
|
335
|
+
proxy_url,
|
|
336
|
+
proxy_ssl_context=proxy_ssl_context,
|
|
337
|
+
num_pools=10, # Allow concurrency
|
|
338
|
+
maxsize=10,
|
|
339
|
+
)
|
|
340
|
+
return self._proxy_managers[proxy_url]
|
|
341
|
+
|
|
342
|
+
def _proxy_request_with_proxy_manager(
|
|
343
|
+
self,
|
|
344
|
+
method: str,
|
|
345
|
+
url: str,
|
|
346
|
+
*,
|
|
347
|
+
proxy_config: ProxyConfig,
|
|
348
|
+
timeout: int,
|
|
349
|
+
headers: dict[str, str] | None = None,
|
|
350
|
+
params: dict[str, Any] | None = None,
|
|
351
|
+
data: Any = None,
|
|
352
|
+
) -> requests.Response:
|
|
353
|
+
# 1. Prepare URL and Body
|
|
354
|
+
req = requests.Request(method=method.upper(), url=url, params=params)
|
|
355
|
+
prepped = self._proxy_session.prepare_request(req)
|
|
356
|
+
final_url = prepped.url or url
|
|
357
|
+
|
|
358
|
+
# 2. Get Proxy Configuration
|
|
359
|
+
proxy_url = proxy_config.build_proxy_endpoint()
|
|
360
|
+
proxy_headers = urllib3.make_headers(
|
|
361
|
+
proxy_basic_auth=proxy_config.build_proxy_basic_auth()
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
# 3. Get Cached Proxy Manager
|
|
365
|
+
pm = self._get_proxy_manager(proxy_url)
|
|
366
|
+
|
|
367
|
+
# 4. Prepare Request Headers/Body
|
|
368
|
+
req_headers = dict(headers or {})
|
|
369
|
+
body = None
|
|
370
|
+
if data is not None:
|
|
371
|
+
if isinstance(data, dict):
|
|
372
|
+
body = urlencode({k: str(v) for k, v in data.items()})
|
|
373
|
+
req_headers.setdefault(
|
|
374
|
+
"Content-Type", "application/x-www-form-urlencoded"
|
|
375
|
+
)
|
|
376
|
+
else:
|
|
377
|
+
body = data
|
|
378
|
+
|
|
379
|
+
# 5. Execute Request via urllib3
|
|
380
|
+
http_resp = pm.request(
|
|
381
|
+
method.upper(),
|
|
382
|
+
final_url,
|
|
383
|
+
body=body,
|
|
384
|
+
headers=req_headers or None,
|
|
385
|
+
proxy_headers=proxy_headers, # Attach Auth here
|
|
386
|
+
timeout=urllib3.Timeout(connect=timeout, read=timeout),
|
|
387
|
+
retries=False, # We handle retries in _proxy_verb
|
|
388
|
+
preload_content=True,
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
# 6. Convert back to requests.Response
|
|
392
|
+
r = requests.Response()
|
|
393
|
+
r.status_code = int(getattr(http_resp, "status", 0) or 0)
|
|
394
|
+
r._content = http_resp.data or b""
|
|
395
|
+
r.url = final_url
|
|
396
|
+
r.headers = requests.structures.CaseInsensitiveDict(
|
|
397
|
+
dict(http_resp.headers or {})
|
|
398
|
+
)
|
|
399
|
+
return r
|
|
400
|
+
|
|
441
401
|
# =========================================================================
|
|
442
|
-
# SERP API Methods
|
|
402
|
+
# SERP API Methods
|
|
443
403
|
# =========================================================================
|
|
444
404
|
def serp_search(
|
|
445
405
|
self,
|
|
446
406
|
query: str,
|
|
447
407
|
*,
|
|
448
|
-
engine:
|
|
408
|
+
engine: Engine | str = Engine.GOOGLE,
|
|
449
409
|
num: int = 10,
|
|
450
|
-
country:
|
|
451
|
-
language:
|
|
452
|
-
search_type:
|
|
453
|
-
device:
|
|
454
|
-
render_js:
|
|
455
|
-
no_cache:
|
|
410
|
+
country: str | None = None,
|
|
411
|
+
language: str | None = None,
|
|
412
|
+
search_type: str | None = None,
|
|
413
|
+
device: str | None = None,
|
|
414
|
+
render_js: bool | None = None,
|
|
415
|
+
no_cache: bool | None = None,
|
|
456
416
|
output_format: str = "json",
|
|
457
417
|
**kwargs: Any,
|
|
458
|
-
) ->
|
|
459
|
-
"""
|
|
460
|
-
Execute a real-time SERP (Search Engine Results Page) search.
|
|
461
|
-
|
|
462
|
-
Args:
|
|
463
|
-
query: The search keywords.
|
|
464
|
-
engine: Search engine (google, bing, yandex, duckduckgo, baidu).
|
|
465
|
-
num: Number of results to retrieve (default: 10).
|
|
466
|
-
country: Country code for localized results (e.g., 'us').
|
|
467
|
-
language: Language code for interface (e.g., 'en').
|
|
468
|
-
search_type: Type of search (images, news, shopping, videos, etc.).
|
|
469
|
-
device: Device type ('desktop', 'mobile', 'tablet').
|
|
470
|
-
render_js: Enable JavaScript rendering in SERP (render_js=True).
|
|
471
|
-
no_cache: Disable internal caching (no_cache=True).
|
|
472
|
-
output_format: 'json' to return parsed JSON (default),
|
|
473
|
-
'html' to return HTML wrapped in {'html': ...}.
|
|
474
|
-
**kwargs: Additional engine-specific parameters.
|
|
475
|
-
|
|
476
|
-
Returns:
|
|
477
|
-
Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
|
|
478
|
-
|
|
479
|
-
Example:
|
|
480
|
-
>>> # Basic search
|
|
481
|
-
>>> results = client.serp_search("python tutorial")
|
|
482
|
-
>>>
|
|
483
|
-
>>> # With options
|
|
484
|
-
>>> results = client.serp_search(
|
|
485
|
-
... "laptop reviews",
|
|
486
|
-
... engine="google",
|
|
487
|
-
... num=20,
|
|
488
|
-
... country="us",
|
|
489
|
-
... search_type="shopping",
|
|
490
|
-
... device="mobile",
|
|
491
|
-
... render_js=True,
|
|
492
|
-
... no_cache=True,
|
|
493
|
-
... )
|
|
494
|
-
"""
|
|
495
|
-
# Normalize engine
|
|
418
|
+
) -> dict[str, Any]:
|
|
496
419
|
engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
|
|
497
420
|
|
|
498
|
-
# Build request using model
|
|
499
421
|
request = SerpRequest(
|
|
500
422
|
query=query,
|
|
501
423
|
engine=engine_str,
|
|
@@ -510,84 +432,13 @@ class ThordataClient:
|
|
|
510
432
|
extra_params=kwargs,
|
|
511
433
|
)
|
|
512
434
|
|
|
513
|
-
|
|
514
|
-
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
515
|
-
|
|
516
|
-
logger.info(
|
|
517
|
-
f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
|
|
518
|
-
)
|
|
519
|
-
|
|
520
|
-
try:
|
|
521
|
-
response = self._api_request_with_retry(
|
|
522
|
-
"POST",
|
|
523
|
-
self._serp_url,
|
|
524
|
-
data=payload,
|
|
525
|
-
headers=headers,
|
|
526
|
-
)
|
|
527
|
-
response.raise_for_status()
|
|
528
|
-
|
|
529
|
-
# JSON mode (default)
|
|
530
|
-
if output_format.lower() == "json":
|
|
531
|
-
data = response.json()
|
|
532
|
-
|
|
533
|
-
if isinstance(data, dict):
|
|
534
|
-
code = data.get("code")
|
|
535
|
-
if code is not None and code != 200:
|
|
536
|
-
msg = extract_error_message(data)
|
|
537
|
-
raise_for_code(
|
|
538
|
-
f"SERP API Error: {msg}",
|
|
539
|
-
code=code,
|
|
540
|
-
payload=data,
|
|
541
|
-
)
|
|
542
|
-
|
|
543
|
-
return parse_json_response(data)
|
|
544
|
-
|
|
545
|
-
# HTML mode: wrap as dict to keep return type stable
|
|
546
|
-
return {"html": response.text}
|
|
547
|
-
|
|
548
|
-
except requests.Timeout as e:
|
|
549
|
-
raise ThordataTimeoutError(
|
|
550
|
-
f"SERP request timed out: {e}",
|
|
551
|
-
original_error=e,
|
|
552
|
-
) from e
|
|
553
|
-
except requests.RequestException as e:
|
|
554
|
-
raise ThordataNetworkError(
|
|
555
|
-
f"SERP request failed: {e}",
|
|
556
|
-
original_error=e,
|
|
557
|
-
) from e
|
|
435
|
+
return self.serp_search_advanced(request)
|
|
558
436
|
|
|
559
|
-
def serp_search_advanced(self, request: SerpRequest) ->
|
|
560
|
-
"""
|
|
561
|
-
Execute a SERP search using a SerpRequest object.
|
|
562
|
-
|
|
563
|
-
This method provides full control over all search parameters.
|
|
564
|
-
|
|
565
|
-
Args:
|
|
566
|
-
request: A SerpRequest object with all parameters configured.
|
|
567
|
-
|
|
568
|
-
Returns:
|
|
569
|
-
Dict[str, Any]: Parsed JSON results or dict with 'html' key.
|
|
570
|
-
|
|
571
|
-
Example:
|
|
572
|
-
>>> from thordata.models import SerpRequest
|
|
573
|
-
>>> request = SerpRequest(
|
|
574
|
-
... query="python programming",
|
|
575
|
-
... engine="google",
|
|
576
|
-
... num=50,
|
|
577
|
-
... country="us",
|
|
578
|
-
... language="en",
|
|
579
|
-
... search_type="news",
|
|
580
|
-
... time_filter="week",
|
|
581
|
-
... safe_search=True
|
|
582
|
-
... )
|
|
583
|
-
>>> results = client.serp_search_advanced(request)
|
|
584
|
-
"""
|
|
437
|
+
def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
|
|
585
438
|
payload = request.to_payload()
|
|
586
439
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
587
440
|
|
|
588
|
-
logger.info(
|
|
589
|
-
f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
|
|
590
|
-
)
|
|
441
|
+
logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
|
|
591
442
|
|
|
592
443
|
try:
|
|
593
444
|
response = self._api_request_with_retry(
|
|
@@ -600,34 +451,22 @@ class ThordataClient:
|
|
|
600
451
|
|
|
601
452
|
if request.output_format.lower() == "json":
|
|
602
453
|
data = response.json()
|
|
603
|
-
|
|
604
454
|
if isinstance(data, dict):
|
|
605
455
|
code = data.get("code")
|
|
606
456
|
if code is not None and code != 200:
|
|
607
457
|
msg = extract_error_message(data)
|
|
608
|
-
raise_for_code(
|
|
609
|
-
f"SERP API Error: {msg}",
|
|
610
|
-
code=code,
|
|
611
|
-
payload=data,
|
|
612
|
-
)
|
|
613
|
-
|
|
458
|
+
raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
|
|
614
459
|
return parse_json_response(data)
|
|
615
460
|
|
|
616
461
|
return {"html": response.text}
|
|
617
462
|
|
|
618
463
|
except requests.Timeout as e:
|
|
619
|
-
raise ThordataTimeoutError(
|
|
620
|
-
f"SERP request timed out: {e}",
|
|
621
|
-
original_error=e,
|
|
622
|
-
) from e
|
|
464
|
+
raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
|
|
623
465
|
except requests.RequestException as e:
|
|
624
|
-
raise ThordataNetworkError(
|
|
625
|
-
f"SERP request failed: {e}",
|
|
626
|
-
original_error=e,
|
|
627
|
-
) from e
|
|
466
|
+
raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
|
|
628
467
|
|
|
629
468
|
# =========================================================================
|
|
630
|
-
# Universal Scraping API
|
|
469
|
+
# Universal Scraping API
|
|
631
470
|
# =========================================================================
|
|
632
471
|
def universal_scrape(
|
|
633
472
|
self,
|
|
@@ -635,43 +474,12 @@ class ThordataClient:
|
|
|
635
474
|
*,
|
|
636
475
|
js_render: bool = False,
|
|
637
476
|
output_format: str = "html",
|
|
638
|
-
country:
|
|
639
|
-
block_resources:
|
|
640
|
-
wait:
|
|
641
|
-
wait_for:
|
|
477
|
+
country: str | None = None,
|
|
478
|
+
block_resources: str | None = None,
|
|
479
|
+
wait: int | None = None,
|
|
480
|
+
wait_for: str | None = None,
|
|
642
481
|
**kwargs: Any,
|
|
643
|
-
) ->
|
|
644
|
-
"""
|
|
645
|
-
Scrape a URL using the Universal Scraping API (Web Unlocker).
|
|
646
|
-
|
|
647
|
-
Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
|
|
648
|
-
|
|
649
|
-
Args:
|
|
650
|
-
url: Target URL.
|
|
651
|
-
js_render: Enable JavaScript rendering (headless browser).
|
|
652
|
-
output_format: "html" or "png" (screenshot).
|
|
653
|
-
country: Geo-targeting country code.
|
|
654
|
-
block_resources: Resources to block (e.g., 'script,image').
|
|
655
|
-
wait: Wait time in milliseconds after page load.
|
|
656
|
-
wait_for: CSS selector to wait for.
|
|
657
|
-
**kwargs: Additional parameters.
|
|
658
|
-
|
|
659
|
-
Returns:
|
|
660
|
-
HTML string or PNG bytes depending on output_format.
|
|
661
|
-
|
|
662
|
-
Example:
|
|
663
|
-
>>> # Get HTML
|
|
664
|
-
>>> html = client.universal_scrape("https://example.com", js_render=True)
|
|
665
|
-
>>>
|
|
666
|
-
>>> # Get screenshot
|
|
667
|
-
>>> png = client.universal_scrape(
|
|
668
|
-
... "https://example.com",
|
|
669
|
-
... js_render=True,
|
|
670
|
-
... output_format="png"
|
|
671
|
-
... )
|
|
672
|
-
>>> with open("screenshot.png", "wb") as f:
|
|
673
|
-
... f.write(png)
|
|
674
|
-
"""
|
|
482
|
+
) -> str | bytes:
|
|
675
483
|
request = UniversalScrapeRequest(
|
|
676
484
|
url=url,
|
|
677
485
|
js_render=js_render,
|
|
@@ -682,27 +490,13 @@ class ThordataClient:
|
|
|
682
490
|
wait_for=wait_for,
|
|
683
491
|
extra_params=kwargs,
|
|
684
492
|
)
|
|
685
|
-
|
|
686
493
|
return self.universal_scrape_advanced(request)
|
|
687
494
|
|
|
688
|
-
def universal_scrape_advanced(
|
|
689
|
-
self, request: UniversalScrapeRequest
|
|
690
|
-
) -> Union[str, bytes]:
|
|
691
|
-
"""
|
|
692
|
-
Scrape using a UniversalScrapeRequest object for full control.
|
|
693
|
-
|
|
694
|
-
Args:
|
|
695
|
-
request: A UniversalScrapeRequest with all parameters.
|
|
696
|
-
|
|
697
|
-
Returns:
|
|
698
|
-
HTML string or PNG bytes.
|
|
699
|
-
"""
|
|
495
|
+
def universal_scrape_advanced(self, request: UniversalScrapeRequest) -> str | bytes:
|
|
700
496
|
payload = request.to_payload()
|
|
701
497
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
702
498
|
|
|
703
|
-
logger.info(
|
|
704
|
-
f"Universal Scrape: {request.url} (format: {request.output_format})"
|
|
705
|
-
)
|
|
499
|
+
logger.info(f"Universal Scrape: {request.url}")
|
|
706
500
|
|
|
707
501
|
try:
|
|
708
502
|
response = self._api_request_with_retry(
|
|
@@ -712,85 +506,49 @@ class ThordataClient:
|
|
|
712
506
|
headers=headers,
|
|
713
507
|
)
|
|
714
508
|
response.raise_for_status()
|
|
715
|
-
|
|
716
509
|
return self._process_universal_response(response, request.output_format)
|
|
717
510
|
|
|
718
511
|
except requests.Timeout as e:
|
|
719
512
|
raise ThordataTimeoutError(
|
|
720
|
-
f"Universal
|
|
513
|
+
f"Universal timeout: {e}", original_error=e
|
|
721
514
|
) from e
|
|
722
515
|
except requests.RequestException as e:
|
|
723
516
|
raise ThordataNetworkError(
|
|
724
|
-
f"Universal
|
|
517
|
+
f"Universal failed: {e}", original_error=e
|
|
725
518
|
) from e
|
|
726
519
|
|
|
727
520
|
def _process_universal_response(
|
|
728
521
|
self, response: requests.Response, output_format: str
|
|
729
|
-
) ->
|
|
730
|
-
"""Process the response from Universal API."""
|
|
731
|
-
# Try to parse as JSON
|
|
522
|
+
) -> str | bytes:
|
|
732
523
|
try:
|
|
733
524
|
resp_json = response.json()
|
|
734
525
|
except ValueError:
|
|
735
|
-
|
|
736
|
-
if output_format.lower() == "png":
|
|
737
|
-
return response.content
|
|
738
|
-
return response.text
|
|
526
|
+
return response.content if output_format.lower() == "png" else response.text
|
|
739
527
|
|
|
740
|
-
# Check for API-level errors
|
|
741
528
|
if isinstance(resp_json, dict):
|
|
742
529
|
code = resp_json.get("code")
|
|
743
530
|
if code is not None and code != 200:
|
|
744
531
|
msg = extract_error_message(resp_json)
|
|
745
|
-
raise_for_code(
|
|
746
|
-
f"Universal API Error: {msg}", code=code, payload=resp_json
|
|
747
|
-
)
|
|
532
|
+
raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
|
|
748
533
|
|
|
749
|
-
# Extract HTML
|
|
750
534
|
if "html" in resp_json:
|
|
751
535
|
return resp_json["html"]
|
|
752
|
-
|
|
753
|
-
# Extract PNG
|
|
754
536
|
if "png" in resp_json:
|
|
755
537
|
return decode_base64_image(resp_json["png"])
|
|
756
538
|
|
|
757
|
-
# Fallback
|
|
758
539
|
return str(resp_json)
|
|
759
540
|
|
|
760
541
|
# =========================================================================
|
|
761
|
-
# Web Scraper API
|
|
542
|
+
# Web Scraper API (Tasks)
|
|
762
543
|
# =========================================================================
|
|
763
544
|
def create_scraper_task(
|
|
764
545
|
self,
|
|
765
546
|
file_name: str,
|
|
766
547
|
spider_id: str,
|
|
767
548
|
spider_name: str,
|
|
768
|
-
parameters:
|
|
769
|
-
universal_params:
|
|
549
|
+
parameters: dict[str, Any],
|
|
550
|
+
universal_params: dict[str, Any] | None = None,
|
|
770
551
|
) -> str:
|
|
771
|
-
"""
|
|
772
|
-
Create an asynchronous Web Scraper task.
|
|
773
|
-
|
|
774
|
-
Note: Get spider_id and spider_name from the Thordata Dashboard.
|
|
775
|
-
|
|
776
|
-
Args:
|
|
777
|
-
file_name: Name for the output file.
|
|
778
|
-
spider_id: Spider identifier from Dashboard.
|
|
779
|
-
spider_name: Spider name (e.g., "youtube.com").
|
|
780
|
-
parameters: Spider-specific parameters.
|
|
781
|
-
universal_params: Global spider settings.
|
|
782
|
-
|
|
783
|
-
Returns:
|
|
784
|
-
The created task_id.
|
|
785
|
-
|
|
786
|
-
Example:
|
|
787
|
-
>>> task_id = client.create_scraper_task(
|
|
788
|
-
... file_name="youtube_data",
|
|
789
|
-
... spider_id="youtube_video-post_by-url",
|
|
790
|
-
... spider_name="youtube.com",
|
|
791
|
-
... parameters={"url": "https://youtube.com/@channel/videos"}
|
|
792
|
-
... )
|
|
793
|
-
"""
|
|
794
552
|
config = ScraperTaskConfig(
|
|
795
553
|
file_name=file_name,
|
|
796
554
|
spider_id=spider_id,
|
|
@@ -798,50 +556,26 @@ class ThordataClient:
|
|
|
798
556
|
parameters=parameters,
|
|
799
557
|
universal_params=universal_params,
|
|
800
558
|
)
|
|
801
|
-
|
|
802
559
|
return self.create_scraper_task_advanced(config)
|
|
803
560
|
|
|
804
561
|
def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
805
|
-
"""
|
|
806
|
-
Create a scraper task using a ScraperTaskConfig object.
|
|
807
|
-
|
|
808
|
-
Args:
|
|
809
|
-
config: Task configuration.
|
|
810
|
-
|
|
811
|
-
Returns:
|
|
812
|
-
The created task_id.
|
|
813
|
-
"""
|
|
814
562
|
self._require_public_credentials()
|
|
815
|
-
|
|
816
563
|
payload = config.to_payload()
|
|
817
|
-
|
|
818
|
-
# Builder needs 3 headers: token, key, Authorization Bearer
|
|
819
564
|
headers = build_builder_headers(
|
|
820
|
-
self.scraper_token,
|
|
821
|
-
self.public_token or "",
|
|
822
|
-
self.public_key or "",
|
|
565
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
823
566
|
)
|
|
824
567
|
|
|
825
|
-
logger.info(f"Creating Scraper Task: {config.spider_name}")
|
|
826
|
-
|
|
827
568
|
try:
|
|
828
569
|
response = self._api_request_with_retry(
|
|
829
|
-
"POST",
|
|
830
|
-
self._builder_url,
|
|
831
|
-
data=payload,
|
|
832
|
-
headers=headers,
|
|
570
|
+
"POST", self._builder_url, data=payload, headers=headers
|
|
833
571
|
)
|
|
834
572
|
response.raise_for_status()
|
|
835
|
-
|
|
836
573
|
data = response.json()
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
|
|
842
|
-
|
|
574
|
+
if data.get("code") != 200:
|
|
575
|
+
raise_for_code(
|
|
576
|
+
"Task creation failed", code=data.get("code"), payload=data
|
|
577
|
+
)
|
|
843
578
|
return data["data"]["task_id"]
|
|
844
|
-
|
|
845
579
|
except requests.RequestException as e:
|
|
846
580
|
raise ThordataNetworkError(
|
|
847
581
|
f"Task creation failed: {e}", original_error=e
|
|
@@ -852,38 +586,9 @@ class ThordataClient:
|
|
|
852
586
|
file_name: str,
|
|
853
587
|
spider_id: str,
|
|
854
588
|
spider_name: str,
|
|
855
|
-
parameters:
|
|
856
|
-
common_settings:
|
|
589
|
+
parameters: dict[str, Any],
|
|
590
|
+
common_settings: CommonSettings,
|
|
857
591
|
) -> str:
|
|
858
|
-
"""
|
|
859
|
-
Create a YouTube video/audio download task.
|
|
860
|
-
|
|
861
|
-
Uses the /video_builder endpoint.
|
|
862
|
-
|
|
863
|
-
Args:
|
|
864
|
-
file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
|
|
865
|
-
spider_id: Spider identifier (e.g., "youtube_video_by-url").
|
|
866
|
-
spider_name: Spider name (typically "youtube.com").
|
|
867
|
-
parameters: Spider parameters (e.g., {"url": "..."}).
|
|
868
|
-
common_settings: Video/audio settings.
|
|
869
|
-
|
|
870
|
-
Returns:
|
|
871
|
-
The created task_id.
|
|
872
|
-
|
|
873
|
-
Example:
|
|
874
|
-
>>> from thordata import CommonSettings
|
|
875
|
-
>>> task_id = client.create_video_task(
|
|
876
|
-
... file_name="{{VideoID}}",
|
|
877
|
-
... spider_id="youtube_video_by-url",
|
|
878
|
-
... spider_name="youtube.com",
|
|
879
|
-
... parameters={"url": "https://youtube.com/watch?v=xxx"},
|
|
880
|
-
... common_settings=CommonSettings(
|
|
881
|
-
... resolution="1080p",
|
|
882
|
-
... is_subtitles="true"
|
|
883
|
-
... )
|
|
884
|
-
... )
|
|
885
|
-
"""
|
|
886
|
-
|
|
887
592
|
config = VideoTaskConfig(
|
|
888
593
|
file_name=file_name,
|
|
889
594
|
spider_id=spider_id,
|
|
@@ -891,210 +596,97 @@ class ThordataClient:
|
|
|
891
596
|
parameters=parameters,
|
|
892
597
|
common_settings=common_settings,
|
|
893
598
|
)
|
|
894
|
-
|
|
895
599
|
return self.create_video_task_advanced(config)
|
|
896
600
|
|
|
897
601
|
def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
898
|
-
"""
|
|
899
|
-
Create a video task using VideoTaskConfig object.
|
|
900
|
-
|
|
901
|
-
Args:
|
|
902
|
-
config: Video task configuration.
|
|
903
|
-
|
|
904
|
-
Returns:
|
|
905
|
-
The created task_id.
|
|
906
|
-
"""
|
|
907
|
-
|
|
908
602
|
self._require_public_credentials()
|
|
909
|
-
|
|
910
603
|
payload = config.to_payload()
|
|
911
604
|
headers = build_builder_headers(
|
|
912
|
-
self.scraper_token,
|
|
913
|
-
self.public_token or "",
|
|
914
|
-
self.public_key or "",
|
|
605
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
915
606
|
)
|
|
916
607
|
|
|
917
|
-
logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
|
|
918
|
-
|
|
919
608
|
response = self._api_request_with_retry(
|
|
920
|
-
"POST",
|
|
921
|
-
self._video_builder_url,
|
|
922
|
-
data=payload,
|
|
923
|
-
headers=headers,
|
|
609
|
+
"POST", self._video_builder_url, data=payload, headers=headers
|
|
924
610
|
)
|
|
925
611
|
response.raise_for_status()
|
|
926
|
-
|
|
927
612
|
data = response.json()
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
if code != 200:
|
|
931
|
-
msg = extract_error_message(data)
|
|
613
|
+
if data.get("code") != 200:
|
|
932
614
|
raise_for_code(
|
|
933
|
-
|
|
615
|
+
"Video task creation failed", code=data.get("code"), payload=data
|
|
934
616
|
)
|
|
935
|
-
|
|
936
617
|
return data["data"]["task_id"]
|
|
937
618
|
|
|
938
619
|
def get_task_status(self, task_id: str) -> str:
|
|
939
|
-
"""
|
|
940
|
-
Check the status of an asynchronous scraping task.
|
|
941
|
-
|
|
942
|
-
Returns:
|
|
943
|
-
Status string (e.g., "running", "ready", "failed").
|
|
944
|
-
|
|
945
|
-
Raises:
|
|
946
|
-
ThordataConfigError: If public credentials are missing.
|
|
947
|
-
ThordataAPIError: If API returns a non-200 code in JSON payload.
|
|
948
|
-
ThordataNetworkError: If network/HTTP request fails.
|
|
949
|
-
"""
|
|
950
620
|
self._require_public_credentials()
|
|
951
|
-
|
|
952
621
|
headers = build_public_api_headers(
|
|
953
622
|
self.public_token or "", self.public_key or ""
|
|
954
623
|
)
|
|
955
|
-
payload = {"tasks_ids": task_id}
|
|
956
|
-
|
|
957
624
|
try:
|
|
958
625
|
response = self._api_request_with_retry(
|
|
959
626
|
"POST",
|
|
960
627
|
self._status_url,
|
|
961
|
-
data=
|
|
628
|
+
data={"tasks_ids": task_id},
|
|
962
629
|
headers=headers,
|
|
963
630
|
)
|
|
964
631
|
response.raise_for_status()
|
|
965
632
|
data = response.json()
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
payload=data,
|
|
975
|
-
)
|
|
976
|
-
|
|
977
|
-
items = data.get("data") or []
|
|
978
|
-
for item in items:
|
|
979
|
-
if str(item.get("task_id")) == str(task_id):
|
|
980
|
-
return item.get("status", "unknown")
|
|
981
|
-
|
|
982
|
-
return "unknown"
|
|
983
|
-
|
|
984
|
-
# Unexpected payload type
|
|
985
|
-
raise ThordataNetworkError(
|
|
986
|
-
f"Unexpected task status response type: {type(data).__name__}",
|
|
987
|
-
original_error=None,
|
|
988
|
-
)
|
|
989
|
-
|
|
990
|
-
except requests.Timeout as e:
|
|
991
|
-
raise ThordataTimeoutError(
|
|
992
|
-
f"Status check timed out: {e}", original_error=e
|
|
993
|
-
) from e
|
|
633
|
+
if data.get("code") != 200:
|
|
634
|
+
raise_for_code("Task status error", code=data.get("code"), payload=data)
|
|
635
|
+
|
|
636
|
+
items = data.get("data") or []
|
|
637
|
+
for item in items:
|
|
638
|
+
if str(item.get("task_id")) == str(task_id):
|
|
639
|
+
return item.get("status", "unknown")
|
|
640
|
+
return "unknown"
|
|
994
641
|
except requests.RequestException as e:
|
|
995
642
|
raise ThordataNetworkError(
|
|
996
643
|
f"Status check failed: {e}", original_error=e
|
|
997
644
|
) from e
|
|
998
645
|
|
|
999
646
|
def safe_get_task_status(self, task_id: str) -> str:
|
|
1000
|
-
"""
|
|
1001
|
-
Backward-compatible status check.
|
|
1002
|
-
|
|
1003
|
-
Returns:
|
|
1004
|
-
Status string, or "error" on any exception.
|
|
1005
|
-
"""
|
|
1006
647
|
try:
|
|
1007
648
|
return self.get_task_status(task_id)
|
|
1008
649
|
except Exception:
|
|
1009
650
|
return "error"
|
|
1010
651
|
|
|
1011
652
|
def get_task_result(self, task_id: str, file_type: str = "json") -> str:
|
|
1012
|
-
"""
|
|
1013
|
-
Get the download URL for a completed task.
|
|
1014
|
-
"""
|
|
1015
653
|
self._require_public_credentials()
|
|
1016
|
-
|
|
1017
654
|
headers = build_public_api_headers(
|
|
1018
655
|
self.public_token or "", self.public_key or ""
|
|
1019
656
|
)
|
|
1020
|
-
payload = {"tasks_id": task_id, "type": file_type}
|
|
1021
|
-
|
|
1022
|
-
logger.info(f"Getting result URL for Task: {task_id}")
|
|
1023
|
-
|
|
1024
657
|
try:
|
|
1025
658
|
response = self._api_request_with_retry(
|
|
1026
659
|
"POST",
|
|
1027
660
|
self._download_url,
|
|
1028
|
-
data=
|
|
661
|
+
data={"tasks_id": task_id, "type": file_type},
|
|
1029
662
|
headers=headers,
|
|
1030
663
|
)
|
|
1031
664
|
response.raise_for_status()
|
|
1032
|
-
|
|
1033
665
|
data = response.json()
|
|
1034
|
-
code
|
|
1035
|
-
|
|
1036
|
-
if code == 200 and data.get("data"):
|
|
666
|
+
if data.get("code") == 200 and data.get("data"):
|
|
1037
667
|
return data["data"]["download"]
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
|
|
1041
|
-
# This line won't be reached, but satisfies mypy
|
|
1042
|
-
raise RuntimeError("Unexpected state")
|
|
1043
|
-
|
|
668
|
+
raise_for_code("Get result failed", code=data.get("code"), payload=data)
|
|
669
|
+
return ""
|
|
1044
670
|
except requests.RequestException as e:
|
|
1045
671
|
raise ThordataNetworkError(
|
|
1046
672
|
f"Get result failed: {e}", original_error=e
|
|
1047
673
|
) from e
|
|
1048
674
|
|
|
1049
|
-
def list_tasks(
|
|
1050
|
-
self,
|
|
1051
|
-
page: int = 1,
|
|
1052
|
-
size: int = 20,
|
|
1053
|
-
) -> Dict[str, Any]:
|
|
1054
|
-
"""
|
|
1055
|
-
List all Web Scraper tasks.
|
|
1056
|
-
|
|
1057
|
-
Args:
|
|
1058
|
-
page: Page number (starts from 1).
|
|
1059
|
-
size: Number of tasks per page.
|
|
1060
|
-
|
|
1061
|
-
Returns:
|
|
1062
|
-
Dict containing 'count' and 'list' of tasks.
|
|
1063
|
-
|
|
1064
|
-
Example:
|
|
1065
|
-
>>> result = client.list_tasks(page=1, size=10)
|
|
1066
|
-
>>> print(f"Total tasks: {result['count']}")
|
|
1067
|
-
>>> for task in result['list']:
|
|
1068
|
-
... print(f"Task {task['task_id']}: {task['status']}")
|
|
1069
|
-
"""
|
|
675
|
+
def list_tasks(self, page: int = 1, size: int = 20) -> dict[str, Any]:
|
|
1070
676
|
self._require_public_credentials()
|
|
1071
|
-
|
|
1072
677
|
headers = build_public_api_headers(
|
|
1073
678
|
self.public_token or "", self.public_key or ""
|
|
1074
679
|
)
|
|
1075
|
-
payload: Dict[str, Any] = {}
|
|
1076
|
-
if page:
|
|
1077
|
-
payload["page"] = str(page)
|
|
1078
|
-
if size:
|
|
1079
|
-
payload["size"] = str(size)
|
|
1080
|
-
|
|
1081
|
-
logger.info(f"Listing tasks: page={page}, size={size}")
|
|
1082
|
-
|
|
1083
680
|
response = self._api_request_with_retry(
|
|
1084
681
|
"POST",
|
|
1085
682
|
self._list_url,
|
|
1086
|
-
data=
|
|
683
|
+
data={"page": str(page), "size": str(size)},
|
|
1087
684
|
headers=headers,
|
|
1088
685
|
)
|
|
1089
686
|
response.raise_for_status()
|
|
1090
|
-
|
|
1091
687
|
data = response.json()
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
if code != 200:
|
|
1095
|
-
msg = extract_error_message(data)
|
|
1096
|
-
raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
|
|
1097
|
-
|
|
688
|
+
if data.get("code") != 200:
|
|
689
|
+
raise_for_code("List tasks failed", code=data.get("code"), payload=data)
|
|
1098
690
|
return data.get("data", {"count": 0, "list": []})
|
|
1099
691
|
|
|
1100
692
|
def wait_for_task(
|
|
@@ -1104,84 +696,32 @@ class ThordataClient:
|
|
|
1104
696
|
poll_interval: float = 5.0,
|
|
1105
697
|
max_wait: float = 600.0,
|
|
1106
698
|
) -> str:
|
|
1107
|
-
"""
|
|
1108
|
-
Wait for a task to complete.
|
|
1109
|
-
|
|
1110
|
-
Args:
|
|
1111
|
-
task_id: The task ID to wait for.
|
|
1112
|
-
poll_interval: Seconds between status checks.
|
|
1113
|
-
max_wait: Maximum seconds to wait.
|
|
1114
|
-
|
|
1115
|
-
Returns:
|
|
1116
|
-
Final task status.
|
|
1117
|
-
|
|
1118
|
-
Raises:
|
|
1119
|
-
TimeoutError: If max_wait is exceeded.
|
|
1120
|
-
|
|
1121
|
-
Example:
|
|
1122
|
-
>>> task_id = client.create_scraper_task(...)
|
|
1123
|
-
>>> status = client.wait_for_task(task_id, max_wait=300)
|
|
1124
|
-
>>> if status in ("ready", "success"):
|
|
1125
|
-
... url = client.get_task_result(task_id)
|
|
1126
|
-
"""
|
|
1127
699
|
import time
|
|
1128
700
|
|
|
1129
701
|
start = time.monotonic()
|
|
1130
|
-
|
|
1131
702
|
while (time.monotonic() - start) < max_wait:
|
|
1132
703
|
status = self.get_task_status(task_id)
|
|
1133
|
-
|
|
1134
|
-
logger.debug(f"Task {task_id} status: {status}")
|
|
1135
|
-
|
|
1136
|
-
terminal_statuses = {
|
|
704
|
+
if status.lower() in {
|
|
1137
705
|
"ready",
|
|
1138
706
|
"success",
|
|
1139
707
|
"finished",
|
|
1140
708
|
"failed",
|
|
1141
709
|
"error",
|
|
1142
710
|
"cancelled",
|
|
1143
|
-
}
|
|
1144
|
-
|
|
1145
|
-
if status.lower() in terminal_statuses:
|
|
711
|
+
}:
|
|
1146
712
|
return status
|
|
1147
|
-
|
|
1148
713
|
time.sleep(poll_interval)
|
|
1149
|
-
|
|
1150
|
-
raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
|
|
714
|
+
raise TimeoutError(f"Task {task_id} timeout")
|
|
1151
715
|
|
|
1152
716
|
# =========================================================================
|
|
1153
|
-
#
|
|
717
|
+
# Account / Locations / Utils
|
|
1154
718
|
# =========================================================================
|
|
1155
719
|
def get_usage_statistics(
|
|
1156
720
|
self,
|
|
1157
|
-
from_date:
|
|
1158
|
-
to_date:
|
|
721
|
+
from_date: str | date,
|
|
722
|
+
to_date: str | date,
|
|
1159
723
|
) -> UsageStatistics:
|
|
1160
|
-
"""
|
|
1161
|
-
Get account usage statistics for a date range.
|
|
1162
|
-
|
|
1163
|
-
Args:
|
|
1164
|
-
from_date: Start date (YYYY-MM-DD string or date object).
|
|
1165
|
-
to_date: End date (YYYY-MM-DD string or date object).
|
|
1166
|
-
|
|
1167
|
-
Returns:
|
|
1168
|
-
UsageStatistics object with traffic data.
|
|
1169
|
-
|
|
1170
|
-
Raises:
|
|
1171
|
-
ValueError: If date range exceeds 180 days.
|
|
1172
|
-
|
|
1173
|
-
Example:
|
|
1174
|
-
>>> from datetime import date, timedelta
|
|
1175
|
-
>>> today = date.today()
|
|
1176
|
-
>>> week_ago = today - timedelta(days=7)
|
|
1177
|
-
>>> stats = client.get_usage_statistics(week_ago, today)
|
|
1178
|
-
>>> print(f"Used: {stats.range_usage_gb():.2f} GB")
|
|
1179
|
-
>>> print(f"Balance: {stats.balance_gb():.2f} GB")
|
|
1180
|
-
"""
|
|
1181
|
-
|
|
1182
724
|
self._require_public_credentials()
|
|
1183
|
-
|
|
1184
|
-
# Convert dates to strings
|
|
1185
725
|
if isinstance(from_date, date):
|
|
1186
726
|
from_date = from_date.strftime("%Y-%m-%d")
|
|
1187
727
|
if isinstance(to_date, date):
|
|
@@ -1193,199 +733,54 @@ class ThordataClient:
|
|
|
1193
733
|
"from_date": from_date,
|
|
1194
734
|
"to_date": to_date,
|
|
1195
735
|
}
|
|
1196
|
-
|
|
1197
|
-
logger.info(f"Getting usage statistics: {from_date} to {to_date}")
|
|
1198
|
-
|
|
1199
|
-
response = self._api_request_with_retry(
|
|
1200
|
-
"GET",
|
|
1201
|
-
self._usage_stats_url,
|
|
1202
|
-
params=params,
|
|
1203
|
-
)
|
|
1204
|
-
response.raise_for_status()
|
|
1205
|
-
|
|
1206
|
-
data = response.json()
|
|
1207
|
-
|
|
1208
|
-
if isinstance(data, dict):
|
|
1209
|
-
code = data.get("code")
|
|
1210
|
-
if code is not None and code != 200:
|
|
1211
|
-
msg = extract_error_message(data)
|
|
1212
|
-
raise_for_code(
|
|
1213
|
-
f"Usage statistics error: {msg}",
|
|
1214
|
-
code=code,
|
|
1215
|
-
payload=data,
|
|
1216
|
-
)
|
|
1217
|
-
|
|
1218
|
-
# Extract data field
|
|
1219
|
-
usage_data = data.get("data", data)
|
|
1220
|
-
return UsageStatistics.from_dict(usage_data)
|
|
1221
|
-
|
|
1222
|
-
raise ThordataNetworkError(
|
|
1223
|
-
f"Unexpected usage statistics response: {type(data).__name__}",
|
|
1224
|
-
original_error=None,
|
|
1225
|
-
)
|
|
1226
|
-
|
|
1227
|
-
def get_residential_balance(self) -> Dict[str, Any]:
|
|
1228
|
-
"""
|
|
1229
|
-
Get residential proxy balance.
|
|
1230
|
-
|
|
1231
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1232
|
-
"""
|
|
1233
|
-
headers = self._build_gateway_headers()
|
|
1234
|
-
|
|
1235
|
-
logger.info("Getting residential proxy balance")
|
|
1236
|
-
|
|
1237
736
|
response = self._api_request_with_retry(
|
|
1238
|
-
"
|
|
1239
|
-
f"{self._gateway_base_url}/getFlowBalance",
|
|
1240
|
-
headers=headers,
|
|
1241
|
-
data={},
|
|
737
|
+
"GET", self._usage_stats_url, params=params
|
|
1242
738
|
)
|
|
1243
739
|
response.raise_for_status()
|
|
1244
|
-
|
|
1245
740
|
data = response.json()
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
msg = extract_error_message(data)
|
|
1250
|
-
raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
|
|
1251
|
-
|
|
1252
|
-
return data.get("data", {})
|
|
1253
|
-
|
|
1254
|
-
def get_residential_usage(
|
|
1255
|
-
self,
|
|
1256
|
-
start_time: Union[str, int],
|
|
1257
|
-
end_time: Union[str, int],
|
|
1258
|
-
) -> Dict[str, Any]:
|
|
1259
|
-
"""
|
|
1260
|
-
Get residential proxy usage records.
|
|
1261
|
-
|
|
1262
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1263
|
-
"""
|
|
1264
|
-
headers = self._build_gateway_headers()
|
|
1265
|
-
payload = {"start_time": str(start_time), "end_time": str(end_time)}
|
|
1266
|
-
|
|
1267
|
-
logger.info(f"Getting residential usage: {start_time} to {end_time}")
|
|
1268
|
-
|
|
1269
|
-
response = self._api_request_with_retry(
|
|
1270
|
-
"POST",
|
|
1271
|
-
f"{self._gateway_base_url}/usageRecord",
|
|
1272
|
-
headers=headers,
|
|
1273
|
-
data=payload,
|
|
1274
|
-
)
|
|
1275
|
-
response.raise_for_status()
|
|
1276
|
-
|
|
1277
|
-
data = response.json()
|
|
1278
|
-
code = data.get("code")
|
|
1279
|
-
|
|
1280
|
-
if code != 200:
|
|
1281
|
-
msg = extract_error_message(data)
|
|
1282
|
-
raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
|
|
1283
|
-
|
|
1284
|
-
return data.get("data", {})
|
|
741
|
+
if data.get("code") != 200:
|
|
742
|
+
raise_for_code("Usage stats error", code=data.get("code"), payload=data)
|
|
743
|
+
return UsageStatistics.from_dict(data.get("data", data))
|
|
1285
744
|
|
|
1286
745
|
def list_proxy_users(
|
|
1287
|
-
self, proxy_type:
|
|
746
|
+
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
1288
747
|
) -> ProxyUserList:
|
|
1289
|
-
"""
|
|
1290
|
-
List all proxy users (sub-accounts).
|
|
1291
|
-
|
|
1292
|
-
Args:
|
|
1293
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1294
|
-
|
|
1295
|
-
Returns:
|
|
1296
|
-
ProxyUserList with user details.
|
|
1297
|
-
|
|
1298
|
-
Example:
|
|
1299
|
-
>>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
|
|
1300
|
-
>>> print(f"Total users: {users.user_count}")
|
|
1301
|
-
>>> for user in users.users:
|
|
1302
|
-
... print(f"{user.username}: {user.usage_gb():.2f} GB used")
|
|
1303
|
-
"""
|
|
1304
|
-
|
|
1305
748
|
self._require_public_credentials()
|
|
1306
|
-
|
|
749
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1307
750
|
params = {
|
|
1308
751
|
"token": self.public_token,
|
|
1309
752
|
"key": self.public_key,
|
|
1310
|
-
"proxy_type": str(
|
|
1311
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1312
|
-
),
|
|
753
|
+
"proxy_type": str(pt),
|
|
1313
754
|
}
|
|
1314
|
-
|
|
1315
|
-
logger.info(f"Listing proxy users: type={params['proxy_type']}")
|
|
1316
|
-
|
|
1317
755
|
response = self._api_request_with_retry(
|
|
1318
|
-
"GET",
|
|
1319
|
-
f"{self._proxy_users_url}/user-list",
|
|
1320
|
-
params=params,
|
|
756
|
+
"GET", f"{self._proxy_users_url}/user-list", params=params
|
|
1321
757
|
)
|
|
1322
758
|
response.raise_for_status()
|
|
1323
|
-
|
|
1324
759
|
data = response.json()
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
if code is not None and code != 200:
|
|
1329
|
-
msg = extract_error_message(data)
|
|
1330
|
-
raise_for_code(
|
|
1331
|
-
f"List proxy users error: {msg}", code=code, payload=data
|
|
1332
|
-
)
|
|
1333
|
-
|
|
1334
|
-
user_data = data.get("data", data)
|
|
1335
|
-
return ProxyUserList.from_dict(user_data)
|
|
1336
|
-
|
|
1337
|
-
raise ThordataNetworkError(
|
|
1338
|
-
f"Unexpected proxy users response: {type(data).__name__}",
|
|
1339
|
-
original_error=None,
|
|
1340
|
-
)
|
|
760
|
+
if data.get("code") != 200:
|
|
761
|
+
raise_for_code("List users error", code=data.get("code"), payload=data)
|
|
762
|
+
return ProxyUserList.from_dict(data.get("data", data))
|
|
1341
763
|
|
|
1342
764
|
def create_proxy_user(
|
|
1343
765
|
self,
|
|
1344
766
|
username: str,
|
|
1345
767
|
password: str,
|
|
1346
|
-
proxy_type:
|
|
768
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1347
769
|
traffic_limit: int = 0,
|
|
1348
770
|
status: bool = True,
|
|
1349
|
-
) ->
|
|
1350
|
-
"""
|
|
1351
|
-
Create a new proxy user (sub-account).
|
|
1352
|
-
|
|
1353
|
-
Args:
|
|
1354
|
-
username: Username for the new user.
|
|
1355
|
-
password: Password for the new user.
|
|
1356
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1357
|
-
traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
|
|
1358
|
-
status: Enable/disable user (True/False).
|
|
1359
|
-
|
|
1360
|
-
Returns:
|
|
1361
|
-
API response data.
|
|
1362
|
-
|
|
1363
|
-
Example:
|
|
1364
|
-
>>> result = client.create_proxy_user(
|
|
1365
|
-
... username="subuser1",
|
|
1366
|
-
... password="securepass",
|
|
1367
|
-
... traffic_limit=5120, # 5GB
|
|
1368
|
-
... status=True
|
|
1369
|
-
... )
|
|
1370
|
-
"""
|
|
771
|
+
) -> dict[str, Any]:
|
|
1371
772
|
self._require_public_credentials()
|
|
1372
|
-
|
|
773
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1373
774
|
headers = build_public_api_headers(
|
|
1374
775
|
self.public_token or "", self.public_key or ""
|
|
1375
776
|
)
|
|
1376
|
-
|
|
1377
777
|
payload = {
|
|
1378
|
-
"proxy_type": str(
|
|
1379
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1380
|
-
),
|
|
778
|
+
"proxy_type": str(pt),
|
|
1381
779
|
"username": username,
|
|
1382
780
|
"password": password,
|
|
1383
781
|
"traffic_limit": str(traffic_limit),
|
|
1384
782
|
"status": "true" if status else "false",
|
|
1385
783
|
}
|
|
1386
|
-
|
|
1387
|
-
logger.info(f"Creating proxy user: {username}")
|
|
1388
|
-
|
|
1389
784
|
response = self._api_request_with_retry(
|
|
1390
785
|
"POST",
|
|
1391
786
|
f"{self._proxy_users_url}/create-user",
|
|
@@ -1393,428 +788,145 @@ class ThordataClient:
|
|
|
1393
788
|
headers=headers,
|
|
1394
789
|
)
|
|
1395
790
|
response.raise_for_status()
|
|
1396
|
-
|
|
1397
791
|
data = response.json()
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
if code != 200:
|
|
1401
|
-
msg = extract_error_message(data)
|
|
1402
|
-
raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
|
|
1403
|
-
|
|
792
|
+
if data.get("code") != 200:
|
|
793
|
+
raise_for_code("Create user failed", code=data.get("code"), payload=data)
|
|
1404
794
|
return data.get("data", {})
|
|
1405
795
|
|
|
1406
796
|
def add_whitelist_ip(
|
|
1407
797
|
self,
|
|
1408
798
|
ip: str,
|
|
1409
|
-
proxy_type:
|
|
799
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1410
800
|
status: bool = True,
|
|
1411
|
-
) ->
|
|
1412
|
-
"""
|
|
1413
|
-
Add an IP to the whitelist for IP authentication.
|
|
1414
|
-
|
|
1415
|
-
Args:
|
|
1416
|
-
ip: IP address to whitelist.
|
|
1417
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
|
|
1418
|
-
status: Enable/disable the IP (True/False).
|
|
1419
|
-
|
|
1420
|
-
Returns:
|
|
1421
|
-
API response data.
|
|
1422
|
-
|
|
1423
|
-
Example:
|
|
1424
|
-
>>> result = client.add_whitelist_ip(
|
|
1425
|
-
... ip="123.45.67.89",
|
|
1426
|
-
... proxy_type=ProxyType.RESIDENTIAL,
|
|
1427
|
-
... status=True
|
|
1428
|
-
... )
|
|
1429
|
-
"""
|
|
801
|
+
) -> dict[str, Any]:
|
|
1430
802
|
self._require_public_credentials()
|
|
1431
|
-
|
|
803
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1432
804
|
headers = build_public_api_headers(
|
|
1433
805
|
self.public_token or "", self.public_key or ""
|
|
1434
806
|
)
|
|
1435
|
-
|
|
1436
|
-
# Convert ProxyType to int
|
|
1437
|
-
proxy_type_int = (
|
|
1438
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1439
|
-
)
|
|
1440
|
-
|
|
1441
807
|
payload = {
|
|
1442
|
-
"proxy_type": str(
|
|
808
|
+
"proxy_type": str(pt),
|
|
1443
809
|
"ip": ip,
|
|
1444
810
|
"status": "true" if status else "false",
|
|
1445
811
|
}
|
|
1446
|
-
|
|
1447
|
-
logger.info(f"Adding whitelist IP: {ip}")
|
|
1448
|
-
|
|
1449
812
|
response = self._api_request_with_retry(
|
|
1450
|
-
"POST",
|
|
1451
|
-
f"{self._whitelist_url}/add-ip",
|
|
1452
|
-
data=payload,
|
|
1453
|
-
headers=headers,
|
|
813
|
+
"POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
|
|
1454
814
|
)
|
|
1455
815
|
response.raise_for_status()
|
|
1456
|
-
|
|
1457
816
|
data = response.json()
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
|
|
1463
|
-
|
|
817
|
+
if data.get("code") != 200:
|
|
818
|
+
raise_for_code(
|
|
819
|
+
"Add whitelist IP failed", code=data.get("code"), payload=data
|
|
820
|
+
)
|
|
1464
821
|
return data.get("data", {})
|
|
1465
822
|
|
|
1466
|
-
def list_proxy_servers(
|
|
1467
|
-
self,
|
|
1468
|
-
proxy_type: int,
|
|
1469
|
-
) -> List[ProxyServer]:
|
|
1470
|
-
"""
|
|
1471
|
-
List ISP or Datacenter proxy servers.
|
|
1472
|
-
|
|
1473
|
-
Args:
|
|
1474
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1475
|
-
|
|
1476
|
-
Returns:
|
|
1477
|
-
List of ProxyServer objects.
|
|
1478
|
-
|
|
1479
|
-
Example:
|
|
1480
|
-
>>> servers = client.list_proxy_servers(proxy_type=1) # ISP proxies
|
|
1481
|
-
>>> for server in servers:
|
|
1482
|
-
... print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
|
|
1483
|
-
"""
|
|
1484
|
-
|
|
823
|
+
def list_proxy_servers(self, proxy_type: int) -> list[ProxyServer]:
|
|
1485
824
|
self._require_public_credentials()
|
|
1486
|
-
|
|
1487
825
|
params = {
|
|
1488
826
|
"token": self.public_token,
|
|
1489
827
|
"key": self.public_key,
|
|
1490
828
|
"proxy_type": str(proxy_type),
|
|
1491
829
|
}
|
|
1492
|
-
|
|
1493
|
-
logger.info(f"Listing proxy servers: type={proxy_type}")
|
|
1494
|
-
|
|
1495
830
|
response = self._api_request_with_retry(
|
|
1496
|
-
"GET",
|
|
1497
|
-
self._proxy_list_url,
|
|
1498
|
-
params=params,
|
|
831
|
+
"GET", self._proxy_list_url, params=params
|
|
1499
832
|
)
|
|
1500
833
|
response.raise_for_status()
|
|
1501
|
-
|
|
1502
834
|
data = response.json()
|
|
835
|
+
if data.get("code") != 200:
|
|
836
|
+
raise_for_code(
|
|
837
|
+
"List proxy servers error", code=data.get("code"), payload=data
|
|
838
|
+
)
|
|
1503
839
|
|
|
840
|
+
server_list = []
|
|
1504
841
|
if isinstance(data, dict):
|
|
1505
|
-
code = data.get("code")
|
|
1506
|
-
if code is not None and code != 200:
|
|
1507
|
-
msg = extract_error_message(data)
|
|
1508
|
-
raise_for_code(
|
|
1509
|
-
f"List proxy servers error: {msg}", code=code, payload=data
|
|
1510
|
-
)
|
|
1511
|
-
|
|
1512
|
-
# Extract list from data field
|
|
1513
842
|
server_list = data.get("data", data.get("list", []))
|
|
1514
843
|
elif isinstance(data, list):
|
|
1515
844
|
server_list = data
|
|
1516
|
-
else:
|
|
1517
|
-
raise ThordataNetworkError(
|
|
1518
|
-
f"Unexpected proxy list response: {type(data).__name__}",
|
|
1519
|
-
original_error=None,
|
|
1520
|
-
)
|
|
1521
845
|
|
|
1522
846
|
return [ProxyServer.from_dict(s) for s in server_list]
|
|
1523
847
|
|
|
1524
|
-
def get_isp_regions(self) -> List[Dict[str, Any]]:
|
|
1525
|
-
"""
|
|
1526
|
-
Get available ISP proxy regions.
|
|
1527
|
-
|
|
1528
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1529
|
-
"""
|
|
1530
|
-
headers = self._build_gateway_headers()
|
|
1531
|
-
|
|
1532
|
-
logger.info("Getting ISP regions")
|
|
1533
|
-
|
|
1534
|
-
response = self._api_request_with_retry(
|
|
1535
|
-
"POST",
|
|
1536
|
-
f"{self._gateway_base_url}/getRegionIsp",
|
|
1537
|
-
headers=headers,
|
|
1538
|
-
data={},
|
|
1539
|
-
)
|
|
1540
|
-
response.raise_for_status()
|
|
1541
|
-
|
|
1542
|
-
data = response.json()
|
|
1543
|
-
code = data.get("code")
|
|
1544
|
-
|
|
1545
|
-
if code != 200:
|
|
1546
|
-
msg = extract_error_message(data)
|
|
1547
|
-
raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
|
|
1548
|
-
|
|
1549
|
-
return data.get("data", [])
|
|
1550
|
-
|
|
1551
|
-
def list_isp_proxies(self) -> List[Dict[str, Any]]:
|
|
1552
|
-
"""
|
|
1553
|
-
List ISP proxies.
|
|
1554
|
-
|
|
1555
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1556
|
-
"""
|
|
1557
|
-
headers = self._build_gateway_headers()
|
|
1558
|
-
|
|
1559
|
-
logger.info("Listing ISP proxies")
|
|
1560
|
-
|
|
1561
|
-
response = self._api_request_with_retry(
|
|
1562
|
-
"POST",
|
|
1563
|
-
f"{self._gateway_base_url}/queryListIsp",
|
|
1564
|
-
headers=headers,
|
|
1565
|
-
data={},
|
|
1566
|
-
)
|
|
1567
|
-
response.raise_for_status()
|
|
1568
|
-
|
|
1569
|
-
data = response.json()
|
|
1570
|
-
code = data.get("code")
|
|
1571
|
-
|
|
1572
|
-
if code != 200:
|
|
1573
|
-
msg = extract_error_message(data)
|
|
1574
|
-
raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
|
|
1575
|
-
|
|
1576
|
-
return data.get("data", [])
|
|
1577
|
-
|
|
1578
|
-
def get_wallet_balance(self) -> Dict[str, Any]:
|
|
1579
|
-
"""
|
|
1580
|
-
Get wallet balance for ISP proxies.
|
|
1581
|
-
|
|
1582
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1583
|
-
"""
|
|
1584
|
-
headers = self._build_gateway_headers()
|
|
1585
|
-
|
|
1586
|
-
logger.info("Getting wallet balance")
|
|
1587
|
-
|
|
1588
|
-
response = self._api_request_with_retry(
|
|
1589
|
-
"POST",
|
|
1590
|
-
f"{self._gateway_base_url}/getBalance",
|
|
1591
|
-
headers=headers,
|
|
1592
|
-
data={},
|
|
1593
|
-
)
|
|
1594
|
-
response.raise_for_status()
|
|
1595
|
-
|
|
1596
|
-
data = response.json()
|
|
1597
|
-
code = data.get("code")
|
|
1598
|
-
|
|
1599
|
-
if code != 200:
|
|
1600
|
-
msg = extract_error_message(data)
|
|
1601
|
-
raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
|
|
1602
|
-
|
|
1603
|
-
return data.get("data", {})
|
|
1604
|
-
|
|
1605
848
|
def get_proxy_expiration(
|
|
1606
|
-
self,
|
|
1607
|
-
|
|
1608
|
-
proxy_type: int,
|
|
1609
|
-
) -> Dict[str, Any]:
|
|
1610
|
-
"""
|
|
1611
|
-
Get expiration time for specific proxy IPs.
|
|
1612
|
-
|
|
1613
|
-
Args:
|
|
1614
|
-
ips: Single IP or list of IPs to check.
|
|
1615
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1616
|
-
|
|
1617
|
-
Returns:
|
|
1618
|
-
Dict with expiration information.
|
|
1619
|
-
|
|
1620
|
-
Example:
|
|
1621
|
-
>>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
|
|
1622
|
-
>>> print(result)
|
|
1623
|
-
"""
|
|
849
|
+
self, ips: str | list[str], proxy_type: int
|
|
850
|
+
) -> dict[str, Any]:
|
|
1624
851
|
self._require_public_credentials()
|
|
1625
|
-
|
|
1626
|
-
# Convert list to comma-separated string
|
|
1627
852
|
if isinstance(ips, list):
|
|
1628
853
|
ips = ",".join(ips)
|
|
1629
|
-
|
|
1630
854
|
params = {
|
|
1631
855
|
"token": self.public_token,
|
|
1632
856
|
"key": self.public_key,
|
|
1633
857
|
"proxy_type": str(proxy_type),
|
|
1634
858
|
"ips": ips,
|
|
1635
859
|
}
|
|
1636
|
-
|
|
1637
|
-
logger.info(f"Getting proxy expiration: {ips}")
|
|
1638
|
-
|
|
1639
860
|
response = self._api_request_with_retry(
|
|
1640
|
-
"GET",
|
|
1641
|
-
self._proxy_expiration_url,
|
|
1642
|
-
params=params,
|
|
861
|
+
"GET", self._proxy_expiration_url, params=params
|
|
1643
862
|
)
|
|
1644
863
|
response.raise_for_status()
|
|
1645
|
-
|
|
1646
864
|
data = response.json()
|
|
865
|
+
if data.get("code") != 200:
|
|
866
|
+
raise_for_code("Get expiration error", code=data.get("code"), payload=data)
|
|
867
|
+
return data.get("data", data)
|
|
1647
868
|
|
|
1648
|
-
if isinstance(data, dict):
|
|
1649
|
-
code = data.get("code")
|
|
1650
|
-
if code is not None and code != 200:
|
|
1651
|
-
msg = extract_error_message(data)
|
|
1652
|
-
raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
|
|
1653
|
-
|
|
1654
|
-
return data.get("data", data)
|
|
1655
|
-
|
|
1656
|
-
return data
|
|
1657
|
-
|
|
1658
|
-
# =========================================================================
|
|
1659
|
-
# Location API Methods (Country/State/City/ASN functions)
|
|
1660
|
-
# =========================================================================
|
|
1661
869
|
def list_countries(
|
|
1662
|
-
self, proxy_type:
|
|
1663
|
-
) ->
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
Args:
|
|
1668
|
-
proxy_type: 1 for residential, 2 for unlimited.
|
|
1669
|
-
|
|
1670
|
-
Returns:
|
|
1671
|
-
List of country records with 'country_code' and 'country_name'.
|
|
1672
|
-
"""
|
|
1673
|
-
return self._get_locations(
|
|
1674
|
-
"countries",
|
|
1675
|
-
proxy_type=(
|
|
1676
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1677
|
-
),
|
|
1678
|
-
)
|
|
870
|
+
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
871
|
+
) -> list[dict[str, Any]]:
|
|
872
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
873
|
+
return self._get_locations("countries", proxy_type=pt)
|
|
1679
874
|
|
|
1680
875
|
def list_states(
|
|
1681
876
|
self,
|
|
1682
877
|
country_code: str,
|
|
1683
|
-
proxy_type:
|
|
1684
|
-
) ->
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
Args:
|
|
1689
|
-
country_code: Country code (e.g., 'US').
|
|
1690
|
-
proxy_type: Proxy type.
|
|
1691
|
-
|
|
1692
|
-
Returns:
|
|
1693
|
-
List of state records.
|
|
1694
|
-
"""
|
|
1695
|
-
return self._get_locations(
|
|
1696
|
-
"states",
|
|
1697
|
-
proxy_type=(
|
|
1698
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1699
|
-
),
|
|
1700
|
-
country_code=country_code,
|
|
1701
|
-
)
|
|
878
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
879
|
+
) -> list[dict[str, Any]]:
|
|
880
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
881
|
+
return self._get_locations("states", proxy_type=pt, country_code=country_code)
|
|
1702
882
|
|
|
1703
883
|
def list_cities(
|
|
1704
884
|
self,
|
|
1705
885
|
country_code: str,
|
|
1706
|
-
state_code:
|
|
1707
|
-
proxy_type:
|
|
1708
|
-
) ->
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
Args:
|
|
1713
|
-
country_code: Country code.
|
|
1714
|
-
state_code: Optional state code.
|
|
1715
|
-
proxy_type: Proxy type.
|
|
1716
|
-
|
|
1717
|
-
Returns:
|
|
1718
|
-
List of city records.
|
|
1719
|
-
"""
|
|
1720
|
-
kwargs = {
|
|
1721
|
-
"proxy_type": (
|
|
1722
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1723
|
-
),
|
|
1724
|
-
"country_code": country_code,
|
|
1725
|
-
}
|
|
886
|
+
state_code: str | None = None,
|
|
887
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
888
|
+
) -> list[dict[str, Any]]:
|
|
889
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
890
|
+
kwargs = {"proxy_type": pt, "country_code": country_code}
|
|
1726
891
|
if state_code:
|
|
1727
892
|
kwargs["state_code"] = state_code
|
|
1728
|
-
|
|
1729
893
|
return self._get_locations("cities", **kwargs)
|
|
1730
894
|
|
|
1731
895
|
def list_asn(
|
|
1732
896
|
self,
|
|
1733
897
|
country_code: str,
|
|
1734
|
-
proxy_type:
|
|
1735
|
-
) ->
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
Args:
|
|
1740
|
-
country_code: Country code.
|
|
1741
|
-
proxy_type: Proxy type.
|
|
1742
|
-
|
|
1743
|
-
Returns:
|
|
1744
|
-
List of ASN records.
|
|
1745
|
-
"""
|
|
1746
|
-
return self._get_locations(
|
|
1747
|
-
"asn",
|
|
1748
|
-
proxy_type=(
|
|
1749
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1750
|
-
),
|
|
1751
|
-
country_code=country_code,
|
|
1752
|
-
)
|
|
898
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
899
|
+
) -> list[dict[str, Any]]:
|
|
900
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
901
|
+
return self._get_locations("asn", proxy_type=pt, country_code=country_code)
|
|
1753
902
|
|
|
1754
|
-
def _get_locations(self, endpoint: str, **kwargs: Any) ->
|
|
1755
|
-
"""Internal method to call locations API."""
|
|
903
|
+
def _get_locations(self, endpoint: str, **kwargs: Any) -> list[dict[str, Any]]:
|
|
1756
904
|
self._require_public_credentials()
|
|
905
|
+
params = {"token": self.public_token, "key": self.public_key}
|
|
906
|
+
for k, v in kwargs.items():
|
|
907
|
+
params[k] = str(v)
|
|
1757
908
|
|
|
1758
|
-
params = {
|
|
1759
|
-
"token": self.public_token,
|
|
1760
|
-
"key": self.public_key,
|
|
1761
|
-
}
|
|
1762
|
-
|
|
1763
|
-
for key, value in kwargs.items():
|
|
1764
|
-
params[key] = str(value)
|
|
1765
|
-
|
|
1766
|
-
url = f"{self._locations_base_url}/{endpoint}"
|
|
1767
|
-
|
|
1768
|
-
logger.debug(f"Locations API request: {url}")
|
|
1769
|
-
|
|
1770
|
-
# Use requests.get directly (no proxy needed for this API)
|
|
1771
909
|
response = self._api_request_with_retry(
|
|
1772
|
-
"GET",
|
|
1773
|
-
url,
|
|
1774
|
-
params=params,
|
|
910
|
+
"GET", f"{self._locations_base_url}/{endpoint}", params=params
|
|
1775
911
|
)
|
|
1776
912
|
response.raise_for_status()
|
|
1777
|
-
|
|
1778
913
|
data = response.json()
|
|
1779
|
-
|
|
1780
914
|
if isinstance(data, dict):
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
msg = data.get("msg", "")
|
|
1784
|
-
raise RuntimeError(
|
|
1785
|
-
f"Locations API error ({endpoint}): code={code}, msg={msg}"
|
|
1786
|
-
)
|
|
915
|
+
if data.get("code") != 200:
|
|
916
|
+
raise RuntimeError(f"Locations error: {data.get('msg')}")
|
|
1787
917
|
return data.get("data") or []
|
|
918
|
+
return data if isinstance(data, list) else []
|
|
1788
919
|
|
|
1789
|
-
if isinstance(data, list):
|
|
1790
|
-
return data
|
|
1791
|
-
|
|
1792
|
-
return []
|
|
1793
|
-
|
|
1794
|
-
# =========================================================================
|
|
1795
|
-
# Helper Methods (Internal utility functions)
|
|
1796
|
-
# =========================================================================
|
|
1797
920
|
def _require_public_credentials(self) -> None:
|
|
1798
|
-
"""Ensure public API credentials are available."""
|
|
1799
921
|
if not self.public_token or not self.public_key:
|
|
1800
922
|
raise ThordataConfigError(
|
|
1801
|
-
"public_token and public_key are required for this operation.
|
|
1802
|
-
"Please provide them when initializing ThordataClient."
|
|
923
|
+
"public_token and public_key are required for this operation."
|
|
1803
924
|
)
|
|
1804
925
|
|
|
1805
926
|
def _get_proxy_endpoint_overrides(
|
|
1806
927
|
self, product: ProxyProduct
|
|
1807
|
-
) -> tuple[
|
|
1808
|
-
|
|
1809
|
-
Read proxy endpoint overrides from env.
|
|
1810
|
-
|
|
1811
|
-
Priority:
|
|
1812
|
-
1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
|
|
1813
|
-
2) THORDATA_PROXY_HOST/PORT/PROTOCOL
|
|
1814
|
-
3) defaults (host/port None => ProxyConfig will use its product defaults)
|
|
1815
|
-
"""
|
|
1816
|
-
prefix = product.value.upper() # RESIDENTIAL / DATACENTER / MOBILE / ISP
|
|
1817
|
-
|
|
928
|
+
) -> tuple[str | None, int | None, str]:
|
|
929
|
+
prefix = product.value.upper()
|
|
1818
930
|
host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
|
|
1819
931
|
"THORDATA_PROXY_HOST"
|
|
1820
932
|
)
|
|
@@ -1826,184 +938,37 @@ class ThordataClient:
|
|
|
1826
938
|
or os.getenv("THORDATA_PROXY_PROTOCOL")
|
|
1827
939
|
or "http"
|
|
1828
940
|
)
|
|
1829
|
-
|
|
1830
|
-
port: Optional[int] = None
|
|
1831
|
-
if port_raw:
|
|
1832
|
-
try:
|
|
1833
|
-
port = int(port_raw)
|
|
1834
|
-
except ValueError:
|
|
1835
|
-
port = None
|
|
1836
|
-
|
|
941
|
+
port = int(port_raw) if port_raw and port_raw.isdigit() else None
|
|
1837
942
|
return host or None, port, protocol
|
|
1838
943
|
|
|
1839
|
-
def _get_default_proxy_config_from_env(self) ->
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
password=p,
|
|
1858
|
-
product=ProxyProduct.RESIDENTIAL,
|
|
1859
|
-
host=host,
|
|
1860
|
-
port=port,
|
|
1861
|
-
protocol=protocol,
|
|
1862
|
-
)
|
|
1863
|
-
|
|
1864
|
-
# Datacenter
|
|
1865
|
-
u = os.getenv("THORDATA_DATACENTER_USERNAME")
|
|
1866
|
-
p = os.getenv("THORDATA_DATACENTER_PASSWORD")
|
|
1867
|
-
if u and p:
|
|
1868
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1869
|
-
ProxyProduct.DATACENTER
|
|
1870
|
-
)
|
|
1871
|
-
return ProxyConfig(
|
|
1872
|
-
username=u,
|
|
1873
|
-
password=p,
|
|
1874
|
-
product=ProxyProduct.DATACENTER,
|
|
1875
|
-
host=host,
|
|
1876
|
-
port=port,
|
|
1877
|
-
protocol=protocol,
|
|
1878
|
-
)
|
|
1879
|
-
|
|
1880
|
-
# Mobile
|
|
1881
|
-
u = os.getenv("THORDATA_MOBILE_USERNAME")
|
|
1882
|
-
p = os.getenv("THORDATA_MOBILE_PASSWORD")
|
|
1883
|
-
if u and p:
|
|
1884
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1885
|
-
ProxyProduct.MOBILE
|
|
1886
|
-
)
|
|
1887
|
-
return ProxyConfig(
|
|
1888
|
-
username=u,
|
|
1889
|
-
password=p,
|
|
1890
|
-
product=ProxyProduct.MOBILE,
|
|
1891
|
-
host=host,
|
|
1892
|
-
port=port,
|
|
1893
|
-
protocol=protocol,
|
|
1894
|
-
)
|
|
1895
|
-
|
|
1896
|
-
return None
|
|
1897
|
-
|
|
1898
|
-
def _build_gateway_headers(self) -> Dict[str, str]:
|
|
1899
|
-
"""
|
|
1900
|
-
Build headers for legacy gateway-style endpoints.
|
|
1901
|
-
|
|
1902
|
-
IMPORTANT:
|
|
1903
|
-
- SDK does NOT expose "sign/apiKey" as a separate credential model.
|
|
1904
|
-
- Values ALWAYS come from public_token/public_key.
|
|
1905
|
-
- Some backend endpoints may still expect header field names "sign" and "apiKey".
|
|
1906
|
-
"""
|
|
1907
|
-
self._require_public_credentials()
|
|
1908
|
-
return {
|
|
1909
|
-
"sign": self.public_token or "",
|
|
1910
|
-
"apiKey": self.public_key or "",
|
|
1911
|
-
"Content-Type": "application/x-www-form-urlencoded",
|
|
1912
|
-
}
|
|
1913
|
-
|
|
1914
|
-
def _proxy_request_with_proxy_manager(
|
|
1915
|
-
self,
|
|
1916
|
-
method: str,
|
|
1917
|
-
url: str,
|
|
1918
|
-
*,
|
|
1919
|
-
proxy_config: ProxyConfig,
|
|
1920
|
-
timeout: int,
|
|
1921
|
-
headers: Optional[Dict[str, str]] = None,
|
|
1922
|
-
params: Optional[Dict[str, Any]] = None,
|
|
1923
|
-
data: Any = None,
|
|
1924
|
-
) -> requests.Response:
|
|
1925
|
-
"""
|
|
1926
|
-
Proxy Network request implemented via urllib3.ProxyManager.
|
|
1927
|
-
|
|
1928
|
-
This is required to reliably support HTTPS proxy endpoints like:
|
|
1929
|
-
https://<endpoint>.pr.thordata.net:9999
|
|
1930
|
-
"""
|
|
1931
|
-
# Build final URL (include query params)
|
|
1932
|
-
req = requests.Request(method=method.upper(), url=url, params=params)
|
|
1933
|
-
prepped = self._proxy_session.prepare_request(req)
|
|
1934
|
-
final_url = prepped.url or url
|
|
1935
|
-
|
|
1936
|
-
proxy_url = proxy_config.build_proxy_endpoint()
|
|
1937
|
-
proxy_headers = urllib3.make_headers(
|
|
1938
|
-
proxy_basic_auth=proxy_config.build_proxy_basic_auth()
|
|
1939
|
-
)
|
|
1940
|
-
|
|
1941
|
-
pm = urllib3.ProxyManager(
|
|
1942
|
-
proxy_url,
|
|
1943
|
-
proxy_headers=proxy_headers,
|
|
1944
|
-
proxy_ssl_context=(
|
|
1945
|
-
ssl.create_default_context()
|
|
1946
|
-
if proxy_url.startswith("https://")
|
|
1947
|
-
else None
|
|
1948
|
-
),
|
|
1949
|
-
)
|
|
1950
|
-
|
|
1951
|
-
# Encode form data if dict
|
|
1952
|
-
body = None
|
|
1953
|
-
req_headers = dict(headers or {})
|
|
1954
|
-
if data is not None:
|
|
1955
|
-
if isinstance(data, dict):
|
|
1956
|
-
# form-urlencoded
|
|
1957
|
-
body = urlencode({k: str(v) for k, v in data.items()})
|
|
1958
|
-
req_headers.setdefault(
|
|
1959
|
-
"Content-Type", "application/x-www-form-urlencoded"
|
|
944
|
+
def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
|
|
945
|
+
for prod in [
|
|
946
|
+
ProxyProduct.RESIDENTIAL,
|
|
947
|
+
ProxyProduct.DATACENTER,
|
|
948
|
+
ProxyProduct.MOBILE,
|
|
949
|
+
]:
|
|
950
|
+
prefix = prod.value.upper()
|
|
951
|
+
u = os.getenv(f"THORDATA_{prefix}_USERNAME")
|
|
952
|
+
p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
|
|
953
|
+
if u and p:
|
|
954
|
+
h, port, proto = self._get_proxy_endpoint_overrides(prod)
|
|
955
|
+
return ProxyConfig(
|
|
956
|
+
username=u,
|
|
957
|
+
password=p,
|
|
958
|
+
product=prod,
|
|
959
|
+
host=h,
|
|
960
|
+
port=port,
|
|
961
|
+
protocol=proto,
|
|
1960
962
|
)
|
|
1961
|
-
|
|
1962
|
-
body = data
|
|
1963
|
-
|
|
1964
|
-
http_resp = pm.request(
|
|
1965
|
-
method.upper(),
|
|
1966
|
-
final_url,
|
|
1967
|
-
body=body,
|
|
1968
|
-
headers=req_headers or None,
|
|
1969
|
-
timeout=urllib3.Timeout(connect=timeout, read=timeout),
|
|
1970
|
-
retries=False,
|
|
1971
|
-
preload_content=True,
|
|
1972
|
-
)
|
|
1973
|
-
|
|
1974
|
-
# Convert urllib3 response -> requests.Response (keep your API stable)
|
|
1975
|
-
r = requests.Response()
|
|
1976
|
-
r.status_code = int(getattr(http_resp, "status", 0) or 0)
|
|
1977
|
-
r._content = http_resp.data or b""
|
|
1978
|
-
r.url = final_url
|
|
1979
|
-
r.headers = requests.structures.CaseInsensitiveDict(
|
|
1980
|
-
dict(http_resp.headers or {})
|
|
1981
|
-
)
|
|
1982
|
-
return r
|
|
1983
|
-
|
|
1984
|
-
def _request_with_retry(
|
|
1985
|
-
self, method: str, url: str, **kwargs: Any
|
|
1986
|
-
) -> requests.Response:
|
|
1987
|
-
"""Make a request with automatic retry."""
|
|
1988
|
-
kwargs.setdefault("timeout", self._default_timeout)
|
|
1989
|
-
|
|
1990
|
-
@with_retry(self._retry_config)
|
|
1991
|
-
def _do_request() -> requests.Response:
|
|
1992
|
-
return self._proxy_session.request(method, url, **kwargs)
|
|
1993
|
-
|
|
1994
|
-
try:
|
|
1995
|
-
return _do_request()
|
|
1996
|
-
except requests.Timeout as e:
|
|
1997
|
-
raise ThordataTimeoutError(
|
|
1998
|
-
f"Request timed out: {e}", original_error=e
|
|
1999
|
-
) from e
|
|
2000
|
-
except requests.RequestException as e:
|
|
2001
|
-
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
963
|
+
return None
|
|
2002
964
|
|
|
2003
965
|
def close(self) -> None:
|
|
2004
|
-
"""Close the underlying session."""
|
|
2005
966
|
self._proxy_session.close()
|
|
2006
967
|
self._api_session.close()
|
|
968
|
+
# Clean up connection pools
|
|
969
|
+
for pm in self._proxy_managers.values():
|
|
970
|
+
pm.clear()
|
|
971
|
+
self._proxy_managers.clear()
|
|
2007
972
|
|
|
2008
973
|
def __enter__(self) -> ThordataClient:
|
|
2009
974
|
return self
|