thordata-sdk 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/_utils.py +16 -13
- thordata/client.py +222 -1254
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.0.1.dist-info}/METADATA +1 -1
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.0.1.dist-info}/RECORD +8 -8
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.0.1.dist-info}/WHEEL +0 -0
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.0.0.dist-info → thordata_sdk-1.0.1.dist-info}/top_level.txt +0 -0
thordata/client.py
CHANGED
|
@@ -69,32 +69,6 @@ logger = logging.getLogger(__name__)
|
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
class ThordataClient:
|
|
72
|
-
"""
|
|
73
|
-
The official synchronous Python client for Thordata.
|
|
74
|
-
|
|
75
|
-
This client handles authentication and communication with:
|
|
76
|
-
- Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
|
|
77
|
-
- SERP API (Real-time Search Engine Results)
|
|
78
|
-
- Universal Scraping API (Web Unlocker - Single Page Rendering)
|
|
79
|
-
- Web Scraper API (Async Task Management)
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
scraper_token: The API token from your Dashboard.
|
|
83
|
-
public_token: The public API token (for task status, locations).
|
|
84
|
-
public_key: The public API key.
|
|
85
|
-
proxy_host: Custom proxy gateway host (optional).
|
|
86
|
-
proxy_port: Custom proxy gateway port (optional).
|
|
87
|
-
timeout: Default request timeout in seconds (default: 30).
|
|
88
|
-
retry_config: Configuration for automatic retries (optional).
|
|
89
|
-
|
|
90
|
-
Example:
|
|
91
|
-
>>> client = ThordataClient(
|
|
92
|
-
... scraper_token="your_scraper_token",
|
|
93
|
-
... public_token="your_public_token",
|
|
94
|
-
... public_key="your_public_key"
|
|
95
|
-
... )
|
|
96
|
-
"""
|
|
97
|
-
|
|
98
72
|
# API Endpoints
|
|
99
73
|
BASE_URL = "https://scraperapi.thordata.com"
|
|
100
74
|
UNIVERSAL_URL = "https://universalapi.thordata.com"
|
|
@@ -144,22 +118,21 @@ class ThordataClient:
|
|
|
144
118
|
f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
|
|
145
119
|
)
|
|
146
120
|
|
|
147
|
-
#
|
|
148
|
-
# - _proxy_session: used for proxy network traffic to target sites
|
|
149
|
-
# - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
|
|
150
|
-
#
|
|
151
|
-
# We intentionally do NOT set session-level proxies for _api_session,
|
|
152
|
-
# so developers can rely on system proxy settings (e.g., Clash) via env vars.
|
|
121
|
+
# HTTP Sessions
|
|
153
122
|
self._proxy_session = requests.Session()
|
|
154
123
|
self._proxy_session.trust_env = False
|
|
155
124
|
|
|
125
|
+
# Cache for ProxyManagers (Connection Pooling Fix)
|
|
126
|
+
# Key: proxy_url (str), Value: urllib3.ProxyManager
|
|
127
|
+
self._proxy_managers: Dict[str, urllib3.ProxyManager] = {}
|
|
128
|
+
|
|
156
129
|
self._api_session = requests.Session()
|
|
157
130
|
self._api_session.trust_env = True
|
|
158
131
|
self._api_session.headers.update(
|
|
159
132
|
{"User-Agent": build_user_agent(_sdk_version, "requests")}
|
|
160
133
|
)
|
|
161
134
|
|
|
162
|
-
# Base URLs
|
|
135
|
+
# Base URLs
|
|
163
136
|
scraperapi_base = (
|
|
164
137
|
scraperapi_base_url
|
|
165
138
|
or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
|
|
@@ -184,15 +157,13 @@ class ThordataClient:
|
|
|
184
157
|
or self.LOCATIONS_URL
|
|
185
158
|
).rstrip("/")
|
|
186
159
|
|
|
187
|
-
# These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
|
|
188
160
|
gateway_base = os.getenv(
|
|
189
161
|
"THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
|
|
190
162
|
)
|
|
191
|
-
|
|
163
|
+
self._gateway_base_url = gateway_base
|
|
164
|
+
self._child_base_url = os.getenv(
|
|
192
165
|
"THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
|
|
193
166
|
)
|
|
194
|
-
self._gateway_base_url = gateway_base
|
|
195
|
-
self._child_base_url = child_base
|
|
196
167
|
|
|
197
168
|
self._serp_url = f"{scraperapi_base}/request"
|
|
198
169
|
self._builder_url = f"{scraperapi_base}/builder"
|
|
@@ -205,7 +176,6 @@ class ThordataClient:
|
|
|
205
176
|
|
|
206
177
|
self._locations_base_url = locations_base
|
|
207
178
|
|
|
208
|
-
# These 2 lines keep your existing behavior (derive account endpoints from locations_base)
|
|
209
179
|
self._usage_stats_url = (
|
|
210
180
|
f"{locations_base.replace('/locations', '')}/account/usage-statistics"
|
|
211
181
|
)
|
|
@@ -225,7 +195,7 @@ class ThordataClient:
|
|
|
225
195
|
self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
|
|
226
196
|
|
|
227
197
|
# =========================================================================
|
|
228
|
-
# Proxy Network Methods
|
|
198
|
+
# Proxy Network Methods
|
|
229
199
|
# =========================================================================
|
|
230
200
|
def get(
|
|
231
201
|
self,
|
|
@@ -235,67 +205,8 @@ class ThordataClient:
|
|
|
235
205
|
timeout: Optional[int] = None,
|
|
236
206
|
**kwargs: Any,
|
|
237
207
|
) -> requests.Response:
|
|
238
|
-
"""
|
|
239
|
-
Send a GET request through the Thordata Proxy Network.
|
|
240
|
-
|
|
241
|
-
Args:
|
|
242
|
-
url: The target URL.
|
|
243
|
-
proxy_config: Custom proxy configuration for geo-targeting/sessions.
|
|
244
|
-
timeout: Request timeout in seconds.
|
|
245
|
-
**kwargs: Additional arguments to pass to requests.get().
|
|
246
|
-
|
|
247
|
-
Returns:
|
|
248
|
-
The response object.
|
|
249
|
-
|
|
250
|
-
Example:
|
|
251
|
-
>>> # Basic request
|
|
252
|
-
>>> response = client.get("https://httpbin.org/ip")
|
|
253
|
-
>>>
|
|
254
|
-
>>> # With geo-targeting
|
|
255
|
-
>>> from thordata.models import ProxyConfig
|
|
256
|
-
>>> config = ProxyConfig(
|
|
257
|
-
... username="myuser",
|
|
258
|
-
... password="mypass",
|
|
259
|
-
... country="us",
|
|
260
|
-
... city="seattle"
|
|
261
|
-
... )
|
|
262
|
-
>>> response = client.get("https://httpbin.org/ip", proxy_config=config)
|
|
263
|
-
"""
|
|
264
208
|
logger.debug(f"Proxy GET request: {url}")
|
|
265
|
-
|
|
266
|
-
timeout = timeout or self._default_timeout
|
|
267
|
-
|
|
268
|
-
if proxy_config is None:
|
|
269
|
-
proxy_config = self._get_default_proxy_config_from_env()
|
|
270
|
-
|
|
271
|
-
if proxy_config is None:
|
|
272
|
-
raise ThordataConfigError(
|
|
273
|
-
"Proxy credentials are missing. "
|
|
274
|
-
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
275
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
kwargs["proxies"] = proxy_config.to_proxies_dict()
|
|
279
|
-
|
|
280
|
-
@with_retry(self._retry_config)
|
|
281
|
-
def _do() -> requests.Response:
|
|
282
|
-
return self._proxy_request_with_proxy_manager(
|
|
283
|
-
"GET",
|
|
284
|
-
url,
|
|
285
|
-
proxy_config=proxy_config,
|
|
286
|
-
timeout=timeout,
|
|
287
|
-
headers=kwargs.pop("headers", None),
|
|
288
|
-
params=kwargs.pop("params", None),
|
|
289
|
-
)
|
|
290
|
-
|
|
291
|
-
try:
|
|
292
|
-
return _do()
|
|
293
|
-
except requests.Timeout as e:
|
|
294
|
-
raise ThordataTimeoutError(
|
|
295
|
-
f"Request timed out: {e}", original_error=e
|
|
296
|
-
) from e
|
|
297
|
-
except Exception as e:
|
|
298
|
-
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
209
|
+
return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
|
|
299
210
|
|
|
300
211
|
def post(
|
|
301
212
|
self,
|
|
@@ -305,20 +216,17 @@ class ThordataClient:
|
|
|
305
216
|
timeout: Optional[int] = None,
|
|
306
217
|
**kwargs: Any,
|
|
307
218
|
) -> requests.Response:
|
|
308
|
-
"""
|
|
309
|
-
Send a POST request through the Thordata Proxy Network.
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
url: The target URL.
|
|
313
|
-
proxy_config: Custom proxy configuration.
|
|
314
|
-
timeout: Request timeout in seconds.
|
|
315
|
-
**kwargs: Additional arguments to pass to requests.post().
|
|
316
|
-
|
|
317
|
-
Returns:
|
|
318
|
-
The response object.
|
|
319
|
-
"""
|
|
320
219
|
logger.debug(f"Proxy POST request: {url}")
|
|
220
|
+
return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
|
|
321
221
|
|
|
222
|
+
def _proxy_verb(
|
|
223
|
+
self,
|
|
224
|
+
method: str,
|
|
225
|
+
url: str,
|
|
226
|
+
proxy_config: Optional[ProxyConfig],
|
|
227
|
+
timeout: Optional[int],
|
|
228
|
+
**kwargs: Any,
|
|
229
|
+
) -> requests.Response:
|
|
322
230
|
timeout = timeout or self._default_timeout
|
|
323
231
|
|
|
324
232
|
if proxy_config is None:
|
|
@@ -327,19 +235,21 @@ class ThordataClient:
|
|
|
327
235
|
if proxy_config is None:
|
|
328
236
|
raise ThordataConfigError(
|
|
329
237
|
"Proxy credentials are missing. "
|
|
330
|
-
"Pass proxy_config
|
|
331
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
238
|
+
"Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
|
|
332
239
|
)
|
|
333
240
|
|
|
334
|
-
|
|
241
|
+
# For requests/urllib3, we don't need 'proxies' dict in kwargs
|
|
242
|
+
# because we use ProxyManager directly.
|
|
243
|
+
# But we remove it if user accidentally passed it to avoid confusion.
|
|
244
|
+
kwargs.pop("proxies", None)
|
|
335
245
|
|
|
336
246
|
@with_retry(self._retry_config)
|
|
337
247
|
def _do() -> requests.Response:
|
|
338
248
|
return self._proxy_request_with_proxy_manager(
|
|
339
|
-
|
|
249
|
+
method,
|
|
340
250
|
url,
|
|
341
|
-
proxy_config=proxy_config,
|
|
342
|
-
timeout=timeout,
|
|
251
|
+
proxy_config=proxy_config, # type: ignore
|
|
252
|
+
timeout=timeout, # type: ignore
|
|
343
253
|
headers=kwargs.pop("headers", None),
|
|
344
254
|
params=kwargs.pop("params", None),
|
|
345
255
|
data=kwargs.pop("data", None),
|
|
@@ -356,8 +266,8 @@ class ThordataClient:
|
|
|
356
266
|
|
|
357
267
|
def build_proxy_url(
|
|
358
268
|
self,
|
|
359
|
-
username: str,
|
|
360
|
-
password: str,
|
|
269
|
+
username: str,
|
|
270
|
+
password: str,
|
|
361
271
|
*,
|
|
362
272
|
country: Optional[str] = None,
|
|
363
273
|
state: Optional[str] = None,
|
|
@@ -366,28 +276,6 @@ class ThordataClient:
|
|
|
366
276
|
session_duration: Optional[int] = None,
|
|
367
277
|
product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
|
|
368
278
|
) -> str:
|
|
369
|
-
"""
|
|
370
|
-
Build a proxy URL with custom targeting options.
|
|
371
|
-
|
|
372
|
-
This is a convenience method for creating proxy URLs without
|
|
373
|
-
manually constructing a ProxyConfig.
|
|
374
|
-
|
|
375
|
-
Args:
|
|
376
|
-
country: Target country code (e.g., 'us', 'gb').
|
|
377
|
-
state: Target state (e.g., 'california').
|
|
378
|
-
city: Target city (e.g., 'seattle').
|
|
379
|
-
session_id: Session ID for sticky sessions.
|
|
380
|
-
session_duration: Session duration in minutes (1-90).
|
|
381
|
-
product: Proxy product type.
|
|
382
|
-
|
|
383
|
-
Returns:
|
|
384
|
-
The proxy URL string.
|
|
385
|
-
|
|
386
|
-
Example:
|
|
387
|
-
>>> url = client.build_proxy_url(country="us", city="seattle")
|
|
388
|
-
>>> proxies = {"http": url, "https": url}
|
|
389
|
-
>>> requests.get("https://example.com", proxies=proxies)
|
|
390
|
-
"""
|
|
391
279
|
config = ProxyConfig(
|
|
392
280
|
username=username,
|
|
393
281
|
password=password,
|
|
@@ -403,7 +291,7 @@ class ThordataClient:
|
|
|
403
291
|
return config.build_proxy_url()
|
|
404
292
|
|
|
405
293
|
# =========================================================================
|
|
406
|
-
# Internal
|
|
294
|
+
# Internal Request Helpers
|
|
407
295
|
# =========================================================================
|
|
408
296
|
def _api_request_with_retry(
|
|
409
297
|
self,
|
|
@@ -414,8 +302,6 @@ class ThordataClient:
|
|
|
414
302
|
headers: Optional[Dict[str, str]] = None,
|
|
415
303
|
params: Optional[Dict[str, Any]] = None,
|
|
416
304
|
) -> requests.Response:
|
|
417
|
-
"""Make an API request with automatic retry on transient failures."""
|
|
418
|
-
|
|
419
305
|
@with_retry(self._retry_config)
|
|
420
306
|
def _do_request() -> requests.Response:
|
|
421
307
|
return self._api_session.request(
|
|
@@ -438,8 +324,83 @@ class ThordataClient:
|
|
|
438
324
|
f"API request failed: {e}", original_error=e
|
|
439
325
|
) from e
|
|
440
326
|
|
|
327
|
+
def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
|
|
328
|
+
"""Get or create a ProxyManager for the given proxy URL (Pooled)."""
|
|
329
|
+
if proxy_url not in self._proxy_managers:
|
|
330
|
+
# Create a new manager if not cached
|
|
331
|
+
proxy_ssl_context = None
|
|
332
|
+
if proxy_url.startswith("https://"):
|
|
333
|
+
proxy_ssl_context = ssl.create_default_context()
|
|
334
|
+
|
|
335
|
+
self._proxy_managers[proxy_url] = urllib3.ProxyManager(
|
|
336
|
+
proxy_url,
|
|
337
|
+
proxy_ssl_context=proxy_ssl_context,
|
|
338
|
+
num_pools=10, # Allow concurrency
|
|
339
|
+
maxsize=10,
|
|
340
|
+
)
|
|
341
|
+
return self._proxy_managers[proxy_url]
|
|
342
|
+
|
|
343
|
+
def _proxy_request_with_proxy_manager(
|
|
344
|
+
self,
|
|
345
|
+
method: str,
|
|
346
|
+
url: str,
|
|
347
|
+
*,
|
|
348
|
+
proxy_config: ProxyConfig,
|
|
349
|
+
timeout: int,
|
|
350
|
+
headers: Optional[Dict[str, str]] = None,
|
|
351
|
+
params: Optional[Dict[str, Any]] = None,
|
|
352
|
+
data: Any = None,
|
|
353
|
+
) -> requests.Response:
|
|
354
|
+
# 1. Prepare URL and Body
|
|
355
|
+
req = requests.Request(method=method.upper(), url=url, params=params)
|
|
356
|
+
prepped = self._proxy_session.prepare_request(req)
|
|
357
|
+
final_url = prepped.url or url
|
|
358
|
+
|
|
359
|
+
# 2. Get Proxy Configuration
|
|
360
|
+
proxy_url = proxy_config.build_proxy_endpoint()
|
|
361
|
+
proxy_headers = urllib3.make_headers(
|
|
362
|
+
proxy_basic_auth=proxy_config.build_proxy_basic_auth()
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
# 3. Get Cached Proxy Manager
|
|
366
|
+
pm = self._get_proxy_manager(proxy_url)
|
|
367
|
+
|
|
368
|
+
# 4. Prepare Request Headers/Body
|
|
369
|
+
req_headers = dict(headers or {})
|
|
370
|
+
body = None
|
|
371
|
+
if data is not None:
|
|
372
|
+
if isinstance(data, dict):
|
|
373
|
+
body = urlencode({k: str(v) for k, v in data.items()})
|
|
374
|
+
req_headers.setdefault(
|
|
375
|
+
"Content-Type", "application/x-www-form-urlencoded"
|
|
376
|
+
)
|
|
377
|
+
else:
|
|
378
|
+
body = data
|
|
379
|
+
|
|
380
|
+
# 5. Execute Request via urllib3
|
|
381
|
+
http_resp = pm.request(
|
|
382
|
+
method.upper(),
|
|
383
|
+
final_url,
|
|
384
|
+
body=body,
|
|
385
|
+
headers=req_headers or None,
|
|
386
|
+
proxy_headers=proxy_headers, # Attach Auth here
|
|
387
|
+
timeout=urllib3.Timeout(connect=timeout, read=timeout),
|
|
388
|
+
retries=False, # We handle retries in _proxy_verb
|
|
389
|
+
preload_content=True,
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# 6. Convert back to requests.Response
|
|
393
|
+
r = requests.Response()
|
|
394
|
+
r.status_code = int(getattr(http_resp, "status", 0) or 0)
|
|
395
|
+
r._content = http_resp.data or b""
|
|
396
|
+
r.url = final_url
|
|
397
|
+
r.headers = requests.structures.CaseInsensitiveDict(
|
|
398
|
+
dict(http_resp.headers or {})
|
|
399
|
+
)
|
|
400
|
+
return r
|
|
401
|
+
|
|
441
402
|
# =========================================================================
|
|
442
|
-
# SERP API Methods
|
|
403
|
+
# SERP API Methods
|
|
443
404
|
# =========================================================================
|
|
444
405
|
def serp_search(
|
|
445
406
|
self,
|
|
@@ -456,46 +417,8 @@ class ThordataClient:
|
|
|
456
417
|
output_format: str = "json",
|
|
457
418
|
**kwargs: Any,
|
|
458
419
|
) -> Dict[str, Any]:
|
|
459
|
-
"""
|
|
460
|
-
Execute a real-time SERP (Search Engine Results Page) search.
|
|
461
|
-
|
|
462
|
-
Args:
|
|
463
|
-
query: The search keywords.
|
|
464
|
-
engine: Search engine (google, bing, yandex, duckduckgo, baidu).
|
|
465
|
-
num: Number of results to retrieve (default: 10).
|
|
466
|
-
country: Country code for localized results (e.g., 'us').
|
|
467
|
-
language: Language code for interface (e.g., 'en').
|
|
468
|
-
search_type: Type of search (images, news, shopping, videos, etc.).
|
|
469
|
-
device: Device type ('desktop', 'mobile', 'tablet').
|
|
470
|
-
render_js: Enable JavaScript rendering in SERP (render_js=True).
|
|
471
|
-
no_cache: Disable internal caching (no_cache=True).
|
|
472
|
-
output_format: 'json' to return parsed JSON (default),
|
|
473
|
-
'html' to return HTML wrapped in {'html': ...}.
|
|
474
|
-
**kwargs: Additional engine-specific parameters.
|
|
475
|
-
|
|
476
|
-
Returns:
|
|
477
|
-
Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
|
|
478
|
-
|
|
479
|
-
Example:
|
|
480
|
-
>>> # Basic search
|
|
481
|
-
>>> results = client.serp_search("python tutorial")
|
|
482
|
-
>>>
|
|
483
|
-
>>> # With options
|
|
484
|
-
>>> results = client.serp_search(
|
|
485
|
-
... "laptop reviews",
|
|
486
|
-
... engine="google",
|
|
487
|
-
... num=20,
|
|
488
|
-
... country="us",
|
|
489
|
-
... search_type="shopping",
|
|
490
|
-
... device="mobile",
|
|
491
|
-
... render_js=True,
|
|
492
|
-
... no_cache=True,
|
|
493
|
-
... )
|
|
494
|
-
"""
|
|
495
|
-
# Normalize engine
|
|
496
420
|
engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
|
|
497
421
|
|
|
498
|
-
# Build request using model
|
|
499
422
|
request = SerpRequest(
|
|
500
423
|
query=query,
|
|
501
424
|
engine=engine_str,
|
|
@@ -510,84 +433,13 @@ class ThordataClient:
|
|
|
510
433
|
extra_params=kwargs,
|
|
511
434
|
)
|
|
512
435
|
|
|
513
|
-
|
|
514
|
-
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
515
|
-
|
|
516
|
-
logger.info(
|
|
517
|
-
f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
|
|
518
|
-
)
|
|
519
|
-
|
|
520
|
-
try:
|
|
521
|
-
response = self._api_request_with_retry(
|
|
522
|
-
"POST",
|
|
523
|
-
self._serp_url,
|
|
524
|
-
data=payload,
|
|
525
|
-
headers=headers,
|
|
526
|
-
)
|
|
527
|
-
response.raise_for_status()
|
|
528
|
-
|
|
529
|
-
# JSON mode (default)
|
|
530
|
-
if output_format.lower() == "json":
|
|
531
|
-
data = response.json()
|
|
532
|
-
|
|
533
|
-
if isinstance(data, dict):
|
|
534
|
-
code = data.get("code")
|
|
535
|
-
if code is not None and code != 200:
|
|
536
|
-
msg = extract_error_message(data)
|
|
537
|
-
raise_for_code(
|
|
538
|
-
f"SERP API Error: {msg}",
|
|
539
|
-
code=code,
|
|
540
|
-
payload=data,
|
|
541
|
-
)
|
|
542
|
-
|
|
543
|
-
return parse_json_response(data)
|
|
544
|
-
|
|
545
|
-
# HTML mode: wrap as dict to keep return type stable
|
|
546
|
-
return {"html": response.text}
|
|
547
|
-
|
|
548
|
-
except requests.Timeout as e:
|
|
549
|
-
raise ThordataTimeoutError(
|
|
550
|
-
f"SERP request timed out: {e}",
|
|
551
|
-
original_error=e,
|
|
552
|
-
) from e
|
|
553
|
-
except requests.RequestException as e:
|
|
554
|
-
raise ThordataNetworkError(
|
|
555
|
-
f"SERP request failed: {e}",
|
|
556
|
-
original_error=e,
|
|
557
|
-
) from e
|
|
436
|
+
return self.serp_search_advanced(request)
|
|
558
437
|
|
|
559
438
|
def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
|
|
560
|
-
"""
|
|
561
|
-
Execute a SERP search using a SerpRequest object.
|
|
562
|
-
|
|
563
|
-
This method provides full control over all search parameters.
|
|
564
|
-
|
|
565
|
-
Args:
|
|
566
|
-
request: A SerpRequest object with all parameters configured.
|
|
567
|
-
|
|
568
|
-
Returns:
|
|
569
|
-
Dict[str, Any]: Parsed JSON results or dict with 'html' key.
|
|
570
|
-
|
|
571
|
-
Example:
|
|
572
|
-
>>> from thordata.models import SerpRequest
|
|
573
|
-
>>> request = SerpRequest(
|
|
574
|
-
... query="python programming",
|
|
575
|
-
... engine="google",
|
|
576
|
-
... num=50,
|
|
577
|
-
... country="us",
|
|
578
|
-
... language="en",
|
|
579
|
-
... search_type="news",
|
|
580
|
-
... time_filter="week",
|
|
581
|
-
... safe_search=True
|
|
582
|
-
... )
|
|
583
|
-
>>> results = client.serp_search_advanced(request)
|
|
584
|
-
"""
|
|
585
439
|
payload = request.to_payload()
|
|
586
440
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
587
441
|
|
|
588
|
-
logger.info(
|
|
589
|
-
f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
|
|
590
|
-
)
|
|
442
|
+
logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
|
|
591
443
|
|
|
592
444
|
try:
|
|
593
445
|
response = self._api_request_with_retry(
|
|
@@ -600,34 +452,22 @@ class ThordataClient:
|
|
|
600
452
|
|
|
601
453
|
if request.output_format.lower() == "json":
|
|
602
454
|
data = response.json()
|
|
603
|
-
|
|
604
455
|
if isinstance(data, dict):
|
|
605
456
|
code = data.get("code")
|
|
606
457
|
if code is not None and code != 200:
|
|
607
458
|
msg = extract_error_message(data)
|
|
608
|
-
raise_for_code(
|
|
609
|
-
f"SERP API Error: {msg}",
|
|
610
|
-
code=code,
|
|
611
|
-
payload=data,
|
|
612
|
-
)
|
|
613
|
-
|
|
459
|
+
raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
|
|
614
460
|
return parse_json_response(data)
|
|
615
461
|
|
|
616
462
|
return {"html": response.text}
|
|
617
463
|
|
|
618
464
|
except requests.Timeout as e:
|
|
619
|
-
raise ThordataTimeoutError(
|
|
620
|
-
f"SERP request timed out: {e}",
|
|
621
|
-
original_error=e,
|
|
622
|
-
) from e
|
|
465
|
+
raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
|
|
623
466
|
except requests.RequestException as e:
|
|
624
|
-
raise ThordataNetworkError(
|
|
625
|
-
f"SERP request failed: {e}",
|
|
626
|
-
original_error=e,
|
|
627
|
-
) from e
|
|
467
|
+
raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
|
|
628
468
|
|
|
629
469
|
# =========================================================================
|
|
630
|
-
# Universal Scraping API
|
|
470
|
+
# Universal Scraping API
|
|
631
471
|
# =========================================================================
|
|
632
472
|
def universal_scrape(
|
|
633
473
|
self,
|
|
@@ -641,37 +481,6 @@ class ThordataClient:
|
|
|
641
481
|
wait_for: Optional[str] = None,
|
|
642
482
|
**kwargs: Any,
|
|
643
483
|
) -> Union[str, bytes]:
|
|
644
|
-
"""
|
|
645
|
-
Scrape a URL using the Universal Scraping API (Web Unlocker).
|
|
646
|
-
|
|
647
|
-
Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
|
|
648
|
-
|
|
649
|
-
Args:
|
|
650
|
-
url: Target URL.
|
|
651
|
-
js_render: Enable JavaScript rendering (headless browser).
|
|
652
|
-
output_format: "html" or "png" (screenshot).
|
|
653
|
-
country: Geo-targeting country code.
|
|
654
|
-
block_resources: Resources to block (e.g., 'script,image').
|
|
655
|
-
wait: Wait time in milliseconds after page load.
|
|
656
|
-
wait_for: CSS selector to wait for.
|
|
657
|
-
**kwargs: Additional parameters.
|
|
658
|
-
|
|
659
|
-
Returns:
|
|
660
|
-
HTML string or PNG bytes depending on output_format.
|
|
661
|
-
|
|
662
|
-
Example:
|
|
663
|
-
>>> # Get HTML
|
|
664
|
-
>>> html = client.universal_scrape("https://example.com", js_render=True)
|
|
665
|
-
>>>
|
|
666
|
-
>>> # Get screenshot
|
|
667
|
-
>>> png = client.universal_scrape(
|
|
668
|
-
... "https://example.com",
|
|
669
|
-
... js_render=True,
|
|
670
|
-
... output_format="png"
|
|
671
|
-
... )
|
|
672
|
-
>>> with open("screenshot.png", "wb") as f:
|
|
673
|
-
... f.write(png)
|
|
674
|
-
"""
|
|
675
484
|
request = UniversalScrapeRequest(
|
|
676
485
|
url=url,
|
|
677
486
|
js_render=js_render,
|
|
@@ -682,27 +491,15 @@ class ThordataClient:
|
|
|
682
491
|
wait_for=wait_for,
|
|
683
492
|
extra_params=kwargs,
|
|
684
493
|
)
|
|
685
|
-
|
|
686
494
|
return self.universal_scrape_advanced(request)
|
|
687
495
|
|
|
688
496
|
def universal_scrape_advanced(
|
|
689
497
|
self, request: UniversalScrapeRequest
|
|
690
498
|
) -> Union[str, bytes]:
|
|
691
|
-
"""
|
|
692
|
-
Scrape using a UniversalScrapeRequest object for full control.
|
|
693
|
-
|
|
694
|
-
Args:
|
|
695
|
-
request: A UniversalScrapeRequest with all parameters.
|
|
696
|
-
|
|
697
|
-
Returns:
|
|
698
|
-
HTML string or PNG bytes.
|
|
699
|
-
"""
|
|
700
499
|
payload = request.to_payload()
|
|
701
500
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
702
501
|
|
|
703
|
-
logger.info(
|
|
704
|
-
f"Universal Scrape: {request.url} (format: {request.output_format})"
|
|
705
|
-
)
|
|
502
|
+
logger.info(f"Universal Scrape: {request.url}")
|
|
706
503
|
|
|
707
504
|
try:
|
|
708
505
|
response = self._api_request_with_retry(
|
|
@@ -712,53 +509,40 @@ class ThordataClient:
|
|
|
712
509
|
headers=headers,
|
|
713
510
|
)
|
|
714
511
|
response.raise_for_status()
|
|
715
|
-
|
|
716
512
|
return self._process_universal_response(response, request.output_format)
|
|
717
513
|
|
|
718
514
|
except requests.Timeout as e:
|
|
719
515
|
raise ThordataTimeoutError(
|
|
720
|
-
f"Universal
|
|
516
|
+
f"Universal timeout: {e}", original_error=e
|
|
721
517
|
) from e
|
|
722
518
|
except requests.RequestException as e:
|
|
723
519
|
raise ThordataNetworkError(
|
|
724
|
-
f"Universal
|
|
520
|
+
f"Universal failed: {e}", original_error=e
|
|
725
521
|
) from e
|
|
726
522
|
|
|
727
523
|
def _process_universal_response(
|
|
728
524
|
self, response: requests.Response, output_format: str
|
|
729
525
|
) -> Union[str, bytes]:
|
|
730
|
-
"""Process the response from Universal API."""
|
|
731
|
-
# Try to parse as JSON
|
|
732
526
|
try:
|
|
733
527
|
resp_json = response.json()
|
|
734
528
|
except ValueError:
|
|
735
|
-
|
|
736
|
-
if output_format.lower() == "png":
|
|
737
|
-
return response.content
|
|
738
|
-
return response.text
|
|
529
|
+
return response.content if output_format.lower() == "png" else response.text
|
|
739
530
|
|
|
740
|
-
# Check for API-level errors
|
|
741
531
|
if isinstance(resp_json, dict):
|
|
742
532
|
code = resp_json.get("code")
|
|
743
533
|
if code is not None and code != 200:
|
|
744
534
|
msg = extract_error_message(resp_json)
|
|
745
|
-
raise_for_code(
|
|
746
|
-
f"Universal API Error: {msg}", code=code, payload=resp_json
|
|
747
|
-
)
|
|
535
|
+
raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
|
|
748
536
|
|
|
749
|
-
# Extract HTML
|
|
750
537
|
if "html" in resp_json:
|
|
751
538
|
return resp_json["html"]
|
|
752
|
-
|
|
753
|
-
# Extract PNG
|
|
754
539
|
if "png" in resp_json:
|
|
755
540
|
return decode_base64_image(resp_json["png"])
|
|
756
541
|
|
|
757
|
-
# Fallback
|
|
758
542
|
return str(resp_json)
|
|
759
543
|
|
|
760
544
|
# =========================================================================
|
|
761
|
-
# Web Scraper API
|
|
545
|
+
# Web Scraper API (Tasks)
|
|
762
546
|
# =========================================================================
|
|
763
547
|
def create_scraper_task(
|
|
764
548
|
self,
|
|
@@ -768,29 +552,6 @@ class ThordataClient:
|
|
|
768
552
|
parameters: Dict[str, Any],
|
|
769
553
|
universal_params: Optional[Dict[str, Any]] = None,
|
|
770
554
|
) -> str:
|
|
771
|
-
"""
|
|
772
|
-
Create an asynchronous Web Scraper task.
|
|
773
|
-
|
|
774
|
-
Note: Get spider_id and spider_name from the Thordata Dashboard.
|
|
775
|
-
|
|
776
|
-
Args:
|
|
777
|
-
file_name: Name for the output file.
|
|
778
|
-
spider_id: Spider identifier from Dashboard.
|
|
779
|
-
spider_name: Spider name (e.g., "youtube.com").
|
|
780
|
-
parameters: Spider-specific parameters.
|
|
781
|
-
universal_params: Global spider settings.
|
|
782
|
-
|
|
783
|
-
Returns:
|
|
784
|
-
The created task_id.
|
|
785
|
-
|
|
786
|
-
Example:
|
|
787
|
-
>>> task_id = client.create_scraper_task(
|
|
788
|
-
... file_name="youtube_data",
|
|
789
|
-
... spider_id="youtube_video-post_by-url",
|
|
790
|
-
... spider_name="youtube.com",
|
|
791
|
-
... parameters={"url": "https://youtube.com/@channel/videos"}
|
|
792
|
-
... )
|
|
793
|
-
"""
|
|
794
555
|
config = ScraperTaskConfig(
|
|
795
556
|
file_name=file_name,
|
|
796
557
|
spider_id=spider_id,
|
|
@@ -798,50 +559,26 @@ class ThordataClient:
|
|
|
798
559
|
parameters=parameters,
|
|
799
560
|
universal_params=universal_params,
|
|
800
561
|
)
|
|
801
|
-
|
|
802
562
|
return self.create_scraper_task_advanced(config)
|
|
803
563
|
|
|
804
564
|
def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
805
|
-
"""
|
|
806
|
-
Create a scraper task using a ScraperTaskConfig object.
|
|
807
|
-
|
|
808
|
-
Args:
|
|
809
|
-
config: Task configuration.
|
|
810
|
-
|
|
811
|
-
Returns:
|
|
812
|
-
The created task_id.
|
|
813
|
-
"""
|
|
814
565
|
self._require_public_credentials()
|
|
815
|
-
|
|
816
566
|
payload = config.to_payload()
|
|
817
|
-
|
|
818
|
-
# Builder needs 3 headers: token, key, Authorization Bearer
|
|
819
567
|
headers = build_builder_headers(
|
|
820
|
-
self.scraper_token,
|
|
821
|
-
self.public_token or "",
|
|
822
|
-
self.public_key or "",
|
|
568
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
823
569
|
)
|
|
824
570
|
|
|
825
|
-
logger.info(f"Creating Scraper Task: {config.spider_name}")
|
|
826
|
-
|
|
827
571
|
try:
|
|
828
572
|
response = self._api_request_with_retry(
|
|
829
|
-
"POST",
|
|
830
|
-
self._builder_url,
|
|
831
|
-
data=payload,
|
|
832
|
-
headers=headers,
|
|
573
|
+
"POST", self._builder_url, data=payload, headers=headers
|
|
833
574
|
)
|
|
834
575
|
response.raise_for_status()
|
|
835
|
-
|
|
836
576
|
data = response.json()
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
|
|
842
|
-
|
|
577
|
+
if data.get("code") != 200:
|
|
578
|
+
raise_for_code(
|
|
579
|
+
"Task creation failed", code=data.get("code"), payload=data
|
|
580
|
+
)
|
|
843
581
|
return data["data"]["task_id"]
|
|
844
|
-
|
|
845
582
|
except requests.RequestException as e:
|
|
846
583
|
raise ThordataNetworkError(
|
|
847
584
|
f"Task creation failed: {e}", original_error=e
|
|
@@ -855,35 +592,6 @@ class ThordataClient:
|
|
|
855
592
|
parameters: Dict[str, Any],
|
|
856
593
|
common_settings: "CommonSettings",
|
|
857
594
|
) -> str:
|
|
858
|
-
"""
|
|
859
|
-
Create a YouTube video/audio download task.
|
|
860
|
-
|
|
861
|
-
Uses the /video_builder endpoint.
|
|
862
|
-
|
|
863
|
-
Args:
|
|
864
|
-
file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
|
|
865
|
-
spider_id: Spider identifier (e.g., "youtube_video_by-url").
|
|
866
|
-
spider_name: Spider name (typically "youtube.com").
|
|
867
|
-
parameters: Spider parameters (e.g., {"url": "..."}).
|
|
868
|
-
common_settings: Video/audio settings.
|
|
869
|
-
|
|
870
|
-
Returns:
|
|
871
|
-
The created task_id.
|
|
872
|
-
|
|
873
|
-
Example:
|
|
874
|
-
>>> from thordata import CommonSettings
|
|
875
|
-
>>> task_id = client.create_video_task(
|
|
876
|
-
... file_name="{{VideoID}}",
|
|
877
|
-
... spider_id="youtube_video_by-url",
|
|
878
|
-
... spider_name="youtube.com",
|
|
879
|
-
... parameters={"url": "https://youtube.com/watch?v=xxx"},
|
|
880
|
-
... common_settings=CommonSettings(
|
|
881
|
-
... resolution="1080p",
|
|
882
|
-
... is_subtitles="true"
|
|
883
|
-
... )
|
|
884
|
-
... )
|
|
885
|
-
"""
|
|
886
|
-
|
|
887
595
|
config = VideoTaskConfig(
|
|
888
596
|
file_name=file_name,
|
|
889
597
|
spider_id=spider_id,
|
|
@@ -891,210 +599,97 @@ class ThordataClient:
|
|
|
891
599
|
parameters=parameters,
|
|
892
600
|
common_settings=common_settings,
|
|
893
601
|
)
|
|
894
|
-
|
|
895
602
|
return self.create_video_task_advanced(config)
|
|
896
603
|
|
|
897
604
|
def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
898
|
-
"""
|
|
899
|
-
Create a video task using VideoTaskConfig object.
|
|
900
|
-
|
|
901
|
-
Args:
|
|
902
|
-
config: Video task configuration.
|
|
903
|
-
|
|
904
|
-
Returns:
|
|
905
|
-
The created task_id.
|
|
906
|
-
"""
|
|
907
|
-
|
|
908
605
|
self._require_public_credentials()
|
|
909
|
-
|
|
910
606
|
payload = config.to_payload()
|
|
911
607
|
headers = build_builder_headers(
|
|
912
|
-
self.scraper_token,
|
|
913
|
-
self.public_token or "",
|
|
914
|
-
self.public_key or "",
|
|
608
|
+
self.scraper_token, self.public_token or "", self.public_key or ""
|
|
915
609
|
)
|
|
916
610
|
|
|
917
|
-
logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
|
|
918
|
-
|
|
919
611
|
response = self._api_request_with_retry(
|
|
920
|
-
"POST",
|
|
921
|
-
self._video_builder_url,
|
|
922
|
-
data=payload,
|
|
923
|
-
headers=headers,
|
|
612
|
+
"POST", self._video_builder_url, data=payload, headers=headers
|
|
924
613
|
)
|
|
925
614
|
response.raise_for_status()
|
|
926
|
-
|
|
927
615
|
data = response.json()
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
if code != 200:
|
|
931
|
-
msg = extract_error_message(data)
|
|
616
|
+
if data.get("code") != 200:
|
|
932
617
|
raise_for_code(
|
|
933
|
-
|
|
618
|
+
"Video task creation failed", code=data.get("code"), payload=data
|
|
934
619
|
)
|
|
935
|
-
|
|
936
620
|
return data["data"]["task_id"]
|
|
937
621
|
|
|
938
622
|
def get_task_status(self, task_id: str) -> str:
|
|
939
|
-
"""
|
|
940
|
-
Check the status of an asynchronous scraping task.
|
|
941
|
-
|
|
942
|
-
Returns:
|
|
943
|
-
Status string (e.g., "running", "ready", "failed").
|
|
944
|
-
|
|
945
|
-
Raises:
|
|
946
|
-
ThordataConfigError: If public credentials are missing.
|
|
947
|
-
ThordataAPIError: If API returns a non-200 code in JSON payload.
|
|
948
|
-
ThordataNetworkError: If network/HTTP request fails.
|
|
949
|
-
"""
|
|
950
623
|
self._require_public_credentials()
|
|
951
|
-
|
|
952
624
|
headers = build_public_api_headers(
|
|
953
625
|
self.public_token or "", self.public_key or ""
|
|
954
626
|
)
|
|
955
|
-
payload = {"tasks_ids": task_id}
|
|
956
|
-
|
|
957
627
|
try:
|
|
958
628
|
response = self._api_request_with_retry(
|
|
959
629
|
"POST",
|
|
960
630
|
self._status_url,
|
|
961
|
-
data=
|
|
631
|
+
data={"tasks_ids": task_id},
|
|
962
632
|
headers=headers,
|
|
963
633
|
)
|
|
964
634
|
response.raise_for_status()
|
|
965
635
|
data = response.json()
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
payload=data,
|
|
975
|
-
)
|
|
976
|
-
|
|
977
|
-
items = data.get("data") or []
|
|
978
|
-
for item in items:
|
|
979
|
-
if str(item.get("task_id")) == str(task_id):
|
|
980
|
-
return item.get("status", "unknown")
|
|
981
|
-
|
|
982
|
-
return "unknown"
|
|
983
|
-
|
|
984
|
-
# Unexpected payload type
|
|
985
|
-
raise ThordataNetworkError(
|
|
986
|
-
f"Unexpected task status response type: {type(data).__name__}",
|
|
987
|
-
original_error=None,
|
|
988
|
-
)
|
|
989
|
-
|
|
990
|
-
except requests.Timeout as e:
|
|
991
|
-
raise ThordataTimeoutError(
|
|
992
|
-
f"Status check timed out: {e}", original_error=e
|
|
993
|
-
) from e
|
|
636
|
+
if data.get("code") != 200:
|
|
637
|
+
raise_for_code("Task status error", code=data.get("code"), payload=data)
|
|
638
|
+
|
|
639
|
+
items = data.get("data") or []
|
|
640
|
+
for item in items:
|
|
641
|
+
if str(item.get("task_id")) == str(task_id):
|
|
642
|
+
return item.get("status", "unknown")
|
|
643
|
+
return "unknown"
|
|
994
644
|
except requests.RequestException as e:
|
|
995
645
|
raise ThordataNetworkError(
|
|
996
646
|
f"Status check failed: {e}", original_error=e
|
|
997
647
|
) from e
|
|
998
648
|
|
|
999
649
|
def safe_get_task_status(self, task_id: str) -> str:
|
|
1000
|
-
"""
|
|
1001
|
-
Backward-compatible status check.
|
|
1002
|
-
|
|
1003
|
-
Returns:
|
|
1004
|
-
Status string, or "error" on any exception.
|
|
1005
|
-
"""
|
|
1006
650
|
try:
|
|
1007
651
|
return self.get_task_status(task_id)
|
|
1008
652
|
except Exception:
|
|
1009
653
|
return "error"
|
|
1010
654
|
|
|
1011
655
|
def get_task_result(self, task_id: str, file_type: str = "json") -> str:
|
|
1012
|
-
"""
|
|
1013
|
-
Get the download URL for a completed task.
|
|
1014
|
-
"""
|
|
1015
656
|
self._require_public_credentials()
|
|
1016
|
-
|
|
1017
657
|
headers = build_public_api_headers(
|
|
1018
658
|
self.public_token or "", self.public_key or ""
|
|
1019
659
|
)
|
|
1020
|
-
payload = {"tasks_id": task_id, "type": file_type}
|
|
1021
|
-
|
|
1022
|
-
logger.info(f"Getting result URL for Task: {task_id}")
|
|
1023
|
-
|
|
1024
660
|
try:
|
|
1025
661
|
response = self._api_request_with_retry(
|
|
1026
662
|
"POST",
|
|
1027
663
|
self._download_url,
|
|
1028
|
-
data=
|
|
664
|
+
data={"tasks_id": task_id, "type": file_type},
|
|
1029
665
|
headers=headers,
|
|
1030
666
|
)
|
|
1031
667
|
response.raise_for_status()
|
|
1032
|
-
|
|
1033
668
|
data = response.json()
|
|
1034
|
-
code
|
|
1035
|
-
|
|
1036
|
-
if code == 200 and data.get("data"):
|
|
669
|
+
if data.get("code") == 200 and data.get("data"):
|
|
1037
670
|
return data["data"]["download"]
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
|
|
1041
|
-
# This line won't be reached, but satisfies mypy
|
|
1042
|
-
raise RuntimeError("Unexpected state")
|
|
1043
|
-
|
|
671
|
+
raise_for_code("Get result failed", code=data.get("code"), payload=data)
|
|
672
|
+
return ""
|
|
1044
673
|
except requests.RequestException as e:
|
|
1045
674
|
raise ThordataNetworkError(
|
|
1046
675
|
f"Get result failed: {e}", original_error=e
|
|
1047
676
|
) from e
|
|
1048
677
|
|
|
1049
|
-
def list_tasks(
|
|
1050
|
-
self,
|
|
1051
|
-
page: int = 1,
|
|
1052
|
-
size: int = 20,
|
|
1053
|
-
) -> Dict[str, Any]:
|
|
1054
|
-
"""
|
|
1055
|
-
List all Web Scraper tasks.
|
|
1056
|
-
|
|
1057
|
-
Args:
|
|
1058
|
-
page: Page number (starts from 1).
|
|
1059
|
-
size: Number of tasks per page.
|
|
1060
|
-
|
|
1061
|
-
Returns:
|
|
1062
|
-
Dict containing 'count' and 'list' of tasks.
|
|
1063
|
-
|
|
1064
|
-
Example:
|
|
1065
|
-
>>> result = client.list_tasks(page=1, size=10)
|
|
1066
|
-
>>> print(f"Total tasks: {result['count']}")
|
|
1067
|
-
>>> for task in result['list']:
|
|
1068
|
-
... print(f"Task {task['task_id']}: {task['status']}")
|
|
1069
|
-
"""
|
|
678
|
+
def list_tasks(self, page: int = 1, size: int = 20) -> Dict[str, Any]:
|
|
1070
679
|
self._require_public_credentials()
|
|
1071
|
-
|
|
1072
680
|
headers = build_public_api_headers(
|
|
1073
681
|
self.public_token or "", self.public_key or ""
|
|
1074
682
|
)
|
|
1075
|
-
payload: Dict[str, Any] = {}
|
|
1076
|
-
if page:
|
|
1077
|
-
payload["page"] = str(page)
|
|
1078
|
-
if size:
|
|
1079
|
-
payload["size"] = str(size)
|
|
1080
|
-
|
|
1081
|
-
logger.info(f"Listing tasks: page={page}, size={size}")
|
|
1082
|
-
|
|
1083
683
|
response = self._api_request_with_retry(
|
|
1084
684
|
"POST",
|
|
1085
685
|
self._list_url,
|
|
1086
|
-
data=
|
|
686
|
+
data={"page": str(page), "size": str(size)},
|
|
1087
687
|
headers=headers,
|
|
1088
688
|
)
|
|
1089
689
|
response.raise_for_status()
|
|
1090
|
-
|
|
1091
690
|
data = response.json()
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
if code != 200:
|
|
1095
|
-
msg = extract_error_message(data)
|
|
1096
|
-
raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
|
|
1097
|
-
|
|
691
|
+
if data.get("code") != 200:
|
|
692
|
+
raise_for_code("List tasks failed", code=data.get("code"), payload=data)
|
|
1098
693
|
return data.get("data", {"count": 0, "list": []})
|
|
1099
694
|
|
|
1100
695
|
def wait_for_task(
|
|
@@ -1104,84 +699,32 @@ class ThordataClient:
|
|
|
1104
699
|
poll_interval: float = 5.0,
|
|
1105
700
|
max_wait: float = 600.0,
|
|
1106
701
|
) -> str:
|
|
1107
|
-
"""
|
|
1108
|
-
Wait for a task to complete.
|
|
1109
|
-
|
|
1110
|
-
Args:
|
|
1111
|
-
task_id: The task ID to wait for.
|
|
1112
|
-
poll_interval: Seconds between status checks.
|
|
1113
|
-
max_wait: Maximum seconds to wait.
|
|
1114
|
-
|
|
1115
|
-
Returns:
|
|
1116
|
-
Final task status.
|
|
1117
|
-
|
|
1118
|
-
Raises:
|
|
1119
|
-
TimeoutError: If max_wait is exceeded.
|
|
1120
|
-
|
|
1121
|
-
Example:
|
|
1122
|
-
>>> task_id = client.create_scraper_task(...)
|
|
1123
|
-
>>> status = client.wait_for_task(task_id, max_wait=300)
|
|
1124
|
-
>>> if status in ("ready", "success"):
|
|
1125
|
-
... url = client.get_task_result(task_id)
|
|
1126
|
-
"""
|
|
1127
702
|
import time
|
|
1128
703
|
|
|
1129
704
|
start = time.monotonic()
|
|
1130
|
-
|
|
1131
705
|
while (time.monotonic() - start) < max_wait:
|
|
1132
706
|
status = self.get_task_status(task_id)
|
|
1133
|
-
|
|
1134
|
-
logger.debug(f"Task {task_id} status: {status}")
|
|
1135
|
-
|
|
1136
|
-
terminal_statuses = {
|
|
707
|
+
if status.lower() in {
|
|
1137
708
|
"ready",
|
|
1138
709
|
"success",
|
|
1139
710
|
"finished",
|
|
1140
711
|
"failed",
|
|
1141
712
|
"error",
|
|
1142
713
|
"cancelled",
|
|
1143
|
-
}
|
|
1144
|
-
|
|
1145
|
-
if status.lower() in terminal_statuses:
|
|
714
|
+
}:
|
|
1146
715
|
return status
|
|
1147
|
-
|
|
1148
716
|
time.sleep(poll_interval)
|
|
1149
|
-
|
|
1150
|
-
raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
|
|
717
|
+
raise TimeoutError(f"Task {task_id} timeout")
|
|
1151
718
|
|
|
1152
719
|
# =========================================================================
|
|
1153
|
-
#
|
|
720
|
+
# Account / Locations / Utils
|
|
1154
721
|
# =========================================================================
|
|
1155
722
|
def get_usage_statistics(
|
|
1156
723
|
self,
|
|
1157
724
|
from_date: Union[str, date],
|
|
1158
725
|
to_date: Union[str, date],
|
|
1159
726
|
) -> UsageStatistics:
|
|
1160
|
-
"""
|
|
1161
|
-
Get account usage statistics for a date range.
|
|
1162
|
-
|
|
1163
|
-
Args:
|
|
1164
|
-
from_date: Start date (YYYY-MM-DD string or date object).
|
|
1165
|
-
to_date: End date (YYYY-MM-DD string or date object).
|
|
1166
|
-
|
|
1167
|
-
Returns:
|
|
1168
|
-
UsageStatistics object with traffic data.
|
|
1169
|
-
|
|
1170
|
-
Raises:
|
|
1171
|
-
ValueError: If date range exceeds 180 days.
|
|
1172
|
-
|
|
1173
|
-
Example:
|
|
1174
|
-
>>> from datetime import date, timedelta
|
|
1175
|
-
>>> today = date.today()
|
|
1176
|
-
>>> week_ago = today - timedelta(days=7)
|
|
1177
|
-
>>> stats = client.get_usage_statistics(week_ago, today)
|
|
1178
|
-
>>> print(f"Used: {stats.range_usage_gb():.2f} GB")
|
|
1179
|
-
>>> print(f"Balance: {stats.balance_gb():.2f} GB")
|
|
1180
|
-
"""
|
|
1181
|
-
|
|
1182
727
|
self._require_public_credentials()
|
|
1183
|
-
|
|
1184
|
-
# Convert dates to strings
|
|
1185
728
|
if isinstance(from_date, date):
|
|
1186
729
|
from_date = from_date.strftime("%Y-%m-%d")
|
|
1187
730
|
if isinstance(to_date, date):
|
|
@@ -1193,151 +736,33 @@ class ThordataClient:
|
|
|
1193
736
|
"from_date": from_date,
|
|
1194
737
|
"to_date": to_date,
|
|
1195
738
|
}
|
|
1196
|
-
|
|
1197
|
-
logger.info(f"Getting usage statistics: {from_date} to {to_date}")
|
|
1198
|
-
|
|
1199
|
-
response = self._api_request_with_retry(
|
|
1200
|
-
"GET",
|
|
1201
|
-
self._usage_stats_url,
|
|
1202
|
-
params=params,
|
|
1203
|
-
)
|
|
1204
|
-
response.raise_for_status()
|
|
1205
|
-
|
|
1206
|
-
data = response.json()
|
|
1207
|
-
|
|
1208
|
-
if isinstance(data, dict):
|
|
1209
|
-
code = data.get("code")
|
|
1210
|
-
if code is not None and code != 200:
|
|
1211
|
-
msg = extract_error_message(data)
|
|
1212
|
-
raise_for_code(
|
|
1213
|
-
f"Usage statistics error: {msg}",
|
|
1214
|
-
code=code,
|
|
1215
|
-
payload=data,
|
|
1216
|
-
)
|
|
1217
|
-
|
|
1218
|
-
# Extract data field
|
|
1219
|
-
usage_data = data.get("data", data)
|
|
1220
|
-
return UsageStatistics.from_dict(usage_data)
|
|
1221
|
-
|
|
1222
|
-
raise ThordataNetworkError(
|
|
1223
|
-
f"Unexpected usage statistics response: {type(data).__name__}",
|
|
1224
|
-
original_error=None,
|
|
1225
|
-
)
|
|
1226
|
-
|
|
1227
|
-
def get_residential_balance(self) -> Dict[str, Any]:
|
|
1228
|
-
"""
|
|
1229
|
-
Get residential proxy balance.
|
|
1230
|
-
|
|
1231
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1232
|
-
"""
|
|
1233
|
-
headers = self._build_gateway_headers()
|
|
1234
|
-
|
|
1235
|
-
logger.info("Getting residential proxy balance")
|
|
1236
|
-
|
|
1237
|
-
response = self._api_request_with_retry(
|
|
1238
|
-
"POST",
|
|
1239
|
-
f"{self._gateway_base_url}/getFlowBalance",
|
|
1240
|
-
headers=headers,
|
|
1241
|
-
data={},
|
|
1242
|
-
)
|
|
1243
|
-
response.raise_for_status()
|
|
1244
|
-
|
|
1245
|
-
data = response.json()
|
|
1246
|
-
code = data.get("code")
|
|
1247
|
-
|
|
1248
|
-
if code != 200:
|
|
1249
|
-
msg = extract_error_message(data)
|
|
1250
|
-
raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
|
|
1251
|
-
|
|
1252
|
-
return data.get("data", {})
|
|
1253
|
-
|
|
1254
|
-
def get_residential_usage(
|
|
1255
|
-
self,
|
|
1256
|
-
start_time: Union[str, int],
|
|
1257
|
-
end_time: Union[str, int],
|
|
1258
|
-
) -> Dict[str, Any]:
|
|
1259
|
-
"""
|
|
1260
|
-
Get residential proxy usage records.
|
|
1261
|
-
|
|
1262
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1263
|
-
"""
|
|
1264
|
-
headers = self._build_gateway_headers()
|
|
1265
|
-
payload = {"start_time": str(start_time), "end_time": str(end_time)}
|
|
1266
|
-
|
|
1267
|
-
logger.info(f"Getting residential usage: {start_time} to {end_time}")
|
|
1268
|
-
|
|
1269
739
|
response = self._api_request_with_retry(
|
|
1270
|
-
"
|
|
1271
|
-
f"{self._gateway_base_url}/usageRecord",
|
|
1272
|
-
headers=headers,
|
|
1273
|
-
data=payload,
|
|
740
|
+
"GET", self._usage_stats_url, params=params
|
|
1274
741
|
)
|
|
1275
742
|
response.raise_for_status()
|
|
1276
|
-
|
|
1277
743
|
data = response.json()
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
msg = extract_error_message(data)
|
|
1282
|
-
raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
|
|
1283
|
-
|
|
1284
|
-
return data.get("data", {})
|
|
744
|
+
if data.get("code") != 200:
|
|
745
|
+
raise_for_code("Usage stats error", code=data.get("code"), payload=data)
|
|
746
|
+
return UsageStatistics.from_dict(data.get("data", data))
|
|
1285
747
|
|
|
1286
748
|
def list_proxy_users(
|
|
1287
749
|
self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
|
|
1288
750
|
) -> ProxyUserList:
|
|
1289
|
-
"""
|
|
1290
|
-
List all proxy users (sub-accounts).
|
|
1291
|
-
|
|
1292
|
-
Args:
|
|
1293
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1294
|
-
|
|
1295
|
-
Returns:
|
|
1296
|
-
ProxyUserList with user details.
|
|
1297
|
-
|
|
1298
|
-
Example:
|
|
1299
|
-
>>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
|
|
1300
|
-
>>> print(f"Total users: {users.user_count}")
|
|
1301
|
-
>>> for user in users.users:
|
|
1302
|
-
... print(f"{user.username}: {user.usage_gb():.2f} GB used")
|
|
1303
|
-
"""
|
|
1304
|
-
|
|
1305
751
|
self._require_public_credentials()
|
|
1306
|
-
|
|
752
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1307
753
|
params = {
|
|
1308
754
|
"token": self.public_token,
|
|
1309
755
|
"key": self.public_key,
|
|
1310
|
-
"proxy_type": str(
|
|
1311
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1312
|
-
),
|
|
756
|
+
"proxy_type": str(pt),
|
|
1313
757
|
}
|
|
1314
|
-
|
|
1315
|
-
logger.info(f"Listing proxy users: type={params['proxy_type']}")
|
|
1316
|
-
|
|
1317
758
|
response = self._api_request_with_retry(
|
|
1318
|
-
"GET",
|
|
1319
|
-
f"{self._proxy_users_url}/user-list",
|
|
1320
|
-
params=params,
|
|
759
|
+
"GET", f"{self._proxy_users_url}/user-list", params=params
|
|
1321
760
|
)
|
|
1322
761
|
response.raise_for_status()
|
|
1323
|
-
|
|
1324
762
|
data = response.json()
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
if code is not None and code != 200:
|
|
1329
|
-
msg = extract_error_message(data)
|
|
1330
|
-
raise_for_code(
|
|
1331
|
-
f"List proxy users error: {msg}", code=code, payload=data
|
|
1332
|
-
)
|
|
1333
|
-
|
|
1334
|
-
user_data = data.get("data", data)
|
|
1335
|
-
return ProxyUserList.from_dict(user_data)
|
|
1336
|
-
|
|
1337
|
-
raise ThordataNetworkError(
|
|
1338
|
-
f"Unexpected proxy users response: {type(data).__name__}",
|
|
1339
|
-
original_error=None,
|
|
1340
|
-
)
|
|
763
|
+
if data.get("code") != 200:
|
|
764
|
+
raise_for_code("List users error", code=data.get("code"), payload=data)
|
|
765
|
+
return ProxyUserList.from_dict(data.get("data", data))
|
|
1341
766
|
|
|
1342
767
|
def create_proxy_user(
|
|
1343
768
|
self,
|
|
@@ -1347,45 +772,18 @@ class ThordataClient:
|
|
|
1347
772
|
traffic_limit: int = 0,
|
|
1348
773
|
status: bool = True,
|
|
1349
774
|
) -> Dict[str, Any]:
|
|
1350
|
-
"""
|
|
1351
|
-
Create a new proxy user (sub-account).
|
|
1352
|
-
|
|
1353
|
-
Args:
|
|
1354
|
-
username: Username for the new user.
|
|
1355
|
-
password: Password for the new user.
|
|
1356
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited).
|
|
1357
|
-
traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
|
|
1358
|
-
status: Enable/disable user (True/False).
|
|
1359
|
-
|
|
1360
|
-
Returns:
|
|
1361
|
-
API response data.
|
|
1362
|
-
|
|
1363
|
-
Example:
|
|
1364
|
-
>>> result = client.create_proxy_user(
|
|
1365
|
-
... username="subuser1",
|
|
1366
|
-
... password="securepass",
|
|
1367
|
-
... traffic_limit=5120, # 5GB
|
|
1368
|
-
... status=True
|
|
1369
|
-
... )
|
|
1370
|
-
"""
|
|
1371
775
|
self._require_public_credentials()
|
|
1372
|
-
|
|
776
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1373
777
|
headers = build_public_api_headers(
|
|
1374
778
|
self.public_token or "", self.public_key or ""
|
|
1375
779
|
)
|
|
1376
|
-
|
|
1377
780
|
payload = {
|
|
1378
|
-
"proxy_type": str(
|
|
1379
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1380
|
-
),
|
|
781
|
+
"proxy_type": str(pt),
|
|
1381
782
|
"username": username,
|
|
1382
783
|
"password": password,
|
|
1383
784
|
"traffic_limit": str(traffic_limit),
|
|
1384
785
|
"status": "true" if status else "false",
|
|
1385
786
|
}
|
|
1386
|
-
|
|
1387
|
-
logger.info(f"Creating proxy user: {username}")
|
|
1388
|
-
|
|
1389
787
|
response = self._api_request_with_retry(
|
|
1390
788
|
"POST",
|
|
1391
789
|
f"{self._proxy_users_url}/create-user",
|
|
@@ -1393,14 +791,9 @@ class ThordataClient:
|
|
|
1393
791
|
headers=headers,
|
|
1394
792
|
)
|
|
1395
793
|
response.raise_for_status()
|
|
1396
|
-
|
|
1397
794
|
data = response.json()
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
if code != 200:
|
|
1401
|
-
msg = extract_error_message(data)
|
|
1402
|
-
raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
|
|
1403
|
-
|
|
795
|
+
if data.get("code") != 200:
|
|
796
|
+
raise_for_code("Create user failed", code=data.get("code"), payload=data)
|
|
1404
797
|
return data.get("data", {})
|
|
1405
798
|
|
|
1406
799
|
def add_whitelist_ip(
|
|
@@ -1409,296 +802,86 @@ class ThordataClient:
|
|
|
1409
802
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1410
803
|
status: bool = True,
|
|
1411
804
|
) -> Dict[str, Any]:
|
|
1412
|
-
"""
|
|
1413
|
-
Add an IP to the whitelist for IP authentication.
|
|
1414
|
-
|
|
1415
|
-
Args:
|
|
1416
|
-
ip: IP address to whitelist.
|
|
1417
|
-
proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
|
|
1418
|
-
status: Enable/disable the IP (True/False).
|
|
1419
|
-
|
|
1420
|
-
Returns:
|
|
1421
|
-
API response data.
|
|
1422
|
-
|
|
1423
|
-
Example:
|
|
1424
|
-
>>> result = client.add_whitelist_ip(
|
|
1425
|
-
... ip="123.45.67.89",
|
|
1426
|
-
... proxy_type=ProxyType.RESIDENTIAL,
|
|
1427
|
-
... status=True
|
|
1428
|
-
... )
|
|
1429
|
-
"""
|
|
1430
805
|
self._require_public_credentials()
|
|
1431
|
-
|
|
806
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1432
807
|
headers = build_public_api_headers(
|
|
1433
808
|
self.public_token or "", self.public_key or ""
|
|
1434
809
|
)
|
|
1435
|
-
|
|
1436
|
-
# Convert ProxyType to int
|
|
1437
|
-
proxy_type_int = (
|
|
1438
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1439
|
-
)
|
|
1440
|
-
|
|
1441
810
|
payload = {
|
|
1442
|
-
"proxy_type": str(
|
|
811
|
+
"proxy_type": str(pt),
|
|
1443
812
|
"ip": ip,
|
|
1444
813
|
"status": "true" if status else "false",
|
|
1445
814
|
}
|
|
1446
|
-
|
|
1447
|
-
logger.info(f"Adding whitelist IP: {ip}")
|
|
1448
|
-
|
|
1449
815
|
response = self._api_request_with_retry(
|
|
1450
|
-
"POST",
|
|
1451
|
-
f"{self._whitelist_url}/add-ip",
|
|
1452
|
-
data=payload,
|
|
1453
|
-
headers=headers,
|
|
816
|
+
"POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
|
|
1454
817
|
)
|
|
1455
818
|
response.raise_for_status()
|
|
1456
|
-
|
|
1457
819
|
data = response.json()
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
|
|
1463
|
-
|
|
820
|
+
if data.get("code") != 200:
|
|
821
|
+
raise_for_code(
|
|
822
|
+
"Add whitelist IP failed", code=data.get("code"), payload=data
|
|
823
|
+
)
|
|
1464
824
|
return data.get("data", {})
|
|
1465
825
|
|
|
1466
|
-
def list_proxy_servers(
|
|
1467
|
-
self,
|
|
1468
|
-
proxy_type: int,
|
|
1469
|
-
) -> List[ProxyServer]:
|
|
1470
|
-
"""
|
|
1471
|
-
List ISP or Datacenter proxy servers.
|
|
1472
|
-
|
|
1473
|
-
Args:
|
|
1474
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1475
|
-
|
|
1476
|
-
Returns:
|
|
1477
|
-
List of ProxyServer objects.
|
|
1478
|
-
|
|
1479
|
-
Example:
|
|
1480
|
-
>>> servers = client.list_proxy_servers(proxy_type=1) # ISP proxies
|
|
1481
|
-
>>> for server in servers:
|
|
1482
|
-
... print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
|
|
1483
|
-
"""
|
|
1484
|
-
|
|
826
|
+
def list_proxy_servers(self, proxy_type: int) -> List[ProxyServer]:
|
|
1485
827
|
self._require_public_credentials()
|
|
1486
|
-
|
|
1487
828
|
params = {
|
|
1488
829
|
"token": self.public_token,
|
|
1489
830
|
"key": self.public_key,
|
|
1490
831
|
"proxy_type": str(proxy_type),
|
|
1491
832
|
}
|
|
1492
|
-
|
|
1493
|
-
logger.info(f"Listing proxy servers: type={proxy_type}")
|
|
1494
|
-
|
|
1495
833
|
response = self._api_request_with_retry(
|
|
1496
|
-
"GET",
|
|
1497
|
-
self._proxy_list_url,
|
|
1498
|
-
params=params,
|
|
834
|
+
"GET", self._proxy_list_url, params=params
|
|
1499
835
|
)
|
|
1500
836
|
response.raise_for_status()
|
|
1501
|
-
|
|
1502
837
|
data = response.json()
|
|
838
|
+
if data.get("code") != 200:
|
|
839
|
+
raise_for_code(
|
|
840
|
+
"List proxy servers error", code=data.get("code"), payload=data
|
|
841
|
+
)
|
|
1503
842
|
|
|
843
|
+
server_list = []
|
|
1504
844
|
if isinstance(data, dict):
|
|
1505
|
-
code = data.get("code")
|
|
1506
|
-
if code is not None and code != 200:
|
|
1507
|
-
msg = extract_error_message(data)
|
|
1508
|
-
raise_for_code(
|
|
1509
|
-
f"List proxy servers error: {msg}", code=code, payload=data
|
|
1510
|
-
)
|
|
1511
|
-
|
|
1512
|
-
# Extract list from data field
|
|
1513
845
|
server_list = data.get("data", data.get("list", []))
|
|
1514
846
|
elif isinstance(data, list):
|
|
1515
847
|
server_list = data
|
|
1516
|
-
else:
|
|
1517
|
-
raise ThordataNetworkError(
|
|
1518
|
-
f"Unexpected proxy list response: {type(data).__name__}",
|
|
1519
|
-
original_error=None,
|
|
1520
|
-
)
|
|
1521
848
|
|
|
1522
849
|
return [ProxyServer.from_dict(s) for s in server_list]
|
|
1523
850
|
|
|
1524
|
-
def get_isp_regions(self) -> List[Dict[str, Any]]:
|
|
1525
|
-
"""
|
|
1526
|
-
Get available ISP proxy regions.
|
|
1527
|
-
|
|
1528
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1529
|
-
"""
|
|
1530
|
-
headers = self._build_gateway_headers()
|
|
1531
|
-
|
|
1532
|
-
logger.info("Getting ISP regions")
|
|
1533
|
-
|
|
1534
|
-
response = self._api_request_with_retry(
|
|
1535
|
-
"POST",
|
|
1536
|
-
f"{self._gateway_base_url}/getRegionIsp",
|
|
1537
|
-
headers=headers,
|
|
1538
|
-
data={},
|
|
1539
|
-
)
|
|
1540
|
-
response.raise_for_status()
|
|
1541
|
-
|
|
1542
|
-
data = response.json()
|
|
1543
|
-
code = data.get("code")
|
|
1544
|
-
|
|
1545
|
-
if code != 200:
|
|
1546
|
-
msg = extract_error_message(data)
|
|
1547
|
-
raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
|
|
1548
|
-
|
|
1549
|
-
return data.get("data", [])
|
|
1550
|
-
|
|
1551
|
-
def list_isp_proxies(self) -> List[Dict[str, Any]]:
|
|
1552
|
-
"""
|
|
1553
|
-
List ISP proxies.
|
|
1554
|
-
|
|
1555
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1556
|
-
"""
|
|
1557
|
-
headers = self._build_gateway_headers()
|
|
1558
|
-
|
|
1559
|
-
logger.info("Listing ISP proxies")
|
|
1560
|
-
|
|
1561
|
-
response = self._api_request_with_retry(
|
|
1562
|
-
"POST",
|
|
1563
|
-
f"{self._gateway_base_url}/queryListIsp",
|
|
1564
|
-
headers=headers,
|
|
1565
|
-
data={},
|
|
1566
|
-
)
|
|
1567
|
-
response.raise_for_status()
|
|
1568
|
-
|
|
1569
|
-
data = response.json()
|
|
1570
|
-
code = data.get("code")
|
|
1571
|
-
|
|
1572
|
-
if code != 200:
|
|
1573
|
-
msg = extract_error_message(data)
|
|
1574
|
-
raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
|
|
1575
|
-
|
|
1576
|
-
return data.get("data", [])
|
|
1577
|
-
|
|
1578
|
-
def get_wallet_balance(self) -> Dict[str, Any]:
|
|
1579
|
-
"""
|
|
1580
|
-
Get wallet balance for ISP proxies.
|
|
1581
|
-
|
|
1582
|
-
Uses public_token/public_key (Dashboard -> My account -> API).
|
|
1583
|
-
"""
|
|
1584
|
-
headers = self._build_gateway_headers()
|
|
1585
|
-
|
|
1586
|
-
logger.info("Getting wallet balance")
|
|
1587
|
-
|
|
1588
|
-
response = self._api_request_with_retry(
|
|
1589
|
-
"POST",
|
|
1590
|
-
f"{self._gateway_base_url}/getBalance",
|
|
1591
|
-
headers=headers,
|
|
1592
|
-
data={},
|
|
1593
|
-
)
|
|
1594
|
-
response.raise_for_status()
|
|
1595
|
-
|
|
1596
|
-
data = response.json()
|
|
1597
|
-
code = data.get("code")
|
|
1598
|
-
|
|
1599
|
-
if code != 200:
|
|
1600
|
-
msg = extract_error_message(data)
|
|
1601
|
-
raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
|
|
1602
|
-
|
|
1603
|
-
return data.get("data", {})
|
|
1604
|
-
|
|
1605
851
|
def get_proxy_expiration(
|
|
1606
|
-
self,
|
|
1607
|
-
ips: Union[str, List[str]],
|
|
1608
|
-
proxy_type: int,
|
|
852
|
+
self, ips: Union[str, List[str]], proxy_type: int
|
|
1609
853
|
) -> Dict[str, Any]:
|
|
1610
|
-
"""
|
|
1611
|
-
Get expiration time for specific proxy IPs.
|
|
1612
|
-
|
|
1613
|
-
Args:
|
|
1614
|
-
ips: Single IP or list of IPs to check.
|
|
1615
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1616
|
-
|
|
1617
|
-
Returns:
|
|
1618
|
-
Dict with expiration information.
|
|
1619
|
-
|
|
1620
|
-
Example:
|
|
1621
|
-
>>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
|
|
1622
|
-
>>> print(result)
|
|
1623
|
-
"""
|
|
1624
854
|
self._require_public_credentials()
|
|
1625
|
-
|
|
1626
|
-
# Convert list to comma-separated string
|
|
1627
855
|
if isinstance(ips, list):
|
|
1628
856
|
ips = ",".join(ips)
|
|
1629
|
-
|
|
1630
857
|
params = {
|
|
1631
858
|
"token": self.public_token,
|
|
1632
859
|
"key": self.public_key,
|
|
1633
860
|
"proxy_type": str(proxy_type),
|
|
1634
861
|
"ips": ips,
|
|
1635
862
|
}
|
|
1636
|
-
|
|
1637
|
-
logger.info(f"Getting proxy expiration: {ips}")
|
|
1638
|
-
|
|
1639
863
|
response = self._api_request_with_retry(
|
|
1640
|
-
"GET",
|
|
1641
|
-
self._proxy_expiration_url,
|
|
1642
|
-
params=params,
|
|
864
|
+
"GET", self._proxy_expiration_url, params=params
|
|
1643
865
|
)
|
|
1644
866
|
response.raise_for_status()
|
|
1645
|
-
|
|
1646
867
|
data = response.json()
|
|
868
|
+
if data.get("code") != 200:
|
|
869
|
+
raise_for_code("Get expiration error", code=data.get("code"), payload=data)
|
|
870
|
+
return data.get("data", data)
|
|
1647
871
|
|
|
1648
|
-
if isinstance(data, dict):
|
|
1649
|
-
code = data.get("code")
|
|
1650
|
-
if code is not None and code != 200:
|
|
1651
|
-
msg = extract_error_message(data)
|
|
1652
|
-
raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
|
|
1653
|
-
|
|
1654
|
-
return data.get("data", data)
|
|
1655
|
-
|
|
1656
|
-
return data
|
|
1657
|
-
|
|
1658
|
-
# =========================================================================
|
|
1659
|
-
# Location API Methods (Country/State/City/ASN functions)
|
|
1660
|
-
# =========================================================================
|
|
1661
872
|
def list_countries(
|
|
1662
873
|
self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
|
|
1663
874
|
) -> List[Dict[str, Any]]:
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
Args:
|
|
1668
|
-
proxy_type: 1 for residential, 2 for unlimited.
|
|
1669
|
-
|
|
1670
|
-
Returns:
|
|
1671
|
-
List of country records with 'country_code' and 'country_name'.
|
|
1672
|
-
"""
|
|
1673
|
-
return self._get_locations(
|
|
1674
|
-
"countries",
|
|
1675
|
-
proxy_type=(
|
|
1676
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1677
|
-
),
|
|
1678
|
-
)
|
|
875
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
876
|
+
return self._get_locations("countries", proxy_type=pt)
|
|
1679
877
|
|
|
1680
878
|
def list_states(
|
|
1681
879
|
self,
|
|
1682
880
|
country_code: str,
|
|
1683
881
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1684
882
|
) -> List[Dict[str, Any]]:
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
Args:
|
|
1689
|
-
country_code: Country code (e.g., 'US').
|
|
1690
|
-
proxy_type: Proxy type.
|
|
1691
|
-
|
|
1692
|
-
Returns:
|
|
1693
|
-
List of state records.
|
|
1694
|
-
"""
|
|
1695
|
-
return self._get_locations(
|
|
1696
|
-
"states",
|
|
1697
|
-
proxy_type=(
|
|
1698
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1699
|
-
),
|
|
1700
|
-
country_code=country_code,
|
|
1701
|
-
)
|
|
883
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
884
|
+
return self._get_locations("states", proxy_type=pt, country_code=country_code)
|
|
1702
885
|
|
|
1703
886
|
def list_cities(
|
|
1704
887
|
self,
|
|
@@ -1706,26 +889,10 @@ class ThordataClient:
|
|
|
1706
889
|
state_code: Optional[str] = None,
|
|
1707
890
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1708
891
|
) -> List[Dict[str, Any]]:
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
Args:
|
|
1713
|
-
country_code: Country code.
|
|
1714
|
-
state_code: Optional state code.
|
|
1715
|
-
proxy_type: Proxy type.
|
|
1716
|
-
|
|
1717
|
-
Returns:
|
|
1718
|
-
List of city records.
|
|
1719
|
-
"""
|
|
1720
|
-
kwargs = {
|
|
1721
|
-
"proxy_type": (
|
|
1722
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1723
|
-
),
|
|
1724
|
-
"country_code": country_code,
|
|
1725
|
-
}
|
|
892
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
893
|
+
kwargs = {"proxy_type": pt, "country_code": country_code}
|
|
1726
894
|
if state_code:
|
|
1727
895
|
kwargs["state_code"] = state_code
|
|
1728
|
-
|
|
1729
896
|
return self._get_locations("cities", **kwargs)
|
|
1730
897
|
|
|
1731
898
|
def list_asn(
|
|
@@ -1733,88 +900,36 @@ class ThordataClient:
|
|
|
1733
900
|
country_code: str,
|
|
1734
901
|
proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
|
|
1735
902
|
) -> List[Dict[str, Any]]:
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
Args:
|
|
1740
|
-
country_code: Country code.
|
|
1741
|
-
proxy_type: Proxy type.
|
|
1742
|
-
|
|
1743
|
-
Returns:
|
|
1744
|
-
List of ASN records.
|
|
1745
|
-
"""
|
|
1746
|
-
return self._get_locations(
|
|
1747
|
-
"asn",
|
|
1748
|
-
proxy_type=(
|
|
1749
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1750
|
-
),
|
|
1751
|
-
country_code=country_code,
|
|
1752
|
-
)
|
|
903
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
904
|
+
return self._get_locations("asn", proxy_type=pt, country_code=country_code)
|
|
1753
905
|
|
|
1754
906
|
def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
|
|
1755
|
-
"""Internal method to call locations API."""
|
|
1756
907
|
self._require_public_credentials()
|
|
908
|
+
params = {"token": self.public_token, "key": self.public_key}
|
|
909
|
+
for k, v in kwargs.items():
|
|
910
|
+
params[k] = str(v)
|
|
1757
911
|
|
|
1758
|
-
params = {
|
|
1759
|
-
"token": self.public_token,
|
|
1760
|
-
"key": self.public_key,
|
|
1761
|
-
}
|
|
1762
|
-
|
|
1763
|
-
for key, value in kwargs.items():
|
|
1764
|
-
params[key] = str(value)
|
|
1765
|
-
|
|
1766
|
-
url = f"{self._locations_base_url}/{endpoint}"
|
|
1767
|
-
|
|
1768
|
-
logger.debug(f"Locations API request: {url}")
|
|
1769
|
-
|
|
1770
|
-
# Use requests.get directly (no proxy needed for this API)
|
|
1771
912
|
response = self._api_request_with_retry(
|
|
1772
|
-
"GET",
|
|
1773
|
-
url,
|
|
1774
|
-
params=params,
|
|
913
|
+
"GET", f"{self._locations_base_url}/{endpoint}", params=params
|
|
1775
914
|
)
|
|
1776
915
|
response.raise_for_status()
|
|
1777
|
-
|
|
1778
916
|
data = response.json()
|
|
1779
|
-
|
|
1780
917
|
if isinstance(data, dict):
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
msg = data.get("msg", "")
|
|
1784
|
-
raise RuntimeError(
|
|
1785
|
-
f"Locations API error ({endpoint}): code={code}, msg={msg}"
|
|
1786
|
-
)
|
|
918
|
+
if data.get("code") != 200:
|
|
919
|
+
raise RuntimeError(f"Locations error: {data.get('msg')}")
|
|
1787
920
|
return data.get("data") or []
|
|
921
|
+
return data if isinstance(data, list) else []
|
|
1788
922
|
|
|
1789
|
-
if isinstance(data, list):
|
|
1790
|
-
return data
|
|
1791
|
-
|
|
1792
|
-
return []
|
|
1793
|
-
|
|
1794
|
-
# =========================================================================
|
|
1795
|
-
# Helper Methods (Internal utility functions)
|
|
1796
|
-
# =========================================================================
|
|
1797
923
|
def _require_public_credentials(self) -> None:
|
|
1798
|
-
"""Ensure public API credentials are available."""
|
|
1799
924
|
if not self.public_token or not self.public_key:
|
|
1800
925
|
raise ThordataConfigError(
|
|
1801
|
-
"public_token and public_key are required for this operation.
|
|
1802
|
-
"Please provide them when initializing ThordataClient."
|
|
926
|
+
"public_token and public_key are required for this operation."
|
|
1803
927
|
)
|
|
1804
928
|
|
|
1805
929
|
def _get_proxy_endpoint_overrides(
|
|
1806
930
|
self, product: ProxyProduct
|
|
1807
931
|
) -> tuple[Optional[str], Optional[int], str]:
|
|
1808
|
-
|
|
1809
|
-
Read proxy endpoint overrides from env.
|
|
1810
|
-
|
|
1811
|
-
Priority:
|
|
1812
|
-
1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
|
|
1813
|
-
2) THORDATA_PROXY_HOST/PORT/PROTOCOL
|
|
1814
|
-
3) defaults (host/port None => ProxyConfig will use its product defaults)
|
|
1815
|
-
"""
|
|
1816
|
-
prefix = product.value.upper() # RESIDENTIAL / DATACENTER / MOBILE / ISP
|
|
1817
|
-
|
|
932
|
+
prefix = product.value.upper()
|
|
1818
933
|
host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
|
|
1819
934
|
"THORDATA_PROXY_HOST"
|
|
1820
935
|
)
|
|
@@ -1826,184 +941,37 @@ class ThordataClient:
|
|
|
1826
941
|
or os.getenv("THORDATA_PROXY_PROTOCOL")
|
|
1827
942
|
or "http"
|
|
1828
943
|
)
|
|
1829
|
-
|
|
1830
|
-
port: Optional[int] = None
|
|
1831
|
-
if port_raw:
|
|
1832
|
-
try:
|
|
1833
|
-
port = int(port_raw)
|
|
1834
|
-
except ValueError:
|
|
1835
|
-
port = None
|
|
1836
|
-
|
|
944
|
+
port = int(port_raw) if port_raw and port_raw.isdigit() else None
|
|
1837
945
|
return host or None, port, protocol
|
|
1838
946
|
|
|
1839
947
|
def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
password=p,
|
|
1858
|
-
product=ProxyProduct.RESIDENTIAL,
|
|
1859
|
-
host=host,
|
|
1860
|
-
port=port,
|
|
1861
|
-
protocol=protocol,
|
|
1862
|
-
)
|
|
1863
|
-
|
|
1864
|
-
# Datacenter
|
|
1865
|
-
u = os.getenv("THORDATA_DATACENTER_USERNAME")
|
|
1866
|
-
p = os.getenv("THORDATA_DATACENTER_PASSWORD")
|
|
1867
|
-
if u and p:
|
|
1868
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1869
|
-
ProxyProduct.DATACENTER
|
|
1870
|
-
)
|
|
1871
|
-
return ProxyConfig(
|
|
1872
|
-
username=u,
|
|
1873
|
-
password=p,
|
|
1874
|
-
product=ProxyProduct.DATACENTER,
|
|
1875
|
-
host=host,
|
|
1876
|
-
port=port,
|
|
1877
|
-
protocol=protocol,
|
|
1878
|
-
)
|
|
1879
|
-
|
|
1880
|
-
# Mobile
|
|
1881
|
-
u = os.getenv("THORDATA_MOBILE_USERNAME")
|
|
1882
|
-
p = os.getenv("THORDATA_MOBILE_PASSWORD")
|
|
1883
|
-
if u and p:
|
|
1884
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1885
|
-
ProxyProduct.MOBILE
|
|
1886
|
-
)
|
|
1887
|
-
return ProxyConfig(
|
|
1888
|
-
username=u,
|
|
1889
|
-
password=p,
|
|
1890
|
-
product=ProxyProduct.MOBILE,
|
|
1891
|
-
host=host,
|
|
1892
|
-
port=port,
|
|
1893
|
-
protocol=protocol,
|
|
1894
|
-
)
|
|
1895
|
-
|
|
1896
|
-
return None
|
|
1897
|
-
|
|
1898
|
-
def _build_gateway_headers(self) -> Dict[str, str]:
|
|
1899
|
-
"""
|
|
1900
|
-
Build headers for legacy gateway-style endpoints.
|
|
1901
|
-
|
|
1902
|
-
IMPORTANT:
|
|
1903
|
-
- SDK does NOT expose "sign/apiKey" as a separate credential model.
|
|
1904
|
-
- Values ALWAYS come from public_token/public_key.
|
|
1905
|
-
- Some backend endpoints may still expect header field names "sign" and "apiKey".
|
|
1906
|
-
"""
|
|
1907
|
-
self._require_public_credentials()
|
|
1908
|
-
return {
|
|
1909
|
-
"sign": self.public_token or "",
|
|
1910
|
-
"apiKey": self.public_key or "",
|
|
1911
|
-
"Content-Type": "application/x-www-form-urlencoded",
|
|
1912
|
-
}
|
|
1913
|
-
|
|
1914
|
-
def _proxy_request_with_proxy_manager(
|
|
1915
|
-
self,
|
|
1916
|
-
method: str,
|
|
1917
|
-
url: str,
|
|
1918
|
-
*,
|
|
1919
|
-
proxy_config: ProxyConfig,
|
|
1920
|
-
timeout: int,
|
|
1921
|
-
headers: Optional[Dict[str, str]] = None,
|
|
1922
|
-
params: Optional[Dict[str, Any]] = None,
|
|
1923
|
-
data: Any = None,
|
|
1924
|
-
) -> requests.Response:
|
|
1925
|
-
"""
|
|
1926
|
-
Proxy Network request implemented via urllib3.ProxyManager.
|
|
1927
|
-
|
|
1928
|
-
This is required to reliably support HTTPS proxy endpoints like:
|
|
1929
|
-
https://<endpoint>.pr.thordata.net:9999
|
|
1930
|
-
"""
|
|
1931
|
-
# Build final URL (include query params)
|
|
1932
|
-
req = requests.Request(method=method.upper(), url=url, params=params)
|
|
1933
|
-
prepped = self._proxy_session.prepare_request(req)
|
|
1934
|
-
final_url = prepped.url or url
|
|
1935
|
-
|
|
1936
|
-
proxy_url = proxy_config.build_proxy_endpoint()
|
|
1937
|
-
proxy_headers = urllib3.make_headers(
|
|
1938
|
-
proxy_basic_auth=proxy_config.build_proxy_basic_auth()
|
|
1939
|
-
)
|
|
1940
|
-
|
|
1941
|
-
pm = urllib3.ProxyManager(
|
|
1942
|
-
proxy_url,
|
|
1943
|
-
proxy_headers=proxy_headers,
|
|
1944
|
-
proxy_ssl_context=(
|
|
1945
|
-
ssl.create_default_context()
|
|
1946
|
-
if proxy_url.startswith("https://")
|
|
1947
|
-
else None
|
|
1948
|
-
),
|
|
1949
|
-
)
|
|
1950
|
-
|
|
1951
|
-
# Encode form data if dict
|
|
1952
|
-
body = None
|
|
1953
|
-
req_headers = dict(headers or {})
|
|
1954
|
-
if data is not None:
|
|
1955
|
-
if isinstance(data, dict):
|
|
1956
|
-
# form-urlencoded
|
|
1957
|
-
body = urlencode({k: str(v) for k, v in data.items()})
|
|
1958
|
-
req_headers.setdefault(
|
|
1959
|
-
"Content-Type", "application/x-www-form-urlencoded"
|
|
948
|
+
for prod in [
|
|
949
|
+
ProxyProduct.RESIDENTIAL,
|
|
950
|
+
ProxyProduct.DATACENTER,
|
|
951
|
+
ProxyProduct.MOBILE,
|
|
952
|
+
]:
|
|
953
|
+
prefix = prod.value.upper()
|
|
954
|
+
u = os.getenv(f"THORDATA_{prefix}_USERNAME")
|
|
955
|
+
p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
|
|
956
|
+
if u and p:
|
|
957
|
+
h, port, proto = self._get_proxy_endpoint_overrides(prod)
|
|
958
|
+
return ProxyConfig(
|
|
959
|
+
username=u,
|
|
960
|
+
password=p,
|
|
961
|
+
product=prod,
|
|
962
|
+
host=h,
|
|
963
|
+
port=port,
|
|
964
|
+
protocol=proto,
|
|
1960
965
|
)
|
|
1961
|
-
|
|
1962
|
-
body = data
|
|
1963
|
-
|
|
1964
|
-
http_resp = pm.request(
|
|
1965
|
-
method.upper(),
|
|
1966
|
-
final_url,
|
|
1967
|
-
body=body,
|
|
1968
|
-
headers=req_headers or None,
|
|
1969
|
-
timeout=urllib3.Timeout(connect=timeout, read=timeout),
|
|
1970
|
-
retries=False,
|
|
1971
|
-
preload_content=True,
|
|
1972
|
-
)
|
|
1973
|
-
|
|
1974
|
-
# Convert urllib3 response -> requests.Response (keep your API stable)
|
|
1975
|
-
r = requests.Response()
|
|
1976
|
-
r.status_code = int(getattr(http_resp, "status", 0) or 0)
|
|
1977
|
-
r._content = http_resp.data or b""
|
|
1978
|
-
r.url = final_url
|
|
1979
|
-
r.headers = requests.structures.CaseInsensitiveDict(
|
|
1980
|
-
dict(http_resp.headers or {})
|
|
1981
|
-
)
|
|
1982
|
-
return r
|
|
1983
|
-
|
|
1984
|
-
def _request_with_retry(
|
|
1985
|
-
self, method: str, url: str, **kwargs: Any
|
|
1986
|
-
) -> requests.Response:
|
|
1987
|
-
"""Make a request with automatic retry."""
|
|
1988
|
-
kwargs.setdefault("timeout", self._default_timeout)
|
|
1989
|
-
|
|
1990
|
-
@with_retry(self._retry_config)
|
|
1991
|
-
def _do_request() -> requests.Response:
|
|
1992
|
-
return self._proxy_session.request(method, url, **kwargs)
|
|
1993
|
-
|
|
1994
|
-
try:
|
|
1995
|
-
return _do_request()
|
|
1996
|
-
except requests.Timeout as e:
|
|
1997
|
-
raise ThordataTimeoutError(
|
|
1998
|
-
f"Request timed out: {e}", original_error=e
|
|
1999
|
-
) from e
|
|
2000
|
-
except requests.RequestException as e:
|
|
2001
|
-
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
966
|
+
return None
|
|
2002
967
|
|
|
2003
968
|
def close(self) -> None:
|
|
2004
|
-
"""Close the underlying session."""
|
|
2005
969
|
self._proxy_session.close()
|
|
2006
970
|
self._api_session.close()
|
|
971
|
+
# Clean up connection pools
|
|
972
|
+
for pm in self._proxy_managers.values():
|
|
973
|
+
pm.clear()
|
|
974
|
+
self._proxy_managers.clear()
|
|
2007
975
|
|
|
2008
976
|
def __enter__(self) -> ThordataClient:
|
|
2009
977
|
return self
|