thordata-sdk 0.7.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +13 -1
- thordata/_example_utils.py +76 -0
- thordata/_utils.py +46 -3
- thordata/async_client.py +863 -23
- thordata/client.py +1023 -51
- thordata/enums.py +3 -3
- thordata/exceptions.py +16 -5
- thordata/models.py +351 -7
- thordata/retry.py +6 -4
- thordata_sdk-1.0.0.dist-info/METADATA +208 -0
- thordata_sdk-1.0.0.dist-info/RECORD +15 -0
- thordata/parameters.py +0 -53
- thordata_sdk-0.7.0.dist-info/METADATA +0 -1053
- thordata_sdk-0.7.0.dist-info/RECORD +0 -15
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/top_level.txt +0 -0
thordata/client.py
CHANGED
@@ -25,13 +25,18 @@ from __future__ import annotations
 
 import logging
 import os
+import ssl
+from datetime import date
 from typing import Any, Dict, List, Optional, Union
+from urllib.parse import urlencode
 
 import requests
+import urllib3
 
 from . import __version__ as _sdk_version
 from ._utils import (
     build_auth_headers,
+    build_builder_headers,
     build_public_api_headers,
     build_user_agent,
     decode_base64_image,
@@ -46,11 +51,17 @@ from .exceptions import (
     raise_for_code,
 )
 from .models import (
+    CommonSettings,
     ProxyConfig,
     ProxyProduct,
+    ProxyServer,
+    ProxyUserList,
     ScraperTaskConfig,
     SerpRequest,
     UniversalScrapeRequest,
+    UsageStatistics,
+    VideoTaskConfig,
+    WhitelistProxyConfig,
 )
 from .retry import RetryConfig, with_retry
 
@@ -87,8 +98,8 @@ class ThordataClient:
     # API Endpoints
     BASE_URL = "https://scraperapi.thordata.com"
     UNIVERSAL_URL = "https://universalapi.thordata.com"
-    API_URL = "https://
-    LOCATIONS_URL = "https://
+    API_URL = "https://openapi.thordata.com/api/web-scraper-api"
+    LOCATIONS_URL = "https://openapi.thordata.com/api/locations"
 
     def __init__(
         self,
@@ -98,7 +109,9 @@ class ThordataClient:
         proxy_host: str = "pr.thordata.net",
         proxy_port: int = 9999,
         timeout: int = 30,
+        api_timeout: int = 60,
         retry_config: Optional[RetryConfig] = None,
+        auth_mode: str = "bearer",
         scraperapi_base_url: Optional[str] = None,
         universalapi_base_url: Optional[str] = None,
         web_scraper_api_base_url: Optional[str] = None,
@@ -108,6 +121,7 @@ class ThordataClient:
         if not scraper_token:
             raise ThordataConfigError("scraper_token is required")
 
+        # Core credentials
         self.scraper_token = scraper_token
         self.public_token = public_token
         self.public_key = public_key
@@ -115,17 +129,22 @@ class ThordataClient:
         # Proxy configuration
         self._proxy_host = proxy_host
         self._proxy_port = proxy_port
+
+        # Timeout configuration
         self._default_timeout = timeout
+        self._api_timeout = api_timeout
 
         # Retry configuration
         self._retry_config = retry_config or RetryConfig()
 
-        #
-        self.
-
-
+        # Authentication mode (for scraping APIs)
+        self._auth_mode = auth_mode.lower()
+        if self._auth_mode not in ("bearer", "header_token"):
+            raise ThordataConfigError(
+                f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
+            )
 
-        #
+        # NOTE:
         # - _proxy_session: used for proxy network traffic to target sites
         # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
         #
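For orientation (not part of the diff): a minimal sketch of constructing a 1.0.0 client with the new `api_timeout` and `auth_mode` parameters added above. The token values are placeholders, and the import path follows the package's `thordata` namespace.

    from thordata import ThordataClient

    # Placeholder credentials; auth_mode must be "bearer" or "header_token",
    # otherwise __init__ raises ThordataConfigError as shown in this hunk.
    client = ThordataClient(
        scraper_token="YOUR_SCRAPER_TOKEN",
        public_token="YOUR_PUBLIC_TOKEN",
        public_key="YOUR_PUBLIC_KEY",
        api_timeout=60,        # timeout (seconds) used for Thordata API calls
        auth_mode="bearer",    # authentication scheme for the scraping APIs
    )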
@@ -133,14 +152,9 @@ class ThordataClient:
         # so developers can rely on system proxy settings (e.g., Clash) via env vars.
         self._proxy_session = requests.Session()
         self._proxy_session.trust_env = False
-        self._proxy_session.proxies = {
-            "http": self._default_proxy_url,
-            "https": self._default_proxy_url,
-        }
 
         self._api_session = requests.Session()
         self._api_session.trust_env = True
-
         self._api_session.headers.update(
             {"User-Agent": build_user_agent(_sdk_version, "requests")}
         )
@@ -170,17 +184,49 @@ class ThordataClient:
             or self.LOCATIONS_URL
         ).rstrip("/")
 
+        # These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
+        gateway_base = os.getenv(
+            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
+        )
+        child_base = os.getenv(
+            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
+        )
+        self._gateway_base_url = gateway_base
+        self._child_base_url = child_base
+
         self._serp_url = f"{scraperapi_base}/request"
         self._builder_url = f"{scraperapi_base}/builder"
+        self._video_builder_url = f"{scraperapi_base}/video_builder"
         self._universal_url = f"{universalapi_base}/request"
+
         self._status_url = f"{web_scraper_api_base}/tasks-status"
         self._download_url = f"{web_scraper_api_base}/tasks-download"
+        self._list_url = f"{web_scraper_api_base}/tasks-list"
+
         self._locations_base_url = locations_base
 
+        # These 2 lines keep your existing behavior (derive account endpoints from locations_base)
+        self._usage_stats_url = (
+            f"{locations_base.replace('/locations', '')}/account/usage-statistics"
+        )
+        self._proxy_users_url = (
+            f"{locations_base.replace('/locations', '')}/proxy-users"
+        )
+
+        whitelist_base = os.getenv(
+            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
+        )
+        self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
+
+        proxy_api_base = os.getenv(
+            "THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
+        )
+        self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
+        self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
+
     # =========================================================================
-    # Proxy Network Methods
+    # Proxy Network Methods (Pure proxy network request functions)
     # =========================================================================
-
     def get(
         self,
         url: str,
@@ -219,11 +265,37 @@ class ThordataClient:
 
         timeout = timeout or self._default_timeout
 
-        if proxy_config:
-
-
+        if proxy_config is None:
+            proxy_config = self._get_default_proxy_config_from_env()
+
+        if proxy_config is None:
+            raise ThordataConfigError(
+                "Proxy credentials are missing. "
+                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
+                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
+            )
+
+        kwargs["proxies"] = proxy_config.to_proxies_dict()
+
+        @with_retry(self._retry_config)
+        def _do() -> requests.Response:
+            return self._proxy_request_with_proxy_manager(
+                "GET",
+                url,
+                proxy_config=proxy_config,
+                timeout=timeout,
+                headers=kwargs.pop("headers", None),
+                params=kwargs.pop("params", None),
+            )
 
-
+        try:
+            return _do()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"Request timed out: {e}", original_error=e
+            ) from e
+        except Exception as e:
+            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
 
     def post(
         self,
@@ -249,14 +321,43 @@ class ThordataClient:
 
         timeout = timeout or self._default_timeout
 
-        if proxy_config:
-
-
+        if proxy_config is None:
+            proxy_config = self._get_default_proxy_config_from_env()
+
+        if proxy_config is None:
+            raise ThordataConfigError(
+                "Proxy credentials are missing. "
+                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
+                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
+            )
 
-
+        kwargs["proxies"] = proxy_config.to_proxies_dict()
+
+        @with_retry(self._retry_config)
+        def _do() -> requests.Response:
+            return self._proxy_request_with_proxy_manager(
+                "POST",
+                url,
+                proxy_config=proxy_config,
+                timeout=timeout,
+                headers=kwargs.pop("headers", None),
+                params=kwargs.pop("params", None),
+                data=kwargs.pop("data", None),
+            )
+
+        try:
+            return _do()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"Request timed out: {e}", original_error=e
+            ) from e
+        except Exception as e:
+            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
 
     def build_proxy_url(
         self,
+        username: str,  # Required
+        password: str,  # Required
         *,
         country: Optional[str] = None,
         state: Optional[str] = None,
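Usage sketch (not part of the diff): with the rewritten `get()`/`post()`, proxy credentials can either be passed explicitly as a `ProxyConfig` or picked up from the environment variables named in the error message above. The credentials below are placeholders.

    import os
    from thordata import ProxyConfig, ProxyProduct, ThordataClient

    client = ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN")

    # Option 1: explicit ProxyConfig per request
    proxy = ProxyConfig(
        username="YOUR_PROXY_USERNAME",
        password="YOUR_PROXY_PASSWORD",
        product=ProxyProduct.RESIDENTIAL,
    )
    resp = client.get("https://httpbin.org/ip", proxy_config=proxy)

    # Option 2: rely on the env-based fallback used when proxy_config is None
    os.environ["THORDATA_RESIDENTIAL_USERNAME"] = "YOUR_PROXY_USERNAME"
    os.environ["THORDATA_RESIDENTIAL_PASSWORD"] = "YOUR_PROXY_PASSWORD"
    resp = client.get("https://httpbin.org/ip")
    print(resp.status_code)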
@@ -288,8 +389,8 @@ class ThordataClient:
         >>> requests.get("https://example.com", proxies=proxies)
         """
         config = ProxyConfig(
-            username=
-            password=
+            username=username,
+            password=password,
             host=self._proxy_host,
             port=self._proxy_port,
             product=product,
@@ -302,9 +403,44 @@ class ThordataClient:
         return config.build_proxy_url()
 
     # =========================================================================
-    #
+    # Internal API Request Retry Helper (For all API calls)
     # =========================================================================
+    def _api_request_with_retry(
+        self,
+        method: str,
+        url: str,
+        *,
+        data: Optional[Dict[str, Any]] = None,
+        headers: Optional[Dict[str, str]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> requests.Response:
+        """Make an API request with automatic retry on transient failures."""
+
+        @with_retry(self._retry_config)
+        def _do_request() -> requests.Response:
+            return self._api_session.request(
+                method,
+                url,
+                data=data,
+                headers=headers,
+                params=params,
+                timeout=self._api_timeout,
+            )
+
+        try:
+            return _do_request()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"API request timed out: {e}", original_error=e
+            ) from e
+        except requests.RequestException as e:
+            raise ThordataNetworkError(
+                f"API request failed: {e}", original_error=e
+            ) from e
 
+    # =========================================================================
+    # SERP API Methods (Search Engine Results Page functions)
+    # =========================================================================
     def serp_search(
         self,
         query: str,
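The `_api_request_with_retry` helper above applies the SDK's `with_retry` decorator to a local closure, so the configured retry policy wraps each individual API call. A generic sketch of that decorate-a-closure pattern, using a toy retry decorator rather than the SDK's `RetryConfig`/`with_retry` (whose internals are not part of this diff):

    import time
    from typing import Callable, Optional, TypeVar

    T = TypeVar("T")

    def with_simple_retry(attempts: int = 3, delay: float = 0.5):
        """Toy stand-in for a retry decorator: retry the wrapped call on any exception."""
        def decorator(func: Callable[[], T]) -> Callable[[], T]:
            def wrapper() -> T:
                last_exc: Optional[Exception] = None
                for _ in range(attempts):
                    try:
                        return func()
                    except Exception as exc:  # real code would narrow the exception types
                        last_exc = exc
                        time.sleep(delay)
                raise last_exc if last_exc else RuntimeError("no attempts made")
            return wrapper
        return decorator

    def fetch_with_retry(session, url: str):
        # Decorating a closure (as the SDK does) binds the retry policy per call site.
        @with_simple_retry(attempts=3, delay=0.5)
        def _do():
            resp = session.get(url, timeout=10)
            resp.raise_for_status()
            return resp
        return _do()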
@@ -375,16 +511,18 @@ class ThordataClient:
         )
 
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
-        logger.info(
+        logger.info(
+            f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
+        )
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._serp_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -445,16 +583,18 @@ class ThordataClient:
         >>> results = client.serp_search_advanced(request)
         """
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
-        logger.info(
+        logger.info(
+            f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
+        )
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._serp_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -487,9 +627,8 @@ class ThordataClient:
             ) from e
 
     # =========================================================================
-    # Universal Scraping API (Web Unlocker)
+    # Universal Scraping API Methods (Web Unlocker functions)
     # =========================================================================
-
     def universal_scrape(
         self,
         url: str,
@@ -559,18 +698,18 @@ class ThordataClient:
             HTML string or PNG bytes.
         """
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
         logger.info(
             f"Universal Scrape: {request.url} (format: {request.output_format})"
         )
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._universal_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -619,9 +758,8 @@ class ThordataClient:
         return str(resp_json)
 
     # =========================================================================
-    # Web Scraper API (
+    # Web Scraper API Methods (Only async task management functions)
     # =========================================================================
-
     def create_scraper_task(
         self,
         file_name: str,
@@ -673,17 +811,25 @@ class ThordataClient:
         Returns:
             The created task_id.
         """
+        self._require_public_credentials()
+
         payload = config.to_payload()
-
+
+        # Builder needs 3 headers: token, key, Authorization Bearer
+        headers = build_builder_headers(
+            self.scraper_token,
+            self.public_token or "",
+            self.public_key or "",
+        )
 
         logger.info(f"Creating Scraper Task: {config.spider_name}")
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._builder_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
 
@@ -701,6 +847,94 @@ class ThordataClient:
                 f"Task creation failed: {e}", original_error=e
             ) from e
 
+    def create_video_task(
+        self,
+        file_name: str,
+        spider_id: str,
+        spider_name: str,
+        parameters: Dict[str, Any],
+        common_settings: "CommonSettings",
+    ) -> str:
+        """
+        Create a YouTube video/audio download task.
+
+        Uses the /video_builder endpoint.
+
+        Args:
+            file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
+            spider_id: Spider identifier (e.g., "youtube_video_by-url").
+            spider_name: Spider name (typically "youtube.com").
+            parameters: Spider parameters (e.g., {"url": "..."}).
+            common_settings: Video/audio settings.
+
+        Returns:
+            The created task_id.
+
+        Example:
+            >>> from thordata import CommonSettings
+            >>> task_id = client.create_video_task(
+            ...     file_name="{{VideoID}}",
+            ...     spider_id="youtube_video_by-url",
+            ...     spider_name="youtube.com",
+            ...     parameters={"url": "https://youtube.com/watch?v=xxx"},
+            ...     common_settings=CommonSettings(
+            ...         resolution="1080p",
+            ...         is_subtitles="true"
+            ...     )
+            ... )
+        """
+
+        config = VideoTaskConfig(
+            file_name=file_name,
+            spider_id=spider_id,
+            spider_name=spider_name,
+            parameters=parameters,
+            common_settings=common_settings,
+        )
+
+        return self.create_video_task_advanced(config)
+
+    def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
+        """
+        Create a video task using VideoTaskConfig object.
+
+        Args:
+            config: Video task configuration.
+
+        Returns:
+            The created task_id.
+        """
+
+        self._require_public_credentials()
+
+        payload = config.to_payload()
+        headers = build_builder_headers(
+            self.scraper_token,
+            self.public_token or "",
+            self.public_key or "",
+        )
+
+        logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            self._video_builder_url,
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(
+                f"Video task creation failed: {msg}", code=code, payload=data
+            )
+
+        return data["data"]["task_id"]
+
     def get_task_status(self, task_id: str) -> str:
         """
         Check the status of an asynchronous scraping task.
@@ -721,11 +955,11 @@ class ThordataClient:
         payload = {"tasks_ids": task_id}
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._status_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
             data = response.json()
@@ -788,11 +1022,11 @@ class ThordataClient:
         logger.info(f"Getting result URL for Task: {task_id}")
 
         try:
-            response = self.
+            response = self._api_request_with_retry(
+                "POST",
                 self._download_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
 
@@ -812,6 +1046,57 @@ class ThordataClient:
                 f"Get result failed: {e}", original_error=e
             ) from e
 
+    def list_tasks(
+        self,
+        page: int = 1,
+        size: int = 20,
+    ) -> Dict[str, Any]:
+        """
+        List all Web Scraper tasks.
+
+        Args:
+            page: Page number (starts from 1).
+            size: Number of tasks per page.
+
+        Returns:
+            Dict containing 'count' and 'list' of tasks.
+
+        Example:
+            >>> result = client.list_tasks(page=1, size=10)
+            >>> print(f"Total tasks: {result['count']}")
+            >>> for task in result['list']:
+            ...     print(f"Task {task['task_id']}: {task['status']}")
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+        payload: Dict[str, Any] = {}
+        if page:
+            payload["page"] = str(page)
+        if size:
+            payload["size"] = str(size)
+
+        logger.info(f"Listing tasks: page={page}, size={size}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            self._list_url,
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {"count": 0, "list": []})
+
     def wait_for_task(
         self,
         task_id: str,
@@ -865,9 +1150,514 @@ class ThordataClient:
         raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
 
     # =========================================================================
-    #
+    # Proxy Account Management Methods (Proxy balance, user, whitelist functions)
     # =========================================================================
+    def get_usage_statistics(
+        self,
+        from_date: Union[str, date],
+        to_date: Union[str, date],
+    ) -> UsageStatistics:
+        """
+        Get account usage statistics for a date range.
+
+        Args:
+            from_date: Start date (YYYY-MM-DD string or date object).
+            to_date: End date (YYYY-MM-DD string or date object).
+
+        Returns:
+            UsageStatistics object with traffic data.
+
+        Raises:
+            ValueError: If date range exceeds 180 days.
+
+        Example:
+            >>> from datetime import date, timedelta
+            >>> today = date.today()
+            >>> week_ago = today - timedelta(days=7)
+            >>> stats = client.get_usage_statistics(week_ago, today)
+            >>> print(f"Used: {stats.range_usage_gb():.2f} GB")
+            >>> print(f"Balance: {stats.balance_gb():.2f} GB")
+        """
+
+        self._require_public_credentials()
+
+        # Convert dates to strings
+        if isinstance(from_date, date):
+            from_date = from_date.strftime("%Y-%m-%d")
+        if isinstance(to_date, date):
+            to_date = to_date.strftime("%Y-%m-%d")
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "from_date": from_date,
+            "to_date": to_date,
+        }
+
+        logger.info(f"Getting usage statistics: {from_date} to {to_date}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._usage_stats_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"Usage statistics error: {msg}",
+                    code=code,
+                    payload=data,
+                )
+
+            # Extract data field
+            usage_data = data.get("data", data)
+            return UsageStatistics.from_dict(usage_data)
+
+        raise ThordataNetworkError(
+            f"Unexpected usage statistics response: {type(data).__name__}",
+            original_error=None,
+        )
+
+    def get_residential_balance(self) -> Dict[str, Any]:
+        """
+        Get residential proxy balance.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting residential proxy balance")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getFlowBalance",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def get_residential_usage(
+        self,
+        start_time: Union[str, int],
+        end_time: Union[str, int],
+    ) -> Dict[str, Any]:
+        """
+        Get residential proxy usage records.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+        payload = {"start_time": str(start_time), "end_time": str(end_time)}
+
+        logger.info(f"Getting residential usage: {start_time} to {end_time}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/usageRecord",
+            headers=headers,
+            data=payload,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
 
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def list_proxy_users(
+        self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+    ) -> ProxyUserList:
+        """
+        List all proxy users (sub-accounts).
+
+        Args:
+            proxy_type: Proxy type (1=Residential, 2=Unlimited).
+
+        Returns:
+            ProxyUserList with user details.
+
+        Example:
+            >>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
+            >>> print(f"Total users: {users.user_count}")
+            >>> for user in users.users:
+            ...     print(f"{user.username}: {user.usage_gb():.2f} GB used")
+        """
+
+        self._require_public_credentials()
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+        }
+
+        logger.info(f"Listing proxy users: type={params['proxy_type']}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            f"{self._proxy_users_url}/user-list",
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"List proxy users error: {msg}", code=code, payload=data
+                )
+
+            user_data = data.get("data", data)
+            return ProxyUserList.from_dict(user_data)
+
+        raise ThordataNetworkError(
+            f"Unexpected proxy users response: {type(data).__name__}",
+            original_error=None,
+        )
+
+    def create_proxy_user(
+        self,
+        username: str,
+        password: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+        traffic_limit: int = 0,
+        status: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Create a new proxy user (sub-account).
+
+        Args:
+            username: Username for the new user.
+            password: Password for the new user.
+            proxy_type: Proxy type (1=Residential, 2=Unlimited).
+            traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
+            status: Enable/disable user (True/False).
+
+        Returns:
+            API response data.
+
+        Example:
+            >>> result = client.create_proxy_user(
+            ...     username="subuser1",
+            ...     password="securepass",
+            ...     traffic_limit=5120,  # 5GB
+            ...     status=True
+            ... )
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+
+        payload = {
+            "proxy_type": str(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            "username": username,
+            "password": password,
+            "traffic_limit": str(traffic_limit),
+            "status": "true" if status else "false",
+        }
+
+        logger.info(f"Creating proxy user: {username}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._proxy_users_url}/create-user",
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def add_whitelist_ip(
+        self,
+        ip: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+        status: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Add an IP to the whitelist for IP authentication.
+
+        Args:
+            ip: IP address to whitelist.
+            proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
+            status: Enable/disable the IP (True/False).
+
+        Returns:
+            API response data.
+
+        Example:
+            >>> result = client.add_whitelist_ip(
+            ...     ip="123.45.67.89",
+            ...     proxy_type=ProxyType.RESIDENTIAL,
+            ...     status=True
+            ... )
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+
+        # Convert ProxyType to int
+        proxy_type_int = (
+            int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        )
+
+        payload = {
+            "proxy_type": str(proxy_type_int),
+            "ip": ip,
+            "status": "true" if status else "false",
+        }
+
+        logger.info(f"Adding whitelist IP: {ip}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._whitelist_url}/add-ip",
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def list_proxy_servers(
+        self,
+        proxy_type: int,
+    ) -> List[ProxyServer]:
+        """
+        List ISP or Datacenter proxy servers.
+
+        Args:
+            proxy_type: Proxy type (1=ISP, 2=Datacenter).
+
+        Returns:
+            List of ProxyServer objects.
+
+        Example:
+            >>> servers = client.list_proxy_servers(proxy_type=1)  # ISP proxies
+            >>> for server in servers:
+            ...     print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
+        """
+
+        self._require_public_credentials()
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(proxy_type),
+        }
+
+        logger.info(f"Listing proxy servers: type={proxy_type}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._proxy_list_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"List proxy servers error: {msg}", code=code, payload=data
+                )
+
+            # Extract list from data field
+            server_list = data.get("data", data.get("list", []))
+        elif isinstance(data, list):
+            server_list = data
+        else:
+            raise ThordataNetworkError(
+                f"Unexpected proxy list response: {type(data).__name__}",
+                original_error=None,
+            )
+
+        return [ProxyServer.from_dict(s) for s in server_list]
+
+    def get_isp_regions(self) -> List[Dict[str, Any]]:
+        """
+        Get available ISP proxy regions.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting ISP regions")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getRegionIsp",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
+
+        return data.get("data", [])
+
+    def list_isp_proxies(self) -> List[Dict[str, Any]]:
+        """
+        List ISP proxies.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Listing ISP proxies")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/queryListIsp",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
+
+        return data.get("data", [])
+
+    def get_wallet_balance(self) -> Dict[str, Any]:
+        """
+        Get wallet balance for ISP proxies.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting wallet balance")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getBalance",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def get_proxy_expiration(
+        self,
+        ips: Union[str, List[str]],
+        proxy_type: int,
+    ) -> Dict[str, Any]:
+        """
+        Get expiration time for specific proxy IPs.
+
+        Args:
+            ips: Single IP or list of IPs to check.
+            proxy_type: Proxy type (1=ISP, 2=Datacenter).
+
+        Returns:
+            Dict with expiration information.
+
+        Example:
+            >>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
+            >>> print(result)
+        """
+        self._require_public_credentials()
+
+        # Convert list to comma-separated string
+        if isinstance(ips, list):
+            ips = ",".join(ips)
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(proxy_type),
+            "ips": ips,
+        }
+
+        logger.info(f"Getting proxy expiration: {ips}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._proxy_expiration_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
+
+            return data.get("data", data)
+
+        return data
+
+    # =========================================================================
+    # Location API Methods (Country/State/City/ASN functions)
+    # =========================================================================
     def list_countries(
         self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
     ) -> List[Dict[str, Any]]:
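For reference: the gateway-style calls above (`getFlowBalance`, `usageRecord`, `getRegionIsp`, `queryListIsp`, `getBalance`) are plain form-encoded POSTs authenticated only by the `sign`/`apiKey` header pair that `_build_gateway_headers` fills from `public_token`/`public_key`. A raw `requests` sketch of the same wire format, with placeholder credentials:

    import requests

    GATEWAY_BASE = "https://api.thordata.com/api/gateway"  # default used in this diff

    headers = {
        "sign": "YOUR_PUBLIC_TOKEN",   # same value the SDK takes from public_token
        "apiKey": "YOUR_PUBLIC_KEY",   # same value the SDK takes from public_key
        "Content-Type": "application/x-www-form-urlencoded",
    }

    resp = requests.post(
        f"{GATEWAY_BASE}/getFlowBalance", headers=headers, data={}, timeout=60
    )
    payload = resp.json()
    if payload.get("code") != 200:
        raise RuntimeError(f"Gateway error: {payload}")
    print(payload.get("data", {}))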
@@ -978,7 +1768,11 @@ class ThordataClient:
         logger.debug(f"Locations API request: {url}")
 
         # Use requests.get directly (no proxy needed for this API)
-        response = self.
+        response = self._api_request_with_retry(
+            "GET",
+            url,
+            params=params,
+        )
         response.raise_for_status()
 
         data = response.json()
@@ -998,9 +1792,8 @@ class ThordataClient:
             return []
 
     # =========================================================================
-    # Helper Methods
+    # Helper Methods (Internal utility functions)
     # =========================================================================
-
     def _require_public_credentials(self) -> None:
         """Ensure public API credentials are available."""
         if not self.public_token or not self.public_key:
@@ -1009,6 +1802,185 @@ class ThordataClient:
                 "Please provide them when initializing ThordataClient."
             )
 
+    def _get_proxy_endpoint_overrides(
+        self, product: ProxyProduct
+    ) -> tuple[Optional[str], Optional[int], str]:
+        """
+        Read proxy endpoint overrides from env.
+
+        Priority:
+        1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
+        2) THORDATA_PROXY_HOST/PORT/PROTOCOL
+        3) defaults (host/port None => ProxyConfig will use its product defaults)
+        """
+        prefix = product.value.upper()  # RESIDENTIAL / DATACENTER / MOBILE / ISP
+
+        host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
+            "THORDATA_PROXY_HOST"
+        )
+        port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
+            "THORDATA_PROXY_PORT"
+        )
+        protocol = (
+            os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
+            or os.getenv("THORDATA_PROXY_PROTOCOL")
+            or "http"
+        )
+
+        port: Optional[int] = None
+        if port_raw:
+            try:
+                port = int(port_raw)
+            except ValueError:
+                port = None
+
+        return host or None, port, protocol
+
+    def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
+        """
+        Try to build a default ProxyConfig from env vars.
+
+        Priority order:
+        1) Residential
+        2) Datacenter
+        3) Mobile
+        """
+        # Residential
+        u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
+        p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.RESIDENTIAL
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.RESIDENTIAL,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        # Datacenter
+        u = os.getenv("THORDATA_DATACENTER_USERNAME")
+        p = os.getenv("THORDATA_DATACENTER_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.DATACENTER
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.DATACENTER,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        # Mobile
+        u = os.getenv("THORDATA_MOBILE_USERNAME")
+        p = os.getenv("THORDATA_MOBILE_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.MOBILE
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.MOBILE,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        return None
+
+    def _build_gateway_headers(self) -> Dict[str, str]:
+        """
+        Build headers for legacy gateway-style endpoints.
+
+        IMPORTANT:
+        - SDK does NOT expose "sign/apiKey" as a separate credential model.
+        - Values ALWAYS come from public_token/public_key.
+        - Some backend endpoints may still expect header field names "sign" and "apiKey".
+        """
+        self._require_public_credentials()
+        return {
+            "sign": self.public_token or "",
+            "apiKey": self.public_key or "",
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+
+    def _proxy_request_with_proxy_manager(
+        self,
+        method: str,
+        url: str,
+        *,
+        proxy_config: ProxyConfig,
+        timeout: int,
+        headers: Optional[Dict[str, str]] = None,
+        params: Optional[Dict[str, Any]] = None,
+        data: Any = None,
+    ) -> requests.Response:
+        """
+        Proxy Network request implemented via urllib3.ProxyManager.
+
+        This is required to reliably support HTTPS proxy endpoints like:
+        https://<endpoint>.pr.thordata.net:9999
+        """
+        # Build final URL (include query params)
+        req = requests.Request(method=method.upper(), url=url, params=params)
+        prepped = self._proxy_session.prepare_request(req)
+        final_url = prepped.url or url
+
+        proxy_url = proxy_config.build_proxy_endpoint()
+        proxy_headers = urllib3.make_headers(
+            proxy_basic_auth=proxy_config.build_proxy_basic_auth()
+        )
+
+        pm = urllib3.ProxyManager(
+            proxy_url,
+            proxy_headers=proxy_headers,
+            proxy_ssl_context=(
+                ssl.create_default_context()
+                if proxy_url.startswith("https://")
+                else None
+            ),
+        )
+
+        # Encode form data if dict
+        body = None
+        req_headers = dict(headers or {})
+        if data is not None:
+            if isinstance(data, dict):
+                # form-urlencoded
+                body = urlencode({k: str(v) for k, v in data.items()})
+                req_headers.setdefault(
+                    "Content-Type", "application/x-www-form-urlencoded"
+                )
+            else:
+                body = data
+
+        http_resp = pm.request(
+            method.upper(),
+            final_url,
+            body=body,
+            headers=req_headers or None,
+            timeout=urllib3.Timeout(connect=timeout, read=timeout),
+            retries=False,
+            preload_content=True,
+        )
+
+        # Convert urllib3 response -> requests.Response (keep your API stable)
+        r = requests.Response()
+        r.status_code = int(getattr(http_resp, "status", 0) or 0)
+        r._content = http_resp.data or b""
+        r.url = final_url
+        r.headers = requests.structures.CaseInsensitiveDict(
+            dict(http_resp.headers or {})
+        )
+        return r
+
     def _request_with_retry(
         self, method: str, url: str, **kwargs: Any
     ) -> requests.Response: