thordata-sdk 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +862 -233
- thordata/async_unlimited.py +130 -0
- thordata/client.py +1808 -1050
- thordata/demo.py +2 -2
- thordata/unlimited.py +102 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.4.0.dist-info}/METADATA +2 -2
- thordata_sdk-1.4.0.dist-info/RECORD +18 -0
- thordata_sdk-1.3.0.dist-info/RECORD +0 -16
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.4.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.4.0.dist-info}/top_level.txt +0 -0
thordata/async_client.py
CHANGED
|
@@ -23,10 +23,12 @@ Example:
|
|
|
23
23
|
from __future__ import annotations
|
|
24
24
|
|
|
25
25
|
import asyncio
|
|
26
|
+
import json
|
|
26
27
|
import logging
|
|
27
28
|
import os
|
|
28
29
|
from datetime import date
|
|
29
30
|
from typing import Any
|
|
31
|
+
from urllib.parse import quote
|
|
30
32
|
|
|
31
33
|
import aiohttp
|
|
32
34
|
|
|
@@ -40,6 +42,7 @@ from ._utils import (
|
|
|
40
42
|
extract_error_message,
|
|
41
43
|
parse_json_response,
|
|
42
44
|
)
|
|
45
|
+
from .async_unlimited import AsyncUnlimitedNamespace
|
|
43
46
|
from .enums import Engine, ProxyType
|
|
44
47
|
from .exceptions import (
|
|
45
48
|
ThordataConfigError,
|
|
@@ -65,9 +68,13 @@ from .serp_engines import AsyncSerpNamespace
|
|
|
65
68
|
logger = logging.getLogger(__name__)
|
|
66
69
|
|
|
67
70
|
|
|
71
|
+
# =========================================================================
|
|
72
|
+
# Main Client Class
|
|
73
|
+
# =========================================================================
|
|
74
|
+
|
|
75
|
+
|
|
68
76
|
class AsyncThordataClient:
|
|
69
|
-
"""
|
|
70
|
-
The official asynchronous Python client for Thordata.
|
|
77
|
+
"""The official asynchronous Python client for Thordata.
|
|
71
78
|
|
|
72
79
|
Designed for high-concurrency AI agents and data pipelines.
|
|
73
80
|
|
|
@@ -78,7 +85,13 @@ class AsyncThordataClient:
|
|
|
78
85
|
proxy_host: Custom proxy gateway host.
|
|
79
86
|
proxy_port: Custom proxy gateway port.
|
|
80
87
|
timeout: Default request timeout in seconds.
|
|
88
|
+
api_timeout: Default API request timeout in seconds.
|
|
81
89
|
retry_config: Configuration for automatic retries.
|
|
90
|
+
auth_mode: Authentication mode for scraping APIs ("bearer" or "header_token").
|
|
91
|
+
scraperapi_base_url: Override base URL for SERP API.
|
|
92
|
+
universalapi_base_url: Override base URL for Universal Scraping API.
|
|
93
|
+
web_scraper_api_base_url: Override base URL for Web Scraper API.
|
|
94
|
+
locations_base_url: Override base URL for Locations API.
|
|
82
95
|
|
|
83
96
|
Example:
|
|
84
97
|
>>> async with AsyncThordataClient(
|
|
@@ -86,10 +99,7 @@ class AsyncThordataClient:
|
|
|
86
99
|
... public_token="pub_token",
|
|
87
100
|
... public_key="pub_key"
|
|
88
101
|
... ) as client:
|
|
89
|
-
... # Old style
|
|
90
102
|
... results = await client.serp_search("python")
|
|
91
|
-
... # New style (Namespaced)
|
|
92
|
-
... maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
|
|
93
103
|
"""
|
|
94
104
|
|
|
95
105
|
# API Endpoints (same as sync client)
|
|
@@ -100,7 +110,7 @@ class AsyncThordataClient:
|
|
|
100
110
|
|
|
101
111
|
def __init__(
|
|
102
112
|
self,
|
|
103
|
-
scraper_token: str | None = None,
|
|
113
|
+
scraper_token: str | None = None,
|
|
104
114
|
public_token: str | None = None,
|
|
105
115
|
public_key: str | None = None,
|
|
106
116
|
proxy_host: str = "pr.thordata.net",
|
|
@@ -114,8 +124,23 @@ class AsyncThordataClient:
|
|
|
114
124
|
web_scraper_api_base_url: str | None = None,
|
|
115
125
|
locations_base_url: str | None = None,
|
|
116
126
|
) -> None:
|
|
117
|
-
"""Initialize the Async Thordata Client.
|
|
127
|
+
"""Initialize the Async Thordata Client.
|
|
118
128
|
|
|
129
|
+
Args:
|
|
130
|
+
scraper_token: Token for SERP/Universal scraping APIs.
|
|
131
|
+
public_token: Public API token for account/management operations.
|
|
132
|
+
public_key: Public API key for account/management operations.
|
|
133
|
+
proxy_host: Default proxy host for residential proxies.
|
|
134
|
+
proxy_port: Default proxy port for residential proxies.
|
|
135
|
+
timeout: Default timeout for proxy requests.
|
|
136
|
+
api_timeout: Default timeout for API requests.
|
|
137
|
+
retry_config: Configuration for retry behavior.
|
|
138
|
+
auth_mode: Authentication mode for scraper_token ("bearer" or "header_token").
|
|
139
|
+
scraperapi_base_url: Override base URL for SERP API.
|
|
140
|
+
universalapi_base_url: Override base URL for Universal Scraping API.
|
|
141
|
+
web_scraper_api_base_url: Override base URL for Web Scraper API.
|
|
142
|
+
locations_base_url: Override base URL for Locations API.
|
|
143
|
+
"""
|
|
119
144
|
self.scraper_token = scraper_token
|
|
120
145
|
self.public_token = public_token
|
|
121
146
|
self.public_key = public_key
|
|
@@ -207,6 +232,11 @@ class AsyncThordataClient:
|
|
|
207
232
|
|
|
208
233
|
# Namespaced Access (e.g. client.serp.google.maps(...))
|
|
209
234
|
self.serp = AsyncSerpNamespace(self)
|
|
235
|
+
self.unlimited = AsyncUnlimitedNamespace(self)
|
|
236
|
+
|
|
237
|
+
# =========================================================================
|
|
238
|
+
# Context Manager
|
|
239
|
+
# =========================================================================
|
|
210
240
|
|
|
211
241
|
async def __aenter__(self) -> AsyncThordataClient:
|
|
212
242
|
"""Async context manager entry."""
|
|
@@ -248,8 +278,7 @@ class AsyncThordataClient:
|
|
|
248
278
|
proxy_config: ProxyConfig | None = None,
|
|
249
279
|
**kwargs: Any,
|
|
250
280
|
) -> aiohttp.ClientResponse:
|
|
251
|
-
"""
|
|
252
|
-
Send an async GET request through the Proxy Network.
|
|
281
|
+
"""Send an async GET request through the Proxy Network.
|
|
253
282
|
|
|
254
283
|
Args:
|
|
255
284
|
url: The target URL.
|
|
@@ -258,6 +287,10 @@ class AsyncThordataClient:
|
|
|
258
287
|
|
|
259
288
|
Returns:
|
|
260
289
|
The aiohttp response object.
|
|
290
|
+
|
|
291
|
+
Note:
|
|
292
|
+
aiohttp has limited support for HTTPS proxies (TLS to proxy / TLS-in-TLS).
|
|
293
|
+
For HTTPS proxy endpoints, please use ThordataClient.get/post (sync client).
|
|
261
294
|
"""
|
|
262
295
|
session = self._get_session()
|
|
263
296
|
|
|
@@ -270,16 +303,13 @@ class AsyncThordataClient:
|
|
|
270
303
|
raise ThordataConfigError(
|
|
271
304
|
"Proxy credentials are missing. "
|
|
272
305
|
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
273
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD
|
|
306
|
+
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD."
|
|
274
307
|
)
|
|
275
308
|
|
|
276
|
-
# aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
|
|
277
|
-
# Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
|
|
278
|
-
# to avoid confusing "it always fails" behavior.
|
|
279
309
|
if getattr(proxy_config, "protocol", "http").lower() == "https":
|
|
280
310
|
raise ThordataConfigError(
|
|
281
|
-
"Proxy Network requires an HTTPS proxy endpoint
|
|
282
|
-
"aiohttp support for 'https://' proxies is limited
|
|
311
|
+
"Proxy Network requires an HTTPS proxy endpoint. "
|
|
312
|
+
"aiohttp support for 'https://' proxies is limited. "
|
|
283
313
|
"Please use ThordataClient.get/post (sync client) for Proxy Network requests."
|
|
284
314
|
)
|
|
285
315
|
proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
|
|
@@ -304,8 +334,7 @@ class AsyncThordataClient:
|
|
|
304
334
|
proxy_config: ProxyConfig | None = None,
|
|
305
335
|
**kwargs: Any,
|
|
306
336
|
) -> aiohttp.ClientResponse:
|
|
307
|
-
"""
|
|
308
|
-
Send an async POST request through the Proxy Network.
|
|
337
|
+
"""Send an async POST request through the Proxy Network.
|
|
309
338
|
|
|
310
339
|
Args:
|
|
311
340
|
url: The target URL.
|
|
@@ -326,16 +355,13 @@ class AsyncThordataClient:
|
|
|
326
355
|
raise ThordataConfigError(
|
|
327
356
|
"Proxy credentials are missing. "
|
|
328
357
|
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
329
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD
|
|
358
|
+
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD."
|
|
330
359
|
)
|
|
331
360
|
|
|
332
|
-
# aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
|
|
333
|
-
# Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
|
|
334
|
-
# to avoid confusing "it always fails" behavior.
|
|
335
361
|
if getattr(proxy_config, "protocol", "http").lower() == "https":
|
|
336
362
|
raise ThordataConfigError(
|
|
337
|
-
"Proxy Network requires an HTTPS proxy endpoint
|
|
338
|
-
"aiohttp support for 'https://' proxies is limited
|
|
363
|
+
"Proxy Network requires an HTTPS proxy endpoint. "
|
|
364
|
+
"aiohttp support for 'https://' proxies is limited. "
|
|
339
365
|
"Please use ThordataClient.get/post (sync client) for Proxy Network requests."
|
|
340
366
|
)
|
|
341
367
|
proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
|
|
@@ -372,18 +398,17 @@ class AsyncThordataClient:
|
|
|
372
398
|
output_format: str = "json",
|
|
373
399
|
**kwargs: Any,
|
|
374
400
|
) -> dict[str, Any]:
|
|
375
|
-
"""
|
|
376
|
-
Execute an async SERP search.
|
|
401
|
+
"""Execute an async SERP search.
|
|
377
402
|
|
|
378
403
|
Args:
|
|
379
404
|
query: Search keywords.
|
|
380
|
-
engine: Search engine.
|
|
405
|
+
engine: Search engine (GOOGLE, BING, etc.).
|
|
381
406
|
num: Number of results.
|
|
382
407
|
country: Country code for localization.
|
|
383
408
|
language: Language code.
|
|
384
|
-
search_type: Type of search.
|
|
409
|
+
search_type: Type of search (images, news, video, etc.).
|
|
385
410
|
device: Device type ('desktop', 'mobile', 'tablet').
|
|
386
|
-
render_js: Enable JavaScript rendering
|
|
411
|
+
render_js: Enable JavaScript rendering.
|
|
387
412
|
no_cache: Disable internal caching.
|
|
388
413
|
output_format: 'json' or 'html'.
|
|
389
414
|
**kwargs: Additional parameters.
|
|
@@ -456,8 +481,13 @@ class AsyncThordataClient:
|
|
|
456
481
|
) from e
|
|
457
482
|
|
|
458
483
|
async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
|
|
459
|
-
"""
|
|
460
|
-
|
|
484
|
+
"""Execute an async SERP search using a SerpRequest object.
|
|
485
|
+
|
|
486
|
+
Args:
|
|
487
|
+
request: SerpRequest object with search parameters.
|
|
488
|
+
|
|
489
|
+
Returns:
|
|
490
|
+
Parsed search results.
|
|
461
491
|
"""
|
|
462
492
|
session = self._get_session()
|
|
463
493
|
if not self.scraper_token:
|
|
@@ -521,17 +551,16 @@ class AsyncThordataClient:
|
|
|
521
551
|
wait_for: str | None = None,
|
|
522
552
|
**kwargs: Any,
|
|
523
553
|
) -> str | bytes:
|
|
524
|
-
"""
|
|
525
|
-
Async scrape using Universal API (Web Unlocker).
|
|
554
|
+
"""Async scrape using Universal API (Web Unlocker).
|
|
526
555
|
|
|
527
556
|
Args:
|
|
528
557
|
url: Target URL.
|
|
529
558
|
js_render: Enable JavaScript rendering.
|
|
530
559
|
output_format: "html" or "png".
|
|
531
560
|
country: Geo-targeting country.
|
|
532
|
-
block_resources: Resources to block.
|
|
533
|
-
wait: Wait time in
|
|
534
|
-
wait_for: CSS selector to wait for.
|
|
561
|
+
block_resources: Resources to block (e.g., "script,css").
|
|
562
|
+
wait: Wait time in milliseconds before fetching.
|
|
563
|
+
wait_for: CSS selector to wait for before fetching.
|
|
535
564
|
|
|
536
565
|
Returns:
|
|
537
566
|
HTML string or PNG bytes.
|
|
@@ -552,8 +581,13 @@ class AsyncThordataClient:
|
|
|
552
581
|
async def universal_scrape_advanced(
|
|
553
582
|
self, request: UniversalScrapeRequest
|
|
554
583
|
) -> str | bytes:
|
|
555
|
-
"""
|
|
556
|
-
|
|
584
|
+
"""Async scrape using a UniversalScrapeRequest object.
|
|
585
|
+
|
|
586
|
+
Args:
|
|
587
|
+
request: UniversalScrapeRequest object with scrape parameters.
|
|
588
|
+
|
|
589
|
+
Returns:
|
|
590
|
+
HTML string or PNG bytes.
|
|
557
591
|
"""
|
|
558
592
|
session = self._get_session()
|
|
559
593
|
if not self.scraper_token:
|
|
@@ -604,7 +638,7 @@ class AsyncThordataClient:
|
|
|
604
638
|
) from e
|
|
605
639
|
|
|
606
640
|
# =========================================================================
|
|
607
|
-
# Web Scraper API
|
|
641
|
+
# Web Scraper API - Task Management
|
|
608
642
|
# =========================================================================
|
|
609
643
|
|
|
610
644
|
async def create_scraper_task(
|
|
@@ -615,8 +649,17 @@ class AsyncThordataClient:
|
|
|
615
649
|
parameters: dict[str, Any],
|
|
616
650
|
universal_params: dict[str, Any] | None = None,
|
|
617
651
|
) -> str:
|
|
618
|
-
"""
|
|
619
|
-
|
|
652
|
+
"""Create an async Web Scraper task.
|
|
653
|
+
|
|
654
|
+
Args:
|
|
655
|
+
file_name: Name for the output file (supports {{TasksID}} template).
|
|
656
|
+
spider_id: Spider identifier from Dashboard.
|
|
657
|
+
spider_name: Spider name (target domain, e.g., "amazon.com").
|
|
658
|
+
parameters: Spider-specific parameters.
|
|
659
|
+
universal_params: Global spider settings.
|
|
660
|
+
|
|
661
|
+
Returns:
|
|
662
|
+
Task ID.
|
|
620
663
|
"""
|
|
621
664
|
config = ScraperTaskConfig(
|
|
622
665
|
file_name=file_name,
|
|
@@ -629,8 +672,13 @@ class AsyncThordataClient:
|
|
|
629
672
|
return await self.create_scraper_task_advanced(config)
|
|
630
673
|
|
|
631
674
|
async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
632
|
-
"""
|
|
633
|
-
|
|
675
|
+
"""Create a task using ScraperTaskConfig.
|
|
676
|
+
|
|
677
|
+
Args:
|
|
678
|
+
config: ScraperTaskConfig object with task configuration.
|
|
679
|
+
|
|
680
|
+
Returns:
|
|
681
|
+
Task ID.
|
|
634
682
|
"""
|
|
635
683
|
self._require_public_credentials()
|
|
636
684
|
session = self._get_session()
|
|
@@ -676,10 +724,18 @@ class AsyncThordataClient:
|
|
|
676
724
|
parameters: dict[str, Any],
|
|
677
725
|
common_settings: CommonSettings,
|
|
678
726
|
) -> str:
|
|
679
|
-
"""
|
|
680
|
-
Create a YouTube video/audio download task.
|
|
681
|
-
"""
|
|
727
|
+
"""Create a YouTube video/audio download task.
|
|
682
728
|
|
|
729
|
+
Args:
|
|
730
|
+
file_name: Name for the output file.
|
|
731
|
+
spider_id: Spider identifier (e.g., "youtube_video_by-url").
|
|
732
|
+
spider_name: Target site (e.g., "youtube.com").
|
|
733
|
+
parameters: Spider-specific parameters (URLs, etc.).
|
|
734
|
+
common_settings: Video/audio settings (resolution, subtitles, etc.).
|
|
735
|
+
|
|
736
|
+
Returns:
|
|
737
|
+
Task ID.
|
|
738
|
+
"""
|
|
683
739
|
config = VideoTaskConfig(
|
|
684
740
|
file_name=file_name,
|
|
685
741
|
spider_id=spider_id,
|
|
@@ -691,10 +747,14 @@ class AsyncThordataClient:
|
|
|
691
747
|
return await self.create_video_task_advanced(config)
|
|
692
748
|
|
|
693
749
|
async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
694
|
-
"""
|
|
695
|
-
|
|
696
|
-
|
|
750
|
+
"""Create a video task using VideoTaskConfig object.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
config: VideoTaskConfig object with task configuration.
|
|
697
754
|
|
|
755
|
+
Returns:
|
|
756
|
+
Task ID.
|
|
757
|
+
"""
|
|
698
758
|
self._require_public_credentials()
|
|
699
759
|
session = self._get_session()
|
|
700
760
|
if not self.scraper_token:
|
|
@@ -742,12 +802,17 @@ class AsyncThordataClient:
|
|
|
742
802
|
) from e
|
|
743
803
|
|
|
744
804
|
async def get_task_status(self, task_id: str) -> str:
|
|
745
|
-
"""
|
|
746
|
-
|
|
805
|
+
"""Check async task status.
|
|
806
|
+
|
|
807
|
+
Args:
|
|
808
|
+
task_id: Task identifier.
|
|
809
|
+
|
|
810
|
+
Returns:
|
|
811
|
+
Status string (running, success, failed, etc.).
|
|
747
812
|
|
|
748
813
|
Raises:
|
|
749
814
|
ThordataConfigError: If public credentials are missing.
|
|
750
|
-
ThordataAPIError: If API returns a non-200 code
|
|
815
|
+
ThordataAPIError: If API returns a non-200 code.
|
|
751
816
|
ThordataNetworkError: If network/HTTP request fails.
|
|
752
817
|
"""
|
|
753
818
|
self._require_public_credentials()
|
|
@@ -797,8 +862,7 @@ class AsyncThordataClient:
|
|
|
797
862
|
) from e
|
|
798
863
|
|
|
799
864
|
async def safe_get_task_status(self, task_id: str) -> str:
|
|
800
|
-
"""
|
|
801
|
-
Backward-compatible status check.
|
|
865
|
+
"""Backward-compatible status check.
|
|
802
866
|
|
|
803
867
|
Returns:
|
|
804
868
|
Status string, or "error" on any exception.
|
|
@@ -809,8 +873,14 @@ class AsyncThordataClient:
|
|
|
809
873
|
return "error"
|
|
810
874
|
|
|
811
875
|
async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
|
|
812
|
-
"""
|
|
813
|
-
|
|
876
|
+
"""Get download URL for completed task.
|
|
877
|
+
|
|
878
|
+
Args:
|
|
879
|
+
task_id: Task identifier.
|
|
880
|
+
file_type: File type to download (json, csv, video, audio, subtitle).
|
|
881
|
+
|
|
882
|
+
Returns:
|
|
883
|
+
Download URL.
|
|
814
884
|
"""
|
|
815
885
|
self._require_public_credentials()
|
|
816
886
|
session = self._get_session()
|
|
@@ -847,8 +917,7 @@ class AsyncThordataClient:
|
|
|
847
917
|
page: int = 1,
|
|
848
918
|
size: int = 20,
|
|
849
919
|
) -> dict[str, Any]:
|
|
850
|
-
"""
|
|
851
|
-
List all Web Scraper tasks.
|
|
920
|
+
"""List all Web Scraper tasks.
|
|
852
921
|
|
|
853
922
|
Args:
|
|
854
923
|
page: Page number (starts from 1).
|
|
@@ -904,10 +973,16 @@ class AsyncThordataClient:
|
|
|
904
973
|
poll_interval: float = 5.0,
|
|
905
974
|
max_wait: float = 600.0,
|
|
906
975
|
) -> str:
|
|
907
|
-
"""
|
|
908
|
-
Wait for a task to complete.
|
|
909
|
-
"""
|
|
976
|
+
"""Wait for a task to complete.
|
|
910
977
|
|
|
978
|
+
Args:
|
|
979
|
+
task_id: Task identifier.
|
|
980
|
+
poll_interval: Polling interval in seconds.
|
|
981
|
+
max_wait: Maximum time to wait in seconds.
|
|
982
|
+
|
|
983
|
+
Returns:
|
|
984
|
+
Final status of the task.
|
|
985
|
+
"""
|
|
911
986
|
import time
|
|
912
987
|
|
|
913
988
|
start = time.monotonic()
|
|
@@ -945,25 +1020,51 @@ class AsyncThordataClient:
|
|
|
945
1020
|
initial_poll_interval: float = 2.0,
|
|
946
1021
|
max_poll_interval: float = 10.0,
|
|
947
1022
|
include_errors: bool = True,
|
|
1023
|
+
task_type: str = "web",
|
|
1024
|
+
common_settings: CommonSettings | None = None,
|
|
948
1025
|
) -> str:
|
|
949
|
-
"""
|
|
950
|
-
Async high-level wrapper to Run a Web Scraper task and wait for result.
|
|
1026
|
+
"""Async high-level wrapper to run a task and wait for result.
|
|
951
1027
|
|
|
952
1028
|
Lifecycle: Create -> Poll (Backoff) -> Get Download URL.
|
|
953
1029
|
|
|
1030
|
+
Args:
|
|
1031
|
+
file_name: Name for the output file.
|
|
1032
|
+
spider_id: Spider identifier from Dashboard.
|
|
1033
|
+
spider_name: Spider name (target domain).
|
|
1034
|
+
parameters: Spider-specific parameters.
|
|
1035
|
+
universal_params: Global spider settings.
|
|
1036
|
+
max_wait: Maximum seconds to wait for completion.
|
|
1037
|
+
initial_poll_interval: Starting poll interval in seconds.
|
|
1038
|
+
max_poll_interval: Maximum poll interval cap.
|
|
1039
|
+
include_errors: Whether to include error logs.
|
|
1040
|
+
|
|
954
1041
|
Returns:
|
|
955
|
-
|
|
1042
|
+
The download URL.
|
|
956
1043
|
"""
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
1044
|
+
if task_type == "video":
|
|
1045
|
+
if common_settings is None:
|
|
1046
|
+
raise ValueError("common_settings is required for video tasks")
|
|
1047
|
+
|
|
1048
|
+
config_video = VideoTaskConfig(
|
|
1049
|
+
file_name=file_name,
|
|
1050
|
+
spider_id=spider_id,
|
|
1051
|
+
spider_name=spider_name,
|
|
1052
|
+
parameters=parameters,
|
|
1053
|
+
common_settings=common_settings,
|
|
1054
|
+
include_errors=include_errors,
|
|
1055
|
+
)
|
|
1056
|
+
task_id = await self.create_video_task_advanced(config_video)
|
|
1057
|
+
else:
|
|
1058
|
+
config = ScraperTaskConfig(
|
|
1059
|
+
file_name=file_name,
|
|
1060
|
+
spider_id=spider_id,
|
|
1061
|
+
spider_name=spider_name,
|
|
1062
|
+
parameters=parameters,
|
|
1063
|
+
universal_params=universal_params,
|
|
1064
|
+
include_errors=include_errors,
|
|
1065
|
+
)
|
|
1066
|
+
task_id = await self.create_scraper_task_advanced(config)
|
|
1067
|
+
|
|
967
1068
|
logger.info(f"Async Task created: {task_id}. Polling...")
|
|
968
1069
|
|
|
969
1070
|
# 2. Poll Status
|
|
@@ -978,7 +1079,6 @@ class AsyncThordataClient:
|
|
|
978
1079
|
|
|
979
1080
|
if status_lower in {"ready", "success", "finished"}:
|
|
980
1081
|
logger.info(f"Task {task_id} ready.")
|
|
981
|
-
# 3. Get Result
|
|
982
1082
|
return await self.get_task_result(task_id)
|
|
983
1083
|
|
|
984
1084
|
if status_lower in {"failed", "error", "cancelled"}:
|
|
@@ -992,7 +1092,7 @@ class AsyncThordataClient:
|
|
|
992
1092
|
raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
|
|
993
1093
|
|
|
994
1094
|
# =========================================================================
|
|
995
|
-
#
|
|
1095
|
+
# Account & Usage Methods
|
|
996
1096
|
# =========================================================================
|
|
997
1097
|
|
|
998
1098
|
async def get_usage_statistics(
|
|
@@ -1000,8 +1100,7 @@ class AsyncThordataClient:
|
|
|
1000
1100
|
from_date: str | date,
|
|
1001
1101
|
to_date: str | date,
|
|
1002
1102
|
) -> UsageStatistics:
|
|
1003
|
-
"""
|
|
1004
|
-
Get account usage statistics for a date range.
|
|
1103
|
+
"""Get account usage statistics for a date range.
|
|
1005
1104
|
|
|
1006
1105
|
Args:
|
|
1007
1106
|
from_date: Start date (YYYY-MM-DD string or date object).
|
|
@@ -1010,7 +1109,6 @@ class AsyncThordataClient:
|
|
|
1010
1109
|
Returns:
|
|
1011
1110
|
UsageStatistics object with traffic data.
|
|
1012
1111
|
"""
|
|
1013
|
-
|
|
1014
1112
|
self._require_public_credentials()
|
|
1015
1113
|
session = self._get_session()
|
|
1016
1114
|
|
|
@@ -1066,10 +1164,12 @@ class AsyncThordataClient:
|
|
|
1066
1164
|
) from e
|
|
1067
1165
|
|
|
1068
1166
|
async def get_residential_balance(self) -> dict[str, Any]:
|
|
1069
|
-
"""
|
|
1070
|
-
Get residential proxy balance.
|
|
1167
|
+
"""Get residential proxy balance.
|
|
1071
1168
|
|
|
1072
|
-
Uses public_token/public_key.
|
|
1169
|
+
Uses public_token/public_key via gateway API.
|
|
1170
|
+
|
|
1171
|
+
Returns:
|
|
1172
|
+
Balance data dictionary.
|
|
1073
1173
|
"""
|
|
1074
1174
|
session = self._get_session()
|
|
1075
1175
|
headers = self._build_gateway_headers()
|
|
@@ -1109,10 +1209,16 @@ class AsyncThordataClient:
|
|
|
1109
1209
|
start_time: str | int,
|
|
1110
1210
|
end_time: str | int,
|
|
1111
1211
|
) -> dict[str, Any]:
|
|
1112
|
-
"""
|
|
1113
|
-
|
|
1212
|
+
"""Get residential proxy usage records.
|
|
1213
|
+
|
|
1214
|
+
Uses public_token/public_key via gateway API.
|
|
1114
1215
|
|
|
1115
|
-
|
|
1216
|
+
Args:
|
|
1217
|
+
start_time: Start timestamp or date string.
|
|
1218
|
+
end_time: End timestamp or date string.
|
|
1219
|
+
|
|
1220
|
+
Returns:
|
|
1221
|
+
Usage data dictionary.
|
|
1116
1222
|
"""
|
|
1117
1223
|
session = self._get_session()
|
|
1118
1224
|
headers = self._build_gateway_headers()
|
|
@@ -1146,11 +1252,186 @@ class AsyncThordataClient:
|
|
|
1146
1252
|
f"Get usage failed: {e}", original_error=e
|
|
1147
1253
|
) from e
|
|
1148
1254
|
|
|
1255
|
+
async def get_traffic_balance(self) -> float:
    """Get the account traffic balance in KB via the Public API.

    Sends a GET request to ``<api_base>/account/traffic-balance``, where
    ``api_base`` is the locations base URL with ``/locations`` removed.
    The public token and key are sent as query parameters.

    Returns:
        The ``traffic_balance`` value from the response ``data`` object as a
        float. Returns ``0.0`` when ``data`` or ``traffic_balance`` is
        missing or null.

    Raises:
        ThordataNetworkError: If the request fails with an aiohttp client
            error.

    A response whose ``code`` is not 200 is handed to ``raise_for_code``.
    """
    self._require_public_credentials()
    # The credential check above guarantees the tokens are set at runtime;
    # str() only keeps the query parameters typed as plain strings.
    params = {
        "token": str(self.public_token),
        "key": str(self.public_key),
    }
    api_base = self._locations_base_url.replace("/locations", "")

    try:
        async with self._get_session().get(
            f"{api_base}/account/traffic-balance", params=params
        ) as resp:
            data = await resp.json()
            if data.get("code") != 200:
                raise_for_code(
                    "Get traffic balance failed",
                    code=data.get("code"),
                    payload=data,
                )
            # "data" can be present but null; dict.get's default only covers
            # a missing key, so normalise null to an empty object here.
            balance_info = data.get("data") or {}
            return float(balance_info.get("traffic_balance") or 0)
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
1281
|
+
|
|
1282
|
+
async def get_proxy_user_usage(
    self,
    username: str,
    start_date: str | date,
    end_date: str | date,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[dict[str, Any]]:
    """Fetch usage statistics for one proxy user.

    Args:
        username: Proxy user whose usage is requested.
        start_date: Range start; a ``date`` is formatted as ``YYYY-MM-DD``,
            a string is sent unchanged.
        end_date: Range end; handled the same way as ``start_date``.
        proxy_type: Proxy product, as a ``ProxyType`` member or its integer
            value.

    Returns:
        The ``data`` field of the API response, or an empty list when it is
        missing or empty.

    Raises:
        ThordataNetworkError: If the request fails with an aiohttp client
            error.

    A response whose ``code`` is not 200 is handed to ``raise_for_code``.
    """
    self._require_public_credentials()

    # Enum members are converted to their integer value; raw ints pass through.
    if isinstance(proxy_type, ProxyType):
        proxy_type_value = int(proxy_type)
    else:
        proxy_type_value = proxy_type

    from_day = (
        start_date.strftime("%Y-%m-%d") if isinstance(start_date, date) else start_date
    )
    to_day = end_date.strftime("%Y-%m-%d") if isinstance(end_date, date) else end_date

    query = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type_value),
        "username": username,
        "from_date": from_day,
        "to_date": to_day,
    }

    try:
        session = self._get_session()
        async with session.get(
            f"{self._proxy_users_url}/usage-statistics", params=query
        ) as resp:
            body = await resp.json()
            api_code = body.get("code")
            if api_code != 200:
                raise_for_code("Get user usage failed", code=api_code, payload=body)
            records = body.get("data")
            return records if records else []
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
1319
|
+
|
|
1320
|
+
async def extract_ip_list(
    self,
    num: int = 1,
    country: str | None = None,
    state: str | None = None,
    city: str | None = None,
    time_limit: int | None = None,
    port: int | None = None,
    return_type: str = "txt",
    protocol: str = "http",
    sep: str = "\r\n",
    product: str = "residential",
) -> list[str]:
    """Request proxy endpoints from the IP extraction service.

    Calls ``https://get-ip.thordata.net/unlimited_api`` when ``product`` is
    ``"unlimited"`` and ``https://get-ip.thordata.net/api`` otherwise.

    Args:
        num: Value sent as the ``num`` query parameter.
        country: Optional ``country`` filter; omitted when empty.
        state: Optional ``state`` filter; omitted when empty.
        city: Optional ``city`` filter; omitted when empty.
        time_limit: Sent as the ``time`` parameter when truthy.
        port: Sent as the ``port`` parameter when truthy.
        return_type: ``"json"`` parses a JSON body; any other value treats
            the body as separator-delimited text.
        protocol: Value sent as the ``protocol`` query parameter.
        sep: Separator sent to the API and used to split a text response.
            Literal ``\\r`` / ``\\n`` escape sequences are converted to the
            real control characters before splitting.
        product: Selects the endpoint (see above).

    Returns:
        ``"ip:port"`` strings for a successful JSON response, the non-empty
        stripped pieces of a text response, or an empty list when the JSON
        body is not an object.

    Raises:
        ThordataNetworkError: If the request fails with an aiohttp client
            error.

    JSON error payloads are handed to ``raise_for_code``. If the
    ``THORDATA_RESIDENTIAL_USERNAME`` environment variable is set, it is
    sent as the ``td-customer`` parameter.
    """
    path = "/unlimited_api" if product == "unlimited" else "/api"
    url = f"https://get-ip.thordata.net{path}"

    params: dict[str, Any] = {
        "num": str(num),
        "return_type": return_type,
        "protocol": protocol,
        "sep": sep,
    }
    # Optional parameters are added only when truthy, in a fixed order.
    optional = (
        ("country", country),
        ("state", state),
        ("city", city),
        ("time", str(time_limit) if time_limit else None),
        ("port", str(port) if port else None),
        ("td-customer", os.getenv("THORDATA_RESIDENTIAL_USERNAME")),
    )
    params.update({key: value for key, value in optional if value})

    try:
        async with self._get_session().get(url, params=params) as resp:
            if return_type == "json":
                payload = await resp.json()
                if not isinstance(payload, dict):
                    return []
                if payload.get("code") in (0, 200):
                    entries = payload.get("data") or []
                    return [f"{entry['ip']}:{entry['port']}" for entry in entries]
                raise_for_code(
                    "Extract IPs failed",
                    code=payload.get("code"),
                    payload=payload,
                )
                return []

            body = (await resp.text()).strip()
            # A text request can still be answered with a JSON error object.
            if body.startswith("{") and "code" in body:
                try:
                    err_payload = json.loads(body)
                    raise_for_code(
                        "Extract IPs failed",
                        code=err_payload.get("code"),
                        payload=err_payload,
                    )
                except json.JSONDecodeError:
                    # Not JSON after all: fall through and split it as text.
                    pass

            delimiter = sep.replace("\\r", "\r").replace("\\n", "\n")
            return [
                cleaned
                for piece in body.split(delimiter)
                if (cleaned := piece.strip())
            ]

    except aiohttp.ClientError as e:
        raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
1396
|
+
|
|
1397
|
+
async def get_wallet_balance(self) -> float:
    """Get the account wallet balance via the Public API.

    Sends a GET request to ``<api_base>/account/wallet-balance``, where
    ``api_base`` is the locations base URL with ``/locations`` removed.
    The public token and key are sent as query parameters.

    Returns:
        The ``balance`` value from the response ``data`` object as a float.
        Returns ``0.0`` when ``data`` or ``balance`` is missing or null.

    Raises:
        ThordataNetworkError: If the request fails with an aiohttp client
            error.

    A response whose ``code`` is not 200 is handed to ``raise_for_code``.
    """
    self._require_public_credentials()
    # str() keeps the query parameters typed as plain strings.
    params = {
        "token": str(self.public_token),
        "key": str(self.public_key),
    }
    api_base = self._locations_base_url.replace("/locations", "")

    try:
        async with self._get_session().get(
            f"{api_base}/account/wallet-balance", params=params
        ) as resp:
            data = await resp.json()
            if data.get("code") != 200:
                raise_for_code(
                    "Get wallet balance failed", code=data.get("code"), payload=data
                )
            # "data" can be present but null; dict.get's default only covers
            # a missing key, so normalise null to an empty object here.
            wallet_info = data.get("data") or {}
            return float(wallet_info.get("balance") or 0)
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
1419
|
+
|
|
1420
|
+
# =========================================================================
|
|
1421
|
+
# Proxy Users Management (Sub-accounts)
|
|
1422
|
+
# =========================================================================
|
|
1423
|
+
|
|
1149
1424
|
async def list_proxy_users(
|
|
1150
1425
|
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
1151
1426
|
) -> ProxyUserList:
|
|
1152
|
-
"""List all proxy users (sub-accounts).
|
|
1427
|
+
"""List all proxy users (sub-accounts).
|
|
1428
|
+
|
|
1429
|
+
Args:
|
|
1430
|
+
proxy_type: Proxy product type.
|
|
1153
1431
|
|
|
1432
|
+
Returns:
|
|
1433
|
+
ProxyUserList with user information.
|
|
1434
|
+
"""
|
|
1154
1435
|
self._require_public_credentials()
|
|
1155
1436
|
session = self._get_session()
|
|
1156
1437
|
|
|
@@ -1206,7 +1487,18 @@ class AsyncThordataClient:
|
|
|
1206
1487
|
traffic_limit: int = 0,
|
|
1207
1488
|
status: bool = True,
|
|
1208
1489
|
) -> dict[str, Any]:
|
|
1209
|
-
"""Create a new proxy user (sub-account).
|
|
1490
|
+
"""Create a new proxy user (sub-account).
|
|
1491
|
+
|
|
1492
|
+
Args:
|
|
1493
|
+
username: Sub-account username.
|
|
1494
|
+
password: Sub-account password.
|
|
1495
|
+
proxy_type: Proxy product type.
|
|
1496
|
+
traffic_limit: Traffic limit in MB (0 = unlimited).
|
|
1497
|
+
status: Enable or disable the account.
|
|
1498
|
+
|
|
1499
|
+
Returns:
|
|
1500
|
+
API response data.
|
|
1501
|
+
"""
|
|
1210
1502
|
self._require_public_credentials()
|
|
1211
1503
|
session = self._get_session()
|
|
1212
1504
|
|
|
@@ -1254,14 +1546,132 @@ class AsyncThordataClient:
|
|
|
1254
1546
|
f"Create user failed: {e}", original_error=e
|
|
1255
1547
|
) from e
|
|
1256
1548
|
|
|
1549
|
+
async def update_proxy_user(
    self,
    username: str,
    password: str,
    traffic_limit: int | None = None,
    status: bool | None = None,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> dict[str, Any]:
    """Update an existing proxy user (sub-account).

    Args:
        username: Sub-account username to update.
        password: Sub-account password; always sent with the request.
        traffic_limit: New traffic limit. Left out of the request when None.
        status: Enable (True) or disable (False) the account. Left out of
            the request when None.
        proxy_type: Proxy product type.

    Returns:
        The ``data`` field of the API response, or an empty dict if absent.

    Raises:
        ThordataTimeoutError: If the request times out.
        ThordataNetworkError: If the HTTP request fails.
    """
    self._require_public_credentials()
    session = self._get_session()
    headers = build_public_api_headers(
        self.public_token or "", self.public_key or ""
    )

    payload = {
        "proxy_type": str(
            int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
        ),
        "username": username,
        "password": password,
    }
    # Optional fields are only transmitted when the caller supplied them, so
    # an omitted argument never overwrites the stored value with a default.
    if traffic_limit is not None:
        payload["traffic_limit"] = str(traffic_limit)
    if status is not None:
        payload["status"] = "true" if status else "false"

    try:
        async with session.post(
            f"{self._proxy_users_url}/update-user",
            data=payload,
            headers=headers,
            timeout=self._api_timeout,
        ) as response:
            response.raise_for_status()
            data = await response.json()

            if data.get("code") != 200:
                raise_for_code(
                    f"Update user failed: {data.get('msg')}",
                    code=data.get("code"),
                    payload=data,
                )

            return data.get("data", {})

    # A total-request timeout is not an aiohttp.ClientError, so it needs its
    # own handler; this mirrors the other proxy-user and whitelist methods.
    except asyncio.TimeoutError as e:
        raise ThordataTimeoutError(
            f"Update user timed out: {e}", original_error=e
        ) from e
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(
            f"Update user failed: {e}", original_error=e
        ) from e
|
|
1599
|
+
|
|
1600
|
+
async def delete_proxy_user(
    self,
    username: str,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> dict[str, Any]:
    """Remove a proxy user (sub-account) through the public API.

    Args:
        username: Username of the sub-account to delete.
        proxy_type: Proxy product type.

    Returns:
        The ``data`` field of the API response, or an empty dict if absent.

    Raises:
        ThordataTimeoutError: If the request times out.
        ThordataNetworkError: If the HTTP request fails.
    """
    self._require_public_credentials()
    session = self._get_session()

    headers = build_public_api_headers(
        self.public_token or "", self.public_key or ""
    )

    # Enum members are reduced to their integer value before stringifying.
    type_value = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    payload = {"proxy_type": str(type_value), "username": username}

    try:
        async with session.post(
            f"{self._proxy_users_url}/delete-user",
            data=payload,
            headers=headers,
            timeout=self._api_timeout,
        ) as response:
            response.raise_for_status()
            data = await response.json()

            code = data.get("code")
            if code != 200:
                msg = extract_error_message(data)
                raise_for_code(f"Delete user failed: {msg}", code=code, payload=data)

            return data.get("data", {})

    except asyncio.TimeoutError as e:
        raise ThordataTimeoutError(
            f"Delete user timed out: {e}", original_error=e
        ) from e
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(
            f"Delete user failed: {e}", original_error=e
        ) from e
|
|
1655
|
+
|
|
1656
|
+
# =========================================================================
|
|
1657
|
+
# Whitelist IP Management
|
|
1658
|
+
# =========================================================================
|
|
1659
|
+
|
|
1257
1660
|
async def add_whitelist_ip(
|
|
1258
1661
|
self,
|
|
1259
1662
|
ip: str,
|
|
1260
1663
|
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1261
1664
|
status: bool = True,
|
|
1262
1665
|
) -> dict[str, Any]:
|
|
1263
|
-
"""
|
|
1264
|
-
|
|
1666
|
+
"""Add an IP to the whitelist for IP authentication.
|
|
1667
|
+
|
|
1668
|
+
Args:
|
|
1669
|
+
ip: IP address to whitelist.
|
|
1670
|
+
proxy_type: Proxy product type.
|
|
1671
|
+
status: Enable or disable the whitelist entry.
|
|
1672
|
+
|
|
1673
|
+
Returns:
|
|
1674
|
+
API response data.
|
|
1265
1675
|
"""
|
|
1266
1676
|
self._require_public_credentials()
|
|
1267
1677
|
session = self._get_session()
|
|
@@ -1310,19 +1720,245 @@ class AsyncThordataClient:
|
|
|
1310
1720
|
f"Add whitelist failed: {e}", original_error=e
|
|
1311
1721
|
) from e
|
|
1312
1722
|
|
|
1313
|
-
async def
|
|
1723
|
+
async def delete_whitelist_ip(
    self,
    ip: str,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> dict[str, Any]:
    """Remove an IP address from the whitelist.

    Args:
        ip: The IP address to remove.
        proxy_type: Proxy product type.

    Returns:
        The ``data`` field of the API response, or an empty dict if absent.

    Raises:
        ThordataTimeoutError: If the request times out.
        ThordataNetworkError: If the HTTP request fails.
    """
    self._require_public_credentials()
    session = self._get_session()

    headers = build_public_api_headers(
        self.public_token or "", self.public_key or ""
    )

    # Enum members are reduced to their integer value before stringifying.
    type_value = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    payload = {"proxy_type": str(type_value), "ip": ip}

    try:
        async with session.post(
            f"{self._whitelist_url}/delete-ip",
            data=payload,
            headers=headers,
            timeout=self._api_timeout,
        ) as response:
            response.raise_for_status()
            data = await response.json()

            code = data.get("code")
            if code != 200:
                msg = extract_error_message(data)
                raise_for_code(
                    f"Delete whitelist IP failed: {msg}", code=code, payload=data
                )

            return data.get("data", {})

    except asyncio.TimeoutError as e:
        raise ThordataTimeoutError(
            f"Delete whitelist timed out: {e}", original_error=e
        ) from e
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(
            f"Delete whitelist failed: {e}", original_error=e
        ) from e
|
|
1778
|
+
|
|
1779
|
+
async def list_whitelist_ips(
    self,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[str]:
    """List all whitelisted IPs.

    Args:
        proxy_type: Proxy product type.

    Returns:
        List of IP address strings. Empty when the response carries no
        entries, including when its ``data`` field is missing or null.

    Raises:
        ThordataTimeoutError: If the request times out.
        ThordataNetworkError: If the HTTP request fails, or the decoded
            response body is not a JSON object.
    """
    self._require_public_credentials()
    session = self._get_session()

    # Credentials travel as query parameters here; None values are dropped
    # so they are never serialized into the URL.
    params = {
        k: v
        for k, v in {
            "token": self.public_token,
            "key": self.public_key,
            "proxy_type": str(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
        }.items()
        if v is not None
    }

    try:
        async with session.get(
            f"{self._whitelist_url}/ip-list",
            params=params,
            timeout=self._api_timeout,
        ) as response:
            response.raise_for_status()
            data = await response.json()

            if isinstance(data, dict):
                code = data.get("code")
                if code is not None and code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"List whitelist IPs error: {msg}", code=code, payload=data
                    )

                # `or []` covers an explicit JSON null as well as a missing
                # key; a plain .get default would hand None to the loop.
                items = data.get("data") or []
                result = []
                for item in items:
                    if isinstance(item, str):
                        result.append(item)
                    elif isinstance(item, dict) and "ip" in item:
                        result.append(str(item["ip"]))
                    else:
                        result.append(str(item))
                return result

            raise ThordataNetworkError(
                f"Unexpected whitelist response: {type(data).__name__}",
                original_error=None,
            )

    except asyncio.TimeoutError as e:
        raise ThordataTimeoutError(
            f"List whitelist timed out: {e}", original_error=e
        ) from e
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(
            f"List whitelist failed: {e}", original_error=e
        ) from e
|
|
1847
|
+
|
|
1848
|
+
# =========================================================================
|
|
1849
|
+
# Locations & ASN Methods
|
|
1850
|
+
# =========================================================================
|
|
1851
|
+
|
|
1852
|
+
async def list_countries(
    self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
) -> list[dict[str, Any]]:
    """Return the countries available as proxy locations.

    Args:
        proxy_type: Proxy product type.

    Returns:
        List of country dictionaries.
    """
    # Enum members are passed on as their plain integer value.
    if isinstance(proxy_type, ProxyType):
        proxy_type = int(proxy_type)
    return await self._get_locations("countries", proxy_type=proxy_type)
|
|
1869
|
+
|
|
1870
|
+
async def list_states(
    self,
    country_code: str,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[dict[str, Any]]:
    """Return the states/provinces available for a country.

    Args:
        country_code: Country code (e.g., "US", "GB").
        proxy_type: Proxy product type.

    Returns:
        List of state dictionaries.
    """
    # Enum members are passed on as their plain integer value.
    if isinstance(proxy_type, ProxyType):
        proxy_type = int(proxy_type)
    return await self._get_locations(
        "states", proxy_type=proxy_type, country_code=country_code
    )
|
|
1891
|
+
|
|
1892
|
+
async def list_cities(
    self,
    country_code: str,
    state_code: str | None = None,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[dict[str, Any]]:
    """Return the cities available for a country, optionally within a state.

    Args:
        country_code: Country code.
        state_code: State code (optional). Only forwarded when non-empty.
        proxy_type: Proxy product type.

    Returns:
        List of city dictionaries.
    """
    # Enum members are passed on as their plain integer value.
    if isinstance(proxy_type, ProxyType):
        proxy_type = int(proxy_type)

    # A missing or empty state code is left out of the query entirely.
    state_filter = {"state_code": state_code} if state_code else {}

    return await self._get_locations(
        "cities",
        proxy_type=proxy_type,
        country_code=country_code,
        **state_filter,
    )
|
|
1918
|
+
|
|
1919
|
+
async def list_asn(
    self,
    country_code: str,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[dict[str, Any]]:
    """Return the ASNs available for a country.

    Args:
        country_code: Country code.
        proxy_type: Proxy product type.

    Returns:
        List of ASN dictionaries.
    """
    # Enum members are passed on as their plain integer value.
    if isinstance(proxy_type, ProxyType):
        proxy_type = int(proxy_type)
    return await self._get_locations(
        "asn", proxy_type=proxy_type, country_code=country_code
    )
|
|
1940
|
+
|
|
1941
|
+
# =========================================================================
|
|
1942
|
+
# ISP & Datacenter Proxy Management
|
|
1943
|
+
# =========================================================================
|
|
1944
|
+
|
|
1945
|
+
async def list_proxy_servers(
|
|
1946
|
+
self,
|
|
1947
|
+
proxy_type: int,
|
|
1948
|
+
) -> list[ProxyServer]:
|
|
1949
|
+
"""List ISP or Datacenter proxy servers.
|
|
1950
|
+
|
|
1951
|
+
Args:
|
|
1952
|
+
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1953
|
+
|
|
1954
|
+
Returns:
|
|
1955
|
+
List of ProxyServer objects.
|
|
1956
|
+
"""
|
|
1957
|
+
self._require_public_credentials()
|
|
1958
|
+
session = self._get_session()
|
|
1959
|
+
|
|
1960
|
+
params = {
|
|
1961
|
+
"token": self.public_token,
|
|
1326
1962
|
"key": self.public_key,
|
|
1327
1963
|
"proxy_type": str(proxy_type),
|
|
1328
1964
|
}
|
|
@@ -1366,11 +2002,72 @@ class AsyncThordataClient:
|
|
|
1366
2002
|
f"List servers failed: {e}", original_error=e
|
|
1367
2003
|
) from e
|
|
1368
2004
|
|
|
1369
|
-
async def
|
|
2005
|
+
async def get_proxy_expiration(
    self,
    ips: str | list[str],
    proxy_type: int,
) -> dict[str, Any]:
    """Fetch expiration information for one or more proxy IPs.

    Args:
        ips: A single IP string, or a list of IPs (joined with commas
            before being sent).
        proxy_type: Proxy type (1=ISP, 2=Datacenter).

    Returns:
        When the response body is a JSON object, its ``data`` field (or the
        whole object if there is no ``data`` key); otherwise the decoded
        body unchanged.

    Raises:
        ThordataTimeoutError: If the request times out.
        ThordataNetworkError: If the HTTP request fails.
    """
    self._require_public_credentials()
    session = self._get_session()

    # The endpoint takes a single comma-separated string.
    ips = ",".join(ips) if isinstance(ips, list) else ips

    params = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type),
        "ips": ips,
    }

    logger.info(f"Async getting proxy expiration: {ips}")

    try:
        async with session.get(
            self._proxy_expiration_url,
            params=params,
            timeout=self._api_timeout,
        ) as response:
            response.raise_for_status()
            data = await response.json()

            # Non-object bodies are handed back untouched.
            if not isinstance(data, dict):
                return data

            code = data.get("code")
            if code not in (None, 200):
                msg = extract_error_message(data)
                raise_for_code(
                    f"Get expiration error: {msg}", code=code, payload=data
                )

            return data.get("data", data)

    except asyncio.TimeoutError as e:
        raise ThordataTimeoutError(
            f"Get expiration timed out: {e}", original_error=e
        ) from e
    except aiohttp.ClientError as e:
        raise ThordataNetworkError(
            f"Get expiration failed: {e}", original_error=e
        ) from e
|
|
2063
|
+
|
|
2064
|
+
async def get_isp_regions(self) -> list[dict[str, Any]]:
|
|
2065
|
+
"""Get available ISP proxy regions.
|
|
2066
|
+
|
|
2067
|
+
Uses public_token/public_key via gateway API.
|
|
2068
|
+
|
|
2069
|
+
Returns:
|
|
2070
|
+
List of ISP region dictionaries.
|
|
1374
2071
|
"""
|
|
1375
2072
|
session = self._get_session()
|
|
1376
2073
|
headers = self._build_gateway_headers()
|
|
@@ -1406,10 +2103,12 @@ class AsyncThordataClient:
|
|
|
1406
2103
|
) from e
|
|
1407
2104
|
|
|
1408
2105
|
async def list_isp_proxies(self) -> list[dict[str, Any]]:
|
|
1409
|
-
"""
|
|
1410
|
-
|
|
2106
|
+
"""List ISP proxies.
|
|
2107
|
+
|
|
2108
|
+
Uses public_token/public_key via gateway API.
|
|
1411
2109
|
|
|
1412
|
-
|
|
2110
|
+
Returns:
|
|
2111
|
+
List of ISP proxy dictionaries.
|
|
1413
2112
|
"""
|
|
1414
2113
|
session = self._get_session()
|
|
1415
2114
|
headers = self._build_gateway_headers()
|
|
@@ -1444,11 +2143,13 @@ class AsyncThordataClient:
|
|
|
1444
2143
|
f"List ISP proxies failed: {e}", original_error=e
|
|
1445
2144
|
) from e
|
|
1446
2145
|
|
|
1447
|
-
async def
|
|
1448
|
-
"""
|
|
1449
|
-
|
|
2146
|
+
async def get_isp_wallet_balance(self) -> dict[str, Any]:
|
|
2147
|
+
"""Get wallet balance for ISP proxies.
|
|
2148
|
+
|
|
2149
|
+
Uses public_token/public_key via gateway API.
|
|
1450
2150
|
|
|
1451
|
-
|
|
2151
|
+
Returns:
|
|
2152
|
+
Wallet balance data dictionary.
|
|
1452
2153
|
"""
|
|
1453
2154
|
session = self._get_session()
|
|
1454
2155
|
headers = self._build_gateway_headers()
|
|
@@ -1483,124 +2184,35 @@ class AsyncThordataClient:
|
|
|
1483
2184
|
f"Get wallet balance failed: {e}", original_error=e
|
|
1484
2185
|
) from e
|
|
1485
2186
|
|
|
1486
|
-
async def get_proxy_expiration(
|
|
1487
|
-
self,
|
|
1488
|
-
ips: str | list[str],
|
|
1489
|
-
proxy_type: int,
|
|
1490
|
-
) -> dict[str, Any]:
|
|
1491
|
-
"""
|
|
1492
|
-
Get expiration time for specific proxy IPs.
|
|
1493
|
-
"""
|
|
1494
|
-
self._require_public_credentials()
|
|
1495
|
-
session = self._get_session()
|
|
1496
|
-
|
|
1497
|
-
if isinstance(ips, list):
|
|
1498
|
-
ips = ",".join(ips)
|
|
1499
|
-
|
|
1500
|
-
params = {
|
|
1501
|
-
"token": self.public_token,
|
|
1502
|
-
"key": self.public_key,
|
|
1503
|
-
"proxy_type": str(proxy_type),
|
|
1504
|
-
"ips": ips,
|
|
1505
|
-
}
|
|
1506
|
-
|
|
1507
|
-
logger.info(f"Async getting proxy expiration: {ips}")
|
|
1508
|
-
|
|
1509
|
-
try:
|
|
1510
|
-
async with session.get(
|
|
1511
|
-
self._proxy_expiration_url,
|
|
1512
|
-
params=params,
|
|
1513
|
-
timeout=self._api_timeout,
|
|
1514
|
-
) as response:
|
|
1515
|
-
response.raise_for_status()
|
|
1516
|
-
data = await response.json()
|
|
1517
|
-
|
|
1518
|
-
if isinstance(data, dict):
|
|
1519
|
-
code = data.get("code")
|
|
1520
|
-
if code is not None and code != 200:
|
|
1521
|
-
msg = extract_error_message(data)
|
|
1522
|
-
raise_for_code(
|
|
1523
|
-
f"Get expiration error: {msg}", code=code, payload=data
|
|
1524
|
-
)
|
|
1525
|
-
|
|
1526
|
-
return data.get("data", data)
|
|
1527
|
-
|
|
1528
|
-
return data
|
|
1529
|
-
|
|
1530
|
-
except asyncio.TimeoutError as e:
|
|
1531
|
-
raise ThordataTimeoutError(
|
|
1532
|
-
f"Get expiration timed out: {e}", original_error=e
|
|
1533
|
-
) from e
|
|
1534
|
-
except aiohttp.ClientError as e:
|
|
1535
|
-
raise ThordataNetworkError(
|
|
1536
|
-
f"Get expiration failed: {e}", original_error=e
|
|
1537
|
-
) from e
|
|
1538
|
-
|
|
1539
2187
|
# =========================================================================
|
|
1540
|
-
#
|
|
2188
|
+
# Internal Helper Methods
|
|
1541
2189
|
# =========================================================================
|
|
1542
2190
|
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1551
|
-
),
|
|
1552
|
-
)
|
|
1553
|
-
|
|
1554
|
-
async def list_states(
|
|
1555
|
-
self,
|
|
1556
|
-
country_code: str,
|
|
1557
|
-
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1558
|
-
) -> list[dict[str, Any]]:
|
|
1559
|
-
"""List supported states for a country."""
|
|
1560
|
-
return await self._get_locations(
|
|
1561
|
-
"states",
|
|
1562
|
-
proxy_type=(
|
|
1563
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1564
|
-
),
|
|
1565
|
-
country_code=country_code,
|
|
1566
|
-
)
|
|
1567
|
-
|
|
1568
|
-
async def list_cities(
|
|
1569
|
-
self,
|
|
1570
|
-
country_code: str,
|
|
1571
|
-
state_code: str | None = None,
|
|
1572
|
-
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1573
|
-
) -> list[dict[str, Any]]:
|
|
1574
|
-
"""List supported cities."""
|
|
1575
|
-
kwargs = {
|
|
1576
|
-
"proxy_type": (
|
|
1577
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1578
|
-
),
|
|
1579
|
-
"country_code": country_code,
|
|
1580
|
-
}
|
|
1581
|
-
if state_code:
|
|
1582
|
-
kwargs["state_code"] = state_code
|
|
1583
|
-
|
|
1584
|
-
return await self._get_locations("cities", **kwargs)
|
|
2191
|
+
def _require_public_credentials(self) -> None:
    """Verify that both public API credentials are set.

    Raises:
        ThordataConfigError: If ``public_token`` or ``public_key`` is
            missing or empty.
    """
    if self.public_token and self.public_key:
        return

    raise ThordataConfigError(
        "public_token and public_key are required for this operation. "
        "Please provide them when initializing AsyncThordataClient."
    )
|
|
1585
2198
|
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
) -> list[dict[str, Any]]:
|
|
1591
|
-
"""List supported ASNs."""
|
|
1592
|
-
return await self._get_locations(
|
|
1593
|
-
"asn",
|
|
1594
|
-
proxy_type=(
|
|
1595
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1596
|
-
),
|
|
1597
|
-
country_code=country_code,
|
|
1598
|
-
)
|
|
2199
|
+
def _build_gateway_headers(self) -> dict[str, str]:
    """Build the authentication headers for gateway-style endpoints.

    Raises:
        ThordataConfigError: If the public credentials are missing (checked
            through ``_require_public_credentials``).
    """
    self._require_public_credentials()
    token = self.public_token or ""
    key = self.public_key or ""
    return build_public_api_headers(token, key)
|
|
1599
2203
|
|
|
1600
2204
|
async def _get_locations(
|
|
1601
2205
|
self, endpoint: str, **kwargs: Any
|
|
1602
2206
|
) -> list[dict[str, Any]]:
|
|
1603
|
-
"""Internal async locations API call.
|
|
2207
|
+
"""Internal async locations API call.
|
|
2208
|
+
|
|
2209
|
+
Args:
|
|
2210
|
+
endpoint: Location endpoint (countries, states, cities, asn).
|
|
2211
|
+
**kwargs: Query parameters.
|
|
2212
|
+
|
|
2213
|
+
Returns:
|
|
2214
|
+
List of location dictionaries.
|
|
2215
|
+
"""
|
|
1604
2216
|
self._require_public_credentials()
|
|
1605
2217
|
|
|
1606
2218
|
params = {
|
|
@@ -1637,21 +2249,10 @@ class AsyncThordataClient:
|
|
|
1637
2249
|
|
|
1638
2250
|
return []
|
|
1639
2251
|
|
|
1640
|
-
# =========================================================================
|
|
1641
|
-
# Helper Methods
|
|
1642
|
-
# =========================================================================
|
|
1643
|
-
|
|
1644
|
-
def _require_public_credentials(self) -> None:
|
|
1645
|
-
"""Ensure public API credentials are available."""
|
|
1646
|
-
if not self.public_token or not self.public_key:
|
|
1647
|
-
raise ThordataConfigError(
|
|
1648
|
-
"public_token and public_key are required for this operation. "
|
|
1649
|
-
"Please provide them when initializing AsyncThordataClient."
|
|
1650
|
-
)
|
|
1651
|
-
|
|
1652
2252
|
def _get_proxy_endpoint_overrides(
|
|
1653
2253
|
self, product: ProxyProduct
|
|
1654
2254
|
) -> tuple[str | None, int | None, str]:
|
|
2255
|
+
"""Get proxy endpoint overrides from environment variables."""
|
|
1655
2256
|
prefix = product.value.upper()
|
|
1656
2257
|
|
|
1657
2258
|
host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
|
|
@@ -1676,6 +2277,8 @@ class AsyncThordataClient:
|
|
|
1676
2277
|
return host or None, port, protocol
|
|
1677
2278
|
|
|
1678
2279
|
def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
|
|
2280
|
+
"""Get proxy configuration from environment variables."""
|
|
2281
|
+
# Check RESIDENTIAL
|
|
1679
2282
|
u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
|
|
1680
2283
|
p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
|
|
1681
2284
|
if u and p:
|
|
@@ -1691,6 +2294,7 @@ class AsyncThordataClient:
|
|
|
1691
2294
|
protocol=protocol,
|
|
1692
2295
|
)
|
|
1693
2296
|
|
|
2297
|
+
# Check DATACENTER
|
|
1694
2298
|
u = os.getenv("THORDATA_DATACENTER_USERNAME")
|
|
1695
2299
|
p = os.getenv("THORDATA_DATACENTER_PASSWORD")
|
|
1696
2300
|
if u and p:
|
|
@@ -1706,6 +2310,7 @@ class AsyncThordataClient:
|
|
|
1706
2310
|
protocol=protocol,
|
|
1707
2311
|
)
|
|
1708
2312
|
|
|
2313
|
+
# Check MOBILE
|
|
1709
2314
|
u = os.getenv("THORDATA_MOBILE_USERNAME")
|
|
1710
2315
|
p = os.getenv("THORDATA_MOBILE_PASSWORD")
|
|
1711
2316
|
if u and p:
|
|
@@ -1723,11 +2328,35 @@ class AsyncThordataClient:
|
|
|
1723
2328
|
|
|
1724
2329
|
return None
|
|
1725
2330
|
|
|
1726
|
-
def
|
|
2331
|
+
def get_browser_connection_url(
    self, username: str | None = None, password: str | None = None
) -> str:
    """Build the WebSocket URL used to connect to Scraping Browser.

    Each credential is taken from the argument first, then from the
    ``THORDATA_BROWSER_*`` environment variable, then from the
    ``THORDATA_RESIDENTIAL_*`` one. The username gets the ``td-customer-``
    prefix unless it already has it, and both parts are percent-encoded.

    Note: This method is synchronous as it only does string formatting.

    Args:
        username: Browser username; falls back to the environment.
        password: Browser password; falls back to the environment.

    Returns:
        A ``wss://`` URL with the credentials embedded.

    Raises:
        ThordataConfigError: If no username or no password can be resolved.
    """
    user = username
    if not user:
        user = os.getenv("THORDATA_BROWSER_USERNAME")
    if not user:
        user = os.getenv("THORDATA_RESIDENTIAL_USERNAME")

    secret = password
    if not secret:
        secret = os.getenv("THORDATA_BROWSER_PASSWORD")
    if not secret:
        secret = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")

    if not (user and secret):
        raise ThordataConfigError(
            "Browser credentials missing. Set THORDATA_BROWSER_USERNAME/PASSWORD or pass arguments."
        )

    prefix = "td-customer-"
    if user.startswith(prefix):
        login = user
    else:
        login = prefix + user

    # safe="" so that characters such as "/" and "@" are escaped too and
    # cannot break the userinfo part of the URL.
    safe_user = quote(login, safe="")
    safe_pass = quote(secret, safe="")

    return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
|