thordata-sdk 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +4 -40
- thordata/async_client.py +492 -1790
- thordata/client.py +432 -1315
- thordata/core/__init__.py +23 -0
- thordata/core/async_http_client.py +91 -0
- thordata/core/http_client.py +79 -0
- thordata/core/tunnel.py +287 -0
- thordata/enums.py +41 -380
- thordata/models.py +37 -1193
- thordata/tools/__init__.py +28 -0
- thordata/tools/base.py +42 -0
- thordata/tools/code.py +26 -0
- thordata/tools/ecommerce.py +67 -0
- thordata/tools/search.py +73 -0
- thordata/tools/social.py +190 -0
- thordata/tools/video.py +81 -0
- thordata/types/__init__.py +77 -0
- thordata/types/common.py +141 -0
- thordata/types/proxy.py +340 -0
- thordata/types/serp.py +224 -0
- thordata/types/task.py +144 -0
- thordata/types/universal.py +66 -0
- thordata/unlimited.py +67 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/METADATA +73 -50
- thordata_sdk-1.5.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/WHEEL +1 -1
- thordata_sdk-1.4.0.dist-info/RECORD +0 -18
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/top_level.txt +0 -0
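A sketch of the import surface implied by the new layout above and by the import changes in `thordata/async_client.py` below; only the names that `async_client.py` actually imports are confirmed by this diff, so treat anything else under these modules as an assumption:

```python
# Import surface implied by the 1.5.0 layout. Only the names imported by
# async_client.py in the diff below are confirmed; any other re-exports
# from these modules are assumptions.
from thordata.core.async_http_client import AsyncThordataHttpSession
from thordata.types import (
    CommonSettings,
    ProxyConfig,
    ScraperTaskConfig,
    SerpRequest,
    VideoTaskConfig,
)
```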
thordata/async_client.py
CHANGED
@@ -3,27 +3,11 @@ Asynchronous client for the Thordata API.
 
 This module provides the AsyncThordataClient for high-concurrency workloads,
 built on aiohttp.
-
-Example:
-    >>> import asyncio
-    >>> from thordata import AsyncThordataClient
-    >>>
-    >>> async def main():
-    ...     async with AsyncThordataClient(
-    ...         scraper_token="your_token",
-    ...         public_token="your_public_token",
-    ...         public_key="your_public_key"
-    ...     ) as client:
-    ...         response = await client.get("https://httpbin.org/ip")
-    ...         print(await response.json())
-    >>>
-    >>> asyncio.run(main())
 """
 
 from __future__ import annotations
 
 import asyncio
-import json
 import logging
 import os
 from datetime import date
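The removed module docstring carried the 1.4.0 usage example. The constructor arguments and `get()` method survive the refactor below, so the same pattern should still hold in 1.5.0; a minimal sketch reusing only names visible in this diff:

```python
import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(
        scraper_token="your_token",
        public_token="your_public_token",
        public_key="your_public_key",
    ) as client:
        # get() routes through the proxy network; see the _proxy_request
        # consolidation later in this diff.
        response = await client.get("https://httpbin.org/ip")
        print(await response.json())


asyncio.run(main())
```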
@@ -32,29 +16,36 @@ from urllib.parse import quote
 
 import aiohttp
 
-
+# Import Legacy/Compat
 from ._utils import (
     build_auth_headers,
     build_builder_headers,
     build_public_api_headers,
-    build_user_agent,
     decode_base64_image,
     extract_error_message,
     parse_json_response,
 )
 from .async_unlimited import AsyncUnlimitedNamespace
-
+
+# Import Core
+from .core.async_http_client import AsyncThordataHttpSession
+from .enums import Engine
 from .exceptions import (
     ThordataConfigError,
     ThordataNetworkError,
     ThordataTimeoutError,
     raise_for_code,
 )
-from .models import (
+from .retry import RetryConfig
+from .serp_engines import AsyncSerpNamespace
+
+# Import Types
+from .types import (
     CommonSettings,
     ProxyConfig,
     ProxyProduct,
     ProxyServer,
+    ProxyType,
     ProxyUserList,
     ScraperTaskConfig,
     SerpRequest,
@@ -62,47 +53,14 @@ from .models import (
     UsageStatistics,
     VideoTaskConfig,
 )
-from .retry import RetryConfig
-from .serp_engines import AsyncSerpNamespace
 
 logger = logging.getLogger(__name__)
 
 
-# =========================================================================
-# Main Client Class
-# =========================================================================
-
-
 class AsyncThordataClient:
-    """The official asynchronous Python client for Thordata.
-
-
-
-    Args:
-        scraper_token: The API token from your Dashboard.
-        public_token: The public API token.
-        public_key: The public API key.
-        proxy_host: Custom proxy gateway host.
-        proxy_port: Custom proxy gateway port.
-        timeout: Default request timeout in seconds.
-        api_timeout: Default API request timeout in seconds.
-        retry_config: Configuration for automatic retries.
-        auth_mode: Authentication mode for scraping APIs ("bearer" or "header_token").
-        scraperapi_base_url: Override base URL for SERP API.
-        universalapi_base_url: Override base URL for Universal Scraping API.
-        web_scraper_api_base_url: Override base URL for Web Scraper API.
-        locations_base_url: Override base URL for Locations API.
-
-    Example:
-        >>> async with AsyncThordataClient(
-        ...     scraper_token="token",
-        ...     public_token="pub_token",
-        ...     public_key="pub_key"
-        ... ) as client:
-        ...     results = await client.serp_search("python")
-    """
-
-    # API Endpoints (same as sync client)
+    """The official asynchronous Python client for Thordata."""
+
+    # API Endpoints
     BASE_URL = "https://scraperapi.thordata.com"
     UNIVERSAL_URL = "https://universalapi.thordata.com"
     API_URL = "https://openapi.thordata.com/api/web-scraper-api"
@@ -124,97 +82,67 @@ class AsyncThordataClient:
         web_scraper_api_base_url: str | None = None,
         locations_base_url: str | None = None,
     ) -> None:
-        """Initialize the Async Thordata Client.
-
-        Args:
-            scraper_token: Token for SERP/Universal scraping APIs.
-            public_token: Public API token for account/management operations.
-            public_key: Public API key for account/management operations.
-            proxy_host: Default proxy host for residential proxies.
-            proxy_port: Default proxy port for residential proxies.
-            timeout: Default timeout for proxy requests.
-            api_timeout: Default timeout for API requests.
-            retry_config: Configuration for retry behavior.
-            auth_mode: Authentication mode for scraper_token ("bearer" or "header_token").
-            scraperapi_base_url: Override base URL for SERP API.
-            universalapi_base_url: Override base URL for Universal Scraping API.
-            web_scraper_api_base_url: Override base URL for Web Scraper API.
-            locations_base_url: Override base URL for Locations API.
-        """
         self.scraper_token = scraper_token
         self.public_token = public_token
         self.public_key = public_key
 
-        # Proxy configuration
         self._proxy_host = proxy_host
         self._proxy_port = proxy_port
-
-        # Timeout configuration
-        self._default_timeout = aiohttp.ClientTimeout(total=timeout)
-        self._api_timeout = aiohttp.ClientTimeout(total=api_timeout)
-
-        # Retry configuration
         self._retry_config = retry_config or RetryConfig()
 
-
+        self._api_timeout = api_timeout
+
         self._auth_mode = auth_mode.lower()
         if self._auth_mode not in ("bearer", "header_token"):
-            raise ThordataConfigError(
-                f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
-            )
+            raise ThordataConfigError(f"Invalid auth_mode: {auth_mode}")
 
-        #
+        # Core Async HTTP Client
+        self._http = AsyncThordataHttpSession(
+            timeout=api_timeout, retry_config=self._retry_config
+        )
+
+        # Base URLs Configuration
         scraperapi_base = (
             scraperapi_base_url
             or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
             or self.BASE_URL
         ).rstrip("/")
-
         universalapi_base = (
             universalapi_base_url
             or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
             or self.UNIVERSAL_URL
         ).rstrip("/")
-
         web_scraper_api_base = (
             web_scraper_api_base_url
             or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
            or self.API_URL
         ).rstrip("/")
-
         locations_base = (
             locations_base_url
             or os.getenv("THORDATA_LOCATIONS_BASE_URL")
             or self.LOCATIONS_URL
         ).rstrip("/")
 
-
-        gateway_base = os.getenv(
+        self._gateway_base_url = os.getenv(
            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
         )
-
+        self._child_base_url = os.getenv(
             "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
         )
 
-
-        self._child_base_url = child_base
-
+        # URL Construction
         self._serp_url = f"{scraperapi_base}/request"
         self._builder_url = f"{scraperapi_base}/builder"
         self._video_builder_url = f"{scraperapi_base}/video_builder"
         self._universal_url = f"{universalapi_base}/request"
-
         self._status_url = f"{web_scraper_api_base}/tasks-status"
         self._download_url = f"{web_scraper_api_base}/tasks-download"
         self._list_url = f"{web_scraper_api_base}/tasks-list"
-
         self._locations_base_url = locations_base
-
-
-
-        self._proxy_users_url = (
-            f"{locations_base.replace('/locations', '')}/proxy-users"
-        )
+
+        shared_api_base = locations_base.replace("/locations", "")
+        self._usage_stats_url = f"{shared_api_base}/account/usage-statistics"
+        self._proxy_users_url = f"{shared_api_base}/proxy-users"
 
         whitelist_base = os.getenv(
             "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
@@ -227,45 +155,29 @@ class AsyncThordataClient:
         self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
         self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
 
-        #
-        self._session: aiohttp.ClientSession | None = None
-
-        # Namespaced Access (e.g. client.serp.google.maps(...))
+        # Namespaces
         self.serp = AsyncSerpNamespace(self)
         self.unlimited = AsyncUnlimitedNamespace(self)
 
-    # =========================================================================
-    # Context Manager
-    # =========================================================================
-
     async def __aenter__(self) -> AsyncThordataClient:
-
-        if self._session is None or self._session.closed:
-            self._session = aiohttp.ClientSession(
-                timeout=self._api_timeout,
-                trust_env=True,
-                headers={"User-Agent": build_user_agent(_sdk_version, "aiohttp")},
-            )
+        await self._http._ensure_session()
         return self
 
     async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
-        """Async context manager exit."""
         await self.close()
 
     async def close(self) -> None:
-
-        if self._session and not self._session.closed:
-            await self._session.close()
-            self._session = None
+        await self._http.close()
 
     def _get_session(self) -> aiohttp.ClientSession:
-        """
-        if
-            raise RuntimeError(
-
-
-
-
+        """Internal helper for namespaces that expect direct session access (legacy compat)."""
+        if not self._http._session:
+            raise RuntimeError("Session not initialized. Use 'async with client'.")
+        return self._http._session
+
+    def _require_public_credentials(self) -> None:
+        if not self.public_token or not self.public_key:
+            raise ThordataConfigError("public_token and public_key are required.")
 
     # =========================================================================
     # Proxy Network Methods
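The hunks above swap the hand-rolled `aiohttp.ClientSession` for the shared `AsyncThordataHttpSession` from the new `core` package. A minimal sketch of the resulting lifecycle, assuming nothing beyond what the diff shows (the `RetryConfig()` fallback and the `timeout`/`retry_config` parameters):

```python
from thordata import AsyncThordataClient
from thordata.retry import RetryConfig


async def demo() -> None:
    client = AsyncThordataClient(
        scraper_token="token",
        public_token="pub_token",
        public_key="pub_key",
        retry_config=RetryConfig(),  # same default __init__ falls back to
    )
    async with client:
        # __aenter__ calls self._http._ensure_session(); close() simply
        # delegates to self._http.close() instead of owning a session.
        ...
```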
@@ -278,54 +190,7 @@ class AsyncThordataClient:
         proxy_config: ProxyConfig | None = None,
         **kwargs: Any,
     ) -> aiohttp.ClientResponse:
-
-
-        Args:
-            url: The target URL.
-            proxy_config: Custom proxy configuration.
-            **kwargs: Additional aiohttp arguments.
-
-        Returns:
-            The aiohttp response object.
-
-        Note:
-            aiohttp has limited support for HTTPS proxies (TLS to proxy / TLS-in-TLS).
-            For HTTPS proxy endpoints, please use ThordataClient.get/post (sync client).
-        """
-        session = self._get_session()
-
-        logger.debug(f"Async Proxy GET: {url}")
-
-        if proxy_config is None:
-            proxy_config = self._get_default_proxy_config_from_env()
-
-        if proxy_config is None:
-            raise ThordataConfigError(
-                "Proxy credentials are missing. "
-                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
-                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD."
-            )
-
-        if getattr(proxy_config, "protocol", "http").lower() == "https":
-            raise ThordataConfigError(
-                "Proxy Network requires an HTTPS proxy endpoint. "
-                "aiohttp support for 'https://' proxies is limited. "
-                "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
-            )
-        proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
-
-        try:
-            return await session.get(
-                url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
-            )
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Async request timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Async request failed: {e}", original_error=e
-            ) from e
+        return await self._proxy_request("GET", url, proxy_config, **kwargs)
 
     async def post(
         self,
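`get()` is now a one-line delegate to `_proxy_request()` (defined in the next hunk), which keeps the credential lookup, the aiohttp HTTPS-proxy rejection, and the retry-aware core session in one place. A sketch of the calling pattern; the `ProxyConfig` keyword names come from the SDK's own error message, and the `product` value is an assumption:

```python
from thordata import AsyncThordataClient
from thordata.types import ProxyConfig


async def fetch_ip(client: AsyncThordataClient) -> str:
    # username/password/product mirror the removed error-message hint;
    # "residential" as the product value is an assumption.
    cfg = ProxyConfig(username="user", password="pass", product="residential")
    # get() and post() now share one code path, so credential checks and
    # the HTTPS-proxy rejection behave identically for both verbs.
    response = await client.get("https://httpbin.org/ip", proxy_config=cfg)
    return await response.text()
```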
@@ -334,53 +199,36 @@ class AsyncThordataClient:
         proxy_config: ProxyConfig | None = None,
         **kwargs: Any,
     ) -> aiohttp.ClientResponse:
-
-
-        Args:
-            url: The target URL.
-            proxy_config: Custom proxy configuration.
-            **kwargs: Additional aiohttp arguments.
+        return await self._proxy_request("POST", url, proxy_config, **kwargs)
 
-
-
-
-
-
-        logger.debug(f"Async Proxy POST: {url}")
+    async def _proxy_request(
+        self, method: str, url: str, proxy_config: ProxyConfig | None, **kwargs: Any
+    ) -> aiohttp.ClientResponse:
+        logger.debug(f"Async Proxy {method}: {url}")
 
         if proxy_config is None:
             proxy_config = self._get_default_proxy_config_from_env()
 
         if proxy_config is None:
-            raise ThordataConfigError(
-                "Proxy credentials are missing. "
-                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
-                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD."
-            )
+            raise ThordataConfigError("Proxy credentials are missing.")
 
+        # Restore strict check for aiohttp HTTPS proxy limitation
         if getattr(proxy_config, "protocol", "http").lower() == "https":
             raise ThordataConfigError(
                 "Proxy Network requires an HTTPS proxy endpoint. "
                 "aiohttp support for 'https://' proxies is limited. "
                 "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
             )
+
         proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
 
-
-
-
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Async request timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Async request failed: {e}", original_error=e
-            ) from e
+        # Use the core HTTP client to execute, leveraging retry logic
+        return await self._http.request(
+            method=method, url=url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
+        )
 
     # =========================================================================
-    #
+    # API Methods (SERP, Universal)
     # =========================================================================
 
     async def serp_search(
@@ -398,31 +246,7 @@ class AsyncThordataClient:
         output_format: str = "json",
         **kwargs: Any,
     ) -> dict[str, Any]:
-        """Execute an async SERP search.
-
-        Args:
-            query: Search keywords.
-            engine: Search engine (GOOGLE, BING, etc.).
-            num: Number of results.
-            country: Country code for localization.
-            language: Language code.
-            search_type: Type of search (images, news, video, etc.).
-            device: Device type ('desktop', 'mobile', 'tablet').
-            render_js: Enable JavaScript rendering.
-            no_cache: Disable internal caching.
-            output_format: 'json' or 'html'.
-            **kwargs: Additional parameters.
-
-        Returns:
-            Parsed JSON results or dict with 'html' key.
-        """
-        if not self.scraper_token:
-            raise ThordataConfigError("scraper_token is required for SERP API")
-
-        session = self._get_session()
-
         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
-
         request = SerpRequest(
             query=query,
             engine=engine_str,
@@ -436,108 +260,33 @@ class AsyncThordataClient:
             output_format=output_format,
             extra_params=kwargs,
         )
-
-        payload = request.to_payload()
-        token = self.scraper_token or ""
-        headers = build_auth_headers(token, mode=self._auth_mode)
-
-        logger.info(f"Async SERP Search: {engine_str} - {query}")
-
-        try:
-            async with session.post(
-                self._serp_url,
-                data=payload,
-                headers=headers,
-            ) as response:
-                response.raise_for_status()
-
-                if output_format.lower() == "json":
-                    data = await response.json()
-
-                    if isinstance(data, dict):
-                        code = data.get("code")
-                        if code is not None and code != 200:
-                            msg = extract_error_message(data)
-                            raise_for_code(
-                                f"SERP API Error: {msg}",
-                                code=code,
-                                payload=data,
-                            )
-
-                    return parse_json_response(data)
-
-                text = await response.text()
-                return {"html": text}
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"SERP request timed out: {e}",
-                original_error=e,
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"SERP request failed: {e}",
-                original_error=e,
-            ) from e
+        return await self.serp_search_advanced(request)
 
     async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
-        """Execute an async SERP search using a SerpRequest object.
-
-        Args:
-            request: SerpRequest object with search parameters.
-
-        Returns:
-            Parsed search results.
-        """
-        session = self._get_session()
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token
-
+            raise ThordataConfigError("scraper_token required")
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
+        logger.info(f"Async SERP: {request.engine} - {request.query}")
 
-
+        response = await self._http.request(
+            "POST", self._serp_url, data=payload, headers=headers
+        )
 
-
-
-
-            data
-
-
-
-
-
-
-            if isinstance(data, dict):
-                code = data.get("code")
-                if code is not None and code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"SERP API Error: {msg}",
-                        code=code,
-                        payload=data,
-                    )
-
-            return parse_json_response(data)
-
-            text = await response.text()
-            return {"html": text}
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"SERP request timed out: {e}",
-                original_error=e,
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"SERP request failed: {e}",
-                original_error=e,
-            ) from e
+        if request.output_format.lower() == "json":
+            data = await response.json()
+            if isinstance(data, dict):
+                code = data.get("code")
+                if code is not None and code != 200:
+                    raise_for_code(
+                        f"SERP Error: {extract_error_message(data)}",
+                        code=code,
+                        payload=data,
+                    )
+            return parse_json_response(data)
 
-
-
-    # =========================================================================
+        text = await response.text()
+        return {"html": text}
 
     async def universal_scrape(
         self,
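Both SERP entry points now funnel through `serp_search_advanced()` and the shared core session, and `universal_scrape()` does the same via `universal_scrape_advanced()` in the hunks that follow. A sketch of the resulting call surface, using only parameter names visible in this diff:

```python
from thordata import AsyncThordataClient


async def demo(client: AsyncThordataClient) -> None:
    # JSON SERP results; output_format="html" would yield {"html": ...}.
    results = await client.serp_search("python", output_format="json")

    # Universal (Web Unlocker) scrape: str for HTML, bytes for PNG.
    html = await client.universal_scrape("https://example.com", js_render=True)
    png = await client.universal_scrape("https://example.com", output_format="png")
    print(len(results), len(html), len(png))
```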
@@ -551,20 +300,6 @@ class AsyncThordataClient:
         wait_for: str | None = None,
         **kwargs: Any,
     ) -> str | bytes:
-        """Async scrape using Universal API (Web Unlocker).
-
-        Args:
-            url: Target URL.
-            js_render: Enable JavaScript rendering.
-            output_format: "html" or "png".
-            country: Geo-targeting country.
-            block_resources: Resources to block (e.g., "script,css").
-            wait: Wait time in milliseconds before fetching.
-            wait_for: CSS selector to wait for before fetching.
-
-        Returns:
-            HTML string or PNG bytes.
-        """
         request = UniversalScrapeRequest(
             url=url,
             js_render=js_render,
@@ -575,70 +310,41 @@ class AsyncThordataClient:
             wait_for=wait_for,
             extra_params=kwargs,
         )
-
         return await self.universal_scrape_advanced(request)
 
     async def universal_scrape_advanced(
         self, request: UniversalScrapeRequest
     ) -> str | bytes:
-        """Async scrape using a UniversalScrapeRequest object.
-
-        Args:
-            request: UniversalScrapeRequest object with scrape parameters.
-
-        Returns:
-            HTML string or PNG bytes.
-        """
-        session = self._get_session()
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token
-
+            raise ThordataConfigError("scraper_token required")
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
-
+        response = await self._http.request(
+            "POST", self._universal_url, data=payload, headers=headers
+        )
 
         try:
-
-
-            )
-            response.
-
-
-
-
-
-
-
-
-
-
-
-
-
-                raise_for_code(
-                    f"Universal API Error: {msg}", code=code, payload=resp_json
-                )
-
-            if "html" in resp_json:
-                return resp_json["html"]
-
-            if "png" in resp_json:
-                return decode_base64_image(resp_json["png"])
-
-            return str(resp_json)
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Universal scrape timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Universal scrape failed: {e}", original_error=e
-            ) from e
+            resp_json = await response.json()
+        except ValueError:
+            if request.output_format.lower() == "png":
+                return await response.read()
+            return await response.text()
+
+        if isinstance(resp_json, dict):
+            code = resp_json.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(resp_json)
+                raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
+
+        if "html" in resp_json:
+            return resp_json["html"]
+        if "png" in resp_json:
+            return decode_base64_image(resp_json["png"])
+        return str(resp_json)
 
     # =========================================================================
-    #
+    # Task Management
     # =========================================================================
 
     async def create_scraper_task(
@@ -649,18 +355,6 @@ class AsyncThordataClient:
         parameters: dict[str, Any],
         universal_params: dict[str, Any] | None = None,
     ) -> str:
-        """Create an async Web Scraper task.
-
-        Args:
-            file_name: Name for the output file (supports {{TasksID}} template).
-            spider_id: Spider identifier from Dashboard.
-            spider_name: Spider name (target domain, e.g., "amazon.com").
-            parameters: Spider-specific parameters.
-            universal_params: Global spider settings.
-
-        Returns:
-            Task ID.
-        """
         config = ScraperTaskConfig(
             file_name=file_name,
             spider_id=spider_id,
@@ -668,53 +362,72 @@ class AsyncThordataClient:
             parameters=parameters,
             universal_params=universal_params,
         )
-
         return await self.create_scraper_task_advanced(config)
 
-    async def
-
+    async def run_tool(
+        self,
+        tool_request: Any,
+        file_name: str | None = None,
+        universal_params: dict[str, Any] | None = None,
+    ) -> str:
+        """Run a specific pre-defined tool (Async)."""
+        if not hasattr(tool_request, "to_task_parameters") or not hasattr(
+            tool_request, "get_spider_id"
+        ):
+            raise ValueError(
+                "tool_request must be an instance of a thordata.tools class"
+            )
 
-
-
+        spider_id = tool_request.get_spider_id()
+        spider_name = tool_request.get_spider_name()
+        params = tool_request.to_task_parameters()
 
-
-
-
+        if not file_name:
+            import uuid
+
+            short_id = uuid.uuid4().hex[:8]
+            file_name = f"{spider_id}_{short_id}"
+
+        # Check if it's a Video Tool
+        if hasattr(tool_request, "common_settings"):
+            config_video = VideoTaskConfig(
+                file_name=file_name,
+                spider_id=spider_id,
+                spider_name=spider_name,
+                parameters=params,
+                common_settings=tool_request.common_settings,
+            )
+            return await self.create_video_task_advanced(config_video)
+        else:
+            config = ScraperTaskConfig(
+                file_name=file_name,
+                spider_id=spider_id,
+                spider_name=spider_name,
+                parameters=params,
+                universal_params=universal_params,
+            )
+            return await self.create_scraper_task_advanced(config)
+
+    async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
         self._require_public_credentials()
-        session = self._get_session()
         if not self.scraper_token:
-            raise ThordataConfigError("scraper_token
-
+            raise ThordataConfigError("scraper_token required")
         payload = config.to_payload()
-        # Builder needs 3 headers: token, key, Authorization Bearer
         headers = build_builder_headers(
-            self.scraper_token,
-            self.public_token or "",
-            self.public_key or "",
+            self.scraper_token, str(self.public_token), str(self.public_key)
         )
 
-
-
-
-
-
-
-
-                data
-
-
-
-                msg = extract_error_message(data)
-                raise_for_code(
-                    f"Task creation failed: {msg}", code=code, payload=data
-                )
-
-                return data["data"]["task_id"]
-
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Task creation failed: {e}", original_error=e
-            ) from e
+        response = await self._http.request(
+            "POST", self._builder_url, data=payload, headers=headers
+        )
+        data = await response.json(content_type=None)
+        if data.get("code") != 200:
+            raise_for_code(
+                f"Task creation failed: {extract_error_message(data)}",
+                code=data.get("code"),
+                payload=data,
+            )
+        return data["data"]["task_id"]
 
     async def create_video_task(
         self,
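The new `run_tool()` duck-types its argument: anything with `get_spider_id()`, `get_spider_name()`, and `to_task_parameters()` is accepted, and a `common_settings` attribute routes the task to the video builder. The concrete classes live in the new `thordata/tools/` package, which this hunk does not show, so the tool class below is purely illustrative:

```python
from dataclasses import dataclass, field
from typing import Any

from thordata import AsyncThordataClient


@dataclass
class HypotheticalSearchTool:
    """Illustrative stand-in for a thordata.tools class (not a real API)."""

    query: str
    extra: dict[str, Any] = field(default_factory=dict)

    def get_spider_id(self) -> str:
        return "hypothetical_spider_id"

    def get_spider_name(self) -> str:
        return "example.com"

    def to_task_parameters(self) -> dict[str, Any]:
        return {"query": self.query, **self.extra}


async def demo(client: AsyncThordataClient) -> str:
    # With no file_name, run_tool derives f"{spider_id}_{uuid4().hex[:8]}".
    # A tool exposing a common_settings attribute would instead be routed
    # to create_video_task_advanced().
    return await client.run_tool(HypotheticalSearchTool(query="python"))
```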
@@ -724,18 +437,6 @@ class AsyncThordataClient:
         parameters: dict[str, Any],
         common_settings: CommonSettings,
     ) -> str:
-        """Create a YouTube video/audio download task.
-
-        Args:
-            file_name: Name for the output file.
-            spider_id: Spider identifier (e.g., "youtube_video_by-url").
-            spider_name: Target site (e.g., "youtube.com").
-            parameters: Spider-specific parameters (URLs, etc.).
-            common_settings: Video/audio settings (resolution, subtitles, etc.).
-
-        Returns:
-            Task ID.
-        """
         config = VideoTaskConfig(
             file_name=file_name,
             spider_id=spider_id,
@@ -743,270 +444,106 @@ class AsyncThordataClient:
             parameters=parameters,
             common_settings=common_settings,
         )
-
         return await self.create_video_task_advanced(config)
 
     async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
-        """Create a video task using VideoTaskConfig object.
-
-        Args:
-            config: VideoTaskConfig object with task configuration.
-
-        Returns:
-            Task ID.
-        """
         self._require_public_credentials()
-        session = self._get_session()
         if not self.scraper_token:
-            raise ThordataConfigError(
-                "scraper_token is required for Video Task Builder"
-            )
-
+            raise ThordataConfigError("scraper_token required")
         payload = config.to_payload()
         headers = build_builder_headers(
-            self.scraper_token,
-            self.public_token or "",
-            self.public_key or "",
+            self.scraper_token, str(self.public_token), str(self.public_key)
         )
 
-
-
+        response = await self._http.request(
+            "POST", self._video_builder_url, data=payload, headers=headers
        )
-
-
-
-
-                data
-
-
-
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"Video task creation failed: {msg}", code=code, payload=data
-                    )
-
-                return data["data"]["task_id"]
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Video task creation timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Video task creation failed: {e}", original_error=e
-            ) from e
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code(
+                f"Video task failed: {extract_error_message(data)}",
+                code=data.get("code"),
+                payload=data,
+            )
+        return data["data"]["task_id"]
 
     async def get_task_status(self, task_id: str) -> str:
-        """Check async task status.
-
-        Args:
-            task_id: Task identifier.
-
-        Returns:
-            Status string (running, success, failed, etc.).
-
-        Raises:
-            ThordataConfigError: If public credentials are missing.
-            ThordataAPIError: If API returns a non-200 code.
-            ThordataNetworkError: If network/HTTP request fails.
-        """
         self._require_public_credentials()
-
-
-
-            self.public_token or "", self.public_key or ""
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+        response = await self._http.request(
+            "POST", self._status_url, data={"tasks_ids": task_id}, headers=headers
         )
-
-
-
-
-
-
-
-
-
-
-        if isinstance(data, dict):
-            code = data.get("code")
-            if code is not None and code != 200:
-                msg = extract_error_message(data)
-                raise_for_code(
-                    f"Task status API Error: {msg}",
-                    code=code,
-                    payload=data,
-                )
-
-        items = data.get("data") or []
-        for item in items:
-            if str(item.get("task_id")) == str(task_id):
-                return item.get("status", "unknown")
-
-        return "unknown"
-
-        raise ThordataNetworkError(
-            f"Unexpected task status response type: {type(data).__name__}",
-            original_error=None,
+        data = await response.json(content_type=None)
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                raise_for_code(
+                    f"Status error: {extract_error_message(data)}",
+                    code=code,
+                    payload=data,
                 )
-
-
-
-
-
-
-        raise ThordataNetworkError(
-            f"Async status check failed: {e}", original_error=e
-        ) from e
+            items = data.get("data") or []
+            for item in items:
+                if str(item.get("task_id")) == str(task_id):
+                    return item.get("status", "unknown")
+            return "unknown"
+        raise ThordataNetworkError(f"Unexpected response type: {type(data)}")
 
     async def safe_get_task_status(self, task_id: str) -> str:
-        """Backward-compatible status check.
-
-        Returns:
-            Status string, or "error" on any exception.
-        """
         try:
             return await self.get_task_status(task_id)
         except Exception:
             return "error"
 
     async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
-        """Get download URL for completed task.
-
-        Args:
-            task_id: Task identifier.
-            file_type: File type to download (json, csv, video, audio, subtitle).
-
-        Returns:
-            Download URL.
-        """
         self._require_public_credentials()
-
-
-
-            self.
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+        response = await self._http.request(
+            "POST",
+            self._download_url,
+            data={"tasks_id": task_id, "type": file_type},
+            headers=headers,
         )
-
-
+        data = await response.json(content_type=None)
+        if data.get("code") == 200 and data.get("data"):
+            return data["data"]["download"]
+        raise_for_code("Get result failed", code=data.get("code"), payload=data)
+        return ""
 
-
-            async with session.post(
-                self._download_url, data=payload, headers=headers
-            ) as response:
-                data = await response.json(content_type=None)
-                code = data.get("code")
-
-                if code == 200 and data.get("data"):
-                    return data["data"]["download"]
-
-                msg = extract_error_message(data)
-                raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
-                # This line won't be reached, but satisfies mypy
-                raise RuntimeError("Unexpected state")
-
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Get result failed: {e}", original_error=e
-            ) from e
-
-    async def list_tasks(
-        self,
-        page: int = 1,
-        size: int = 20,
-    ) -> dict[str, Any]:
-        """List all Web Scraper tasks.
-
-        Args:
-            page: Page number (starts from 1).
-            size: Number of tasks per page.
-
-        Returns:
-            Dict containing 'count' and 'list' of tasks.
-        """
+    async def list_tasks(self, page: int = 1, size: int = 20) -> dict[str, Any]:
         self._require_public_credentials()
-
-
-
-        self.
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+        response = await self._http.request(
+            "POST",
+            self._list_url,
+            data={"page": str(page), "size": str(size)},
+            headers=headers,
         )
-
-        if
-
-
-        payload["size"] = str(size)
-
-        logger.info(f"Async listing tasks: page={page}, size={size}")
-
-        try:
-            async with session.post(
-                self._list_url,
-                data=payload,
-                headers=headers,
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json(content_type=None)
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
-
-                return data.get("data", {"count": 0, "list": []})
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"List tasks timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"List tasks failed: {e}", original_error=e
-            ) from e
+        data = await response.json(content_type=None)
+        if data.get("code") != 200:
+            raise_for_code("List tasks failed", code=data.get("code"), payload=data)
+        return data.get("data", {"count": 0, "list": []})
 
     async def wait_for_task(
-        self,
-        task_id: str,
-        *,
-        poll_interval: float = 5.0,
-        max_wait: float = 600.0,
+        self, task_id: str, *, poll_interval: float = 5.0, max_wait: float = 600.0
     ) -> str:
-        """Wait for a task to complete.
-
-        Args:
-            task_id: Task identifier.
-            poll_interval: Polling interval in seconds.
-            max_wait: Maximum time to wait in seconds.
-
-        Returns:
-            Final status of the task.
-        """
         import time
 
         start = time.monotonic()
-
         while (time.monotonic() - start) < max_wait:
             status = await self.get_task_status(task_id)
-
-            logger.debug(f"Task {task_id} status: {status}")
-
-            terminal_statuses = {
+            if status.lower() in {
                 "ready",
                 "success",
                 "finished",
                 "failed",
                 "error",
                 "cancelled",
-            }
-
-            if status.lower() in terminal_statuses:
+            }:
                 return status
-
             await asyncio.sleep(poll_interval)
-
-        raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
+        raise TimeoutError(f"Task {task_id} timeout")
 
     async def run_task(
         self,
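Task management is now create, then poll, then download over the core session: `wait_for_task()` polls at a fixed interval until a terminal status, while `run_task()` (next hunk) applies 1.5x backoff. A sketch of the manual lifecycle; the spider values are placeholders for dashboard-provided ones:

```python
from thordata import AsyncThordataClient


async def demo(client: AsyncThordataClient) -> str:
    task_id = await client.create_scraper_task(
        file_name="demo_{{TasksID}}",  # template noted in the 1.4.0 docstring
        spider_id="your_spider_id",    # placeholder dashboard value
        spider_name="example.com",     # placeholder dashboard value
        parameters={"url": "https://example.com"},  # spider-specific, assumed
    )
    # Fixed-interval polling; returns the first terminal status seen
    # (ready/success/finished/failed/error/cancelled) or raises TimeoutError.
    status = await client.wait_for_task(task_id, poll_interval=5.0, max_wait=600.0)
    if status.lower() in {"ready", "success", "finished"}:
        return await client.get_task_result(task_id, file_type="json")
    raise RuntimeError(f"Task ended with status {status}")
```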
@@ -1023,51 +560,17 @@ class AsyncThordataClient:
         task_type: str = "web",
         common_settings: CommonSettings | None = None,
     ) -> str:
-        """Async high-level wrapper to run a task and wait for result.
-
-        Lifecycle: Create -> Poll (Backoff) -> Get Download URL.
-
-        Args:
-            file_name: Name for the output file.
-            spider_id: Spider identifier from Dashboard.
-            spider_name: Spider name (target domain).
-            parameters: Spider-specific parameters.
-            universal_params: Global spider settings.
-            max_wait: Maximum seconds to wait for completion.
-            initial_poll_interval: Starting poll interval in seconds.
-            max_poll_interval: Maximum poll interval cap.
-            include_errors: Whether to include error logs.
-
-        Returns:
-            The download URL.
-        """
         if task_type == "video":
             if common_settings is None:
-                raise ValueError("common_settings
-
-
-                file_name=file_name,
-                spider_id=spider_id,
-                spider_name=spider_name,
-                parameters=parameters,
-                common_settings=common_settings,
-                include_errors=include_errors,
+                raise ValueError("common_settings required for video")
+            task_id = await self.create_video_task(
+                file_name, spider_id, spider_name, parameters, common_settings
             )
-            task_id = await self.create_video_task_advanced(config_video)
         else:
-
-            file_name
-                spider_id=spider_id,
-                spider_name=spider_name,
-                parameters=parameters,
-                universal_params=universal_params,
-                include_errors=include_errors,
+            task_id = await self.create_scraper_task(
+                file_name, spider_id, spider_name, parameters, universal_params
             )
-            task_id = await self.create_scraper_task_advanced(config)
 
-        logger.info(f"Async Task created: {task_id}. Polling...")
-
-        # 2. Poll Status
         import time
 
         start_time = time.monotonic()
@@ -1075,221 +578,71 @@ class AsyncThordataClient:
 
         while (time.monotonic() - start_time) < max_wait:
             status = await self.get_task_status(task_id)
-
-
-            if status_lower in {"ready", "success", "finished"}:
-                logger.info(f"Task {task_id} ready.")
+            if status.lower() in {"ready", "success", "finished"}:
                 return await self.get_task_result(task_id)
-
-
-                raise ThordataNetworkError(
-                    f"Task {task_id} failed with status: {status}"
-                )
-
+            if status.lower() in {"failed", "error", "cancelled"}:
+                raise ThordataNetworkError(f"Task {task_id} failed: {status}")
             await asyncio.sleep(current_poll)
             current_poll = min(current_poll * 1.5, max_poll_interval)
-
-        raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
+        raise ThordataTimeoutError(f"Task {task_id} timed out")
 
     # =========================================================================
-    # Account
+    # Account, Usage, Proxy Management (Delegated to HTTP)
     # =========================================================================
 
     async def get_usage_statistics(
-        self,
-        from_date: str | date,
-        to_date: str | date,
+        self, from_date: str | date, to_date: str | date
     ) -> UsageStatistics:
-        """Get account usage statistics for a date range.
-
-        Args:
-            from_date: Start date (YYYY-MM-DD string or date object).
-            to_date: End date (YYYY-MM-DD string or date object).
-
-        Returns:
-            UsageStatistics object with traffic data.
-        """
         self._require_public_credentials()
-        session = self._get_session()
-
-        # Convert dates to strings
         if isinstance(from_date, date):
             from_date = from_date.strftime("%Y-%m-%d")
         if isinstance(to_date, date):
             to_date = to_date.strftime("%Y-%m-%d")
-
         params = {
             "token": self.public_token,
             "key": self.public_key,
             "from_date": from_date,
             "to_date": to_date,
         }
+        response = await self._http.request("GET", self._usage_stats_url, params=params)
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Usage error", code=data.get("code"), payload=data)
+        return UsageStatistics.from_dict(data.get("data", data))
 
-
-
-
-
-
-
-
-
-
-
-
-        if isinstance(data, dict):
-            code = data.get("code")
-            if code is not None and code != 200:
-                msg = extract_error_message(data)
-                raise_for_code(
-                    f"Usage statistics error: {msg}",
-                    code=code,
-                    payload=data,
-                )
-
-            usage_data = data.get("data", data)
-            return UsageStatistics.from_dict(usage_data)
-
-        raise ThordataNetworkError(
-            f"Unexpected usage statistics response: {type(data).__name__}",
-            original_error=None,
-        )
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Usage statistics timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Usage statistics failed: {e}", original_error=e
-            ) from e
-
-    async def get_residential_balance(self) -> dict[str, Any]:
-        """Get residential proxy balance.
-
-        Uses public_token/public_key via gateway API.
-
-        Returns:
-            Balance data dictionary.
-        """
-        session = self._get_session()
-        headers = self._build_gateway_headers()
-
-        logger.info("Async getting residential proxy balance")
-
-        try:
-            async with session.post(
-                f"{self._gateway_base_url}/getFlowBalance",
-                headers=headers,
-                data={},
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"Get balance failed: {msg}", code=code, payload=data
-                    )
-
-                return data.get("data", {})
+    async def get_traffic_balance(self) -> float:
+        self._require_public_credentials()
+        api_base = self._locations_base_url.replace("/locations", "")
+        params = {"token": str(self.public_token), "key": str(self.public_key)}
+        response = await self._http.request(
+            "GET", f"{api_base}/account/traffic-balance", params=params
+        )
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Balance error", code=data.get("code"), payload=data)
+        return float(data.get("data", {}).get("traffic_balance", 0))
 
-
-
-
-
-
-
-
-
+    async def get_wallet_balance(self) -> float:
+        self._require_public_credentials()
+        api_base = self._locations_base_url.replace("/locations", "")
+        params = {"token": str(self.public_token), "key": str(self.public_key)}
+        response = await self._http.request(
+            "GET", f"{api_base}/account/wallet-balance", params=params
+        )
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Balance error", code=data.get("code"), payload=data)
+        return float(data.get("data", {}).get("balance", 0))
 
-    async def
-        self,
-        start_time: str | int,
-        end_time: str | int,
-    ) -> dict[str, Any]:
-        """Get residential proxy usage records.
-
-        Uses public_token/public_key via gateway API.
-
-        Args:
-            start_time: Start timestamp or date string.
-            end_time: End timestamp or date string.
-
-        Returns:
-            Usage data dictionary.
-        """
-        session = self._get_session()
-        headers = self._build_gateway_headers()
-        payload = {"start_time": str(start_time), "end_time": str(end_time)}
-
-        logger.info(f"Async getting residential usage: {start_time} to {end_time}")
-
-        try:
-            async with session.post(
-                f"{self._gateway_base_url}/usageRecord",
-                headers=headers,
-                data=payload,
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
-
-                return data.get("data", {})
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Get usage timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Get usage failed: {e}", original_error=e
-            ) from e
-
-    async def get_traffic_balance(self) -> float:
-        """Get traffic balance in KB via Public API."""
-        self._require_public_credentials()
-        # FIX: Ensure params are strings and dict structure satisfies type checker
-        # _require_public_credentials ensures tokens are not None at runtime,
-        # but for type checking we cast or assert.
-        params = {
-            "token": str(self.public_token),
-            "key": str(self.public_key),
-        }
-        api_base = self._locations_base_url.replace("/locations", "")
-
-        try:
-            async with self._get_session().get(
-                f"{api_base}/account/traffic-balance", params=params
-            ) as resp:
-                data = await resp.json()
-                if data.get("code") != 200:
-                    raise_for_code(
-                        "Get traffic balance failed",
-                        code=data.get("code"),
-                        payload=data,
-                    )
-                return float(data.get("data", {}).get("traffic_balance", 0))
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
-
-    async def get_proxy_user_usage(
+    async def get_proxy_user_usage(
         self,
         username: str,
         start_date: str | date,
         end_date: str | date,
         proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
     ) -> list[dict[str, Any]]:
-        """Get user usage statistics."""
         self._require_public_credentials()
         pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-
         if isinstance(start_date, date):
             start_date = start_date.strftime("%Y-%m-%d")
         if isinstance(end_date, date):
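The gateway-based `get_residential_balance()` and usage-record helpers are dropped above in favor of two typed Public API helpers. A sketch of their use; the 1.4.0 docstring described the traffic figure as KB, which this diff does not re-confirm:

```python
from thordata import AsyncThordataClient


async def report_balances(client: AsyncThordataClient) -> None:
    traffic = await client.get_traffic_balance()  # float; units per Public API
    wallet = await client.get_wallet_balance()    # float; account wallet
    print(f"traffic={traffic}, wallet={wallet}")
```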
@@ -1303,181 +656,31 @@ class AsyncThordataClient:
             "from_date": start_date,
             "to_date": end_date,
         }
-
-
-
-
-
-
-
-                raise_for_code(
-                    "Get user usage failed", code=data.get("code"), payload=data
-                )
-                return data.get("data") or []
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
-
-    async def extract_ip_list(
-        self,
-        num: int = 1,
-        country: str | None = None,
-        state: str | None = None,
-        city: str | None = None,
-        time_limit: int | None = None,
-        port: int | None = None,
-        return_type: str = "txt",
-        protocol: str = "http",
-        sep: str = "\r\n",
-        product: str = "residential",
-    ) -> list[str]:
-        """Async extract IPs."""
-        base_url = "https://get-ip.thordata.net"
-        endpoint = "/unlimited_api" if product == "unlimited" else "/api"
-
-        params: dict[str, Any] = {
-            "num": str(num),
-            "return_type": return_type,
-            "protocol": protocol,
-            "sep": sep,
-        }
-        if country:
-            params["country"] = country
-        if state:
-            params["state"] = state
-        if city:
-            params["city"] = city
-        if time_limit:
-            params["time"] = str(time_limit)
-        if port:
-            params["port"] = str(port)
-
-        username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
-        if username:
-            params["td-customer"] = username
-
-        try:
-            async with self._get_session().get(
-                f"{base_url}{endpoint}", params=params
-            ) as resp:
-                if return_type == "json":
-                    data = await resp.json()
-                    if isinstance(data, dict):
-                        if data.get("code") == 0 or data.get("code") == 200:
-                            raw_list = data.get("data") or []
-                            return [f"{item['ip']}:{item['port']}" for item in raw_list]
-                        else:
-                            raise_for_code(
-                                "Extract IPs failed",
-                                code=data.get("code"),
-                                payload=data,
-                            )
-                    return []
-                else:
-                    text = await resp.text()
-                    text = text.strip()
-                    if text.startswith("{") and "code" in text:
-                        try:
-                            err_data = json.loads(text)
-                            raise_for_code(
-                                "Extract IPs failed",
-                                code=err_data.get("code"),
-                                payload=err_data,
-                            )
-                        except json.JSONDecodeError:
-                            pass
-
-                    actual_sep = sep.replace("\\r", "\r").replace("\\n", "\n")
-                    return [
-                        line.strip() for line in text.split(actual_sep) if line.strip()
-                    ]
-
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
-
-    async def get_wallet_balance(self) -> float:
-        """Get wallet balance via Public API."""
-        self._require_public_credentials()
-        # FIX: Ensure params are strings
-        params = {
-            "token": str(self.public_token),
-            "key": str(self.public_key),
-        }
-        api_base = self._locations_base_url.replace("/locations", "")
-
-        try:
-            async with self._get_session().get(
-                f"{api_base}/account/wallet-balance", params=params
-            ) as resp:
-                data = await resp.json()
-                if data.get("code") != 200:
-                    raise_for_code(
-                        "Get wallet balance failed", code=data.get("code"), payload=data
-                    )
-                return float(data.get("data", {}).get("balance", 0))
-        except aiohttp.ClientError as e:
|
1418
|
-
raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
|
|
1419
|
-
|
|
1420
|
-
# =========================================================================
|
|
1421
|
-
# Proxy Users Management (Sub-accounts)
|
|
1422
|
-
# =========================================================================
|
|
659
|
+
response = await self._http.request(
|
|
660
|
+
"GET", f"{self._proxy_users_url}/usage-statistics", params=params
|
|
661
|
+
)
|
|
662
|
+
data = await response.json()
|
|
663
|
+
if data.get("code") != 200:
|
|
664
|
+
raise_for_code("Get usage failed", code=data.get("code"), payload=data)
|
|
665
|
+
return data.get("data", [])
|
|
1423
666
|
|
|
1424
667
|
async def list_proxy_users(
|
|
1425
668
|
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
1426
669
|
) -> ProxyUserList:
|
|
1427
|
-
"""List all proxy users (sub-accounts).
|
|
1428
|
-
|
|
1429
|
-
Args:
|
|
1430
|
-
proxy_type: Proxy product type.
|
|
1431
|
-
|
|
1432
|
-
Returns:
|
|
1433
|
-
ProxyUserList with user information.
|
|
1434
|
-
"""
|
|
1435
670
|
self._require_public_credentials()
|
|
1436
|
-
|
|
1437
|
-
|
|
671
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1438
672
|
params = {
|
|
1439
673
|
"token": self.public_token,
|
|
1440
674
|
"key": self.public_key,
|
|
1441
|
-
"proxy_type": str(
|
|
1442
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1443
|
-
),
|
|
675
|
+
"proxy_type": str(pt),
|
|
1444
676
|
}
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
timeout=self._api_timeout,
|
|
1453
|
-
) as response:
|
|
1454
|
-
response.raise_for_status()
|
|
1455
|
-
data = await response.json()
|
|
1456
|
-
|
|
1457
|
-
if isinstance(data, dict):
|
|
1458
|
-
code = data.get("code")
|
|
1459
|
-
if code is not None and code != 200:
|
|
1460
|
-
msg = extract_error_message(data)
|
|
1461
|
-
raise_for_code(
|
|
1462
|
-
f"List proxy users error: {msg}", code=code, payload=data
|
|
1463
|
-
)
|
|
1464
|
-
|
|
1465
|
-
user_data = data.get("data", data)
|
|
1466
|
-
return ProxyUserList.from_dict(user_data)
|
|
1467
|
-
|
|
1468
|
-
raise ThordataNetworkError(
|
|
1469
|
-
f"Unexpected proxy users response: {type(data).__name__}",
|
|
1470
|
-
original_error=None,
|
|
1471
|
-
)
|
|
1472
|
-
|
|
1473
|
-
except asyncio.TimeoutError as e:
|
|
1474
|
-
raise ThordataTimeoutError(
|
|
1475
|
-
f"List users timed out: {e}", original_error=e
|
|
1476
|
-
) from e
|
|
1477
|
-
except aiohttp.ClientError as e:
|
|
1478
|
-
raise ThordataNetworkError(
|
|
1479
|
-
f"List users failed: {e}", original_error=e
|
|
1480
|
-
) from e
|
|
677
|
+
response = await self._http.request(
|
|
678
|
+
"GET", f"{self._proxy_users_url}/user-list", params=params
|
|
679
|
+
)
|
|
680
|
+
data = await response.json()
|
|
681
|
+
if data.get("code") != 200:
|
|
682
|
+
raise_for_code("List users error", code=data.get("code"), payload=data)
|
|
683
|
+
return ProxyUserList.from_dict(data.get("data", data))
|
|
1481
684
|
|
|
1482
685
|
async def create_proxy_user(
|
|
1483
686
|
self,
|
|
@@ -1487,175 +690,85 @@ class AsyncThordataClient:
|
|
|
1487
690
|
traffic_limit: int = 0,
|
|
1488
691
|
status: bool = True,
|
|
1489
692
|
) -> dict[str, Any]:
|
|
1490
|
-
"""Create a new proxy user (sub-account).
|
|
1491
|
-
|
|
1492
|
-
Args:
|
|
1493
|
-
username: Sub-account username.
|
|
1494
|
-
password: Sub-account password.
|
|
1495
|
-
proxy_type: Proxy product type.
|
|
1496
|
-
traffic_limit: Traffic limit in MB (0 = unlimited).
|
|
1497
|
-
status: Enable or disable the account.
|
|
1498
|
-
|
|
1499
|
-
Returns:
|
|
1500
|
-
API response data.
|
|
1501
|
-
"""
|
|
1502
693
|
self._require_public_credentials()
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
headers = build_public_api_headers(
|
|
1506
|
-
self.public_token or "", self.public_key or ""
|
|
1507
|
-
)
|
|
1508
|
-
|
|
694
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
695
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
1509
696
|
payload = {
|
|
1510
|
-
"proxy_type": str(
|
|
1511
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1512
|
-
),
|
|
697
|
+
"proxy_type": str(pt),
|
|
1513
698
|
"username": username,
|
|
1514
699
|
"password": password,
|
|
1515
700
|
"traffic_limit": str(traffic_limit),
|
|
1516
701
|
"status": "true" if status else "false",
|
|
1517
702
|
}
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
response.raise_for_status()
|
|
1529
|
-
data = await response.json()
|
|
1530
|
-
|
|
1531
|
-
code = data.get("code")
|
|
1532
|
-
if code != 200:
|
|
1533
|
-
msg = extract_error_message(data)
|
|
1534
|
-
raise_for_code(
|
|
1535
|
-
f"Create proxy user failed: {msg}", code=code, payload=data
|
|
1536
|
-
)
|
|
1537
|
-
|
|
1538
|
-
return data.get("data", {})
|
|
1539
|
-
|
|
1540
|
-
except asyncio.TimeoutError as e:
|
|
1541
|
-
raise ThordataTimeoutError(
|
|
1542
|
-
f"Create user timed out: {e}", original_error=e
|
|
1543
|
-
) from e
|
|
1544
|
-
except aiohttp.ClientError as e:
|
|
1545
|
-
raise ThordataNetworkError(
|
|
1546
|
-
f"Create user failed: {e}", original_error=e
|
|
1547
|
-
) from e
|
|
703
|
+
response = await self._http.request(
|
|
704
|
+
"POST",
|
|
705
|
+
f"{self._proxy_users_url}/create-user",
|
|
706
|
+
data=payload,
|
|
707
|
+
headers=headers,
|
|
708
|
+
)
|
|
709
|
+
data = await response.json()
|
|
710
|
+
if data.get("code") != 200:
|
|
711
|
+
raise_for_code("Create user failed", code=data.get("code"), payload=data)
|
|
712
|
+
return data.get("data", {})
|
|
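Sub-account creation now returns the parsed `data` payload directly, with errors surfaced through `raise_for_code`. A sketch of the call under the same credential assumptions as above; `ProxyType` is assumed to be importable from the package root, so adjust the import to the actual export location:

import asyncio

from thordata import AsyncThordataClient, ProxyType  # ProxyType path is assumed


async def main() -> None:
    async with AsyncThordataClient(
        public_token="your_public_token", public_key="your_public_key"
    ) as client:
        result = await client.create_proxy_user(
            username="team_account",   # hypothetical sub-account name
            password="s3cret-pass",    # placeholder
            proxy_type=ProxyType.RESIDENTIAL,
            traffic_limit=1024,        # MB; 0 means unlimited per the removed docstring
        )
        print(result)


asyncio.run(main())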

     async def update_proxy_user(
         self,
         username: str,
-        password: str,
+        password: str,
         traffic_limit: int | None = None,
         status: bool | None = None,
         proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
+        new_username: str | None = None,
     ) -> dict[str, Any]:
-        """
+        """
+        Update a proxy user.
+        Note: API requires 'new_' prefixed fields and ALL are required.
+        """
         self._require_public_credentials()
-
-        headers = build_public_api_headers(
-            self.public_token or "", self.public_key or ""
-        )
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+
+        limit_val = str(traffic_limit) if traffic_limit is not None else "0"
+        status_val = "true" if (status is None or status) else "false"
+        target_username = new_username or username

         payload = {
-            "proxy_type": str(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
+            "proxy_type": str(pt),
             "username": username,
-            "password": password,
+            "new_username": target_username,
+            "new_password": password,
+            "new_traffic_limit": limit_val,
+            "new_status": status_val,
         }
-        if traffic_limit is not None:
-            payload["traffic_limit"] = str(traffic_limit)
-        if status is not None:
-            payload["status"] = "true" if status else "false"

-
-
-
-
-
-
-
-
-
-                if data.get("code") != 200:
-                    raise_for_code(
-                        f"Update user failed: {data.get('msg')}",
-                        code=data.get("code"),
-                        payload=data,
-                    )
-
-                return data.get("data", {})
-
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Update user failed: {e}", original_error=e
-            ) from e
+        response = await self._http.request(
+            "POST",
+            f"{self._proxy_users_url}/update-user",
+            data=payload,
+            headers=headers,
+        )
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Update user failed", code=data.get("code"), payload=data)
+        return data.get("data", {})
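Under the new contract every `new_`-prefixed field is sent on each call: leaving `traffic_limit` as None serializes `new_traffic_limit="0"` and leaving `status` as None serializes `new_status="true"`, so a partial update rewrites those fields too. A sketch of a rename-plus-disable call under that assumption (account names are hypothetical):

import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(
        public_token="your_public_token", public_key="your_public_key"
    ) as client:
        await client.update_proxy_user(
            username="team_account",        # current name
            password="s3cret-pass",         # always re-sent as new_password
            new_username="team_account_2",  # optional rename; defaults to username
            status=False,                   # becomes new_status="false"
        )


asyncio.run(main())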

     async def delete_proxy_user(
-        self,
-        username: str,
-        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
+        self, username: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
     ) -> dict[str, Any]:
-        """Delete a proxy user.
-
-        Args:
-            username: The sub-account username.
-            proxy_type: Proxy product type.
-
-        Returns:
-            API response data.
-        """
         self._require_public_credentials()
-
-
-
-
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+        payload = {"proxy_type": str(pt), "username": username}
+        response = await self._http.request(
+            "POST",
+            f"{self._proxy_users_url}/delete-user",
+            data=payload,
+            headers=headers,
         )
-
-        payload = {
-            "proxy_type": str(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
-            "username": username,
-        }
-
-        try:
-            async with session.post(
-                f"{self._proxy_users_url}/delete-user",
-                data=payload,
-                headers=headers,
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"Delete user failed: {msg}", code=code, payload=data
-                    )
-
-                return data.get("data", {})
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Delete user timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Delete user failed: {e}", original_error=e
-            ) from e
-
-    # =========================================================================
-    # Whitelist IP Management
-    # =========================================================================
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Delete user failed", code=data.get("code"), payload=data)
+        return data.get("data", {})

     async def add_whitelist_ip(
         self,
@@ -1663,230 +776,102 @@ class AsyncThordataClient:
         proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
         status: bool = True,
     ) -> dict[str, Any]:
-        """Add an IP to the whitelist for IP authentication.
-
-        Args:
-            ip: IP address to whitelist.
-            proxy_type: Proxy product type.
-            status: Enable or disable the whitelist entry.
-
-        Returns:
-            API response data.
-        """
         self._require_public_credentials()
-
-
-        headers = build_public_api_headers(
-            self.public_token or "", self.public_key or ""
-        )
-
-        proxy_type_int = (
-            int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-        )
-
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
         payload = {
-            "proxy_type": str(
+            "proxy_type": str(pt),
             "ip": ip,
             "status": "true" if status else "false",
         }
-
-
-
-
-
-
-
-                headers=headers,
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"Add whitelist IP failed: {msg}", code=code, payload=data
-                    )
-
-                return data.get("data", {})
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Add whitelist timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Add whitelist failed: {e}", original_error=e
-            ) from e
+        response = await self._http.request(
+            "POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
+        )
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("Add whitelist failed", code=data.get("code"), payload=data)
+        return data.get("data", {})

     async def delete_whitelist_ip(
-        self,
-        ip: str,
-        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
+        self, ip: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
     ) -> dict[str, Any]:
-        """Delete an IP from the whitelist.
-
-        Args:
-            ip: The IP address to remove.
-            proxy_type: Proxy product type.
-
-        Returns:
-            API response data.
-        """
         self._require_public_credentials()
-
-
-
-
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        headers = build_public_api_headers(str(self.public_token), str(self.public_key))
+        payload = {"proxy_type": str(pt), "ip": ip}
+        response = await self._http.request(
+            "POST", f"{self._whitelist_url}/delete-ip", data=payload, headers=headers
         )
-
-
-
-
-        )
-
-        }
-
-        try:
-            async with session.post(
-                f"{self._whitelist_url}/delete-ip",
-                data=payload,
-                headers=headers,
-                timeout=self._api_timeout,
-            ) as response:
-                response.raise_for_status()
-                data = await response.json()
-
-                code = data.get("code")
-                if code != 200:
-                    msg = extract_error_message(data)
-                    raise_for_code(
-                        f"Delete whitelist IP failed: {msg}", code=code, payload=data
-                    )
-
-                return data.get("data", {})
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"Delete whitelist timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"Delete whitelist failed: {e}", original_error=e
-            ) from e
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code(
+                "Delete whitelist failed", code=data.get("code"), payload=data
+            )
+        return data.get("data", {})

     async def list_whitelist_ips(
-        self,
-        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
+        self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
     ) -> list[str]:
-        """List all whitelisted IPs.
-
-        Args:
-            proxy_type: Proxy product type.
-
-        Returns:
-            List of IP address strings.
-        """
         self._require_public_credentials()
-
-
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
         params = {
-
-
-
-            "key": self.public_key,
-            "proxy_type": str(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
-        }.items()
-        if v is not None
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(pt),
         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                items = data.get("data", [])
-                result = []
-                for item in items:
-                    if isinstance(item, str):
-                        result.append(item)
-                    elif isinstance(item, dict) and "ip" in item:
-                        result.append(str(item["ip"]))
-                    else:
-                        result.append(str(item))
-                return result
-
-            raise ThordataNetworkError(
-                f"Unexpected whitelist response: {type(data).__name__}",
-                original_error=None,
-            )
-
-        except asyncio.TimeoutError as e:
-            raise ThordataTimeoutError(
-                f"List whitelist timed out: {e}", original_error=e
-            ) from e
-        except aiohttp.ClientError as e:
-            raise ThordataNetworkError(
-                f"List whitelist failed: {e}", original_error=e
-            ) from e
+        response = await self._http.request(
+            "GET", f"{self._whitelist_url}/ip-list", params=params
+        )
+        data = await response.json()
+        if data.get("code") != 200:
+            raise_for_code("List whitelist failed", code=data.get("code"), payload=data)
+
+        items = data.get("data", []) or []
+        result = []
+        for item in items:
+            if isinstance(item, str):
+                result.append(item)
+            elif isinstance(item, dict) and "ip" in item:
+                result.append(str(item["ip"]))
+            else:
+                result.append(str(item))
+        return result
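The whitelist trio now shares one request/parse/raise shape, and `list_whitelist_ips` normalizes both string and dict responses into plain addresses. A round-trip sketch using a documentation-range address (TEST-NET-3), with the same placeholder credentials as before:

import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(
        public_token="your_public_token", public_key="your_public_key"
    ) as client:
        await client.add_whitelist_ip("203.0.113.7")    # enable IP auth for this address
        print(await client.list_whitelist_ips())         # e.g. ["203.0.113.7", ...]
        await client.delete_whitelist_ip("203.0.113.7")  # clean up


asyncio.run(main())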

     # =========================================================================
     # Locations & ASN Methods
     # =========================================================================

-    async def list_countries(
-        self,
+    async def _get_locations(
+        self, endpoint: str, **kwargs: Any
     ) -> list[dict[str, Any]]:
-
-
-
-
+        self._require_public_credentials()
+        params = {"token": self.public_token, "key": self.public_key}
+        for k, v in kwargs.items():
+            params[k] = str(v)

-
-
-        """
-        return await self._get_locations(
-            "countries",
-            proxy_type=(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
+        response = await self._http.request(
+            "GET", f"{self._locations_base_url}/{endpoint}", params=params
         )
+        data = await response.json()

-
-
-
-
-
-        """List supported states/provinces for a country.
+        if isinstance(data, dict):
+            if data.get("code") != 200:
+                raise RuntimeError(f"Locations error: {data.get('msg')}")
+            return data.get("data") or []
+        return data if isinstance(data, list) else []

-
-
-
+    async def list_countries(
+        self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
+    ) -> list[dict[str, Any]]:
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        return await self._get_locations("countries", proxy_type=pt)

-
-
-
+    async def list_states(
+        self, country_code: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
+    ) -> list[dict[str, Any]]:
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
         return await self._get_locations(
-            "states",
-            proxy_type=(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
-            country_code=country_code,
+            "states", proxy_type=pt, country_code=country_code
         )

     async def list_cities(
@@ -1895,366 +880,140 @@ class AsyncThordataClient:
         state_code: str | None = None,
         proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
     ) -> list[dict[str, Any]]:
-
-
-        Args:
-            country_code: Country code.
-            state_code: State code (optional).
-            proxy_type: Proxy product type.
-
-        Returns:
-            List of city dictionaries.
-        """
-        kwargs = {
-            "proxy_type": (
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
-            "country_code": country_code,
-        }
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        kwargs = {"proxy_type": pt, "country_code": country_code}
         if state_code:
             kwargs["state_code"] = state_code
-
         return await self._get_locations("cities", **kwargs)

     async def list_asn(
-        self,
-        country_code: str,
-        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
+        self, country_code: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
     ) -> list[dict[str, Any]]:
-
-
-        Args:
-            country_code: Country code.
-            proxy_type: Proxy product type.
-
-        Returns:
-            List of ASN dictionaries.
-        """
+        pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
         return await self._get_locations(
-            "asn",
-            proxy_type=(
-                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
-            ),
-            country_code=country_code,
+            "asn", proxy_type=pt, country_code=country_code
         )

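All four public location lookups now funnel into `_get_locations`, which builds the token/key query once and normalizes dict-or-list responses; note it raises a plain RuntimeError on non-200 codes rather than going through `raise_for_code`. A usage sketch, with illustrative country and state codes and the usual placeholder credentials:

import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(
        public_token="your_public_token", public_key="your_public_key"
    ) as client:
        countries = await client.list_countries()
        states = await client.list_states("US")
        cities = await client.list_cities("US", state_code="CA")
        print(len(countries), len(states), len(cities))


asyncio.run(main())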
|
1940
896
|
|
|
1941
897
|
# =========================================================================
|
|
1942
898
|
# ISP & Datacenter Proxy Management
|
|
1943
899
|
# =========================================================================
|
|
1944
900
|
|
|
1945
|
-
async def list_proxy_servers(
|
|
1946
|
-
self,
|
|
1947
|
-
proxy_type: int,
|
|
1948
|
-
) -> list[ProxyServer]:
|
|
1949
|
-
"""List ISP or Datacenter proxy servers.
|
|
1950
|
-
|
|
1951
|
-
Args:
|
|
1952
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
1953
|
-
|
|
1954
|
-
Returns:
|
|
1955
|
-
List of ProxyServer objects.
|
|
1956
|
-
"""
|
|
901
|
+
async def list_proxy_servers(self, proxy_type: int) -> list[ProxyServer]:
|
|
1957
902
|
self._require_public_credentials()
|
|
1958
|
-
session = self._get_session()
|
|
1959
|
-
|
|
1960
903
|
params = {
|
|
1961
904
|
"token": self.public_token,
|
|
1962
905
|
"key": self.public_key,
|
|
1963
906
|
"proxy_type": str(proxy_type),
|
|
1964
907
|
}
|
|
908
|
+
response = await self._http.request("GET", self._proxy_list_url, params=params)
|
|
909
|
+
data = await response.json()
|
|
910
|
+
if data.get("code") != 200:
|
|
911
|
+
raise_for_code(
|
|
912
|
+
"List proxy servers error", code=data.get("code"), payload=data
|
|
913
|
+
)
|
|
1965
914
|
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
timeout=self._api_timeout,
|
|
1973
|
-
) as response:
|
|
1974
|
-
response.raise_for_status()
|
|
1975
|
-
data = await response.json()
|
|
1976
|
-
|
|
1977
|
-
if isinstance(data, dict):
|
|
1978
|
-
code = data.get("code")
|
|
1979
|
-
if code is not None and code != 200:
|
|
1980
|
-
msg = extract_error_message(data)
|
|
1981
|
-
raise_for_code(
|
|
1982
|
-
f"List proxy servers error: {msg}", code=code, payload=data
|
|
1983
|
-
)
|
|
1984
|
-
|
|
1985
|
-
server_list = data.get("data", data.get("list", []))
|
|
1986
|
-
elif isinstance(data, list):
|
|
1987
|
-
server_list = data
|
|
1988
|
-
else:
|
|
1989
|
-
raise ThordataNetworkError(
|
|
1990
|
-
f"Unexpected proxy list response: {type(data).__name__}",
|
|
1991
|
-
original_error=None,
|
|
1992
|
-
)
|
|
1993
|
-
|
|
1994
|
-
return [ProxyServer.from_dict(s) for s in server_list]
|
|
1995
|
-
|
|
1996
|
-
except asyncio.TimeoutError as e:
|
|
1997
|
-
raise ThordataTimeoutError(
|
|
1998
|
-
f"List servers timed out: {e}", original_error=e
|
|
1999
|
-
) from e
|
|
2000
|
-
except aiohttp.ClientError as e:
|
|
2001
|
-
raise ThordataNetworkError(
|
|
2002
|
-
f"List servers failed: {e}", original_error=e
|
|
2003
|
-
) from e
|
|
915
|
+
server_list = []
|
|
916
|
+
if isinstance(data, dict):
|
|
917
|
+
server_list = data.get("data", data.get("list", []))
|
|
918
|
+
elif isinstance(data, list):
|
|
919
|
+
server_list = data
|
|
920
|
+
return [ProxyServer.from_dict(s) for s in server_list]
|
|
2004
921
|
|
|
2005
922
|
async def get_proxy_expiration(
|
|
2006
|
-
self,
|
|
2007
|
-
ips: str | list[str],
|
|
2008
|
-
proxy_type: int,
|
|
923
|
+
self, ips: str | list[str], proxy_type: int
|
|
2009
924
|
) -> dict[str, Any]:
|
|
2010
|
-
"""Get expiration time for specific proxy IPs.
|
|
2011
|
-
|
|
2012
|
-
Args:
|
|
2013
|
-
ips: Single IP or comma-separated list of IPs.
|
|
2014
|
-
proxy_type: Proxy type (1=ISP, 2=Datacenter).
|
|
2015
|
-
|
|
2016
|
-
Returns:
|
|
2017
|
-
Dictionary with IP expiration times.
|
|
2018
|
-
"""
|
|
2019
925
|
self._require_public_credentials()
|
|
2020
|
-
session = self._get_session()
|
|
2021
|
-
|
|
2022
926
|
if isinstance(ips, list):
|
|
2023
927
|
ips = ",".join(ips)
|
|
2024
|
-
|
|
2025
928
|
params = {
|
|
2026
929
|
"token": self.public_token,
|
|
2027
930
|
"key": self.public_key,
|
|
2028
931
|
"proxy_type": str(proxy_type),
|
|
2029
932
|
"ips": ips,
|
|
2030
933
|
}
|
|
934
|
+
response = await self._http.request(
|
|
935
|
+
"GET", self._proxy_expiration_url, params=params
|
|
936
|
+
)
|
|
937
|
+
data = await response.json()
|
|
938
|
+
if data.get("code") != 200:
|
|
939
|
+
raise_for_code("Get expiration error", code=data.get("code"), payload=data)
|
|
940
|
+
return data.get("data", data)
|
|
2031
941
|
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
async def get_isp_regions(self) -> list[dict[str, Any]]:
|
|
2065
|
-
"""Get available ISP proxy regions.
|
|
2066
|
-
|
|
2067
|
-
Uses public_token/public_key via gateway API.
|
|
2068
|
-
|
|
2069
|
-
Returns:
|
|
2070
|
-
List of ISP region dictionaries.
|
|
2071
|
-
"""
|
|
2072
|
-
session = self._get_session()
|
|
2073
|
-
headers = self._build_gateway_headers()
|
|
2074
|
-
|
|
2075
|
-
logger.info("Async getting ISP regions")
|
|
2076
|
-
|
|
2077
|
-
try:
|
|
2078
|
-
async with session.post(
|
|
2079
|
-
f"{self._gateway_base_url}/getRegionIsp",
|
|
2080
|
-
headers=headers,
|
|
2081
|
-
data={},
|
|
2082
|
-
timeout=self._api_timeout,
|
|
2083
|
-
) as response:
|
|
2084
|
-
response.raise_for_status()
|
|
2085
|
-
data = await response.json()
|
|
2086
|
-
|
|
2087
|
-
code = data.get("code")
|
|
2088
|
-
if code != 200:
|
|
2089
|
-
msg = extract_error_message(data)
|
|
2090
|
-
raise_for_code(
|
|
2091
|
-
f"Get ISP regions failed: {msg}", code=code, payload=data
|
|
2092
|
-
)
|
|
2093
|
-
|
|
2094
|
-
return data.get("data", [])
|
|
2095
|
-
|
|
2096
|
-
except asyncio.TimeoutError as e:
|
|
2097
|
-
raise ThordataTimeoutError(
|
|
2098
|
-
f"Get ISP regions timed out: {e}", original_error=e
|
|
2099
|
-
) from e
|
|
2100
|
-
except aiohttp.ClientError as e:
|
|
2101
|
-
raise ThordataNetworkError(
|
|
2102
|
-
f"Get ISP regions failed: {e}", original_error=e
|
|
2103
|
-
) from e
|
|
2104
|
-
|
|
2105
|
-
async def list_isp_proxies(self) -> list[dict[str, Any]]:
|
|
2106
|
-
"""List ISP proxies.
|
|
2107
|
-
|
|
2108
|
-
Uses public_token/public_key via gateway API.
|
|
2109
|
-
|
|
2110
|
-
Returns:
|
|
2111
|
-
List of ISP proxy dictionaries.
|
|
2112
|
-
"""
|
|
2113
|
-
session = self._get_session()
|
|
2114
|
-
headers = self._build_gateway_headers()
|
|
942
|
+
async def extract_ip_list(
|
|
943
|
+
self,
|
|
944
|
+
num: int = 1,
|
|
945
|
+
country: str | None = None,
|
|
946
|
+
state: str | None = None,
|
|
947
|
+
city: str | None = None,
|
|
948
|
+
time_limit: int | None = None,
|
|
949
|
+
port: int | None = None,
|
|
950
|
+
return_type: str = "txt",
|
|
951
|
+
protocol: str = "http",
|
|
952
|
+
sep: str = "\r\n",
|
|
953
|
+
product: str = "residential",
|
|
954
|
+
) -> list[str]:
|
|
955
|
+
base_url = "https://get-ip.thordata.net"
|
|
956
|
+
endpoint = "/unlimited_api" if product == "unlimited" else "/api"
|
|
957
|
+
params: dict[str, Any] = {
|
|
958
|
+
"num": str(num),
|
|
959
|
+
"return_type": return_type,
|
|
960
|
+
"protocol": protocol,
|
|
961
|
+
"sep": sep,
|
|
962
|
+
}
|
|
963
|
+
if country:
|
|
964
|
+
params["country"] = country
|
|
965
|
+
if state:
|
|
966
|
+
params["state"] = state
|
|
967
|
+
if city:
|
|
968
|
+
params["city"] = city
|
|
969
|
+
if time_limit:
|
|
970
|
+
params["time"] = str(time_limit)
|
|
971
|
+
if port:
|
|
972
|
+
params["port"] = str(port)
|
|
2115
973
|
|
|
2116
|
-
|
|
974
|
+
username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
|
|
975
|
+
if username:
|
|
976
|
+
params["td-customer"] = username
|
|
2117
977
|
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
headers=headers,
|
|
2122
|
-
data={},
|
|
2123
|
-
timeout=self._api_timeout,
|
|
2124
|
-
) as response:
|
|
2125
|
-
response.raise_for_status()
|
|
2126
|
-
data = await response.json()
|
|
978
|
+
response = await self._http.request(
|
|
979
|
+
"GET", f"{base_url}{endpoint}", params=params
|
|
980
|
+
)
|
|
2127
981
|
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
982
|
+
if return_type == "json":
|
|
983
|
+
data = await response.json()
|
|
984
|
+
if isinstance(data, dict):
|
|
985
|
+
if data.get("code") in (0, 200):
|
|
986
|
+
raw_list = data.get("data") or []
|
|
987
|
+
return [f"{item['ip']}:{item['port']}" for item in raw_list]
|
|
988
|
+
else:
|
|
2131
989
|
raise_for_code(
|
|
2132
|
-
|
|
990
|
+
"Extract IPs failed", code=data.get("code"), payload=data
|
|
2133
991
|
)
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
except aiohttp.ClientError as e:
|
|
2142
|
-
raise ThordataNetworkError(
|
|
2143
|
-
f"List ISP proxies failed: {e}", original_error=e
|
|
2144
|
-
) from e
|
|
2145
|
-
|
|
2146
|
-
async def get_isp_wallet_balance(self) -> dict[str, Any]:
|
|
2147
|
-
"""Get wallet balance for ISP proxies.
|
|
2148
|
-
|
|
2149
|
-
Uses public_token/public_key via gateway API.
|
|
2150
|
-
|
|
2151
|
-
Returns:
|
|
2152
|
-
Wallet balance data dictionary.
|
|
2153
|
-
"""
|
|
2154
|
-
session = self._get_session()
|
|
2155
|
-
headers = self._build_gateway_headers()
|
|
2156
|
-
|
|
2157
|
-
logger.info("Async getting wallet balance")
|
|
2158
|
-
|
|
2159
|
-
try:
|
|
2160
|
-
async with session.post(
|
|
2161
|
-
f"{self._gateway_base_url}/getBalance",
|
|
2162
|
-
headers=headers,
|
|
2163
|
-
data={},
|
|
2164
|
-
timeout=self._api_timeout,
|
|
2165
|
-
) as response:
|
|
2166
|
-
response.raise_for_status()
|
|
2167
|
-
data = await response.json()
|
|
2168
|
-
|
|
2169
|
-
code = data.get("code")
|
|
2170
|
-
if code != 200:
|
|
2171
|
-
msg = extract_error_message(data)
|
|
992
|
+
return []
|
|
993
|
+
else:
|
|
994
|
+
text = await response.text()
|
|
995
|
+
text = text.strip()
|
|
996
|
+
if text.startswith("{") and "code" in text:
|
|
997
|
+
try:
|
|
998
|
+
err_data = await response.json()
|
|
2172
999
|
raise_for_code(
|
|
2173
|
-
|
|
1000
|
+
"Extract IPs failed",
|
|
1001
|
+
code=err_data.get("code"),
|
|
1002
|
+
payload=err_data,
|
|
2174
1003
|
)
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
raise ThordataTimeoutError(
|
|
2180
|
-
f"Get wallet balance timed out: {e}", original_error=e
|
|
2181
|
-
) from e
|
|
2182
|
-
except aiohttp.ClientError as e:
|
|
2183
|
-
raise ThordataNetworkError(
|
|
2184
|
-
f"Get wallet balance failed: {e}", original_error=e
|
|
2185
|
-
) from e
|
|
1004
|
+
except ValueError:
|
|
1005
|
+
pass
|
|
1006
|
+
actual_sep = sep.replace("\\r", "\r").replace("\\n", "\n")
|
|
1007
|
+
return [line.strip() for line in text.split(actual_sep) if line.strip()]
|
|
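`extract_ip_list` keeps its 1.4.0 surface but now rides on the shared core session: the txt path still honors a custom `sep`, and inline JSON error bodies are re-raised via `raise_for_code`. A sketch, assuming the client can be constructed without scraper credentials for this endpoint since authentication travels in the `td-customer` query parameter filled from THORDATA_RESIDENTIAL_USERNAME:

import asyncio
import os

from thordata import AsyncThordataClient


async def main() -> None:
    os.environ.setdefault("THORDATA_RESIDENTIAL_USERNAME", "your_username")  # placeholder
    async with AsyncThordataClient() as client:  # no-credential construction assumed
        proxies = await client.extract_ip_list(num=5, country="us", return_type="json")
        print(proxies)  # e.g. ["198.51.100.1:30001", ...]


asyncio.run(main())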

     # =========================================================================
-    #
+    # Helpers
     # =========================================================================

-    def _require_public_credentials(self) -> None:
-        """Ensure public API credentials are available."""
-        if not self.public_token or not self.public_key:
-            raise ThordataConfigError(
-                "public_token and public_key are required for this operation. "
-                "Please provide them when initializing AsyncThordataClient."
-            )
-
-    def _build_gateway_headers(self) -> dict[str, str]:
-        """Headers for gateway-style endpoints."""
-        self._require_public_credentials()
-        return build_public_api_headers(self.public_token or "", self.public_key or "")
-
-    async def _get_locations(
-        self, endpoint: str, **kwargs: Any
-    ) -> list[dict[str, Any]]:
-        """Internal async locations API call.
-
-        Args:
-            endpoint: Location endpoint (countries, states, cities, asn).
-            **kwargs: Query parameters.
-
-        Returns:
-            List of location dictionaries.
-        """
-        self._require_public_credentials()
-
-        params = {
-            "token": self.public_token or "",
-            "key": self.public_key or "",
-        }
-
-        for key, value in kwargs.items():
-            params[key] = str(value)
-
-        url = f"{self._locations_base_url}/{endpoint}"
-
-        logger.debug(f"Async Locations API: {url}")
-
-        # Create temporary session for this request (no proxy needed)
-        async with (
-            aiohttp.ClientSession(trust_env=True) as temp_session,
-            temp_session.get(url, params=params) as response,
-        ):
-            response.raise_for_status()
-            data = await response.json()
-
-            if isinstance(data, dict):
-                code = data.get("code")
-                if code is not None and code != 200:
-                    msg = data.get("msg", "")
-                    raise RuntimeError(
-                        f"Locations API error ({endpoint}): code={code}, msg={msg}"
-                    )
-                return data.get("data") or []
-
-            if isinstance(data, list):
-                return data
-
-            return []
-
     def _get_proxy_endpoint_overrides(
         self, product: ProxyProduct
     ) -> tuple[str | None, int | None, str]:
-        """Get proxy endpoint overrides from environment variables."""
         prefix = product.value.upper()
-
         host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
             "THORDATA_PROXY_HOST"
         )
@@ -2266,97 +1025,40 @@ class AsyncThordataClient:
             or os.getenv("THORDATA_PROXY_PROTOCOL")
             or "http"
         )
-
-        port: int | None = None
-        if port_raw:
-            try:
-                port = int(port_raw)
-            except ValueError:
-                port = None
-
+        port = int(port_raw) if port_raw and port_raw.isdigit() else None
         return host or None, port, protocol

     def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
-
-
-
-
-
-
-
-        )
-
-
-
-
-
-
-
-
-
-
-        u = os.getenv("THORDATA_DATACENTER_USERNAME")
-        p = os.getenv("THORDATA_DATACENTER_PASSWORD")
-        if u and p:
-            host, port, protocol = self._get_proxy_endpoint_overrides(
-                ProxyProduct.DATACENTER
-            )
-            return ProxyConfig(
-                username=u,
-                password=p,
-                product=ProxyProduct.DATACENTER,
-                host=host,
-                port=port,
-                protocol=protocol,
-            )
-
-        # Check MOBILE
-        u = os.getenv("THORDATA_MOBILE_USERNAME")
-        p = os.getenv("THORDATA_MOBILE_PASSWORD")
-        if u and p:
-            host, port, protocol = self._get_proxy_endpoint_overrides(
-                ProxyProduct.MOBILE
-            )
-            return ProxyConfig(
-                username=u,
-                password=p,
-                product=ProxyProduct.MOBILE,
-                host=host,
-                port=port,
-                protocol=protocol,
-            )
-
+        for prod in [
+            ProxyProduct.RESIDENTIAL,
+            ProxyProduct.DATACENTER,
+            ProxyProduct.MOBILE,
+        ]:
+            prefix = prod.value.upper()
+            u = os.getenv(f"THORDATA_{prefix}_USERNAME")
+            p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
+            if u and p:
+                h, port, proto = self._get_proxy_endpoint_overrides(prod)
+                return ProxyConfig(
+                    username=u,
+                    password=p,
+                    product=prod,
+                    host=h,
+                    port=port,
+                    protocol=proto,
+                )
         return None

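The per-product credential checks collapse into a single loop in priority order: RESIDENTIAL, then DATACENTER, then MOBILE; the first product with both variables set wins, and host/port/protocol overrides are resolved per product. A sketch of an environment that would select the residential product (values are hypothetical):

import os

# First match wins: residential is checked before datacenter and mobile.
os.environ["THORDATA_RESIDENTIAL_USERNAME"] = "alice"    # placeholder
os.environ["THORDATA_RESIDENTIAL_PASSWORD"] = "hunter2"  # placeholder

# Optional endpoint override, as read by _get_proxy_endpoint_overrides above;
# a global THORDATA_PROXY_HOST works as the fallback.
os.environ["THORDATA_RESIDENTIAL_PROXY_HOST"] = "proxy.example.net"

Note the subtle port-parsing change: `str.isdigit()` rejects signs, so a negative port override now falls back to None where the old `int()` call accepted it.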
     def get_browser_connection_url(
         self, username: str | None = None, password: str | None = None
     ) -> str:
-        """
-
-
-        Note: This method is synchronous as it only does string formatting.
-        """
-        user = (
-            username
-            or os.getenv("THORDATA_BROWSER_USERNAME")
-            or os.getenv("THORDATA_RESIDENTIAL_USERNAME")
-        )
-        pwd = (
-            password
-            or os.getenv("THORDATA_BROWSER_PASSWORD")
-            or os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
-        )
-
+        user = username or os.getenv("THORDATA_BROWSER_USERNAME")
+        pwd = password or os.getenv("THORDATA_BROWSER_PASSWORD")
         if not user or not pwd:
-            raise ThordataConfigError(
-                "Browser credentials missing. Set THORDATA_BROWSER_USERNAME/PASSWORD or pass arguments."
-            )
-
+            raise ThordataConfigError("Browser credentials missing.")
         prefix = "td-customer-"
-        # Fixed SIM108 (ternary operator)
         final_user = f"{prefix}{user}" if not user.startswith(prefix) else user

         safe_user = quote(final_user, safe="")
         safe_pass = quote(pwd, safe="")
-
         return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
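Note the behavior change here: the browser URL builder no longer falls back to the residential credentials, so only THORDATA_BROWSER_USERNAME/PASSWORD or explicit arguments work, while the td-customer- prefix and URL-quoting are still applied for you. A sketch with placeholder credentials (the no-argument constructor is an assumption; the method itself is synchronous, so no event loop is needed, and the resulting URL can be handed to any CDP/WebSocket-capable browser client):

import os

from thordata import AsyncThordataClient

os.environ["THORDATA_BROWSER_USERNAME"] = "alice"    # placeholder
os.environ["THORDATA_BROWSER_PASSWORD"] = "hunter2"  # placeholder

client = AsyncThordataClient()  # no-credential construction assumed
ws_url = client.get_browser_connection_url()
# -> wss://td-customer-alice:hunter2@ws-browser.thordata.com
print(ws_url)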