thordata-sdk 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +4 -40
- thordata/async_client.py +572 -1241
- thordata/async_unlimited.py +130 -0
- thordata/client.py +1184 -1309
- thordata/core/__init__.py +23 -0
- thordata/core/async_http_client.py +91 -0
- thordata/core/http_client.py +79 -0
- thordata/core/tunnel.py +287 -0
- thordata/demo.py +2 -2
- thordata/enums.py +41 -380
- thordata/models.py +37 -1193
- thordata/tools/__init__.py +28 -0
- thordata/tools/base.py +42 -0
- thordata/tools/code.py +26 -0
- thordata/tools/ecommerce.py +67 -0
- thordata/tools/search.py +73 -0
- thordata/tools/social.py +190 -0
- thordata/tools/video.py +81 -0
- thordata/types/__init__.py +77 -0
- thordata/types/common.py +141 -0
- thordata/types/proxy.py +340 -0
- thordata/types/serp.py +224 -0
- thordata/types/task.py +144 -0
- thordata/types/universal.py +66 -0
- thordata/unlimited.py +169 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/METADATA +74 -51
- thordata_sdk-1.5.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/WHEEL +1 -1
- thordata_sdk-1.3.0.dist-info/RECORD +0 -16
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/top_level.txt +0 -0
thordata/async_client.py
CHANGED
|
@@ -3,21 +3,6 @@ Asynchronous client for the Thordata API.
|
|
|
3
3
|
|
|
4
4
|
This module provides the AsyncThordataClient for high-concurrency workloads,
|
|
5
5
|
built on aiohttp.
|
|
6
|
-
|
|
7
|
-
Example:
|
|
8
|
-
>>> import asyncio
|
|
9
|
-
>>> from thordata import AsyncThordataClient
|
|
10
|
-
>>>
|
|
11
|
-
>>> async def main():
|
|
12
|
-
... async with AsyncThordataClient(
|
|
13
|
-
... scraper_token="your_token",
|
|
14
|
-
... public_token="your_public_token",
|
|
15
|
-
... public_key="your_public_key"
|
|
16
|
-
... ) as client:
|
|
17
|
-
... response = await client.get("https://httpbin.org/ip")
|
|
18
|
-
... print(await response.json())
|
|
19
|
-
>>>
|
|
20
|
-
>>> asyncio.run(main())
|
|
21
6
|
"""
|
|
22
7
|
|
|
23
8
|
from __future__ import annotations
|
|
@@ -27,31 +12,40 @@ import logging
|
|
|
27
12
|
import os
|
|
28
13
|
from datetime import date
|
|
29
14
|
from typing import Any
|
|
15
|
+
from urllib.parse import quote
|
|
30
16
|
|
|
31
17
|
import aiohttp
|
|
32
18
|
|
|
33
|
-
|
|
19
|
+
# Import Legacy/Compat
|
|
34
20
|
from ._utils import (
|
|
35
21
|
build_auth_headers,
|
|
36
22
|
build_builder_headers,
|
|
37
23
|
build_public_api_headers,
|
|
38
|
-
build_user_agent,
|
|
39
24
|
decode_base64_image,
|
|
40
25
|
extract_error_message,
|
|
41
26
|
parse_json_response,
|
|
42
27
|
)
|
|
43
|
-
from .
|
|
28
|
+
from .async_unlimited import AsyncUnlimitedNamespace
|
|
29
|
+
|
|
30
|
+
# Import Core
|
|
31
|
+
from .core.async_http_client import AsyncThordataHttpSession
|
|
32
|
+
from .enums import Engine
|
|
44
33
|
from .exceptions import (
|
|
45
34
|
ThordataConfigError,
|
|
46
35
|
ThordataNetworkError,
|
|
47
36
|
ThordataTimeoutError,
|
|
48
37
|
raise_for_code,
|
|
49
38
|
)
|
|
50
|
-
from .
|
|
39
|
+
from .retry import RetryConfig
|
|
40
|
+
from .serp_engines import AsyncSerpNamespace
|
|
41
|
+
|
|
42
|
+
# Import Types
|
|
43
|
+
from .types import (
|
|
51
44
|
CommonSettings,
|
|
52
45
|
ProxyConfig,
|
|
53
46
|
ProxyProduct,
|
|
54
47
|
ProxyServer,
|
|
48
|
+
ProxyType,
|
|
55
49
|
ProxyUserList,
|
|
56
50
|
ScraperTaskConfig,
|
|
57
51
|
SerpRequest,
|
|
@@ -59,40 +53,14 @@ from .models import (
|
|
|
59
53
|
UsageStatistics,
|
|
60
54
|
VideoTaskConfig,
|
|
61
55
|
)
|
|
62
|
-
from .retry import RetryConfig
|
|
63
|
-
from .serp_engines import AsyncSerpNamespace
|
|
64
56
|
|
|
65
57
|
logger = logging.getLogger(__name__)
|
|
66
58
|
|
|
67
59
|
|
|
68
60
|
class AsyncThordataClient:
|
|
69
|
-
"""
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
Designed for high-concurrency AI agents and data pipelines.
|
|
73
|
-
|
|
74
|
-
Args:
|
|
75
|
-
scraper_token: The API token from your Dashboard.
|
|
76
|
-
public_token: The public API token.
|
|
77
|
-
public_key: The public API key.
|
|
78
|
-
proxy_host: Custom proxy gateway host.
|
|
79
|
-
proxy_port: Custom proxy gateway port.
|
|
80
|
-
timeout: Default request timeout in seconds.
|
|
81
|
-
retry_config: Configuration for automatic retries.
|
|
82
|
-
|
|
83
|
-
Example:
|
|
84
|
-
>>> async with AsyncThordataClient(
|
|
85
|
-
... scraper_token="token",
|
|
86
|
-
... public_token="pub_token",
|
|
87
|
-
... public_key="pub_key"
|
|
88
|
-
... ) as client:
|
|
89
|
-
... # Old style
|
|
90
|
-
... results = await client.serp_search("python")
|
|
91
|
-
... # New style (Namespaced)
|
|
92
|
-
... maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
|
|
93
|
-
"""
|
|
94
|
-
|
|
95
|
-
# API Endpoints (same as sync client)
|
|
61
|
+
"""The official asynchronous Python client for Thordata."""
|
|
62
|
+
|
|
63
|
+
# API Endpoints
|
|
96
64
|
BASE_URL = "https://scraperapi.thordata.com"
|
|
97
65
|
UNIVERSAL_URL = "https://universalapi.thordata.com"
|
|
98
66
|
API_URL = "https://openapi.thordata.com/api/web-scraper-api"
|
|
@@ -100,7 +68,7 @@ class AsyncThordataClient:
|
|
|
100
68
|
|
|
101
69
|
def __init__(
|
|
102
70
|
self,
|
|
103
|
-
scraper_token: str | None = None,
|
|
71
|
+
scraper_token: str | None = None,
|
|
104
72
|
public_token: str | None = None,
|
|
105
73
|
public_key: str | None = None,
|
|
106
74
|
proxy_host: str = "pr.thordata.net",
|
|
@@ -114,82 +82,67 @@ class AsyncThordataClient:
|
|
|
114
82
|
web_scraper_api_base_url: str | None = None,
|
|
115
83
|
locations_base_url: str | None = None,
|
|
116
84
|
) -> None:
|
|
117
|
-
"""Initialize the Async Thordata Client."""
|
|
118
|
-
|
|
119
85
|
self.scraper_token = scraper_token
|
|
120
86
|
self.public_token = public_token
|
|
121
87
|
self.public_key = public_key
|
|
122
88
|
|
|
123
|
-
# Proxy configuration
|
|
124
89
|
self._proxy_host = proxy_host
|
|
125
90
|
self._proxy_port = proxy_port
|
|
126
|
-
|
|
127
|
-
# Timeout configuration
|
|
128
|
-
self._default_timeout = aiohttp.ClientTimeout(total=timeout)
|
|
129
|
-
self._api_timeout = aiohttp.ClientTimeout(total=api_timeout)
|
|
130
|
-
|
|
131
|
-
# Retry configuration
|
|
132
91
|
self._retry_config = retry_config or RetryConfig()
|
|
133
92
|
|
|
134
|
-
|
|
93
|
+
self._api_timeout = api_timeout
|
|
94
|
+
|
|
135
95
|
self._auth_mode = auth_mode.lower()
|
|
136
96
|
if self._auth_mode not in ("bearer", "header_token"):
|
|
137
|
-
raise ThordataConfigError(
|
|
138
|
-
f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
|
|
139
|
-
)
|
|
97
|
+
raise ThordataConfigError(f"Invalid auth_mode: {auth_mode}")
|
|
140
98
|
|
|
141
|
-
#
|
|
99
|
+
# Core Async HTTP Client
|
|
100
|
+
self._http = AsyncThordataHttpSession(
|
|
101
|
+
timeout=api_timeout, retry_config=self._retry_config
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Base URLs Configuration
|
|
142
105
|
scraperapi_base = (
|
|
143
106
|
scraperapi_base_url
|
|
144
107
|
or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
|
|
145
108
|
or self.BASE_URL
|
|
146
109
|
).rstrip("/")
|
|
147
|
-
|
|
148
110
|
universalapi_base = (
|
|
149
111
|
universalapi_base_url
|
|
150
112
|
or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
|
|
151
113
|
or self.UNIVERSAL_URL
|
|
152
114
|
).rstrip("/")
|
|
153
|
-
|
|
154
115
|
web_scraper_api_base = (
|
|
155
116
|
web_scraper_api_base_url
|
|
156
117
|
or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
|
|
157
118
|
or self.API_URL
|
|
158
119
|
).rstrip("/")
|
|
159
|
-
|
|
160
120
|
locations_base = (
|
|
161
121
|
locations_base_url
|
|
162
122
|
or os.getenv("THORDATA_LOCATIONS_BASE_URL")
|
|
163
123
|
or self.LOCATIONS_URL
|
|
164
124
|
).rstrip("/")
|
|
165
125
|
|
|
166
|
-
|
|
167
|
-
gateway_base = os.getenv(
|
|
126
|
+
self._gateway_base_url = os.getenv(
|
|
168
127
|
"THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
|
|
169
128
|
)
|
|
170
|
-
|
|
129
|
+
self._child_base_url = os.getenv(
|
|
171
130
|
"THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
|
|
172
131
|
)
|
|
173
132
|
|
|
174
|
-
|
|
175
|
-
self._child_base_url = child_base
|
|
176
|
-
|
|
133
|
+
# URL Construction
|
|
177
134
|
self._serp_url = f"{scraperapi_base}/request"
|
|
178
135
|
self._builder_url = f"{scraperapi_base}/builder"
|
|
179
136
|
self._video_builder_url = f"{scraperapi_base}/video_builder"
|
|
180
137
|
self._universal_url = f"{universalapi_base}/request"
|
|
181
|
-
|
|
182
138
|
self._status_url = f"{web_scraper_api_base}/tasks-status"
|
|
183
139
|
self._download_url = f"{web_scraper_api_base}/tasks-download"
|
|
184
140
|
self._list_url = f"{web_scraper_api_base}/tasks-list"
|
|
185
|
-
|
|
186
141
|
self._locations_base_url = locations_base
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
self._proxy_users_url =
|
|
191
|
-
f"{locations_base.replace('/locations', '')}/proxy-users"
|
|
192
|
-
)
|
|
142
|
+
|
|
143
|
+
shared_api_base = locations_base.replace("/locations", "")
|
|
144
|
+
self._usage_stats_url = f"{shared_api_base}/account/usage-statistics"
|
|
145
|
+
self._proxy_users_url = f"{shared_api_base}/proxy-users"
|
|
193
146
|
|
|
194
147
|
whitelist_base = os.getenv(
|
|
195
148
|
"THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
|
|
@@ -202,40 +155,29 @@ class AsyncThordataClient:
|
|
|
202
155
|
self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
|
|
203
156
|
self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
|
|
204
157
|
|
|
205
|
-
#
|
|
206
|
-
self._session: aiohttp.ClientSession | None = None
|
|
207
|
-
|
|
208
|
-
# Namespaced Access (e.g. client.serp.google.maps(...))
|
|
158
|
+
# Namespaces
|
|
209
159
|
self.serp = AsyncSerpNamespace(self)
|
|
160
|
+
self.unlimited = AsyncUnlimitedNamespace(self)
|
|
210
161
|
|
|
211
162
|
async def __aenter__(self) -> AsyncThordataClient:
|
|
212
|
-
|
|
213
|
-
if self._session is None or self._session.closed:
|
|
214
|
-
self._session = aiohttp.ClientSession(
|
|
215
|
-
timeout=self._api_timeout,
|
|
216
|
-
trust_env=True,
|
|
217
|
-
headers={"User-Agent": build_user_agent(_sdk_version, "aiohttp")},
|
|
218
|
-
)
|
|
163
|
+
await self._http._ensure_session()
|
|
219
164
|
return self
|
|
220
165
|
|
|
221
166
|
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
222
|
-
"""Async context manager exit."""
|
|
223
167
|
await self.close()
|
|
224
168
|
|
|
225
169
|
async def close(self) -> None:
|
|
226
|
-
|
|
227
|
-
if self._session and not self._session.closed:
|
|
228
|
-
await self._session.close()
|
|
229
|
-
self._session = None
|
|
170
|
+
await self._http.close()
|
|
230
171
|
|
|
231
172
|
def _get_session(self) -> aiohttp.ClientSession:
|
|
232
|
-
"""
|
|
233
|
-
if
|
|
234
|
-
raise RuntimeError(
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
173
|
+
"""Internal helper for namespaces that expect direct session access (legacy compat)."""
|
|
174
|
+
if not self._http._session:
|
|
175
|
+
raise RuntimeError("Session not initialized. Use 'async with client'.")
|
|
176
|
+
return self._http._session
|
|
177
|
+
|
|
178
|
+
def _require_public_credentials(self) -> None:
|
|
179
|
+
if not self.public_token or not self.public_key:
|
|
180
|
+
raise ThordataConfigError("public_token and public_key are required.")
|
|
239
181
|
|
|
240
182
|
# =========================================================================
|
|
241
183
|
# Proxy Network Methods
|
|
@@ -248,54 +190,7 @@ class AsyncThordataClient:
|
|
|
248
190
|
proxy_config: ProxyConfig | None = None,
|
|
249
191
|
**kwargs: Any,
|
|
250
192
|
) -> aiohttp.ClientResponse:
|
|
251
|
-
""
|
|
252
|
-
Send an async GET request through the Proxy Network.
|
|
253
|
-
|
|
254
|
-
Args:
|
|
255
|
-
url: The target URL.
|
|
256
|
-
proxy_config: Custom proxy configuration.
|
|
257
|
-
**kwargs: Additional aiohttp arguments.
|
|
258
|
-
|
|
259
|
-
Returns:
|
|
260
|
-
The aiohttp response object.
|
|
261
|
-
"""
|
|
262
|
-
session = self._get_session()
|
|
263
|
-
|
|
264
|
-
logger.debug(f"Async Proxy GET: {url}")
|
|
265
|
-
|
|
266
|
-
if proxy_config is None:
|
|
267
|
-
proxy_config = self._get_default_proxy_config_from_env()
|
|
268
|
-
|
|
269
|
-
if proxy_config is None:
|
|
270
|
-
raise ThordataConfigError(
|
|
271
|
-
"Proxy credentials are missing. "
|
|
272
|
-
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
273
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
274
|
-
)
|
|
275
|
-
|
|
276
|
-
# aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
|
|
277
|
-
# Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
|
|
278
|
-
# to avoid confusing "it always fails" behavior.
|
|
279
|
-
if getattr(proxy_config, "protocol", "http").lower() == "https":
|
|
280
|
-
raise ThordataConfigError(
|
|
281
|
-
"Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
|
|
282
|
-
"aiohttp support for 'https://' proxies is limited and may fail. "
|
|
283
|
-
"Please use ThordataClient.get/post (sync client) for Proxy Network requests."
|
|
284
|
-
)
|
|
285
|
-
proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
|
|
286
|
-
|
|
287
|
-
try:
|
|
288
|
-
return await session.get(
|
|
289
|
-
url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
|
|
290
|
-
)
|
|
291
|
-
except asyncio.TimeoutError as e:
|
|
292
|
-
raise ThordataTimeoutError(
|
|
293
|
-
f"Async request timed out: {e}", original_error=e
|
|
294
|
-
) from e
|
|
295
|
-
except aiohttp.ClientError as e:
|
|
296
|
-
raise ThordataNetworkError(
|
|
297
|
-
f"Async request failed: {e}", original_error=e
|
|
298
|
-
) from e
|
|
193
|
+
return await self._proxy_request("GET", url, proxy_config, **kwargs)
|
|
299
194
|
|
|
300
195
|
async def post(
|
|
301
196
|
self,
|
|
@@ -304,57 +199,36 @@ class AsyncThordataClient:
|
|
|
304
199
|
proxy_config: ProxyConfig | None = None,
|
|
305
200
|
**kwargs: Any,
|
|
306
201
|
) -> aiohttp.ClientResponse:
|
|
307
|
-
""
|
|
308
|
-
Send an async POST request through the Proxy Network.
|
|
202
|
+
return await self._proxy_request("POST", url, proxy_config, **kwargs)
|
|
309
203
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
Returns:
|
|
316
|
-
The aiohttp response object.
|
|
317
|
-
"""
|
|
318
|
-
session = self._get_session()
|
|
319
|
-
|
|
320
|
-
logger.debug(f"Async Proxy POST: {url}")
|
|
204
|
+
async def _proxy_request(
|
|
205
|
+
self, method: str, url: str, proxy_config: ProxyConfig | None, **kwargs: Any
|
|
206
|
+
) -> aiohttp.ClientResponse:
|
|
207
|
+
logger.debug(f"Async Proxy {method}: {url}")
|
|
321
208
|
|
|
322
209
|
if proxy_config is None:
|
|
323
210
|
proxy_config = self._get_default_proxy_config_from_env()
|
|
324
211
|
|
|
325
212
|
if proxy_config is None:
|
|
326
|
-
raise ThordataConfigError(
|
|
327
|
-
"Proxy credentials are missing. "
|
|
328
|
-
"Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
|
|
329
|
-
"or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
|
|
330
|
-
)
|
|
213
|
+
raise ThordataConfigError("Proxy credentials are missing.")
|
|
331
214
|
|
|
332
|
-
#
|
|
333
|
-
# Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
|
|
334
|
-
# to avoid confusing "it always fails" behavior.
|
|
215
|
+
# Restore strict check for aiohttp HTTPS proxy limitation
|
|
335
216
|
if getattr(proxy_config, "protocol", "http").lower() == "https":
|
|
336
217
|
raise ThordataConfigError(
|
|
337
|
-
"Proxy Network requires an HTTPS proxy endpoint
|
|
338
|
-
"aiohttp support for 'https://' proxies is limited
|
|
218
|
+
"Proxy Network requires an HTTPS proxy endpoint. "
|
|
219
|
+
"aiohttp support for 'https://' proxies is limited. "
|
|
339
220
|
"Please use ThordataClient.get/post (sync client) for Proxy Network requests."
|
|
340
221
|
)
|
|
222
|
+
|
|
341
223
|
proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
|
|
342
224
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
except asyncio.TimeoutError as e:
|
|
348
|
-
raise ThordataTimeoutError(
|
|
349
|
-
f"Async request timed out: {e}", original_error=e
|
|
350
|
-
) from e
|
|
351
|
-
except aiohttp.ClientError as e:
|
|
352
|
-
raise ThordataNetworkError(
|
|
353
|
-
f"Async request failed: {e}", original_error=e
|
|
354
|
-
) from e
|
|
225
|
+
# Use the core HTTP client to execute, leveraging retry logic
|
|
226
|
+
return await self._http.request(
|
|
227
|
+
method=method, url=url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
|
|
228
|
+
)
|
|
355
229
|
|
|
356
230
|
# =========================================================================
|
|
357
|
-
#
|
|
231
|
+
# API Methods (SERP, Universal)
|
|
358
232
|
# =========================================================================
|
|
359
233
|
|
|
360
234
|
async def serp_search(
|
|
@@ -372,32 +246,7 @@ class AsyncThordataClient:
|
|
|
372
246
|
output_format: str = "json",
|
|
373
247
|
**kwargs: Any,
|
|
374
248
|
) -> dict[str, Any]:
|
|
375
|
-
"""
|
|
376
|
-
Execute an async SERP search.
|
|
377
|
-
|
|
378
|
-
Args:
|
|
379
|
-
query: Search keywords.
|
|
380
|
-
engine: Search engine.
|
|
381
|
-
num: Number of results.
|
|
382
|
-
country: Country code for localization.
|
|
383
|
-
language: Language code.
|
|
384
|
-
search_type: Type of search.
|
|
385
|
-
device: Device type ('desktop', 'mobile', 'tablet').
|
|
386
|
-
render_js: Enable JavaScript rendering in SERP.
|
|
387
|
-
no_cache: Disable internal caching.
|
|
388
|
-
output_format: 'json' or 'html'.
|
|
389
|
-
**kwargs: Additional parameters.
|
|
390
|
-
|
|
391
|
-
Returns:
|
|
392
|
-
Parsed JSON results or dict with 'html' key.
|
|
393
|
-
"""
|
|
394
|
-
if not self.scraper_token:
|
|
395
|
-
raise ThordataConfigError("scraper_token is required for SERP API")
|
|
396
|
-
|
|
397
|
-
session = self._get_session()
|
|
398
|
-
|
|
399
249
|
engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
|
|
400
|
-
|
|
401
250
|
request = SerpRequest(
|
|
402
251
|
query=query,
|
|
403
252
|
engine=engine_str,
|
|
@@ -411,103 +260,33 @@ class AsyncThordataClient:
|
|
|
411
260
|
output_format=output_format,
|
|
412
261
|
extra_params=kwargs,
|
|
413
262
|
)
|
|
414
|
-
|
|
415
|
-
payload = request.to_payload()
|
|
416
|
-
token = self.scraper_token or ""
|
|
417
|
-
headers = build_auth_headers(token, mode=self._auth_mode)
|
|
418
|
-
|
|
419
|
-
logger.info(f"Async SERP Search: {engine_str} - {query}")
|
|
420
|
-
|
|
421
|
-
try:
|
|
422
|
-
async with session.post(
|
|
423
|
-
self._serp_url,
|
|
424
|
-
data=payload,
|
|
425
|
-
headers=headers,
|
|
426
|
-
) as response:
|
|
427
|
-
response.raise_for_status()
|
|
428
|
-
|
|
429
|
-
if output_format.lower() == "json":
|
|
430
|
-
data = await response.json()
|
|
431
|
-
|
|
432
|
-
if isinstance(data, dict):
|
|
433
|
-
code = data.get("code")
|
|
434
|
-
if code is not None and code != 200:
|
|
435
|
-
msg = extract_error_message(data)
|
|
436
|
-
raise_for_code(
|
|
437
|
-
f"SERP API Error: {msg}",
|
|
438
|
-
code=code,
|
|
439
|
-
payload=data,
|
|
440
|
-
)
|
|
441
|
-
|
|
442
|
-
return parse_json_response(data)
|
|
443
|
-
|
|
444
|
-
text = await response.text()
|
|
445
|
-
return {"html": text}
|
|
446
|
-
|
|
447
|
-
except asyncio.TimeoutError as e:
|
|
448
|
-
raise ThordataTimeoutError(
|
|
449
|
-
f"SERP request timed out: {e}",
|
|
450
|
-
original_error=e,
|
|
451
|
-
) from e
|
|
452
|
-
except aiohttp.ClientError as e:
|
|
453
|
-
raise ThordataNetworkError(
|
|
454
|
-
f"SERP request failed: {e}",
|
|
455
|
-
original_error=e,
|
|
456
|
-
) from e
|
|
263
|
+
return await self.serp_search_advanced(request)
|
|
457
264
|
|
|
458
265
|
async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
|
|
459
|
-
"""
|
|
460
|
-
Execute an async SERP search using a SerpRequest object.
|
|
461
|
-
"""
|
|
462
|
-
session = self._get_session()
|
|
463
266
|
if not self.scraper_token:
|
|
464
|
-
raise ThordataConfigError("scraper_token
|
|
465
|
-
|
|
267
|
+
raise ThordataConfigError("scraper_token required")
|
|
466
268
|
payload = request.to_payload()
|
|
467
269
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
270
|
+
logger.info(f"Async SERP: {request.engine} - {request.query}")
|
|
468
271
|
|
|
469
|
-
|
|
272
|
+
response = await self._http.request(
|
|
273
|
+
"POST", self._serp_url, data=payload, headers=headers
|
|
274
|
+
)
|
|
470
275
|
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
data
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
if isinstance(data, dict):
|
|
483
|
-
code = data.get("code")
|
|
484
|
-
if code is not None and code != 200:
|
|
485
|
-
msg = extract_error_message(data)
|
|
486
|
-
raise_for_code(
|
|
487
|
-
f"SERP API Error: {msg}",
|
|
488
|
-
code=code,
|
|
489
|
-
payload=data,
|
|
490
|
-
)
|
|
491
|
-
|
|
492
|
-
return parse_json_response(data)
|
|
493
|
-
|
|
494
|
-
text = await response.text()
|
|
495
|
-
return {"html": text}
|
|
496
|
-
|
|
497
|
-
except asyncio.TimeoutError as e:
|
|
498
|
-
raise ThordataTimeoutError(
|
|
499
|
-
f"SERP request timed out: {e}",
|
|
500
|
-
original_error=e,
|
|
501
|
-
) from e
|
|
502
|
-
except aiohttp.ClientError as e:
|
|
503
|
-
raise ThordataNetworkError(
|
|
504
|
-
f"SERP request failed: {e}",
|
|
505
|
-
original_error=e,
|
|
506
|
-
) from e
|
|
276
|
+
if request.output_format.lower() == "json":
|
|
277
|
+
data = await response.json()
|
|
278
|
+
if isinstance(data, dict):
|
|
279
|
+
code = data.get("code")
|
|
280
|
+
if code is not None and code != 200:
|
|
281
|
+
raise_for_code(
|
|
282
|
+
f"SERP Error: {extract_error_message(data)}",
|
|
283
|
+
code=code,
|
|
284
|
+
payload=data,
|
|
285
|
+
)
|
|
286
|
+
return parse_json_response(data)
|
|
507
287
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
# =========================================================================
|
|
288
|
+
text = await response.text()
|
|
289
|
+
return {"html": text}
|
|
511
290
|
|
|
512
291
|
async def universal_scrape(
|
|
513
292
|
self,
|
|
@@ -521,21 +300,6 @@ class AsyncThordataClient:
|
|
|
521
300
|
wait_for: str | None = None,
|
|
522
301
|
**kwargs: Any,
|
|
523
302
|
) -> str | bytes:
|
|
524
|
-
"""
|
|
525
|
-
Async scrape using Universal API (Web Unlocker).
|
|
526
|
-
|
|
527
|
-
Args:
|
|
528
|
-
url: Target URL.
|
|
529
|
-
js_render: Enable JavaScript rendering.
|
|
530
|
-
output_format: "html" or "png".
|
|
531
|
-
country: Geo-targeting country.
|
|
532
|
-
block_resources: Resources to block.
|
|
533
|
-
wait: Wait time in ms.
|
|
534
|
-
wait_for: CSS selector to wait for.
|
|
535
|
-
|
|
536
|
-
Returns:
|
|
537
|
-
HTML string or PNG bytes.
|
|
538
|
-
"""
|
|
539
303
|
request = UniversalScrapeRequest(
|
|
540
304
|
url=url,
|
|
541
305
|
js_render=js_render,
|
|
@@ -546,65 +310,41 @@ class AsyncThordataClient:
|
|
|
546
310
|
wait_for=wait_for,
|
|
547
311
|
extra_params=kwargs,
|
|
548
312
|
)
|
|
549
|
-
|
|
550
313
|
return await self.universal_scrape_advanced(request)
|
|
551
314
|
|
|
552
315
|
async def universal_scrape_advanced(
|
|
553
316
|
self, request: UniversalScrapeRequest
|
|
554
317
|
) -> str | bytes:
|
|
555
|
-
"""
|
|
556
|
-
Async scrape using a UniversalScrapeRequest object.
|
|
557
|
-
"""
|
|
558
|
-
session = self._get_session()
|
|
559
318
|
if not self.scraper_token:
|
|
560
|
-
raise ThordataConfigError("scraper_token
|
|
561
|
-
|
|
319
|
+
raise ThordataConfigError("scraper_token required")
|
|
562
320
|
payload = request.to_payload()
|
|
563
321
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
564
322
|
|
|
565
|
-
|
|
323
|
+
response = await self._http.request(
|
|
324
|
+
"POST", self._universal_url, data=payload, headers=headers
|
|
325
|
+
)
|
|
566
326
|
|
|
567
327
|
try:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
)
|
|
571
|
-
response.
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
raise_for_code(
|
|
586
|
-
f"Universal API Error: {msg}", code=code, payload=resp_json
|
|
587
|
-
)
|
|
588
|
-
|
|
589
|
-
if "html" in resp_json:
|
|
590
|
-
return resp_json["html"]
|
|
591
|
-
|
|
592
|
-
if "png" in resp_json:
|
|
593
|
-
return decode_base64_image(resp_json["png"])
|
|
594
|
-
|
|
595
|
-
return str(resp_json)
|
|
596
|
-
|
|
597
|
-
except asyncio.TimeoutError as e:
|
|
598
|
-
raise ThordataTimeoutError(
|
|
599
|
-
f"Universal scrape timed out: {e}", original_error=e
|
|
600
|
-
) from e
|
|
601
|
-
except aiohttp.ClientError as e:
|
|
602
|
-
raise ThordataNetworkError(
|
|
603
|
-
f"Universal scrape failed: {e}", original_error=e
|
|
604
|
-
) from e
|
|
328
|
+
resp_json = await response.json()
|
|
329
|
+
except ValueError:
|
|
330
|
+
if request.output_format.lower() == "png":
|
|
331
|
+
return await response.read()
|
|
332
|
+
return await response.text()
|
|
333
|
+
|
|
334
|
+
if isinstance(resp_json, dict):
|
|
335
|
+
code = resp_json.get("code")
|
|
336
|
+
if code is not None and code != 200:
|
|
337
|
+
msg = extract_error_message(resp_json)
|
|
338
|
+
raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
|
|
339
|
+
|
|
340
|
+
if "html" in resp_json:
|
|
341
|
+
return resp_json["html"]
|
|
342
|
+
if "png" in resp_json:
|
|
343
|
+
return decode_base64_image(resp_json["png"])
|
|
344
|
+
return str(resp_json)
|
|
605
345
|
|
|
606
346
|
# =========================================================================
|
|
607
|
-
#
|
|
347
|
+
# Task Management
|
|
608
348
|
# =========================================================================
|
|
609
349
|
|
|
610
350
|
async def create_scraper_task(
|
|
@@ -615,9 +355,6 @@ class AsyncThordataClient:
|
|
|
615
355
|
parameters: dict[str, Any],
|
|
616
356
|
universal_params: dict[str, Any] | None = None,
|
|
617
357
|
) -> str:
|
|
618
|
-
"""
|
|
619
|
-
Create an async Web Scraper task.
|
|
620
|
-
"""
|
|
621
358
|
config = ScraperTaskConfig(
|
|
622
359
|
file_name=file_name,
|
|
623
360
|
spider_id=spider_id,
|
|
@@ -625,48 +362,72 @@ class AsyncThordataClient:
|
|
|
625
362
|
parameters=parameters,
|
|
626
363
|
universal_params=universal_params,
|
|
627
364
|
)
|
|
628
|
-
|
|
629
365
|
return await self.create_scraper_task_advanced(config)
|
|
630
366
|
|
|
367
|
+
async def run_tool(
|
|
368
|
+
self,
|
|
369
|
+
tool_request: Any,
|
|
370
|
+
file_name: str | None = None,
|
|
371
|
+
universal_params: dict[str, Any] | None = None,
|
|
372
|
+
) -> str:
|
|
373
|
+
"""Run a specific pre-defined tool (Async)."""
|
|
374
|
+
if not hasattr(tool_request, "to_task_parameters") or not hasattr(
|
|
375
|
+
tool_request, "get_spider_id"
|
|
376
|
+
):
|
|
377
|
+
raise ValueError(
|
|
378
|
+
"tool_request must be an instance of a thordata.tools class"
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
spider_id = tool_request.get_spider_id()
|
|
382
|
+
spider_name = tool_request.get_spider_name()
|
|
383
|
+
params = tool_request.to_task_parameters()
|
|
384
|
+
|
|
385
|
+
if not file_name:
|
|
386
|
+
import uuid
|
|
387
|
+
|
|
388
|
+
short_id = uuid.uuid4().hex[:8]
|
|
389
|
+
file_name = f"{spider_id}_{short_id}"
|
|
390
|
+
|
|
391
|
+
# Check if it's a Video Tool
|
|
392
|
+
if hasattr(tool_request, "common_settings"):
|
|
393
|
+
config_video = VideoTaskConfig(
|
|
394
|
+
file_name=file_name,
|
|
395
|
+
spider_id=spider_id,
|
|
396
|
+
spider_name=spider_name,
|
|
397
|
+
parameters=params,
|
|
398
|
+
common_settings=tool_request.common_settings,
|
|
399
|
+
)
|
|
400
|
+
return await self.create_video_task_advanced(config_video)
|
|
401
|
+
else:
|
|
402
|
+
config = ScraperTaskConfig(
|
|
403
|
+
file_name=file_name,
|
|
404
|
+
spider_id=spider_id,
|
|
405
|
+
spider_name=spider_name,
|
|
406
|
+
parameters=params,
|
|
407
|
+
universal_params=universal_params,
|
|
408
|
+
)
|
|
409
|
+
return await self.create_scraper_task_advanced(config)
|
|
410
|
+
|
|
631
411
|
async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
632
|
-
"""
|
|
633
|
-
Create a task using ScraperTaskConfig.
|
|
634
|
-
"""
|
|
635
412
|
self._require_public_credentials()
|
|
636
|
-
session = self._get_session()
|
|
637
413
|
if not self.scraper_token:
|
|
638
|
-
raise ThordataConfigError("scraper_token
|
|
639
|
-
|
|
414
|
+
raise ThordataConfigError("scraper_token required")
|
|
640
415
|
payload = config.to_payload()
|
|
641
|
-
# Builder needs 3 headers: token, key, Authorization Bearer
|
|
642
416
|
headers = build_builder_headers(
|
|
643
|
-
self.scraper_token,
|
|
644
|
-
self.public_token or "",
|
|
645
|
-
self.public_key or "",
|
|
417
|
+
self.scraper_token, str(self.public_token), str(self.public_key)
|
|
646
418
|
)
|
|
647
419
|
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
data
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
msg = extract_error_message(data)
|
|
660
|
-
raise_for_code(
|
|
661
|
-
f"Task creation failed: {msg}", code=code, payload=data
|
|
662
|
-
)
|
|
663
|
-
|
|
664
|
-
return data["data"]["task_id"]
|
|
665
|
-
|
|
666
|
-
except aiohttp.ClientError as e:
|
|
667
|
-
raise ThordataNetworkError(
|
|
668
|
-
f"Task creation failed: {e}", original_error=e
|
|
669
|
-
) from e
|
|
420
|
+
response = await self._http.request(
|
|
421
|
+
"POST", self._builder_url, data=payload, headers=headers
|
|
422
|
+
)
|
|
423
|
+
data = await response.json(content_type=None)
|
|
424
|
+
if data.get("code") != 200:
|
|
425
|
+
raise_for_code(
|
|
426
|
+
f"Task creation failed: {extract_error_message(data)}",
|
|
427
|
+
code=data.get("code"),
|
|
428
|
+
payload=data,
|
|
429
|
+
)
|
|
430
|
+
return data["data"]["task_id"]
|
|
670
431
|
|
|
671
432
|
async def create_video_task(
|
|
672
433
|
self,
|
|
@@ -676,10 +437,6 @@ class AsyncThordataClient:
|
|
|
676
437
|
parameters: dict[str, Any],
|
|
677
438
|
common_settings: CommonSettings,
|
|
678
439
|
) -> str:
|
|
679
|
-
"""
|
|
680
|
-
Create a YouTube video/audio download task.
|
|
681
|
-
"""
|
|
682
|
-
|
|
683
440
|
config = VideoTaskConfig(
|
|
684
441
|
file_name=file_name,
|
|
685
442
|
spider_id=spider_id,
|
|
@@ -687,251 +444,106 @@ class AsyncThordataClient:
|
|
|
687
444
|
parameters=parameters,
|
|
688
445
|
common_settings=common_settings,
|
|
689
446
|
)
|
|
690
|
-
|
|
691
447
|
return await self.create_video_task_advanced(config)
|
|
692
448
|
|
|
693
449
|
async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
694
|
-
"""
|
|
695
|
-
Create a video task using VideoTaskConfig object.
|
|
696
|
-
"""
|
|
697
|
-
|
|
698
450
|
self._require_public_credentials()
|
|
699
|
-
session = self._get_session()
|
|
700
451
|
if not self.scraper_token:
|
|
701
|
-
raise ThordataConfigError(
|
|
702
|
-
"scraper_token is required for Video Task Builder"
|
|
703
|
-
)
|
|
704
|
-
|
|
452
|
+
raise ThordataConfigError("scraper_token required")
|
|
705
453
|
payload = config.to_payload()
|
|
706
454
|
headers = build_builder_headers(
|
|
707
|
-
self.scraper_token,
|
|
708
|
-
self.public_token or "",
|
|
709
|
-
self.public_key or "",
|
|
455
|
+
self.scraper_token, str(self.public_token), str(self.public_key)
|
|
710
456
|
)
|
|
711
457
|
|
|
712
|
-
|
|
713
|
-
|
|
458
|
+
response = await self._http.request(
|
|
459
|
+
"POST", self._video_builder_url, data=payload, headers=headers
|
|
714
460
|
)
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
data
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
response.raise_for_status()
|
|
724
|
-
data = await response.json()
|
|
725
|
-
|
|
726
|
-
code = data.get("code")
|
|
727
|
-
if code != 200:
|
|
728
|
-
msg = extract_error_message(data)
|
|
729
|
-
raise_for_code(
|
|
730
|
-
f"Video task creation failed: {msg}", code=code, payload=data
|
|
731
|
-
)
|
|
732
|
-
|
|
733
|
-
return data["data"]["task_id"]
|
|
734
|
-
|
|
735
|
-
except asyncio.TimeoutError as e:
|
|
736
|
-
raise ThordataTimeoutError(
|
|
737
|
-
f"Video task creation timed out: {e}", original_error=e
|
|
738
|
-
) from e
|
|
739
|
-
except aiohttp.ClientError as e:
|
|
740
|
-
raise ThordataNetworkError(
|
|
741
|
-
f"Video task creation failed: {e}", original_error=e
|
|
742
|
-
) from e
|
|
461
|
+
data = await response.json()
|
|
462
|
+
if data.get("code") != 200:
|
|
463
|
+
raise_for_code(
|
|
464
|
+
f"Video task failed: {extract_error_message(data)}",
|
|
465
|
+
code=data.get("code"),
|
|
466
|
+
payload=data,
|
|
467
|
+
)
|
|
468
|
+
return data["data"]["task_id"]
|
|
743
469
|
|
|
744
470
|
async def get_task_status(self, task_id: str) -> str:
|
|
745
|
-
"""
|
|
746
|
-
Check async task status.
|
|
747
|
-
|
|
748
|
-
Raises:
|
|
749
|
-
ThordataConfigError: If public credentials are missing.
|
|
750
|
-
ThordataAPIError: If API returns a non-200 code in JSON payload.
|
|
751
|
-
ThordataNetworkError: If network/HTTP request fails.
|
|
752
|
-
"""
|
|
753
471
|
self._require_public_credentials()
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
self.public_token or "", self.public_key or ""
|
|
472
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
473
|
+
response = await self._http.request(
|
|
474
|
+
"POST", self._status_url, data={"tasks_ids": task_id}, headers=headers
|
|
758
475
|
)
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
if isinstance(data, dict):
|
|
769
|
-
code = data.get("code")
|
|
770
|
-
if code is not None and code != 200:
|
|
771
|
-
msg = extract_error_message(data)
|
|
772
|
-
raise_for_code(
|
|
773
|
-
f"Task status API Error: {msg}",
|
|
774
|
-
code=code,
|
|
775
|
-
payload=data,
|
|
776
|
-
)
|
|
777
|
-
|
|
778
|
-
items = data.get("data") or []
|
|
779
|
-
for item in items:
|
|
780
|
-
if str(item.get("task_id")) == str(task_id):
|
|
781
|
-
return item.get("status", "unknown")
|
|
782
|
-
|
|
783
|
-
return "unknown"
|
|
784
|
-
|
|
785
|
-
raise ThordataNetworkError(
|
|
786
|
-
f"Unexpected task status response type: {type(data).__name__}",
|
|
787
|
-
original_error=None,
|
|
476
|
+
data = await response.json(content_type=None)
|
|
477
|
+
|
|
478
|
+
if isinstance(data, dict):
|
|
479
|
+
code = data.get("code")
|
|
480
|
+
if code is not None and code != 200:
|
|
481
|
+
raise_for_code(
|
|
482
|
+
f"Status error: {extract_error_message(data)}",
|
|
483
|
+
code=code,
|
|
484
|
+
payload=data,
|
|
788
485
|
)
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
raise ThordataNetworkError(
|
|
796
|
-
f"Async status check failed: {e}", original_error=e
|
|
797
|
-
) from e
|
|
486
|
+
items = data.get("data") or []
|
|
487
|
+
for item in items:
|
|
488
|
+
if str(item.get("task_id")) == str(task_id):
|
|
489
|
+
return item.get("status", "unknown")
|
|
490
|
+
return "unknown"
|
|
491
|
+
raise ThordataNetworkError(f"Unexpected response type: {type(data)}")
|
|
798
492
|
|
|
799
493
|
async def safe_get_task_status(self, task_id: str) -> str:
|
|
800
|
-
"""
|
|
801
|
-
Backward-compatible status check.
|
|
802
|
-
|
|
803
|
-
Returns:
|
|
804
|
-
Status string, or "error" on any exception.
|
|
805
|
-
"""
|
|
806
494
|
try:
|
|
807
495
|
return await self.get_task_status(task_id)
|
|
808
496
|
except Exception:
|
|
809
497
|
return "error"
|
|
810
498
|
|
|
811
499
|
async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
|
|
812
|
-
"""
|
|
813
|
-
Get download URL for completed task.
|
|
814
|
-
"""
|
|
815
500
|
self._require_public_credentials()
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
self.
|
|
501
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
502
|
+
response = await self._http.request(
|
|
503
|
+
"POST",
|
|
504
|
+
self._download_url,
|
|
505
|
+
data={"tasks_id": task_id, "type": file_type},
|
|
506
|
+
headers=headers,
|
|
820
507
|
)
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
508
|
+
data = await response.json(content_type=None)
|
|
509
|
+
if data.get("code") == 200 and data.get("data"):
|
|
510
|
+
return data["data"]["download"]
|
|
511
|
+
raise_for_code("Get result failed", code=data.get("code"), payload=data)
|
|
512
|
+
return ""
|
|
824
513
|
|
|
825
|
-
|
|
826
|
-
async with session.post(
|
|
827
|
-
self._download_url, data=payload, headers=headers
|
|
828
|
-
) as response:
|
|
829
|
-
data = await response.json(content_type=None)
|
|
830
|
-
code = data.get("code")
|
|
831
|
-
|
|
832
|
-
if code == 200 and data.get("data"):
|
|
833
|
-
return data["data"]["download"]
|
|
834
|
-
|
|
835
|
-
msg = extract_error_message(data)
|
|
836
|
-
raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
|
|
837
|
-
# This line won't be reached, but satisfies mypy
|
|
838
|
-
raise RuntimeError("Unexpected state")
|
|
839
|
-
|
|
840
|
-
except aiohttp.ClientError as e:
|
|
841
|
-
raise ThordataNetworkError(
|
|
842
|
-
f"Get result failed: {e}", original_error=e
|
|
843
|
-
) from e
|
|
844
|
-
|
|
845
|
-
async def list_tasks(
|
|
846
|
-
self,
|
|
847
|
-
page: int = 1,
|
|
848
|
-
size: int = 20,
|
|
849
|
-
) -> dict[str, Any]:
|
|
850
|
-
"""
|
|
851
|
-
List all Web Scraper tasks.
|
|
852
|
-
|
|
853
|
-
Args:
|
|
854
|
-
page: Page number (starts from 1).
|
|
855
|
-
size: Number of tasks per page.
|
|
856
|
-
|
|
857
|
-
Returns:
|
|
858
|
-
Dict containing 'count' and 'list' of tasks.
|
|
859
|
-
"""
|
|
514
|
+
async def list_tasks(self, page: int = 1, size: int = 20) -> dict[str, Any]:
|
|
860
515
|
self._require_public_credentials()
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
self.
|
|
516
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
517
|
+
response = await self._http.request(
|
|
518
|
+
"POST",
|
|
519
|
+
self._list_url,
|
|
520
|
+
data={"page": str(page), "size": str(size)},
|
|
521
|
+
headers=headers,
|
|
865
522
|
)
|
|
866
|
-
|
|
867
|
-
if
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
payload["size"] = str(size)
|
|
871
|
-
|
|
872
|
-
logger.info(f"Async listing tasks: page={page}, size={size}")
|
|
873
|
-
|
|
874
|
-
try:
|
|
875
|
-
async with session.post(
|
|
876
|
-
self._list_url,
|
|
877
|
-
data=payload,
|
|
878
|
-
headers=headers,
|
|
879
|
-
timeout=self._api_timeout,
|
|
880
|
-
) as response:
|
|
881
|
-
response.raise_for_status()
|
|
882
|
-
data = await response.json(content_type=None)
|
|
883
|
-
|
|
884
|
-
code = data.get("code")
|
|
885
|
-
if code != 200:
|
|
886
|
-
msg = extract_error_message(data)
|
|
887
|
-
raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
|
|
888
|
-
|
|
889
|
-
return data.get("data", {"count": 0, "list": []})
|
|
890
|
-
|
|
891
|
-
except asyncio.TimeoutError as e:
|
|
892
|
-
raise ThordataTimeoutError(
|
|
893
|
-
f"List tasks timed out: {e}", original_error=e
|
|
894
|
-
) from e
|
|
895
|
-
except aiohttp.ClientError as e:
|
|
896
|
-
raise ThordataNetworkError(
|
|
897
|
-
f"List tasks failed: {e}", original_error=e
|
|
898
|
-
) from e
|
|
523
|
+
data = await response.json(content_type=None)
|
|
524
|
+
if data.get("code") != 200:
|
|
525
|
+
raise_for_code("List tasks failed", code=data.get("code"), payload=data)
|
|
526
|
+
return data.get("data", {"count": 0, "list": []})
|
|
899
527
|
|
|
900
528
|
async def wait_for_task(
|
|
901
|
-
self,
|
|
902
|
-
task_id: str,
|
|
903
|
-
*,
|
|
904
|
-
poll_interval: float = 5.0,
|
|
905
|
-
max_wait: float = 600.0,
|
|
529
|
+
self, task_id: str, *, poll_interval: float = 5.0, max_wait: float = 600.0
|
|
906
530
|
) -> str:
|
|
907
|
-
"""
|
|
908
|
-
Wait for a task to complete.
|
|
909
|
-
"""
|
|
910
|
-
|
|
911
531
|
import time
|
|
912
532
|
|
|
913
533
|
start = time.monotonic()
|
|
914
|
-
|
|
915
534
|
while (time.monotonic() - start) < max_wait:
|
|
916
535
|
status = await self.get_task_status(task_id)
|
|
917
|
-
|
|
918
|
-
logger.debug(f"Task {task_id} status: {status}")
|
|
919
|
-
|
|
920
|
-
terminal_statuses = {
|
|
536
|
+
if status.lower() in {
|
|
921
537
|
"ready",
|
|
922
538
|
"success",
|
|
923
539
|
"finished",
|
|
924
540
|
"failed",
|
|
925
541
|
"error",
|
|
926
542
|
"cancelled",
|
|
927
|
-
}
|
|
928
|
-
|
|
929
|
-
if status.lower() in terminal_statuses:
|
|
543
|
+
}:
|
|
930
544
|
return status
|
|
931
|
-
|
|
932
545
|
await asyncio.sleep(poll_interval)
|
|
933
|
-
|
|
934
|
-
raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
|
|
546
|
+
raise TimeoutError(f"Task {task_id} timeout")
|
|
935
547
|
|
|
936
548
|
async def run_task(
|
|
937
549
|
self,
|
|
@@ -945,28 +557,20 @@ class AsyncThordataClient:
|
|
|
945
557
|
initial_poll_interval: float = 2.0,
|
|
946
558
|
max_poll_interval: float = 10.0,
|
|
947
559
|
include_errors: bool = True,
|
|
560
|
+
task_type: str = "web",
|
|
561
|
+
common_settings: CommonSettings | None = None,
|
|
948
562
|
) -> str:
|
|
949
|
-
""
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
file_name=file_name,
|
|
960
|
-
spider_id=spider_id,
|
|
961
|
-
spider_name=spider_name,
|
|
962
|
-
parameters=parameters,
|
|
963
|
-
universal_params=universal_params,
|
|
964
|
-
include_errors=include_errors,
|
|
965
|
-
)
|
|
966
|
-
task_id = await self.create_scraper_task_advanced(config)
|
|
967
|
-
logger.info(f"Async Task created: {task_id}. Polling...")
|
|
563
|
+
if task_type == "video":
|
|
564
|
+
if common_settings is None:
|
|
565
|
+
raise ValueError("common_settings required for video")
|
|
566
|
+
task_id = await self.create_video_task(
|
|
567
|
+
file_name, spider_id, spider_name, parameters, common_settings
|
|
568
|
+
)
|
|
569
|
+
else:
|
|
570
|
+
task_id = await self.create_scraper_task(
|
|
571
|
+
file_name, spider_id, spider_name, parameters, universal_params
|
|
572
|
+
)
|
|
968
573
|
|
|
969
|
-
# 2. Poll Status
|
|
970
574
|
import time
|
|
971
575
|
|
|
972
576
|
start_time = time.monotonic()
|
|
@@ -974,229 +578,109 @@ class AsyncThordataClient:
|
|
|
974
578
|
|
|
975
579
|
while (time.monotonic() - start_time) < max_wait:
|
|
976
580
|
status = await self.get_task_status(task_id)
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
if status_lower in {"ready", "success", "finished"}:
|
|
980
|
-
logger.info(f"Task {task_id} ready.")
|
|
981
|
-
# 3. Get Result
|
|
581
|
+
if status.lower() in {"ready", "success", "finished"}:
|
|
982
582
|
return await self.get_task_result(task_id)
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
raise ThordataNetworkError(
|
|
986
|
-
f"Task {task_id} failed with status: {status}"
|
|
987
|
-
)
|
|
988
|
-
|
|
583
|
+
if status.lower() in {"failed", "error", "cancelled"}:
|
|
584
|
+
raise ThordataNetworkError(f"Task {task_id} failed: {status}")
|
|
989
585
|
await asyncio.sleep(current_poll)
|
|
990
586
|
current_poll = min(current_poll * 1.5, max_poll_interval)
|
|
991
|
-
|
|
992
|
-
raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
|
|
587
|
+
raise ThordataTimeoutError(f"Task {task_id} timed out")
|
|
993
588
|
|
|
994
589
|
# =========================================================================
|
|
995
|
-
# Proxy
|
|
590
|
+
# Account, Usage, Proxy Management (Delegated to HTTP)
|
|
996
591
|
# =========================================================================
|
|
997
592
|
|
|
998
593
|
async def get_usage_statistics(
|
|
999
|
-
self,
|
|
1000
|
-
from_date: str | date,
|
|
1001
|
-
to_date: str | date,
|
|
594
|
+
self, from_date: str | date, to_date: str | date
|
|
1002
595
|
) -> UsageStatistics:
|
|
1003
|
-
"""
|
|
1004
|
-
Get account usage statistics for a date range.
|
|
1005
|
-
|
|
1006
|
-
Args:
|
|
1007
|
-
from_date: Start date (YYYY-MM-DD string or date object).
|
|
1008
|
-
to_date: End date (YYYY-MM-DD string or date object).
|
|
1009
|
-
|
|
1010
|
-
Returns:
|
|
1011
|
-
UsageStatistics object with traffic data.
|
|
1012
|
-
"""
|
|
1013
|
-
|
|
1014
596
|
self._require_public_credentials()
|
|
1015
|
-
session = self._get_session()
|
|
1016
|
-
|
|
1017
|
-
# Convert dates to strings
|
|
1018
597
|
if isinstance(from_date, date):
|
|
1019
598
|
from_date = from_date.strftime("%Y-%m-%d")
|
|
1020
599
|
if isinstance(to_date, date):
|
|
1021
600
|
to_date = to_date.strftime("%Y-%m-%d")
|
|
1022
|
-
|
|
1023
601
|
params = {
|
|
1024
602
|
"token": self.public_token,
|
|
1025
603
|
"key": self.public_key,
|
|
1026
604
|
"from_date": from_date,
|
|
1027
605
|
"to_date": to_date,
|
|
1028
606
|
}
|
|
607
|
+
response = await self._http.request("GET", self._usage_stats_url, params=params)
|
|
608
|
+
data = await response.json()
|
|
609
|
+
if data.get("code") != 200:
|
|
610
|
+
raise_for_code("Usage error", code=data.get("code"), payload=data)
|
|
611
|
+
return UsageStatistics.from_dict(data.get("data", data))
|
|
1029
612
|
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
if isinstance(data, dict):
|
|
1042
|
-
code = data.get("code")
|
|
1043
|
-
if code is not None and code != 200:
|
|
1044
|
-
msg = extract_error_message(data)
|
|
1045
|
-
raise_for_code(
|
|
1046
|
-
f"Usage statistics error: {msg}",
|
|
1047
|
-
code=code,
|
|
1048
|
-
payload=data,
|
|
1049
|
-
)
|
|
1050
|
-
|
|
1051
|
-
usage_data = data.get("data", data)
|
|
1052
|
-
return UsageStatistics.from_dict(usage_data)
|
|
1053
|
-
|
|
1054
|
-
raise ThordataNetworkError(
|
|
1055
|
-
f"Unexpected usage statistics response: {type(data).__name__}",
|
|
1056
|
-
original_error=None,
|
|
1057
|
-
)
|
|
1058
|
-
|
|
1059
|
-
except asyncio.TimeoutError as e:
|
|
1060
|
-
raise ThordataTimeoutError(
|
|
1061
|
-
f"Usage statistics timed out: {e}", original_error=e
|
|
1062
|
-
) from e
|
|
1063
|
-
except aiohttp.ClientError as e:
|
|
1064
|
-
raise ThordataNetworkError(
|
|
1065
|
-
f"Usage statistics failed: {e}", original_error=e
|
|
1066
|
-
) from e
|
|
1067
|
-
|
|
1068
|
-
async def get_residential_balance(self) -> dict[str, Any]:
|
|
1069
|
-
"""
|
|
1070
|
-
Get residential proxy balance.
|
|
1071
|
-
|
|
1072
|
-
Uses public_token/public_key.
|
|
1073
|
-
"""
|
|
1074
|
-
session = self._get_session()
|
|
1075
|
-
headers = self._build_gateway_headers()
|
|
1076
|
-
|
|
1077
|
-
logger.info("Async getting residential proxy balance")
|
|
1078
|
-
|
|
1079
|
-
try:
|
|
1080
|
-
async with session.post(
|
|
1081
|
-
f"{self._gateway_base_url}/getFlowBalance",
|
|
1082
|
-
headers=headers,
|
|
1083
|
-
data={},
|
|
1084
|
-
timeout=self._api_timeout,
|
|
1085
|
-
) as response:
|
|
1086
|
-
response.raise_for_status()
|
|
1087
|
-
data = await response.json()
|
|
1088
|
-
|
|
1089
|
-
code = data.get("code")
|
|
1090
|
-
if code != 200:
|
|
1091
|
-
msg = extract_error_message(data)
|
|
1092
|
-
raise_for_code(
|
|
1093
|
-
f"Get balance failed: {msg}", code=code, payload=data
|
|
1094
|
-
)
|
|
1095
|
-
|
|
1096
|
-
return data.get("data", {})
|
|
613
|
+
async def get_traffic_balance(self) -> float:
|
|
614
|
+
self._require_public_credentials()
|
|
615
|
+
api_base = self._locations_base_url.replace("/locations", "")
|
|
616
|
+
params = {"token": str(self.public_token), "key": str(self.public_key)}
|
|
617
|
+
response = await self._http.request(
|
|
618
|
+
"GET", f"{api_base}/account/traffic-balance", params=params
|
|
619
|
+
)
|
|
620
|
+
data = await response.json()
|
|
621
|
+
if data.get("code") != 200:
|
|
622
|
+
raise_for_code("Balance error", code=data.get("code"), payload=data)
|
|
623
|
+
return float(data.get("data", {}).get("traffic_balance", 0))
|
|
1097
624
|
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
625
|
+
async def get_wallet_balance(self) -> float:
|
|
626
|
+
self._require_public_credentials()
|
|
627
|
+
api_base = self._locations_base_url.replace("/locations", "")
|
|
628
|
+
params = {"token": str(self.public_token), "key": str(self.public_key)}
|
|
629
|
+
response = await self._http.request(
|
|
630
|
+
"GET", f"{api_base}/account/wallet-balance", params=params
|
|
631
|
+
)
|
|
632
|
+
data = await response.json()
|
|
633
|
+
if data.get("code") != 200:
|
|
634
|
+
raise_for_code("Balance error", code=data.get("code"), payload=data)
|
|
635
|
+
return float(data.get("data", {}).get("balance", 0))
|
|
1106
636
|
|
|
1107
|
-
async def
|
|
637
|
+
async def get_proxy_user_usage(
|
|
1108
638
|
self,
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
logger.info(f"Async getting residential usage: {start_time} to {end_time}")
|
|
1122
|
-
|
|
1123
|
-
try:
|
|
1124
|
-
async with session.post(
|
|
1125
|
-
f"{self._gateway_base_url}/usageRecord",
|
|
1126
|
-
headers=headers,
|
|
1127
|
-
data=payload,
|
|
1128
|
-
timeout=self._api_timeout,
|
|
1129
|
-
) as response:
|
|
1130
|
-
response.raise_for_status()
|
|
1131
|
-
data = await response.json()
|
|
1132
|
-
|
|
1133
|
-
code = data.get("code")
|
|
1134
|
-
if code != 200:
|
|
1135
|
-
msg = extract_error_message(data)
|
|
1136
|
-
raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
|
|
1137
|
-
|
|
1138
|
-
return data.get("data", {})
|
|
639
|
+
username: str,
|
|
640
|
+
start_date: str | date,
|
|
641
|
+
end_date: str | date,
|
|
642
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
643
|
+
) -> list[dict[str, Any]]:
|
|
644
|
+
self._require_public_credentials()
|
|
645
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
646
|
+
if isinstance(start_date, date):
|
|
647
|
+
start_date = start_date.strftime("%Y-%m-%d")
|
|
648
|
+
if isinstance(end_date, date):
|
|
649
|
+
end_date = end_date.strftime("%Y-%m-%d")
|
|
1139
650
|
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
)
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
651
|
+
params = {
|
|
652
|
+
"token": self.public_token,
|
|
653
|
+
"key": self.public_key,
|
|
654
|
+
"proxy_type": str(pt),
|
|
655
|
+
"username": username,
|
|
656
|
+
"from_date": start_date,
|
|
657
|
+
"to_date": end_date,
|
|
658
|
+
}
|
|
659
|
+
response = await self._http.request(
|
|
660
|
+
"GET", f"{self._proxy_users_url}/usage-statistics", params=params
|
|
661
|
+
)
|
|
662
|
+
data = await response.json()
|
|
663
|
+
if data.get("code") != 200:
|
|
664
|
+
raise_for_code("Get usage failed", code=data.get("code"), payload=data)
|
|
665
|
+
return data.get("data", [])
|
|
1148
666
|
|
|
1149
667
|
async def list_proxy_users(
|
|
1150
668
|
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
1151
669
|
) -> ProxyUserList:
|
|
1152
|
-
"""List all proxy users (sub-accounts)."""
|
|
1153
|
-
|
|
1154
670
|
self._require_public_credentials()
|
|
1155
|
-
|
|
1156
|
-
|
|
671
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1157
672
|
params = {
|
|
1158
673
|
"token": self.public_token,
|
|
1159
674
|
"key": self.public_key,
|
|
1160
|
-
"proxy_type": str(
|
|
1161
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1162
|
-
),
|
|
675
|
+
"proxy_type": str(pt),
|
|
1163
676
|
}
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
timeout=self._api_timeout,
|
|
1172
|
-
) as response:
|
|
1173
|
-
response.raise_for_status()
|
|
1174
|
-
data = await response.json()
|
|
1175
|
-
|
|
1176
|
-
if isinstance(data, dict):
|
|
1177
|
-
code = data.get("code")
|
|
1178
|
-
if code is not None and code != 200:
|
|
1179
|
-
msg = extract_error_message(data)
|
|
1180
|
-
raise_for_code(
|
|
1181
|
-
f"List proxy users error: {msg}", code=code, payload=data
|
|
1182
|
-
)
|
|
1183
|
-
|
|
1184
|
-
user_data = data.get("data", data)
|
|
1185
|
-
return ProxyUserList.from_dict(user_data)
|
|
1186
|
-
|
|
1187
|
-
raise ThordataNetworkError(
|
|
1188
|
-
f"Unexpected proxy users response: {type(data).__name__}",
|
|
1189
|
-
original_error=None,
|
|
1190
|
-
)
|
|
1191
|
-
|
|
1192
|
-
except asyncio.TimeoutError as e:
|
|
1193
|
-
raise ThordataTimeoutError(
|
|
1194
|
-
f"List users timed out: {e}", original_error=e
|
|
1195
|
-
) from e
|
|
1196
|
-
except aiohttp.ClientError as e:
|
|
1197
|
-
raise ThordataNetworkError(
|
|
1198
|
-
f"List users failed: {e}", original_error=e
|
|
1199
|
-
) from e
|
|
677
|
+
response = await self._http.request(
|
|
678
|
+
"GET", f"{self._proxy_users_url}/user-list", params=params
|
|
679
|
+
)
|
|
680
|
+
data = await response.json()
|
|
681
|
+
if data.get("code") != 200:
|
|
682
|
+
raise_for_code("List users error", code=data.get("code"), payload=data)
|
|
683
|
+
return ProxyUserList.from_dict(data.get("data", data))
|
|
1200
684
|
|
|
1201
685
|
async def create_proxy_user(
|
|
1202
686
|
self,
|
|
@@ -1206,53 +690,85 @@ class AsyncThordataClient:
|
|
|
1206
690
|
traffic_limit: int = 0,
|
|
1207
691
|
status: bool = True,
|
|
1208
692
|
) -> dict[str, Any]:
|
|
1209
|
-
"""Create a new proxy user (sub-account)."""
|
|
1210
693
|
self._require_public_credentials()
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
headers = build_public_api_headers(
|
|
1214
|
-
self.public_token or "", self.public_key or ""
|
|
1215
|
-
)
|
|
1216
|
-
|
|
694
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
695
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
1217
696
|
payload = {
|
|
1218
|
-
"proxy_type": str(
|
|
1219
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1220
|
-
),
|
|
697
|
+
"proxy_type": str(pt),
|
|
1221
698
|
"username": username,
|
|
1222
699
|
"password": password,
|
|
1223
700
|
"traffic_limit": str(traffic_limit),
|
|
1224
701
|
"status": "true" if status else "false",
|
|
1225
702
|
}
|
|
703
|
+
response = await self._http.request(
|
|
704
|
+
"POST",
|
|
705
|
+
f"{self._proxy_users_url}/create-user",
|
|
706
|
+
data=payload,
|
|
707
|
+
headers=headers,
|
|
708
|
+
)
|
|
709
|
+
data = await response.json()
|
|
710
|
+
if data.get("code") != 200:
|
|
711
|
+
raise_for_code("Create user failed", code=data.get("code"), payload=data)
|
|
712
|
+
return data.get("data", {})
|
|
1226
713
|
|
|
1227
|
-
|
|
714
|
+
async def update_proxy_user(
|
|
715
|
+
self,
|
|
716
|
+
username: str,
|
|
717
|
+
password: str,
|
|
718
|
+
traffic_limit: int | None = None,
|
|
719
|
+
status: bool | None = None,
|
|
720
|
+
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
721
|
+
new_username: str | None = None,
|
|
722
|
+
) -> dict[str, Any]:
|
|
723
|
+
"""
|
|
724
|
+
Update a proxy user.
|
|
725
|
+
Note: API requires 'new_' prefixed fields and ALL are required.
|
|
726
|
+
"""
|
|
727
|
+
self._require_public_credentials()
|
|
728
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
729
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
1228
730
|
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
data=payload,
|
|
1233
|
-
headers=headers,
|
|
1234
|
-
timeout=self._api_timeout,
|
|
1235
|
-
) as response:
|
|
1236
|
-
response.raise_for_status()
|
|
1237
|
-
data = await response.json()
|
|
731
|
+
limit_val = str(traffic_limit) if traffic_limit is not None else "0"
|
|
732
|
+
status_val = "true" if (status is None or status) else "false"
|
|
733
|
+
target_username = new_username or username
|
|
1238
734
|
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
735
|
+
payload = {
|
|
736
|
+
"proxy_type": str(pt),
|
|
737
|
+
"username": username,
|
|
738
|
+
"new_username": target_username,
|
|
739
|
+
"new_password": password,
|
|
740
|
+
"new_traffic_limit": limit_val,
|
|
741
|
+
"new_status": status_val,
|
|
742
|
+
}
|
|
1245
743
|
|
|
1246
|
-
|
|
744
|
+
response = await self._http.request(
|
|
745
|
+
"POST",
|
|
746
|
+
f"{self._proxy_users_url}/update-user",
|
|
747
|
+
data=payload,
|
|
748
|
+
headers=headers,
|
|
749
|
+
)
|
|
750
|
+
data = await response.json()
|
|
751
|
+
if data.get("code") != 200:
|
|
752
|
+
raise_for_code("Update user failed", code=data.get("code"), payload=data)
|
|
753
|
+
return data.get("data", {})
|
|
1247
754
|
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
755
|
+
async def delete_proxy_user(
|
|
756
|
+
self, username: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
757
|
+
) -> dict[str, Any]:
|
|
758
|
+
self._require_public_credentials()
|
|
759
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
760
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
761
|
+
payload = {"proxy_type": str(pt), "username": username}
|
|
762
|
+
response = await self._http.request(
|
|
763
|
+
"POST",
|
|
764
|
+
f"{self._proxy_users_url}/delete-user",
|
|
765
|
+
data=payload,
|
|
766
|
+
headers=headers,
|
|
767
|
+
)
|
|
768
|
+
data = await response.json()
|
|
769
|
+
if data.get("code") != 200:
|
|
770
|
+
raise_for_code("Delete user failed", code=data.get("code"), payload=data)
|
|
771
|
+
return data.get("data", {})
|
|
1256
772
|
|
|
1257
773
|
async def add_whitelist_ip(
|
|
1258
774
|
self,
|
|
@@ -1260,309 +776,102 @@ class AsyncThordataClient:
|
|
|
1260
776
|
proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
|
|
1261
777
|
status: bool = True,
|
|
1262
778
|
) -> dict[str, Any]:
|
|
1263
|
-
"""
|
|
1264
|
-
Add an IP to the whitelist for IP authentication.
|
|
1265
|
-
"""
|
|
1266
779
|
self._require_public_credentials()
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
headers = build_public_api_headers(
|
|
1270
|
-
self.public_token or "", self.public_key or ""
|
|
1271
|
-
)
|
|
1272
|
-
|
|
1273
|
-
proxy_type_int = (
|
|
1274
|
-
int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1275
|
-
)
|
|
1276
|
-
|
|
780
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
781
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
1277
782
|
payload = {
|
|
1278
|
-
"proxy_type": str(
|
|
783
|
+
"proxy_type": str(pt),
|
|
1279
784
|
"ip": ip,
|
|
1280
785
|
"status": "true" if status else "false",
|
|
1281
786
|
}
|
|
787
|
+
response = await self._http.request(
|
|
788
|
+
"POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
|
|
789
|
+
)
|
|
790
|
+
data = await response.json()
|
|
791
|
+
if data.get("code") != 200:
|
|
792
|
+
raise_for_code("Add whitelist failed", code=data.get("code"), payload=data)
|
|
793
|
+
return data.get("data", {})
|
|
1282
794
|
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
async with session.post(
|
|
1287
|
-
f"{self._whitelist_url}/add-ip",
|
|
1288
|
-
data=payload,
|
|
1289
|
-
headers=headers,
|
|
1290
|
-
timeout=self._api_timeout,
|
|
1291
|
-
) as response:
|
|
1292
|
-
response.raise_for_status()
|
|
1293
|
-
data = await response.json()
|
|
1294
|
-
|
|
1295
|
-
code = data.get("code")
|
|
1296
|
-
if code != 200:
|
|
1297
|
-
msg = extract_error_message(data)
|
|
1298
|
-
raise_for_code(
|
|
1299
|
-
f"Add whitelist IP failed: {msg}", code=code, payload=data
|
|
1300
|
-
)
|
|
1301
|
-
|
|
1302
|
-
return data.get("data", {})
|
|
1303
|
-
|
|
1304
|
-
except asyncio.TimeoutError as e:
|
|
1305
|
-
raise ThordataTimeoutError(
|
|
1306
|
-
f"Add whitelist timed out: {e}", original_error=e
|
|
1307
|
-
) from e
|
|
1308
|
-
except aiohttp.ClientError as e:
|
|
1309
|
-
raise ThordataNetworkError(
|
|
1310
|
-
f"Add whitelist failed: {e}", original_error=e
|
|
1311
|
-
) from e
|
|
1312
|
-
|
|
1313
|
-
async def list_proxy_servers(
|
|
1314
|
-
self,
|
|
1315
|
-
proxy_type: int,
|
|
1316
|
-
) -> list[ProxyServer]:
|
|
1317
|
-
"""
|
|
1318
|
-
List ISP or Datacenter proxy servers.
|
|
1319
|
-
"""
|
|
1320
|
-
|
|
795
|
+
async def delete_whitelist_ip(
|
|
796
|
+
self, ip: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
797
|
+
) -> dict[str, Any]:
|
|
1321
798
|
self._require_public_credentials()
|
|
1322
|
-
|
|
799
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
800
|
+
headers = build_public_api_headers(str(self.public_token), str(self.public_key))
|
|
801
|
+
payload = {"proxy_type": str(pt), "ip": ip}
|
|
802
|
+
response = await self._http.request(
|
|
803
|
+
"POST", f"{self._whitelist_url}/delete-ip", data=payload, headers=headers
|
|
804
|
+
)
|
|
805
|
+
data = await response.json()
|
|
806
|
+
if data.get("code") != 200:
|
|
807
|
+
raise_for_code(
|
|
808
|
+
"Delete whitelist failed", code=data.get("code"), payload=data
|
|
809
|
+
)
|
|
810
|
+
return data.get("data", {})
|
|
1323
811
|
|
|
812
|
+
async def list_whitelist_ips(
|
|
813
|
+
self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
|
|
814
|
+
) -> list[str]:
|
|
815
|
+
self._require_public_credentials()
|
|
816
|
+
pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
|
|
1324
817
|
params = {
|
|
1325
818
|
"token": self.public_token,
|
|
1326
819
|
"key": self.public_key,
|
|
1327
|
-
"proxy_type": str(
|
|
820
|
+
"proxy_type": str(pt),
|
|
1328
821
|
}
|
|
822
|
+
response = await self._http.request(
|
|
823
|
+
"GET", f"{self._whitelist_url}/ip-list", params=params
|
|
824
|
+
)
|
|
825
|
+
data = await response.json()
|
|
826
|
+
if data.get("code") != 200:
|
|
827
|
+
raise_for_code("List whitelist failed", code=data.get("code"), payload=data)
|
|
828
|
+
|
|
829
|
+
items = data.get("data", []) or []
|
|
830
|
+
result = []
|
|
831
|
+
for item in items:
|
|
832
|
+
if isinstance(item, str):
|
|
833
|
+
result.append(item)
|
|
834
|
+
elif isinstance(item, dict) and "ip" in item:
|
|
835
|
+
result.append(str(item["ip"]))
|
|
836
|
+
else:
|
|
837
|
+
result.append(str(item))
|
|
838
|
+
return result
|
|
1329
839
|
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
async with session.get(
|
|
1334
|
-
self._proxy_list_url,
|
|
1335
|
-
params=params,
|
|
1336
|
-
timeout=self._api_timeout,
|
|
1337
|
-
) as response:
|
|
1338
|
-
response.raise_for_status()
|
|
1339
|
-
data = await response.json()
|
|
1340
|
-
|
|
1341
|
-
if isinstance(data, dict):
|
|
1342
|
-
code = data.get("code")
|
|
1343
|
-
if code is not None and code != 200:
|
|
1344
|
-
msg = extract_error_message(data)
|
|
1345
|
-
raise_for_code(
|
|
1346
|
-
f"List proxy servers error: {msg}", code=code, payload=data
|
|
1347
|
-
)
|
|
1348
|
-
|
|
1349
|
-
server_list = data.get("data", data.get("list", []))
|
|
1350
|
-
elif isinstance(data, list):
|
|
1351
|
-
server_list = data
|
|
1352
|
-
else:
|
|
1353
|
-
raise ThordataNetworkError(
|
|
1354
|
-
f"Unexpected proxy list response: {type(data).__name__}",
|
|
1355
|
-
original_error=None,
|
|
1356
|
-
)
|
|
1357
|
-
|
|
1358
|
-
return [ProxyServer.from_dict(s) for s in server_list]
|
|
1359
|
-
|
|
1360
|
-
except asyncio.TimeoutError as e:
|
|
1361
|
-
raise ThordataTimeoutError(
|
|
1362
|
-
f"List servers timed out: {e}", original_error=e
|
|
1363
|
-
) from e
|
|
1364
|
-
except aiohttp.ClientError as e:
|
|
1365
|
-
raise ThordataNetworkError(
|
|
1366
|
-
f"List servers failed: {e}", original_error=e
|
|
1367
|
-
) from e
|
|
1368
|
-
|
|
1369
|
-
async def get_isp_regions(self) -> list[dict[str, Any]]:
|
|
1370
|
-
"""
|
|
1371
|
-
Get available ISP proxy regions.
|
|
1372
|
-
|
|
1373
|
-
Uses public_token/public_key.
|
|
1374
|
-
"""
|
|
1375
|
-
session = self._get_session()
|
|
1376
|
-
headers = self._build_gateway_headers()
|
|
1377
|
-
|
|
1378
|
-
logger.info("Async getting ISP regions")
|
|
1379
|
-
|
|
1380
|
-
try:
|
|
1381
|
-
async with session.post(
|
|
1382
|
-
f"{self._gateway_base_url}/getRegionIsp",
|
|
1383
|
-
headers=headers,
|
|
1384
|
-
data={},
|
|
1385
|
-
timeout=self._api_timeout,
|
|
1386
|
-
) as response:
|
|
1387
|
-
response.raise_for_status()
|
|
1388
|
-
data = await response.json()
|
|
1389
|
-
|
|
1390
|
-
code = data.get("code")
|
|
1391
|
-
if code != 200:
|
|
1392
|
-
msg = extract_error_message(data)
|
|
1393
|
-
raise_for_code(
|
|
1394
|
-
f"Get ISP regions failed: {msg}", code=code, payload=data
|
|
1395
|
-
)
|
|
1396
|
-
|
|
1397
|
-
return data.get("data", [])
|
|
1398
|
-
|
|
1399
|
-
except asyncio.TimeoutError as e:
|
|
1400
|
-
raise ThordataTimeoutError(
|
|
1401
|
-
f"Get ISP regions timed out: {e}", original_error=e
|
|
1402
|
-
) from e
|
|
1403
|
-
except aiohttp.ClientError as e:
|
|
1404
|
-
raise ThordataNetworkError(
|
|
1405
|
-
f"Get ISP regions failed: {e}", original_error=e
|
|
1406
|
-
) from e
|
|
1407
|
-
|
|
1408
|
-
async def list_isp_proxies(self) -> list[dict[str, Any]]:
|
|
1409
|
-
"""
|
|
1410
|
-
List ISP proxies.
|
|
1411
|
-
|
|
1412
|
-
Uses public_token/public_key.
|
|
1413
|
-
"""
|
|
1414
|
-
session = self._get_session()
|
|
1415
|
-
headers = self._build_gateway_headers()
|
|
1416
|
-
|
|
1417
|
-
logger.info("Async listing ISP proxies")
|
|
1418
|
-
|
|
1419
|
-
try:
|
|
1420
|
-
async with session.post(
|
|
1421
|
-
f"{self._gateway_base_url}/queryListIsp",
|
|
1422
|
-
headers=headers,
|
|
1423
|
-
data={},
|
|
1424
|
-
timeout=self._api_timeout,
|
|
1425
|
-
) as response:
|
|
1426
|
-
response.raise_for_status()
|
|
1427
|
-
data = await response.json()
|
|
1428
|
-
|
|
1429
|
-
code = data.get("code")
|
|
1430
|
-
if code != 200:
|
|
1431
|
-
msg = extract_error_message(data)
|
|
1432
|
-
raise_for_code(
|
|
1433
|
-
f"List ISP proxies failed: {msg}", code=code, payload=data
|
|
1434
|
-
)
|
|
1435
|
-
|
|
1436
|
-
return data.get("data", [])
|
|
1437
|
-
|
|
1438
|
-
except asyncio.TimeoutError as e:
|
|
1439
|
-
raise ThordataTimeoutError(
|
|
1440
|
-
f"List ISP proxies timed out: {e}", original_error=e
|
|
1441
|
-
) from e
|
|
1442
|
-
except aiohttp.ClientError as e:
|
|
1443
|
-
raise ThordataNetworkError(
|
|
1444
|
-
f"List ISP proxies failed: {e}", original_error=e
|
|
1445
|
-
) from e
|
|
1446
|
-
|
|
1447
|
-
async def get_wallet_balance(self) -> dict[str, Any]:
|
|
1448
|
-
"""
|
|
1449
|
-
Get wallet balance for ISP proxies.
|
|
1450
|
-
|
|
1451
|
-
Uses public_token/public_key.
|
|
1452
|
-
"""
|
|
1453
|
-
session = self._get_session()
|
|
1454
|
-
headers = self._build_gateway_headers()
|
|
1455
|
-
|
|
1456
|
-
logger.info("Async getting wallet balance")
|
|
1457
|
-
|
|
1458
|
-
try:
|
|
1459
|
-
async with session.post(
|
|
1460
|
-
f"{self._gateway_base_url}/getBalance",
|
|
1461
|
-
headers=headers,
|
|
1462
|
-
data={},
|
|
1463
|
-
timeout=self._api_timeout,
|
|
1464
|
-
) as response:
|
|
1465
|
-
response.raise_for_status()
|
|
1466
|
-
data = await response.json()
|
|
1467
|
-
|
|
1468
|
-
code = data.get("code")
|
|
1469
|
-
if code != 200:
|
|
1470
|
-
msg = extract_error_message(data)
|
|
1471
|
-
raise_for_code(
|
|
1472
|
-
f"Get wallet balance failed: {msg}", code=code, payload=data
|
|
1473
|
-
)
|
|
1474
|
-
|
|
1475
|
-
return data.get("data", {})
|
|
1476
|
-
|
|
1477
|
-
except asyncio.TimeoutError as e:
|
|
1478
|
-
raise ThordataTimeoutError(
|
|
1479
|
-
f"Get wallet balance timed out: {e}", original_error=e
|
|
1480
|
-
) from e
|
|
1481
|
-
except aiohttp.ClientError as e:
|
|
1482
|
-
raise ThordataNetworkError(
|
|
1483
|
-
f"Get wallet balance failed: {e}", original_error=e
|
|
1484
|
-
) from e
|
|
840
|
+
# =========================================================================
|
|
841
|
+
# Locations & ASN Methods
|
|
842
|
+
# =========================================================================
|
|
1485
843
|
|
|
1486
|
-
async def
|
|
1487
|
-
self,
|
|
1488
|
-
|
|
1489
|
-
proxy_type: int,
|
|
1490
|
-
) -> dict[str, Any]:
|
|
1491
|
-
"""
|
|
1492
|
-
Get expiration time for specific proxy IPs.
|
|
1493
|
-
"""
|
|
844
|
+
async def _get_locations(
    self, endpoint: str, **kwargs: Any
) -> list[dict[str, Any]]:
    """Shared helper behind the locations API (countries/states/cities/asn).

    Every extra keyword argument is stringified into the query string.

    NOTE(review): error responses raise a plain RuntimeError here, unlike
    the raise_for_code path used by sibling methods — confirm intentional.
    """
    self._require_public_credentials()
    query: dict[str, Any] = {"token": self.public_token, "key": self.public_key}
    query.update({name: str(value) for name, value in kwargs.items()})

    resp = await self._http.request(
        "GET", f"{self._locations_base_url}/{endpoint}", params=query
    )
    body = await resp.json()

    if isinstance(body, list):
        return body
    if not isinstance(body, dict):
        # Unknown payload shape: treat as "no results".
        return []
    if body.get("code") != 200:
        raise RuntimeError(f"Locations error: {body.get('msg')}")
    return body.get("data") or []
|
|
1542
862
|
|
|
1543
863
|
async def list_countries(
    self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
) -> list[dict[str, Any]]:
    """List countries available for the given proxy type."""
    type_code = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    return await self._get_locations("countries", proxy_type=type_code)
|
|
1553
868
|
|
|
1554
869
|
async def list_states(
    self, country_code: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
) -> list[dict[str, Any]]:
    """List states/regions within *country_code* for the given proxy type."""
    type_code = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    return await self._get_locations(
        "states", proxy_type=type_code, country_code=country_code
    )
|
|
1567
876
|
|
|
1568
877
|
async def list_cities(
    self,
    country_code: str,
    state_code: str | None = None,
    proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
) -> list[dict[str, Any]]:
    """List cities within *country_code* (optionally narrowed by state)."""
    type_code = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    query_args: dict[str, Any] = {
        "proxy_type": type_code,
        "country_code": country_code,
    }
    if state_code:
        # Only forward the state filter when the caller actually set one.
        query_args["state_code"] = state_code
    return await self._get_locations("cities", **query_args)
|
|
1585
888
|
|
|
1586
889
|
async def list_asn(
    self, country_code: str, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
) -> list[dict[str, Any]]:
    """List ASNs available within *country_code* for the given proxy type."""
    type_code = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
    return await self._get_locations(
        "asn", proxy_type=type_code, country_code=country_code
    )
|
|
1599
896
|
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
"""Internal async locations API call."""
|
|
1604
|
-
self._require_public_credentials()
|
|
897
|
+
# =========================================================================
|
|
898
|
+
# ISP & Datacenter Proxy Management
|
|
899
|
+
# =========================================================================
|
|
1605
900
|
|
|
901
|
+
async def list_proxy_servers(self, proxy_type: int) -> list[ProxyServer]:
    """List purchased proxy servers (ISP/datacenter).

    Args:
        proxy_type: Numeric proxy type accepted by the public API.

    Returns:
        Parsed ``ProxyServer`` entries.

    Raises:
        Errors from ``raise_for_code`` when the API reports a non-200 code;
        a config error when public credentials are missing.
    """
    self._require_public_credentials()
    params = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type),
    }
    response = await self._http.request("GET", self._proxy_list_url, params=params)
    data = await response.json()

    server_list: list[Any] = []
    if isinstance(data, dict):
        # Only an explicit non-200 "code" is an error; some payloads omit
        # the field entirely and just carry the data (1.3.0 behavior).
        code = data.get("code")
        if code is not None and code != 200:
            raise_for_code("List proxy servers error", code=code, payload=data)
        server_list = data.get("data", data.get("list", []))
    elif isinstance(data, list):
        # Bug fix: the code-check used to run before this type check, so a
        # bare-list response crashed with AttributeError on data.get().
        server_list = data
    return [ProxyServer.from_dict(s) for s in server_list]
|
|
1613
921
|
|
|
1614
|
-
|
|
922
|
+
async def get_proxy_expiration(
    self, ips: str | list[str], proxy_type: int
) -> dict[str, Any]:
    """Get expiration information for specific proxy IPs.

    Args:
        ips: A single IP string or a list of IPs (joined with commas).
        proxy_type: Numeric proxy type accepted by the public API.
    """
    self._require_public_credentials()
    ip_arg = ",".join(ips) if isinstance(ips, list) else ips
    query = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type),
        "ips": ip_arg,
    }
    resp = await self._http.request(
        "GET", self._proxy_expiration_url, params=query
    )
    payload = await resp.json()
    if payload.get("code") != 200:
        raise_for_code(
            "Get expiration error", code=payload.get("code"), payload=payload
        )
    # Fall back to the whole payload when there is no "data" envelope.
    return payload.get("data", payload)
|
|
1615
941
|
|
|
1616
|
-
|
|
942
|
+
async def extract_ip_list(
    self,
    num: int = 1,
    country: str | None = None,
    state: str | None = None,
    city: str | None = None,
    time_limit: int | None = None,
    port: int | None = None,
    return_type: str = "txt",
    protocol: str = "http",
    sep: str = "\r\n",
    product: str = "residential",
) -> list[str]:
    """Extract proxy IPs from the IP-extraction endpoint.

    Returns "ip:port" strings. With ``return_type="json"`` the JSON payload
    is parsed; otherwise the plain-text body is split on *sep* (literal
    ``"\\r"``/``"\\n"`` escapes are normalized to real control characters).
    """
    base_url = "https://get-ip.thordata.net"
    path = "/unlimited_api" if product == "unlimited" else "/api"

    query: dict[str, Any] = {
        "num": str(num),
        "return_type": return_type,
        "protocol": protocol,
        "sep": sep,
    }
    # Optional filters are only forwarded when truthy, matching the API's
    # expectation of absent (not empty) parameters.
    optional = {
        "country": country,
        "state": state,
        "city": city,
        "time": str(time_limit) if time_limit else None,
        "port": str(port) if port else None,
        "td-customer": os.getenv("THORDATA_RESIDENTIAL_USERNAME"),
    }
    for key, value in optional.items():
        if value:
            query[key] = value

    response = await self._http.request("GET", f"{base_url}{path}", params=query)

    if return_type == "json":
        payload = await response.json()
        if not isinstance(payload, dict):
            return []
        if payload.get("code") not in (0, 200):
            raise_for_code(
                "Extract IPs failed", code=payload.get("code"), payload=payload
            )
        entries = payload.get("data") or []
        return [f"{entry['ip']}:{entry['port']}" for entry in entries]

    body = (await response.text()).strip()
    if body.startswith("{") and "code" in body:
        # Even in text mode, the endpoint reports errors as a JSON object.
        try:
            err = await response.json()
            raise_for_code(
                "Extract IPs failed",
                code=err.get("code"),
                payload=err,
            )
        except ValueError:
            # Body merely looked like JSON; fall through to text parsing.
            pass
    real_sep = sep.replace("\\r", "\r").replace("\\n", "\n")
    return [piece.strip() for piece in body.split(real_sep) if piece.strip()]
|
|
1639
1008
|
|
|
1640
1009
|
# =========================================================================
|
|
1641
|
-
#
|
|
1010
|
+
# Helpers
|
|
1642
1011
|
# =========================================================================
|
|
1643
1012
|
|
|
1644
|
-
def _require_public_credentials(self) -> None:
|
|
1645
|
-
"""Ensure public API credentials are available."""
|
|
1646
|
-
if not self.public_token or not self.public_key:
|
|
1647
|
-
raise ThordataConfigError(
|
|
1648
|
-
"public_token and public_key are required for this operation. "
|
|
1649
|
-
"Please provide them when initializing AsyncThordataClient."
|
|
1650
|
-
)
|
|
1651
|
-
|
|
1652
1013
|
def _get_proxy_endpoint_overrides(
|
|
1653
1014
|
self, product: ProxyProduct
|
|
1654
1015
|
) -> tuple[str | None, int | None, str]:
|
|
1655
1016
|
prefix = product.value.upper()
|
|
1656
|
-
|
|
1657
1017
|
host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
|
|
1658
1018
|
"THORDATA_PROXY_HOST"
|
|
1659
1019
|
)
|
|
@@ -1665,69 +1025,40 @@ class AsyncThordataClient:
|
|
|
1665
1025
|
or os.getenv("THORDATA_PROXY_PROTOCOL")
|
|
1666
1026
|
or "http"
|
|
1667
1027
|
)
|
|
1668
|
-
|
|
1669
|
-
port: int | None = None
|
|
1670
|
-
if port_raw:
|
|
1671
|
-
try:
|
|
1672
|
-
port = int(port_raw)
|
|
1673
|
-
except ValueError:
|
|
1674
|
-
port = None
|
|
1675
|
-
|
|
1028
|
+
port = int(port_raw) if port_raw and port_raw.isdigit() else None
|
|
1676
1029
|
return host or None, port, protocol
|
|
1677
1030
|
|
|
1678
1031
|
def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
)
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1698
|
-
ProxyProduct.DATACENTER
|
|
1699
|
-
)
|
|
1700
|
-
return ProxyConfig(
|
|
1701
|
-
username=u,
|
|
1702
|
-
password=p,
|
|
1703
|
-
product=ProxyProduct.DATACENTER,
|
|
1704
|
-
host=host,
|
|
1705
|
-
port=port,
|
|
1706
|
-
protocol=protocol,
|
|
1707
|
-
)
|
|
1708
|
-
|
|
1709
|
-
u = os.getenv("THORDATA_MOBILE_USERNAME")
|
|
1710
|
-
p = os.getenv("THORDATA_MOBILE_PASSWORD")
|
|
1711
|
-
if u and p:
|
|
1712
|
-
host, port, protocol = self._get_proxy_endpoint_overrides(
|
|
1713
|
-
ProxyProduct.MOBILE
|
|
1714
|
-
)
|
|
1715
|
-
return ProxyConfig(
|
|
1716
|
-
username=u,
|
|
1717
|
-
password=p,
|
|
1718
|
-
product=ProxyProduct.MOBILE,
|
|
1719
|
-
host=host,
|
|
1720
|
-
port=port,
|
|
1721
|
-
protocol=protocol,
|
|
1722
|
-
)
|
|
1723
|
-
|
|
1032
|
+
for prod in [
|
|
1033
|
+
ProxyProduct.RESIDENTIAL,
|
|
1034
|
+
ProxyProduct.DATACENTER,
|
|
1035
|
+
ProxyProduct.MOBILE,
|
|
1036
|
+
]:
|
|
1037
|
+
prefix = prod.value.upper()
|
|
1038
|
+
u = os.getenv(f"THORDATA_{prefix}_USERNAME")
|
|
1039
|
+
p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
|
|
1040
|
+
if u and p:
|
|
1041
|
+
h, port, proto = self._get_proxy_endpoint_overrides(prod)
|
|
1042
|
+
return ProxyConfig(
|
|
1043
|
+
username=u,
|
|
1044
|
+
password=p,
|
|
1045
|
+
product=prod,
|
|
1046
|
+
host=h,
|
|
1047
|
+
port=port,
|
|
1048
|
+
protocol=proto,
|
|
1049
|
+
)
|
|
1724
1050
|
return None
|
|
1725
1051
|
|
|
1726
|
-
def
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1052
|
+
def get_browser_connection_url(
|
|
1053
|
+
self, username: str | None = None, password: str | None = None
|
|
1054
|
+
) -> str:
|
|
1055
|
+
user = username or os.getenv("THORDATA_BROWSER_USERNAME")
|
|
1056
|
+
pwd = password or os.getenv("THORDATA_BROWSER_PASSWORD")
|
|
1057
|
+
if not user or not pwd:
|
|
1058
|
+
raise ThordataConfigError("Browser credentials missing.")
|
|
1059
|
+
prefix = "td-customer-"
|
|
1060
|
+
final_user = f"{prefix}{user}" if not user.startswith(prefix) else user
|
|
1061
|
+
|
|
1062
|
+
safe_user = quote(final_user, safe="")
|
|
1063
|
+
safe_pass = quote(pwd, safe="")
|
|
1064
|
+
return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
|