thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +151 -0
- thordata/_example_utils.py +77 -0
- thordata/_utils.py +190 -0
- thordata/async_client.py +1675 -0
- thordata/client.py +1644 -0
- thordata/demo.py +138 -0
- thordata/enums.py +384 -0
- thordata/exceptions.py +355 -0
- thordata/models.py +1197 -0
- thordata/retry.py +382 -0
- thordata/serp_engines.py +166 -0
- thordata_sdk-1.2.0.dist-info/METADATA +208 -0
- thordata_sdk-1.2.0.dist-info/RECORD +16 -0
- {thordata_sdk-0.2.4.dist-info → thordata_sdk-1.2.0.dist-info}/WHEEL +1 -1
- thordata_sdk-1.2.0.dist-info/licenses/LICENSE +21 -0
- thordata_sdk-1.2.0.dist-info/top_level.txt +1 -0
- thordata_sdk/__init__.py +0 -9
- thordata_sdk/async_client.py +0 -247
- thordata_sdk/client.py +0 -303
- thordata_sdk/enums.py +0 -20
- thordata_sdk/parameters.py +0 -41
- thordata_sdk-0.2.4.dist-info/LICENSE +0 -201
- thordata_sdk-0.2.4.dist-info/METADATA +0 -113
- thordata_sdk-0.2.4.dist-info/RECORD +0 -10
- thordata_sdk-0.2.4.dist-info/top_level.txt +0 -1
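The file list shows the top-level package moving from thordata_sdk (0.2.4) to thordata (1.2.0), so imports must be updated accordingly. A minimal quick-start sketch, adapted from the module docstring of the new thordata/async_client.py shown below; the token values are placeholders:

    import asyncio

    from thordata import AsyncThordataClient

    async def main() -> None:
        # Placeholder credentials from the Thordata dashboard.
        async with AsyncThordataClient(
            scraper_token="your_token",
            public_token="your_public_token",
            public_key="your_public_key",
        ) as client:
            # Plain GET routed through the Proxy Network. Assumes
            # THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD are
            # set in the environment; otherwise pass proxy_config=ProxyConfig(...).
            response = await client.get("https://httpbin.org/ip")
            print(await response.json())

    asyncio.run(main())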
thordata/async_client.py
ADDED
@@ -0,0 +1,1675 @@
"""
Asynchronous client for the Thordata API.

This module provides the AsyncThordataClient for high-concurrency workloads,
built on aiohttp.

Example:
    >>> import asyncio
    >>> from thordata import AsyncThordataClient
    >>>
    >>> async def main():
    ...     async with AsyncThordataClient(
    ...         scraper_token="your_token",
    ...         public_token="your_public_token",
    ...         public_key="your_public_key"
    ...     ) as client:
    ...         response = await client.get("https://httpbin.org/ip")
    ...         print(await response.json())
    >>>
    >>> asyncio.run(main())
"""

from __future__ import annotations

import asyncio
import logging
import os
from datetime import date
from typing import Any

import aiohttp

from . import __version__ as _sdk_version
from ._utils import (
    build_auth_headers,
    build_builder_headers,
    build_public_api_headers,
    build_user_agent,
    decode_base64_image,
    extract_error_message,
    parse_json_response,
)
from .enums import Engine, ProxyType
from .exceptions import (
    ThordataConfigError,
    ThordataNetworkError,
    ThordataTimeoutError,
    raise_for_code,
)
from .models import (
    CommonSettings,
    ProxyConfig,
    ProxyProduct,
    ProxyServer,
    ProxyUserList,
    ScraperTaskConfig,
    SerpRequest,
    UniversalScrapeRequest,
    UsageStatistics,
    VideoTaskConfig,
)
from .retry import RetryConfig
from .serp_engines import AsyncSerpNamespace

logger = logging.getLogger(__name__)


class AsyncThordataClient:
    """
    The official asynchronous Python client for Thordata.

    Designed for high-concurrency AI agents and data pipelines.

    Args:
        scraper_token: The API token from your Dashboard.
        public_token: The public API token.
        public_key: The public API key.
        proxy_host: Custom proxy gateway host.
        proxy_port: Custom proxy gateway port.
        timeout: Default request timeout in seconds.
        retry_config: Configuration for automatic retries.

    Example:
        >>> async with AsyncThordataClient(
        ...     scraper_token="token",
        ...     public_token="pub_token",
        ...     public_key="pub_key"
        ... ) as client:
        ...     # Old style
        ...     results = await client.serp_search("python")
        ...     # New style (Namespaced)
        ...     maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
    """

    # API Endpoints (same as sync client)
    BASE_URL = "https://scraperapi.thordata.com"
    UNIVERSAL_URL = "https://universalapi.thordata.com"
    API_URL = "https://openapi.thordata.com/api/web-scraper-api"
    LOCATIONS_URL = "https://openapi.thordata.com/api/locations"

    def __init__(
        self,
        scraper_token: str | None = None,  # Change: Optional
        public_token: str | None = None,
        public_key: str | None = None,
        proxy_host: str = "pr.thordata.net",
        proxy_port: int = 9999,
        timeout: int = 30,
        api_timeout: int = 60,
        retry_config: RetryConfig | None = None,
        auth_mode: str = "bearer",
        scraperapi_base_url: str | None = None,
        universalapi_base_url: str | None = None,
        web_scraper_api_base_url: str | None = None,
        locations_base_url: str | None = None,
    ) -> None:
        """Initialize the Async Thordata Client."""

        self.scraper_token = scraper_token
        self.public_token = public_token
        self.public_key = public_key

        # Proxy configuration
        self._proxy_host = proxy_host
        self._proxy_port = proxy_port

        # Timeout configuration
        self._default_timeout = aiohttp.ClientTimeout(total=timeout)
        self._api_timeout = aiohttp.ClientTimeout(total=api_timeout)

        # Retry configuration
        self._retry_config = retry_config or RetryConfig()

        # Authentication mode (for scraping APIs)
        self._auth_mode = auth_mode.lower()
        if self._auth_mode not in ("bearer", "header_token"):
            raise ThordataConfigError(
                f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
            )

        # Base URLs (allow override via args or env vars for testing and custom routing)
        scraperapi_base = (
            scraperapi_base_url
            or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
            or self.BASE_URL
        ).rstrip("/")

        universalapi_base = (
            universalapi_base_url
            or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
            or self.UNIVERSAL_URL
        ).rstrip("/")

        web_scraper_api_base = (
            web_scraper_api_base_url
            or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
            or self.API_URL
        ).rstrip("/")

        locations_base = (
            locations_base_url
            or os.getenv("THORDATA_LOCATIONS_BASE_URL")
            or self.LOCATIONS_URL
        ).rstrip("/")

        # Keep these env overrides for now
        gateway_base = os.getenv(
            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
        )
        child_base = os.getenv(
            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
        )

        self._gateway_base_url = gateway_base
        self._child_base_url = child_base

        self._serp_url = f"{scraperapi_base}/request"
        self._builder_url = f"{scraperapi_base}/builder"
        self._video_builder_url = f"{scraperapi_base}/video_builder"
        self._universal_url = f"{universalapi_base}/request"

        self._status_url = f"{web_scraper_api_base}/tasks-status"
        self._download_url = f"{web_scraper_api_base}/tasks-download"
        self._list_url = f"{web_scraper_api_base}/tasks-list"

        self._locations_base_url = locations_base
        self._usage_stats_url = (
            f"{locations_base.replace('/locations', '')}/account/usage-statistics"
        )
        self._proxy_users_url = (
            f"{locations_base.replace('/locations', '')}/proxy-users"
        )

        whitelist_base = os.getenv(
            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
        )
        self._whitelist_url = f"{whitelist_base}/whitelisted-ips"

        proxy_api_base = os.getenv(
            "THORDATA_PROXY_API_BASE_URL", "https://openapi.thordata.com/api"
        )
        self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
        self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"

        # Session initialized lazily
        self._session: aiohttp.ClientSession | None = None

        # Namespaced Access (e.g. client.serp.google.maps(...))
        self.serp = AsyncSerpNamespace(self)

    async def __aenter__(self) -> AsyncThordataClient:
        """Async context manager entry."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=self._api_timeout,
                trust_env=True,
                headers={"User-Agent": build_user_agent(_sdk_version, "aiohttp")},
            )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async context manager exit."""
        await self.close()

    async def close(self) -> None:
        """Close the underlying aiohttp session."""
        if self._session and not self._session.closed:
            await self._session.close()
        self._session = None

    def _get_session(self) -> aiohttp.ClientSession:
        """Get the session, raising if not initialized."""
        if self._session is None or self._session.closed:
            raise RuntimeError(
                "Client session not initialized. "
                "Use 'async with AsyncThordataClient(...) as client:'"
            )
        return self._session

    # =========================================================================
    # Proxy Network Methods
    # =========================================================================

    async def get(
        self,
        url: str,
        *,
        proxy_config: ProxyConfig | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Send an async GET request through the Proxy Network.

        Args:
            url: The target URL.
            proxy_config: Custom proxy configuration.
            **kwargs: Additional aiohttp arguments.

        Returns:
            The aiohttp response object.
        """
        session = self._get_session()

        logger.debug(f"Async Proxy GET: {url}")

        if proxy_config is None:
            proxy_config = self._get_default_proxy_config_from_env()

        if proxy_config is None:
            raise ThordataConfigError(
                "Proxy credentials are missing. "
                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
            )

        # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
        # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
        # to avoid confusing "it always fails" behavior.
        if getattr(proxy_config, "protocol", "http").lower() == "https":
            raise ThordataConfigError(
                "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
                "aiohttp support for 'https://' proxies is limited and may fail. "
                "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
            )
        proxy_url, proxy_auth = proxy_config.to_aiohttp_config()

        try:
            return await session.get(
                url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
            )
        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Async request timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Async request failed: {e}", original_error=e
            ) from e

    async def post(
        self,
        url: str,
        *,
        proxy_config: ProxyConfig | None = None,
        **kwargs: Any,
    ) -> aiohttp.ClientResponse:
        """
        Send an async POST request through the Proxy Network.

        Args:
            url: The target URL.
            proxy_config: Custom proxy configuration.
            **kwargs: Additional aiohttp arguments.

        Returns:
            The aiohttp response object.
        """
        session = self._get_session()

        logger.debug(f"Async Proxy POST: {url}")

        if proxy_config is None:
            proxy_config = self._get_default_proxy_config_from_env()

        if proxy_config is None:
            raise ThordataConfigError(
                "Proxy credentials are missing. "
                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
            )

        # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
        # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
        # to avoid confusing "it always fails" behavior.
        if getattr(proxy_config, "protocol", "http").lower() == "https":
            raise ThordataConfigError(
                "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
                "aiohttp support for 'https://' proxies is limited and may fail. "
                "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
            )
        proxy_url, proxy_auth = proxy_config.to_aiohttp_config()

        try:
            return await session.post(
                url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
            )
        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Async request timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Async request failed: {e}", original_error=e
            ) from e

    # =========================================================================
    # SERP API Methods
    # =========================================================================

    async def serp_search(
        self,
        query: str,
        *,
        engine: Engine | str = Engine.GOOGLE,
        num: int = 10,
        country: str | None = None,
        language: str | None = None,
        search_type: str | None = None,
        device: str | None = None,
        render_js: bool | None = None,
        no_cache: bool | None = None,
        output_format: str = "json",
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Execute an async SERP search.

        Args:
            query: Search keywords.
            engine: Search engine.
            num: Number of results.
            country: Country code for localization.
            language: Language code.
            search_type: Type of search.
            device: Device type ('desktop', 'mobile', 'tablet').
            render_js: Enable JavaScript rendering in SERP.
            no_cache: Disable internal caching.
            output_format: 'json' or 'html'.
            **kwargs: Additional parameters.

        Returns:
            Parsed JSON results or dict with 'html' key.
        """
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for SERP API")

        session = self._get_session()

        engine_str = engine.value if isinstance(engine, Engine) else engine.lower()

        request = SerpRequest(
            query=query,
            engine=engine_str,
            num=num,
            country=country,
            language=language,
            search_type=search_type,
            device=device,
            render_js=render_js,
            no_cache=no_cache,
            output_format=output_format,
            extra_params=kwargs,
        )

        payload = request.to_payload()
        token = self.scraper_token or ""
        headers = build_auth_headers(token, mode=self._auth_mode)

        logger.info(f"Async SERP Search: {engine_str} - {query}")

        try:
            async with session.post(
                self._serp_url,
                data=payload,
                headers=headers,
            ) as response:
                response.raise_for_status()

                if output_format.lower() == "json":
                    data = await response.json()

                    if isinstance(data, dict):
                        code = data.get("code")
                        if code is not None and code != 200:
                            msg = extract_error_message(data)
                            raise_for_code(
                                f"SERP API Error: {msg}",
                                code=code,
                                payload=data,
                            )

                    return parse_json_response(data)

                text = await response.text()
                return {"html": text}

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"SERP request timed out: {e}",
                original_error=e,
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"SERP request failed: {e}",
                original_error=e,
            ) from e

    async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
        """
        Execute an async SERP search using a SerpRequest object.
        """
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for SERP API")

        payload = request.to_payload()
        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)

        logger.info(f"Async SERP Advanced: {request.engine} - {request.query}")

        try:
            async with session.post(
                self._serp_url,
                data=payload,
                headers=headers,
            ) as response:
                response.raise_for_status()

                if request.output_format.lower() == "json":
                    data = await response.json()

                    if isinstance(data, dict):
                        code = data.get("code")
                        if code is not None and code != 200:
                            msg = extract_error_message(data)
                            raise_for_code(
                                f"SERP API Error: {msg}",
                                code=code,
                                payload=data,
                            )

                    return parse_json_response(data)

                text = await response.text()
                return {"html": text}

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"SERP request timed out: {e}",
                original_error=e,
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"SERP request failed: {e}",
                original_error=e,
            ) from e

    # =========================================================================
    # Universal Scraping API Methods
    # =========================================================================

    async def universal_scrape(
        self,
        url: str,
        *,
        js_render: bool = False,
        output_format: str = "html",
        country: str | None = None,
        block_resources: str | None = None,
        wait: int | None = None,
        wait_for: str | None = None,
        **kwargs: Any,
    ) -> str | bytes:
        """
        Async scrape using Universal API (Web Unlocker).

        Args:
            url: Target URL.
            js_render: Enable JavaScript rendering.
            output_format: "html" or "png".
            country: Geo-targeting country.
            block_resources: Resources to block.
            wait: Wait time in ms.
            wait_for: CSS selector to wait for.

        Returns:
            HTML string or PNG bytes.
        """
        request = UniversalScrapeRequest(
            url=url,
            js_render=js_render,
            output_format=output_format,
            country=country,
            block_resources=block_resources,
            wait=wait,
            wait_for=wait_for,
            extra_params=kwargs,
        )

        return await self.universal_scrape_advanced(request)

    async def universal_scrape_advanced(
        self, request: UniversalScrapeRequest
    ) -> str | bytes:
        """
        Async scrape using a UniversalScrapeRequest object.
        """
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for Universal API")

        payload = request.to_payload()
        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)

        logger.info(f"Async Universal Scrape: {request.url}")

        try:
            async with session.post(
                self._universal_url, data=payload, headers=headers
            ) as response:
                response.raise_for_status()

                try:
                    resp_json = await response.json()
                except ValueError:
                    if request.output_format.lower() == "png":
                        return await response.read()
                    return await response.text()

                # Check for API errors
                if isinstance(resp_json, dict):
                    code = resp_json.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(resp_json)
                        raise_for_code(
                            f"Universal API Error: {msg}", code=code, payload=resp_json
                        )

                    if "html" in resp_json:
                        return resp_json["html"]

                    if "png" in resp_json:
                        return decode_base64_image(resp_json["png"])

                return str(resp_json)

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Universal scrape timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Universal scrape failed: {e}", original_error=e
            ) from e

    # =========================================================================
    # Web Scraper API Methods
    # =========================================================================

    async def create_scraper_task(
        self,
        file_name: str,
        spider_id: str,
        spider_name: str,
        parameters: dict[str, Any],
        universal_params: dict[str, Any] | None = None,
    ) -> str:
        """
        Create an async Web Scraper task.
        """
        config = ScraperTaskConfig(
            file_name=file_name,
            spider_id=spider_id,
            spider_name=spider_name,
            parameters=parameters,
            universal_params=universal_params,
        )

        return await self.create_scraper_task_advanced(config)

    async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
        """
        Create a task using ScraperTaskConfig.
        """
        self._require_public_credentials()
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for Task Builder")

        payload = config.to_payload()
        # Builder needs 3 headers: token, key, Authorization Bearer
        headers = build_builder_headers(
            self.scraper_token,
            self.public_token or "",
            self.public_key or "",
        )

        logger.info(f"Async Task Creation: {config.spider_name}")

        try:
            async with session.post(
                self._builder_url, data=payload, headers=headers
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Task creation failed: {msg}", code=code, payload=data
                    )

                return data["data"]["task_id"]

        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Task creation failed: {e}", original_error=e
            ) from e

    async def create_video_task(
        self,
        file_name: str,
        spider_id: str,
        spider_name: str,
        parameters: dict[str, Any],
        common_settings: CommonSettings,
    ) -> str:
        """
        Create a YouTube video/audio download task.
        """

        config = VideoTaskConfig(
            file_name=file_name,
            spider_id=spider_id,
            spider_name=spider_name,
            parameters=parameters,
            common_settings=common_settings,
        )

        return await self.create_video_task_advanced(config)

    async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
        """
        Create a video task using VideoTaskConfig object.
        """

        self._require_public_credentials()
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError(
                "scraper_token is required for Video Task Builder"
            )

        payload = config.to_payload()
        headers = build_builder_headers(
            self.scraper_token,
            self.public_token or "",
            self.public_key or "",
        )

        logger.info(
            f"Async Video Task Creation: {config.spider_name} - {config.spider_id}"
        )

        try:
            async with session.post(
                self._video_builder_url,
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Video task creation failed: {msg}", code=code, payload=data
                    )

                return data["data"]["task_id"]

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Video task creation timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Video task creation failed: {e}", original_error=e
            ) from e

    async def get_task_status(self, task_id: str) -> str:
        """
        Check async task status.

        Raises:
            ThordataConfigError: If public credentials are missing.
            ThordataAPIError: If API returns a non-200 code in JSON payload.
            ThordataNetworkError: If network/HTTP request fails.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )
        payload = {"tasks_ids": task_id}

        try:
            async with session.post(
                self._status_url, data=payload, headers=headers
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"Task status API Error: {msg}",
                            code=code,
                            payload=data,
                        )

                    items = data.get("data") or []
                    for item in items:
                        if str(item.get("task_id")) == str(task_id):
                            return item.get("status", "unknown")

                    return "unknown"

                raise ThordataNetworkError(
                    f"Unexpected task status response type: {type(data).__name__}",
                    original_error=None,
                )

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Async status check timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Async status check failed: {e}", original_error=e
            ) from e

    async def safe_get_task_status(self, task_id: str) -> str:
        """
        Backward-compatible status check.

        Returns:
            Status string, or "error" on any exception.
        """
        try:
            return await self.get_task_status(task_id)
        except Exception:
            return "error"

    async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
        """
        Get download URL for completed task.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )
        payload = {"tasks_id": task_id, "type": file_type}

        logger.info(f"Async getting result for Task: {task_id}")

        try:
            async with session.post(
                self._download_url, data=payload, headers=headers
            ) as response:
                data = await response.json()
                code = data.get("code")

                if code == 200 and data.get("data"):
                    return data["data"]["download"]

                msg = extract_error_message(data)
                raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
                # This line won't be reached, but satisfies mypy
                raise RuntimeError("Unexpected state")

        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get result failed: {e}", original_error=e
            ) from e

    async def list_tasks(
        self,
        page: int = 1,
        size: int = 20,
    ) -> dict[str, Any]:
        """
        List all Web Scraper tasks.

        Args:
            page: Page number (starts from 1).
            size: Number of tasks per page.

        Returns:
            Dict containing 'count' and 'list' of tasks.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )
        payload: dict[str, Any] = {}
        if page:
            payload["page"] = str(page)
        if size:
            payload["size"] = str(size)

        logger.info(f"Async listing tasks: page={page}, size={size}")

        try:
            async with session.post(
                self._list_url,
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)

                return data.get("data", {"count": 0, "list": []})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"List tasks timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"List tasks failed: {e}", original_error=e
            ) from e

    async def wait_for_task(
        self,
        task_id: str,
        *,
        poll_interval: float = 5.0,
        max_wait: float = 600.0,
    ) -> str:
        """
        Wait for a task to complete.
        """

        import time

        start = time.monotonic()

        while (time.monotonic() - start) < max_wait:
            status = await self.get_task_status(task_id)

            logger.debug(f"Task {task_id} status: {status}")

            terminal_statuses = {
                "ready",
                "success",
                "finished",
                "failed",
                "error",
                "cancelled",
            }

            if status.lower() in terminal_statuses:
                return status

            await asyncio.sleep(poll_interval)

        raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")

    # =========================================================================
    # Proxy Account Management Methods
    # =========================================================================

    async def get_usage_statistics(
        self,
        from_date: str | date,
        to_date: str | date,
    ) -> UsageStatistics:
        """
        Get account usage statistics for a date range.

        Args:
            from_date: Start date (YYYY-MM-DD string or date object).
            to_date: End date (YYYY-MM-DD string or date object).

        Returns:
            UsageStatistics object with traffic data.
        """

        self._require_public_credentials()
        session = self._get_session()

        # Convert dates to strings
        if isinstance(from_date, date):
            from_date = from_date.strftime("%Y-%m-%d")
        if isinstance(to_date, date):
            to_date = to_date.strftime("%Y-%m-%d")

        params = {
            "token": self.public_token,
            "key": self.public_key,
            "from_date": from_date,
            "to_date": to_date,
        }

        logger.info(f"Async getting usage statistics: {from_date} to {to_date}")

        try:
            async with session.get(
                self._usage_stats_url,
                params=params,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"Usage statistics error: {msg}",
                            code=code,
                            payload=data,
                        )

                    usage_data = data.get("data", data)
                    return UsageStatistics.from_dict(usage_data)

                raise ThordataNetworkError(
                    f"Unexpected usage statistics response: {type(data).__name__}",
                    original_error=None,
                )

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Usage statistics timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Usage statistics failed: {e}", original_error=e
            ) from e

    async def get_residential_balance(self) -> dict[str, Any]:
        """
        Get residential proxy balance.

        Uses public_token/public_key.
        """
        session = self._get_session()
        headers = self._build_gateway_headers()

        logger.info("Async getting residential proxy balance")

        try:
            async with session.post(
                f"{self._gateway_base_url}/getFlowBalance",
                headers=headers,
                data={},
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Get balance failed: {msg}", code=code, payload=data
                    )

                return data.get("data", {})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Get balance timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get balance failed: {e}", original_error=e
            ) from e

    async def get_residential_usage(
        self,
        start_time: str | int,
        end_time: str | int,
    ) -> dict[str, Any]:
        """
        Get residential proxy usage records.

        Uses public_token/public_key.
        """
        session = self._get_session()
        headers = self._build_gateway_headers()
        payload = {"start_time": str(start_time), "end_time": str(end_time)}

        logger.info(f"Async getting residential usage: {start_time} to {end_time}")

        try:
            async with session.post(
                f"{self._gateway_base_url}/usageRecord",
                headers=headers,
                data=payload,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)

                return data.get("data", {})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Get usage timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get usage failed: {e}", original_error=e
            ) from e

    async def list_proxy_users(
        self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
    ) -> ProxyUserList:
        """List all proxy users (sub-accounts)."""

        self._require_public_credentials()
        session = self._get_session()

        params = {
            "token": self.public_token,
            "key": self.public_key,
            "proxy_type": str(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
        }

        logger.info(f"Async listing proxy users: type={params['proxy_type']}")

        try:
            async with session.get(
                f"{self._proxy_users_url}/user-list",
                params=params,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"List proxy users error: {msg}", code=code, payload=data
                        )

                    user_data = data.get("data", data)
                    return ProxyUserList.from_dict(user_data)

                raise ThordataNetworkError(
                    f"Unexpected proxy users response: {type(data).__name__}",
                    original_error=None,
                )

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"List users timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"List users failed: {e}", original_error=e
            ) from e

    async def create_proxy_user(
        self,
        username: str,
        password: str,
        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
        traffic_limit: int = 0,
        status: bool = True,
    ) -> dict[str, Any]:
        """Create a new proxy user (sub-account)."""
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )

        payload = {
            "proxy_type": str(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
            "username": username,
            "password": password,
            "traffic_limit": str(traffic_limit),
            "status": "true" if status else "false",
        }

        logger.info(f"Async creating proxy user: {username}")

        try:
            async with session.post(
                f"{self._proxy_users_url}/create-user",
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Create proxy user failed: {msg}", code=code, payload=data
                    )

                return data.get("data", {})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Create user timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Create user failed: {e}", original_error=e
            ) from e

    async def add_whitelist_ip(
        self,
        ip: str,
        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
        status: bool = True,
    ) -> dict[str, Any]:
        """
        Add an IP to the whitelist for IP authentication.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )

        proxy_type_int = (
            int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
        )

        payload = {
            "proxy_type": str(proxy_type_int),
            "ip": ip,
            "status": "true" if status else "false",
        }

        logger.info(f"Async adding whitelist IP: {ip}")

        try:
            async with session.post(
                f"{self._whitelist_url}/add-ip",
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Add whitelist IP failed: {msg}", code=code, payload=data
                    )

                return data.get("data", {})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Add whitelist timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Add whitelist failed: {e}", original_error=e
            ) from e

    async def list_proxy_servers(
        self,
        proxy_type: int,
    ) -> list[ProxyServer]:
        """
        List ISP or Datacenter proxy servers.
        """

        self._require_public_credentials()
        session = self._get_session()

        params = {
            "token": self.public_token,
            "key": self.public_key,
            "proxy_type": str(proxy_type),
        }

        logger.info(f"Async listing proxy servers: type={proxy_type}")

        try:
            async with session.get(
                self._proxy_list_url,
                params=params,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"List proxy servers error: {msg}", code=code, payload=data
                        )

                    server_list = data.get("data", data.get("list", []))
                elif isinstance(data, list):
                    server_list = data
                else:
                    raise ThordataNetworkError(
                        f"Unexpected proxy list response: {type(data).__name__}",
                        original_error=None,
                    )

                return [ProxyServer.from_dict(s) for s in server_list]

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"List servers timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"List servers failed: {e}", original_error=e
            ) from e

    async def get_isp_regions(self) -> list[dict[str, Any]]:
        """
        Get available ISP proxy regions.

        Uses public_token/public_key.
        """
        session = self._get_session()
        headers = self._build_gateway_headers()

        logger.info("Async getting ISP regions")

        try:
            async with session.post(
                f"{self._gateway_base_url}/getRegionIsp",
                headers=headers,
                data={},
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Get ISP regions failed: {msg}", code=code, payload=data
                    )

                return data.get("data", [])

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Get ISP regions timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get ISP regions failed: {e}", original_error=e
            ) from e

    async def list_isp_proxies(self) -> list[dict[str, Any]]:
        """
        List ISP proxies.

        Uses public_token/public_key.
        """
        session = self._get_session()
        headers = self._build_gateway_headers()

        logger.info("Async listing ISP proxies")

        try:
            async with session.post(
                f"{self._gateway_base_url}/queryListIsp",
                headers=headers,
                data={},
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"List ISP proxies failed: {msg}", code=code, payload=data
                    )

                return data.get("data", [])

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"List ISP proxies timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"List ISP proxies failed: {e}", original_error=e
            ) from e

    async def get_wallet_balance(self) -> dict[str, Any]:
        """
        Get wallet balance for ISP proxies.

        Uses public_token/public_key.
        """
        session = self._get_session()
        headers = self._build_gateway_headers()

        logger.info("Async getting wallet balance")

        try:
            async with session.post(
                f"{self._gateway_base_url}/getBalance",
                headers=headers,
                data={},
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Get wallet balance failed: {msg}", code=code, payload=data
                    )

                return data.get("data", {})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Get wallet balance timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get wallet balance failed: {e}", original_error=e
            ) from e

    async def get_proxy_expiration(
        self,
        ips: str | list[str],
        proxy_type: int,
    ) -> dict[str, Any]:
        """
        Get expiration time for specific proxy IPs.
        """
        self._require_public_credentials()
        session = self._get_session()

        if isinstance(ips, list):
            ips = ",".join(ips)

        params = {
            "token": self.public_token,
            "key": self.public_key,
            "proxy_type": str(proxy_type),
            "ips": ips,
        }

        logger.info(f"Async getting proxy expiration: {ips}")

        try:
            async with session.get(
                self._proxy_expiration_url,
                params=params,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"Get expiration error: {msg}", code=code, payload=data
                        )

                    return data.get("data", data)

                return data

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Get expiration timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Get expiration failed: {e}", original_error=e
            ) from e

    # =========================================================================
    # Location API Methods
    # =========================================================================

    async def list_countries(
        self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
    ) -> list[dict[str, Any]]:
        """List supported countries."""
        return await self._get_locations(
            "countries",
            proxy_type=(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
        )

    async def list_states(
        self,
        country_code: str,
        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
    ) -> list[dict[str, Any]]:
        """List supported states for a country."""
        return await self._get_locations(
            "states",
            proxy_type=(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
            country_code=country_code,
        )

    async def list_cities(
        self,
        country_code: str,
        state_code: str | None = None,
        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
    ) -> list[dict[str, Any]]:
        """List supported cities."""
        kwargs = {
            "proxy_type": (
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
            "country_code": country_code,
        }
        if state_code:
            kwargs["state_code"] = state_code

        return await self._get_locations("cities", **kwargs)

    async def list_asn(
        self,
        country_code: str,
        proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
    ) -> list[dict[str, Any]]:
        """List supported ASNs."""
        return await self._get_locations(
            "asn",
            proxy_type=(
                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
            ),
            country_code=country_code,
        )

    async def _get_locations(
        self, endpoint: str, **kwargs: Any
    ) -> list[dict[str, Any]]:
        """Internal async locations API call."""
        self._require_public_credentials()

        params = {
            "token": self.public_token or "",
            "key": self.public_key or "",
        }

        for key, value in kwargs.items():
            params[key] = str(value)

        url = f"{self._locations_base_url}/{endpoint}"

        logger.debug(f"Async Locations API: {url}")

        # Create temporary session for this request (no proxy needed)
        async with (
            aiohttp.ClientSession(trust_env=True) as temp_session,
            temp_session.get(url, params=params) as response,
        ):
            response.raise_for_status()
            data = await response.json()

            if isinstance(data, dict):
                code = data.get("code")
                if code is not None and code != 200:
                    msg = data.get("msg", "")
                    raise RuntimeError(
                        f"Locations API error ({endpoint}): code={code}, msg={msg}"
                    )
                return data.get("data") or []

            if isinstance(data, list):
                return data

            return []

    # =========================================================================
    # Helper Methods
    # =========================================================================

    def _require_public_credentials(self) -> None:
        """Ensure public API credentials are available."""
        if not self.public_token or not self.public_key:
            raise ThordataConfigError(
                "public_token and public_key are required for this operation. "
                "Please provide them when initializing AsyncThordataClient."
            )

    def _get_proxy_endpoint_overrides(
        self, product: ProxyProduct
    ) -> tuple[str | None, int | None, str]:
        prefix = product.value.upper()

        host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
            "THORDATA_PROXY_HOST"
        )
        port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
            "THORDATA_PROXY_PORT"
        )
        protocol = (
            os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
            or os.getenv("THORDATA_PROXY_PROTOCOL")
            or "http"
        )

        port: int | None = None
        if port_raw:
            try:
                port = int(port_raw)
            except ValueError:
                port = None

        return host or None, port, protocol

    def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
        u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
        p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
        if u and p:
            host, port, protocol = self._get_proxy_endpoint_overrides(
                ProxyProduct.RESIDENTIAL
            )
            return ProxyConfig(
                username=u,
                password=p,
                product=ProxyProduct.RESIDENTIAL,
                host=host,
                port=port,
                protocol=protocol,
            )

        u = os.getenv("THORDATA_DATACENTER_USERNAME")
        p = os.getenv("THORDATA_DATACENTER_PASSWORD")
        if u and p:
            host, port, protocol = self._get_proxy_endpoint_overrides(
                ProxyProduct.DATACENTER
            )
            return ProxyConfig(
                username=u,
                password=p,
                product=ProxyProduct.DATACENTER,
                host=host,
                port=port,
                protocol=protocol,
            )

        u = os.getenv("THORDATA_MOBILE_USERNAME")
        p = os.getenv("THORDATA_MOBILE_PASSWORD")
        if u and p:
            host, port, protocol = self._get_proxy_endpoint_overrides(
                ProxyProduct.MOBILE
            )
            return ProxyConfig(
                username=u,
                password=p,
                product=ProxyProduct.MOBILE,
                host=host,
                port=port,
                protocol=protocol,
            )

        return None

    def _build_gateway_headers(self) -> dict[str, str]:
        """
        Headers for gateway-style endpoints.

        Per our SDK rule: ONLY public_token/public_key exist.
        """
        self._require_public_credentials()
        return build_public_api_headers(self.public_token or "", self.public_key or "")