thordata-sdk 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1733 @@
1
+ """
2
+ Asynchronous client for the Thordata API.
3
+
4
+ This module provides the AsyncThordataClient for high-concurrency workloads,
5
+ built on aiohttp.
6
+
7
+ Example:
8
+ >>> import asyncio
9
+ >>> from thordata import AsyncThordataClient
10
+ >>>
11
+ >>> async def main():
12
+ ... async with AsyncThordataClient(
13
+ ... scraper_token="your_token",
14
+ ... public_token="your_public_token",
15
+ ... public_key="your_public_key"
16
+ ... ) as client:
17
+ ... response = await client.get("https://httpbin.org/ip")
18
+ ... print(await response.json())
19
+ >>>
20
+ >>> asyncio.run(main())
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import logging
27
+ import os
28
+ from datetime import date
29
+ from typing import Any
30
+
31
+ import aiohttp
32
+
33
+ from . import __version__ as _sdk_version
34
+ from ._utils import (
35
+ build_auth_headers,
36
+ build_builder_headers,
37
+ build_public_api_headers,
38
+ build_user_agent,
39
+ decode_base64_image,
40
+ extract_error_message,
41
+ parse_json_response,
42
+ )
43
+ from .enums import Engine, ProxyType
44
+ from .exceptions import (
45
+ ThordataConfigError,
46
+ ThordataNetworkError,
47
+ ThordataTimeoutError,
48
+ raise_for_code,
49
+ )
50
+ from .models import (
51
+ CommonSettings,
52
+ ProxyConfig,
53
+ ProxyProduct,
54
+ ProxyServer,
55
+ ProxyUserList,
56
+ ScraperTaskConfig,
57
+ SerpRequest,
58
+ UniversalScrapeRequest,
59
+ UsageStatistics,
60
+ VideoTaskConfig,
61
+ )
62
+ from .retry import RetryConfig
63
+ from .serp_engines import AsyncSerpNamespace
64
+
65
+ logger = logging.getLogger(__name__)
66
+
67
+
68
+ class AsyncThordataClient:
69
+ """
70
+ The official asynchronous Python client for Thordata.
71
+
72
+ Designed for high-concurrency AI agents and data pipelines.
73
+
74
+ Args:
75
+ scraper_token: The API token from your Dashboard.
76
+ public_token: The public API token.
77
+ public_key: The public API key.
78
+ proxy_host: Custom proxy gateway host.
79
+ proxy_port: Custom proxy gateway port.
80
+ timeout: Default request timeout in seconds.
81
+ retry_config: Configuration for automatic retries.
82
+
83
+ Example:
84
+ >>> async with AsyncThordataClient(
85
+ ... scraper_token="token",
86
+ ... public_token="pub_token",
87
+ ... public_key="pub_key"
88
+ ... ) as client:
89
+ ... # Old style
90
+ ... results = await client.serp_search("python")
91
+ ... # New style (Namespaced)
92
+ ... maps_results = await client.serp.google.maps("coffee", "@40.7,-74.0,14z")
93
+ """
94
+
95
+ # API Endpoints (same as sync client)
96
+ BASE_URL = "https://scraperapi.thordata.com"
97
+ UNIVERSAL_URL = "https://universalapi.thordata.com"
98
+ API_URL = "https://openapi.thordata.com/api/web-scraper-api"
99
+ LOCATIONS_URL = "https://openapi.thordata.com/api/locations"
100
+
101
    def __init__(
        self,
        scraper_token: str | None = None,  # Optional: only required for scraping APIs
        public_token: str | None = None,
        public_key: str | None = None,
        proxy_host: str = "pr.thordata.net",
        proxy_port: int = 9999,
        timeout: int = 30,
        api_timeout: int = 60,
        retry_config: RetryConfig | None = None,
        auth_mode: str = "bearer",
        scraperapi_base_url: str | None = None,
        universalapi_base_url: str | None = None,
        web_scraper_api_base_url: str | None = None,
        locations_base_url: str | None = None,
    ) -> None:
        """Initialize the Async Thordata Client.

        Args:
            scraper_token: Dashboard API token for SERP/Universal/Builder calls.
            public_token: Public API token (task management / account APIs).
            public_key: Public API key paired with ``public_token``.
            proxy_host: Proxy Network gateway host.
            proxy_port: Proxy Network gateway port.
            timeout: Default timeout in seconds for proxy requests.
            api_timeout: Timeout in seconds for API calls; also used as the
                session-wide default (see ``__aenter__``).
            retry_config: Retry policy; ``RetryConfig()`` is used when omitted.
            auth_mode: ``"bearer"`` or ``"header_token"``.
            scraperapi_base_url: Override for the Scraper API base URL.
            universalapi_base_url: Override for the Universal API base URL.
            web_scraper_api_base_url: Override for the Web Scraper API base URL.
            locations_base_url: Override for the locations API base URL.

        Raises:
            ThordataConfigError: If ``auth_mode`` is not a supported value.
        """

        self.scraper_token = scraper_token
        self.public_token = public_token
        self.public_key = public_key

        # Proxy configuration
        self._proxy_host = proxy_host
        self._proxy_port = proxy_port

        # Timeout configuration
        self._default_timeout = aiohttp.ClientTimeout(total=timeout)
        self._api_timeout = aiohttp.ClientTimeout(total=api_timeout)

        # Retry configuration
        self._retry_config = retry_config or RetryConfig()

        # Authentication mode (for scraping APIs); validated eagerly so a bad
        # value fails at construction time rather than on the first request.
        self._auth_mode = auth_mode.lower()
        if self._auth_mode not in ("bearer", "header_token"):
            raise ThordataConfigError(
                f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
            )

        # Base URLs (allow override via args or env vars for testing and custom routing)
        # Precedence: explicit argument > environment variable > class default.
        scraperapi_base = (
            scraperapi_base_url
            or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
            or self.BASE_URL
        ).rstrip("/")

        universalapi_base = (
            universalapi_base_url
            or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
            or self.UNIVERSAL_URL
        ).rstrip("/")

        web_scraper_api_base = (
            web_scraper_api_base_url
            or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
            or self.API_URL
        ).rstrip("/")

        locations_base = (
            locations_base_url
            or os.getenv("THORDATA_LOCATIONS_BASE_URL")
            or self.LOCATIONS_URL
        ).rstrip("/")

        # Keep these env overrides for now
        gateway_base = os.getenv(
            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
        )
        child_base = os.getenv(
            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
        )

        self._gateway_base_url = gateway_base
        self._child_base_url = child_base

        # Endpoint URLs derived from the (possibly overridden) bases.
        self._serp_url = f"{scraperapi_base}/request"
        self._builder_url = f"{scraperapi_base}/builder"
        self._video_builder_url = f"{scraperapi_base}/video_builder"
        self._universal_url = f"{universalapi_base}/request"

        self._status_url = f"{web_scraper_api_base}/tasks-status"
        self._download_url = f"{web_scraper_api_base}/tasks-download"
        self._list_url = f"{web_scraper_api_base}/tasks-list"

        self._locations_base_url = locations_base
        # Account-level endpoints live one path level above '/locations'.
        self._usage_stats_url = (
            f"{locations_base.replace('/locations', '')}/account/usage-statistics"
        )
        self._proxy_users_url = (
            f"{locations_base.replace('/locations', '')}/proxy-users"
        )

        whitelist_base = os.getenv(
            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
        )
        self._whitelist_url = f"{whitelist_base}/whitelisted-ips"

        proxy_api_base = os.getenv(
            "THORDATA_PROXY_API_BASE_URL", "https://openapi.thordata.com/api"
        )
        self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
        self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"

        # Session initialized lazily (created inside the running event loop
        # by __aenter__).
        self._session: aiohttp.ClientSession | None = None

        # Namespaced Access (e.g. client.serp.google.maps(...))
        self.serp = AsyncSerpNamespace(self)
210
+
211
+ async def __aenter__(self) -> AsyncThordataClient:
212
+ """Async context manager entry."""
213
+ if self._session is None or self._session.closed:
214
+ self._session = aiohttp.ClientSession(
215
+ timeout=self._api_timeout,
216
+ trust_env=True,
217
+ headers={"User-Agent": build_user_agent(_sdk_version, "aiohttp")},
218
+ )
219
+ return self
220
+
221
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
222
+ """Async context manager exit."""
223
+ await self.close()
224
+
225
+ async def close(self) -> None:
226
+ """Close the underlying aiohttp session."""
227
+ if self._session and not self._session.closed:
228
+ await self._session.close()
229
+ self._session = None
230
+
231
+ def _get_session(self) -> aiohttp.ClientSession:
232
+ """Get the session, raising if not initialized."""
233
+ if self._session is None or self._session.closed:
234
+ raise RuntimeError(
235
+ "Client session not initialized. "
236
+ "Use 'async with AsyncThordataClient(...) as client:'"
237
+ )
238
+ return self._session
239
+
240
+ # =========================================================================
241
+ # Proxy Network Methods
242
+ # =========================================================================
243
+
244
+ async def get(
245
+ self,
246
+ url: str,
247
+ *,
248
+ proxy_config: ProxyConfig | None = None,
249
+ **kwargs: Any,
250
+ ) -> aiohttp.ClientResponse:
251
+ """
252
+ Send an async GET request through the Proxy Network.
253
+
254
+ Args:
255
+ url: The target URL.
256
+ proxy_config: Custom proxy configuration.
257
+ **kwargs: Additional aiohttp arguments.
258
+
259
+ Returns:
260
+ The aiohttp response object.
261
+ """
262
+ session = self._get_session()
263
+
264
+ logger.debug(f"Async Proxy GET: {url}")
265
+
266
+ if proxy_config is None:
267
+ proxy_config = self._get_default_proxy_config_from_env()
268
+
269
+ if proxy_config is None:
270
+ raise ThordataConfigError(
271
+ "Proxy credentials are missing. "
272
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
273
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
274
+ )
275
+
276
+ # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
277
+ # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
278
+ # to avoid confusing "it always fails" behavior.
279
+ if getattr(proxy_config, "protocol", "http").lower() == "https":
280
+ raise ThordataConfigError(
281
+ "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
282
+ "aiohttp support for 'https://' proxies is limited and may fail. "
283
+ "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
284
+ )
285
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
286
+
287
+ try:
288
+ return await session.get(
289
+ url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
290
+ )
291
+ except asyncio.TimeoutError as e:
292
+ raise ThordataTimeoutError(
293
+ f"Async request timed out: {e}", original_error=e
294
+ ) from e
295
+ except aiohttp.ClientError as e:
296
+ raise ThordataNetworkError(
297
+ f"Async request failed: {e}", original_error=e
298
+ ) from e
299
+
300
+ async def post(
301
+ self,
302
+ url: str,
303
+ *,
304
+ proxy_config: ProxyConfig | None = None,
305
+ **kwargs: Any,
306
+ ) -> aiohttp.ClientResponse:
307
+ """
308
+ Send an async POST request through the Proxy Network.
309
+
310
+ Args:
311
+ url: The target URL.
312
+ proxy_config: Custom proxy configuration.
313
+ **kwargs: Additional aiohttp arguments.
314
+
315
+ Returns:
316
+ The aiohttp response object.
317
+ """
318
+ session = self._get_session()
319
+
320
+ logger.debug(f"Async Proxy POST: {url}")
321
+
322
+ if proxy_config is None:
323
+ proxy_config = self._get_default_proxy_config_from_env()
324
+
325
+ if proxy_config is None:
326
+ raise ThordataConfigError(
327
+ "Proxy credentials are missing. "
328
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
329
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
330
+ )
331
+
332
+ # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
333
+ # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
334
+ # to avoid confusing "it always fails" behavior.
335
+ if getattr(proxy_config, "protocol", "http").lower() == "https":
336
+ raise ThordataConfigError(
337
+ "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
338
+ "aiohttp support for 'https://' proxies is limited and may fail. "
339
+ "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
340
+ )
341
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
342
+
343
+ try:
344
+ return await session.post(
345
+ url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
346
+ )
347
+ except asyncio.TimeoutError as e:
348
+ raise ThordataTimeoutError(
349
+ f"Async request timed out: {e}", original_error=e
350
+ ) from e
351
+ except aiohttp.ClientError as e:
352
+ raise ThordataNetworkError(
353
+ f"Async request failed: {e}", original_error=e
354
+ ) from e
355
+
356
+ # =========================================================================
357
+ # SERP API Methods
358
+ # =========================================================================
359
+
360
+ async def serp_search(
361
+ self,
362
+ query: str,
363
+ *,
364
+ engine: Engine | str = Engine.GOOGLE,
365
+ num: int = 10,
366
+ country: str | None = None,
367
+ language: str | None = None,
368
+ search_type: str | None = None,
369
+ device: str | None = None,
370
+ render_js: bool | None = None,
371
+ no_cache: bool | None = None,
372
+ output_format: str = "json",
373
+ **kwargs: Any,
374
+ ) -> dict[str, Any]:
375
+ """
376
+ Execute an async SERP search.
377
+
378
+ Args:
379
+ query: Search keywords.
380
+ engine: Search engine.
381
+ num: Number of results.
382
+ country: Country code for localization.
383
+ language: Language code.
384
+ search_type: Type of search.
385
+ device: Device type ('desktop', 'mobile', 'tablet').
386
+ render_js: Enable JavaScript rendering in SERP.
387
+ no_cache: Disable internal caching.
388
+ output_format: 'json' or 'html'.
389
+ **kwargs: Additional parameters.
390
+
391
+ Returns:
392
+ Parsed JSON results or dict with 'html' key.
393
+ """
394
+ if not self.scraper_token:
395
+ raise ThordataConfigError("scraper_token is required for SERP API")
396
+
397
+ session = self._get_session()
398
+
399
+ engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
400
+
401
+ request = SerpRequest(
402
+ query=query,
403
+ engine=engine_str,
404
+ num=num,
405
+ country=country,
406
+ language=language,
407
+ search_type=search_type,
408
+ device=device,
409
+ render_js=render_js,
410
+ no_cache=no_cache,
411
+ output_format=output_format,
412
+ extra_params=kwargs,
413
+ )
414
+
415
+ payload = request.to_payload()
416
+ token = self.scraper_token or ""
417
+ headers = build_auth_headers(token, mode=self._auth_mode)
418
+
419
+ logger.info(f"Async SERP Search: {engine_str} - {query}")
420
+
421
+ try:
422
+ async with session.post(
423
+ self._serp_url,
424
+ data=payload,
425
+ headers=headers,
426
+ ) as response:
427
+ response.raise_for_status()
428
+
429
+ if output_format.lower() == "json":
430
+ data = await response.json()
431
+
432
+ if isinstance(data, dict):
433
+ code = data.get("code")
434
+ if code is not None and code != 200:
435
+ msg = extract_error_message(data)
436
+ raise_for_code(
437
+ f"SERP API Error: {msg}",
438
+ code=code,
439
+ payload=data,
440
+ )
441
+
442
+ return parse_json_response(data)
443
+
444
+ text = await response.text()
445
+ return {"html": text}
446
+
447
+ except asyncio.TimeoutError as e:
448
+ raise ThordataTimeoutError(
449
+ f"SERP request timed out: {e}",
450
+ original_error=e,
451
+ ) from e
452
+ except aiohttp.ClientError as e:
453
+ raise ThordataNetworkError(
454
+ f"SERP request failed: {e}",
455
+ original_error=e,
456
+ ) from e
457
+
458
    async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
        """
        Execute an async SERP search using a SerpRequest object.

        Args:
            request: Fully configured SERP request.

        Returns:
            Parsed JSON results when ``request.output_format`` is "json",
            otherwise a dict of the form ``{"html": <raw response text>}``.

        Raises:
            ThordataConfigError: If ``scraper_token`` is not set.
            ThordataTimeoutError: If the request times out.
            ThordataNetworkError: For other aiohttp failures.
        """
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for SERP API")

        payload = request.to_payload()
        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)

        logger.info(f"Async SERP Advanced: {request.engine} - {request.query}")

        try:
            async with session.post(
                self._serp_url,
                data=payload,
                headers=headers,
            ) as response:
                response.raise_for_status()

                if request.output_format.lower() == "json":
                    data = await response.json()

                    # The API can return HTTP 200 with an error code in the
                    # JSON body; surface those as typed exceptions.
                    if isinstance(data, dict):
                        code = data.get("code")
                        if code is not None and code != 200:
                            msg = extract_error_message(data)
                            raise_for_code(
                                f"SERP API Error: {msg}",
                                code=code,
                                payload=data,
                            )

                    return parse_json_response(data)

                # Non-JSON output: hand back the raw body under an 'html' key.
                text = await response.text()
                return {"html": text}

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"SERP request timed out: {e}",
                original_error=e,
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"SERP request failed: {e}",
                original_error=e,
            ) from e
507
+
508
+ # =========================================================================
509
+ # Universal Scraping API Methods
510
+ # =========================================================================
511
+
512
+ async def universal_scrape(
513
+ self,
514
+ url: str,
515
+ *,
516
+ js_render: bool = False,
517
+ output_format: str = "html",
518
+ country: str | None = None,
519
+ block_resources: str | None = None,
520
+ wait: int | None = None,
521
+ wait_for: str | None = None,
522
+ **kwargs: Any,
523
+ ) -> str | bytes:
524
+ """
525
+ Async scrape using Universal API (Web Unlocker).
526
+
527
+ Args:
528
+ url: Target URL.
529
+ js_render: Enable JavaScript rendering.
530
+ output_format: "html" or "png".
531
+ country: Geo-targeting country.
532
+ block_resources: Resources to block.
533
+ wait: Wait time in ms.
534
+ wait_for: CSS selector to wait for.
535
+
536
+ Returns:
537
+ HTML string or PNG bytes.
538
+ """
539
+ request = UniversalScrapeRequest(
540
+ url=url,
541
+ js_render=js_render,
542
+ output_format=output_format,
543
+ country=country,
544
+ block_resources=block_resources,
545
+ wait=wait,
546
+ wait_for=wait_for,
547
+ extra_params=kwargs,
548
+ )
549
+
550
+ return await self.universal_scrape_advanced(request)
551
+
552
    async def universal_scrape_advanced(
        self, request: UniversalScrapeRequest
    ) -> str | bytes:
        """
        Async scrape using a UniversalScrapeRequest object.

        Args:
            request: Fully configured Universal API request.

        Returns:
            HTML string, decoded PNG bytes, or the stringified payload when
            the response matches neither known shape.

        Raises:
            ThordataConfigError: If ``scraper_token`` is not set.
            ThordataTimeoutError: If the request times out.
            ThordataNetworkError: For other aiohttp failures.
        """
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError("scraper_token is required for Universal API")

        payload = request.to_payload()
        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)

        logger.info(f"Async Universal Scrape: {request.url}")

        try:
            async with session.post(
                self._universal_url, data=payload, headers=headers
            ) as response:
                response.raise_for_status()

                # The API may answer with raw (non-JSON) content; in that
                # case return the body as-is in the requested format.
                try:
                    resp_json = await response.json()
                except ValueError:
                    if request.output_format.lower() == "png":
                        return await response.read()
                    return await response.text()

                # Check for API errors reported inside an HTTP 200 body
                if isinstance(resp_json, dict):
                    code = resp_json.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(resp_json)
                        raise_for_code(
                            f"Universal API Error: {msg}", code=code, payload=resp_json
                        )

                if "html" in resp_json:
                    return resp_json["html"]

                # Screenshots arrive base64-encoded under "png".
                if "png" in resp_json:
                    return decode_base64_image(resp_json["png"])

                return str(resp_json)

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Universal scrape timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Universal scrape failed: {e}", original_error=e
            ) from e
605
+
606
+ # =========================================================================
607
+ # Web Scraper API Methods
608
+ # =========================================================================
609
+
610
+ async def create_scraper_task(
611
+ self,
612
+ file_name: str,
613
+ spider_id: str,
614
+ spider_name: str,
615
+ parameters: dict[str, Any],
616
+ universal_params: dict[str, Any] | None = None,
617
+ ) -> str:
618
+ """
619
+ Create an async Web Scraper task.
620
+ """
621
+ config = ScraperTaskConfig(
622
+ file_name=file_name,
623
+ spider_id=spider_id,
624
+ spider_name=spider_name,
625
+ parameters=parameters,
626
+ universal_params=universal_params,
627
+ )
628
+
629
+ return await self.create_scraper_task_advanced(config)
630
+
631
+ async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
632
+ """
633
+ Create a task using ScraperTaskConfig.
634
+ """
635
+ self._require_public_credentials()
636
+ session = self._get_session()
637
+ if not self.scraper_token:
638
+ raise ThordataConfigError("scraper_token is required for Task Builder")
639
+
640
+ payload = config.to_payload()
641
+ # Builder needs 3 headers: token, key, Authorization Bearer
642
+ headers = build_builder_headers(
643
+ self.scraper_token,
644
+ self.public_token or "",
645
+ self.public_key or "",
646
+ )
647
+
648
+ logger.info(f"Async Task Creation: {config.spider_name}")
649
+
650
+ try:
651
+ async with session.post(
652
+ self._builder_url, data=payload, headers=headers
653
+ ) as response:
654
+ response.raise_for_status()
655
+ data = await response.json(content_type=None)
656
+
657
+ code = data.get("code")
658
+ if code != 200:
659
+ msg = extract_error_message(data)
660
+ raise_for_code(
661
+ f"Task creation failed: {msg}", code=code, payload=data
662
+ )
663
+
664
+ return data["data"]["task_id"]
665
+
666
+ except aiohttp.ClientError as e:
667
+ raise ThordataNetworkError(
668
+ f"Task creation failed: {e}", original_error=e
669
+ ) from e
670
+
671
+ async def create_video_task(
672
+ self,
673
+ file_name: str,
674
+ spider_id: str,
675
+ spider_name: str,
676
+ parameters: dict[str, Any],
677
+ common_settings: CommonSettings,
678
+ ) -> str:
679
+ """
680
+ Create a YouTube video/audio download task.
681
+ """
682
+
683
+ config = VideoTaskConfig(
684
+ file_name=file_name,
685
+ spider_id=spider_id,
686
+ spider_name=spider_name,
687
+ parameters=parameters,
688
+ common_settings=common_settings,
689
+ )
690
+
691
+ return await self.create_video_task_advanced(config)
692
+
693
    async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
        """
        Create a video task using VideoTaskConfig object.

        Args:
            config: Full video task configuration.

        Returns:
            The task id assigned by the API.

        Raises:
            ThordataConfigError: If required credentials are missing.
            ThordataTimeoutError: If the request times out.
            ThordataNetworkError: For other aiohttp failures.
        """

        self._require_public_credentials()
        session = self._get_session()
        if not self.scraper_token:
            raise ThordataConfigError(
                "scraper_token is required for Video Task Builder"
            )

        payload = config.to_payload()
        # Video builder uses the same 3-header auth scheme as the task builder.
        headers = build_builder_headers(
            self.scraper_token,
            self.public_token or "",
            self.public_key or "",
        )

        logger.info(
            f"Async Video Task Creation: {config.spider_name} - {config.spider_id}"
        )

        try:
            async with session.post(
                self._video_builder_url,
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                # NOTE(review): unlike create_scraper_task_advanced this uses
                # strict content-type checking (no content_type=None) — confirm
                # the endpoint always sends application/json.
                data = await response.json()

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(
                        f"Video task creation failed: {msg}", code=code, payload=data
                    )

                return data["data"]["task_id"]

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Video task creation timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Video task creation failed: {e}", original_error=e
            ) from e
743
+
744
    async def get_task_status(self, task_id: str) -> str:
        """
        Check async task status.

        Args:
            task_id: The task identifier returned at creation time.

        Returns:
            The status string for the task, or "unknown" when the task is not
            present in the response.

        Raises:
            ThordataConfigError: If public credentials are missing.
            ThordataAPIError: If API returns a non-200 code in JSON payload.
            ThordataNetworkError: If network/HTTP request fails.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )
        # API field is plural ("tasks_ids") — the endpoint accepts id lists;
        # here a single id is sent.
        payload = {"tasks_ids": task_id}

        try:
            async with session.post(
                self._status_url, data=payload, headers=headers
            ) as response:
                response.raise_for_status()
                # content_type=None: accept any Content-Type header on the body.
                data = await response.json(content_type=None)

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"Task status API Error: {msg}",
                            code=code,
                            payload=data,
                        )

                    # Scan the returned task list for our id; ids are compared
                    # as strings since the API may return them numerically.
                    items = data.get("data") or []
                    for item in items:
                        if str(item.get("task_id")) == str(task_id):
                            return item.get("status", "unknown")

                    return "unknown"

                raise ThordataNetworkError(
                    f"Unexpected task status response type: {type(data).__name__}",
                    original_error=None,
                )

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Async status check timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Async status check failed: {e}", original_error=e
            ) from e
798
+
799
+ async def safe_get_task_status(self, task_id: str) -> str:
800
+ """
801
+ Backward-compatible status check.
802
+
803
+ Returns:
804
+ Status string, or "error" on any exception.
805
+ """
806
+ try:
807
+ return await self.get_task_status(task_id)
808
+ except Exception:
809
+ return "error"
810
+
811
+ async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
812
+ """
813
+ Get download URL for completed task.
814
+ """
815
+ self._require_public_credentials()
816
+ session = self._get_session()
817
+
818
+ headers = build_public_api_headers(
819
+ self.public_token or "", self.public_key or ""
820
+ )
821
+ payload = {"tasks_id": task_id, "type": file_type}
822
+
823
+ logger.info(f"Async getting result for Task: {task_id}")
824
+
825
+ try:
826
+ async with session.post(
827
+ self._download_url, data=payload, headers=headers
828
+ ) as response:
829
+ data = await response.json(content_type=None)
830
+ code = data.get("code")
831
+
832
+ if code == 200 and data.get("data"):
833
+ return data["data"]["download"]
834
+
835
+ msg = extract_error_message(data)
836
+ raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
837
+ # This line won't be reached, but satisfies mypy
838
+ raise RuntimeError("Unexpected state")
839
+
840
+ except aiohttp.ClientError as e:
841
+ raise ThordataNetworkError(
842
+ f"Get result failed: {e}", original_error=e
843
+ ) from e
844
+
845
    async def list_tasks(
        self,
        page: int = 1,
        size: int = 20,
    ) -> dict[str, Any]:
        """
        List all Web Scraper tasks.

        Args:
            page: Page number (starts from 1).
            size: Number of tasks per page.

        Returns:
            Dict containing 'count' and 'list' of tasks.

        Raises:
            ThordataConfigError: If public credentials are missing.
            ThordataTimeoutError: If the request times out.
            ThordataNetworkError: For other aiohttp failures.
        """
        self._require_public_credentials()
        session = self._get_session()

        headers = build_public_api_headers(
            self.public_token or "", self.public_key or ""
        )
        # Pagination values are sent as strings; zero/falsy values are simply
        # omitted from the payload.
        payload: dict[str, Any] = {}
        if page:
            payload["page"] = str(page)
        if size:
            payload["size"] = str(size)

        logger.info(f"Async listing tasks: page={page}, size={size}")

        try:
            async with session.post(
                self._list_url,
                data=payload,
                headers=headers,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json(content_type=None)

                code = data.get("code")
                if code != 200:
                    msg = extract_error_message(data)
                    raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)

                # Empty result shape mirrors the API's data envelope.
                return data.get("data", {"count": 0, "list": []})

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"List tasks timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"List tasks failed: {e}", original_error=e
            ) from e
899
+
900
+ async def wait_for_task(
901
+ self,
902
+ task_id: str,
903
+ *,
904
+ poll_interval: float = 5.0,
905
+ max_wait: float = 600.0,
906
+ ) -> str:
907
+ """
908
+ Wait for a task to complete.
909
+ """
910
+
911
+ import time
912
+
913
+ start = time.monotonic()
914
+
915
+ while (time.monotonic() - start) < max_wait:
916
+ status = await self.get_task_status(task_id)
917
+
918
+ logger.debug(f"Task {task_id} status: {status}")
919
+
920
+ terminal_statuses = {
921
+ "ready",
922
+ "success",
923
+ "finished",
924
+ "failed",
925
+ "error",
926
+ "cancelled",
927
+ }
928
+
929
+ if status.lower() in terminal_statuses:
930
+ return status
931
+
932
+ await asyncio.sleep(poll_interval)
933
+
934
+ raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
935
+
936
    async def run_task(
        self,
        file_name: str,
        spider_id: str,
        spider_name: str,
        parameters: dict[str, Any],
        universal_params: dict[str, Any] | None = None,
        *,
        max_wait: float = 600.0,
        initial_poll_interval: float = 2.0,
        max_poll_interval: float = 10.0,
        include_errors: bool = True,
    ) -> str:
        """
        Async high-level wrapper to Run a Web Scraper task and wait for result.

        Lifecycle: Create -> Poll (Backoff) -> Get Download URL.

        Args:
            file_name: Name for the result file.
            spider_id: Spider identifier.
            spider_name: Spider display name.
            parameters: Spider parameters.
            universal_params: Optional Universal API parameters.
            max_wait: Maximum total seconds to wait for completion.
            initial_poll_interval: First polling delay; grows by 1.5x per
                poll up to ``max_poll_interval``.
            max_poll_interval: Upper bound on the polling delay.
            include_errors: Whether to include error rows in the result.

        Returns:
            str: The download URL.

        Raises:
            ThordataNetworkError: If the task ends in a failed state.
            ThordataTimeoutError: If the task does not finish in time.
        """
        # 1. Create Task
        config = ScraperTaskConfig(
            file_name=file_name,
            spider_id=spider_id,
            spider_name=spider_name,
            parameters=parameters,
            universal_params=universal_params,
            include_errors=include_errors,
        )
        task_id = await self.create_scraper_task_advanced(config)
        logger.info(f"Async Task created: {task_id}. Polling...")

        # 2. Poll Status
        import time

        start_time = time.monotonic()
        current_poll = initial_poll_interval

        while (time.monotonic() - start_time) < max_wait:
            status = await self.get_task_status(task_id)
            status_lower = status.lower()

            if status_lower in {"ready", "success", "finished"}:
                logger.info(f"Task {task_id} ready.")
                # 3. Get Result
                return await self.get_task_result(task_id)

            if status_lower in {"failed", "error", "cancelled"}:
                raise ThordataNetworkError(
                    f"Task {task_id} failed with status: {status}"
                )

            # Exponential backoff, capped at max_poll_interval.
            await asyncio.sleep(current_poll)
            current_poll = min(current_poll * 1.5, max_poll_interval)

        raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
993
+
994
+ # =========================================================================
995
+ # Proxy Account Management Methods
996
+ # =========================================================================
997
+
998
    async def get_usage_statistics(
        self,
        from_date: str | date,
        to_date: str | date,
    ) -> UsageStatistics:
        """
        Get account usage statistics for a date range.

        Args:
            from_date: Start date (YYYY-MM-DD string or date object).
            to_date: End date (YYYY-MM-DD string or date object).

        Returns:
            UsageStatistics object with traffic data.

        Raises:
            ThordataConfigError: If public credentials are missing.
            ThordataTimeoutError: If the request times out.
            ThordataNetworkError: For other aiohttp failures or an
                unexpected response shape.
        """

        self._require_public_credentials()
        session = self._get_session()

        # Convert dates to strings (the API expects YYYY-MM-DD)
        if isinstance(from_date, date):
            from_date = from_date.strftime("%Y-%m-%d")
        if isinstance(to_date, date):
            to_date = to_date.strftime("%Y-%m-%d")

        # NOTE: this endpoint authenticates via query parameters rather than
        # headers, unlike the task APIs.
        params = {
            "token": self.public_token,
            "key": self.public_key,
            "from_date": from_date,
            "to_date": to_date,
        }

        logger.info(f"Async getting usage statistics: {from_date} to {to_date}")

        try:
            async with session.get(
                self._usage_stats_url,
                params=params,
                timeout=self._api_timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()

                if isinstance(data, dict):
                    code = data.get("code")
                    if code is not None and code != 200:
                        msg = extract_error_message(data)
                        raise_for_code(
                            f"Usage statistics error: {msg}",
                            code=code,
                            payload=data,
                        )

                    # Some responses wrap the stats under "data"; fall back
                    # to the top-level dict when the wrapper is absent.
                    usage_data = data.get("data", data)
                    return UsageStatistics.from_dict(usage_data)

                raise ThordataNetworkError(
                    f"Unexpected usage statistics response: {type(data).__name__}",
                    original_error=None,
                )

        except asyncio.TimeoutError as e:
            raise ThordataTimeoutError(
                f"Usage statistics timed out: {e}", original_error=e
            ) from e
        except aiohttp.ClientError as e:
            raise ThordataNetworkError(
                f"Usage statistics failed: {e}", original_error=e
            ) from e
1067
+
1068
+ async def get_residential_balance(self) -> dict[str, Any]:
1069
+ """
1070
+ Get residential proxy balance.
1071
+
1072
+ Uses public_token/public_key.
1073
+ """
1074
+ session = self._get_session()
1075
+ headers = self._build_gateway_headers()
1076
+
1077
+ logger.info("Async getting residential proxy balance")
1078
+
1079
+ try:
1080
+ async with session.post(
1081
+ f"{self._gateway_base_url}/getFlowBalance",
1082
+ headers=headers,
1083
+ data={},
1084
+ timeout=self._api_timeout,
1085
+ ) as response:
1086
+ response.raise_for_status()
1087
+ data = await response.json()
1088
+
1089
+ code = data.get("code")
1090
+ if code != 200:
1091
+ msg = extract_error_message(data)
1092
+ raise_for_code(
1093
+ f"Get balance failed: {msg}", code=code, payload=data
1094
+ )
1095
+
1096
+ return data.get("data", {})
1097
+
1098
+ except asyncio.TimeoutError as e:
1099
+ raise ThordataTimeoutError(
1100
+ f"Get balance timed out: {e}", original_error=e
1101
+ ) from e
1102
+ except aiohttp.ClientError as e:
1103
+ raise ThordataNetworkError(
1104
+ f"Get balance failed: {e}", original_error=e
1105
+ ) from e
1106
+
1107
+ async def get_residential_usage(
1108
+ self,
1109
+ start_time: str | int,
1110
+ end_time: str | int,
1111
+ ) -> dict[str, Any]:
1112
+ """
1113
+ Get residential proxy usage records.
1114
+
1115
+ Uses public_token/public_key.
1116
+ """
1117
+ session = self._get_session()
1118
+ headers = self._build_gateway_headers()
1119
+ payload = {"start_time": str(start_time), "end_time": str(end_time)}
1120
+
1121
+ logger.info(f"Async getting residential usage: {start_time} to {end_time}")
1122
+
1123
+ try:
1124
+ async with session.post(
1125
+ f"{self._gateway_base_url}/usageRecord",
1126
+ headers=headers,
1127
+ data=payload,
1128
+ timeout=self._api_timeout,
1129
+ ) as response:
1130
+ response.raise_for_status()
1131
+ data = await response.json()
1132
+
1133
+ code = data.get("code")
1134
+ if code != 200:
1135
+ msg = extract_error_message(data)
1136
+ raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
1137
+
1138
+ return data.get("data", {})
1139
+
1140
+ except asyncio.TimeoutError as e:
1141
+ raise ThordataTimeoutError(
1142
+ f"Get usage timed out: {e}", original_error=e
1143
+ ) from e
1144
+ except aiohttp.ClientError as e:
1145
+ raise ThordataNetworkError(
1146
+ f"Get usage failed: {e}", original_error=e
1147
+ ) from e
1148
+
1149
+ async def list_proxy_users(
1150
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
1151
+ ) -> ProxyUserList:
1152
+ """List all proxy users (sub-accounts)."""
1153
+
1154
+ self._require_public_credentials()
1155
+ session = self._get_session()
1156
+
1157
+ params = {
1158
+ "token": self.public_token,
1159
+ "key": self.public_key,
1160
+ "proxy_type": str(
1161
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1162
+ ),
1163
+ }
1164
+
1165
+ logger.info(f"Async listing proxy users: type={params['proxy_type']}")
1166
+
1167
+ try:
1168
+ async with session.get(
1169
+ f"{self._proxy_users_url}/user-list",
1170
+ params=params,
1171
+ timeout=self._api_timeout,
1172
+ ) as response:
1173
+ response.raise_for_status()
1174
+ data = await response.json()
1175
+
1176
+ if isinstance(data, dict):
1177
+ code = data.get("code")
1178
+ if code is not None and code != 200:
1179
+ msg = extract_error_message(data)
1180
+ raise_for_code(
1181
+ f"List proxy users error: {msg}", code=code, payload=data
1182
+ )
1183
+
1184
+ user_data = data.get("data", data)
1185
+ return ProxyUserList.from_dict(user_data)
1186
+
1187
+ raise ThordataNetworkError(
1188
+ f"Unexpected proxy users response: {type(data).__name__}",
1189
+ original_error=None,
1190
+ )
1191
+
1192
+ except asyncio.TimeoutError as e:
1193
+ raise ThordataTimeoutError(
1194
+ f"List users timed out: {e}", original_error=e
1195
+ ) from e
1196
+ except aiohttp.ClientError as e:
1197
+ raise ThordataNetworkError(
1198
+ f"List users failed: {e}", original_error=e
1199
+ ) from e
1200
+
1201
+ async def create_proxy_user(
1202
+ self,
1203
+ username: str,
1204
+ password: str,
1205
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1206
+ traffic_limit: int = 0,
1207
+ status: bool = True,
1208
+ ) -> dict[str, Any]:
1209
+ """Create a new proxy user (sub-account)."""
1210
+ self._require_public_credentials()
1211
+ session = self._get_session()
1212
+
1213
+ headers = build_public_api_headers(
1214
+ self.public_token or "", self.public_key or ""
1215
+ )
1216
+
1217
+ payload = {
1218
+ "proxy_type": str(
1219
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1220
+ ),
1221
+ "username": username,
1222
+ "password": password,
1223
+ "traffic_limit": str(traffic_limit),
1224
+ "status": "true" if status else "false",
1225
+ }
1226
+
1227
+ logger.info(f"Async creating proxy user: {username}")
1228
+
1229
+ try:
1230
+ async with session.post(
1231
+ f"{self._proxy_users_url}/create-user",
1232
+ data=payload,
1233
+ headers=headers,
1234
+ timeout=self._api_timeout,
1235
+ ) as response:
1236
+ response.raise_for_status()
1237
+ data = await response.json()
1238
+
1239
+ code = data.get("code")
1240
+ if code != 200:
1241
+ msg = extract_error_message(data)
1242
+ raise_for_code(
1243
+ f"Create proxy user failed: {msg}", code=code, payload=data
1244
+ )
1245
+
1246
+ return data.get("data", {})
1247
+
1248
+ except asyncio.TimeoutError as e:
1249
+ raise ThordataTimeoutError(
1250
+ f"Create user timed out: {e}", original_error=e
1251
+ ) from e
1252
+ except aiohttp.ClientError as e:
1253
+ raise ThordataNetworkError(
1254
+ f"Create user failed: {e}", original_error=e
1255
+ ) from e
1256
+
1257
+ async def add_whitelist_ip(
1258
+ self,
1259
+ ip: str,
1260
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1261
+ status: bool = True,
1262
+ ) -> dict[str, Any]:
1263
+ """
1264
+ Add an IP to the whitelist for IP authentication.
1265
+ """
1266
+ self._require_public_credentials()
1267
+ session = self._get_session()
1268
+
1269
+ headers = build_public_api_headers(
1270
+ self.public_token or "", self.public_key or ""
1271
+ )
1272
+
1273
+ proxy_type_int = (
1274
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1275
+ )
1276
+
1277
+ payload = {
1278
+ "proxy_type": str(proxy_type_int),
1279
+ "ip": ip,
1280
+ "status": "true" if status else "false",
1281
+ }
1282
+
1283
+ logger.info(f"Async adding whitelist IP: {ip}")
1284
+
1285
+ try:
1286
+ async with session.post(
1287
+ f"{self._whitelist_url}/add-ip",
1288
+ data=payload,
1289
+ headers=headers,
1290
+ timeout=self._api_timeout,
1291
+ ) as response:
1292
+ response.raise_for_status()
1293
+ data = await response.json()
1294
+
1295
+ code = data.get("code")
1296
+ if code != 200:
1297
+ msg = extract_error_message(data)
1298
+ raise_for_code(
1299
+ f"Add whitelist IP failed: {msg}", code=code, payload=data
1300
+ )
1301
+
1302
+ return data.get("data", {})
1303
+
1304
+ except asyncio.TimeoutError as e:
1305
+ raise ThordataTimeoutError(
1306
+ f"Add whitelist timed out: {e}", original_error=e
1307
+ ) from e
1308
+ except aiohttp.ClientError as e:
1309
+ raise ThordataNetworkError(
1310
+ f"Add whitelist failed: {e}", original_error=e
1311
+ ) from e
1312
+
1313
+ async def list_proxy_servers(
1314
+ self,
1315
+ proxy_type: int,
1316
+ ) -> list[ProxyServer]:
1317
+ """
1318
+ List ISP or Datacenter proxy servers.
1319
+ """
1320
+
1321
+ self._require_public_credentials()
1322
+ session = self._get_session()
1323
+
1324
+ params = {
1325
+ "token": self.public_token,
1326
+ "key": self.public_key,
1327
+ "proxy_type": str(proxy_type),
1328
+ }
1329
+
1330
+ logger.info(f"Async listing proxy servers: type={proxy_type}")
1331
+
1332
+ try:
1333
+ async with session.get(
1334
+ self._proxy_list_url,
1335
+ params=params,
1336
+ timeout=self._api_timeout,
1337
+ ) as response:
1338
+ response.raise_for_status()
1339
+ data = await response.json()
1340
+
1341
+ if isinstance(data, dict):
1342
+ code = data.get("code")
1343
+ if code is not None and code != 200:
1344
+ msg = extract_error_message(data)
1345
+ raise_for_code(
1346
+ f"List proxy servers error: {msg}", code=code, payload=data
1347
+ )
1348
+
1349
+ server_list = data.get("data", data.get("list", []))
1350
+ elif isinstance(data, list):
1351
+ server_list = data
1352
+ else:
1353
+ raise ThordataNetworkError(
1354
+ f"Unexpected proxy list response: {type(data).__name__}",
1355
+ original_error=None,
1356
+ )
1357
+
1358
+ return [ProxyServer.from_dict(s) for s in server_list]
1359
+
1360
+ except asyncio.TimeoutError as e:
1361
+ raise ThordataTimeoutError(
1362
+ f"List servers timed out: {e}", original_error=e
1363
+ ) from e
1364
+ except aiohttp.ClientError as e:
1365
+ raise ThordataNetworkError(
1366
+ f"List servers failed: {e}", original_error=e
1367
+ ) from e
1368
+
1369
+ async def get_isp_regions(self) -> list[dict[str, Any]]:
1370
+ """
1371
+ Get available ISP proxy regions.
1372
+
1373
+ Uses public_token/public_key.
1374
+ """
1375
+ session = self._get_session()
1376
+ headers = self._build_gateway_headers()
1377
+
1378
+ logger.info("Async getting ISP regions")
1379
+
1380
+ try:
1381
+ async with session.post(
1382
+ f"{self._gateway_base_url}/getRegionIsp",
1383
+ headers=headers,
1384
+ data={},
1385
+ timeout=self._api_timeout,
1386
+ ) as response:
1387
+ response.raise_for_status()
1388
+ data = await response.json()
1389
+
1390
+ code = data.get("code")
1391
+ if code != 200:
1392
+ msg = extract_error_message(data)
1393
+ raise_for_code(
1394
+ f"Get ISP regions failed: {msg}", code=code, payload=data
1395
+ )
1396
+
1397
+ return data.get("data", [])
1398
+
1399
+ except asyncio.TimeoutError as e:
1400
+ raise ThordataTimeoutError(
1401
+ f"Get ISP regions timed out: {e}", original_error=e
1402
+ ) from e
1403
+ except aiohttp.ClientError as e:
1404
+ raise ThordataNetworkError(
1405
+ f"Get ISP regions failed: {e}", original_error=e
1406
+ ) from e
1407
+
1408
+ async def list_isp_proxies(self) -> list[dict[str, Any]]:
1409
+ """
1410
+ List ISP proxies.
1411
+
1412
+ Uses public_token/public_key.
1413
+ """
1414
+ session = self._get_session()
1415
+ headers = self._build_gateway_headers()
1416
+
1417
+ logger.info("Async listing ISP proxies")
1418
+
1419
+ try:
1420
+ async with session.post(
1421
+ f"{self._gateway_base_url}/queryListIsp",
1422
+ headers=headers,
1423
+ data={},
1424
+ timeout=self._api_timeout,
1425
+ ) as response:
1426
+ response.raise_for_status()
1427
+ data = await response.json()
1428
+
1429
+ code = data.get("code")
1430
+ if code != 200:
1431
+ msg = extract_error_message(data)
1432
+ raise_for_code(
1433
+ f"List ISP proxies failed: {msg}", code=code, payload=data
1434
+ )
1435
+
1436
+ return data.get("data", [])
1437
+
1438
+ except asyncio.TimeoutError as e:
1439
+ raise ThordataTimeoutError(
1440
+ f"List ISP proxies timed out: {e}", original_error=e
1441
+ ) from e
1442
+ except aiohttp.ClientError as e:
1443
+ raise ThordataNetworkError(
1444
+ f"List ISP proxies failed: {e}", original_error=e
1445
+ ) from e
1446
+
1447
+ async def get_wallet_balance(self) -> dict[str, Any]:
1448
+ """
1449
+ Get wallet balance for ISP proxies.
1450
+
1451
+ Uses public_token/public_key.
1452
+ """
1453
+ session = self._get_session()
1454
+ headers = self._build_gateway_headers()
1455
+
1456
+ logger.info("Async getting wallet balance")
1457
+
1458
+ try:
1459
+ async with session.post(
1460
+ f"{self._gateway_base_url}/getBalance",
1461
+ headers=headers,
1462
+ data={},
1463
+ timeout=self._api_timeout,
1464
+ ) as response:
1465
+ response.raise_for_status()
1466
+ data = await response.json()
1467
+
1468
+ code = data.get("code")
1469
+ if code != 200:
1470
+ msg = extract_error_message(data)
1471
+ raise_for_code(
1472
+ f"Get wallet balance failed: {msg}", code=code, payload=data
1473
+ )
1474
+
1475
+ return data.get("data", {})
1476
+
1477
+ except asyncio.TimeoutError as e:
1478
+ raise ThordataTimeoutError(
1479
+ f"Get wallet balance timed out: {e}", original_error=e
1480
+ ) from e
1481
+ except aiohttp.ClientError as e:
1482
+ raise ThordataNetworkError(
1483
+ f"Get wallet balance failed: {e}", original_error=e
1484
+ ) from e
1485
+
1486
+ async def get_proxy_expiration(
1487
+ self,
1488
+ ips: str | list[str],
1489
+ proxy_type: int,
1490
+ ) -> dict[str, Any]:
1491
+ """
1492
+ Get expiration time for specific proxy IPs.
1493
+ """
1494
+ self._require_public_credentials()
1495
+ session = self._get_session()
1496
+
1497
+ if isinstance(ips, list):
1498
+ ips = ",".join(ips)
1499
+
1500
+ params = {
1501
+ "token": self.public_token,
1502
+ "key": self.public_key,
1503
+ "proxy_type": str(proxy_type),
1504
+ "ips": ips,
1505
+ }
1506
+
1507
+ logger.info(f"Async getting proxy expiration: {ips}")
1508
+
1509
+ try:
1510
+ async with session.get(
1511
+ self._proxy_expiration_url,
1512
+ params=params,
1513
+ timeout=self._api_timeout,
1514
+ ) as response:
1515
+ response.raise_for_status()
1516
+ data = await response.json()
1517
+
1518
+ if isinstance(data, dict):
1519
+ code = data.get("code")
1520
+ if code is not None and code != 200:
1521
+ msg = extract_error_message(data)
1522
+ raise_for_code(
1523
+ f"Get expiration error: {msg}", code=code, payload=data
1524
+ )
1525
+
1526
+ return data.get("data", data)
1527
+
1528
+ return data
1529
+
1530
+ except asyncio.TimeoutError as e:
1531
+ raise ThordataTimeoutError(
1532
+ f"Get expiration timed out: {e}", original_error=e
1533
+ ) from e
1534
+ except aiohttp.ClientError as e:
1535
+ raise ThordataNetworkError(
1536
+ f"Get expiration failed: {e}", original_error=e
1537
+ ) from e
1538
+
1539
+ # =========================================================================
1540
+ # Location API Methods
1541
+ # =========================================================================
1542
+
1543
+ async def list_countries(
1544
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
1545
+ ) -> list[dict[str, Any]]:
1546
+ """List supported countries."""
1547
+ return await self._get_locations(
1548
+ "countries",
1549
+ proxy_type=(
1550
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1551
+ ),
1552
+ )
1553
+
1554
+ async def list_states(
1555
+ self,
1556
+ country_code: str,
1557
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1558
+ ) -> list[dict[str, Any]]:
1559
+ """List supported states for a country."""
1560
+ return await self._get_locations(
1561
+ "states",
1562
+ proxy_type=(
1563
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1564
+ ),
1565
+ country_code=country_code,
1566
+ )
1567
+
1568
+ async def list_cities(
1569
+ self,
1570
+ country_code: str,
1571
+ state_code: str | None = None,
1572
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1573
+ ) -> list[dict[str, Any]]:
1574
+ """List supported cities."""
1575
+ kwargs = {
1576
+ "proxy_type": (
1577
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1578
+ ),
1579
+ "country_code": country_code,
1580
+ }
1581
+ if state_code:
1582
+ kwargs["state_code"] = state_code
1583
+
1584
+ return await self._get_locations("cities", **kwargs)
1585
+
1586
+ async def list_asn(
1587
+ self,
1588
+ country_code: str,
1589
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1590
+ ) -> list[dict[str, Any]]:
1591
+ """List supported ASNs."""
1592
+ return await self._get_locations(
1593
+ "asn",
1594
+ proxy_type=(
1595
+ int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1596
+ ),
1597
+ country_code=country_code,
1598
+ )
1599
+
1600
+ async def _get_locations(
1601
+ self, endpoint: str, **kwargs: Any
1602
+ ) -> list[dict[str, Any]]:
1603
+ """Internal async locations API call."""
1604
+ self._require_public_credentials()
1605
+
1606
+ params = {
1607
+ "token": self.public_token or "",
1608
+ "key": self.public_key or "",
1609
+ }
1610
+
1611
+ for key, value in kwargs.items():
1612
+ params[key] = str(value)
1613
+
1614
+ url = f"{self._locations_base_url}/{endpoint}"
1615
+
1616
+ logger.debug(f"Async Locations API: {url}")
1617
+
1618
+ # Create temporary session for this request (no proxy needed)
1619
+ async with (
1620
+ aiohttp.ClientSession(trust_env=True) as temp_session,
1621
+ temp_session.get(url, params=params) as response,
1622
+ ):
1623
+ response.raise_for_status()
1624
+ data = await response.json()
1625
+
1626
+ if isinstance(data, dict):
1627
+ code = data.get("code")
1628
+ if code is not None and code != 200:
1629
+ msg = data.get("msg", "")
1630
+ raise RuntimeError(
1631
+ f"Locations API error ({endpoint}): code={code}, msg={msg}"
1632
+ )
1633
+ return data.get("data") or []
1634
+
1635
+ if isinstance(data, list):
1636
+ return data
1637
+
1638
+ return []
1639
+
1640
+ # =========================================================================
1641
+ # Helper Methods
1642
+ # =========================================================================
1643
+
1644
+ def _require_public_credentials(self) -> None:
1645
+ """Ensure public API credentials are available."""
1646
+ if not self.public_token or not self.public_key:
1647
+ raise ThordataConfigError(
1648
+ "public_token and public_key are required for this operation. "
1649
+ "Please provide them when initializing AsyncThordataClient."
1650
+ )
1651
+
1652
+ def _get_proxy_endpoint_overrides(
1653
+ self, product: ProxyProduct
1654
+ ) -> tuple[str | None, int | None, str]:
1655
+ prefix = product.value.upper()
1656
+
1657
+ host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
1658
+ "THORDATA_PROXY_HOST"
1659
+ )
1660
+ port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
1661
+ "THORDATA_PROXY_PORT"
1662
+ )
1663
+ protocol = (
1664
+ os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
1665
+ or os.getenv("THORDATA_PROXY_PROTOCOL")
1666
+ or "http"
1667
+ )
1668
+
1669
+ port: int | None = None
1670
+ if port_raw:
1671
+ try:
1672
+ port = int(port_raw)
1673
+ except ValueError:
1674
+ port = None
1675
+
1676
+ return host or None, port, protocol
1677
+
1678
+ def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
1679
+ u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1680
+ p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
1681
+ if u and p:
1682
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1683
+ ProxyProduct.RESIDENTIAL
1684
+ )
1685
+ return ProxyConfig(
1686
+ username=u,
1687
+ password=p,
1688
+ product=ProxyProduct.RESIDENTIAL,
1689
+ host=host,
1690
+ port=port,
1691
+ protocol=protocol,
1692
+ )
1693
+
1694
+ u = os.getenv("THORDATA_DATACENTER_USERNAME")
1695
+ p = os.getenv("THORDATA_DATACENTER_PASSWORD")
1696
+ if u and p:
1697
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1698
+ ProxyProduct.DATACENTER
1699
+ )
1700
+ return ProxyConfig(
1701
+ username=u,
1702
+ password=p,
1703
+ product=ProxyProduct.DATACENTER,
1704
+ host=host,
1705
+ port=port,
1706
+ protocol=protocol,
1707
+ )
1708
+
1709
+ u = os.getenv("THORDATA_MOBILE_USERNAME")
1710
+ p = os.getenv("THORDATA_MOBILE_PASSWORD")
1711
+ if u and p:
1712
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1713
+ ProxyProduct.MOBILE
1714
+ )
1715
+ return ProxyConfig(
1716
+ username=u,
1717
+ password=p,
1718
+ product=ProxyProduct.MOBILE,
1719
+ host=host,
1720
+ port=port,
1721
+ protocol=protocol,
1722
+ )
1723
+
1724
+ return None
1725
+
1726
+ def _build_gateway_headers(self) -> dict[str, str]:
1727
+ """
1728
+ Headers for gateway-style endpoints.
1729
+
1730
+ Per our SDK rule: ONLY public_token/public_key exist.
1731
+ """
1732
+ self._require_public_credentials()
1733
+ return build_public_api_headers(self.public_token or "", self.public_key or "")