thordata-sdk 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/async_client.py CHANGED
@@ -1,281 +1,730 @@
- import aiohttp
+ """
+ Asynchronous client for the Thordata API.
+
+ This module provides the AsyncThordataClient for high-concurrency workloads,
+ built on aiohttp.
+
+ Example:
+ >>> import asyncio
+ >>> from thordata import AsyncThordataClient
+ >>>
+ >>> async def main():
+ ... async with AsyncThordataClient(
+ ... scraper_token="your_token",
+ ... public_token="your_public_token",
+ ... public_key="your_public_key"
+ ... ) as client:
+ ... response = await client.get("https://httpbin.org/ip")
+ ... print(await response.json())
+ >>>
+ >>> asyncio.run(main())
+ """
+
+ from __future__ import annotations
+
+ import asyncio
  import logging
- import json
- import base64
- from typing import Optional, Dict, Any, Union
+ from typing import Any, Dict, List, Optional, Union
+
+ import aiohttp

- # Import shared logic
- from .enums import Engine
- from .parameters import normalize_serp_params
+ from .enums import Engine, ProxyType
+ from .exceptions import (
+ ThordataConfigError,
+ ThordataNetworkError,
+ ThordataTimeoutError,
+ raise_for_code,
+ )
+ from .models import (
+ ProxyConfig,
+ ProxyProduct,
+ SerpRequest,
+ UniversalScrapeRequest,
+ ScraperTaskConfig,
+ )
+ from .retry import RetryConfig
+ from ._utils import (
+ parse_json_response,
+ decode_base64_image,
+ build_auth_headers,
+ build_public_api_headers,
+ extract_error_message,
+ )

  logger = logging.getLogger(__name__)


  class AsyncThordataClient:
  """
- The official Asynchronous Python client for Thordata (built on aiohttp).
+ The official asynchronous Python client for Thordata.
+
  Designed for high-concurrency AI agents and data pipelines.
+
+ Args:
+ scraper_token: The API token from your Dashboard.
+ public_token: The public API token.
+ public_key: The public API key.
+ proxy_host: Custom proxy gateway host.
+ proxy_port: Custom proxy gateway port.
+ timeout: Default request timeout in seconds.
+ retry_config: Configuration for automatic retries.
+
+ Example:
+ >>> async with AsyncThordataClient(
+ ... scraper_token="token",
+ ... public_token="pub_token",
+ ... public_key="pub_key"
+ ... ) as client:
+ ... results = await client.serp_search("python")
  """

+ # API Endpoints (same as sync client)
+ BASE_URL = "https://scraperapi.thordata.com"
+ UNIVERSAL_URL = "https://universalapi.thordata.com"
+ API_URL = "https://api.thordata.com/api/web-scraper-api"
+ LOCATIONS_URL = "https://api.thordata.com/api/locations"
+
  def __init__(
  self,
  scraper_token: str,
- public_token: str,
- public_key: str,
- proxy_host: str = "gate.thordata.com",
- proxy_port: int = 22225
- ):
- """
- Initialize the Async Client.
- """
+ public_token: Optional[str] = None,
+ public_key: Optional[str] = None,
+ proxy_host: str = "pr.thordata.net",
+ proxy_port: int = 9999,
+ timeout: int = 30,
+ retry_config: Optional[RetryConfig] = None,
+ ) -> None:
+ """Initialize the Async Thordata Client."""
+ if not scraper_token:
+ raise ThordataConfigError("scraper_token is required")
+
  self.scraper_token = scraper_token
  self.public_token = public_token
  self.public_key = public_key
-
- # Pre-calculate proxy auth for performance
- self.proxy_auth = aiohttp.BasicAuth(login=scraper_token, password='')
- self.proxy_url = f"http://{proxy_host}:{proxy_port}"
-
- # API Endpoints
- self.base_url = "https://scraperapi.thordata.com"
- self.universal_url = "https://universalapi.thordata.com"
- self.api_url = "https://api.thordata.com/api/web-scraper-api"
-
- self.SERP_API_URL = f"{self.base_url}/request"
- self.UNIVERSAL_API_URL = f"{self.universal_url}/request"
- self.SCRAPER_BUILDER_URL = f"{self.base_url}/builder"
- self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
- self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"
-
- # Session is initialized lazily or via context manager
+
+ # Proxy configuration
+ self._proxy_host = proxy_host
+ self._proxy_port = proxy_port
+ self._default_timeout = aiohttp.ClientTimeout(total=timeout)
+
+ # Retry configuration
+ self._retry_config = retry_config or RetryConfig()
+
+ # Pre-calculate proxy auth
+ self._proxy_url = f"http://{proxy_host}:{proxy_port}"
+ self._proxy_auth = aiohttp.BasicAuth(
+ login=f"td-customer-{scraper_token}",
+ password=""
+ )
+
+ # Store endpoint URLs
+ self._serp_url = f"{self.BASE_URL}/request"
+ self._universal_url = f"{self.UNIVERSAL_URL}/request"
+ self._builder_url = f"{self.BASE_URL}/builder"
+ self._status_url = f"{self.API_URL}/tasks-status"
+ self._download_url = f"{self.API_URL}/tasks-download"
+
+ # Session initialized lazily
  self._session: Optional[aiohttp.ClientSession] = None

- async def __aenter__(self):
+ async def __aenter__(self) -> "AsyncThordataClient":
+ """Async context manager entry."""
  if self._session is None or self._session.closed:
- self._session = aiohttp.ClientSession(trust_env=True)
+ self._session = aiohttp.ClientSession(
+ timeout=self._default_timeout,
+ trust_env=True
+ )
  return self

- async def __aexit__(self, exc_type, exc, tb):
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+ """Async context manager exit."""
  await self.close()

- async def close(self):
+ async def close(self) -> None:
  """Close the underlying aiohttp session."""
  if self._session and not self._session.closed:
  await self._session.close()
  self._session = None

  def _get_session(self) -> aiohttp.ClientSession:
- """Internal helper to ensure session exists."""
+ """Get the session, raising if not initialized."""
  if self._session is None or self._session.closed:
  raise RuntimeError(
- "Client session not initialized. Use 'async with ThordataClient(...) as client:'"
+ "Client session not initialized. "
+ "Use 'async with AsyncThordataClient(...) as client:'"
  )
  return self._session

- async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
+ # =========================================================================
+ # Proxy Network Methods
+ # =========================================================================
+
+ async def get(
+ self,
+ url: str,
+ *,
+ proxy_config: Optional[ProxyConfig] = None,
+ **kwargs: Any,
+ ) -> aiohttp.ClientResponse:
  """
  Send an async GET request through the Proxy Network.
+
+ Args:
+ url: The target URL.
+ proxy_config: Custom proxy configuration.
+ **kwargs: Additional aiohttp arguments.
+
+ Returns:
+ The aiohttp response object.
  """
  session = self._get_session()
+
+ logger.debug(f"Async Proxy GET: {url}")
+
+ if proxy_config:
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
+ else:
+ proxy_url = self._proxy_url
+ proxy_auth = self._proxy_auth
+
  try:
- logger.debug(f"Async Proxy Request: {url}")
  return await session.get(
  url,
- proxy=self.proxy_url,
- proxy_auth=self.proxy_auth,
+ proxy=proxy_url,
+ proxy_auth=proxy_auth,
  **kwargs
  )
+ except asyncio.TimeoutError as e:
+ raise ThordataTimeoutError(
+ f"Async request timed out: {e}",
+ original_error=e
+ )
  except aiohttp.ClientError as e:
- logger.error(f"Async Request failed: {e}")
- raise
+ raise ThordataNetworkError(
+ f"Async request failed: {e}",
+ original_error=e
+ )
+
+ async def post(
+ self,
+ url: str,
+ *,
+ proxy_config: Optional[ProxyConfig] = None,
+ **kwargs: Any,
+ ) -> aiohttp.ClientResponse:
+ """
+ Send an async POST request through the Proxy Network.
+
+ Args:
+ url: The target URL.
+ proxy_config: Custom proxy configuration.
+ **kwargs: Additional aiohttp arguments.
+
+ Returns:
+ The aiohttp response object.
+ """
+ session = self._get_session()
+
+ logger.debug(f"Async Proxy POST: {url}")
+
+ if proxy_config:
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
+ else:
+ proxy_url = self._proxy_url
+ proxy_auth = self._proxy_auth
+
+ try:
+ return await session.post(
+ url,
+ proxy=proxy_url,
+ proxy_auth=proxy_auth,
+ **kwargs
+ )
+ except asyncio.TimeoutError as e:
+ raise ThordataTimeoutError(
+ f"Async request timed out: {e}",
+ original_error=e
+ )
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"Async request failed: {e}",
+ original_error=e
+ )
+
+ # =========================================================================
+ # SERP API Methods
+ # =========================================================================

  async def serp_search(
- self,
- query: str,
- engine: Union[Engine, str] = Engine.GOOGLE,
- num: int = 10,
- **kwargs
+ self,
+ query: str,
+ *,
+ engine: Union[Engine, str] = Engine.GOOGLE,
+ num: int = 10,
+ country: Optional[str] = None,
+ language: Optional[str] = None,
+ search_type: Optional[str] = None,
+ **kwargs: Any,
  ) -> Dict[str, Any]:
  """
- Execute a real-time SERP search (Async).
+ Execute an async SERP search.
+
+ Args:
+ query: Search keywords.
+ engine: Search engine.
+ num: Number of results.
+ country: Country code for localization.
+ language: Language code.
+ search_type: Type of search.
+ **kwargs: Additional parameters.
+
+ Returns:
+ Parsed JSON results.
  """
  session = self._get_session()
-
- # 1. Handle Enum conversion
+
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
+
+ request = SerpRequest(
+ query=query,
+ engine=engine_str,
+ num=num,
+ country=country,
+ language=language,
+ search_type=search_type,
+ extra_params=kwargs,
+ )
+
+ payload = request.to_payload()
+ headers = build_auth_headers(self.scraper_token)
+
+ logger.info(f"Async SERP Search: {engine_str} - {query}")
+
+ try:
+ async with session.post(
+ self._serp_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ response.raise_for_status()
+ data = await response.json()
+ return parse_json_response(data)
+
+ except asyncio.TimeoutError as e:
+ raise ThordataTimeoutError(
+ f"SERP request timed out: {e}",
+ original_error=e
+ )
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"SERP request failed: {e}",
+ original_error=e
+ )

- # 2. Normalize parameters
- payload = normalize_serp_params(engine_str, query, num=num, **kwargs)
-
- headers = {
- "Authorization": f"Bearer {self.scraper_token}",
- "Content-Type": "application/x-www-form-urlencoded"
- }
+ async def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
+ """
+ Execute an async SERP search using a SerpRequest object.
+ """
+ session = self._get_session()
+
+ payload = request.to_payload()
+ headers = build_auth_headers(self.scraper_token)
+
+ logger.info(f"Async SERP Advanced: {request.engine} - {request.query}")
+
+ try:
+ async with session.post(
+ self._serp_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ response.raise_for_status()
+ data = await response.json()
+ return parse_json_response(data)
+
+ except asyncio.TimeoutError as e:
+ raise ThordataTimeoutError(
+ f"SERP request timed out: {e}",
+ original_error=e
+ )
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"SERP request failed: {e}",
+ original_error=e
+ )

- # 3. Execute Request
- logger.info(f"Async SERP Search: {engine_str} - {query}")
- async with session.post(
- self.SERP_API_URL, data=payload, headers=headers
- ) as response:
- response.raise_for_status()
-
- data = await response.json()
- # Handle double-encoded JSON strings if they occur
- if isinstance(data, str):
- try:
- data = json.loads(data)
- except json.JSONDecodeError:
- pass
- return data
+ # =========================================================================
+ # Universal Scraping API Methods
+ # =========================================================================

  async def universal_scrape(
  self,
  url: str,
+ *,
  js_render: bool = False,
- output_format: str = "HTML",
+ output_format: str = "html",
  country: Optional[str] = None,
- block_resources: bool = False
+ block_resources: Optional[str] = None,
+ wait: Optional[int] = None,
+ wait_for: Optional[str] = None,
+ **kwargs: Any,
  ) -> Union[str, bytes]:
  """
- Async Universal Scraping (Bypass Cloudflare/CAPTCHA).
+ Async scrape using Universal API (Web Unlocker).
+
+ Args:
+ url: Target URL.
+ js_render: Enable JavaScript rendering.
+ output_format: "html" or "png".
+ country: Geo-targeting country.
+ block_resources: Resources to block.
+ wait: Wait time in ms.
+ wait_for: CSS selector to wait for.
+
+ Returns:
+ HTML string or PNG bytes.
  """
- session = self._get_session()
-
- headers = {
- "Authorization": f"Bearer {self.scraper_token}",
- "Content-Type": "application/x-www-form-urlencoded"
- }
+ request = UniversalScrapeRequest(
+ url=url,
+ js_render=js_render,
+ output_format=output_format,
+ country=country,
+ block_resources=block_resources,
+ wait=wait,
+ wait_for=wait_for,
+ extra_params=kwargs,
+ )
+
+ return await self.universal_scrape_advanced(request)

- payload = {
- "url": url,
- "js_render": "True" if js_render else "False",
- "type": output_format.lower(),
- "block_resources": "True" if block_resources else "False"
- }
- if country:
- payload["country"] = country
-
- logger.info(f"Async Universal Scrape: {url}")
- async with session.post(
- self.UNIVERSAL_API_URL, data=payload, headers=headers
- ) as response:
- response.raise_for_status()
-
- try:
- resp_json = await response.json()
- except json.JSONDecodeError:
- # Fallback for raw content
- if output_format.upper() == "PNG":
- return await response.read()
- return await response.text()
-
- # Check API error codes
- if isinstance(resp_json, dict) and resp_json.get("code") \
- and resp_json.get("code") != 200:
- raise Exception(f"Universal API Error: {resp_json}")
-
- if "html" in resp_json:
- return resp_json["html"]
-
- if "png" in resp_json:
- png_str = resp_json["png"]
- if not png_str:
- raise Exception("API returned empty PNG data")
-
- # Clean Data URI Scheme
- if "," in png_str:
- png_str = png_str.split(",", 1)[1]
-
- # Fix Base64 Padding
- png_str = png_str.replace("\n", "").replace("\r", "")
- missing_padding = len(png_str) % 4
- if missing_padding:
- png_str += '=' * (4 - missing_padding)
+ async def universal_scrape_advanced(
+ self,
+ request: UniversalScrapeRequest
+ ) -> Union[str, bytes]:
+ """
+ Async scrape using a UniversalScrapeRequest object.
+ """
+ session = self._get_session()
+
+ payload = request.to_payload()
+ headers = build_auth_headers(self.scraper_token)
+
+ logger.info(f"Async Universal Scrape: {request.url}")
+
+ try:
+ async with session.post(
+ self._universal_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ response.raise_for_status()
+
+ try:
+ resp_json = await response.json()
+ except ValueError:
+ if request.output_format.lower() == "png":
+ return await response.read()
+ return await response.text()
+
+ # Check for API errors
+ if isinstance(resp_json, dict):
+ code = resp_json.get("code")
+ if code is not None and code != 200:
+ msg = extract_error_message(resp_json)
+ raise_for_code(
+ f"Universal API Error: {msg}",
+ code=code,
+ payload=resp_json
+ )
+
+ if "html" in resp_json:
+ return resp_json["html"]
+
+ if "png" in resp_json:
+ return decode_base64_image(resp_json["png"])
+
+ return str(resp_json)

- return base64.b64decode(png_str)
+ except asyncio.TimeoutError as e:
+ raise ThordataTimeoutError(
+ f"Universal scrape timed out: {e}",
+ original_error=e
+ )
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"Universal scrape failed: {e}",
+ original_error=e
+ )

- return str(resp_json)
+ # =========================================================================
+ # Web Scraper API Methods
+ # =========================================================================

  async def create_scraper_task(
  self,
  file_name: str,
  spider_id: str,
  spider_name: str,
- individual_params: Dict[str, Any],
- universal_params: Optional[Dict[str, Any]] = None
+ parameters: Dict[str, Any],
+ universal_params: Optional[Dict[str, Any]] = None,
  ) -> str:
  """
- Create an Asynchronous Web Scraper Task.
+ Create an async Web Scraper task.
  """
- session = self._get_session()
-
- headers = {
- "Authorization": f"Bearer {self.scraper_token}",
- "Content-Type": "application/x-www-form-urlencoded"
- }
+ config = ScraperTaskConfig(
+ file_name=file_name,
+ spider_id=spider_id,
+ spider_name=spider_name,
+ parameters=parameters,
+ universal_params=universal_params,
+ )
+
+ return await self.create_scraper_task_advanced(config)

- payload = {
- "file_name": file_name,
- "spider_id": spider_id,
- "spider_name": spider_name,
- "spider_parameters": json.dumps([individual_params]),
- "spider_errors": "true"
- }
- if universal_params:
- payload["spider_universal"] = json.dumps(universal_params)
-
- logger.info(f"Async Task Creation: {spider_name}")
- async with session.post(
- self.SCRAPER_BUILDER_URL, data=payload, headers=headers
- ) as response:
- response.raise_for_status()
- data = await response.json()
-
- if data.get("code") != 200:
- raise Exception(f"Creation failed: {data}")
- return data["data"]["task_id"]
+ async def create_scraper_task_advanced(
+ self,
+ config: ScraperTaskConfig
+ ) -> str:
+ """
+ Create a task using ScraperTaskConfig.
+ """
+ session = self._get_session()
+
+ payload = config.to_payload()
+ headers = build_auth_headers(self.scraper_token)
+
+ logger.info(f"Async Task Creation: {config.spider_name}")
+
+ try:
+ async with session.post(
+ self._builder_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ response.raise_for_status()
+ data = await response.json()
+
+ code = data.get("code")
+ if code != 200:
+ msg = extract_error_message(data)
+ raise_for_code(
+ f"Task creation failed: {msg}",
+ code=code,
+ payload=data
+ )
+
+ return data["data"]["task_id"]
+
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"Task creation failed: {e}",
+ original_error=e
+ )

  async def get_task_status(self, task_id: str) -> str:
  """
- Check task status.
+ Check async task status.
  """
+ self._require_public_credentials()
  session = self._get_session()
-
- headers = {
- "token": self.public_token,
- "key": self.public_key,
- "Content-Type": "application/x-www-form-urlencoded"
- }
+
+ headers = build_public_api_headers(self.public_token, self.public_key)
  payload = {"tasks_ids": task_id}
+
+ try:
+ async with session.post(
+ self._status_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ data = await response.json()
+
+ if data.get("code") == 200 and data.get("data"):
+ for item in data["data"]:
+ if str(item.get("task_id")) == str(task_id):
+ return item.get("status", "unknown")
+
+ return "unknown"
+
+ except Exception as e:
+ logger.error(f"Async status check failed: {e}")
+ return "error"

- async with session.post(
- self.SCRAPER_STATUS_URL, data=payload, headers=headers
- ) as response:
- data = await response.json()
- if data.get("code") == 200 and data.get("data"):
- for item in data["data"]:
- if str(item.get("task_id")) == str(task_id):
- return item["status"]
- return "Unknown"
-
- async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
+ async def get_task_result(
+ self,
+ task_id: str,
+ file_type: str = "json"
+ ) -> str:
  """
- Get the download URL for a finished task.
+ Get download URL for completed task.
  """
+ self._require_public_credentials()
  session = self._get_session()

- headers = {
+ headers = build_public_api_headers(self.public_token, self.public_key)
+ payload = {"tasks_id": task_id, "type": file_type}
+
+ logger.info(f"Async getting result for Task: {task_id}")
+
+ try:
+ async with session.post(
+ self._download_url,
+ data=payload,
+ headers=headers
+ ) as response:
+ data = await response.json()
+ code = data.get("code")
+
+ if code == 200 and data.get("data"):
+ return data["data"]["download"]
+
+ msg = extract_error_message(data)
+ raise_for_code(
+ f"Get result failed: {msg}",
+ code=code,
+ payload=data
+ )
+
+ except aiohttp.ClientError as e:
+ raise ThordataNetworkError(
+ f"Get result failed: {e}",
+ original_error=e
+ )
+
+ async def wait_for_task(
+ self,
+ task_id: str,
+ *,
+ poll_interval: float = 5.0,
+ max_wait: float = 600.0,
+ ) -> str:
+ """
+ Wait for a task to complete.
+ """
+ elapsed = 0.0
+
+ while elapsed < max_wait:
+ status = await self.get_task_status(task_id)
+
+ logger.debug(f"Task {task_id} status: {status}")
+
+ terminal_statuses = {
+ "ready", "success", "finished",
+ "failed", "error", "cancelled"
+ }
+
+ if status.lower() in terminal_statuses:
+ return status
+
+ await asyncio.sleep(poll_interval)
+ elapsed += poll_interval
+
+ raise TimeoutError(
+ f"Task {task_id} did not complete within {max_wait} seconds"
+ )
+
+ # =========================================================================
+ # Location API Methods
+ # =========================================================================
+
+ async def list_countries(
+ self,
+ proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+ ) -> List[Dict[str, Any]]:
+ """List supported countries."""
+ return await self._get_locations(
+ "countries",
+ proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+ )
+
+ async def list_states(
+ self,
+ country_code: str,
+ proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+ ) -> List[Dict[str, Any]]:
+ """List supported states for a country."""
+ return await self._get_locations(
+ "states",
+ proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
+ country_code=country_code
+ )
+
+ async def list_cities(
+ self,
+ country_code: str,
+ state_code: Optional[str] = None,
+ proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+ ) -> List[Dict[str, Any]]:
+ """List supported cities."""
+ kwargs = {
+ "proxy_type": int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
+ "country_code": country_code
+ }
+ if state_code:
+ kwargs["state_code"] = state_code
+
+ return await self._get_locations("cities", **kwargs)
+
+ async def list_asn(
+ self,
+ country_code: str,
+ proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+ ) -> List[Dict[str, Any]]:
+ """List supported ASNs."""
+ return await self._get_locations(
+ "asn",
+ proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
+ country_code=country_code
+ )
+
+ async def _get_locations(
+ self,
+ endpoint: str,
+ **kwargs: Any
+ ) -> List[Dict[str, Any]]:
+ """Internal async locations API call."""
+ self._require_public_credentials()
+
+ params = {
  "token": self.public_token,
  "key": self.public_key,
- "Content-Type": "application/x-www-form-urlencoded"
  }
- # Fixed: Use the file_type argument instead of hardcoding "json"
- payload = {"tasks_id": task_id, "type": file_type}
-
- async with session.post(
- self.SCRAPER_DOWNLOAD_URL, data=payload, headers=headers
- ) as response:
- data = await response.json()
- if data.get("code") == 200 and data.get("data"):
- return data["data"]["download"]
- raise Exception(f"Result Error: {data}")
+
+ for key, value in kwargs.items():
+ params[key] = str(value)
+
+ url = f"{self.LOCATIONS_URL}/{endpoint}"
+
+ logger.debug(f"Async Locations API: {url}")
+
+ # Create temporary session for this request (no proxy needed)
+ async with aiohttp.ClientSession() as temp_session:
+ async with temp_session.get(url, params=params) as response:
+ response.raise_for_status()
+ data = await response.json()
+
+ if isinstance(data, dict):
+ code = data.get("code")
+ if code is not None and code != 200:
+ msg = data.get("msg", "")
+ raise RuntimeError(
+ f"Locations API error ({endpoint}): code={code}, msg={msg}"
+ )
+ return data.get("data") or []
+
+ if isinstance(data, list):
+ return data
+
+ return []
+
+ # =========================================================================
+ # Helper Methods
+ # =========================================================================
+
+ def _require_public_credentials(self) -> None:
+ """Ensure public API credentials are available."""
+ if not self.public_token or not self.public_key:
+ raise ThordataConfigError(
+ "public_token and public_key are required for this operation. "
+ "Please provide them when initializing AsyncThordataClient."
+ )
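
For readers upgrading from 0.3.0, the snippet below is a minimal usage sketch of how the 0.4.0 surface added in this diff fits together. Method names, parameters, and defaults are taken from the code above; the tokens, spider identifiers, target URLs, and printed fields are placeholders, and the actual response contents depend on the account and target.

import asyncio

from thordata import AsyncThordataClient


async def main() -> None:
    async with AsyncThordataClient(
        scraper_token="your_token",           # required
        public_token="your_public_token",     # needed for task status/result and locations
        public_key="your_public_key",
    ) as client:
        # Proxy Network: a plain GET routed through the proxy gateway.
        resp = await client.get("https://httpbin.org/ip")
        print(await resp.json())

        # SERP API: keyword search with the new keyword-only options.
        results = await client.serp_search("python", num=10, country="us")
        print(type(results))  # parsed JSON (dict)

        # Universal Scraping API: rendered HTML for a single URL.
        html = await client.universal_scrape(
            "https://example.com", js_render=True, output_format="html"
        )
        print(len(html))

        # Locations API: list countries for the default residential proxy type.
        countries = await client.list_countries()
        print(len(countries))

        # Web Scraper API: create a task, poll it, then fetch the download URL.
        task_id = await client.create_scraper_task(
            file_name="demo",
            spider_id="placeholder_spider_id",       # placeholder
            spider_name="placeholder_spider_name",   # placeholder
            parameters={"url": "https://example.com"},  # placeholder parameters
        )
        status = await client.wait_for_task(task_id, poll_interval=5.0)
        if status.lower() in {"ready", "success", "finished"}:
            download_url = await client.get_task_result(task_id, file_type="json")
            print(download_url)


asyncio.run(main())

The flow mirrors the structure of the new client: the real-time endpoints (Proxy Network, SERP, Universal) return data directly, while the Web Scraper API is task-based, so wait_for_task polls get_task_status until a terminal status is reached before get_task_result is asked for the download URL.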