thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -6,17 +6,17 @@ Thordata's proxy network, SERP API, Universal Scraping API, and Web Scraper API.
 
 Example:
     >>> from thordata import ThordataClient
-    >>> 
+    >>>
    >>> client = ThordataClient(
    ...     scraper_token="your_token",
    ...     public_token="your_public_token",
    ...     public_key="your_public_key"
    ... )
-    >>> 
+    >>>
    >>> # Use the proxy network
    >>> response = client.get("https://httpbin.org/ip")
    >>> print(response.json())
-    >>> 
+    >>>
    >>> # Search with SERP API
    >>> results = client.serp_search("python tutorial", engine="google")
 """
@@ -24,9 +24,18 @@ Example:
 from __future__ import annotations
 
 import logging
-import requests
 from typing import Any, Dict, List, Optional, Union
 
+import os
+import requests
+
+from ._utils import (
+    build_auth_headers,
+    build_public_api_headers,
+    decode_base64_image,
+    extract_error_message,
+    parse_json_response,
+)
 from .enums import Engine, ProxyType
 from .exceptions import (
     ThordataConfigError,
@@ -37,18 +46,11 @@ from .exceptions import (
 from .models import (
     ProxyConfig,
     ProxyProduct,
+    ScraperTaskConfig,
     SerpRequest,
     UniversalScrapeRequest,
-    ScraperTaskConfig,
 )
 from .retry import RetryConfig, with_retry
-from ._utils import (
-    parse_json_response,
-    decode_base64_image,
-    build_auth_headers,
-    build_public_api_headers,
-    extract_error_message,
-)
 
 logger = logging.getLogger(__name__)
 
@@ -62,7 +64,7 @@ class ThordataClient:
     - SERP API (Real-time Search Engine Results)
     - Universal Scraping API (Web Unlocker - Single Page Rendering)
     - Web Scraper API (Async Task Management)
-
+
     Args:
         scraper_token: The API token from your Dashboard.
         public_token: The public API token (for task status, locations).
@@ -71,7 +73,7 @@ class ThordataClient:
        proxy_port: Custom proxy gateway port (optional).
        timeout: Default request timeout in seconds (default: 30).
        retry_config: Configuration for automatic retries (optional).
-
+
     Example:
        >>> client = ThordataClient(
        ...     scraper_token="your_scraper_token",
@@ -95,41 +97,79 @@ class ThordataClient:
         proxy_port: int = 9999,
         timeout: int = 30,
         retry_config: Optional[RetryConfig] = None,
+        scraperapi_base_url: Optional[str] = None,
+        universalapi_base_url: Optional[str] = None,
+        web_scraper_api_base_url: Optional[str] = None,
+        locations_base_url: Optional[str] = None,
     ) -> None:
         """Initialize the Thordata Client."""
         if not scraper_token:
             raise ThordataConfigError("scraper_token is required")
-
+
         self.scraper_token = scraper_token
         self.public_token = public_token
         self.public_key = public_key
-
+
         # Proxy configuration
         self._proxy_host = proxy_host
         self._proxy_port = proxy_port
         self._default_timeout = timeout
-
+
         # Retry configuration
         self._retry_config = retry_config or RetryConfig()
-
+
         # Build default proxy URL (for basic usage)
         self._default_proxy_url = (
             f"http://td-customer-{self.scraper_token}:@{proxy_host}:{proxy_port}"
         )
-
-        # Initialize session with default proxy settings
-        self._session = requests.Session()
-        self._session.proxies = {
+
+        # Sessions:
+        # - _proxy_session: used for proxy network traffic to target sites
+        # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
+        #
+        # We intentionally do NOT set session-level proxies for _api_session,
+        # so developers can rely on system proxy settings (e.g., Clash) via env vars.
+        self._proxy_session = requests.Session()
+        self._proxy_session.trust_env = False
+        self._proxy_session.proxies = {
             "http": self._default_proxy_url,
             "https": self._default_proxy_url,
         }
-
-        # Store endpoint URLs
-        self._serp_url = f"{self.BASE_URL}/request"
-        self._universal_url = f"{self.UNIVERSAL_URL}/request"
-        self._builder_url = f"{self.BASE_URL}/builder"
-        self._status_url = f"{self.API_URL}/tasks-status"
-        self._download_url = f"{self.API_URL}/tasks-download"
+
+        self._api_session = requests.Session()
+        self._api_session.trust_env = True
+
+        # Base URLs (allow override via args or env vars for testing and custom routing)
+        scraperapi_base = (
+            scraperapi_base_url
+            or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
+            or self.BASE_URL
+        ).rstrip("/")
+
+        universalapi_base = (
+            universalapi_base_url
+            or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
+            or self.UNIVERSAL_URL
+        ).rstrip("/")
+
+        web_scraper_api_base = (
+            web_scraper_api_base_url
+            or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
+            or self.API_URL
+        ).rstrip("/")
+
+        locations_base = (
+            locations_base_url
+            or os.getenv("THORDATA_LOCATIONS_BASE_URL")
+            or self.LOCATIONS_URL
+        ).rstrip("/")
+
+        self._serp_url = f"{scraperapi_base}/request"
+        self._builder_url = f"{scraperapi_base}/builder"
+        self._universal_url = f"{universalapi_base}/request"
+        self._status_url = f"{web_scraper_api_base}/tasks-status"
+        self._download_url = f"{web_scraper_api_base}/tasks-download"
+        self._locations_base_url = locations_base
 
     # =========================================================================
     # Proxy Network Methods
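
Note: 0.5.0 splits the old single requests.Session in two. _proxy_session pins the
Thordata gateway and sets trust_env = False, so HTTP_PROXY/HTTPS_PROXY no longer
leak into proxy-network traffic, while _api_session sets trust_env = True so API
calls follow system proxy settings. The API base URLs also become overridable. A
minimal sketch of the override precedence, using a hypothetical staging host (the
URL below is illustrative, not a documented Thordata endpoint):

    import os

    from thordata import ThordataClient

    # Constructor argument beats the environment variable; both fall back to
    # the class-level default, and a trailing slash is stripped either way.
    os.environ["THORDATA_SCRAPERAPI_BASE_URL"] = "https://serp.staging.example.com/"

    client = ThordataClient(scraper_token="your_token")
    # client._serp_url is now "https://serp.staging.example.com/request"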
@@ -154,11 +194,11 @@ class ThordataClient:
 
         Returns:
             The response object.
-
+
         Example:
             >>> # Basic request
             >>> response = client.get("https://httpbin.org/ip")
-            >>> 
+            >>>
             >>> # With geo-targeting
             >>> from thordata.models import ProxyConfig
             >>> config = ProxyConfig(
@@ -170,13 +210,13 @@ class ThordataClient:
             >>> response = client.get("https://httpbin.org/ip", proxy_config=config)
         """
         logger.debug(f"Proxy GET request: {url}")
-
+
         timeout = timeout or self._default_timeout
-
+
         if proxy_config:
             proxies = proxy_config.to_proxies_dict()
             kwargs["proxies"] = proxies
-
+
         return self._request_with_retry("GET", url, timeout=timeout, **kwargs)
 
     def post(
@@ -200,13 +240,13 @@ class ThordataClient:
             The response object.
         """
         logger.debug(f"Proxy POST request: {url}")
-
+
         timeout = timeout or self._default_timeout
-
+
         if proxy_config:
             proxies = proxy_config.to_proxies_dict()
             kwargs["proxies"] = proxies
-
+
         return self._request_with_retry("POST", url, timeout=timeout, **kwargs)
 
     def build_proxy_url(
@@ -221,10 +261,10 @@ class ThordataClient:
     ) -> str:
         """
         Build a proxy URL with custom targeting options.
-
+
         This is a convenience method for creating proxy URLs without
         manually constructing a ProxyConfig.
-
+
         Args:
             country: Target country code (e.g., 'us', 'gb').
             state: Target state (e.g., 'california').
@@ -232,10 +272,10 @@ class ThordataClient:
             session_id: Session ID for sticky sessions.
             session_duration: Session duration in minutes (1-90).
             product: Proxy product type.
-
+
         Returns:
             The proxy URL string.
-
+
         Example:
             >>> url = client.build_proxy_url(country="us", city="seattle")
             >>> proxies = {"http": url, "https": url}
@@ -268,39 +308,51 @@ class ThordataClient:
         country: Optional[str] = None,
         language: Optional[str] = None,
         search_type: Optional[str] = None,
+        device: Optional[str] = None,
+        render_js: Optional[bool] = None,
+        no_cache: Optional[bool] = None,
+        output_format: str = "json",
         **kwargs: Any,
     ) -> Dict[str, Any]:
         """
         Execute a real-time SERP (Search Engine Results Page) search.
-
+
         Args:
             query: The search keywords.
             engine: Search engine (google, bing, yandex, duckduckgo, baidu).
             num: Number of results to retrieve (default: 10).
             country: Country code for localized results (e.g., 'us').
             language: Language code for interface (e.g., 'en').
-            search_type: Type of search (images, news, shopping, videos).
+            search_type: Type of search (images, news, shopping, videos, etc.).
+            device: Device type ('desktop', 'mobile', 'tablet').
+            render_js: Enable JavaScript rendering in SERP (render_js=True).
+            no_cache: Disable internal caching (no_cache=True).
+            output_format: 'json' to return parsed JSON (default),
+                'html' to return HTML wrapped in {'html': ...}.
             **kwargs: Additional engine-specific parameters.
 
         Returns:
-            Parsed JSON results from the search.
-
+            Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
+
         Example:
             >>> # Basic search
             >>> results = client.serp_search("python tutorial")
-            >>> 
+            >>>
             >>> # With options
             >>> results = client.serp_search(
             ...     "laptop reviews",
             ...     engine="google",
             ...     num=20,
             ...     country="us",
-            ...     search_type="shopping"
+            ...     search_type="shopping",
+            ...     device="mobile",
+            ...     render_js=True,
+            ...     no_cache=True,
             ... )
         """
         # Normalize engine
         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
-
+
         # Build request using model
         request = SerpRequest(
             query=query,
@@ -309,49 +361,69 @@ class ThordataClient:
             country=country,
             language=language,
             search_type=search_type,
+            device=device,
+            render_js=render_js,
+            no_cache=no_cache,
+            output_format=output_format,
             extra_params=kwargs,
         )
-
+
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token)
-
+
         logger.info(f"SERP Search: {engine_str} - {query}")
-
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._serp_url,
                 data=payload,
                 headers=headers,
                 timeout=60,
             )
             response.raise_for_status()
-
-            data = response.json()
-            return parse_json_response(data)
-
+
+            # JSON mode (default)
+            if output_format.lower() == "json":
+                data = response.json()
+
+                if isinstance(data, dict):
+                    code = data.get("code")
+                    if code is not None and code != 200:
+                        msg = extract_error_message(data)
+                        raise_for_code(
+                            f"SERP API Error: {msg}",
+                            code=code,
+                            payload=data,
+                        )
+
+                return parse_json_response(data)
+
+            # HTML mode: wrap as dict to keep return type stable
+            return {"html": response.text}
+
         except requests.Timeout as e:
             raise ThordataTimeoutError(
                 f"SERP request timed out: {e}",
-                original_error=e
+                original_error=e,
             )
         except requests.RequestException as e:
             raise ThordataNetworkError(
                 f"SERP request failed: {e}",
-                original_error=e
+                original_error=e,
             )
 
     def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
         """
         Execute a SERP search using a SerpRequest object.
-
+
         This method provides full control over all search parameters.
-
+
         Args:
             request: A SerpRequest object with all parameters configured.
-
+
         Returns:
-            Parsed JSON results.
-
+            Dict[str, Any]: Parsed JSON results or dict with 'html' key.
+
         Example:
             >>> from thordata.models import SerpRequest
             >>> request = SerpRequest(
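
Note: serp_search now branches on output_format. In the default JSON mode the body
is checked for an API-level code before being handed to parse_json_response; in
HTML mode the raw body is wrapped in a dict so the declared return type stays
Dict[str, Any]. A short sketch of both modes (the "organic" key is an assumption
about the parsed schema, not something this diff guarantees):

    results = client.serp_search("python tutorial", engine="google", num=20)
    organic = results.get("organic", [])  # assumed result key; varies by engine

    page = client.serp_search("python tutorial", output_format="html")
    raw_html = page["html"]  # HTML mode always wraps the markup in a dict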
@@ -368,30 +440,44 @@ class ThordataClient:
         """
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token)
-
+
         logger.info(f"SERP Advanced Search: {request.engine} - {request.query}")
-
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._serp_url,
                 data=payload,
                 headers=headers,
                 timeout=60,
             )
             response.raise_for_status()
-
-            data = response.json()
-            return parse_json_response(data)
-
+
+            if request.output_format.lower() == "json":
+                data = response.json()
+
+                if isinstance(data, dict):
+                    code = data.get("code")
+                    if code is not None and code != 200:
+                        msg = extract_error_message(data)
+                        raise_for_code(
+                            f"SERP API Error: {msg}",
+                            code=code,
+                            payload=data,
+                        )
+
+                return parse_json_response(data)
+
+            return {"html": response.text}
+
         except requests.Timeout as e:
             raise ThordataTimeoutError(
                 f"SERP request timed out: {e}",
-                original_error=e
+                original_error=e,
             )
         except requests.RequestException as e:
             raise ThordataNetworkError(
                 f"SERP request failed: {e}",
-                original_error=e
+                original_error=e,
             )
 
     # =========================================================================
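
Note: serp_search_advanced applies the same output_format branch to a prebuilt
SerpRequest. A sketch, assuming SerpRequest accepts the same fields the keyword
wrapper above forwards to it:

    from thordata.models import SerpRequest

    request = SerpRequest(
        query="laptop reviews",
        engine="google",
        num=20,
        country="us",
        output_format="html",  # routes to the {"html": ...} branch
    )
    page = client.serp_search_advanced(request)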
@@ -412,7 +498,7 @@ class ThordataClient:
     ) -> Union[str, bytes]:
         """
         Scrape a URL using the Universal Scraping API (Web Unlocker).
-
+
         Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
 
         Args:
@@ -427,11 +513,11 @@ class ThordataClient:
 
         Returns:
             HTML string or PNG bytes depending on output_format.
-
+
         Example:
             >>> # Get HTML
             >>> html = client.universal_scrape("https://example.com", js_render=True)
-            >>> 
+            >>>
             >>> # Get screenshot
             >>> png = client.universal_scrape(
             ...     "https://example.com",
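
Note: with output_format="png" the Universal Scraping API returns screenshot
bytes; _process_universal_response decodes them from base64 when the server
responds with JSON and passes raw content through otherwise. A usage sketch:

    png = client.universal_scrape(
        "https://example.com",
        js_render=True,
        output_format="png",
    )
    with open("screenshot.png", "wb") as f:
        f.write(png)  # bytes, already base64-decoded by the client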
@@ -451,53 +537,50 @@ class ThordataClient:
             wait_for=wait_for,
             extra_params=kwargs,
         )
-
+
         return self.universal_scrape_advanced(request)
 
     def universal_scrape_advanced(
-        self,
-        request: UniversalScrapeRequest
+        self, request: UniversalScrapeRequest
     ) -> Union[str, bytes]:
         """
         Scrape using a UniversalScrapeRequest object for full control.
-
+
         Args:
             request: A UniversalScrapeRequest with all parameters.
-
+
         Returns:
             HTML string or PNG bytes.
         """
         payload = request.to_payload()
         headers = build_auth_headers(self.scraper_token)
-
-        logger.info(f"Universal Scrape: {request.url} (format: {request.output_format})")
-
+
+        logger.info(
+            f"Universal Scrape: {request.url} (format: {request.output_format})"
+        )
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._universal_url,
                 data=payload,
                 headers=headers,
                 timeout=60,
             )
             response.raise_for_status()
-
+
             return self._process_universal_response(response, request.output_format)
-
+
         except requests.Timeout as e:
             raise ThordataTimeoutError(
-                f"Universal scrape timed out: {e}",
-                original_error=e
+                f"Universal scrape timed out: {e}", original_error=e
             )
         except requests.RequestException as e:
             raise ThordataNetworkError(
-                f"Universal scrape failed: {e}",
-                original_error=e
+                f"Universal scrape failed: {e}", original_error=e
             )
 
     def _process_universal_response(
-        self,
-        response: requests.Response,
-        output_format: str
+        self, response: requests.Response, output_format: str
     ) -> Union[str, bytes]:
         """Process the response from Universal API."""
         # Try to parse as JSON
@@ -508,26 +591,24 @@ class ThordataClient:
             if output_format.lower() == "png":
                 return response.content
             return response.text
-
+
         # Check for API-level errors
         if isinstance(resp_json, dict):
             code = resp_json.get("code")
             if code is not None and code != 200:
                 msg = extract_error_message(resp_json)
                 raise_for_code(
-                    f"Universal API Error: {msg}",
-                    code=code,
-                    payload=resp_json
+                    f"Universal API Error: {msg}", code=code, payload=resp_json
                 )
-
+
         # Extract HTML
         if "html" in resp_json:
             return resp_json["html"]
-
+
         # Extract PNG
         if "png" in resp_json:
             return decode_base64_image(resp_json["png"])
-
+
         # Fallback
         return str(resp_json)
 
@@ -545,7 +626,7 @@ class ThordataClient:
     ) -> str:
         """
         Create an asynchronous Web Scraper task.
-
+
         Note: Get spider_id and spider_name from the Thordata Dashboard.
 
         Args:
@@ -557,7 +638,7 @@ class ThordataClient:
 
         Returns:
             The created task_id.
-
+
         Example:
             >>> task_id = client.create_scraper_task(
             ...     file_name="youtube_data",
@@ -573,51 +654,44 @@ class ThordataClient:
             parameters=parameters,
             universal_params=universal_params,
         )
-
+
         return self.create_scraper_task_advanced(config)
 
     def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
         """
         Create a scraper task using a ScraperTaskConfig object.
-
+
         Args:
             config: Task configuration.
-
+
         Returns:
             The created task_id.
         """
         payload = config.to_payload()
         headers = build_auth_headers(self.scraper_token)
-
+
         logger.info(f"Creating Scraper Task: {config.spider_name}")
-
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._builder_url,
                 data=payload,
                 headers=headers,
                 timeout=30,
             )
             response.raise_for_status()
-
+
             data = response.json()
             code = data.get("code")
-
+
             if code != 200:
                 msg = extract_error_message(data)
-                raise_for_code(
-                    f"Task creation failed: {msg}",
-                    code=code,
-                    payload=data
-                )
-
+                raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
+
             return data["data"]["task_id"]
-
+
         except requests.RequestException as e:
-            raise ThordataNetworkError(
-                f"Task creation failed: {e}",
-                original_error=e
-            )
+            raise ThordataNetworkError(f"Task creation failed: {e}", original_error=e)
 
     def get_task_status(self, task_id: str) -> str:
         """
@@ -630,81 +704,69 @@ class ThordataClient:
             Status string (e.g., "running", "ready", "failed").
         """
         self._require_public_credentials()
-
-        headers = build_public_api_headers(self.public_token, self.public_key)
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
         payload = {"tasks_ids": task_id}
-
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._status_url,
                 data=payload,
                 headers=headers,
                 timeout=30,
             )
             response.raise_for_status()
-
+
             data = response.json()
-
+
             if data.get("code") == 200 and data.get("data"):
                 for item in data["data"]:
                     if str(item.get("task_id")) == str(task_id):
                         return item.get("status", "unknown")
-
+
             return "unknown"
-
+
         except Exception as e:
             logger.error(f"Status check failed: {e}")
             return "error"
 
-    def get_task_result(
-        self,
-        task_id: str,
-        file_type: str = "json"
-    ) -> str:
+    def get_task_result(self, task_id: str, file_type: str = "json") -> str:
         """
         Get the download URL for a completed task.
-
-        Args:
-            task_id: The task ID.
-            file_type: Output format ("json", "csv", "xlsx").
-
-        Returns:
-            The download URL for the result file.
         """
         self._require_public_credentials()
-
-        headers = build_public_api_headers(self.public_token, self.public_key)
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
         payload = {"tasks_id": task_id, "type": file_type}
-
+
         logger.info(f"Getting result URL for Task: {task_id}")
-
+
         try:
-            response = self._session.post(
+            response = self._api_session.post(
                 self._download_url,
                 data=payload,
                 headers=headers,
                 timeout=30,
             )
             response.raise_for_status()
-
+
             data = response.json()
             code = data.get("code")
-
+
             if code == 200 and data.get("data"):
                 return data["data"]["download"]
-
+
             msg = extract_error_message(data)
-            raise_for_code(
-                f"Get result failed: {msg}",
-                code=code,
-                payload=data
-            )
-
+            raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
+            # This line won't be reached, but satisfies mypy
+            raise RuntimeError("Unexpected state")
+
         except requests.RequestException as e:
-            raise ThordataNetworkError(
-                f"Get result failed: {e}",
-                original_error=e
-            )
+            raise ThordataNetworkError(f"Get result failed: {e}", original_error=e)
 
     def wait_for_task(
         self,
@@ -715,18 +777,18 @@ class ThordataClient:
     ) -> str:
         """
         Wait for a task to complete.
-
+
         Args:
             task_id: The task ID to wait for.
             poll_interval: Seconds between status checks.
             max_wait: Maximum seconds to wait.
-
+
         Returns:
             Final task status.
-
+
         Raises:
             TimeoutError: If max_wait is exceeded.
-
+
         Example:
             >>> task_id = client.create_scraper_task(...)
             >>> status = client.wait_for_task(task_id, max_wait=300)
@@ -734,36 +796,37 @@ class ThordataClient:
             ...     url = client.get_task_result(task_id)
         """
         import time
-
+
         elapsed = 0.0
-
+
         while elapsed < max_wait:
             status = self.get_task_status(task_id)
-
+
             logger.debug(f"Task {task_id} status: {status}")
-
+
             terminal_statuses = {
-                "ready", "success", "finished",
-                "failed", "error", "cancelled"
+                "ready",
+                "success",
+                "finished",
+                "failed",
+                "error",
+                "cancelled",
             }
-
+
             if status.lower() in terminal_statuses:
                 return status
-
+
             time.sleep(poll_interval)
             elapsed += poll_interval
-
-        raise TimeoutError(
-            f"Task {task_id} did not complete within {max_wait} seconds"
-        )
+
+        raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
 
     # =========================================================================
     # Location API Methods
     # =========================================================================
 
     def list_countries(
-        self,
-        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+        self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
     ) -> List[Dict[str, Any]]:
         """
         List supported countries for proxies.
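
Note: wait_for_task polls get_task_status until a terminal status. Because
get_task_status swallows exceptions and returns "error", and "error" is in the
terminal set, a transient network failure also ends the wait. An end-to-end
sketch under that behavior (spider values and URL are placeholders):

    task_id = client.create_scraper_task(
        file_name="youtube_data",
        spider_id="your_spider_id",
        spider_name="youtube",
        parameters={"url": "https://www.youtube.com/watch?v=VIDEO_ID"},
    )
    status = client.wait_for_task(task_id, poll_interval=10, max_wait=600)
    if status.lower() in {"ready", "success", "finished"}:
        download_url = client.get_task_result(task_id, file_type="json")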
@@ -776,13 +839,15 @@ class ThordataClient:
         """
         return self._get_locations(
             "countries",
-            proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
         )
 
     def list_states(
         self,
         country_code: str,
-        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
     ) -> List[Dict[str, Any]]:
         """
         List supported states for a country.
@@ -796,15 +861,17 @@ class ThordataClient:
         """
         return self._get_locations(
             "states",
-            proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
-            country_code=country_code
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            country_code=country_code,
         )
 
     def list_cities(
         self,
         country_code: str,
         state_code: Optional[str] = None,
-        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
     ) -> List[Dict[str, Any]]:
         """
         List supported cities for a country/state.
@@ -818,18 +885,20 @@ class ThordataClient:
             List of city records.
         """
         kwargs = {
-            "proxy_type": int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
-            "country_code": country_code
+            "proxy_type": (
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            "country_code": country_code,
         }
         if state_code:
             kwargs["state_code"] = state_code
-
+
         return self._get_locations("cities", **kwargs)
 
     def list_asn(
         self,
         country_code: str,
-        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
     ) -> List[Dict[str, Any]]:
         """
         List supported ASNs for a country.
@@ -843,36 +912,34 @@ class ThordataClient:
         """
         return self._get_locations(
             "asn",
-            proxy_type=int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type,
-            country_code=country_code
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            country_code=country_code,
         )
 
-    def _get_locations(
-        self,
-        endpoint: str,
-        **kwargs: Any
-    ) -> List[Dict[str, Any]]:
+    def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
         """Internal method to call locations API."""
         self._require_public_credentials()
-
+
         params = {
             "token": self.public_token,
             "key": self.public_key,
         }
-
+
         for key, value in kwargs.items():
             params[key] = str(value)
-
-        url = f"{self.LOCATIONS_URL}/{endpoint}"
-
+
+        url = f"{self._locations_base_url}/{endpoint}"
+
         logger.debug(f"Locations API request: {url}")
-
+
         # Use requests.get directly (no proxy needed for this API)
-        response = requests.get(url, params=params, timeout=30)
+        response = self._api_session.get(url, params=params, timeout=30)
         response.raise_for_status()
-
+
         data = response.json()
-
+
         if isinstance(data, dict):
             code = data.get("code")
             if code is not None and code != 200:
@@ -881,10 +948,10 @@ class ThordataClient:
                     f"Locations API error ({endpoint}): code={code}, msg={msg}"
                 )
             return data.get("data") or []
-
+
         if isinstance(data, list):
             return data
-
+
         return []
 
     # =========================================================================
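
Note: the locations helpers now go through _api_session and the overridable
locations base URL instead of a bare requests.get. A usage sketch (the
state_code value is illustrative):

    from thordata.enums import ProxyType

    countries = client.list_countries(proxy_type=ProxyType.RESIDENTIAL)
    states = client.list_states("us")
    cities = client.list_cities("us", state_code="california")
    asns = client.list_asn("us")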
@@ -900,37 +967,29 @@ class ThordataClient:
         )
 
     def _request_with_retry(
-        self,
-        method: str,
-        url: str,
-        **kwargs: Any
+        self, method: str, url: str, **kwargs: Any
     ) -> requests.Response:
         """Make a request with automatic retry."""
         kwargs.setdefault("timeout", self._default_timeout)
-
+
         @with_retry(self._retry_config)
         def _do_request() -> requests.Response:
-            return self._session.request(method, url, **kwargs)
-
+            return self._proxy_session.request(method, url, **kwargs)
+
         try:
             return _do_request()
         except requests.Timeout as e:
-            raise ThordataTimeoutError(
-                f"Request timed out: {e}",
-                original_error=e
-            )
+            raise ThordataTimeoutError(f"Request timed out: {e}", original_error=e)
         except requests.RequestException as e:
-            raise ThordataNetworkError(
-                f"Request failed: {e}",
-                original_error=e
-            )
+            raise ThordataNetworkError(f"Request failed: {e}", original_error=e)
 
     def close(self) -> None:
         """Close the underlying session."""
-        self._session.close()
+        self._proxy_session.close()
+        self._api_session.close()
 
-    def __enter__(self) -> "ThordataClient":
+    def __enter__(self) -> ThordataClient:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
-        self.close()
+        self.close()
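
Note: close() now tears down both sessions, and __enter__ can drop the quoted
return annotation because the module already uses from __future__ import
annotations. Context-manager usage is unchanged:

    from thordata import ThordataClient

    with ThordataClient(
        scraper_token="your_token",
        public_token="your_public_token",
        public_key="your_public_key",
    ) as client:
        print(client.get("https://httpbin.org/ip").json())
    # on exit, both _proxy_session and _api_session are closed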