thordata-sdk 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -69,32 +69,6 @@ logger = logging.getLogger(__name__)
69
69
 
70
70
 
71
71
  class ThordataClient:
72
- """
73
- The official synchronous Python client for Thordata.
74
-
75
- This client handles authentication and communication with:
76
- - Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
77
- - SERP API (Real-time Search Engine Results)
78
- - Universal Scraping API (Web Unlocker - Single Page Rendering)
79
- - Web Scraper API (Async Task Management)
80
-
81
- Args:
82
- scraper_token: The API token from your Dashboard.
83
- public_token: The public API token (for task status, locations).
84
- public_key: The public API key.
85
- proxy_host: Custom proxy gateway host (optional).
86
- proxy_port: Custom proxy gateway port (optional).
87
- timeout: Default request timeout in seconds (default: 30).
88
- retry_config: Configuration for automatic retries (optional).
89
-
90
- Example:
91
- >>> client = ThordataClient(
92
- ... scraper_token="your_scraper_token",
93
- ... public_token="your_public_token",
94
- ... public_key="your_public_key"
95
- ... )
96
- """
97
-
98
72
  # API Endpoints
99
73
  BASE_URL = "https://scraperapi.thordata.com"
100
74
  UNIVERSAL_URL = "https://universalapi.thordata.com"
@@ -144,22 +118,21 @@ class ThordataClient:
144
118
  f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
145
119
  )
146
120
 
147
- # NOTE:
148
- # - _proxy_session: used for proxy network traffic to target sites
149
- # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
150
- #
151
- # We intentionally do NOT set session-level proxies for _api_session,
152
- # so developers can rely on system proxy settings (e.g., Clash) via env vars.
121
+ # HTTP Sessions
153
122
  self._proxy_session = requests.Session()
154
123
  self._proxy_session.trust_env = False
155
124
 
125
+ # Cache for ProxyManagers (Connection Pooling Fix)
126
+ # Key: proxy_url (str), Value: urllib3.ProxyManager
127
+ self._proxy_managers: Dict[str, urllib3.ProxyManager] = {}
128
+
156
129
  self._api_session = requests.Session()
157
130
  self._api_session.trust_env = True
158
131
  self._api_session.headers.update(
159
132
  {"User-Agent": build_user_agent(_sdk_version, "requests")}
160
133
  )
161
134
 
162
- # Base URLs (allow override via args or env vars for testing and custom routing)
135
+ # Base URLs
163
136
  scraperapi_base = (
164
137
  scraperapi_base_url
165
138
  or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
@@ -184,15 +157,13 @@ class ThordataClient:
184
157
  or self.LOCATIONS_URL
185
158
  ).rstrip("/")
186
159
 
187
- # These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
188
160
  gateway_base = os.getenv(
189
161
  "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
190
162
  )
191
- child_base = os.getenv(
163
+ self._gateway_base_url = gateway_base
164
+ self._child_base_url = os.getenv(
192
165
  "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
193
166
  )
194
- self._gateway_base_url = gateway_base
195
- self._child_base_url = child_base
196
167
 
197
168
  self._serp_url = f"{scraperapi_base}/request"
198
169
  self._builder_url = f"{scraperapi_base}/builder"
@@ -205,7 +176,6 @@ class ThordataClient:
205
176
 
206
177
  self._locations_base_url = locations_base
207
178
 
208
- # These 2 lines keep your existing behavior (derive account endpoints from locations_base)
209
179
  self._usage_stats_url = (
210
180
  f"{locations_base.replace('/locations', '')}/account/usage-statistics"
211
181
  )
@@ -225,7 +195,7 @@ class ThordataClient:
225
195
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
226
196
 
227
197
  # =========================================================================
228
- # Proxy Network Methods (Pure proxy network request functions)
198
+ # Proxy Network Methods
229
199
  # =========================================================================
230
200
  def get(
231
201
  self,
@@ -235,67 +205,8 @@ class ThordataClient:
235
205
  timeout: Optional[int] = None,
236
206
  **kwargs: Any,
237
207
  ) -> requests.Response:
238
- """
239
- Send a GET request through the Thordata Proxy Network.
240
-
241
- Args:
242
- url: The target URL.
243
- proxy_config: Custom proxy configuration for geo-targeting/sessions.
244
- timeout: Request timeout in seconds.
245
- **kwargs: Additional arguments to pass to requests.get().
246
-
247
- Returns:
248
- The response object.
249
-
250
- Example:
251
- >>> # Basic request
252
- >>> response = client.get("https://httpbin.org/ip")
253
- >>>
254
- >>> # With geo-targeting
255
- >>> from thordata.models import ProxyConfig
256
- >>> config = ProxyConfig(
257
- ... username="myuser",
258
- ... password="mypass",
259
- ... country="us",
260
- ... city="seattle"
261
- ... )
262
- >>> response = client.get("https://httpbin.org/ip", proxy_config=config)
263
- """
264
208
  logger.debug(f"Proxy GET request: {url}")
265
-
266
- timeout = timeout or self._default_timeout
267
-
268
- if proxy_config is None:
269
- proxy_config = self._get_default_proxy_config_from_env()
270
-
271
- if proxy_config is None:
272
- raise ThordataConfigError(
273
- "Proxy credentials are missing. "
274
- "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
275
- "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
276
- )
277
-
278
- kwargs["proxies"] = proxy_config.to_proxies_dict()
279
-
280
- @with_retry(self._retry_config)
281
- def _do() -> requests.Response:
282
- return self._proxy_request_with_proxy_manager(
283
- "GET",
284
- url,
285
- proxy_config=proxy_config,
286
- timeout=timeout,
287
- headers=kwargs.pop("headers", None),
288
- params=kwargs.pop("params", None),
289
- )
290
-
291
- try:
292
- return _do()
293
- except requests.Timeout as e:
294
- raise ThordataTimeoutError(
295
- f"Request timed out: {e}", original_error=e
296
- ) from e
297
- except Exception as e:
298
- raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
209
+ return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
299
210
 
300
211
  def post(
301
212
  self,
@@ -305,20 +216,17 @@ class ThordataClient:
305
216
  timeout: Optional[int] = None,
306
217
  **kwargs: Any,
307
218
  ) -> requests.Response:
308
- """
309
- Send a POST request through the Thordata Proxy Network.
310
-
311
- Args:
312
- url: The target URL.
313
- proxy_config: Custom proxy configuration.
314
- timeout: Request timeout in seconds.
315
- **kwargs: Additional arguments to pass to requests.post().
316
-
317
- Returns:
318
- The response object.
319
- """
320
219
  logger.debug(f"Proxy POST request: {url}")
220
+ return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
321
221
 
222
+ def _proxy_verb(
223
+ self,
224
+ method: str,
225
+ url: str,
226
+ proxy_config: Optional[ProxyConfig],
227
+ timeout: Optional[int],
228
+ **kwargs: Any,
229
+ ) -> requests.Response:
322
230
  timeout = timeout or self._default_timeout
323
231
 
324
232
  if proxy_config is None:
@@ -327,19 +235,21 @@ class ThordataClient:
327
235
  if proxy_config is None:
328
236
  raise ThordataConfigError(
329
237
  "Proxy credentials are missing. "
330
- "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
331
- "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
238
+ "Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
332
239
  )
333
240
 
334
- kwargs["proxies"] = proxy_config.to_proxies_dict()
241
+ # For requests/urllib3, we don't need 'proxies' dict in kwargs
242
+ # because we use ProxyManager directly.
243
+ # But we remove it if user accidentally passed it to avoid confusion.
244
+ kwargs.pop("proxies", None)
335
245
 
336
246
  @with_retry(self._retry_config)
337
247
  def _do() -> requests.Response:
338
248
  return self._proxy_request_with_proxy_manager(
339
- "POST",
249
+ method,
340
250
  url,
341
- proxy_config=proxy_config,
342
- timeout=timeout,
251
+ proxy_config=proxy_config, # type: ignore
252
+ timeout=timeout, # type: ignore
343
253
  headers=kwargs.pop("headers", None),
344
254
  params=kwargs.pop("params", None),
345
255
  data=kwargs.pop("data", None),
@@ -356,8 +266,8 @@ class ThordataClient:
356
266
 
357
267
  def build_proxy_url(
358
268
  self,
359
- username: str, # Required
360
- password: str, # Required
269
+ username: str,
270
+ password: str,
361
271
  *,
362
272
  country: Optional[str] = None,
363
273
  state: Optional[str] = None,
@@ -366,28 +276,6 @@ class ThordataClient:
366
276
  session_duration: Optional[int] = None,
367
277
  product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
368
278
  ) -> str:
369
- """
370
- Build a proxy URL with custom targeting options.
371
-
372
- This is a convenience method for creating proxy URLs without
373
- manually constructing a ProxyConfig.
374
-
375
- Args:
376
- country: Target country code (e.g., 'us', 'gb').
377
- state: Target state (e.g., 'california').
378
- city: Target city (e.g., 'seattle').
379
- session_id: Session ID for sticky sessions.
380
- session_duration: Session duration in minutes (1-90).
381
- product: Proxy product type.
382
-
383
- Returns:
384
- The proxy URL string.
385
-
386
- Example:
387
- >>> url = client.build_proxy_url(country="us", city="seattle")
388
- >>> proxies = {"http": url, "https": url}
389
- >>> requests.get("https://example.com", proxies=proxies)
390
- """
391
279
  config = ProxyConfig(
392
280
  username=username,
393
281
  password=password,
@@ -403,7 +291,7 @@ class ThordataClient:
403
291
  return config.build_proxy_url()
404
292
 
405
293
  # =========================================================================
406
- # Internal API Request Retry Helper (For all API calls)
294
+ # Internal Request Helpers
407
295
  # =========================================================================
408
296
  def _api_request_with_retry(
409
297
  self,
@@ -414,8 +302,6 @@ class ThordataClient:
414
302
  headers: Optional[Dict[str, str]] = None,
415
303
  params: Optional[Dict[str, Any]] = None,
416
304
  ) -> requests.Response:
417
- """Make an API request with automatic retry on transient failures."""
418
-
419
305
  @with_retry(self._retry_config)
420
306
  def _do_request() -> requests.Response:
421
307
  return self._api_session.request(
@@ -438,8 +324,83 @@ class ThordataClient:
438
324
  f"API request failed: {e}", original_error=e
439
325
  ) from e
440
326
 
327
+ def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
328
+ """Get or create a ProxyManager for the given proxy URL (Pooled)."""
329
+ if proxy_url not in self._proxy_managers:
330
+ # Create a new manager if not cached
331
+ proxy_ssl_context = None
332
+ if proxy_url.startswith("https://"):
333
+ proxy_ssl_context = ssl.create_default_context()
334
+
335
+ self._proxy_managers[proxy_url] = urllib3.ProxyManager(
336
+ proxy_url,
337
+ proxy_ssl_context=proxy_ssl_context,
338
+ num_pools=10, # Allow concurrency
339
+ maxsize=10,
340
+ )
341
+ return self._proxy_managers[proxy_url]
342
+
343
+ def _proxy_request_with_proxy_manager(
344
+ self,
345
+ method: str,
346
+ url: str,
347
+ *,
348
+ proxy_config: ProxyConfig,
349
+ timeout: int,
350
+ headers: Optional[Dict[str, str]] = None,
351
+ params: Optional[Dict[str, Any]] = None,
352
+ data: Any = None,
353
+ ) -> requests.Response:
354
+ # 1. Prepare URL and Body
355
+ req = requests.Request(method=method.upper(), url=url, params=params)
356
+ prepped = self._proxy_session.prepare_request(req)
357
+ final_url = prepped.url or url
358
+
359
+ # 2. Get Proxy Configuration
360
+ proxy_url = proxy_config.build_proxy_endpoint()
361
+ proxy_headers = urllib3.make_headers(
362
+ proxy_basic_auth=proxy_config.build_proxy_basic_auth()
363
+ )
364
+
365
+ # 3. Get Cached Proxy Manager
366
+ pm = self._get_proxy_manager(proxy_url)
367
+
368
+ # 4. Prepare Request Headers/Body
369
+ req_headers = dict(headers or {})
370
+ body = None
371
+ if data is not None:
372
+ if isinstance(data, dict):
373
+ body = urlencode({k: str(v) for k, v in data.items()})
374
+ req_headers.setdefault(
375
+ "Content-Type", "application/x-www-form-urlencoded"
376
+ )
377
+ else:
378
+ body = data
379
+
380
+ # 5. Execute Request via urllib3
381
+ http_resp = pm.request(
382
+ method.upper(),
383
+ final_url,
384
+ body=body,
385
+ headers=req_headers or None,
386
+ proxy_headers=proxy_headers, # Attach Auth here
387
+ timeout=urllib3.Timeout(connect=timeout, read=timeout),
388
+ retries=False, # We handle retries in _proxy_verb
389
+ preload_content=True,
390
+ )
391
+
392
+ # 6. Convert back to requests.Response
393
+ r = requests.Response()
394
+ r.status_code = int(getattr(http_resp, "status", 0) or 0)
395
+ r._content = http_resp.data or b""
396
+ r.url = final_url
397
+ r.headers = requests.structures.CaseInsensitiveDict(
398
+ dict(http_resp.headers or {})
399
+ )
400
+ return r
401
+
441
402
  # =========================================================================
442
- # SERP API Methods (Search Engine Results Page functions)
403
+ # SERP API Methods
443
404
  # =========================================================================
444
405
  def serp_search(
445
406
  self,
@@ -456,46 +417,8 @@ class ThordataClient:
456
417
  output_format: str = "json",
457
418
  **kwargs: Any,
458
419
  ) -> Dict[str, Any]:
459
- """
460
- Execute a real-time SERP (Search Engine Results Page) search.
461
-
462
- Args:
463
- query: The search keywords.
464
- engine: Search engine (google, bing, yandex, duckduckgo, baidu).
465
- num: Number of results to retrieve (default: 10).
466
- country: Country code for localized results (e.g., 'us').
467
- language: Language code for interface (e.g., 'en').
468
- search_type: Type of search (images, news, shopping, videos, etc.).
469
- device: Device type ('desktop', 'mobile', 'tablet').
470
- render_js: Enable JavaScript rendering in SERP (render_js=True).
471
- no_cache: Disable internal caching (no_cache=True).
472
- output_format: 'json' to return parsed JSON (default),
473
- 'html' to return HTML wrapped in {'html': ...}.
474
- **kwargs: Additional engine-specific parameters.
475
-
476
- Returns:
477
- Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
478
-
479
- Example:
480
- >>> # Basic search
481
- >>> results = client.serp_search("python tutorial")
482
- >>>
483
- >>> # With options
484
- >>> results = client.serp_search(
485
- ... "laptop reviews",
486
- ... engine="google",
487
- ... num=20,
488
- ... country="us",
489
- ... search_type="shopping",
490
- ... device="mobile",
491
- ... render_js=True,
492
- ... no_cache=True,
493
- ... )
494
- """
495
- # Normalize engine
496
420
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
497
421
 
498
- # Build request using model
499
422
  request = SerpRequest(
500
423
  query=query,
501
424
  engine=engine_str,
@@ -510,84 +433,13 @@ class ThordataClient:
510
433
  extra_params=kwargs,
511
434
  )
512
435
 
513
- payload = request.to_payload()
514
- headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
515
-
516
- logger.info(
517
- f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
518
- )
519
-
520
- try:
521
- response = self._api_request_with_retry(
522
- "POST",
523
- self._serp_url,
524
- data=payload,
525
- headers=headers,
526
- )
527
- response.raise_for_status()
528
-
529
- # JSON mode (default)
530
- if output_format.lower() == "json":
531
- data = response.json()
532
-
533
- if isinstance(data, dict):
534
- code = data.get("code")
535
- if code is not None and code != 200:
536
- msg = extract_error_message(data)
537
- raise_for_code(
538
- f"SERP API Error: {msg}",
539
- code=code,
540
- payload=data,
541
- )
542
-
543
- return parse_json_response(data)
544
-
545
- # HTML mode: wrap as dict to keep return type stable
546
- return {"html": response.text}
547
-
548
- except requests.Timeout as e:
549
- raise ThordataTimeoutError(
550
- f"SERP request timed out: {e}",
551
- original_error=e,
552
- ) from e
553
- except requests.RequestException as e:
554
- raise ThordataNetworkError(
555
- f"SERP request failed: {e}",
556
- original_error=e,
557
- ) from e
436
+ return self.serp_search_advanced(request)
558
437
 
559
438
  def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
560
- """
561
- Execute a SERP search using a SerpRequest object.
562
-
563
- This method provides full control over all search parameters.
564
-
565
- Args:
566
- request: A SerpRequest object with all parameters configured.
567
-
568
- Returns:
569
- Dict[str, Any]: Parsed JSON results or dict with 'html' key.
570
-
571
- Example:
572
- >>> from thordata.models import SerpRequest
573
- >>> request = SerpRequest(
574
- ... query="python programming",
575
- ... engine="google",
576
- ... num=50,
577
- ... country="us",
578
- ... language="en",
579
- ... search_type="news",
580
- ... time_filter="week",
581
- ... safe_search=True
582
- ... )
583
- >>> results = client.serp_search_advanced(request)
584
- """
585
439
  payload = request.to_payload()
586
440
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
587
441
 
588
- logger.info(
589
- f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
590
- )
442
+ logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
591
443
 
592
444
  try:
593
445
  response = self._api_request_with_retry(
@@ -600,34 +452,22 @@ class ThordataClient:
600
452
 
601
453
  if request.output_format.lower() == "json":
602
454
  data = response.json()
603
-
604
455
  if isinstance(data, dict):
605
456
  code = data.get("code")
606
457
  if code is not None and code != 200:
607
458
  msg = extract_error_message(data)
608
- raise_for_code(
609
- f"SERP API Error: {msg}",
610
- code=code,
611
- payload=data,
612
- )
613
-
459
+ raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
614
460
  return parse_json_response(data)
615
461
 
616
462
  return {"html": response.text}
617
463
 
618
464
  except requests.Timeout as e:
619
- raise ThordataTimeoutError(
620
- f"SERP request timed out: {e}",
621
- original_error=e,
622
- ) from e
465
+ raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
623
466
  except requests.RequestException as e:
624
- raise ThordataNetworkError(
625
- f"SERP request failed: {e}",
626
- original_error=e,
627
- ) from e
467
+ raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
628
468
 
629
469
  # =========================================================================
630
- # Universal Scraping API Methods (Web Unlocker functions)
470
+ # Universal Scraping API
631
471
  # =========================================================================
632
472
  def universal_scrape(
633
473
  self,
@@ -641,37 +481,6 @@ class ThordataClient:
641
481
  wait_for: Optional[str] = None,
642
482
  **kwargs: Any,
643
483
  ) -> Union[str, bytes]:
644
- """
645
- Scrape a URL using the Universal Scraping API (Web Unlocker).
646
-
647
- Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
648
-
649
- Args:
650
- url: Target URL.
651
- js_render: Enable JavaScript rendering (headless browser).
652
- output_format: "html" or "png" (screenshot).
653
- country: Geo-targeting country code.
654
- block_resources: Resources to block (e.g., 'script,image').
655
- wait: Wait time in milliseconds after page load.
656
- wait_for: CSS selector to wait for.
657
- **kwargs: Additional parameters.
658
-
659
- Returns:
660
- HTML string or PNG bytes depending on output_format.
661
-
662
- Example:
663
- >>> # Get HTML
664
- >>> html = client.universal_scrape("https://example.com", js_render=True)
665
- >>>
666
- >>> # Get screenshot
667
- >>> png = client.universal_scrape(
668
- ... "https://example.com",
669
- ... js_render=True,
670
- ... output_format="png"
671
- ... )
672
- >>> with open("screenshot.png", "wb") as f:
673
- ... f.write(png)
674
- """
675
484
  request = UniversalScrapeRequest(
676
485
  url=url,
677
486
  js_render=js_render,
@@ -682,27 +491,15 @@ class ThordataClient:
682
491
  wait_for=wait_for,
683
492
  extra_params=kwargs,
684
493
  )
685
-
686
494
  return self.universal_scrape_advanced(request)
687
495
 
688
496
  def universal_scrape_advanced(
689
497
  self, request: UniversalScrapeRequest
690
498
  ) -> Union[str, bytes]:
691
- """
692
- Scrape using a UniversalScrapeRequest object for full control.
693
-
694
- Args:
695
- request: A UniversalScrapeRequest with all parameters.
696
-
697
- Returns:
698
- HTML string or PNG bytes.
699
- """
700
499
  payload = request.to_payload()
701
500
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
702
501
 
703
- logger.info(
704
- f"Universal Scrape: {request.url} (format: {request.output_format})"
705
- )
502
+ logger.info(f"Universal Scrape: {request.url}")
706
503
 
707
504
  try:
708
505
  response = self._api_request_with_retry(
@@ -712,53 +509,40 @@ class ThordataClient:
712
509
  headers=headers,
713
510
  )
714
511
  response.raise_for_status()
715
-
716
512
  return self._process_universal_response(response, request.output_format)
717
513
 
718
514
  except requests.Timeout as e:
719
515
  raise ThordataTimeoutError(
720
- f"Universal scrape timed out: {e}", original_error=e
516
+ f"Universal timeout: {e}", original_error=e
721
517
  ) from e
722
518
  except requests.RequestException as e:
723
519
  raise ThordataNetworkError(
724
- f"Universal scrape failed: {e}", original_error=e
520
+ f"Universal failed: {e}", original_error=e
725
521
  ) from e
726
522
 
727
523
  def _process_universal_response(
728
524
  self, response: requests.Response, output_format: str
729
525
  ) -> Union[str, bytes]:
730
- """Process the response from Universal API."""
731
- # Try to parse as JSON
732
526
  try:
733
527
  resp_json = response.json()
734
528
  except ValueError:
735
- # Raw content returned
736
- if output_format.lower() == "png":
737
- return response.content
738
- return response.text
529
+ return response.content if output_format.lower() == "png" else response.text
739
530
 
740
- # Check for API-level errors
741
531
  if isinstance(resp_json, dict):
742
532
  code = resp_json.get("code")
743
533
  if code is not None and code != 200:
744
534
  msg = extract_error_message(resp_json)
745
- raise_for_code(
746
- f"Universal API Error: {msg}", code=code, payload=resp_json
747
- )
535
+ raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
748
536
 
749
- # Extract HTML
750
537
  if "html" in resp_json:
751
538
  return resp_json["html"]
752
-
753
- # Extract PNG
754
539
  if "png" in resp_json:
755
540
  return decode_base64_image(resp_json["png"])
756
541
 
757
- # Fallback
758
542
  return str(resp_json)
759
543
 
760
544
  # =========================================================================
761
- # Web Scraper API Methods (Only async task management functions)
545
+ # Web Scraper API (Tasks)
762
546
  # =========================================================================
763
547
  def create_scraper_task(
764
548
  self,
@@ -768,29 +552,6 @@ class ThordataClient:
768
552
  parameters: Dict[str, Any],
769
553
  universal_params: Optional[Dict[str, Any]] = None,
770
554
  ) -> str:
771
- """
772
- Create an asynchronous Web Scraper task.
773
-
774
- Note: Get spider_id and spider_name from the Thordata Dashboard.
775
-
776
- Args:
777
- file_name: Name for the output file.
778
- spider_id: Spider identifier from Dashboard.
779
- spider_name: Spider name (e.g., "youtube.com").
780
- parameters: Spider-specific parameters.
781
- universal_params: Global spider settings.
782
-
783
- Returns:
784
- The created task_id.
785
-
786
- Example:
787
- >>> task_id = client.create_scraper_task(
788
- ... file_name="youtube_data",
789
- ... spider_id="youtube_video-post_by-url",
790
- ... spider_name="youtube.com",
791
- ... parameters={"url": "https://youtube.com/@channel/videos"}
792
- ... )
793
- """
794
555
  config = ScraperTaskConfig(
795
556
  file_name=file_name,
796
557
  spider_id=spider_id,
@@ -798,50 +559,26 @@ class ThordataClient:
798
559
  parameters=parameters,
799
560
  universal_params=universal_params,
800
561
  )
801
-
802
562
  return self.create_scraper_task_advanced(config)
803
563
 
804
564
  def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
805
- """
806
- Create a scraper task using a ScraperTaskConfig object.
807
-
808
- Args:
809
- config: Task configuration.
810
-
811
- Returns:
812
- The created task_id.
813
- """
814
565
  self._require_public_credentials()
815
-
816
566
  payload = config.to_payload()
817
-
818
- # Builder needs 3 headers: token, key, Authorization Bearer
819
567
  headers = build_builder_headers(
820
- self.scraper_token,
821
- self.public_token or "",
822
- self.public_key or "",
568
+ self.scraper_token, self.public_token or "", self.public_key or ""
823
569
  )
824
570
 
825
- logger.info(f"Creating Scraper Task: {config.spider_name}")
826
-
827
571
  try:
828
572
  response = self._api_request_with_retry(
829
- "POST",
830
- self._builder_url,
831
- data=payload,
832
- headers=headers,
573
+ "POST", self._builder_url, data=payload, headers=headers
833
574
  )
834
575
  response.raise_for_status()
835
-
836
576
  data = response.json()
837
- code = data.get("code")
838
-
839
- if code != 200:
840
- msg = extract_error_message(data)
841
- raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
842
-
577
+ if data.get("code") != 200:
578
+ raise_for_code(
579
+ "Task creation failed", code=data.get("code"), payload=data
580
+ )
843
581
  return data["data"]["task_id"]
844
-
845
582
  except requests.RequestException as e:
846
583
  raise ThordataNetworkError(
847
584
  f"Task creation failed: {e}", original_error=e
@@ -855,35 +592,6 @@ class ThordataClient:
855
592
  parameters: Dict[str, Any],
856
593
  common_settings: "CommonSettings",
857
594
  ) -> str:
858
- """
859
- Create a YouTube video/audio download task.
860
-
861
- Uses the /video_builder endpoint.
862
-
863
- Args:
864
- file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
865
- spider_id: Spider identifier (e.g., "youtube_video_by-url").
866
- spider_name: Spider name (typically "youtube.com").
867
- parameters: Spider parameters (e.g., {"url": "..."}).
868
- common_settings: Video/audio settings.
869
-
870
- Returns:
871
- The created task_id.
872
-
873
- Example:
874
- >>> from thordata import CommonSettings
875
- >>> task_id = client.create_video_task(
876
- ... file_name="{{VideoID}}",
877
- ... spider_id="youtube_video_by-url",
878
- ... spider_name="youtube.com",
879
- ... parameters={"url": "https://youtube.com/watch?v=xxx"},
880
- ... common_settings=CommonSettings(
881
- ... resolution="1080p",
882
- ... is_subtitles="true"
883
- ... )
884
- ... )
885
- """
886
-
887
595
  config = VideoTaskConfig(
888
596
  file_name=file_name,
889
597
  spider_id=spider_id,
@@ -891,210 +599,97 @@ class ThordataClient:
891
599
  parameters=parameters,
892
600
  common_settings=common_settings,
893
601
  )
894
-
895
602
  return self.create_video_task_advanced(config)
896
603
 
897
604
  def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
898
- """
899
- Create a video task using VideoTaskConfig object.
900
-
901
- Args:
902
- config: Video task configuration.
903
-
904
- Returns:
905
- The created task_id.
906
- """
907
-
908
605
  self._require_public_credentials()
909
-
910
606
  payload = config.to_payload()
911
607
  headers = build_builder_headers(
912
- self.scraper_token,
913
- self.public_token or "",
914
- self.public_key or "",
608
+ self.scraper_token, self.public_token or "", self.public_key or ""
915
609
  )
916
610
 
917
- logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
918
-
919
611
  response = self._api_request_with_retry(
920
- "POST",
921
- self._video_builder_url,
922
- data=payload,
923
- headers=headers,
612
+ "POST", self._video_builder_url, data=payload, headers=headers
924
613
  )
925
614
  response.raise_for_status()
926
-
927
615
  data = response.json()
928
- code = data.get("code")
929
-
930
- if code != 200:
931
- msg = extract_error_message(data)
616
+ if data.get("code") != 200:
932
617
  raise_for_code(
933
- f"Video task creation failed: {msg}", code=code, payload=data
618
+ "Video task creation failed", code=data.get("code"), payload=data
934
619
  )
935
-
936
620
  return data["data"]["task_id"]
937
621
 
938
622
  def get_task_status(self, task_id: str) -> str:
939
- """
940
- Check the status of an asynchronous scraping task.
941
-
942
- Returns:
943
- Status string (e.g., "running", "ready", "failed").
944
-
945
- Raises:
946
- ThordataConfigError: If public credentials are missing.
947
- ThordataAPIError: If API returns a non-200 code in JSON payload.
948
- ThordataNetworkError: If network/HTTP request fails.
949
- """
950
623
  self._require_public_credentials()
951
-
952
624
  headers = build_public_api_headers(
953
625
  self.public_token or "", self.public_key or ""
954
626
  )
955
- payload = {"tasks_ids": task_id}
956
-
957
627
  try:
958
628
  response = self._api_request_with_retry(
959
629
  "POST",
960
630
  self._status_url,
961
- data=payload,
631
+ data={"tasks_ids": task_id},
962
632
  headers=headers,
963
633
  )
964
634
  response.raise_for_status()
965
635
  data = response.json()
966
-
967
- if isinstance(data, dict):
968
- code = data.get("code")
969
- if code is not None and code != 200:
970
- msg = extract_error_message(data)
971
- raise_for_code(
972
- f"Task status API Error: {msg}",
973
- code=code,
974
- payload=data,
975
- )
976
-
977
- items = data.get("data") or []
978
- for item in items:
979
- if str(item.get("task_id")) == str(task_id):
980
- return item.get("status", "unknown")
981
-
982
- return "unknown"
983
-
984
- # Unexpected payload type
985
- raise ThordataNetworkError(
986
- f"Unexpected task status response type: {type(data).__name__}",
987
- original_error=None,
988
- )
989
-
990
- except requests.Timeout as e:
991
- raise ThordataTimeoutError(
992
- f"Status check timed out: {e}", original_error=e
993
- ) from e
636
+ if data.get("code") != 200:
637
+ raise_for_code("Task status error", code=data.get("code"), payload=data)
638
+
639
+ items = data.get("data") or []
640
+ for item in items:
641
+ if str(item.get("task_id")) == str(task_id):
642
+ return item.get("status", "unknown")
643
+ return "unknown"
994
644
  except requests.RequestException as e:
995
645
  raise ThordataNetworkError(
996
646
  f"Status check failed: {e}", original_error=e
997
647
  ) from e
998
648
 
999
649
  def safe_get_task_status(self, task_id: str) -> str:
1000
- """
1001
- Backward-compatible status check.
1002
-
1003
- Returns:
1004
- Status string, or "error" on any exception.
1005
- """
1006
650
  try:
1007
651
  return self.get_task_status(task_id)
1008
652
  except Exception:
1009
653
  return "error"
1010
654
 
1011
655
  def get_task_result(self, task_id: str, file_type: str = "json") -> str:
1012
- """
1013
- Get the download URL for a completed task.
1014
- """
1015
656
  self._require_public_credentials()
1016
-
1017
657
  headers = build_public_api_headers(
1018
658
  self.public_token or "", self.public_key or ""
1019
659
  )
1020
- payload = {"tasks_id": task_id, "type": file_type}
1021
-
1022
- logger.info(f"Getting result URL for Task: {task_id}")
1023
-
1024
660
  try:
1025
661
  response = self._api_request_with_retry(
1026
662
  "POST",
1027
663
  self._download_url,
1028
- data=payload,
664
+ data={"tasks_id": task_id, "type": file_type},
1029
665
  headers=headers,
1030
666
  )
1031
667
  response.raise_for_status()
1032
-
1033
668
  data = response.json()
1034
- code = data.get("code")
1035
-
1036
- if code == 200 and data.get("data"):
669
+ if data.get("code") == 200 and data.get("data"):
1037
670
  return data["data"]["download"]
1038
-
1039
- msg = extract_error_message(data)
1040
- raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
1041
- # This line won't be reached, but satisfies mypy
1042
- raise RuntimeError("Unexpected state")
1043
-
671
+ raise_for_code("Get result failed", code=data.get("code"), payload=data)
672
+ return ""
1044
673
  except requests.RequestException as e:
1045
674
  raise ThordataNetworkError(
1046
675
  f"Get result failed: {e}", original_error=e
1047
676
  ) from e
1048
677
 
1049
- def list_tasks(
1050
- self,
1051
- page: int = 1,
1052
- size: int = 20,
1053
- ) -> Dict[str, Any]:
1054
- """
1055
- List all Web Scraper tasks.
1056
-
1057
- Args:
1058
- page: Page number (starts from 1).
1059
- size: Number of tasks per page.
1060
-
1061
- Returns:
1062
- Dict containing 'count' and 'list' of tasks.
1063
-
1064
- Example:
1065
- >>> result = client.list_tasks(page=1, size=10)
1066
- >>> print(f"Total tasks: {result['count']}")
1067
- >>> for task in result['list']:
1068
- ... print(f"Task {task['task_id']}: {task['status']}")
1069
- """
678
+ def list_tasks(self, page: int = 1, size: int = 20) -> Dict[str, Any]:
1070
679
  self._require_public_credentials()
1071
-
1072
680
  headers = build_public_api_headers(
1073
681
  self.public_token or "", self.public_key or ""
1074
682
  )
1075
- payload: Dict[str, Any] = {}
1076
- if page:
1077
- payload["page"] = str(page)
1078
- if size:
1079
- payload["size"] = str(size)
1080
-
1081
- logger.info(f"Listing tasks: page={page}, size={size}")
1082
-
1083
683
  response = self._api_request_with_retry(
1084
684
  "POST",
1085
685
  self._list_url,
1086
- data=payload,
686
+ data={"page": str(page), "size": str(size)},
1087
687
  headers=headers,
1088
688
  )
1089
689
  response.raise_for_status()
1090
-
1091
690
  data = response.json()
1092
- code = data.get("code")
1093
-
1094
- if code != 200:
1095
- msg = extract_error_message(data)
1096
- raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
1097
-
691
+ if data.get("code") != 200:
692
+ raise_for_code("List tasks failed", code=data.get("code"), payload=data)
1098
693
  return data.get("data", {"count": 0, "list": []})
1099
694
 
1100
695
  def wait_for_task(
@@ -1104,84 +699,32 @@ class ThordataClient:
1104
699
  poll_interval: float = 5.0,
1105
700
  max_wait: float = 600.0,
1106
701
  ) -> str:
1107
- """
1108
- Wait for a task to complete.
1109
-
1110
- Args:
1111
- task_id: The task ID to wait for.
1112
- poll_interval: Seconds between status checks.
1113
- max_wait: Maximum seconds to wait.
1114
-
1115
- Returns:
1116
- Final task status.
1117
-
1118
- Raises:
1119
- TimeoutError: If max_wait is exceeded.
1120
-
1121
- Example:
1122
- >>> task_id = client.create_scraper_task(...)
1123
- >>> status = client.wait_for_task(task_id, max_wait=300)
1124
- >>> if status in ("ready", "success"):
1125
- ... url = client.get_task_result(task_id)
1126
- """
1127
702
  import time
1128
703
 
1129
704
  start = time.monotonic()
1130
-
1131
705
  while (time.monotonic() - start) < max_wait:
1132
706
  status = self.get_task_status(task_id)
1133
-
1134
- logger.debug(f"Task {task_id} status: {status}")
1135
-
1136
- terminal_statuses = {
707
+ if status.lower() in {
1137
708
  "ready",
1138
709
  "success",
1139
710
  "finished",
1140
711
  "failed",
1141
712
  "error",
1142
713
  "cancelled",
1143
- }
1144
-
1145
- if status.lower() in terminal_statuses:
714
+ }:
1146
715
  return status
1147
-
1148
716
  time.sleep(poll_interval)
1149
-
1150
- raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
717
+ raise TimeoutError(f"Task {task_id} timeout")
1151
718
 
1152
719
  # =========================================================================
1153
- # Proxy Account Management Methods (Proxy balance, user, whitelist functions)
720
+ # Account / Locations / Utils
1154
721
  # =========================================================================
1155
722
  def get_usage_statistics(
1156
723
  self,
1157
724
  from_date: Union[str, date],
1158
725
  to_date: Union[str, date],
1159
726
  ) -> UsageStatistics:
1160
- """
1161
- Get account usage statistics for a date range.
1162
-
1163
- Args:
1164
- from_date: Start date (YYYY-MM-DD string or date object).
1165
- to_date: End date (YYYY-MM-DD string or date object).
1166
-
1167
- Returns:
1168
- UsageStatistics object with traffic data.
1169
-
1170
- Raises:
1171
- ValueError: If date range exceeds 180 days.
1172
-
1173
- Example:
1174
- >>> from datetime import date, timedelta
1175
- >>> today = date.today()
1176
- >>> week_ago = today - timedelta(days=7)
1177
- >>> stats = client.get_usage_statistics(week_ago, today)
1178
- >>> print(f"Used: {stats.range_usage_gb():.2f} GB")
1179
- >>> print(f"Balance: {stats.balance_gb():.2f} GB")
1180
- """
1181
-
1182
727
  self._require_public_credentials()
1183
-
1184
- # Convert dates to strings
1185
728
  if isinstance(from_date, date):
1186
729
  from_date = from_date.strftime("%Y-%m-%d")
1187
730
  if isinstance(to_date, date):
@@ -1193,151 +736,33 @@ class ThordataClient:
1193
736
  "from_date": from_date,
1194
737
  "to_date": to_date,
1195
738
  }
1196
-
1197
- logger.info(f"Getting usage statistics: {from_date} to {to_date}")
1198
-
1199
- response = self._api_request_with_retry(
1200
- "GET",
1201
- self._usage_stats_url,
1202
- params=params,
1203
- )
1204
- response.raise_for_status()
1205
-
1206
- data = response.json()
1207
-
1208
- if isinstance(data, dict):
1209
- code = data.get("code")
1210
- if code is not None and code != 200:
1211
- msg = extract_error_message(data)
1212
- raise_for_code(
1213
- f"Usage statistics error: {msg}",
1214
- code=code,
1215
- payload=data,
1216
- )
1217
-
1218
- # Extract data field
1219
- usage_data = data.get("data", data)
1220
- return UsageStatistics.from_dict(usage_data)
1221
-
1222
- raise ThordataNetworkError(
1223
- f"Unexpected usage statistics response: {type(data).__name__}",
1224
- original_error=None,
1225
- )
1226
-
1227
- def get_residential_balance(self) -> Dict[str, Any]:
1228
- """
1229
- Get residential proxy balance.
1230
-
1231
- Uses public_token/public_key (Dashboard -> My account -> API).
1232
- """
1233
- headers = self._build_gateway_headers()
1234
-
1235
- logger.info("Getting residential proxy balance")
1236
-
1237
- response = self._api_request_with_retry(
1238
- "POST",
1239
- f"{self._gateway_base_url}/getFlowBalance",
1240
- headers=headers,
1241
- data={},
1242
- )
1243
- response.raise_for_status()
1244
-
1245
- data = response.json()
1246
- code = data.get("code")
1247
-
1248
- if code != 200:
1249
- msg = extract_error_message(data)
1250
- raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
1251
-
1252
- return data.get("data", {})
1253
-
1254
- def get_residential_usage(
1255
- self,
1256
- start_time: Union[str, int],
1257
- end_time: Union[str, int],
1258
- ) -> Dict[str, Any]:
1259
- """
1260
- Get residential proxy usage records.
1261
-
1262
- Uses public_token/public_key (Dashboard -> My account -> API).
1263
- """
1264
- headers = self._build_gateway_headers()
1265
- payload = {"start_time": str(start_time), "end_time": str(end_time)}
1266
-
1267
- logger.info(f"Getting residential usage: {start_time} to {end_time}")
1268
-
1269
739
  response = self._api_request_with_retry(
1270
- "POST",
1271
- f"{self._gateway_base_url}/usageRecord",
1272
- headers=headers,
1273
- data=payload,
740
+ "GET", self._usage_stats_url, params=params
1274
741
  )
1275
742
  response.raise_for_status()
1276
-
1277
743
  data = response.json()
1278
- code = data.get("code")
1279
-
1280
- if code != 200:
1281
- msg = extract_error_message(data)
1282
- raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
1283
-
1284
- return data.get("data", {})
744
+ if data.get("code") != 200:
745
+ raise_for_code("Usage stats error", code=data.get("code"), payload=data)
746
+ return UsageStatistics.from_dict(data.get("data", data))
1285
747
 
1286
748
def list_proxy_users(
    self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
) -> ProxyUserList:
    """
    List the proxy users (sub-accounts) for a proxy type.

    Args:
        proxy_type: A ``ProxyType`` member or its raw integer value; enum
            members are converted with ``int()`` before being sent.

    Returns:
        A ``ProxyUserList`` built from the response's ``data`` field, or
        from the whole response when it has no ``data`` key.

    Raises:
        Whatever ``_require_public_credentials`` raises when the public
        token/key are missing, ``requests`` HTTP errors from
        ``raise_for_status``, and the error produced by ``raise_for_code``
        when the response ``code`` is not 200.
    """
    self._require_public_credentials()

    if isinstance(proxy_type, ProxyType):
        type_value = int(proxy_type)
    else:
        type_value = proxy_type

    # Credentials travel as query parameters on this endpoint.
    query = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(type_value),
    }
    endpoint = f"{self._proxy_users_url}/user-list"

    reply = self._api_request_with_retry("GET", endpoint, params=query)
    reply.raise_for_status()

    body = reply.json()
    if body.get("code") != 200:
        raise_for_code("List users error", code=body.get("code"), payload=body)

    users_payload = body.get("data", body)
    return ProxyUserList.from_dict(users_payload)
1341
766
 
1342
767
  def create_proxy_user(
1343
768
  self,
@@ -1347,45 +772,18 @@ class ThordataClient:
1347
772
  traffic_limit: int = 0,
1348
773
  status: bool = True,
1349
774
  ) -> Dict[str, Any]:
1350
- """
1351
- Create a new proxy user (sub-account).
1352
-
1353
- Args:
1354
- username: Username for the new user.
1355
- password: Password for the new user.
1356
- proxy_type: Proxy type (1=Residential, 2=Unlimited).
1357
- traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
1358
- status: Enable/disable user (True/False).
1359
-
1360
- Returns:
1361
- API response data.
1362
-
1363
- Example:
1364
- >>> result = client.create_proxy_user(
1365
- ... username="subuser1",
1366
- ... password="securepass",
1367
- ... traffic_limit=5120, # 5GB
1368
- ... status=True
1369
- ... )
1370
- """
1371
775
  self._require_public_credentials()
1372
-
776
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1373
777
  headers = build_public_api_headers(
1374
778
  self.public_token or "", self.public_key or ""
1375
779
  )
1376
-
1377
780
  payload = {
1378
- "proxy_type": str(
1379
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1380
- ),
781
+ "proxy_type": str(pt),
1381
782
  "username": username,
1382
783
  "password": password,
1383
784
  "traffic_limit": str(traffic_limit),
1384
785
  "status": "true" if status else "false",
1385
786
  }
1386
-
1387
- logger.info(f"Creating proxy user: {username}")
1388
-
1389
787
  response = self._api_request_with_retry(
1390
788
  "POST",
1391
789
  f"{self._proxy_users_url}/create-user",
@@ -1393,14 +791,9 @@ class ThordataClient:
1393
791
  headers=headers,
1394
792
  )
1395
793
  response.raise_for_status()
1396
-
1397
794
  data = response.json()
1398
- code = data.get("code")
1399
-
1400
- if code != 200:
1401
- msg = extract_error_message(data)
1402
- raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
1403
-
795
+ if data.get("code") != 200:
796
+ raise_for_code("Create user failed", code=data.get("code"), payload=data)
1404
797
  return data.get("data", {})
1405
798
 
1406
799
  def add_whitelist_ip(
@@ -1409,296 +802,86 @@ class ThordataClient:
1409
802
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1410
803
  status: bool = True,
1411
804
  ) -> Dict[str, Any]:
1412
- """
1413
- Add an IP to the whitelist for IP authentication.
1414
-
1415
- Args:
1416
- ip: IP address to whitelist.
1417
- proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
1418
- status: Enable/disable the IP (True/False).
1419
-
1420
- Returns:
1421
- API response data.
1422
-
1423
- Example:
1424
- >>> result = client.add_whitelist_ip(
1425
- ... ip="123.45.67.89",
1426
- ... proxy_type=ProxyType.RESIDENTIAL,
1427
- ... status=True
1428
- ... )
1429
- """
1430
805
  self._require_public_credentials()
1431
-
806
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1432
807
  headers = build_public_api_headers(
1433
808
  self.public_token or "", self.public_key or ""
1434
809
  )
1435
-
1436
- # Convert ProxyType to int
1437
- proxy_type_int = (
1438
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1439
- )
1440
-
1441
810
  payload = {
1442
- "proxy_type": str(proxy_type_int),
811
+ "proxy_type": str(pt),
1443
812
  "ip": ip,
1444
813
  "status": "true" if status else "false",
1445
814
  }
1446
-
1447
- logger.info(f"Adding whitelist IP: {ip}")
1448
-
1449
815
  response = self._api_request_with_retry(
1450
- "POST",
1451
- f"{self._whitelist_url}/add-ip",
1452
- data=payload,
1453
- headers=headers,
816
+ "POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
1454
817
  )
1455
818
  response.raise_for_status()
1456
-
1457
819
  data = response.json()
1458
- code = data.get("code")
1459
-
1460
- if code != 200:
1461
- msg = extract_error_message(data)
1462
- raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
1463
-
820
+ if data.get("code") != 200:
821
+ raise_for_code(
822
+ "Add whitelist IP failed", code=data.get("code"), payload=data
823
+ )
1464
824
  return data.get("data", {})
1465
825
 
1466
def list_proxy_servers(self, proxy_type: int) -> List[ProxyServer]:
    """
    List proxy servers for the given proxy type.

    Args:
        proxy_type: Proxy type sent to the API as a string (the 1.0.0
            docstring described it as 1=ISP, 2=Datacenter).

    Returns:
        One ``ProxyServer`` per entry in the response. The response may be
        a dict envelope (entries under ``data``, falling back to ``list``)
        or a bare JSON list. Any other shape, or a null entry list, yields
        an empty list.

    Raises:
        Whatever ``_require_public_credentials`` raises when the public
        token/key are missing, ``requests`` HTTP errors from
        ``raise_for_status``, and the error produced by ``raise_for_code``
        when a dict response carries a ``code`` other than 200.
    """
    self._require_public_credentials()
    params = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type),
    }
    response = self._api_request_with_retry(
        "GET", self._proxy_list_url, params=params
    )
    response.raise_for_status()
    data = response.json()

    server_list: Any = []
    if isinstance(data, dict):
        # The status code only exists on the dict envelope, so it must be
        # checked here: calling .get() on a bare list raises AttributeError
        # and would make the list branch below unreachable.
        code = data.get("code")
        if code != 200:
            raise_for_code("List proxy servers error", code=code, payload=data)
        server_list = data.get("data", data.get("list", []))
    elif isinstance(data, list):
        server_list = data

    # A null "data" field is treated as "no servers" instead of crashing
    # the comprehension with a TypeError.
    return [ProxyServer.from_dict(s) for s in (server_list or [])]
1523
850
 
1524
- def get_isp_regions(self) -> List[Dict[str, Any]]:
1525
- """
1526
- Get available ISP proxy regions.
1527
-
1528
- Uses public_token/public_key (Dashboard -> My account -> API).
1529
- """
1530
- headers = self._build_gateway_headers()
1531
-
1532
- logger.info("Getting ISP regions")
1533
-
1534
- response = self._api_request_with_retry(
1535
- "POST",
1536
- f"{self._gateway_base_url}/getRegionIsp",
1537
- headers=headers,
1538
- data={},
1539
- )
1540
- response.raise_for_status()
1541
-
1542
- data = response.json()
1543
- code = data.get("code")
1544
-
1545
- if code != 200:
1546
- msg = extract_error_message(data)
1547
- raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
1548
-
1549
- return data.get("data", [])
1550
-
1551
- def list_isp_proxies(self) -> List[Dict[str, Any]]:
1552
- """
1553
- List ISP proxies.
1554
-
1555
- Uses public_token/public_key (Dashboard -> My account -> API).
1556
- """
1557
- headers = self._build_gateway_headers()
1558
-
1559
- logger.info("Listing ISP proxies")
1560
-
1561
- response = self._api_request_with_retry(
1562
- "POST",
1563
- f"{self._gateway_base_url}/queryListIsp",
1564
- headers=headers,
1565
- data={},
1566
- )
1567
- response.raise_for_status()
1568
-
1569
- data = response.json()
1570
- code = data.get("code")
1571
-
1572
- if code != 200:
1573
- msg = extract_error_message(data)
1574
- raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
1575
-
1576
- return data.get("data", [])
1577
-
1578
- def get_wallet_balance(self) -> Dict[str, Any]:
1579
- """
1580
- Get wallet balance for ISP proxies.
1581
-
1582
- Uses public_token/public_key (Dashboard -> My account -> API).
1583
- """
1584
- headers = self._build_gateway_headers()
1585
-
1586
- logger.info("Getting wallet balance")
1587
-
1588
- response = self._api_request_with_retry(
1589
- "POST",
1590
- f"{self._gateway_base_url}/getBalance",
1591
- headers=headers,
1592
- data={},
1593
- )
1594
- response.raise_for_status()
1595
-
1596
- data = response.json()
1597
- code = data.get("code")
1598
-
1599
- if code != 200:
1600
- msg = extract_error_message(data)
1601
- raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
1602
-
1603
- return data.get("data", {})
1604
-
1605
851
def get_proxy_expiration(
    self, ips: Union[str, List[str]], proxy_type: int
) -> Dict[str, Any]:
    """
    Look up expiration information for one or more proxy IPs.

    Args:
        ips: A single IP string, or a list of IPs that is joined into a
            comma-separated string before being sent.
        proxy_type: Proxy type, sent to the API as a string.

    Returns:
        The response's ``data`` field, or the whole response dict when it
        has no ``data`` key.

    Raises:
        Whatever ``_require_public_credentials`` raises when the public
        token/key are missing, ``requests`` HTTP errors from
        ``raise_for_status``, and the error produced by ``raise_for_code``
        when the response ``code`` is not 200.
    """
    self._require_public_credentials()

    # The API takes a single comma-separated "ips" value.
    ip_csv = ",".join(ips) if isinstance(ips, list) else ips

    query = {
        "token": self.public_token,
        "key": self.public_key,
        "proxy_type": str(proxy_type),
        "ips": ip_csv,
    }
    reply = self._api_request_with_retry(
        "GET", self._proxy_expiration_url, params=query
    )
    reply.raise_for_status()

    body = reply.json()
    if body.get("code") != 200:
        raise_for_code("Get expiration error", code=body.get("code"), payload=body)
    return body.get("data", body)
1657
-
1658
- # =========================================================================
1659
- # Location API Methods (Country/State/City/ASN functions)
1660
- # =========================================================================
1661
872
def list_countries(
    self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
) -> List[Dict[str, Any]]:
    """
    List the countries available for a proxy type.

    Args:
        proxy_type: A ``ProxyType`` member or its raw integer value; enum
            members are converted with ``int()`` before the lookup.

    Returns:
        The list returned by ``_get_locations`` for the ``countries``
        endpoint.
    """
    if isinstance(proxy_type, ProxyType):
        type_value = int(proxy_type)
    else:
        type_value = proxy_type
    return self._get_locations("countries", proxy_type=type_value)
1679
877
 
1680
878
def list_states(
    self,
    country_code: str,
    proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
) -> List[Dict[str, Any]]:
    """
    List the states available for a country and proxy type.

    Args:
        country_code: Country code forwarded to the locations API
            (the 1.0.0 docstring gave ``'US'`` as an example).
        proxy_type: A ``ProxyType`` member or its raw integer value; enum
            members are converted with ``int()`` before the lookup.

    Returns:
        The list returned by ``_get_locations`` for the ``states``
        endpoint.
    """
    if isinstance(proxy_type, ProxyType):
        type_value = int(proxy_type)
    else:
        type_value = proxy_type
    return self._get_locations(
        "states", proxy_type=type_value, country_code=country_code
    )
1702
885
 
1703
886
  def list_cities(
1704
887
  self,
@@ -1706,26 +889,10 @@ class ThordataClient:
1706
889
  state_code: Optional[str] = None,
1707
890
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1708
891
  ) -> List[Dict[str, Any]]:
1709
- """
1710
- List supported cities for a country/state.
1711
-
1712
- Args:
1713
- country_code: Country code.
1714
- state_code: Optional state code.
1715
- proxy_type: Proxy type.
1716
-
1717
- Returns:
1718
- List of city records.
1719
- """
1720
- kwargs = {
1721
- "proxy_type": (
1722
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1723
- ),
1724
- "country_code": country_code,
1725
- }
892
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
893
+ kwargs = {"proxy_type": pt, "country_code": country_code}
1726
894
  if state_code:
1727
895
  kwargs["state_code"] = state_code
1728
-
1729
896
  return self._get_locations("cities", **kwargs)
1730
897
 
1731
898
  def list_asn(
@@ -1733,88 +900,36 @@ class ThordataClient:
1733
900
  country_code: str,
1734
901
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1735
902
  ) -> List[Dict[str, Any]]:
1736
- """
1737
- List supported ASNs for a country.
1738
-
1739
- Args:
1740
- country_code: Country code.
1741
- proxy_type: Proxy type.
1742
-
1743
- Returns:
1744
- List of ASN records.
1745
- """
1746
- return self._get_locations(
1747
- "asn",
1748
- proxy_type=(
1749
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1750
- ),
1751
- country_code=country_code,
1752
- )
903
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
904
+ return self._get_locations("asn", proxy_type=pt, country_code=country_code)
1753
905
 
1754
906
  def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
1755
- """Internal method to call locations API."""
1756
907
  self._require_public_credentials()
908
+ params = {"token": self.public_token, "key": self.public_key}
909
+ for k, v in kwargs.items():
910
+ params[k] = str(v)
1757
911
 
1758
- params = {
1759
- "token": self.public_token,
1760
- "key": self.public_key,
1761
- }
1762
-
1763
- for key, value in kwargs.items():
1764
- params[key] = str(value)
1765
-
1766
- url = f"{self._locations_base_url}/{endpoint}"
1767
-
1768
- logger.debug(f"Locations API request: {url}")
1769
-
1770
- # Use requests.get directly (no proxy needed for this API)
1771
912
  response = self._api_request_with_retry(
1772
- "GET",
1773
- url,
1774
- params=params,
913
+ "GET", f"{self._locations_base_url}/{endpoint}", params=params
1775
914
  )
1776
915
  response.raise_for_status()
1777
-
1778
916
  data = response.json()
1779
-
1780
917
  if isinstance(data, dict):
1781
- code = data.get("code")
1782
- if code is not None and code != 200:
1783
- msg = data.get("msg", "")
1784
- raise RuntimeError(
1785
- f"Locations API error ({endpoint}): code={code}, msg={msg}"
1786
- )
918
+ if data.get("code") != 200:
919
+ raise RuntimeError(f"Locations error: {data.get('msg')}")
1787
920
  return data.get("data") or []
921
+ return data if isinstance(data, list) else []
1788
922
 
1789
- if isinstance(data, list):
1790
- return data
1791
-
1792
- return []
1793
-
1794
- # =========================================================================
1795
- # Helper Methods (Internal utility functions)
1796
- # =========================================================================
1797
923
  def _require_public_credentials(self) -> None:
1798
- """Ensure public API credentials are available."""
1799
924
  if not self.public_token or not self.public_key:
1800
925
  raise ThordataConfigError(
1801
- "public_token and public_key are required for this operation. "
1802
- "Please provide them when initializing ThordataClient."
926
+ "public_token and public_key are required for this operation."
1803
927
  )
1804
928
 
1805
929
  def _get_proxy_endpoint_overrides(
1806
930
  self, product: ProxyProduct
1807
931
  ) -> tuple[Optional[str], Optional[int], str]:
1808
- """
1809
- Read proxy endpoint overrides from env.
1810
-
1811
- Priority:
1812
- 1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
1813
- 2) THORDATA_PROXY_HOST/PORT/PROTOCOL
1814
- 3) defaults (host/port None => ProxyConfig will use its product defaults)
1815
- """
1816
- prefix = product.value.upper() # RESIDENTIAL / DATACENTER / MOBILE / ISP
1817
-
932
+ prefix = product.value.upper()
1818
933
  host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
1819
934
  "THORDATA_PROXY_HOST"
1820
935
  )
@@ -1826,184 +941,37 @@ class ThordataClient:
1826
941
  or os.getenv("THORDATA_PROXY_PROTOCOL")
1827
942
  or "http"
1828
943
  )
1829
-
1830
- port: Optional[int] = None
1831
- if port_raw:
1832
- try:
1833
- port = int(port_raw)
1834
- except ValueError:
1835
- port = None
1836
-
944
+ port = int(port_raw) if port_raw and port_raw.isdigit() else None
1837
945
  return host or None, port, protocol
1838
946
 
1839
947
def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
    """
    Build a default ``ProxyConfig`` from environment variables, if possible.

    Products are tried in order: residential, datacenter, mobile. The first
    one with both ``THORDATA_<PRODUCT>_USERNAME`` and
    ``THORDATA_<PRODUCT>_PASSWORD`` set to non-empty values wins. Its
    host/port/protocol come from ``_get_proxy_endpoint_overrides``.

    Returns:
        A ``ProxyConfig`` for the first product with complete credentials,
        or ``None`` when no product has both variables set.
    """
    candidates = (
        ProxyProduct.RESIDENTIAL,
        ProxyProduct.DATACENTER,
        ProxyProduct.MOBILE,
    )
    for product in candidates:
        env_prefix = product.value.upper()
        username = os.getenv(f"THORDATA_{env_prefix}_USERNAME")
        password = os.getenv(f"THORDATA_{env_prefix}_PASSWORD")
        if not (username and password):
            # Incomplete credentials for this product; try the next one.
            continue

        host, port, protocol = self._get_proxy_endpoint_overrides(product)
        return ProxyConfig(
            username=username,
            password=password,
            product=product,
            host=host,
            port=port,
            protocol=protocol,
        )
    return None
2002
967
 
2003
968
def close(self) -> None:
    """
    Release the client's network resources.

    Closes the proxy session and the API session, then calls ``clear()``
    on every object cached in ``self._proxy_managers`` and empties that
    cache.
    """
    self._proxy_session.close()
    self._api_session.close()

    # Clean up connection pools held by the cached proxy managers
    # (presumably urllib3 ProxyManager instances - TODO confirm).
    managers = self._proxy_managers
    for manager in managers.values():
        manager.clear()
    managers.clear()
2007
975
 
2008
976
  def __enter__(self) -> ThordataClient:
2009
977
  return self