thordata-sdk 0.8.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -25,17 +25,19 @@ from __future__ import annotations
25
25
 
26
26
  import logging
27
27
  import os
28
- from datetime import date, datetime
28
+ import ssl
29
+ from datetime import date
29
30
  from typing import Any, Dict, List, Optional, Union
31
+ from urllib.parse import urlencode
30
32
 
31
33
  import requests
34
+ import urllib3
32
35
 
33
36
  from . import __version__ as _sdk_version
34
37
  from ._utils import (
35
38
  build_auth_headers,
36
39
  build_builder_headers,
37
40
  build_public_api_headers,
38
- build_sign_headers,
39
41
  build_user_agent,
40
42
  decode_base64_image,
41
43
  extract_error_message,
@@ -53,13 +55,13 @@ from .models import (
53
55
  ProxyConfig,
54
56
  ProxyProduct,
55
57
  ProxyServer,
56
- ProxyUser,
57
58
  ProxyUserList,
58
59
  ScraperTaskConfig,
59
60
  SerpRequest,
60
61
  UniversalScrapeRequest,
61
62
  UsageStatistics,
62
63
  VideoTaskConfig,
64
+ WhitelistProxyConfig,
63
65
  )
64
66
  from .retry import RetryConfig, with_retry
65
67
 
@@ -67,32 +69,6 @@ logger = logging.getLogger(__name__)
67
69
 
68
70
 
69
71
  class ThordataClient:
70
- """
71
- The official synchronous Python client for Thordata.
72
-
73
- This client handles authentication and communication with:
74
- - Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
75
- - SERP API (Real-time Search Engine Results)
76
- - Universal Scraping API (Web Unlocker - Single Page Rendering)
77
- - Web Scraper API (Async Task Management)
78
-
79
- Args:
80
- scraper_token: The API token from your Dashboard.
81
- public_token: The public API token (for task status, locations).
82
- public_key: The public API key.
83
- proxy_host: Custom proxy gateway host (optional).
84
- proxy_port: Custom proxy gateway port (optional).
85
- timeout: Default request timeout in seconds (default: 30).
86
- retry_config: Configuration for automatic retries (optional).
87
-
88
- Example:
89
- >>> client = ThordataClient(
90
- ... scraper_token="your_scraper_token",
91
- ... public_token="your_public_token",
92
- ... public_key="your_public_key"
93
- ... )
94
- """
95
-
96
72
  # API Endpoints
97
73
  BASE_URL = "https://scraperapi.thordata.com"
98
74
  UNIVERSAL_URL = "https://universalapi.thordata.com"
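
The class docstring removed in the hunk above was the only place the constructor arguments were documented. A minimal construction sketch based on that 0.8.0 docstring (placeholder credentials; the client is imported from its module path rather than assuming a top-level re-export):

    from thordata.client import ThordataClient

    # Placeholder credentials -- use the tokens from your own dashboard.
    client = ThordataClient(
        scraper_token="your_scraper_token",
        public_token="your_public_token",   # needed for task status / locations
        public_key="your_public_key",
        timeout=30,                         # default request timeout in seconds
    )
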
@@ -104,8 +80,6 @@ class ThordataClient:
104
80
  scraper_token: str,
105
81
  public_token: Optional[str] = None,
106
82
  public_key: Optional[str] = None,
107
- sign: Optional[str] = None,
108
- api_key: Optional[str] = None,
109
83
  proxy_host: str = "pr.thordata.net",
110
84
  proxy_port: int = 9999,
111
85
  timeout: int = 30,
@@ -121,22 +95,14 @@ class ThordataClient:
121
95
  if not scraper_token:
122
96
  raise ThordataConfigError("scraper_token is required")
123
97
 
98
+ # Core credentials
124
99
  self.scraper_token = scraper_token
125
100
  self.public_token = public_token
126
101
  self.public_key = public_key
127
102
 
128
- # Automatic Fallback Logic: If sign/api_key is not provided, try using public_token/key
129
- self.sign = sign or os.getenv("THORDATA_SIGN") or self.public_token
130
- self.api_key = api_key or os.getenv("THORDATA_API_KEY") or self.public_key
131
-
132
- # Public API authentication
133
- self.sign = sign or os.getenv("THORDATA_SIGN")
134
- self.api_key = api_key or os.getenv("THORDATA_API_KEY")
135
-
136
103
  # Proxy configuration
137
104
  self._proxy_host = proxy_host
138
105
  self._proxy_port = proxy_port
139
- self._default_timeout = timeout
140
106
 
141
107
  # Timeout configuration
142
108
  self._default_timeout = timeout
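
Worth noting about the hunk above: in 0.8.0 the constructor assigned self.sign and self.api_key twice, first with a fallback to public_token/public_key and then again without it, so the documented fallback never actually took effect; 1.0.1 removes the sign/api_key parameters entirely. An illustrative restatement of the removed logic (not part of either release):

    import os

    sign_arg, public_token = None, "pub_token_placeholder"

    # First 0.8.0 assignment, with the documented fallback:
    sign = sign_arg or os.getenv("THORDATA_SIGN") or public_token
    # Second assignment a few lines later overwrote it without the fallback:
    sign = sign_arg or os.getenv("THORDATA_SIGN")
    print(sign)  # None when neither the argument nor THORDATA_SIGN is set
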
@@ -145,39 +111,28 @@ class ThordataClient:
145
111
  # Retry configuration
146
112
  self._retry_config = retry_config or RetryConfig()
147
113
 
148
- # Authentication mode
114
+ # Authentication mode (for scraping APIs)
149
115
  self._auth_mode = auth_mode.lower()
150
116
  if self._auth_mode not in ("bearer", "header_token"):
151
117
  raise ThordataConfigError(
152
118
  f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
153
119
  )
154
120
 
155
- # Build default proxy URL (for basic usage)
156
- self._default_proxy_url = (
157
- f"http://td-customer-{self.scraper_token}:@{proxy_host}:{proxy_port}"
158
- )
159
-
160
- # Sessions:
161
- # - _proxy_session: used for proxy network traffic to target sites
162
- # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
163
- #
164
- # We intentionally do NOT set session-level proxies for _api_session,
165
- # so developers can rely on system proxy settings (e.g., Clash) via env vars.
121
+ # HTTP Sessions
166
122
  self._proxy_session = requests.Session()
167
123
  self._proxy_session.trust_env = False
168
- self._proxy_session.proxies = {
169
- "http": self._default_proxy_url,
170
- "https": self._default_proxy_url,
171
- }
124
+
125
+ # Cache for ProxyManagers (Connection Pooling Fix)
126
+ # Key: proxy_url (str), Value: urllib3.ProxyManager
127
+ self._proxy_managers: Dict[str, urllib3.ProxyManager] = {}
172
128
 
173
129
  self._api_session = requests.Session()
174
130
  self._api_session.trust_env = True
175
-
176
131
  self._api_session.headers.update(
177
132
  {"User-Agent": build_user_agent(_sdk_version, "requests")}
178
133
  )
179
134
 
180
- # Base URLs (allow override via args or env vars for testing and custom routing)
135
+ # Base URLs
181
136
  scraperapi_base = (
182
137
  scraperapi_base_url
183
138
  or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
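
The hunk above validates auth_mode against "bearer" and "header_token", introduces the per-URL ProxyManager cache, and resolves each API base URL from an argument, an environment variable, or a built-in default. A small sketch of redirecting the scraper API before construction; only the environment-variable name comes from the diff, the host is hypothetical:

    import os

    # Hypothetical staging host -- only the variable name is taken from the diff.
    os.environ["THORDATA_SCRAPERAPI_BASE_URL"] = "https://scraperapi.staging.example.com"

    from thordata.client import ThordataClient

    client = ThordataClient(
        scraper_token="your_scraper_token",
        auth_mode="header_token",  # anything else raises ThordataConfigError
    )
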
@@ -205,39 +160,42 @@ class ThordataClient:
205
160
  gateway_base = os.getenv(
206
161
  "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
207
162
  )
208
- child_base = os.getenv(
163
+ self._gateway_base_url = gateway_base
164
+ self._child_base_url = os.getenv(
209
165
  "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
210
166
  )
211
167
 
212
- self._gateway_base_url = gateway_base
213
- self._child_base_url = child_base
214
-
215
168
  self._serp_url = f"{scraperapi_base}/request"
216
169
  self._builder_url = f"{scraperapi_base}/builder"
217
170
  self._video_builder_url = f"{scraperapi_base}/video_builder"
218
171
  self._universal_url = f"{universalapi_base}/request"
172
+
219
173
  self._status_url = f"{web_scraper_api_base}/tasks-status"
220
174
  self._download_url = f"{web_scraper_api_base}/tasks-download"
175
+ self._list_url = f"{web_scraper_api_base}/tasks-list"
176
+
221
177
  self._locations_base_url = locations_base
178
+
222
179
  self._usage_stats_url = (
223
180
  f"{locations_base.replace('/locations', '')}/account/usage-statistics"
224
181
  )
225
182
  self._proxy_users_url = (
226
183
  f"{locations_base.replace('/locations', '')}/proxy-users"
227
184
  )
185
+
228
186
  whitelist_base = os.getenv(
229
187
  "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
230
188
  )
231
189
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
190
+
232
191
  proxy_api_base = os.getenv(
233
192
  "THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
234
193
  )
235
194
  self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
236
195
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
237
- self._list_url = f"{web_scraper_api_base}/tasks-list"
238
196
 
239
197
  # =========================================================================
240
- # Proxy Network Methods (Pure proxy network request functions)
198
+ # Proxy Network Methods
241
199
  # =========================================================================
242
200
  def get(
243
201
  self,
@@ -247,41 +205,8 @@ class ThordataClient:
247
205
  timeout: Optional[int] = None,
248
206
  **kwargs: Any,
249
207
  ) -> requests.Response:
250
- """
251
- Send a GET request through the Thordata Proxy Network.
252
-
253
- Args:
254
- url: The target URL.
255
- proxy_config: Custom proxy configuration for geo-targeting/sessions.
256
- timeout: Request timeout in seconds.
257
- **kwargs: Additional arguments to pass to requests.get().
258
-
259
- Returns:
260
- The response object.
261
-
262
- Example:
263
- >>> # Basic request
264
- >>> response = client.get("https://httpbin.org/ip")
265
- >>>
266
- >>> # With geo-targeting
267
- >>> from thordata.models import ProxyConfig
268
- >>> config = ProxyConfig(
269
- ... username="myuser",
270
- ... password="mypass",
271
- ... country="us",
272
- ... city="seattle"
273
- ... )
274
- >>> response = client.get("https://httpbin.org/ip", proxy_config=config)
275
- """
276
208
  logger.debug(f"Proxy GET request: {url}")
277
-
278
- timeout = timeout or self._default_timeout
279
-
280
- if proxy_config:
281
- proxies = proxy_config.to_proxies_dict()
282
- kwargs["proxies"] = proxies
283
-
284
- return self._request_with_retry("GET", url, timeout=timeout, **kwargs)
209
+ return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
285
210
 
286
211
  def post(
287
212
  self,
@@ -291,32 +216,58 @@ class ThordataClient:
291
216
  timeout: Optional[int] = None,
292
217
  **kwargs: Any,
293
218
  ) -> requests.Response:
294
- """
295
- Send a POST request through the Thordata Proxy Network.
296
-
297
- Args:
298
- url: The target URL.
299
- proxy_config: Custom proxy configuration.
300
- timeout: Request timeout in seconds.
301
- **kwargs: Additional arguments to pass to requests.post().
302
-
303
- Returns:
304
- The response object.
305
- """
306
219
  logger.debug(f"Proxy POST request: {url}")
220
+ return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
307
221
 
222
+ def _proxy_verb(
223
+ self,
224
+ method: str,
225
+ url: str,
226
+ proxy_config: Optional[ProxyConfig],
227
+ timeout: Optional[int],
228
+ **kwargs: Any,
229
+ ) -> requests.Response:
308
230
  timeout = timeout or self._default_timeout
309
231
 
310
- if proxy_config:
311
- proxies = proxy_config.to_proxies_dict()
312
- kwargs["proxies"] = proxies
232
+ if proxy_config is None:
233
+ proxy_config = self._get_default_proxy_config_from_env()
234
+
235
+ if proxy_config is None:
236
+ raise ThordataConfigError(
237
+ "Proxy credentials are missing. "
238
+ "Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
239
+ )
240
+
241
+ # For requests/urllib3, we don't need 'proxies' dict in kwargs
242
+ # because we use ProxyManager directly.
243
+ # But we remove it if user accidentally passed it to avoid confusion.
244
+ kwargs.pop("proxies", None)
245
+
246
+ @with_retry(self._retry_config)
247
+ def _do() -> requests.Response:
248
+ return self._proxy_request_with_proxy_manager(
249
+ method,
250
+ url,
251
+ proxy_config=proxy_config, # type: ignore
252
+ timeout=timeout, # type: ignore
253
+ headers=kwargs.pop("headers", None),
254
+ params=kwargs.pop("params", None),
255
+ data=kwargs.pop("data", None),
256
+ )
313
257
 
314
- return self._request_with_retry("POST", url, timeout=timeout, **kwargs)
258
+ try:
259
+ return _do()
260
+ except requests.Timeout as e:
261
+ raise ThordataTimeoutError(
262
+ f"Request timed out: {e}", original_error=e
263
+ ) from e
264
+ except Exception as e:
265
+ raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
315
266
 
316
267
  def build_proxy_url(
317
268
  self,
318
- username: str, # Required
319
- password: str, # Required
269
+ username: str,
270
+ password: str,
320
271
  *,
321
272
  country: Optional[str] = None,
322
273
  state: Optional[str] = None,
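
The new _proxy_verb above requires explicit proxy credentials: either a ProxyConfig argument or the THORDATA_RESIDENTIAL_USERNAME/PASSWORD environment variables read by _get_default_proxy_config_from_env(). A usage sketch based on the example in the removed 0.8.0 docstring of get() (placeholder credentials):

    from thordata.client import ThordataClient
    from thordata.models import ProxyConfig

    client = ThordataClient(scraper_token="your_scraper_token")

    # Geo-targeted request through the proxy network (all values are placeholders).
    config = ProxyConfig(
        username="myuser",
        password="mypass",
        country="us",
        city="seattle",
    )
    response = client.get("https://httpbin.org/ip", proxy_config=config, timeout=20)
    print(response.status_code, response.text)
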
@@ -325,28 +276,6 @@ class ThordataClient:
325
276
  session_duration: Optional[int] = None,
326
277
  product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
327
278
  ) -> str:
328
- """
329
- Build a proxy URL with custom targeting options.
330
-
331
- This is a convenience method for creating proxy URLs without
332
- manually constructing a ProxyConfig.
333
-
334
- Args:
335
- country: Target country code (e.g., 'us', 'gb').
336
- state: Target state (e.g., 'california').
337
- city: Target city (e.g., 'seattle').
338
- session_id: Session ID for sticky sessions.
339
- session_duration: Session duration in minutes (1-90).
340
- product: Proxy product type.
341
-
342
- Returns:
343
- The proxy URL string.
344
-
345
- Example:
346
- >>> url = client.build_proxy_url(country="us", city="seattle")
347
- >>> proxies = {"http": url, "https": url}
348
- >>> requests.get("https://example.com", proxies=proxies)
349
- """
350
279
  config = ProxyConfig(
351
280
  username=username,
352
281
  password=password,
@@ -362,7 +291,7 @@ class ThordataClient:
362
291
  return config.build_proxy_url()
363
292
 
364
293
  # =========================================================================
365
- # Internal API Request Retry Helper (For all API calls)
294
+ # Internal Request Helpers
366
295
  # =========================================================================
367
296
  def _api_request_with_retry(
368
297
  self,
@@ -373,8 +302,6 @@ class ThordataClient:
373
302
  headers: Optional[Dict[str, str]] = None,
374
303
  params: Optional[Dict[str, Any]] = None,
375
304
  ) -> requests.Response:
376
- """Make an API request with automatic retry on transient failures."""
377
-
378
305
  @with_retry(self._retry_config)
379
306
  def _do_request() -> requests.Response:
380
307
  return self._api_session.request(
@@ -397,8 +324,83 @@ class ThordataClient:
397
324
  f"API request failed: {e}", original_error=e
398
325
  ) from e
399
326
 
327
+ def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
328
+ """Get or create a ProxyManager for the given proxy URL (Pooled)."""
329
+ if proxy_url not in self._proxy_managers:
330
+ # Create a new manager if not cached
331
+ proxy_ssl_context = None
332
+ if proxy_url.startswith("https://"):
333
+ proxy_ssl_context = ssl.create_default_context()
334
+
335
+ self._proxy_managers[proxy_url] = urllib3.ProxyManager(
336
+ proxy_url,
337
+ proxy_ssl_context=proxy_ssl_context,
338
+ num_pools=10, # Allow concurrency
339
+ maxsize=10,
340
+ )
341
+ return self._proxy_managers[proxy_url]
342
+
343
+ def _proxy_request_with_proxy_manager(
344
+ self,
345
+ method: str,
346
+ url: str,
347
+ *,
348
+ proxy_config: ProxyConfig,
349
+ timeout: int,
350
+ headers: Optional[Dict[str, str]] = None,
351
+ params: Optional[Dict[str, Any]] = None,
352
+ data: Any = None,
353
+ ) -> requests.Response:
354
+ # 1. Prepare URL and Body
355
+ req = requests.Request(method=method.upper(), url=url, params=params)
356
+ prepped = self._proxy_session.prepare_request(req)
357
+ final_url = prepped.url or url
358
+
359
+ # 2. Get Proxy Configuration
360
+ proxy_url = proxy_config.build_proxy_endpoint()
361
+ proxy_headers = urllib3.make_headers(
362
+ proxy_basic_auth=proxy_config.build_proxy_basic_auth()
363
+ )
364
+
365
+ # 3. Get Cached Proxy Manager
366
+ pm = self._get_proxy_manager(proxy_url)
367
+
368
+ # 4. Prepare Request Headers/Body
369
+ req_headers = dict(headers or {})
370
+ body = None
371
+ if data is not None:
372
+ if isinstance(data, dict):
373
+ body = urlencode({k: str(v) for k, v in data.items()})
374
+ req_headers.setdefault(
375
+ "Content-Type", "application/x-www-form-urlencoded"
376
+ )
377
+ else:
378
+ body = data
379
+
380
+ # 5. Execute Request via urllib3
381
+ http_resp = pm.request(
382
+ method.upper(),
383
+ final_url,
384
+ body=body,
385
+ headers=req_headers or None,
386
+ proxy_headers=proxy_headers, # Attach Auth here
387
+ timeout=urllib3.Timeout(connect=timeout, read=timeout),
388
+ retries=False, # We handle retries in _proxy_verb
389
+ preload_content=True,
390
+ )
391
+
392
+ # 6. Convert back to requests.Response
393
+ r = requests.Response()
394
+ r.status_code = int(getattr(http_resp, "status", 0) or 0)
395
+ r._content = http_resp.data or b""
396
+ r.url = final_url
397
+ r.headers = requests.structures.CaseInsensitiveDict(
398
+ dict(http_resp.headers or {})
399
+ )
400
+ return r
401
+
400
402
  # =========================================================================
401
- # SERP API Methods (Search Engine Results Page functions)
403
+ # SERP API Methods
402
404
  # =========================================================================
403
405
  def serp_search(
404
406
  self,
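
The hunk above replaces per-request proxy wiring with one cached urllib3.ProxyManager per proxy URL, so repeated calls reuse pooled connections, and then copies the urllib3 response back into a requests.Response. A standalone sketch of the same ProxyManager pattern against a generic authenticated HTTP proxy; the host and credentials are placeholders and independent of the SDK:

    import urllib3

    proxy_url = "http://proxy.example.com:8080"  # placeholder gateway
    auth = urllib3.make_headers(proxy_basic_auth="username:password")

    # One ProxyManager per proxy URL; its internal pools are reused across requests.
    pm = urllib3.ProxyManager(
        proxy_url,
        proxy_headers=auth,   # Proxy-Authorization sent with each proxied request
        num_pools=10,
        maxsize=10,
    )

    resp = pm.request(
        "GET",
        "http://httpbin.org/ip",
        timeout=urllib3.Timeout(connect=30, read=30),
        retries=False,        # leave retry policy to the caller, as the SDK does
    )
    print(resp.status, resp.data[:200])
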
@@ -415,46 +417,8 @@ class ThordataClient:
415
417
  output_format: str = "json",
416
418
  **kwargs: Any,
417
419
  ) -> Dict[str, Any]:
418
- """
419
- Execute a real-time SERP (Search Engine Results Page) search.
420
-
421
- Args:
422
- query: The search keywords.
423
- engine: Search engine (google, bing, yandex, duckduckgo, baidu).
424
- num: Number of results to retrieve (default: 10).
425
- country: Country code for localized results (e.g., 'us').
426
- language: Language code for interface (e.g., 'en').
427
- search_type: Type of search (images, news, shopping, videos, etc.).
428
- device: Device type ('desktop', 'mobile', 'tablet').
429
- render_js: Enable JavaScript rendering in SERP (render_js=True).
430
- no_cache: Disable internal caching (no_cache=True).
431
- output_format: 'json' to return parsed JSON (default),
432
- 'html' to return HTML wrapped in {'html': ...}.
433
- **kwargs: Additional engine-specific parameters.
434
-
435
- Returns:
436
- Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
437
-
438
- Example:
439
- >>> # Basic search
440
- >>> results = client.serp_search("python tutorial")
441
- >>>
442
- >>> # With options
443
- >>> results = client.serp_search(
444
- ... "laptop reviews",
445
- ... engine="google",
446
- ... num=20,
447
- ... country="us",
448
- ... search_type="shopping",
449
- ... device="mobile",
450
- ... render_js=True,
451
- ... no_cache=True,
452
- ... )
453
- """
454
- # Normalize engine
455
420
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
456
421
 
457
- # Build request using model
458
422
  request = SerpRequest(
459
423
  query=query,
460
424
  engine=engine_str,
@@ -469,84 +433,13 @@ class ThordataClient:
469
433
  extra_params=kwargs,
470
434
  )
471
435
 
472
- payload = request.to_payload()
473
- headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
474
-
475
- logger.info(
476
- f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
477
- )
478
-
479
- try:
480
- response = self._api_request_with_retry(
481
- "POST",
482
- self._serp_url,
483
- data=payload,
484
- headers=headers,
485
- )
486
- response.raise_for_status()
487
-
488
- # JSON mode (default)
489
- if output_format.lower() == "json":
490
- data = response.json()
491
-
492
- if isinstance(data, dict):
493
- code = data.get("code")
494
- if code is not None and code != 200:
495
- msg = extract_error_message(data)
496
- raise_for_code(
497
- f"SERP API Error: {msg}",
498
- code=code,
499
- payload=data,
500
- )
501
-
502
- return parse_json_response(data)
503
-
504
- # HTML mode: wrap as dict to keep return type stable
505
- return {"html": response.text}
506
-
507
- except requests.Timeout as e:
508
- raise ThordataTimeoutError(
509
- f"SERP request timed out: {e}",
510
- original_error=e,
511
- ) from e
512
- except requests.RequestException as e:
513
- raise ThordataNetworkError(
514
- f"SERP request failed: {e}",
515
- original_error=e,
516
- ) from e
436
+ return self.serp_search_advanced(request)
517
437
 
518
438
  def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
519
- """
520
- Execute a SERP search using a SerpRequest object.
521
-
522
- This method provides full control over all search parameters.
523
-
524
- Args:
525
- request: A SerpRequest object with all parameters configured.
526
-
527
- Returns:
528
- Dict[str, Any]: Parsed JSON results or dict with 'html' key.
529
-
530
- Example:
531
- >>> from thordata.models import SerpRequest
532
- >>> request = SerpRequest(
533
- ... query="python programming",
534
- ... engine="google",
535
- ... num=50,
536
- ... country="us",
537
- ... language="en",
538
- ... search_type="news",
539
- ... time_filter="week",
540
- ... safe_search=True
541
- ... )
542
- >>> results = client.serp_search_advanced(request)
543
- """
544
439
  payload = request.to_payload()
545
440
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
546
441
 
547
- logger.info(
548
- f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
549
- )
442
+ logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
550
443
 
551
444
  try:
552
445
  response = self._api_request_with_retry(
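
With the hunks above, serp_search is now a thin wrapper that builds a SerpRequest and defers to serp_search_advanced. A call sketch using the options listed in the removed 0.8.0 docstring (placeholder token):

    from thordata.client import ThordataClient

    client = ThordataClient(scraper_token="your_scraper_token")

    results = client.serp_search(
        "laptop reviews",
        engine="google",
        num=20,
        country="us",
        search_type="shopping",
        device="mobile",
        render_js=True,
        no_cache=True,
    )
    # JSON is the default; pass output_format="html" to get {"html": "..."} instead.
    print(list(results)[:5])
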
@@ -559,34 +452,22 @@ class ThordataClient:
559
452
 
560
453
  if request.output_format.lower() == "json":
561
454
  data = response.json()
562
-
563
455
  if isinstance(data, dict):
564
456
  code = data.get("code")
565
457
  if code is not None and code != 200:
566
458
  msg = extract_error_message(data)
567
- raise_for_code(
568
- f"SERP API Error: {msg}",
569
- code=code,
570
- payload=data,
571
- )
572
-
459
+ raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
573
460
  return parse_json_response(data)
574
461
 
575
462
  return {"html": response.text}
576
463
 
577
464
  except requests.Timeout as e:
578
- raise ThordataTimeoutError(
579
- f"SERP request timed out: {e}",
580
- original_error=e,
581
- ) from e
465
+ raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
582
466
  except requests.RequestException as e:
583
- raise ThordataNetworkError(
584
- f"SERP request failed: {e}",
585
- original_error=e,
586
- ) from e
467
+ raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
587
468
 
588
469
  # =========================================================================
589
- # Universal Scraping API Methods (Web Unlocker functions)
470
+ # Universal Scraping API
590
471
  # =========================================================================
591
472
  def universal_scrape(
592
473
  self,
@@ -600,37 +481,6 @@ class ThordataClient:
600
481
  wait_for: Optional[str] = None,
601
482
  **kwargs: Any,
602
483
  ) -> Union[str, bytes]:
603
- """
604
- Scrape a URL using the Universal Scraping API (Web Unlocker).
605
-
606
- Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
607
-
608
- Args:
609
- url: Target URL.
610
- js_render: Enable JavaScript rendering (headless browser).
611
- output_format: "html" or "png" (screenshot).
612
- country: Geo-targeting country code.
613
- block_resources: Resources to block (e.g., 'script,image').
614
- wait: Wait time in milliseconds after page load.
615
- wait_for: CSS selector to wait for.
616
- **kwargs: Additional parameters.
617
-
618
- Returns:
619
- HTML string or PNG bytes depending on output_format.
620
-
621
- Example:
622
- >>> # Get HTML
623
- >>> html = client.universal_scrape("https://example.com", js_render=True)
624
- >>>
625
- >>> # Get screenshot
626
- >>> png = client.universal_scrape(
627
- ... "https://example.com",
628
- ... js_render=True,
629
- ... output_format="png"
630
- ... )
631
- >>> with open("screenshot.png", "wb") as f:
632
- ... f.write(png)
633
- """
634
484
  request = UniversalScrapeRequest(
635
485
  url=url,
636
486
  js_render=js_render,
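
The docstring removed above showed both the HTML and screenshot modes of the Universal Scraping API; a condensed restatement of that 0.8.0 example (placeholder token):

    from thordata.client import ThordataClient

    client = ThordataClient(scraper_token="your_scraper_token")

    # HTML with JavaScript rendering.
    html = client.universal_scrape("https://example.com", js_render=True)

    # PNG screenshot, returned as bytes.
    png = client.universal_scrape(
        "https://example.com",
        js_render=True,
        output_format="png",
    )
    with open("screenshot.png", "wb") as f:
        f.write(png)
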
@@ -641,27 +491,15 @@ class ThordataClient:
641
491
  wait_for=wait_for,
642
492
  extra_params=kwargs,
643
493
  )
644
-
645
494
  return self.universal_scrape_advanced(request)
646
495
 
647
496
  def universal_scrape_advanced(
648
497
  self, request: UniversalScrapeRequest
649
498
  ) -> Union[str, bytes]:
650
- """
651
- Scrape using a UniversalScrapeRequest object for full control.
652
-
653
- Args:
654
- request: A UniversalScrapeRequest with all parameters.
655
-
656
- Returns:
657
- HTML string or PNG bytes.
658
- """
659
499
  payload = request.to_payload()
660
500
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
661
501
 
662
- logger.info(
663
- f"Universal Scrape: {request.url} (format: {request.output_format})"
664
- )
502
+ logger.info(f"Universal Scrape: {request.url}")
665
503
 
666
504
  try:
667
505
  response = self._api_request_with_retry(
@@ -671,53 +509,40 @@ class ThordataClient:
671
509
  headers=headers,
672
510
  )
673
511
  response.raise_for_status()
674
-
675
512
  return self._process_universal_response(response, request.output_format)
676
513
 
677
514
  except requests.Timeout as e:
678
515
  raise ThordataTimeoutError(
679
- f"Universal scrape timed out: {e}", original_error=e
516
+ f"Universal timeout: {e}", original_error=e
680
517
  ) from e
681
518
  except requests.RequestException as e:
682
519
  raise ThordataNetworkError(
683
- f"Universal scrape failed: {e}", original_error=e
520
+ f"Universal failed: {e}", original_error=e
684
521
  ) from e
685
522
 
686
523
  def _process_universal_response(
687
524
  self, response: requests.Response, output_format: str
688
525
  ) -> Union[str, bytes]:
689
- """Process the response from Universal API."""
690
- # Try to parse as JSON
691
526
  try:
692
527
  resp_json = response.json()
693
528
  except ValueError:
694
- # Raw content returned
695
- if output_format.lower() == "png":
696
- return response.content
697
- return response.text
529
+ return response.content if output_format.lower() == "png" else response.text
698
530
 
699
- # Check for API-level errors
700
531
  if isinstance(resp_json, dict):
701
532
  code = resp_json.get("code")
702
533
  if code is not None and code != 200:
703
534
  msg = extract_error_message(resp_json)
704
- raise_for_code(
705
- f"Universal API Error: {msg}", code=code, payload=resp_json
706
- )
535
+ raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
707
536
 
708
- # Extract HTML
709
537
  if "html" in resp_json:
710
538
  return resp_json["html"]
711
-
712
- # Extract PNG
713
539
  if "png" in resp_json:
714
540
  return decode_base64_image(resp_json["png"])
715
541
 
716
- # Fallback
717
542
  return str(resp_json)
718
543
 
719
544
  # =========================================================================
720
- # Web Scraper API Methods (Only async task management functions)
545
+ # Web Scraper API (Tasks)
721
546
  # =========================================================================
722
547
  def create_scraper_task(
723
548
  self,
@@ -727,29 +552,6 @@ class ThordataClient:
727
552
  parameters: Dict[str, Any],
728
553
  universal_params: Optional[Dict[str, Any]] = None,
729
554
  ) -> str:
730
- """
731
- Create an asynchronous Web Scraper task.
732
-
733
- Note: Get spider_id and spider_name from the Thordata Dashboard.
734
-
735
- Args:
736
- file_name: Name for the output file.
737
- spider_id: Spider identifier from Dashboard.
738
- spider_name: Spider name (e.g., "youtube.com").
739
- parameters: Spider-specific parameters.
740
- universal_params: Global spider settings.
741
-
742
- Returns:
743
- The created task_id.
744
-
745
- Example:
746
- >>> task_id = client.create_scraper_task(
747
- ... file_name="youtube_data",
748
- ... spider_id="youtube_video-post_by-url",
749
- ... spider_name="youtube.com",
750
- ... parameters={"url": "https://youtube.com/@channel/videos"}
751
- ... )
752
- """
753
555
  config = ScraperTaskConfig(
754
556
  file_name=file_name,
755
557
  spider_id=spider_id,
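
The docstring removed above carried the only usage example for asynchronous task creation; restated here for reference, with the spider values taken from the 0.8.0 docstring (your own spider_id/spider_name come from the Thordata Dashboard):

    from thordata.client import ThordataClient

    client = ThordataClient(
        scraper_token="your_scraper_token",
        public_token="your_public_token",   # task APIs require the public credentials
        public_key="your_public_key",
    )

    task_id = client.create_scraper_task(
        file_name="youtube_data",
        spider_id="youtube_video-post_by-url",
        spider_name="youtube.com",
        parameters={"url": "https://youtube.com/@channel/videos"},
    )
    print(task_id)
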
@@ -757,50 +559,26 @@ class ThordataClient:
757
559
  parameters=parameters,
758
560
  universal_params=universal_params,
759
561
  )
760
-
761
562
  return self.create_scraper_task_advanced(config)
762
563
 
763
564
  def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
764
- """
765
- Create a scraper task using a ScraperTaskConfig object.
766
-
767
- Args:
768
- config: Task configuration.
769
-
770
- Returns:
771
- The created task_id.
772
- """
773
565
  self._require_public_credentials()
774
-
775
566
  payload = config.to_payload()
776
-
777
- # Builder needs 3 headers: token, key, Authorization Bearer
778
567
  headers = build_builder_headers(
779
- self.scraper_token,
780
- self.public_token or "",
781
- self.public_key or "",
568
+ self.scraper_token, self.public_token or "", self.public_key or ""
782
569
  )
783
570
 
784
- logger.info(f"Creating Scraper Task: {config.spider_name}")
785
-
786
571
  try:
787
572
  response = self._api_request_with_retry(
788
- "POST",
789
- self._builder_url,
790
- data=payload,
791
- headers=headers,
573
+ "POST", self._builder_url, data=payload, headers=headers
792
574
  )
793
575
  response.raise_for_status()
794
-
795
576
  data = response.json()
796
- code = data.get("code")
797
-
798
- if code != 200:
799
- msg = extract_error_message(data)
800
- raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
801
-
577
+ if data.get("code") != 200:
578
+ raise_for_code(
579
+ "Task creation failed", code=data.get("code"), payload=data
580
+ )
802
581
  return data["data"]["task_id"]
803
-
804
582
  except requests.RequestException as e:
805
583
  raise ThordataNetworkError(
806
584
  f"Task creation failed: {e}", original_error=e
@@ -814,35 +592,6 @@ class ThordataClient:
814
592
  parameters: Dict[str, Any],
815
593
  common_settings: "CommonSettings",
816
594
  ) -> str:
817
- """
818
- Create a YouTube video/audio download task.
819
-
820
- Uses the /video_builder endpoint.
821
-
822
- Args:
823
- file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
824
- spider_id: Spider identifier (e.g., "youtube_video_by-url").
825
- spider_name: Spider name (typically "youtube.com").
826
- parameters: Spider parameters (e.g., {"url": "..."}).
827
- common_settings: Video/audio settings.
828
-
829
- Returns:
830
- The created task_id.
831
-
832
- Example:
833
- >>> from thordata import CommonSettings
834
- >>> task_id = client.create_video_task(
835
- ... file_name="{{VideoID}}",
836
- ... spider_id="youtube_video_by-url",
837
- ... spider_name="youtube.com",
838
- ... parameters={"url": "https://youtube.com/watch?v=xxx"},
839
- ... common_settings=CommonSettings(
840
- ... resolution="1080p",
841
- ... is_subtitles="true"
842
- ... )
843
- ... )
844
- """
845
-
846
595
  config = VideoTaskConfig(
847
596
  file_name=file_name,
848
597
  spider_id=spider_id,
@@ -850,210 +599,97 @@ class ThordataClient:
850
599
  parameters=parameters,
851
600
  common_settings=common_settings,
852
601
  )
853
-
854
602
  return self.create_video_task_advanced(config)
855
603
 
856
604
  def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
857
- """
858
- Create a video task using VideoTaskConfig object.
859
-
860
- Args:
861
- config: Video task configuration.
862
-
863
- Returns:
864
- The created task_id.
865
- """
866
-
867
605
  self._require_public_credentials()
868
-
869
606
  payload = config.to_payload()
870
607
  headers = build_builder_headers(
871
- self.scraper_token,
872
- self.public_token or "",
873
- self.public_key or "",
608
+ self.scraper_token, self.public_token or "", self.public_key or ""
874
609
  )
875
610
 
876
- logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
877
-
878
611
  response = self._api_request_with_retry(
879
- "POST",
880
- self._video_builder_url,
881
- data=payload,
882
- headers=headers,
612
+ "POST", self._video_builder_url, data=payload, headers=headers
883
613
  )
884
614
  response.raise_for_status()
885
-
886
615
  data = response.json()
887
- code = data.get("code")
888
-
889
- if code != 200:
890
- msg = extract_error_message(data)
616
+ if data.get("code") != 200:
891
617
  raise_for_code(
892
- f"Video task creation failed: {msg}", code=code, payload=data
618
+ "Video task creation failed", code=data.get("code"), payload=data
893
619
  )
894
-
895
620
  return data["data"]["task_id"]
896
621
 
897
622
  def get_task_status(self, task_id: str) -> str:
898
- """
899
- Check the status of an asynchronous scraping task.
900
-
901
- Returns:
902
- Status string (e.g., "running", "ready", "failed").
903
-
904
- Raises:
905
- ThordataConfigError: If public credentials are missing.
906
- ThordataAPIError: If API returns a non-200 code in JSON payload.
907
- ThordataNetworkError: If network/HTTP request fails.
908
- """
909
623
  self._require_public_credentials()
910
-
911
624
  headers = build_public_api_headers(
912
625
  self.public_token or "", self.public_key or ""
913
626
  )
914
- payload = {"tasks_ids": task_id}
915
-
916
627
  try:
917
628
  response = self._api_request_with_retry(
918
629
  "POST",
919
630
  self._status_url,
920
- data=payload,
631
+ data={"tasks_ids": task_id},
921
632
  headers=headers,
922
633
  )
923
634
  response.raise_for_status()
924
635
  data = response.json()
925
-
926
- if isinstance(data, dict):
927
- code = data.get("code")
928
- if code is not None and code != 200:
929
- msg = extract_error_message(data)
930
- raise_for_code(
931
- f"Task status API Error: {msg}",
932
- code=code,
933
- payload=data,
934
- )
935
-
936
- items = data.get("data") or []
937
- for item in items:
938
- if str(item.get("task_id")) == str(task_id):
939
- return item.get("status", "unknown")
940
-
941
- return "unknown"
942
-
943
- # Unexpected payload type
944
- raise ThordataNetworkError(
945
- f"Unexpected task status response type: {type(data).__name__}",
946
- original_error=None,
947
- )
948
-
949
- except requests.Timeout as e:
950
- raise ThordataTimeoutError(
951
- f"Status check timed out: {e}", original_error=e
952
- ) from e
636
+ if data.get("code") != 200:
637
+ raise_for_code("Task status error", code=data.get("code"), payload=data)
638
+
639
+ items = data.get("data") or []
640
+ for item in items:
641
+ if str(item.get("task_id")) == str(task_id):
642
+ return item.get("status", "unknown")
643
+ return "unknown"
953
644
  except requests.RequestException as e:
954
645
  raise ThordataNetworkError(
955
646
  f"Status check failed: {e}", original_error=e
956
647
  ) from e
957
648
 
958
649
  def safe_get_task_status(self, task_id: str) -> str:
959
- """
960
- Backward-compatible status check.
961
-
962
- Returns:
963
- Status string, or "error" on any exception.
964
- """
965
650
  try:
966
651
  return self.get_task_status(task_id)
967
652
  except Exception:
968
653
  return "error"
969
654
 
970
655
  def get_task_result(self, task_id: str, file_type: str = "json") -> str:
971
- """
972
- Get the download URL for a completed task.
973
- """
974
656
  self._require_public_credentials()
975
-
976
657
  headers = build_public_api_headers(
977
658
  self.public_token or "", self.public_key or ""
978
659
  )
979
- payload = {"tasks_id": task_id, "type": file_type}
980
-
981
- logger.info(f"Getting result URL for Task: {task_id}")
982
-
983
660
  try:
984
661
  response = self._api_request_with_retry(
985
662
  "POST",
986
663
  self._download_url,
987
- data=payload,
664
+ data={"tasks_id": task_id, "type": file_type},
988
665
  headers=headers,
989
666
  )
990
667
  response.raise_for_status()
991
-
992
668
  data = response.json()
993
- code = data.get("code")
994
-
995
- if code == 200 and data.get("data"):
669
+ if data.get("code") == 200 and data.get("data"):
996
670
  return data["data"]["download"]
997
-
998
- msg = extract_error_message(data)
999
- raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
1000
- # This line won't be reached, but satisfies mypy
1001
- raise RuntimeError("Unexpected state")
1002
-
671
+ raise_for_code("Get result failed", code=data.get("code"), payload=data)
672
+ return ""
1003
673
  except requests.RequestException as e:
1004
674
  raise ThordataNetworkError(
1005
675
  f"Get result failed: {e}", original_error=e
1006
676
  ) from e
1007
677
 
1008
- def list_tasks(
1009
- self,
1010
- page: int = 1,
1011
- size: int = 20,
1012
- ) -> Dict[str, Any]:
1013
- """
1014
- List all Web Scraper tasks.
1015
-
1016
- Args:
1017
- page: Page number (starts from 1).
1018
- size: Number of tasks per page.
1019
-
1020
- Returns:
1021
- Dict containing 'count' and 'list' of tasks.
1022
-
1023
- Example:
1024
- >>> result = client.list_tasks(page=1, size=10)
1025
- >>> print(f"Total tasks: {result['count']}")
1026
- >>> for task in result['list']:
1027
- ... print(f"Task {task['task_id']}: {task['status']}")
1028
- """
678
+ def list_tasks(self, page: int = 1, size: int = 20) -> Dict[str, Any]:
1029
679
  self._require_public_credentials()
1030
-
1031
680
  headers = build_public_api_headers(
1032
681
  self.public_token or "", self.public_key or ""
1033
682
  )
1034
- payload: Dict[str, Any] = {}
1035
- if page:
1036
- payload["page"] = str(page)
1037
- if size:
1038
- payload["size"] = str(size)
1039
-
1040
- logger.info(f"Listing tasks: page={page}, size={size}")
1041
-
1042
683
  response = self._api_request_with_retry(
1043
684
  "POST",
1044
685
  self._list_url,
1045
- data=payload,
686
+ data={"page": str(page), "size": str(size)},
1046
687
  headers=headers,
1047
688
  )
1048
689
  response.raise_for_status()
1049
-
1050
690
  data = response.json()
1051
- code = data.get("code")
1052
-
1053
- if code != 200:
1054
- msg = extract_error_message(data)
1055
- raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
1056
-
691
+ if data.get("code") != 200:
692
+ raise_for_code("List tasks failed", code=data.get("code"), payload=data)
1057
693
  return data.get("data", {"count": 0, "list": []})
1058
694
 
1059
695
  def wait_for_task(
@@ -1063,84 +699,32 @@ class ThordataClient:
1063
699
  poll_interval: float = 5.0,
1064
700
  max_wait: float = 600.0,
1065
701
  ) -> str:
1066
- """
1067
- Wait for a task to complete.
1068
-
1069
- Args:
1070
- task_id: The task ID to wait for.
1071
- poll_interval: Seconds between status checks.
1072
- max_wait: Maximum seconds to wait.
1073
-
1074
- Returns:
1075
- Final task status.
1076
-
1077
- Raises:
1078
- TimeoutError: If max_wait is exceeded.
1079
-
1080
- Example:
1081
- >>> task_id = client.create_scraper_task(...)
1082
- >>> status = client.wait_for_task(task_id, max_wait=300)
1083
- >>> if status in ("ready", "success"):
1084
- ... url = client.get_task_result(task_id)
1085
- """
1086
702
  import time
1087
703
 
1088
704
  start = time.monotonic()
1089
-
1090
705
  while (time.monotonic() - start) < max_wait:
1091
706
  status = self.get_task_status(task_id)
1092
-
1093
- logger.debug(f"Task {task_id} status: {status}")
1094
-
1095
- terminal_statuses = {
707
+ if status.lower() in {
1096
708
  "ready",
1097
709
  "success",
1098
710
  "finished",
1099
711
  "failed",
1100
712
  "error",
1101
713
  "cancelled",
1102
- }
1103
-
1104
- if status.lower() in terminal_statuses:
714
+ }:
1105
715
  return status
1106
-
1107
716
  time.sleep(poll_interval)
1108
-
1109
- raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
717
+ raise TimeoutError(f"Task {task_id} timeout")
1110
718
 
1111
719
  # =========================================================================
1112
- # Proxy Account Management Methods (Proxy balance, user, whitelist functions)
720
+ # Account / Locations / Utils
1113
721
  # =========================================================================
1114
722
  def get_usage_statistics(
1115
723
  self,
1116
724
  from_date: Union[str, date],
1117
725
  to_date: Union[str, date],
1118
726
  ) -> UsageStatistics:
1119
- """
1120
- Get account usage statistics for a date range.
1121
-
1122
- Args:
1123
- from_date: Start date (YYYY-MM-DD string or date object).
1124
- to_date: End date (YYYY-MM-DD string or date object).
1125
-
1126
- Returns:
1127
- UsageStatistics object with traffic data.
1128
-
1129
- Raises:
1130
- ValueError: If date range exceeds 180 days.
1131
-
1132
- Example:
1133
- >>> from datetime import date, timedelta
1134
- >>> today = date.today()
1135
- >>> week_ago = today - timedelta(days=7)
1136
- >>> stats = client.get_usage_statistics(week_ago, today)
1137
- >>> print(f"Used: {stats.range_usage_gb():.2f} GB")
1138
- >>> print(f"Balance: {stats.balance_gb():.2f} GB")
1139
- """
1140
-
1141
727
  self._require_public_credentials()
1142
-
1143
- # Convert dates to strings
1144
728
  if isinstance(from_date, date):
1145
729
  from_date = from_date.strftime("%Y-%m-%d")
1146
730
  if isinstance(to_date, date):
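
wait_for_task keeps the same terminal statuses in the hunk above but loses its docstring example; the 0.8.0 polling workflow, roughly (placeholder credentials and spider values):

    from thordata.client import ThordataClient

    client = ThordataClient(
        scraper_token="your_scraper_token",
        public_token="your_public_token",
        public_key="your_public_key",
    )

    task_id = client.create_scraper_task(
        file_name="youtube_data",
        spider_id="youtube_video-post_by-url",
        spider_name="youtube.com",
        parameters={"url": "https://youtube.com/@channel/videos"},
    )

    # Poll every 5 seconds for up to 5 minutes; raises TimeoutError otherwise.
    status = client.wait_for_task(task_id, poll_interval=5.0, max_wait=300)
    if status in ("ready", "success"):
        download_url = client.get_task_result(task_id, file_type="json")
        print(download_url)
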
@@ -1152,185 +736,33 @@ class ThordataClient:
1152
736
  "from_date": from_date,
1153
737
  "to_date": to_date,
1154
738
  }
1155
-
1156
- logger.info(f"Getting usage statistics: {from_date} to {to_date}")
1157
-
1158
739
  response = self._api_request_with_retry(
1159
- "GET",
1160
- self._usage_stats_url,
1161
- params=params,
740
+ "GET", self._usage_stats_url, params=params
1162
741
  )
1163
742
  response.raise_for_status()
1164
-
1165
743
  data = response.json()
1166
-
1167
- if isinstance(data, dict):
1168
- code = data.get("code")
1169
- if code is not None and code != 200:
1170
- msg = extract_error_message(data)
1171
- raise_for_code(
1172
- f"Usage statistics error: {msg}",
1173
- code=code,
1174
- payload=data,
1175
- )
1176
-
1177
- # Extract data field
1178
- usage_data = data.get("data", data)
1179
- return UsageStatistics.from_dict(usage_data)
1180
-
1181
- raise ThordataNetworkError(
1182
- f"Unexpected usage statistics response: {type(data).__name__}",
1183
- original_error=None,
1184
- )
1185
-
1186
- def get_residential_balance(self) -> Dict[str, Any]:
1187
- """
1188
- Get residential proxy balance (Public API NEW).
1189
-
1190
- Requires sign and apiKey credentials.
1191
-
1192
- Returns:
1193
- Dict with 'balance' (bytes) and 'expire_time' (timestamp).
1194
-
1195
- Example:
1196
- >>> result = client.get_residential_balance()
1197
- >>> balance_gb = result['balance'] / (1024**3)
1198
- >>> print(f"Balance: {balance_gb:.2f} GB")
1199
- """
1200
- if not self.sign or not self.api_key:
1201
- raise ThordataConfigError(
1202
- "sign and api_key are required for Public API NEW. "
1203
- "Set THORDATA_SIGN and THORDATA_API_KEY environment variables."
1204
- )
1205
-
1206
- headers = build_sign_headers(self.sign, self.api_key)
1207
-
1208
- logger.info("Getting residential proxy balance (API NEW)")
1209
-
1210
- response = self._api_request_with_retry(
1211
- "POST",
1212
- f"{self._gateway_base_url}/getFlowBalance",
1213
- headers=headers,
1214
- data={},
1215
- )
1216
- response.raise_for_status()
1217
-
1218
- data = response.json()
1219
- code = data.get("code")
1220
-
1221
- if code != 200:
1222
- msg = extract_error_message(data)
1223
- raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
1224
-
1225
- return data.get("data", {})
1226
-
1227
- def get_residential_usage(
1228
- self,
1229
- start_time: Union[str, int],
1230
- end_time: Union[str, int],
1231
- ) -> Dict[str, Any]:
1232
- """
1233
- Get residential proxy usage records (Public API NEW).
1234
-
1235
- Args:
1236
- start_time: Start timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1237
- end_time: End timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1238
-
1239
- Returns:
1240
- Dict with usage data including 'all_flow', 'all_used_flow', 'data' list.
1241
-
1242
- Example:
1243
- >>> import time
1244
- >>> end = int(time.time())
1245
- >>> start = end - 7*24*3600 # Last 7 days
1246
- >>> usage = client.get_residential_usage(start, end)
1247
- >>> print(f"Total used: {usage['all_used_flow'] / (1024**3):.2f} GB")
1248
- """
1249
- if not self.sign or not self.api_key:
1250
- raise ThordataConfigError(
1251
- "sign and api_key are required for Public API NEW."
1252
- )
1253
-
1254
- headers = build_sign_headers(self.sign, self.api_key)
1255
- payload = {
1256
- "start_time": str(start_time),
1257
- "end_time": str(end_time),
1258
- }
1259
-
1260
- logger.info(f"Getting residential usage: {start_time} to {end_time}")
1261
-
1262
- response = self._api_request_with_retry(
1263
- "POST",
1264
- f"{self._gateway_base_url}/usageRecord",
1265
- headers=headers,
1266
- data=payload,
1267
- )
1268
- response.raise_for_status()
1269
-
1270
- data = response.json()
1271
- code = data.get("code")
1272
-
1273
- if code != 200:
1274
- msg = extract_error_message(data)
1275
- raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
1276
-
1277
- return data.get("data", {})
744
+ if data.get("code") != 200:
745
+ raise_for_code("Usage stats error", code=data.get("code"), payload=data)
746
+ return UsageStatistics.from_dict(data.get("data", data))
1278
747
 
1279
748
  def list_proxy_users(
1280
749
  self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
1281
750
  ) -> ProxyUserList:
1282
- """
1283
- List all proxy users (sub-accounts).
1284
-
1285
- Args:
1286
- proxy_type: Proxy type (1=Residential, 2=Unlimited).
1287
-
1288
- Returns:
1289
- ProxyUserList with user details.
1290
-
1291
- Example:
1292
- >>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
1293
- >>> print(f"Total users: {users.user_count}")
1294
- >>> for user in users.users:
1295
- ... print(f"{user.username}: {user.usage_gb():.2f} GB used")
1296
- """
1297
-
1298
751
  self._require_public_credentials()
1299
-
752
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1300
753
  params = {
1301
754
  "token": self.public_token,
1302
755
  "key": self.public_key,
1303
- "proxy_type": str(
1304
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1305
- ),
756
+ "proxy_type": str(pt),
1306
757
  }
1307
-
1308
- logger.info(f"Listing proxy users: type={params['proxy_type']}")
1309
-
1310
758
  response = self._api_request_with_retry(
1311
- "GET",
1312
- f"{self._proxy_users_url}/user-list",
1313
- params=params,
759
+ "GET", f"{self._proxy_users_url}/user-list", params=params
1314
760
  )
1315
761
  response.raise_for_status()
1316
-
1317
762
  data = response.json()
1318
-
1319
- if isinstance(data, dict):
1320
- code = data.get("code")
1321
- if code is not None and code != 200:
1322
- msg = extract_error_message(data)
1323
- raise_for_code(
1324
- f"List proxy users error: {msg}", code=code, payload=data
1325
- )
1326
-
1327
- user_data = data.get("data", data)
1328
- return ProxyUserList.from_dict(user_data)
1329
-
1330
- raise ThordataNetworkError(
1331
- f"Unexpected proxy users response: {type(data).__name__}",
1332
- original_error=None,
1333
- )
763
+ if data.get("code") != 200:
764
+ raise_for_code("List users error", code=data.get("code"), payload=data)
765
+ return ProxyUserList.from_dict(data.get("data", data))
1334
766
 
1335
767
  def create_proxy_user(
1336
768
  self,
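
In the account-management hunk above, get_usage_statistics now raises through raise_for_code on non-200 payloads and returns a UsageStatistics model; the date-range example from the removed 0.8.0 docstring, restated (placeholder credentials):

    from datetime import date, timedelta

    from thordata.client import ThordataClient

    client = ThordataClient(
        scraper_token="your_scraper_token",
        public_token="your_public_token",
        public_key="your_public_key",
    )

    today = date.today()
    week_ago = today - timedelta(days=7)

    stats = client.get_usage_statistics(week_ago, today)
    print(f"Used: {stats.range_usage_gb():.2f} GB")
    print(f"Balance: {stats.balance_gb():.2f} GB")
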
@@ -1340,45 +772,18 @@ class ThordataClient:
1340
772
  traffic_limit: int = 0,
1341
773
  status: bool = True,
1342
774
  ) -> Dict[str, Any]:
1343
- """
1344
- Create a new proxy user (sub-account).
1345
-
1346
- Args:
1347
- username: Username for the new user.
1348
- password: Password for the new user.
1349
- proxy_type: Proxy type (1=Residential, 2=Unlimited).
1350
- traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
1351
- status: Enable/disable user (True/False).
1352
-
1353
- Returns:
1354
- API response data.
1355
-
1356
- Example:
1357
- >>> result = client.create_proxy_user(
1358
- ... username="subuser1",
1359
- ... password="securepass",
1360
- ... traffic_limit=5120, # 5GB
1361
- ... status=True
1362
- ... )
1363
- """
1364
775
  self._require_public_credentials()
1365
-
776
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1366
777
  headers = build_public_api_headers(
1367
778
  self.public_token or "", self.public_key or ""
1368
779
  )
1369
-
1370
780
  payload = {
1371
- "proxy_type": str(
1372
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1373
- ),
781
+ "proxy_type": str(pt),
1374
782
  "username": username,
1375
783
  "password": password,
1376
784
  "traffic_limit": str(traffic_limit),
1377
785
  "status": "true" if status else "false",
1378
786
  }
1379
-
1380
- logger.info(f"Creating proxy user: {username}")
1381
-
1382
787
  response = self._api_request_with_retry(
1383
788
  "POST",
1384
789
  f"{self._proxy_users_url}/create-user",
@@ -1386,14 +791,9 @@ class ThordataClient:
1386
791
  headers=headers,
1387
792
  )
1388
793
  response.raise_for_status()
1389
-
1390
794
  data = response.json()
1391
- code = data.get("code")
1392
-
1393
- if code != 200:
1394
- msg = extract_error_message(data)
1395
- raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
1396
-
795
+ if data.get("code") != 200:
796
+ raise_for_code("Create user failed", code=data.get("code"), payload=data)
1397
797
  return data.get("data", {})
1398
798
 
1399
799
  def add_whitelist_ip(
@@ -1402,328 +802,86 @@ class ThordataClient:
1402
802
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1403
803
  status: bool = True,
1404
804
  ) -> Dict[str, Any]:
1405
- """
1406
- Add an IP to the whitelist for IP authentication.
1407
-
1408
- Args:
1409
- ip: IP address to whitelist.
1410
- proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
1411
- status: Enable/disable the IP (True/False).
1412
-
1413
- Returns:
1414
- API response data.
1415
-
1416
- Example:
1417
- >>> result = client.add_whitelist_ip(
1418
- ... ip="123.45.67.89",
1419
- ... proxy_type=ProxyType.RESIDENTIAL,
1420
- ... status=True
1421
- ... )
1422
- """
1423
805
  self._require_public_credentials()
1424
-
806
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1425
807
  headers = build_public_api_headers(
1426
808
  self.public_token or "", self.public_key or ""
1427
809
  )
1428
-
1429
- # Convert ProxyType to int
1430
- proxy_type_int = (
1431
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1432
- )
1433
-
1434
810
  payload = {
1435
- "proxy_type": str(proxy_type_int),
811
+ "proxy_type": str(pt),
1436
812
  "ip": ip,
1437
813
  "status": "true" if status else "false",
1438
814
  }
1439
-
1440
- logger.info(f"Adding whitelist IP: {ip}")
1441
-
1442
815
  response = self._api_request_with_retry(
1443
- "POST",
1444
- f"{self._whitelist_url}/add-ip",
1445
- data=payload,
1446
- headers=headers,
816
+ "POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
1447
817
  )
1448
818
  response.raise_for_status()
1449
-
1450
819
  data = response.json()
1451
- code = data.get("code")
1452
-
1453
- if code != 200:
1454
- msg = extract_error_message(data)
1455
- raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
1456
-
820
+ if data.get("code") != 200:
821
+ raise_for_code(
822
+ "Add whitelist IP failed", code=data.get("code"), payload=data
823
+ )
1457
824
  return data.get("data", {})
1458
825
 
1459
- def list_proxy_servers(
1460
- self,
1461
- proxy_type: int,
1462
- ) -> List[ProxyServer]:
1463
- """
1464
- List ISP or Datacenter proxy servers.
1465
-
1466
- Args:
1467
- proxy_type: Proxy type (1=ISP, 2=Datacenter).
1468
-
1469
- Returns:
1470
- List of ProxyServer objects.
1471
-
1472
- Example:
1473
- >>> servers = client.list_proxy_servers(proxy_type=1) # ISP proxies
1474
- >>> for server in servers:
1475
- ... print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
1476
- """
1477
-
826
+ def list_proxy_servers(self, proxy_type: int) -> List[ProxyServer]:
1478
827
  self._require_public_credentials()
1479
-
1480
828
  params = {
1481
829
  "token": self.public_token,
1482
830
  "key": self.public_key,
1483
831
  "proxy_type": str(proxy_type),
1484
832
  }
1485
-
1486
- logger.info(f"Listing proxy servers: type={proxy_type}")
1487
-
1488
833
  response = self._api_request_with_retry(
1489
- "GET",
1490
- self._proxy_list_url,
1491
- params=params,
834
+ "GET", self._proxy_list_url, params=params
1492
835
  )
1493
836
  response.raise_for_status()
1494
-
1495
837
  data = response.json()
838
+ if data.get("code") != 200:
839
+ raise_for_code(
840
+ "List proxy servers error", code=data.get("code"), payload=data
841
+ )
1496
842
 
843
+ server_list = []
1497
844
  if isinstance(data, dict):
1498
- code = data.get("code")
1499
- if code is not None and code != 200:
1500
- msg = extract_error_message(data)
1501
- raise_for_code(
1502
- f"List proxy servers error: {msg}", code=code, payload=data
1503
- )
1504
-
1505
- # Extract list from data field
1506
845
  server_list = data.get("data", data.get("list", []))
1507
846
  elif isinstance(data, list):
1508
847
  server_list = data
1509
- else:
1510
- raise ThordataNetworkError(
1511
- f"Unexpected proxy list response: {type(data).__name__}",
1512
- original_error=None,
1513
- )
1514
848
 
1515
849
  return [ProxyServer.from_dict(s) for s in server_list]
1516
850
 
1517
- def get_isp_regions(self) -> List[Dict[str, Any]]:
1518
- """
1519
- Get available ISP proxy regions (Public API NEW).
1520
-
1521
- Returns:
1522
- List of regions with id, continent, country, city, num, pricing.
1523
-
1524
- Example:
1525
- >>> regions = client.get_isp_regions()
1526
- >>> for region in regions:
1527
- ... print(f"{region['country']}/{region['city']}: {region['num']} IPs")
1528
- """
1529
- if not self.sign or not self.api_key:
1530
- raise ThordataConfigError(
1531
- "sign and api_key are required for Public API NEW."
1532
- )
1533
-
1534
- headers = build_sign_headers(self.sign, self.api_key)
1535
-
1536
- logger.info("Getting ISP regions (API NEW)")
1537
-
1538
- response = self._api_request_with_retry(
1539
- "POST",
1540
- f"{self._gateway_base_url}/getRegionIsp",
1541
- headers=headers,
1542
- data={},
1543
- )
1544
- response.raise_for_status()
1545
-
1546
- data = response.json()
1547
- code = data.get("code")
1548
-
1549
- if code != 200:
1550
- msg = extract_error_message(data)
1551
- raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
1552
-
1553
- return data.get("data", [])
1554
-
1555
- def list_isp_proxies(self) -> List[Dict[str, Any]]:
1556
- """
1557
- List ISP proxies (Public API NEW).
1558
-
1559
- Returns:
1560
- List of ISP proxies with ip, port, user, pwd, startTime, expireTime.
1561
-
1562
- Example:
1563
- >>> proxies = client.list_isp_proxies()
1564
- >>> for proxy in proxies:
1565
- ... print(f"{proxy['ip']}:{proxy['port']} - expires: {proxy['expireTime']}")
1566
- """
1567
- if not self.sign or not self.api_key:
1568
- raise ThordataConfigError(
1569
- "sign and api_key are required for Public API NEW."
1570
- )
1571
-
1572
- headers = build_sign_headers(self.sign, self.api_key)
1573
-
1574
- logger.info("Listing ISP proxies (API NEW)")
1575
-
1576
- response = self._api_request_with_retry(
1577
- "POST",
1578
- f"{self._gateway_base_url}/queryListIsp",
1579
- headers=headers,
1580
- data={},
1581
- )
1582
- response.raise_for_status()
1583
-
1584
- data = response.json()
1585
- code = data.get("code")
1586
-
1587
- if code != 200:
1588
- msg = extract_error_message(data)
1589
- raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
1590
-
1591
- return data.get("data", [])
1592
-
1593
- def get_wallet_balance(self) -> Dict[str, Any]:
1594
- """
1595
- Get wallet balance for ISP proxies (Public API NEW).
1596
-
1597
- Returns:
1598
- Dict with 'walletBalance'.
1599
-
1600
- Example:
1601
- >>> result = client.get_wallet_balance()
1602
- >>> print(f"Wallet: ${result['walletBalance']}")
1603
- """
1604
- if not self.sign or not self.api_key:
1605
- raise ThordataConfigError(
1606
- "sign and api_key are required for Public API NEW."
1607
- )
1608
-
1609
- headers = build_sign_headers(self.sign, self.api_key)
1610
-
1611
- logger.info("Getting wallet balance (API NEW)")
1612
-
1613
- response = self._api_request_with_retry(
1614
- "POST",
1615
- f"{self._gateway_base_url}/getBalance",
1616
- headers=headers,
1617
- data={},
1618
- )
1619
- response.raise_for_status()
1620
-
1621
- data = response.json()
1622
- code = data.get("code")
1623
-
1624
- if code != 200:
1625
- msg = extract_error_message(data)
1626
- raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
1627
-
1628
- return data.get("data", {})
1629
-
1630
851
  def get_proxy_expiration(
1631
- self,
1632
- ips: Union[str, List[str]],
1633
- proxy_type: int,
852
+ self, ips: Union[str, List[str]], proxy_type: int
1634
853
  ) -> Dict[str, Any]:
1635
- """
1636
- Get expiration time for specific proxy IPs.
1637
-
1638
- Args:
1639
- ips: Single IP or list of IPs to check.
1640
- proxy_type: Proxy type (1=ISP, 2=Datacenter).
1641
-
1642
- Returns:
1643
- Dict with expiration information.
1644
-
1645
- Example:
1646
- >>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
1647
- >>> print(result)
1648
- """
1649
854
  self._require_public_credentials()
1650
-
1651
- # Convert list to comma-separated string
1652
855
  if isinstance(ips, list):
1653
856
  ips = ",".join(ips)
1654
-
1655
857
  params = {
1656
858
  "token": self.public_token,
1657
859
  "key": self.public_key,
1658
860
  "proxy_type": str(proxy_type),
1659
861
  "ips": ips,
1660
862
  }
1661
-
1662
- logger.info(f"Getting proxy expiration: {ips}")
1663
-
1664
863
  response = self._api_request_with_retry(
1665
- "GET",
1666
- self._proxy_expiration_url,
1667
- params=params,
864
+ "GET", self._proxy_expiration_url, params=params
1668
865
  )
1669
866
  response.raise_for_status()
1670
-
1671
867
  data = response.json()
868
+ if data.get("code") != 200:
869
+ raise_for_code("Get expiration error", code=data.get("code"), payload=data)
870
+ return data.get("data", data)
1672
871
 
1673
- if isinstance(data, dict):
1674
- code = data.get("code")
1675
- if code is not None and code != 200:
1676
- msg = extract_error_message(data)
1677
- raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
1678
-
1679
- return data.get("data", data)
1680
-
1681
- return data
1682
-
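A minimal usage sketch for the compacted get_proxy_expiration() above, based on the removed docstring and assuming an already-constructed client (the IPs are placeholders; per that docstring, proxy_type 1 = ISP and 2 = Datacenter):

# Accepts a single IP string or a list (lists are joined with commas before the request)
result = client.get_proxy_expiration(["203.0.113.10", "203.0.113.11"], proxy_type=1)
print(result)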
1683
- # =========================================================================
1684
- # Location API Methods (Country/State/City/ASN functions)
1685
- # =========================================================================
1686
872
  def list_countries(
1687
873
  self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
1688
874
  ) -> List[Dict[str, Any]]:
1689
- """
1690
- List supported countries for proxies.
1691
-
1692
- Args:
1693
- proxy_type: 1 for residential, 2 for unlimited.
1694
-
1695
- Returns:
1696
- List of country records with 'country_code' and 'country_name'.
1697
- """
1698
- return self._get_locations(
1699
- "countries",
1700
- proxy_type=(
1701
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1702
- ),
1703
- )
875
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
876
+ return self._get_locations("countries", proxy_type=pt)
1704
877
 
1705
878
  def list_states(
1706
879
  self,
1707
880
  country_code: str,
1708
881
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1709
882
  ) -> List[Dict[str, Any]]:
1710
- """
1711
- List supported states for a country.
1712
-
1713
- Args:
1714
- country_code: Country code (e.g., 'US').
1715
- proxy_type: Proxy type.
1716
-
1717
- Returns:
1718
- List of state records.
1719
- """
1720
- return self._get_locations(
1721
- "states",
1722
- proxy_type=(
1723
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1724
- ),
1725
- country_code=country_code,
1726
- )
883
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
884
+ return self._get_locations("states", proxy_type=pt, country_code=country_code)
1727
885
 
1728
886
  def list_cities(
1729
887
  self,
@@ -1731,26 +889,10 @@ class ThordataClient:
1731
889
  state_code: Optional[str] = None,
1732
890
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1733
891
  ) -> List[Dict[str, Any]]:
1734
- """
1735
- List supported cities for a country/state.
1736
-
1737
- Args:
1738
- country_code: Country code.
1739
- state_code: Optional state code.
1740
- proxy_type: Proxy type.
1741
-
1742
- Returns:
1743
- List of city records.
1744
- """
1745
- kwargs = {
1746
- "proxy_type": (
1747
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1748
- ),
1749
- "country_code": country_code,
1750
- }
892
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
893
+ kwargs = {"proxy_type": pt, "country_code": country_code}
1751
894
  if state_code:
1752
895
  kwargs["state_code"] = state_code
1753
-
1754
896
  return self._get_locations("cities", **kwargs)
1755
897
 
1756
898
  def list_asn(
@@ -1758,98 +900,78 @@ class ThordataClient:
1758
900
  country_code: str,
1759
901
  proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1760
902
  ) -> List[Dict[str, Any]]:
1761
- """
1762
- List supported ASNs for a country.
1763
-
1764
- Args:
1765
- country_code: Country code.
1766
- proxy_type: Proxy type.
1767
-
1768
- Returns:
1769
- List of ASN records.
1770
- """
1771
- return self._get_locations(
1772
- "asn",
1773
- proxy_type=(
1774
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1775
- ),
1776
- country_code=country_code,
1777
- )
903
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
904
+ return self._get_locations("asn", proxy_type=pt, country_code=country_code)
1778
905
 
1779
906
  def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
1780
- """Internal method to call locations API."""
1781
907
  self._require_public_credentials()
908
+ params = {"token": self.public_token, "key": self.public_key}
909
+ for k, v in kwargs.items():
910
+ params[k] = str(v)
1782
911
 
1783
- params = {
1784
- "token": self.public_token,
1785
- "key": self.public_key,
1786
- }
1787
-
1788
- for key, value in kwargs.items():
1789
- params[key] = str(value)
1790
-
1791
- url = f"{self._locations_base_url}/{endpoint}"
1792
-
1793
- logger.debug(f"Locations API request: {url}")
1794
-
1795
- # Use requests.get directly (no proxy needed for this API)
1796
912
  response = self._api_request_with_retry(
1797
- "GET",
1798
- url,
1799
- params=params,
913
+ "GET", f"{self._locations_base_url}/{endpoint}", params=params
1800
914
  )
1801
915
  response.raise_for_status()
1802
-
1803
916
  data = response.json()
1804
-
1805
917
  if isinstance(data, dict):
1806
- code = data.get("code")
1807
- if code is not None and code != 200:
1808
- msg = data.get("msg", "")
1809
- raise RuntimeError(
1810
- f"Locations API error ({endpoint}): code={code}, msg={msg}"
1811
- )
918
+ if data.get("code") != 200:
919
+ raise RuntimeError(f"Locations error: {data.get('msg')}")
1812
920
  return data.get("data") or []
921
+ return data if isinstance(data, list) else []
1813
922
 
1814
- if isinstance(data, list):
1815
- return data
1816
-
1817
- return []
1818
-
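For reference, a sketch of the query string the new _get_locations() body assembles before issuing its GET request (the locations base URL is not visible in this hunk, so it stays a placeholder; the token and key values are illustrative):

from urllib.parse import urlencode

# token and key come first, then every keyword argument stringified
params = {"token": "<public_token>", "key": "<public_key>", "proxy_type": "1", "country_code": "US"}
# e.g. GET <locations_base_url>/states?token=...&key=...&proxy_type=1&country_code=US
print(urlencode(params))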
1819
- # =========================================================================
1820
- # Helper Methods (Internal utility functions)
1821
- # =========================================================================
1822
923
  def _require_public_credentials(self) -> None:
1823
- """Ensure public API credentials are available."""
1824
924
  if not self.public_token or not self.public_key:
1825
925
  raise ThordataConfigError(
1826
- "public_token and public_key are required for this operation. "
1827
- "Please provide them when initializing ThordataClient."
926
+ "public_token and public_key are required for this operation."
1828
927
  )
1829
928
 
1830
- def _request_with_retry(
1831
- self, method: str, url: str, **kwargs: Any
1832
- ) -> requests.Response:
1833
- """Make a request with automatic retry."""
1834
- kwargs.setdefault("timeout", self._default_timeout)
1835
-
1836
- @with_retry(self._retry_config)
1837
- def _do_request() -> requests.Response:
1838
- return self._proxy_session.request(method, url, **kwargs)
1839
-
1840
- try:
1841
- return _do_request()
1842
- except requests.Timeout as e:
1843
- raise ThordataTimeoutError(
1844
- f"Request timed out: {e}", original_error=e
1845
- ) from e
1846
- except requests.RequestException as e:
1847
- raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
929
+ def _get_proxy_endpoint_overrides(
930
+ self, product: ProxyProduct
931
+ ) -> tuple[Optional[str], Optional[int], str]:
932
+ prefix = product.value.upper()
933
+ host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
934
+ "THORDATA_PROXY_HOST"
935
+ )
936
+ port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
937
+ "THORDATA_PROXY_PORT"
938
+ )
939
+ protocol = (
940
+ os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
941
+ or os.getenv("THORDATA_PROXY_PROTOCOL")
942
+ or "http"
943
+ )
944
+ port = int(port_raw) if port_raw and port_raw.isdigit() else None
945
+ return host or None, port, protocol
946
+
947
+ def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
948
+ for prod in [
949
+ ProxyProduct.RESIDENTIAL,
950
+ ProxyProduct.DATACENTER,
951
+ ProxyProduct.MOBILE,
952
+ ]:
953
+ prefix = prod.value.upper()
954
+ u = os.getenv(f"THORDATA_{prefix}_USERNAME")
955
+ p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
956
+ if u and p:
957
+ h, port, proto = self._get_proxy_endpoint_overrides(prod)
958
+ return ProxyConfig(
959
+ username=u,
960
+ password=p,
961
+ product=prod,
962
+ host=h,
963
+ port=port,
964
+ protocol=proto,
965
+ )
966
+ return None
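The two helpers added above pull default proxy settings from the environment; per-product variables take precedence over the global THORDATA_PROXY_* fallbacks, and the protocol defaults to "http". A sketch with illustrative values, assuming ProxyProduct.value is the lowercase product name ("residential", "datacenter", "mobile"):

import os

os.environ["THORDATA_RESIDENTIAL_USERNAME"] = "user123"
os.environ["THORDATA_RESIDENTIAL_PASSWORD"] = "secret"
os.environ["THORDATA_RESIDENTIAL_PROXY_HOST"] = "pr.thordata.net"
os.environ["THORDATA_RESIDENTIAL_PROXY_PORT"] = "9999"
# _get_default_proxy_config_from_env() (internal) would now build a residential ProxyConfig
# from these values; with no *_USERNAME/*_PASSWORD pair set it returns None.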
1848
967
 
1849
968
  def close(self) -> None:
1850
- """Close the underlying session."""
1851
969
  self._proxy_session.close()
1852
970
  self._api_session.close()
971
+ # Clean up connection pools
972
+ for pm in self._proxy_managers.values():
973
+ pm.clear()
974
+ self._proxy_managers.clear()
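close() now also clears the pooled urllib3 proxy managers alongside the two sessions. A usage sketch relying on the context-manager protocol, assuming the matching __exit__ (outside this hunk) delegates to close() and that ThordataClient is re-exported from the package root:

from thordata import ThordataClient  # import path assumed

with ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN") as client:
    ...  # sessions and pooled proxy managers are released when the block exits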
1853
975
 
1854
976
  def __enter__(self) -> ThordataClient:
1855
977
  return self