thordata-sdk 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -27,7 +27,7 @@ import logging
27
27
  import os
28
28
  import ssl
29
29
  from datetime import date
30
- from typing import Any, Dict, List, Optional, Union
30
+ from typing import Any
31
31
  from urllib.parse import urlencode
32
32
 
33
33
  import requests
@@ -61,7 +61,6 @@ from .models import (
61
61
  UniversalScrapeRequest,
62
62
  UsageStatistics,
63
63
  VideoTaskConfig,
64
- WhitelistProxyConfig,
65
64
  )
66
65
  from .retry import RetryConfig, with_retry
67
66
 
@@ -69,32 +68,6 @@ logger = logging.getLogger(__name__)
69
68
 
70
69
 
71
70
  class ThordataClient:
72
- """
73
- The official synchronous Python client for Thordata.
74
-
75
- This client handles authentication and communication with:
76
- - Proxy Network (Residential/Datacenter/Mobile/ISP via HTTP/HTTPS)
77
- - SERP API (Real-time Search Engine Results)
78
- - Universal Scraping API (Web Unlocker - Single Page Rendering)
79
- - Web Scraper API (Async Task Management)
80
-
81
- Args:
82
- scraper_token: The API token from your Dashboard.
83
- public_token: The public API token (for task status, locations).
84
- public_key: The public API key.
85
- proxy_host: Custom proxy gateway host (optional).
86
- proxy_port: Custom proxy gateway port (optional).
87
- timeout: Default request timeout in seconds (default: 30).
88
- retry_config: Configuration for automatic retries (optional).
89
-
90
- Example:
91
- >>> client = ThordataClient(
92
- ... scraper_token="your_scraper_token",
93
- ... public_token="your_public_token",
94
- ... public_key="your_public_key"
95
- ... )
96
- """
97
-
98
71
  # API Endpoints
99
72
  BASE_URL = "https://scraperapi.thordata.com"
100
73
  UNIVERSAL_URL = "https://universalapi.thordata.com"
@@ -104,18 +77,18 @@ class ThordataClient:
104
77
  def __init__(
105
78
  self,
106
79
  scraper_token: str,
107
- public_token: Optional[str] = None,
108
- public_key: Optional[str] = None,
80
+ public_token: str | None = None,
81
+ public_key: str | None = None,
109
82
  proxy_host: str = "pr.thordata.net",
110
83
  proxy_port: int = 9999,
111
84
  timeout: int = 30,
112
85
  api_timeout: int = 60,
113
- retry_config: Optional[RetryConfig] = None,
86
+ retry_config: RetryConfig | None = None,
114
87
  auth_mode: str = "bearer",
115
- scraperapi_base_url: Optional[str] = None,
116
- universalapi_base_url: Optional[str] = None,
117
- web_scraper_api_base_url: Optional[str] = None,
118
- locations_base_url: Optional[str] = None,
88
+ scraperapi_base_url: str | None = None,
89
+ universalapi_base_url: str | None = None,
90
+ web_scraper_api_base_url: str | None = None,
91
+ locations_base_url: str | None = None,
119
92
  ) -> None:
120
93
  """Initialize the Thordata Client."""
121
94
  if not scraper_token:
@@ -144,22 +117,21 @@ class ThordataClient:
144
117
  f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
145
118
  )
146
119
 
147
- # NOTE:
148
- # - _proxy_session: used for proxy network traffic to target sites
149
- # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
150
- #
151
- # We intentionally do NOT set session-level proxies for _api_session,
152
- # so developers can rely on system proxy settings (e.g., Clash) via env vars.
120
+ # HTTP Sessions
153
121
  self._proxy_session = requests.Session()
154
122
  self._proxy_session.trust_env = False
155
123
 
124
+ # Cache for ProxyManagers (Connection Pooling Fix)
125
+ # Key: proxy_url (str), Value: urllib3.ProxyManager
126
+ self._proxy_managers: dict[str, urllib3.ProxyManager] = {}
127
+
156
128
  self._api_session = requests.Session()
157
129
  self._api_session.trust_env = True
158
130
  self._api_session.headers.update(
159
131
  {"User-Agent": build_user_agent(_sdk_version, "requests")}
160
132
  )
161
133
 
162
- # Base URLs (allow override via args or env vars for testing and custom routing)
134
+ # Base URLs
163
135
  scraperapi_base = (
164
136
  scraperapi_base_url
165
137
  or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
@@ -184,15 +156,13 @@ class ThordataClient:
184
156
  or self.LOCATIONS_URL
185
157
  ).rstrip("/")
186
158
 
187
- # These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
188
159
  gateway_base = os.getenv(
189
160
  "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
190
161
  )
191
- child_base = os.getenv(
162
+ self._gateway_base_url = gateway_base
163
+ self._child_base_url = os.getenv(
192
164
  "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
193
165
  )
194
- self._gateway_base_url = gateway_base
195
- self._child_base_url = child_base
196
166
 
197
167
  self._serp_url = f"{scraperapi_base}/request"
198
168
  self._builder_url = f"{scraperapi_base}/builder"
@@ -205,7 +175,6 @@ class ThordataClient:
205
175
 
206
176
  self._locations_base_url = locations_base
207
177
 
208
- # These 2 lines keep your existing behavior (derive account endpoints from locations_base)
209
178
  self._usage_stats_url = (
210
179
  f"{locations_base.replace('/locations', '')}/account/usage-statistics"
211
180
  )
@@ -225,100 +194,38 @@ class ThordataClient:
225
194
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
226
195
 
227
196
  # =========================================================================
228
- # Proxy Network Methods (Pure proxy network request functions)
197
+ # Proxy Network Methods
229
198
  # =========================================================================
230
199
  def get(
231
200
  self,
232
201
  url: str,
233
202
  *,
234
- proxy_config: Optional[ProxyConfig] = None,
235
- timeout: Optional[int] = None,
203
+ proxy_config: ProxyConfig | None = None,
204
+ timeout: int | None = None,
236
205
  **kwargs: Any,
237
206
  ) -> requests.Response:
238
- """
239
- Send a GET request through the Thordata Proxy Network.
240
-
241
- Args:
242
- url: The target URL.
243
- proxy_config: Custom proxy configuration for geo-targeting/sessions.
244
- timeout: Request timeout in seconds.
245
- **kwargs: Additional arguments to pass to requests.get().
246
-
247
- Returns:
248
- The response object.
249
-
250
- Example:
251
- >>> # Basic request
252
- >>> response = client.get("https://httpbin.org/ip")
253
- >>>
254
- >>> # With geo-targeting
255
- >>> from thordata.models import ProxyConfig
256
- >>> config = ProxyConfig(
257
- ... username="myuser",
258
- ... password="mypass",
259
- ... country="us",
260
- ... city="seattle"
261
- ... )
262
- >>> response = client.get("https://httpbin.org/ip", proxy_config=config)
263
- """
264
207
  logger.debug(f"Proxy GET request: {url}")
265
-
266
- timeout = timeout or self._default_timeout
267
-
268
- if proxy_config is None:
269
- proxy_config = self._get_default_proxy_config_from_env()
270
-
271
- if proxy_config is None:
272
- raise ThordataConfigError(
273
- "Proxy credentials are missing. "
274
- "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
275
- "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
276
- )
277
-
278
- kwargs["proxies"] = proxy_config.to_proxies_dict()
279
-
280
- @with_retry(self._retry_config)
281
- def _do() -> requests.Response:
282
- return self._proxy_request_with_proxy_manager(
283
- "GET",
284
- url,
285
- proxy_config=proxy_config,
286
- timeout=timeout,
287
- headers=kwargs.pop("headers", None),
288
- params=kwargs.pop("params", None),
289
- )
290
-
291
- try:
292
- return _do()
293
- except requests.Timeout as e:
294
- raise ThordataTimeoutError(
295
- f"Request timed out: {e}", original_error=e
296
- ) from e
297
- except Exception as e:
298
- raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
208
+ return self._proxy_verb("GET", url, proxy_config, timeout, **kwargs)
299
209
 
300
210
  def post(
301
211
  self,
302
212
  url: str,
303
213
  *,
304
- proxy_config: Optional[ProxyConfig] = None,
305
- timeout: Optional[int] = None,
214
+ proxy_config: ProxyConfig | None = None,
215
+ timeout: int | None = None,
306
216
  **kwargs: Any,
307
217
  ) -> requests.Response:
308
- """
309
- Send a POST request through the Thordata Proxy Network.
310
-
311
- Args:
312
- url: The target URL.
313
- proxy_config: Custom proxy configuration.
314
- timeout: Request timeout in seconds.
315
- **kwargs: Additional arguments to pass to requests.post().
316
-
317
- Returns:
318
- The response object.
319
- """
320
218
  logger.debug(f"Proxy POST request: {url}")
219
+ return self._proxy_verb("POST", url, proxy_config, timeout, **kwargs)
321
220
 
221
+ def _proxy_verb(
222
+ self,
223
+ method: str,
224
+ url: str,
225
+ proxy_config: ProxyConfig | None,
226
+ timeout: int | None,
227
+ **kwargs: Any,
228
+ ) -> requests.Response:
322
229
  timeout = timeout or self._default_timeout
323
230
 
324
231
  if proxy_config is None:
@@ -327,19 +234,21 @@ class ThordataClient:
327
234
  if proxy_config is None:
328
235
  raise ThordataConfigError(
329
236
  "Proxy credentials are missing. "
330
- "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
331
- "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
237
+ "Pass proxy_config or set THORDATA_RESIDENTIAL_USERNAME/PASSWORD env vars."
332
238
  )
333
239
 
334
- kwargs["proxies"] = proxy_config.to_proxies_dict()
240
+ # For requests/urllib3, we don't need 'proxies' dict in kwargs
241
+ # because we use ProxyManager directly.
242
+ # But we remove it if user accidentally passed it to avoid confusion.
243
+ kwargs.pop("proxies", None)
335
244
 
336
245
  @with_retry(self._retry_config)
337
246
  def _do() -> requests.Response:
338
247
  return self._proxy_request_with_proxy_manager(
339
- "POST",
248
+ method,
340
249
  url,
341
- proxy_config=proxy_config,
342
- timeout=timeout,
250
+ proxy_config=proxy_config, # type: ignore
251
+ timeout=timeout, # type: ignore
343
252
  headers=kwargs.pop("headers", None),
344
253
  params=kwargs.pop("params", None),
345
254
  data=kwargs.pop("data", None),
@@ -356,38 +265,16 @@ class ThordataClient:
356
265
 
357
266
  def build_proxy_url(
358
267
  self,
359
- username: str, # Required
360
- password: str, # Required
268
+ username: str,
269
+ password: str,
361
270
  *,
362
- country: Optional[str] = None,
363
- state: Optional[str] = None,
364
- city: Optional[str] = None,
365
- session_id: Optional[str] = None,
366
- session_duration: Optional[int] = None,
367
- product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
271
+ country: str | None = None,
272
+ state: str | None = None,
273
+ city: str | None = None,
274
+ session_id: str | None = None,
275
+ session_duration: int | None = None,
276
+ product: ProxyProduct | str = ProxyProduct.RESIDENTIAL,
368
277
  ) -> str:
369
- """
370
- Build a proxy URL with custom targeting options.
371
-
372
- This is a convenience method for creating proxy URLs without
373
- manually constructing a ProxyConfig.
374
-
375
- Args:
376
- country: Target country code (e.g., 'us', 'gb').
377
- state: Target state (e.g., 'california').
378
- city: Target city (e.g., 'seattle').
379
- session_id: Session ID for sticky sessions.
380
- session_duration: Session duration in minutes (1-90).
381
- product: Proxy product type.
382
-
383
- Returns:
384
- The proxy URL string.
385
-
386
- Example:
387
- >>> url = client.build_proxy_url(country="us", city="seattle")
388
- >>> proxies = {"http": url, "https": url}
389
- >>> requests.get("https://example.com", proxies=proxies)
390
- """
391
278
  config = ProxyConfig(
392
279
  username=username,
393
280
  password=password,
@@ -403,19 +290,17 @@ class ThordataClient:
403
290
  return config.build_proxy_url()
404
291
 
405
292
  # =========================================================================
406
- # Internal API Request Retry Helper (For all API calls)
293
+ # Internal Request Helpers
407
294
  # =========================================================================
408
295
  def _api_request_with_retry(
409
296
  self,
410
297
  method: str,
411
298
  url: str,
412
299
  *,
413
- data: Optional[Dict[str, Any]] = None,
414
- headers: Optional[Dict[str, str]] = None,
415
- params: Optional[Dict[str, Any]] = None,
300
+ data: dict[str, Any] | None = None,
301
+ headers: dict[str, str] | None = None,
302
+ params: dict[str, Any] | None = None,
416
303
  ) -> requests.Response:
417
- """Make an API request with automatic retry on transient failures."""
418
-
419
304
  @with_retry(self._retry_config)
420
305
  def _do_request() -> requests.Response:
421
306
  return self._api_session.request(
@@ -438,64 +323,101 @@ class ThordataClient:
438
323
  f"API request failed: {e}", original_error=e
439
324
  ) from e
440
325
 
326
+ def _get_proxy_manager(self, proxy_url: str) -> urllib3.ProxyManager:
327
+ """Get or create a ProxyManager for the given proxy URL (Pooled)."""
328
+ if proxy_url not in self._proxy_managers:
329
+ # Create a new manager if not cached
330
+ proxy_ssl_context = None
331
+ if proxy_url.startswith("https://"):
332
+ proxy_ssl_context = ssl.create_default_context()
333
+
334
+ self._proxy_managers[proxy_url] = urllib3.ProxyManager(
335
+ proxy_url,
336
+ proxy_ssl_context=proxy_ssl_context,
337
+ num_pools=10, # Allow concurrency
338
+ maxsize=10,
339
+ )
340
+ return self._proxy_managers[proxy_url]
341
+
342
+ def _proxy_request_with_proxy_manager(
343
+ self,
344
+ method: str,
345
+ url: str,
346
+ *,
347
+ proxy_config: ProxyConfig,
348
+ timeout: int,
349
+ headers: dict[str, str] | None = None,
350
+ params: dict[str, Any] | None = None,
351
+ data: Any = None,
352
+ ) -> requests.Response:
353
+ # 1. Prepare URL and Body
354
+ req = requests.Request(method=method.upper(), url=url, params=params)
355
+ prepped = self._proxy_session.prepare_request(req)
356
+ final_url = prepped.url or url
357
+
358
+ # 2. Get Proxy Configuration
359
+ proxy_url = proxy_config.build_proxy_endpoint()
360
+ proxy_headers = urllib3.make_headers(
361
+ proxy_basic_auth=proxy_config.build_proxy_basic_auth()
362
+ )
363
+
364
+ # 3. Get Cached Proxy Manager
365
+ pm = self._get_proxy_manager(proxy_url)
366
+
367
+ # 4. Prepare Request Headers/Body
368
+ req_headers = dict(headers or {})
369
+ body = None
370
+ if data is not None:
371
+ if isinstance(data, dict):
372
+ body = urlencode({k: str(v) for k, v in data.items()})
373
+ req_headers.setdefault(
374
+ "Content-Type", "application/x-www-form-urlencoded"
375
+ )
376
+ else:
377
+ body = data
378
+
379
+ # 5. Execute Request via urllib3
380
+ http_resp = pm.request(
381
+ method.upper(),
382
+ final_url,
383
+ body=body,
384
+ headers=req_headers or None,
385
+ proxy_headers=proxy_headers, # Attach Auth here
386
+ timeout=urllib3.Timeout(connect=timeout, read=timeout),
387
+ retries=False, # We handle retries in _proxy_verb
388
+ preload_content=True,
389
+ )
390
+
391
+ # 6. Convert back to requests.Response
392
+ r = requests.Response()
393
+ r.status_code = int(getattr(http_resp, "status", 0) or 0)
394
+ r._content = http_resp.data or b""
395
+ r.url = final_url
396
+ r.headers = requests.structures.CaseInsensitiveDict(
397
+ dict(http_resp.headers or {})
398
+ )
399
+ return r
400
+
441
401
  # =========================================================================
442
- # SERP API Methods (Search Engine Results Page functions)
402
+ # SERP API Methods
443
403
  # =========================================================================
444
404
  def serp_search(
445
405
  self,
446
406
  query: str,
447
407
  *,
448
- engine: Union[Engine, str] = Engine.GOOGLE,
408
+ engine: Engine | str = Engine.GOOGLE,
449
409
  num: int = 10,
450
- country: Optional[str] = None,
451
- language: Optional[str] = None,
452
- search_type: Optional[str] = None,
453
- device: Optional[str] = None,
454
- render_js: Optional[bool] = None,
455
- no_cache: Optional[bool] = None,
410
+ country: str | None = None,
411
+ language: str | None = None,
412
+ search_type: str | None = None,
413
+ device: str | None = None,
414
+ render_js: bool | None = None,
415
+ no_cache: bool | None = None,
456
416
  output_format: str = "json",
457
417
  **kwargs: Any,
458
- ) -> Dict[str, Any]:
459
- """
460
- Execute a real-time SERP (Search Engine Results Page) search.
461
-
462
- Args:
463
- query: The search keywords.
464
- engine: Search engine (google, bing, yandex, duckduckgo, baidu).
465
- num: Number of results to retrieve (default: 10).
466
- country: Country code for localized results (e.g., 'us').
467
- language: Language code for interface (e.g., 'en').
468
- search_type: Type of search (images, news, shopping, videos, etc.).
469
- device: Device type ('desktop', 'mobile', 'tablet').
470
- render_js: Enable JavaScript rendering in SERP (render_js=True).
471
- no_cache: Disable internal caching (no_cache=True).
472
- output_format: 'json' to return parsed JSON (default),
473
- 'html' to return HTML wrapped in {'html': ...}.
474
- **kwargs: Additional engine-specific parameters.
475
-
476
- Returns:
477
- Dict[str, Any]: Parsed JSON results or a dict with 'html' key.
478
-
479
- Example:
480
- >>> # Basic search
481
- >>> results = client.serp_search("python tutorial")
482
- >>>
483
- >>> # With options
484
- >>> results = client.serp_search(
485
- ... "laptop reviews",
486
- ... engine="google",
487
- ... num=20,
488
- ... country="us",
489
- ... search_type="shopping",
490
- ... device="mobile",
491
- ... render_js=True,
492
- ... no_cache=True,
493
- ... )
494
- """
495
- # Normalize engine
418
+ ) -> dict[str, Any]:
496
419
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
497
420
 
498
- # Build request using model
499
421
  request = SerpRequest(
500
422
  query=query,
501
423
  engine=engine_str,
@@ -510,84 +432,13 @@ class ThordataClient:
510
432
  extra_params=kwargs,
511
433
  )
512
434
 
513
- payload = request.to_payload()
514
- headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
515
-
516
- logger.info(
517
- f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
518
- )
519
-
520
- try:
521
- response = self._api_request_with_retry(
522
- "POST",
523
- self._serp_url,
524
- data=payload,
525
- headers=headers,
526
- )
527
- response.raise_for_status()
528
-
529
- # JSON mode (default)
530
- if output_format.lower() == "json":
531
- data = response.json()
532
-
533
- if isinstance(data, dict):
534
- code = data.get("code")
535
- if code is not None and code != 200:
536
- msg = extract_error_message(data)
537
- raise_for_code(
538
- f"SERP API Error: {msg}",
539
- code=code,
540
- payload=data,
541
- )
542
-
543
- return parse_json_response(data)
544
-
545
- # HTML mode: wrap as dict to keep return type stable
546
- return {"html": response.text}
547
-
548
- except requests.Timeout as e:
549
- raise ThordataTimeoutError(
550
- f"SERP request timed out: {e}",
551
- original_error=e,
552
- ) from e
553
- except requests.RequestException as e:
554
- raise ThordataNetworkError(
555
- f"SERP request failed: {e}",
556
- original_error=e,
557
- ) from e
435
+ return self.serp_search_advanced(request)
558
436
 
559
- def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
560
- """
561
- Execute a SERP search using a SerpRequest object.
562
-
563
- This method provides full control over all search parameters.
564
-
565
- Args:
566
- request: A SerpRequest object with all parameters configured.
567
-
568
- Returns:
569
- Dict[str, Any]: Parsed JSON results or dict with 'html' key.
570
-
571
- Example:
572
- >>> from thordata.models import SerpRequest
573
- >>> request = SerpRequest(
574
- ... query="python programming",
575
- ... engine="google",
576
- ... num=50,
577
- ... country="us",
578
- ... language="en",
579
- ... search_type="news",
580
- ... time_filter="week",
581
- ... safe_search=True
582
- ... )
583
- >>> results = client.serp_search_advanced(request)
584
- """
437
+ def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
585
438
  payload = request.to_payload()
586
439
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
587
440
 
588
- logger.info(
589
- f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
590
- )
441
+ logger.info(f"SERP Advanced Search: {request.engine} - {request.query[:50]}")
591
442
 
592
443
  try:
593
444
  response = self._api_request_with_retry(
@@ -600,34 +451,22 @@ class ThordataClient:
600
451
 
601
452
  if request.output_format.lower() == "json":
602
453
  data = response.json()
603
-
604
454
  if isinstance(data, dict):
605
455
  code = data.get("code")
606
456
  if code is not None and code != 200:
607
457
  msg = extract_error_message(data)
608
- raise_for_code(
609
- f"SERP API Error: {msg}",
610
- code=code,
611
- payload=data,
612
- )
613
-
458
+ raise_for_code(f"SERP Error: {msg}", code=code, payload=data)
614
459
  return parse_json_response(data)
615
460
 
616
461
  return {"html": response.text}
617
462
 
618
463
  except requests.Timeout as e:
619
- raise ThordataTimeoutError(
620
- f"SERP request timed out: {e}",
621
- original_error=e,
622
- ) from e
464
+ raise ThordataTimeoutError(f"SERP timeout: {e}", original_error=e) from e
623
465
  except requests.RequestException as e:
624
- raise ThordataNetworkError(
625
- f"SERP request failed: {e}",
626
- original_error=e,
627
- ) from e
466
+ raise ThordataNetworkError(f"SERP failed: {e}", original_error=e) from e
628
467
 
629
468
  # =========================================================================
630
- # Universal Scraping API Methods (Web Unlocker functions)
469
+ # Universal Scraping API
631
470
  # =========================================================================
632
471
  def universal_scrape(
633
472
  self,
@@ -635,43 +474,12 @@ class ThordataClient:
635
474
  *,
636
475
  js_render: bool = False,
637
476
  output_format: str = "html",
638
- country: Optional[str] = None,
639
- block_resources: Optional[str] = None,
640
- wait: Optional[int] = None,
641
- wait_for: Optional[str] = None,
477
+ country: str | None = None,
478
+ block_resources: str | None = None,
479
+ wait: int | None = None,
480
+ wait_for: str | None = None,
642
481
  **kwargs: Any,
643
- ) -> Union[str, bytes]:
644
- """
645
- Scrape a URL using the Universal Scraping API (Web Unlocker).
646
-
647
- Automatically bypasses Cloudflare, CAPTCHAs, and antibot systems.
648
-
649
- Args:
650
- url: Target URL.
651
- js_render: Enable JavaScript rendering (headless browser).
652
- output_format: "html" or "png" (screenshot).
653
- country: Geo-targeting country code.
654
- block_resources: Resources to block (e.g., 'script,image').
655
- wait: Wait time in milliseconds after page load.
656
- wait_for: CSS selector to wait for.
657
- **kwargs: Additional parameters.
658
-
659
- Returns:
660
- HTML string or PNG bytes depending on output_format.
661
-
662
- Example:
663
- >>> # Get HTML
664
- >>> html = client.universal_scrape("https://example.com", js_render=True)
665
- >>>
666
- >>> # Get screenshot
667
- >>> png = client.universal_scrape(
668
- ... "https://example.com",
669
- ... js_render=True,
670
- ... output_format="png"
671
- ... )
672
- >>> with open("screenshot.png", "wb") as f:
673
- ... f.write(png)
674
- """
482
+ ) -> str | bytes:
675
483
  request = UniversalScrapeRequest(
676
484
  url=url,
677
485
  js_render=js_render,
@@ -682,27 +490,13 @@ class ThordataClient:
682
490
  wait_for=wait_for,
683
491
  extra_params=kwargs,
684
492
  )
685
-
686
493
  return self.universal_scrape_advanced(request)
687
494
 
688
- def universal_scrape_advanced(
689
- self, request: UniversalScrapeRequest
690
- ) -> Union[str, bytes]:
691
- """
692
- Scrape using a UniversalScrapeRequest object for full control.
693
-
694
- Args:
695
- request: A UniversalScrapeRequest with all parameters.
696
-
697
- Returns:
698
- HTML string or PNG bytes.
699
- """
495
+ def universal_scrape_advanced(self, request: UniversalScrapeRequest) -> str | bytes:
700
496
  payload = request.to_payload()
701
497
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
702
498
 
703
- logger.info(
704
- f"Universal Scrape: {request.url} (format: {request.output_format})"
705
- )
499
+ logger.info(f"Universal Scrape: {request.url}")
706
500
 
707
501
  try:
708
502
  response = self._api_request_with_retry(
@@ -712,85 +506,49 @@ class ThordataClient:
712
506
  headers=headers,
713
507
  )
714
508
  response.raise_for_status()
715
-
716
509
  return self._process_universal_response(response, request.output_format)
717
510
 
718
511
  except requests.Timeout as e:
719
512
  raise ThordataTimeoutError(
720
- f"Universal scrape timed out: {e}", original_error=e
513
+ f"Universal timeout: {e}", original_error=e
721
514
  ) from e
722
515
  except requests.RequestException as e:
723
516
  raise ThordataNetworkError(
724
- f"Universal scrape failed: {e}", original_error=e
517
+ f"Universal failed: {e}", original_error=e
725
518
  ) from e
726
519
 
727
520
  def _process_universal_response(
728
521
  self, response: requests.Response, output_format: str
729
- ) -> Union[str, bytes]:
730
- """Process the response from Universal API."""
731
- # Try to parse as JSON
522
+ ) -> str | bytes:
732
523
  try:
733
524
  resp_json = response.json()
734
525
  except ValueError:
735
- # Raw content returned
736
- if output_format.lower() == "png":
737
- return response.content
738
- return response.text
526
+ return response.content if output_format.lower() == "png" else response.text
739
527
 
740
- # Check for API-level errors
741
528
  if isinstance(resp_json, dict):
742
529
  code = resp_json.get("code")
743
530
  if code is not None and code != 200:
744
531
  msg = extract_error_message(resp_json)
745
- raise_for_code(
746
- f"Universal API Error: {msg}", code=code, payload=resp_json
747
- )
532
+ raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
748
533
 
749
- # Extract HTML
750
534
  if "html" in resp_json:
751
535
  return resp_json["html"]
752
-
753
- # Extract PNG
754
536
  if "png" in resp_json:
755
537
  return decode_base64_image(resp_json["png"])
756
538
 
757
- # Fallback
758
539
  return str(resp_json)
759
540
 
760
541
  # =========================================================================
761
- # Web Scraper API Methods (Only async task management functions)
542
+ # Web Scraper API (Tasks)
762
543
  # =========================================================================
763
544
  def create_scraper_task(
764
545
  self,
765
546
  file_name: str,
766
547
  spider_id: str,
767
548
  spider_name: str,
768
- parameters: Dict[str, Any],
769
- universal_params: Optional[Dict[str, Any]] = None,
549
+ parameters: dict[str, Any],
550
+ universal_params: dict[str, Any] | None = None,
770
551
  ) -> str:
771
- """
772
- Create an asynchronous Web Scraper task.
773
-
774
- Note: Get spider_id and spider_name from the Thordata Dashboard.
775
-
776
- Args:
777
- file_name: Name for the output file.
778
- spider_id: Spider identifier from Dashboard.
779
- spider_name: Spider name (e.g., "youtube.com").
780
- parameters: Spider-specific parameters.
781
- universal_params: Global spider settings.
782
-
783
- Returns:
784
- The created task_id.
785
-
786
- Example:
787
- >>> task_id = client.create_scraper_task(
788
- ... file_name="youtube_data",
789
- ... spider_id="youtube_video-post_by-url",
790
- ... spider_name="youtube.com",
791
- ... parameters={"url": "https://youtube.com/@channel/videos"}
792
- ... )
793
- """
794
552
  config = ScraperTaskConfig(
795
553
  file_name=file_name,
796
554
  spider_id=spider_id,
@@ -798,50 +556,26 @@ class ThordataClient:
798
556
  parameters=parameters,
799
557
  universal_params=universal_params,
800
558
  )
801
-
802
559
  return self.create_scraper_task_advanced(config)
803
560
 
804
561
  def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
805
- """
806
- Create a scraper task using a ScraperTaskConfig object.
807
-
808
- Args:
809
- config: Task configuration.
810
-
811
- Returns:
812
- The created task_id.
813
- """
814
562
  self._require_public_credentials()
815
-
816
563
  payload = config.to_payload()
817
-
818
- # Builder needs 3 headers: token, key, Authorization Bearer
819
564
  headers = build_builder_headers(
820
- self.scraper_token,
821
- self.public_token or "",
822
- self.public_key or "",
565
+ self.scraper_token, self.public_token or "", self.public_key or ""
823
566
  )
824
567
 
825
- logger.info(f"Creating Scraper Task: {config.spider_name}")
826
-
827
568
  try:
828
569
  response = self._api_request_with_retry(
829
- "POST",
830
- self._builder_url,
831
- data=payload,
832
- headers=headers,
570
+ "POST", self._builder_url, data=payload, headers=headers
833
571
  )
834
572
  response.raise_for_status()
835
-
836
573
  data = response.json()
837
- code = data.get("code")
838
-
839
- if code != 200:
840
- msg = extract_error_message(data)
841
- raise_for_code(f"Task creation failed: {msg}", code=code, payload=data)
842
-
574
+ if data.get("code") != 200:
575
+ raise_for_code(
576
+ "Task creation failed", code=data.get("code"), payload=data
577
+ )
843
578
  return data["data"]["task_id"]
844
-
845
579
  except requests.RequestException as e:
846
580
  raise ThordataNetworkError(
847
581
  f"Task creation failed: {e}", original_error=e
@@ -852,38 +586,9 @@ class ThordataClient:
852
586
  file_name: str,
853
587
  spider_id: str,
854
588
  spider_name: str,
855
- parameters: Dict[str, Any],
856
- common_settings: "CommonSettings",
589
+ parameters: dict[str, Any],
590
+ common_settings: CommonSettings,
857
591
  ) -> str:
858
- """
859
- Create a YouTube video/audio download task.
860
-
861
- Uses the /video_builder endpoint.
862
-
863
- Args:
864
- file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
865
- spider_id: Spider identifier (e.g., "youtube_video_by-url").
866
- spider_name: Spider name (typically "youtube.com").
867
- parameters: Spider parameters (e.g., {"url": "..."}).
868
- common_settings: Video/audio settings.
869
-
870
- Returns:
871
- The created task_id.
872
-
873
- Example:
874
- >>> from thordata import CommonSettings
875
- >>> task_id = client.create_video_task(
876
- ... file_name="{{VideoID}}",
877
- ... spider_id="youtube_video_by-url",
878
- ... spider_name="youtube.com",
879
- ... parameters={"url": "https://youtube.com/watch?v=xxx"},
880
- ... common_settings=CommonSettings(
881
- ... resolution="1080p",
882
- ... is_subtitles="true"
883
- ... )
884
- ... )
885
- """
886
-
887
592
  config = VideoTaskConfig(
888
593
  file_name=file_name,
889
594
  spider_id=spider_id,
@@ -891,210 +596,97 @@ class ThordataClient:
891
596
  parameters=parameters,
892
597
  common_settings=common_settings,
893
598
  )
894
-
895
599
  return self.create_video_task_advanced(config)
896
600
 
897
601
  def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
898
- """
899
- Create a video task using VideoTaskConfig object.
900
-
901
- Args:
902
- config: Video task configuration.
903
-
904
- Returns:
905
- The created task_id.
906
- """
907
-
908
602
  self._require_public_credentials()
909
-
910
603
  payload = config.to_payload()
911
604
  headers = build_builder_headers(
912
- self.scraper_token,
913
- self.public_token or "",
914
- self.public_key or "",
605
+ self.scraper_token, self.public_token or "", self.public_key or ""
915
606
  )
916
607
 
917
- logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
918
-
919
608
  response = self._api_request_with_retry(
920
- "POST",
921
- self._video_builder_url,
922
- data=payload,
923
- headers=headers,
609
+ "POST", self._video_builder_url, data=payload, headers=headers
924
610
  )
925
611
  response.raise_for_status()
926
-
927
612
  data = response.json()
928
- code = data.get("code")
929
-
930
- if code != 200:
931
- msg = extract_error_message(data)
613
+ if data.get("code") != 200:
932
614
  raise_for_code(
933
- f"Video task creation failed: {msg}", code=code, payload=data
615
+ "Video task creation failed", code=data.get("code"), payload=data
934
616
  )
935
-
936
617
  return data["data"]["task_id"]
937
618
 
938
619
  def get_task_status(self, task_id: str) -> str:
939
- """
940
- Check the status of an asynchronous scraping task.
941
-
942
- Returns:
943
- Status string (e.g., "running", "ready", "failed").
944
-
945
- Raises:
946
- ThordataConfigError: If public credentials are missing.
947
- ThordataAPIError: If API returns a non-200 code in JSON payload.
948
- ThordataNetworkError: If network/HTTP request fails.
949
- """
950
620
  self._require_public_credentials()
951
-
952
621
  headers = build_public_api_headers(
953
622
  self.public_token or "", self.public_key or ""
954
623
  )
955
- payload = {"tasks_ids": task_id}
956
-
957
624
  try:
958
625
  response = self._api_request_with_retry(
959
626
  "POST",
960
627
  self._status_url,
961
- data=payload,
628
+ data={"tasks_ids": task_id},
962
629
  headers=headers,
963
630
  )
964
631
  response.raise_for_status()
965
632
  data = response.json()
966
-
967
- if isinstance(data, dict):
968
- code = data.get("code")
969
- if code is not None and code != 200:
970
- msg = extract_error_message(data)
971
- raise_for_code(
972
- f"Task status API Error: {msg}",
973
- code=code,
974
- payload=data,
975
- )
976
-
977
- items = data.get("data") or []
978
- for item in items:
979
- if str(item.get("task_id")) == str(task_id):
980
- return item.get("status", "unknown")
981
-
982
- return "unknown"
983
-
984
- # Unexpected payload type
985
- raise ThordataNetworkError(
986
- f"Unexpected task status response type: {type(data).__name__}",
987
- original_error=None,
988
- )
989
-
990
- except requests.Timeout as e:
991
- raise ThordataTimeoutError(
992
- f"Status check timed out: {e}", original_error=e
993
- ) from e
633
+ if data.get("code") != 200:
634
+ raise_for_code("Task status error", code=data.get("code"), payload=data)
635
+
636
+ items = data.get("data") or []
637
+ for item in items:
638
+ if str(item.get("task_id")) == str(task_id):
639
+ return item.get("status", "unknown")
640
+ return "unknown"
994
641
  except requests.RequestException as e:
995
642
  raise ThordataNetworkError(
996
643
  f"Status check failed: {e}", original_error=e
997
644
  ) from e
998
645
 
999
646
  def safe_get_task_status(self, task_id: str) -> str:
1000
- """
1001
- Backward-compatible status check.
1002
-
1003
- Returns:
1004
- Status string, or "error" on any exception.
1005
- """
1006
647
  try:
1007
648
  return self.get_task_status(task_id)
1008
649
  except Exception:
1009
650
  return "error"
1010
651
 
1011
652
  def get_task_result(self, task_id: str, file_type: str = "json") -> str:
1012
- """
1013
- Get the download URL for a completed task.
1014
- """
1015
653
  self._require_public_credentials()
1016
-
1017
654
  headers = build_public_api_headers(
1018
655
  self.public_token or "", self.public_key or ""
1019
656
  )
1020
- payload = {"tasks_id": task_id, "type": file_type}
1021
-
1022
- logger.info(f"Getting result URL for Task: {task_id}")
1023
-
1024
657
  try:
1025
658
  response = self._api_request_with_retry(
1026
659
  "POST",
1027
660
  self._download_url,
1028
- data=payload,
661
+ data={"tasks_id": task_id, "type": file_type},
1029
662
  headers=headers,
1030
663
  )
1031
664
  response.raise_for_status()
1032
-
1033
665
  data = response.json()
1034
- code = data.get("code")
1035
-
1036
- if code == 200 and data.get("data"):
666
+ if data.get("code") == 200 and data.get("data"):
1037
667
  return data["data"]["download"]
1038
-
1039
- msg = extract_error_message(data)
1040
- raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
1041
- # This line won't be reached, but satisfies mypy
1042
- raise RuntimeError("Unexpected state")
1043
-
668
+ raise_for_code("Get result failed", code=data.get("code"), payload=data)
669
+ return ""
1044
670
  except requests.RequestException as e:
1045
671
  raise ThordataNetworkError(
1046
672
  f"Get result failed: {e}", original_error=e
1047
673
  ) from e
1048
674
 
1049
- def list_tasks(
1050
- self,
1051
- page: int = 1,
1052
- size: int = 20,
1053
- ) -> Dict[str, Any]:
1054
- """
1055
- List all Web Scraper tasks.
1056
-
1057
- Args:
1058
- page: Page number (starts from 1).
1059
- size: Number of tasks per page.
1060
-
1061
- Returns:
1062
- Dict containing 'count' and 'list' of tasks.
1063
-
1064
- Example:
1065
- >>> result = client.list_tasks(page=1, size=10)
1066
- >>> print(f"Total tasks: {result['count']}")
1067
- >>> for task in result['list']:
1068
- ... print(f"Task {task['task_id']}: {task['status']}")
1069
- """
675
+ def list_tasks(self, page: int = 1, size: int = 20) -> dict[str, Any]:
1070
676
  self._require_public_credentials()
1071
-
1072
677
  headers = build_public_api_headers(
1073
678
  self.public_token or "", self.public_key or ""
1074
679
  )
1075
- payload: Dict[str, Any] = {}
1076
- if page:
1077
- payload["page"] = str(page)
1078
- if size:
1079
- payload["size"] = str(size)
1080
-
1081
- logger.info(f"Listing tasks: page={page}, size={size}")
1082
-
1083
680
  response = self._api_request_with_retry(
1084
681
  "POST",
1085
682
  self._list_url,
1086
- data=payload,
683
+ data={"page": str(page), "size": str(size)},
1087
684
  headers=headers,
1088
685
  )
1089
686
  response.raise_for_status()
1090
-
1091
687
  data = response.json()
1092
- code = data.get("code")
1093
-
1094
- if code != 200:
1095
- msg = extract_error_message(data)
1096
- raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
1097
-
688
+ if data.get("code") != 200:
689
+ raise_for_code("List tasks failed", code=data.get("code"), payload=data)
1098
690
  return data.get("data", {"count": 0, "list": []})
1099
691
 
1100
692
  def wait_for_task(
@@ -1104,84 +696,32 @@ class ThordataClient:
1104
696
  poll_interval: float = 5.0,
1105
697
  max_wait: float = 600.0,
1106
698
  ) -> str:
1107
- """
1108
- Wait for a task to complete.
1109
-
1110
- Args:
1111
- task_id: The task ID to wait for.
1112
- poll_interval: Seconds between status checks.
1113
- max_wait: Maximum seconds to wait.
1114
-
1115
- Returns:
1116
- Final task status.
1117
-
1118
- Raises:
1119
- TimeoutError: If max_wait is exceeded.
1120
-
1121
- Example:
1122
- >>> task_id = client.create_scraper_task(...)
1123
- >>> status = client.wait_for_task(task_id, max_wait=300)
1124
- >>> if status in ("ready", "success"):
1125
- ... url = client.get_task_result(task_id)
1126
- """
1127
699
  import time
1128
700
 
1129
701
  start = time.monotonic()
1130
-
1131
702
  while (time.monotonic() - start) < max_wait:
1132
703
  status = self.get_task_status(task_id)
1133
-
1134
- logger.debug(f"Task {task_id} status: {status}")
1135
-
1136
- terminal_statuses = {
704
+ if status.lower() in {
1137
705
  "ready",
1138
706
  "success",
1139
707
  "finished",
1140
708
  "failed",
1141
709
  "error",
1142
710
  "cancelled",
1143
- }
1144
-
1145
- if status.lower() in terminal_statuses:
711
+ }:
1146
712
  return status
1147
-
1148
713
  time.sleep(poll_interval)
1149
-
1150
- raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
714
+ raise TimeoutError(f"Task {task_id} timeout")
1151
715
 
1152
716
  # =========================================================================
1153
- # Proxy Account Management Methods (Proxy balance, user, whitelist functions)
717
+ # Account / Locations / Utils
1154
718
  # =========================================================================
1155
719
  def get_usage_statistics(
1156
720
  self,
1157
- from_date: Union[str, date],
1158
- to_date: Union[str, date],
721
+ from_date: str | date,
722
+ to_date: str | date,
1159
723
  ) -> UsageStatistics:
1160
- """
1161
- Get account usage statistics for a date range.
1162
-
1163
- Args:
1164
- from_date: Start date (YYYY-MM-DD string or date object).
1165
- to_date: End date (YYYY-MM-DD string or date object).
1166
-
1167
- Returns:
1168
- UsageStatistics object with traffic data.
1169
-
1170
- Raises:
1171
- ValueError: If date range exceeds 180 days.
1172
-
1173
- Example:
1174
- >>> from datetime import date, timedelta
1175
- >>> today = date.today()
1176
- >>> week_ago = today - timedelta(days=7)
1177
- >>> stats = client.get_usage_statistics(week_ago, today)
1178
- >>> print(f"Used: {stats.range_usage_gb():.2f} GB")
1179
- >>> print(f"Balance: {stats.balance_gb():.2f} GB")
1180
- """
1181
-
1182
724
  self._require_public_credentials()
1183
-
1184
- # Convert dates to strings
1185
725
  if isinstance(from_date, date):
1186
726
  from_date = from_date.strftime("%Y-%m-%d")
1187
727
  if isinstance(to_date, date):
@@ -1193,199 +733,54 @@ class ThordataClient:
1193
733
  "from_date": from_date,
1194
734
  "to_date": to_date,
1195
735
  }
1196
-
1197
- logger.info(f"Getting usage statistics: {from_date} to {to_date}")
1198
-
1199
- response = self._api_request_with_retry(
1200
- "GET",
1201
- self._usage_stats_url,
1202
- params=params,
1203
- )
1204
- response.raise_for_status()
1205
-
1206
- data = response.json()
1207
-
1208
- if isinstance(data, dict):
1209
- code = data.get("code")
1210
- if code is not None and code != 200:
1211
- msg = extract_error_message(data)
1212
- raise_for_code(
1213
- f"Usage statistics error: {msg}",
1214
- code=code,
1215
- payload=data,
1216
- )
1217
-
1218
- # Extract data field
1219
- usage_data = data.get("data", data)
1220
- return UsageStatistics.from_dict(usage_data)
1221
-
1222
- raise ThordataNetworkError(
1223
- f"Unexpected usage statistics response: {type(data).__name__}",
1224
- original_error=None,
1225
- )
1226
-
1227
- def get_residential_balance(self) -> Dict[str, Any]:
1228
- """
1229
- Get residential proxy balance.
1230
-
1231
- Uses public_token/public_key (Dashboard -> My account -> API).
1232
- """
1233
- headers = self._build_gateway_headers()
1234
-
1235
- logger.info("Getting residential proxy balance")
1236
-
1237
736
  response = self._api_request_with_retry(
1238
- "POST",
1239
- f"{self._gateway_base_url}/getFlowBalance",
1240
- headers=headers,
1241
- data={},
737
+ "GET", self._usage_stats_url, params=params
1242
738
  )
1243
739
  response.raise_for_status()
1244
-
1245
740
  data = response.json()
1246
- code = data.get("code")
1247
-
1248
- if code != 200:
1249
- msg = extract_error_message(data)
1250
- raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
1251
-
1252
- return data.get("data", {})
1253
-
1254
- def get_residential_usage(
1255
- self,
1256
- start_time: Union[str, int],
1257
- end_time: Union[str, int],
1258
- ) -> Dict[str, Any]:
1259
- """
1260
- Get residential proxy usage records.
1261
-
1262
- Uses public_token/public_key (Dashboard -> My account -> API).
1263
- """
1264
- headers = self._build_gateway_headers()
1265
- payload = {"start_time": str(start_time), "end_time": str(end_time)}
1266
-
1267
- logger.info(f"Getting residential usage: {start_time} to {end_time}")
1268
-
1269
- response = self._api_request_with_retry(
1270
- "POST",
1271
- f"{self._gateway_base_url}/usageRecord",
1272
- headers=headers,
1273
- data=payload,
1274
- )
1275
- response.raise_for_status()
1276
-
1277
- data = response.json()
1278
- code = data.get("code")
1279
-
1280
- if code != 200:
1281
- msg = extract_error_message(data)
1282
- raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
1283
-
1284
- return data.get("data", {})
741
+ if data.get("code") != 200:
742
+ raise_for_code("Usage stats error", code=data.get("code"), payload=data)
743
+ return UsageStatistics.from_dict(data.get("data", data))
1285
744
 
1286
745
  def list_proxy_users(
1287
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
746
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
1288
747
  ) -> ProxyUserList:
1289
- """
1290
- List all proxy users (sub-accounts).
1291
-
1292
- Args:
1293
- proxy_type: Proxy type (1=Residential, 2=Unlimited).
1294
-
1295
- Returns:
1296
- ProxyUserList with user details.
1297
-
1298
- Example:
1299
- >>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
1300
- >>> print(f"Total users: {users.user_count}")
1301
- >>> for user in users.users:
1302
- ... print(f"{user.username}: {user.usage_gb():.2f} GB used")
1303
- """
1304
-
1305
748
  self._require_public_credentials()
1306
-
749
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1307
750
  params = {
1308
751
  "token": self.public_token,
1309
752
  "key": self.public_key,
1310
- "proxy_type": str(
1311
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1312
- ),
753
+ "proxy_type": str(pt),
1313
754
  }
1314
-
1315
- logger.info(f"Listing proxy users: type={params['proxy_type']}")
1316
-
1317
755
  response = self._api_request_with_retry(
1318
- "GET",
1319
- f"{self._proxy_users_url}/user-list",
1320
- params=params,
756
+ "GET", f"{self._proxy_users_url}/user-list", params=params
1321
757
  )
1322
758
  response.raise_for_status()
1323
-
1324
759
  data = response.json()
1325
-
1326
- if isinstance(data, dict):
1327
- code = data.get("code")
1328
- if code is not None and code != 200:
1329
- msg = extract_error_message(data)
1330
- raise_for_code(
1331
- f"List proxy users error: {msg}", code=code, payload=data
1332
- )
1333
-
1334
- user_data = data.get("data", data)
1335
- return ProxyUserList.from_dict(user_data)
1336
-
1337
- raise ThordataNetworkError(
1338
- f"Unexpected proxy users response: {type(data).__name__}",
1339
- original_error=None,
1340
- )
760
+ if data.get("code") != 200:
761
+ raise_for_code("List users error", code=data.get("code"), payload=data)
762
+ return ProxyUserList.from_dict(data.get("data", data))
1341
763
 
1342
764
  def create_proxy_user(
1343
765
  self,
1344
766
  username: str,
1345
767
  password: str,
1346
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
768
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1347
769
  traffic_limit: int = 0,
1348
770
  status: bool = True,
1349
- ) -> Dict[str, Any]:
1350
- """
1351
- Create a new proxy user (sub-account).
1352
-
1353
- Args:
1354
- username: Username for the new user.
1355
- password: Password for the new user.
1356
- proxy_type: Proxy type (1=Residential, 2=Unlimited).
1357
- traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
1358
- status: Enable/disable user (True/False).
1359
-
1360
- Returns:
1361
- API response data.
1362
-
1363
- Example:
1364
- >>> result = client.create_proxy_user(
1365
- ... username="subuser1",
1366
- ... password="securepass",
1367
- ... traffic_limit=5120, # 5GB
1368
- ... status=True
1369
- ... )
1370
- """
771
+ ) -> dict[str, Any]:
1371
772
  self._require_public_credentials()
1372
-
773
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1373
774
  headers = build_public_api_headers(
1374
775
  self.public_token or "", self.public_key or ""
1375
776
  )
1376
-
1377
777
  payload = {
1378
- "proxy_type": str(
1379
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1380
- ),
778
+ "proxy_type": str(pt),
1381
779
  "username": username,
1382
780
  "password": password,
1383
781
  "traffic_limit": str(traffic_limit),
1384
782
  "status": "true" if status else "false",
1385
783
  }
1386
-
1387
- logger.info(f"Creating proxy user: {username}")
1388
-
1389
784
  response = self._api_request_with_retry(
1390
785
  "POST",
1391
786
  f"{self._proxy_users_url}/create-user",
@@ -1393,428 +788,145 @@ class ThordataClient:
1393
788
  headers=headers,
1394
789
  )
1395
790
  response.raise_for_status()
1396
-
1397
791
  data = response.json()
1398
- code = data.get("code")
1399
-
1400
- if code != 200:
1401
- msg = extract_error_message(data)
1402
- raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
1403
-
792
+ if data.get("code") != 200:
793
+ raise_for_code("Create user failed", code=data.get("code"), payload=data)
1404
794
  return data.get("data", {})
1405
795
 
1406
796
  def add_whitelist_ip(
1407
797
  self,
1408
798
  ip: str,
1409
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
799
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1410
800
  status: bool = True,
1411
- ) -> Dict[str, Any]:
1412
- """
1413
- Add an IP to the whitelist for IP authentication.
1414
-
1415
- Args:
1416
- ip: IP address to whitelist.
1417
- proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
1418
- status: Enable/disable the IP (True/False).
1419
-
1420
- Returns:
1421
- API response data.
1422
-
1423
- Example:
1424
- >>> result = client.add_whitelist_ip(
1425
- ... ip="123.45.67.89",
1426
- ... proxy_type=ProxyType.RESIDENTIAL,
1427
- ... status=True
1428
- ... )
1429
- """
801
+ ) -> dict[str, Any]:
1430
802
  self._require_public_credentials()
1431
-
803
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1432
804
  headers = build_public_api_headers(
1433
805
  self.public_token or "", self.public_key or ""
1434
806
  )
1435
-
1436
- # Convert ProxyType to int
1437
- proxy_type_int = (
1438
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1439
- )
1440
-
1441
807
  payload = {
1442
- "proxy_type": str(proxy_type_int),
808
+ "proxy_type": str(pt),
1443
809
  "ip": ip,
1444
810
  "status": "true" if status else "false",
1445
811
  }
1446
-
1447
- logger.info(f"Adding whitelist IP: {ip}")
1448
-
1449
812
  response = self._api_request_with_retry(
1450
- "POST",
1451
- f"{self._whitelist_url}/add-ip",
1452
- data=payload,
1453
- headers=headers,
813
+ "POST", f"{self._whitelist_url}/add-ip", data=payload, headers=headers
1454
814
  )
1455
815
  response.raise_for_status()
1456
-
1457
816
  data = response.json()
1458
- code = data.get("code")
1459
-
1460
- if code != 200:
1461
- msg = extract_error_message(data)
1462
- raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
1463
-
817
+ if data.get("code") != 200:
818
+ raise_for_code(
819
+ "Add whitelist IP failed", code=data.get("code"), payload=data
820
+ )
1464
821
  return data.get("data", {})
1465
822
 
1466
- def list_proxy_servers(
1467
- self,
1468
- proxy_type: int,
1469
- ) -> List[ProxyServer]:
1470
- """
1471
- List ISP or Datacenter proxy servers.
1472
-
1473
- Args:
1474
- proxy_type: Proxy type (1=ISP, 2=Datacenter).
1475
-
1476
- Returns:
1477
- List of ProxyServer objects.
1478
-
1479
- Example:
1480
- >>> servers = client.list_proxy_servers(proxy_type=1) # ISP proxies
1481
- >>> for server in servers:
1482
- ... print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
1483
- """
1484
-
823
+ def list_proxy_servers(self, proxy_type: int) -> list[ProxyServer]:
1485
824
  self._require_public_credentials()
1486
-
1487
825
  params = {
1488
826
  "token": self.public_token,
1489
827
  "key": self.public_key,
1490
828
  "proxy_type": str(proxy_type),
1491
829
  }
1492
-
1493
- logger.info(f"Listing proxy servers: type={proxy_type}")
1494
-
1495
830
  response = self._api_request_with_retry(
1496
- "GET",
1497
- self._proxy_list_url,
1498
- params=params,
831
+ "GET", self._proxy_list_url, params=params
1499
832
  )
1500
833
  response.raise_for_status()
1501
-
1502
834
  data = response.json()
835
+ if data.get("code") != 200:
836
+ raise_for_code(
837
+ "List proxy servers error", code=data.get("code"), payload=data
838
+ )
1503
839
 
840
+ server_list = []
1504
841
  if isinstance(data, dict):
1505
- code = data.get("code")
1506
- if code is not None and code != 200:
1507
- msg = extract_error_message(data)
1508
- raise_for_code(
1509
- f"List proxy servers error: {msg}", code=code, payload=data
1510
- )
1511
-
1512
- # Extract list from data field
1513
842
  server_list = data.get("data", data.get("list", []))
1514
843
  elif isinstance(data, list):
1515
844
  server_list = data
1516
- else:
1517
- raise ThordataNetworkError(
1518
- f"Unexpected proxy list response: {type(data).__name__}",
1519
- original_error=None,
1520
- )
1521
845
 
1522
846
  return [ProxyServer.from_dict(s) for s in server_list]
1523
847
 
1524
- def get_isp_regions(self) -> List[Dict[str, Any]]:
1525
- """
1526
- Get available ISP proxy regions.
1527
-
1528
- Uses public_token/public_key (Dashboard -> My account -> API).
1529
- """
1530
- headers = self._build_gateway_headers()
1531
-
1532
- logger.info("Getting ISP regions")
1533
-
1534
- response = self._api_request_with_retry(
1535
- "POST",
1536
- f"{self._gateway_base_url}/getRegionIsp",
1537
- headers=headers,
1538
- data={},
1539
- )
1540
- response.raise_for_status()
1541
-
1542
- data = response.json()
1543
- code = data.get("code")
1544
-
1545
- if code != 200:
1546
- msg = extract_error_message(data)
1547
- raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
1548
-
1549
- return data.get("data", [])
1550
-
1551
- def list_isp_proxies(self) -> List[Dict[str, Any]]:
1552
- """
1553
- List ISP proxies.
1554
-
1555
- Uses public_token/public_key (Dashboard -> My account -> API).
1556
- """
1557
- headers = self._build_gateway_headers()
1558
-
1559
- logger.info("Listing ISP proxies")
1560
-
1561
- response = self._api_request_with_retry(
1562
- "POST",
1563
- f"{self._gateway_base_url}/queryListIsp",
1564
- headers=headers,
1565
- data={},
1566
- )
1567
- response.raise_for_status()
1568
-
1569
- data = response.json()
1570
- code = data.get("code")
1571
-
1572
- if code != 200:
1573
- msg = extract_error_message(data)
1574
- raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
1575
-
1576
- return data.get("data", [])
1577
-
1578
- def get_wallet_balance(self) -> Dict[str, Any]:
1579
- """
1580
- Get wallet balance for ISP proxies.
1581
-
1582
- Uses public_token/public_key (Dashboard -> My account -> API).
1583
- """
1584
- headers = self._build_gateway_headers()
1585
-
1586
- logger.info("Getting wallet balance")
1587
-
1588
- response = self._api_request_with_retry(
1589
- "POST",
1590
- f"{self._gateway_base_url}/getBalance",
1591
- headers=headers,
1592
- data={},
1593
- )
1594
- response.raise_for_status()
1595
-
1596
- data = response.json()
1597
- code = data.get("code")
1598
-
1599
- if code != 200:
1600
- msg = extract_error_message(data)
1601
- raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
1602
-
1603
- return data.get("data", {})
1604
-
1605
848
  def get_proxy_expiration(
1606
- self,
1607
- ips: Union[str, List[str]],
1608
- proxy_type: int,
1609
- ) -> Dict[str, Any]:
1610
- """
1611
- Get expiration time for specific proxy IPs.
1612
-
1613
- Args:
1614
- ips: Single IP or list of IPs to check.
1615
- proxy_type: Proxy type (1=ISP, 2=Datacenter).
1616
-
1617
- Returns:
1618
- Dict with expiration information.
1619
-
1620
- Example:
1621
- >>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
1622
- >>> print(result)
1623
- """
849
+ self, ips: str | list[str], proxy_type: int
850
+ ) -> dict[str, Any]:
1624
851
  self._require_public_credentials()
1625
-
1626
- # Convert list to comma-separated string
1627
852
  if isinstance(ips, list):
1628
853
  ips = ",".join(ips)
1629
-
1630
854
  params = {
1631
855
  "token": self.public_token,
1632
856
  "key": self.public_key,
1633
857
  "proxy_type": str(proxy_type),
1634
858
  "ips": ips,
1635
859
  }
1636
-
1637
- logger.info(f"Getting proxy expiration: {ips}")
1638
-
1639
860
  response = self._api_request_with_retry(
1640
- "GET",
1641
- self._proxy_expiration_url,
1642
- params=params,
861
+ "GET", self._proxy_expiration_url, params=params
1643
862
  )
1644
863
  response.raise_for_status()
1645
-
1646
864
  data = response.json()
865
+ if data.get("code") != 200:
866
+ raise_for_code("Get expiration error", code=data.get("code"), payload=data)
867
+ return data.get("data", data)
1647
868
 
1648
- if isinstance(data, dict):
1649
- code = data.get("code")
1650
- if code is not None and code != 200:
1651
- msg = extract_error_message(data)
1652
- raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
1653
-
1654
- return data.get("data", data)
1655
-
1656
- return data
1657
-
1658
- # =========================================================================
1659
- # Location API Methods (Country/State/City/ASN functions)
1660
- # =========================================================================
1661
869
  def list_countries(
1662
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
1663
- ) -> List[Dict[str, Any]]:
1664
- """
1665
- List supported countries for proxies.
1666
-
1667
- Args:
1668
- proxy_type: 1 for residential, 2 for unlimited.
1669
-
1670
- Returns:
1671
- List of country records with 'country_code' and 'country_name'.
1672
- """
1673
- return self._get_locations(
1674
- "countries",
1675
- proxy_type=(
1676
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1677
- ),
1678
- )
870
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
871
+ ) -> list[dict[str, Any]]:
872
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
873
+ return self._get_locations("countries", proxy_type=pt)
1679
874
 
1680
875
  def list_states(
1681
876
  self,
1682
877
  country_code: str,
1683
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1684
- ) -> List[Dict[str, Any]]:
1685
- """
1686
- List supported states for a country.
1687
-
1688
- Args:
1689
- country_code: Country code (e.g., 'US').
1690
- proxy_type: Proxy type.
1691
-
1692
- Returns:
1693
- List of state records.
1694
- """
1695
- return self._get_locations(
1696
- "states",
1697
- proxy_type=(
1698
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1699
- ),
1700
- country_code=country_code,
1701
- )
878
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
879
+ ) -> list[dict[str, Any]]:
880
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
881
+ return self._get_locations("states", proxy_type=pt, country_code=country_code)
1702
882
 
1703
883
  def list_cities(
1704
884
  self,
1705
885
  country_code: str,
1706
- state_code: Optional[str] = None,
1707
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1708
- ) -> List[Dict[str, Any]]:
1709
- """
1710
- List supported cities for a country/state.
1711
-
1712
- Args:
1713
- country_code: Country code.
1714
- state_code: Optional state code.
1715
- proxy_type: Proxy type.
1716
-
1717
- Returns:
1718
- List of city records.
1719
- """
1720
- kwargs = {
1721
- "proxy_type": (
1722
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1723
- ),
1724
- "country_code": country_code,
1725
- }
886
+ state_code: str | None = None,
887
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
888
+ ) -> list[dict[str, Any]]:
889
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
890
+ kwargs = {"proxy_type": pt, "country_code": country_code}
1726
891
  if state_code:
1727
892
  kwargs["state_code"] = state_code
1728
-
1729
893
  return self._get_locations("cities", **kwargs)
1730
894
 
1731
895
  def list_asn(
1732
896
  self,
1733
897
  country_code: str,
1734
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1735
- ) -> List[Dict[str, Any]]:
1736
- """
1737
- List supported ASNs for a country.
1738
-
1739
- Args:
1740
- country_code: Country code.
1741
- proxy_type: Proxy type.
1742
-
1743
- Returns:
1744
- List of ASN records.
1745
- """
1746
- return self._get_locations(
1747
- "asn",
1748
- proxy_type=(
1749
- int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
1750
- ),
1751
- country_code=country_code,
1752
- )
898
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
899
+ ) -> list[dict[str, Any]]:
900
+ pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
901
+ return self._get_locations("asn", proxy_type=pt, country_code=country_code)
1753
902
 
1754
- def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
1755
- """Internal method to call locations API."""
903
+ def _get_locations(self, endpoint: str, **kwargs: Any) -> list[dict[str, Any]]:
1756
904
  self._require_public_credentials()
905
+ params = {"token": self.public_token, "key": self.public_key}
906
+ for k, v in kwargs.items():
907
+ params[k] = str(v)
1757
908
 
1758
- params = {
1759
- "token": self.public_token,
1760
- "key": self.public_key,
1761
- }
1762
-
1763
- for key, value in kwargs.items():
1764
- params[key] = str(value)
1765
-
1766
- url = f"{self._locations_base_url}/{endpoint}"
1767
-
1768
- logger.debug(f"Locations API request: {url}")
1769
-
1770
- # Use requests.get directly (no proxy needed for this API)
1771
909
  response = self._api_request_with_retry(
1772
- "GET",
1773
- url,
1774
- params=params,
910
+ "GET", f"{self._locations_base_url}/{endpoint}", params=params
1775
911
  )
1776
912
  response.raise_for_status()
1777
-
1778
913
  data = response.json()
1779
-
1780
914
  if isinstance(data, dict):
1781
- code = data.get("code")
1782
- if code is not None and code != 200:
1783
- msg = data.get("msg", "")
1784
- raise RuntimeError(
1785
- f"Locations API error ({endpoint}): code={code}, msg={msg}"
1786
- )
915
+ if data.get("code") != 200:
916
+ raise RuntimeError(f"Locations error: {data.get('msg')}")
1787
917
  return data.get("data") or []
918
+ return data if isinstance(data, list) else []
1788
919
 
1789
- if isinstance(data, list):
1790
- return data
1791
-
1792
- return []
1793
-
1794
- # =========================================================================
1795
- # Helper Methods (Internal utility functions)
1796
- # =========================================================================
1797
920
  def _require_public_credentials(self) -> None:
1798
- """Ensure public API credentials are available."""
1799
921
  if not self.public_token or not self.public_key:
1800
922
  raise ThordataConfigError(
1801
- "public_token and public_key are required for this operation. "
1802
- "Please provide them when initializing ThordataClient."
923
+ "public_token and public_key are required for this operation."
1803
924
  )
1804
925
 
1805
926
  def _get_proxy_endpoint_overrides(
1806
927
  self, product: ProxyProduct
1807
- ) -> tuple[Optional[str], Optional[int], str]:
1808
- """
1809
- Read proxy endpoint overrides from env.
1810
-
1811
- Priority:
1812
- 1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
1813
- 2) THORDATA_PROXY_HOST/PORT/PROTOCOL
1814
- 3) defaults (host/port None => ProxyConfig will use its product defaults)
1815
- """
1816
- prefix = product.value.upper() # RESIDENTIAL / DATACENTER / MOBILE / ISP
1817
-
928
+ ) -> tuple[str | None, int | None, str]:
929
+ prefix = product.value.upper()
1818
930
  host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
1819
931
  "THORDATA_PROXY_HOST"
1820
932
  )
@@ -1826,184 +938,37 @@ class ThordataClient:
1826
938
  or os.getenv("THORDATA_PROXY_PROTOCOL")
1827
939
  or "http"
1828
940
  )
1829
-
1830
- port: Optional[int] = None
1831
- if port_raw:
1832
- try:
1833
- port = int(port_raw)
1834
- except ValueError:
1835
- port = None
1836
-
941
+ port = int(port_raw) if port_raw and port_raw.isdigit() else None
1837
942
  return host or None, port, protocol
1838
943
 
1839
- def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
1840
- """
1841
- Try to build a default ProxyConfig from env vars.
1842
-
1843
- Priority order:
1844
- 1) Residential
1845
- 2) Datacenter
1846
- 3) Mobile
1847
- """
1848
- # Residential
1849
- u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1850
- p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
1851
- if u and p:
1852
- host, port, protocol = self._get_proxy_endpoint_overrides(
1853
- ProxyProduct.RESIDENTIAL
1854
- )
1855
- return ProxyConfig(
1856
- username=u,
1857
- password=p,
1858
- product=ProxyProduct.RESIDENTIAL,
1859
- host=host,
1860
- port=port,
1861
- protocol=protocol,
1862
- )
1863
-
1864
- # Datacenter
1865
- u = os.getenv("THORDATA_DATACENTER_USERNAME")
1866
- p = os.getenv("THORDATA_DATACENTER_PASSWORD")
1867
- if u and p:
1868
- host, port, protocol = self._get_proxy_endpoint_overrides(
1869
- ProxyProduct.DATACENTER
1870
- )
1871
- return ProxyConfig(
1872
- username=u,
1873
- password=p,
1874
- product=ProxyProduct.DATACENTER,
1875
- host=host,
1876
- port=port,
1877
- protocol=protocol,
1878
- )
1879
-
1880
- # Mobile
1881
- u = os.getenv("THORDATA_MOBILE_USERNAME")
1882
- p = os.getenv("THORDATA_MOBILE_PASSWORD")
1883
- if u and p:
1884
- host, port, protocol = self._get_proxy_endpoint_overrides(
1885
- ProxyProduct.MOBILE
1886
- )
1887
- return ProxyConfig(
1888
- username=u,
1889
- password=p,
1890
- product=ProxyProduct.MOBILE,
1891
- host=host,
1892
- port=port,
1893
- protocol=protocol,
1894
- )
1895
-
1896
- return None
1897
-
1898
- def _build_gateway_headers(self) -> Dict[str, str]:
1899
- """
1900
- Build headers for legacy gateway-style endpoints.
1901
-
1902
- IMPORTANT:
1903
- - SDK does NOT expose "sign/apiKey" as a separate credential model.
1904
- - Values ALWAYS come from public_token/public_key.
1905
- - Some backend endpoints may still expect header field names "sign" and "apiKey".
1906
- """
1907
- self._require_public_credentials()
1908
- return {
1909
- "sign": self.public_token or "",
1910
- "apiKey": self.public_key or "",
1911
- "Content-Type": "application/x-www-form-urlencoded",
1912
- }
1913
-
1914
- def _proxy_request_with_proxy_manager(
1915
- self,
1916
- method: str,
1917
- url: str,
1918
- *,
1919
- proxy_config: ProxyConfig,
1920
- timeout: int,
1921
- headers: Optional[Dict[str, str]] = None,
1922
- params: Optional[Dict[str, Any]] = None,
1923
- data: Any = None,
1924
- ) -> requests.Response:
1925
- """
1926
- Proxy Network request implemented via urllib3.ProxyManager.
1927
-
1928
- This is required to reliably support HTTPS proxy endpoints like:
1929
- https://<endpoint>.pr.thordata.net:9999
1930
- """
1931
- # Build final URL (include query params)
1932
- req = requests.Request(method=method.upper(), url=url, params=params)
1933
- prepped = self._proxy_session.prepare_request(req)
1934
- final_url = prepped.url or url
1935
-
1936
- proxy_url = proxy_config.build_proxy_endpoint()
1937
- proxy_headers = urllib3.make_headers(
1938
- proxy_basic_auth=proxy_config.build_proxy_basic_auth()
1939
- )
1940
-
1941
- pm = urllib3.ProxyManager(
1942
- proxy_url,
1943
- proxy_headers=proxy_headers,
1944
- proxy_ssl_context=(
1945
- ssl.create_default_context()
1946
- if proxy_url.startswith("https://")
1947
- else None
1948
- ),
1949
- )
1950
-
1951
- # Encode form data if dict
1952
- body = None
1953
- req_headers = dict(headers or {})
1954
- if data is not None:
1955
- if isinstance(data, dict):
1956
- # form-urlencoded
1957
- body = urlencode({k: str(v) for k, v in data.items()})
1958
- req_headers.setdefault(
1959
- "Content-Type", "application/x-www-form-urlencoded"
944
+ def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
945
+ for prod in [
946
+ ProxyProduct.RESIDENTIAL,
947
+ ProxyProduct.DATACENTER,
948
+ ProxyProduct.MOBILE,
949
+ ]:
950
+ prefix = prod.value.upper()
951
+ u = os.getenv(f"THORDATA_{prefix}_USERNAME")
952
+ p = os.getenv(f"THORDATA_{prefix}_PASSWORD")
953
+ if u and p:
954
+ h, port, proto = self._get_proxy_endpoint_overrides(prod)
955
+ return ProxyConfig(
956
+ username=u,
957
+ password=p,
958
+ product=prod,
959
+ host=h,
960
+ port=port,
961
+ protocol=proto,
1960
962
  )
1961
- else:
1962
- body = data
1963
-
1964
- http_resp = pm.request(
1965
- method.upper(),
1966
- final_url,
1967
- body=body,
1968
- headers=req_headers or None,
1969
- timeout=urllib3.Timeout(connect=timeout, read=timeout),
1970
- retries=False,
1971
- preload_content=True,
1972
- )
1973
-
1974
- # Convert urllib3 response -> requests.Response (keep your API stable)
1975
- r = requests.Response()
1976
- r.status_code = int(getattr(http_resp, "status", 0) or 0)
1977
- r._content = http_resp.data or b""
1978
- r.url = final_url
1979
- r.headers = requests.structures.CaseInsensitiveDict(
1980
- dict(http_resp.headers or {})
1981
- )
1982
- return r
1983
-
1984
- def _request_with_retry(
1985
- self, method: str, url: str, **kwargs: Any
1986
- ) -> requests.Response:
1987
- """Make a request with automatic retry."""
1988
- kwargs.setdefault("timeout", self._default_timeout)
1989
-
1990
- @with_retry(self._retry_config)
1991
- def _do_request() -> requests.Response:
1992
- return self._proxy_session.request(method, url, **kwargs)
1993
-
1994
- try:
1995
- return _do_request()
1996
- except requests.Timeout as e:
1997
- raise ThordataTimeoutError(
1998
- f"Request timed out: {e}", original_error=e
1999
- ) from e
2000
- except requests.RequestException as e:
2001
- raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
963
+ return None
2002
964
 
2003
965
  def close(self) -> None:
2004
- """Close the underlying session."""
2005
966
  self._proxy_session.close()
2006
967
  self._api_session.close()
968
+ # Clean up connection pools
969
+ for pm in self._proxy_managers.values():
970
+ pm.clear()
971
+ self._proxy_managers.clear()
2007
972
 
2008
973
  def __enter__(self) -> ThordataClient:
2009
974
  return self