thordata-sdk 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/models.py CHANGED
@@ -26,13 +26,10 @@ from __future__ import annotations
  
  import json
  import re
- import ssl
  import uuid
  from dataclasses import dataclass, field
  from enum import Enum
- from typing import Any, Dict, List, Optional, Union
-
- import urllib3
+ from typing import Any
  
  # =============================================================================
  # Proxy Product Types
@@ -111,21 +108,21 @@ class ProxyConfig:
  
      username: str
      password: str
-     product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL
-     host: Optional[str] = None
-     port: Optional[int] = None
+     product: ProxyProduct | str = ProxyProduct.RESIDENTIAL
+     host: str | None = None
+     port: int | None = None
      protocol: str = "http"
  
      # Geo-targeting
-     continent: Optional[str] = None
-     country: Optional[str] = None
-     state: Optional[str] = None
-     city: Optional[str] = None
-     asn: Optional[str] = None
+     continent: str | None = None
+     country: str | None = None
+     state: str | None = None
+     city: str | None = None
+     asn: str | None = None
  
      # Session control
-     session_id: Optional[str] = None
-     session_duration: Optional[int] = None  # minutes, 1-90
+     session_id: str | None = None
+     session_duration: int | None = None  # minutes, 1-90
  
      # Valid continent codes
      VALID_CONTINENTS = {"af", "an", "as", "eu", "na", "oc", "sa"}
@@ -245,7 +242,7 @@ class ProxyConfig:
          """Basic auth string 'username:password' for Proxy-Authorization."""
          return f"{self.build_username()}:{self.password}"
  
-     def to_proxies_dict(self) -> Dict[str, str]:
+     def to_proxies_dict(self) -> dict[str, str]:
          """
          Build a proxies dict suitable for the requests library.
  
@@ -300,7 +297,7 @@ class WhitelistProxyConfig:
      def build_proxy_url(self) -> str:
          return f"{self.protocol}://{self.host}:{self.port}"
  
-     def to_proxies_dict(self) -> Dict[str, str]:
+     def to_proxies_dict(self) -> dict[str, str]:
          url = self.build_proxy_url()
          return {"http": url, "https": url}
  
@@ -358,7 +355,7 @@ class StaticISPProxy:
              f"{self.protocol}://{self.username}:{self.password}@{self.host}:{self.port}"
          )
  
-     def to_proxies_dict(self) -> Dict[str, str]:
+     def to_proxies_dict(self) -> dict[str, str]:
          """
          Build a proxies dict suitable for the requests library.
  
@@ -387,7 +384,7 @@ class StaticISPProxy:
          ) from e
  
      @classmethod
-     def from_env(cls) -> "StaticISPProxy":
+     def from_env(cls) -> StaticISPProxy:
          """
          Create StaticISPProxy from environment variables.
  
@@ -513,39 +510,39 @@ class SerpRequest:
      start: int = 0
  
      # Localization
-     country: Optional[str] = None  # 'gl' for Google
-     language: Optional[str] = None  # 'hl' for Google
-     google_domain: Optional[str] = None
-     countries_filter: Optional[str] = None  # 'cr' parameter
-     languages_filter: Optional[str] = None  # 'lr' parameter
+     country: str | None = None  # 'gl' for Google
+     language: str | None = None  # 'hl' for Google
+     google_domain: str | None = None
+     countries_filter: str | None = None  # 'cr' parameter
+     languages_filter: str | None = None  # 'lr' parameter
  
      # Geo-targeting
-     location: Optional[str] = None
-     uule: Optional[str] = None  # Encoded location
+     location: str | None = None
+     uule: str | None = None  # Encoded location
  
      # Search type
-     search_type: Optional[str] = None  # tbm parameter (isch, shop, nws, vid, ...)
+     search_type: str | None = None  # tbm parameter (isch, shop, nws, vid, ...)
  
      # Filters
-     safe_search: Optional[bool] = None
-     time_filter: Optional[str] = None  # tbs parameter (time part)
+     safe_search: bool | None = None
+     time_filter: str | None = None  # tbs parameter (time part)
      no_autocorrect: bool = False  # nfpr parameter
-     filter_duplicates: Optional[bool] = None  # filter parameter
+     filter_duplicates: bool | None = None  # filter parameter
  
      # Device & Rendering
-     device: Optional[str] = None  # 'desktop', 'mobile', 'tablet'
-     render_js: Optional[bool] = None  # render_js parameter
-     no_cache: Optional[bool] = None  # no_cache parameter
+     device: str | None = None  # 'desktop', 'mobile', 'tablet'
+     render_js: bool | None = None  # render_js parameter
+     no_cache: bool | None = None  # no_cache parameter
  
      # Output format
      output_format: str = "json"  # 'json' or 'html'
  
      # Advanced Google parameters
-     ludocid: Optional[str] = None  # Google Place ID
-     kgmid: Optional[str] = None  # Knowledge Graph ID
+     ludocid: str | None = None  # Google Place ID
+     kgmid: str | None = None  # Knowledge Graph ID
  
      # Pass-through
-     extra_params: Dict[str, Any] = field(default_factory=dict)
+     extra_params: dict[str, Any] = field(default_factory=dict)
  
      # Search type mappings for tbm parameter
      SEARCH_TYPE_MAP = {
@@ -578,7 +575,7 @@ class SerpRequest:
          "baidu": "baidu.com",
      }
  
-     def to_payload(self) -> Dict[str, Any]:
+     def to_payload(self) -> dict[str, Any]:
          """
          Convert to API request payload.
  
@@ -587,7 +584,7 @@ class SerpRequest:
          """
          engine = self.engine.lower()
  
-         payload: Dict[str, Any] = {
+         payload: dict[str, Any] = {
              "engine": engine,
              "num": str(self.num),
          }
@@ -722,14 +719,14 @@ class UniversalScrapeRequest:
      url: str
      js_render: bool = False
      output_format: str = "html"  # 'html' or 'png'
-     country: Optional[str] = None
-     block_resources: Optional[str] = None  # e.g., 'script', 'image', 'script,image'
-     clean_content: Optional[str] = None  # e.g., 'js', 'css', 'js,css'
-     wait: Optional[int] = None  # Milliseconds, max 100000
-     wait_for: Optional[str] = None  # CSS selector
-     headers: Optional[List[Dict[str, str]]] = None  # [{"name": "...", "value": "..."}]
-     cookies: Optional[List[Dict[str, str]]] = None  # [{"name": "...", "value": "..."}]
-     extra_params: Dict[str, Any] = field(default_factory=dict)  # must use field() here
+     country: str | None = None
+     block_resources: str | None = None  # e.g., 'script', 'image', 'script,image'
+     clean_content: str | None = None  # e.g., 'js', 'css', 'js,css'
+     wait: int | None = None  # Milliseconds, max 100000
+     wait_for: str | None = None  # CSS selector
+     headers: list[dict[str, str]] | None = None  # [{"name": "...", "value": "..."}]
+     cookies: list[dict[str, str]] | None = None  # [{"name": "...", "value": "..."}]
+     extra_params: dict[str, Any] = field(default_factory=dict)  # must use field() here
  
      def __post_init__(self) -> None:
          """Validate configuration."""
@@ -745,14 +742,14 @@ class UniversalScrapeRequest:
                  f"wait must be between 0 and 100000 milliseconds, got {self.wait}"
              )
  
-     def to_payload(self) -> Dict[str, Any]:
+     def to_payload(self) -> dict[str, Any]:
          """
          Convert to API request payload.
  
          Returns:
              Dictionary ready to be sent to the Universal API.
          """
-         payload: Dict[str, Any] = {
+         payload: dict[str, Any] = {
              "url": self.url,
              "js_render": "True" if self.js_render else "False",
              "type": self.output_format.lower(),
@@ -820,18 +817,18 @@ class ScraperTaskConfig:
      file_name: str
      spider_id: str
      spider_name: str
-     parameters: Dict[str, Any]
-     universal_params: Optional[Dict[str, Any]] = None
+     parameters: dict[str, Any]
+     universal_params: dict[str, Any] | None = None
      include_errors: bool = True
  
-     def to_payload(self) -> Dict[str, Any]:
+     def to_payload(self) -> dict[str, Any]:
          """
          Convert to API request payload.
  
          Returns:
              Dictionary ready to be sent to the Web Scraper API.
          """
-         payload: Dict[str, Any] = {
+         payload: dict[str, Any] = {
              "file_name": self.file_name,
              "spider_id": self.spider_id,
              "spider_name": self.spider_name,
@@ -877,21 +874,21 @@ class CommonSettings:
      """
  
      # Video settings
-     resolution: Optional[str] = None
+     resolution: str | None = None
  
      # Audio settings
-     audio_format: Optional[str] = None
-     bitrate: Optional[str] = None
+     audio_format: str | None = None
+     bitrate: str | None = None
  
      # Subtitle settings (used by both video and audio)
-     is_subtitles: Optional[str] = None
-     subtitles_language: Optional[str] = None
+     is_subtitles: str | None = None
+     subtitles_language: str | None = None
  
      # Valid values for validation
      VALID_RESOLUTIONS = {"360p", "480p", "720p", "1080p", "1440p", "2160p"}
      VALID_AUDIO_FORMATS = {"opus", "mp3"}
  
-     def to_dict(self) -> Dict[str, Any]:
+     def to_dict(self) -> dict[str, Any]:
          """Convert to dictionary, excluding None values."""
          result = {}
          if self.resolution is not None:
@@ -943,18 +940,18 @@ class VideoTaskConfig:
      file_name: str
      spider_id: str
      spider_name: str
-     parameters: Dict[str, Any]
+     parameters: dict[str, Any]
      common_settings: CommonSettings
      include_errors: bool = True
  
-     def to_payload(self) -> Dict[str, Any]:
+     def to_payload(self) -> dict[str, Any]:
          """
          Convert to API request payload.
  
          Returns:
              Dictionary ready to be sent to the video_builder API.
          """
-         payload: Dict[str, Any] = {
+         payload: dict[str, Any] = {
              "file_name": self.file_name,
              "spider_id": self.spider_id,
              "spider_name": self.spider_name,
@@ -984,8 +981,8 @@ class TaskStatusResponse:
  
      task_id: str
      status: str
-     progress: Optional[int] = None
-     message: Optional[str] = None
+     progress: int | None = None
+     message: str | None = None
  
      def is_complete(self) -> bool:
          """Check if the task has completed (success or failure)."""
@@ -1027,10 +1024,10 @@ class UsageStatistics:
      traffic_balance: float
      query_days: int
      range_usage_traffic: float
-     data: List[Dict[str, Any]]
+     data: list[dict[str, Any]]
  
      @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> "UsageStatistics":
+     def from_dict(cls, data: dict[str, Any]) -> UsageStatistics:
          """Create from API response dict."""
          return cls(
              total_usage_traffic=float(data.get("total_usage_traffic", 0)),
@@ -1073,7 +1070,7 @@ class ProxyUser:
      usage_traffic: float
  
      @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> "ProxyUser":
+     def from_dict(cls, data: dict[str, Any]) -> ProxyUser:
          """Create from API response dict."""
          return cls(
              username=data.get("username", ""),
@@ -1109,10 +1106,10 @@ class ProxyUserList:
      limit: float
      remaining_limit: float
      user_count: int
-     users: List[ProxyUser]
+     users: list[ProxyUser]
  
      @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> "ProxyUserList":
+     def from_dict(cls, data: dict[str, Any]) -> ProxyUserList:
          """Create from API response dict."""
          user_list = data.get("list", [])
          users = [ProxyUser.from_dict(u) for u in user_list]
@@ -1143,11 +1140,11 @@ class ProxyServer:
      port: int
      username: str
      password: str
-     expiration_time: Optional[Union[int, str]] = None
-     region: Optional[str] = None
+     expiration_time: int | str | None = None
+     region: str | None = None
  
      @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> "ProxyServer":
+     def from_dict(cls, data: dict[str, Any]) -> ProxyServer:
          """Create from API response dict."""
          return cls(
              ip=data.get("ip", ""),
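
The models.py changes above are one mechanical migration: `typing.Optional`/`Union`/`Dict`/`List` become PEP 604 unions and builtin generics, quoted forward references such as `"StaticISPProxy"` lose their quotes, and the `ssl`/`urllib3` imports are dropped. This stays compatible with the package's `Requires-Python: >=3.9` floor because the module already has `from __future__ import annotations` (visible in the first hunk header), which stores every annotation as an unevaluated string. A minimal sketch of the pattern, assuming nothing beyond what the hunks show:

```python
# Illustrative only -- not SDK code. Shows why the 1.1.0 annotation style
# is safe on Python 3.9 once PEP 563 lazy annotations are enabled.
from __future__ import annotations  # every annotation below is stored as a string

from dataclasses import dataclass, field
from typing import Any


@dataclass
class Example:
    country: str | None = None                                  # was: Optional[str]
    extra_params: dict[str, Any] = field(default_factory=dict)  # was: Dict[str, Any]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Example:        # was: -> "Example"
        # The unquoted self-reference also relies on lazy annotations.
        return cls(country=data.get("country"),
                   extra_params=data.get("extra_params", {}))
```

The one caveat: anything that resolves these annotations at runtime (for example `typing.get_type_hints`) will still raise on Python 3.9 for `str | None`, so the cleanup assumes annotations are only ever read lazily.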
thordata/retry.py CHANGED
@@ -22,7 +22,7 @@ import random
  import time
  from dataclasses import dataclass, field
  from functools import wraps
- from typing import Any, Callable, Optional, Set, Tuple
+ from typing import Any, Callable
  
  from .exceptions import (
      ThordataNetworkError,
@@ -64,15 +64,15 @@ class RetryConfig:
      jitter_factor: float = 0.1
  
      # Status codes to retry on (5xx server errors + 429 rate limit)
-     retry_on_status_codes: Set[int] = field(
+     retry_on_status_codes: set[int] = field(
          default_factory=lambda: {429, 500, 502, 503, 504}
      )
-     retry_on_api_codes: Set[int] = field(
+     retry_on_api_codes: set[int] = field(
          default_factory=lambda: {300}  # API response body code
      )
  
      # Exception types to always retry on
-     retry_on_exceptions: Tuple[type, ...] = field(
+     retry_on_exceptions: tuple[type, ...] = field(
          default_factory=lambda: (
              ThordataNetworkError,
              ThordataServerError,
@@ -104,7 +104,7 @@ class RetryConfig:
          return delay
  
      def should_retry(
-         self, exception: Exception, attempt: int, status_code: Optional[int] = None
+         self, exception: Exception, attempt: int, status_code: int | None = None
      ) -> bool:
          """
          Determine if a request should be retried.
@@ -138,8 +138,8 @@ class RetryConfig:
  
  
  def with_retry(
-     config: Optional[RetryConfig] = None,
-     on_retry: Optional[Callable[[int, Exception, float], None]] = None,
+     config: RetryConfig | None = None,
+     on_retry: Callable[[int, Exception, float], None] | None = None,
  ) -> Callable:
      """
      Decorator to add retry logic to a function.
@@ -168,7 +168,7 @@ def with_retry(
      def decorator(func: Callable) -> Callable:
          @wraps(func)
          def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
-             last_exception: Optional[Exception] = None
+             last_exception: Exception | None = None
  
              for attempt in range(config.max_retries + 1):
                  try:
@@ -202,7 +202,7 @@ def with_retry(
  
          @wraps(func)
          async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
-             last_exception: Optional[Exception] = None
+             last_exception: Exception | None = None
  
              for attempt in range(config.max_retries + 1):
                  try:
@@ -244,7 +244,7 @@ def with_retry(
      return decorator
  
  
- def _extract_status_code(exception: Exception) -> Optional[int]:
+ def _extract_status_code(exception: Exception) -> int | None:
      """
      Extract HTTP status code from various exception types.
  
@@ -302,10 +302,10 @@ class RetryableRequest:
      ...     retry.wait()
      """
  
-     def __init__(self, config: Optional[RetryConfig] = None) -> None:
+     def __init__(self, config: RetryConfig | None = None) -> None:
          self.config = config or RetryConfig()
          self.attempt = 0
-         self.last_exception: Optional[Exception] = None
+         self.last_exception: Exception | None = None
  
      def __enter__(self) -> RetryableRequest:
          return self
@@ -314,7 +314,7 @@ class RetryableRequest:
          pass
  
      def should_continue(
-         self, exception: Exception, status_code: Optional[int] = None
+         self, exception: Exception, status_code: int | None = None
      ) -> bool:
          """
          Check if we should continue retrying.
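
The retry.py hunks get the same annotation-only cleanup. Because they surface `RetryConfig`'s knobs (`jitter_factor`, `retry_on_status_codes`, `max_retries`; `backoff_factor` appears in the README below) but not the delay formula itself, here is a rough sketch of the capped exponential-backoff-with-jitter pattern such fields conventionally drive — an inference from the field names, not the SDK's actual implementation:

```python
import random


def retry_delay(attempt: int, backoff_factor: float = 1.5,
                jitter_factor: float = 0.1, cap: float = 60.0) -> float:
    """Hypothetical delay schedule; `cap` is invented for this sketch."""
    base = backoff_factor * (2 ** attempt)           # 1.5s, 3s, 6s, 12s, ...
    jitter = base * jitter_factor * random.random()  # spread out simultaneous retries
    return min(base + jitter, cap)
```

Jitter earns its keep here because the config retries on 429 and 5xx: without it, clients that failed together would retry in lockstep and trip the rate limit again.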
thordata_sdk-1.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,271 @@
+ Metadata-Version: 2.4
+ Name: thordata-sdk
+ Version: 1.1.0
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
+ Author-email: Thordata Developer Team <support@thordata.com>
+ License: MIT
+ Project-URL: Homepage, https://www.thordata.com
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
+ Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
+ Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Topic :: Internet :: Proxy Servers
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: requests>=2.25.0
+ Requires-Dist: aiohttp>=3.9.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+ Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
+ Requires-Dist: black>=23.0.0; extra == "dev"
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
+ Requires-Dist: aioresponses>=0.7.6; extra == "dev"
+ Dynamic: license-file
+
+ # Thordata Python SDK
+
+ <div align="center">
+
+ **Official Python Client for Thordata APIs**
+
+ *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
+
+ [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
+ [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
+
+ </div>
+
+ ---
+
+ ## 📦 Installation
+
+ ```bash
+ pip install thordata-sdk
+ ```
+
+ Optional dependencies for Scraping Browser examples:
+ ```bash
+ pip install playwright
+ ```
+
+ ## 🔐 Configuration
+
+ Set the following environment variables (recommended):
+
+ ```bash
+ # Required for SERP, Universal, and Proxy Network
+ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
+
+ # Required for Web Scraper Tasks & Account Management
+ export THORDATA_PUBLIC_TOKEN="your_public_token"
+ export THORDATA_PUBLIC_KEY="your_public_key"
+
+ # Optional: Default Proxy Credentials (for Proxy Network)
+ export THORDATA_RESIDENTIAL_USERNAME="user"
+ export THORDATA_RESIDENTIAL_PASSWORD="pass"
+ ```
+
+ ## 🚀 Quick Start
+
+ ```python
+ from thordata import ThordataClient
+
+ # Initialize (credentials loaded from env)
+ client = ThordataClient(scraper_token="...")
+
+ # 1. SERP Search
+ print("--- SERP Search ---")
+ results = client.serp_search("python tutorial", engine="google")
+ print(f"Title: {results['organic'][0]['title']}")
+
+ # 2. Universal Scrape (Web Unlocker)
+ print("\n--- Universal Scrape ---")
+ html = client.universal_scrape("https://httpbin.org/html")
+ print(f"HTML Length: {len(html)}")
+ ```
+
+ ## 📚 Core Features
+
+ ### 🌐 Proxy Network
+
+ Easily generate proxy URLs with geo-targeting and sticky sessions. The SDK handles connection pooling automatically.
+
+ ```python
+ from thordata import ProxyConfig, ProxyProduct
+
+ # Create a proxy configuration
+ proxy = ProxyConfig(
+     username="user",
+     password="pass",
+     product=ProxyProduct.RESIDENTIAL,
+     country="us",
+     city="new_york",
+     session_id="session123",
+     session_duration=10  # Sticky for 10 mins
+ )
+
+ # Use with the client (high performance)
+ response = client.get("https://httpbin.org/ip", proxy_config=proxy)
+ print(response.json())
+
+ # Or get the URL string for other libs (requests, scrapy, etc.)
+ proxy_url = proxy.build_proxy_url()
+ print(f"Proxy URL: {proxy_url}")
+ ```
+
+ ### 🔍 SERP API
+
+ Real-time search results from Google, Bing, Yandex, etc.
+
+ ```python
+ from thordata import SerpRequest, Engine
+
+ # Simple
+ results = client.serp_search(
+     query="pizza near me",
+     engine=Engine.GOOGLE_MAPS,
+     country="us"
+ )
+
+ # Advanced (Strongly Typed)
+ request = SerpRequest(
+     query="AI news",
+     engine="google_news",
+     num=50,
+     time_filter="week",
+     location="San Francisco",
+     render_js=True
+ )
+ results = client.serp_search_advanced(request)
+ ```
+
+ ### 🔓 Universal Scraping API (Web Unlocker)
+
+ Bypass Cloudflare, CAPTCHAs, and anti-bot systems.
+
+ ```python
+ html = client.universal_scrape(
+     url="https://example.com/protected",
+     js_render=True,
+     wait_for=".content",
+     country="gb",
+     output_format="html"
+ )
+ ```
+
+ ### 🕷️ Web Scraper API (Async Tasks)
+
+ Manage asynchronous scraping tasks at massive scale.
+
+ ```python
+ # 1. Create Task
+ task_id = client.create_scraper_task(
+     file_name="my_task",
+     spider_id="universal",
+     spider_name="universal",
+     parameters={"url": "https://example.com"}
+ )
+ print(f"Task Created: {task_id}")
+
+ # 2. Wait for Completion
+ status = client.wait_for_task(task_id, max_wait=600)
+
+ # 3. Get Result
+ if status == "ready":
+     download_url = client.get_task_result(task_id)
+     print(f"Result: {download_url}")
+ ```
+
+ ### 📹 Video/Audio Tasks
+
+ Download content from YouTube and other supported platforms.
+
+ ```python
+ from thordata import CommonSettings
+
+ task_id = client.create_video_task(
+     file_name="video_{{VideoID}}",
+     spider_id="youtube_video_by-url",
+     spider_name="youtube.com",
+     parameters={"url": "https://youtube.com/watch?v=..."},
+     common_settings=CommonSettings(resolution="1080p")
+ )
+ ```
+
+ ### 📊 Account Management
+
+ Access usage statistics, manage sub-users, and whitelist IPs.
+
+ ```python
+ # Get Usage Stats
+ stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
+ print(f"Balance: {stats.balance_gb():.2f} GB")
+
+ # List Proxy Users
+ users = client.list_proxy_users()
+ print(f"Active Sub-users: {users.user_count}")
+
+ # Whitelist IP
+ client.add_whitelist_ip("1.2.3.4")
+ ```
+
+ ## ⚙️ Advanced Usage
+
+ ### Async Client
+
+ For high-concurrency applications, use `AsyncThordataClient`.
+
+ ```python
+ import asyncio
+ from thordata import AsyncThordataClient
+
+ async def main():
+     async with AsyncThordataClient(scraper_token="...") as client:
+         # SERP
+         results = await client.serp_search("async python")
+
+         # Universal
+         html = await client.universal_scrape("https://example.com")
+
+ asyncio.run(main())
+ ```
+
+ Note: `AsyncThordataClient` does not support HTTPS proxy tunneling (TLS-in-TLS) due to `aiohttp` limitations. For proxy network requests, use the sync client.
+
+ ### Custom Retry Configuration
+
+ ```python
+ from thordata import RetryConfig
+
+ retry = RetryConfig(
+     max_retries=5,
+     backoff_factor=1.5,
+     retry_on_status_codes={429, 500, 502, 503, 504}
+ )
+
+ client = ThordataClient(..., retry_config=retry)
+ ```
+
+ ## 📄 License
+
+ MIT License