thordata-sdk 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/enums.py CHANGED
@@ -34,12 +34,13 @@ class Continent(str, Enum):
34
34
  class ProxyHost(str, Enum):
35
35
  """
36
36
  Available proxy gateway hosts.
37
+
38
+ Note: Dashboard provides user-specific hosts like {shard}.{region}.thordata.net
37
39
  """
38
40
 
39
41
  DEFAULT = "pr.thordata.net"
40
42
  NORTH_AMERICA = "t.na.thordata.net"
41
43
  EUROPE = "t.eu.thordata.net"
42
- GATE = "gate.thordata.com"
43
44
 
44
45
 
45
46
  class ProxyPort(IntEnum):
@@ -47,11 +48,10 @@ class ProxyPort(IntEnum):
47
48
  Available proxy gateway ports.
48
49
  """
49
50
 
50
- DEFAULT = 9999
51
+ RESIDENTIAL = 9999
51
52
  MOBILE = 5555
52
53
  DATACENTER = 7777
53
54
  ISP = 6666
54
- ALTERNATIVE = 22225
55
55
 
56
56
 
57
57
  # =============================================================================
thordata/exceptions.py CHANGED
@@ -222,7 +222,8 @@ class ThordataNotCollectedError(ThordataAPIError):
222
222
  This error is often transient and typically safe to retry.
223
223
  """
224
224
 
225
- HTTP_STATUS_CODES = {300}
225
+ API_CODES = {300}
226
+ HTTP_STATUS_CODES: Set[int] = set()
226
227
 
227
228
  @property
228
229
  def is_retryable(self) -> bool:
@@ -262,8 +263,17 @@ def raise_for_code(
262
263
  ThordataValidationError: For 400/422 codes.
263
264
  ThordataAPIError: For all other error codes.
264
265
  """
265
- # Use the code from payload if status_code not available
266
- effective_code = status_code or code
266
+ # Determine the effective error code.
267
+ # Prefer payload `code` when present and not success (200),
268
+ # otherwise fall back to HTTP status when it indicates an error.
269
+ effective_code: Optional[int] = None
270
+
271
+ if code is not None and code != 200:
272
+ effective_code = code
273
+ elif status_code is not None and status_code != 200:
274
+ effective_code = status_code
275
+ else:
276
+ effective_code = code if code is not None else status_code
267
277
 
268
278
  kwargs = {
269
279
  "status_code": status_code,
@@ -272,8 +282,9 @@ def raise_for_code(
272
282
  "request_id": request_id,
273
283
  }
274
284
 
275
- # Not collected (often retryable, not billed)
276
- if effective_code in ThordataNotCollectedError.HTTP_STATUS_CODES:
285
+ # Not collected (API payload code 300, often retryable, not billed)
286
+ # Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
287
+ if effective_code in ThordataNotCollectedError.API_CODES:
277
288
  raise ThordataNotCollectedError(message, **kwargs)
278
289
 
279
290
  # Auth errors
thordata/models.py CHANGED
@@ -795,6 +795,126 @@ class ScraperTaskConfig:
795
795
  return payload
796
796
 
797
797
 
798
+ @dataclass
799
+ class CommonSettings:
800
+ """
801
+ Common settings for YouTube video/audio downloads.
802
+
803
+ Used by /video_builder endpoint as `common_settings` parameter.
804
+ Also known as `spider_universal` in some documentation.
805
+
806
+ Args:
807
+ resolution: Video resolution (360p/480p/720p/1080p/1440p/2160p).
808
+ audio_format: Audio format (opus/mp3).
809
+ bitrate: Audio bitrate (48/64/128/160/256/320 or with Kbps suffix).
810
+ is_subtitles: Whether to download subtitles ("true"/"false").
811
+ subtitles_language: Subtitle language code (e.g., "en", "zh-Hans").
812
+
813
+ Example for video:
814
+ >>> settings = CommonSettings(
815
+ ... resolution="1080p",
816
+ ... is_subtitles="true",
817
+ ... subtitles_language="en"
818
+ ... )
819
+
820
+ Example for audio:
821
+ >>> settings = CommonSettings(
822
+ ... audio_format="mp3",
823
+ ... bitrate="320",
824
+ ... is_subtitles="true",
825
+ ... subtitles_language="en"
826
+ ... )
827
+ """
828
+
829
+ # Video settings
830
+ resolution: Optional[str] = None
831
+
832
+ # Audio settings
833
+ audio_format: Optional[str] = None
834
+ bitrate: Optional[str] = None
835
+
836
+ # Subtitle settings (used by both video and audio)
837
+ is_subtitles: Optional[str] = None
838
+ subtitles_language: Optional[str] = None
839
+
840
+ # Valid values for validation
841
+ VALID_RESOLUTIONS = {"360p", "480p", "720p", "1080p", "1440p", "2160p"}
842
+ VALID_AUDIO_FORMATS = {"opus", "mp3"}
843
+
844
+ def to_dict(self) -> Dict[str, Any]:
845
+ """Convert to dictionary, excluding None values."""
846
+ result = {}
847
+ if self.resolution is not None:
848
+ result["resolution"] = self.resolution
849
+ if self.audio_format is not None:
850
+ result["audio_format"] = self.audio_format
851
+ if self.bitrate is not None:
852
+ result["bitrate"] = self.bitrate
853
+ if self.is_subtitles is not None:
854
+ result["is_subtitles"] = self.is_subtitles
855
+ if self.subtitles_language is not None:
856
+ result["subtitles_language"] = self.subtitles_language
857
+ return result
858
+
859
+ def to_json(self) -> str:
860
+ """Convert to JSON string for form submission."""
861
+ return json.dumps(self.to_dict())
862
+
863
+
864
+ @dataclass
865
+ class VideoTaskConfig:
866
+ """
867
+ Configuration for creating a YouTube video/audio download task.
868
+
869
+ Uses the /video_builder endpoint.
870
+
871
+ Args:
872
+ file_name: Name for the output file. Supports {{TasksID}}, {{VideoID}}.
873
+ spider_id: Spider identifier (e.g., "youtube_video_by-url", "youtube_audio_by-url").
874
+ spider_name: Spider name (typically "youtube.com").
875
+ parameters: Spider-specific parameters (e.g., video URL).
876
+ common_settings: Video/audio settings (resolution, format, subtitles).
877
+ include_errors: Include error details in output.
878
+
879
+ Example:
880
+ >>> config = VideoTaskConfig(
881
+ ... file_name="{{VideoID}}",
882
+ ... spider_id="youtube_video_by-url",
883
+ ... spider_name="youtube.com",
884
+ ... parameters={"url": "https://www.youtube.com/watch?v=xxx"},
885
+ ... common_settings=CommonSettings(
886
+ ... resolution="1080p",
887
+ ... is_subtitles="true",
888
+ ... subtitles_language="en"
889
+ ... )
890
+ ... )
891
+ """
892
+
893
+ file_name: str
894
+ spider_id: str
895
+ spider_name: str
896
+ parameters: Dict[str, Any]
897
+ common_settings: CommonSettings
898
+ include_errors: bool = True
899
+
900
+ def to_payload(self) -> Dict[str, Any]:
901
+ """
902
+ Convert to API request payload.
903
+
904
+ Returns:
905
+ Dictionary ready to be sent to the video_builder API.
906
+ """
907
+ payload: Dict[str, Any] = {
908
+ "file_name": self.file_name,
909
+ "spider_id": self.spider_id,
910
+ "spider_name": self.spider_name,
911
+ "spider_parameters": json.dumps([self.parameters]),
912
+ "spider_errors": "true" if self.include_errors else "false",
913
+ "common_settings": self.common_settings.to_json(),
914
+ }
915
+ return payload
916
+
917
+
798
918
  # =============================================================================
799
919
  # Response Models
800
920
  # =============================================================================
@@ -838,3 +958,177 @@ class TaskStatusResponse:
838
958
  """Check if the task failed."""
839
959
  failure_statuses = {"failed", "error"}
840
960
  return self.status.lower() in failure_statuses
961
+
962
+
963
+ @dataclass
964
+ class UsageStatistics:
965
+ """
966
+ Response model for account usage statistics.
967
+
968
+ Attributes:
969
+ total_usage_traffic: Total traffic used (KB).
970
+ traffic_balance: Remaining traffic balance (KB).
971
+ query_days: Number of days in the query range.
972
+ range_usage_traffic: Traffic used in the specified date range (KB).
973
+ data: Daily usage breakdown.
974
+ """
975
+
976
+ total_usage_traffic: float
977
+ traffic_balance: float
978
+ query_days: int
979
+ range_usage_traffic: float
980
+ data: List[Dict[str, Any]]
981
+
982
+ @classmethod
983
+ def from_dict(cls, data: Dict[str, Any]) -> "UsageStatistics":
984
+ """Create from API response dict."""
985
+ return cls(
986
+ total_usage_traffic=float(data.get("total_usage_traffic", 0)),
987
+ traffic_balance=float(data.get("traffic_balance", 0)),
988
+ query_days=int(data.get("query_days", 0)),
989
+ range_usage_traffic=float(data.get("range_usage_traffic", 0)),
990
+ data=data.get("data", []),
991
+ )
992
+
993
+ def total_usage_gb(self) -> float:
994
+ """Get total usage in GB."""
995
+ return self.total_usage_traffic / (1024 * 1024)
996
+
997
+ def balance_gb(self) -> float:
998
+ """Get balance in GB."""
999
+ return self.traffic_balance / (1024 * 1024)
1000
+
1001
+ def range_usage_gb(self) -> float:
1002
+ """Get range usage in GB."""
1003
+ return self.range_usage_traffic / (1024 * 1024)
1004
+
1005
+
1006
+ @dataclass
1007
+ class ProxyUser:
1008
+ """
1009
+ Proxy user (sub-account) information.
1010
+
1011
+ Attributes:
1012
+ username: User's username.
1013
+ password: User's password.
1014
+ status: User status (True=enabled, False=disabled).
1015
+ traffic_limit: Traffic limit in MB (0 = unlimited).
1016
+ usage_traffic: Traffic used in KB.
1017
+ """
1018
+
1019
+ username: str
1020
+ password: str
1021
+ status: bool
1022
+ traffic_limit: int
1023
+ usage_traffic: float
1024
+
1025
+ @classmethod
1026
+ def from_dict(cls, data: Dict[str, Any]) -> "ProxyUser":
1027
+ """Create from API response dict."""
1028
+ return cls(
1029
+ username=data.get("username", ""),
1030
+ password=data.get("password", ""),
1031
+ status=data.get("status") in (True, "true", 1),
1032
+ traffic_limit=int(data.get("traffic_limit", 0)),
1033
+ usage_traffic=float(data.get("usage_traffic", 0)),
1034
+ )
1035
+
1036
+ def usage_gb(self) -> float:
1037
+ """Get usage in GB."""
1038
+ return self.usage_traffic / (1024 * 1024)
1039
+
1040
+ def limit_gb(self) -> float:
1041
+ """Get limit in GB (0 means unlimited)."""
1042
+ if self.traffic_limit == 0:
1043
+ return 0
1044
+ return self.traffic_limit / 1024
1045
+
1046
+
1047
+ @dataclass
1048
+ class ProxyUserList:
1049
+ """
1050
+ Response model for proxy user list.
1051
+
1052
+ Attributes:
1053
+ limit: Total traffic limit (KB).
1054
+ remaining_limit: Remaining traffic limit (KB).
1055
+ user_count: Number of users.
1056
+ users: List of proxy users.
1057
+ """
1058
+
1059
+ limit: float
1060
+ remaining_limit: float
1061
+ user_count: int
1062
+ users: List[ProxyUser]
1063
+
1064
+ @classmethod
1065
+ def from_dict(cls, data: Dict[str, Any]) -> "ProxyUserList":
1066
+ """Create from API response dict."""
1067
+ user_list = data.get("list", [])
1068
+ users = [ProxyUser.from_dict(u) for u in user_list]
1069
+
1070
+ return cls(
1071
+ limit=float(data.get("limit", 0)),
1072
+ remaining_limit=float(data.get("remaining_limit", 0)),
1073
+ user_count=int(data.get("user_count", len(users))),
1074
+ users=users,
1075
+ )
1076
+
1077
+
1078
+ @dataclass
1079
+ class ProxyServer:
1080
+ """
1081
+ ISP or Datacenter proxy server information.
1082
+
1083
+ Attributes:
1084
+ ip: Proxy server IP address.
1085
+ port: Proxy server port.
1086
+ username: Authentication username.
1087
+ password: Authentication password.
1088
+ expiration_time: Expiration timestamp (Unix timestamp or datetime string).
1089
+ region: Server region (optional).
1090
+ """
1091
+
1092
+ ip: str
1093
+ port: int
1094
+ username: str
1095
+ password: str
1096
+ expiration_time: Optional[Union[int, str]] = None
1097
+ region: Optional[str] = None
1098
+
1099
+ @classmethod
1100
+ def from_dict(cls, data: Dict[str, Any]) -> "ProxyServer":
1101
+ """Create from API response dict."""
1102
+ return cls(
1103
+ ip=data.get("ip", ""),
1104
+ port=int(data.get("port", 0)),
1105
+ username=data.get("username", data.get("user", "")),
1106
+ password=data.get("password", data.get("pwd", "")),
1107
+ expiration_time=data.get("expiration_time", data.get("expireTime")),
1108
+ region=data.get("region"),
1109
+ )
1110
+
1111
+ def to_proxy_url(self, protocol: str = "http") -> str:
1112
+ """
1113
+ Build proxy URL for this server.
1114
+
1115
+ Args:
1116
+ protocol: Proxy protocol (http/https/socks5).
1117
+
1118
+ Returns:
1119
+ Complete proxy URL.
1120
+ """
1121
+ return f"{protocol}://{self.username}:{self.password}@{self.ip}:{self.port}"
1122
+
1123
+ def is_expired(self) -> bool:
1124
+ """Check if proxy has expired (if expiration_time is available)."""
1125
+ if self.expiration_time is None:
1126
+ return False
1127
+
1128
+ import time
1129
+
1130
+ if isinstance(self.expiration_time, int):
1131
+ return time.time() > self.expiration_time
1132
+
1133
+ # String timestamp handling would need datetime parsing
1134
+ return False
thordata/retry.py CHANGED
@@ -64,7 +64,10 @@ class RetryConfig:
64
64
 
65
65
  # Status codes to retry on (5xx server errors + 429 rate limit)
66
66
  retry_on_status_codes: Set[int] = field(
67
- default_factory=lambda: {300, 429, 500, 502, 503, 504}
67
+ default_factory=lambda: {429, 500, 502, 503, 504}
68
+ )
69
+ retry_on_api_codes: Set[int] = field(
70
+ default_factory=lambda: {300} # API response body code
68
71
  )
69
72
 
70
73
  # Exception types to always retry on
@@ -0,0 +1,212 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 0.8.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
12
+ Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Internet :: Proxy Servers
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: License :: OSI Approved :: MIT License
24
+ Classifier: Operating System :: OS Independent
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: requests>=2.25.0
30
+ Requires-Dist: aiohttp>=3.9.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
34
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
35
+ Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
36
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
37
+ Requires-Dist: black>=23.0.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
40
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
41
+ Requires-Dist: aioresponses>=0.7.6; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Thordata Python SDK
45
+
46
+ <div align="center">
47
+
48
+ **Official Python client for Thordata's Proxy Network, SERP API, Web Unlocker, and Web Scraper API.**
49
+
50
+ *Async-ready, type-safe, built for AI agents and large-scale data collection.*
51
+
52
+ [![PyPI](https://img.shields.io/pypi/v/thordata-sdk?color=blue)](https://pypi.org/project/thordata-sdk/)
53
+ [![Python](https://img.shields.io/badge/python-3.9+-blue)](https://python.org)
54
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
55
+
56
+ [Documentation](https://doc.thordata.com) • [Dashboard](https://www.thordata.com) • [Examples](examples/)
57
+
58
+ </div>
59
+
60
+ ---
61
+
62
+ ## ✨ Features
63
+
64
+ - 🌐 **Proxy Network**: Residential, Mobile, Datacenter, ISP proxies with geo-targeting
65
+ - 🔍 **SERP API**: Google, Bing, Yandex, DuckDuckGo search results
66
+ - 🔓 **Web Unlocker**: Bypass Cloudflare, CAPTCHAs, anti-bot systems
67
+ - 🕷️ **Web Scraper API**: Async task-based scraping (Text & Video/Audio)
68
+ - 📊 **Account Management**: Usage stats, sub-users, IP whitelist
69
+ - ⚡ **Async Support**: Full async/await support with aiohttp
70
+ - 🔄 **Auto Retry**: Configurable retry with exponential backoff
71
+
72
+ ---
73
+
74
+ ## 📦 Installation
75
+
76
+ ```bash
77
+ pip install thordata-sdk
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 🔐 Configuration
83
+
84
+ Set environment variables:
85
+
86
+ ```bash
87
+ # Required for Scraper APIs (SERP, Universal, Tasks)
88
+ export THORDATA_SCRAPER_TOKEN=your_token
89
+
90
+ # Required for Public/Location APIs (Dashboard -> My Account)
91
+ export THORDATA_PUBLIC_TOKEN=your_public_token
92
+ export THORDATA_PUBLIC_KEY=your_public_key
93
+
94
+ # Credentials for Public API NEW (Dashboard -> Public API NEW)
95
+ # Optional: if not set, the SDK falls back to THORDATA_PUBLIC_TOKEN / THORDATA_PUBLIC_KEY
96
+ export THORDATA_SIGN=your_sign
97
+ export THORDATA_API_KEY=your_api_key
98
+ ```
99
+
100
+ ---
101
+
102
+ ## 🚀 Quick Start
103
+
104
+ ```python
105
+ from thordata import ThordataClient, Engine
106
+
107
+ # Initialize (credentials passed explicitly here; they can also be supplied via the env vars above)
108
+ client = ThordataClient(
109
+ scraper_token="your_token",
110
+ public_token="pub_token",
111
+ public_key="pub_key"
112
+ )
113
+
114
+ # SERP Search
115
+ results = client.serp_search("python tutorial", engine=Engine.GOOGLE)
116
+ print(f"Found {len(results.get('organic', []))} results")
117
+
118
+ # Universal Scrape
119
+ html = client.universal_scrape("https://httpbin.org/html")
120
+ print(html[:100])
121
+ ```
122
+
123
+ ---
124
+
125
+ ## 📖 Feature Guide
126
+
127
+ ### SERP API
128
+
129
+ ```python
130
+ from thordata import SerpRequest
131
+
132
+ # Advanced search
133
+ results = client.serp_search_advanced(SerpRequest(
134
+ query="pizza",
135
+ engine="google_local",
136
+ country="us",
137
+ location="New York",
138
+ num=10
139
+ ))
140
+ ```
141
+
142
+ ### Web Scraper API (Async Tasks)
143
+
144
+ **Create Task:**
145
+ ```python
146
+ task_id = client.create_scraper_task(
147
+ file_name="my_task",
148
+ spider_id="universal",
149
+ spider_name="universal",
150
+ parameters={"url": "https://example.com"}
151
+ )
152
+ ```
153
+
154
+ **Video Download (New):**
155
+ ```python
156
+ from thordata import CommonSettings
157
+
158
+ task_id = client.create_video_task(
159
+ file_name="{{VideoID}}",
160
+ spider_id="youtube_video_by-url",
161
+ spider_name="youtube.com",
162
+ parameters={"url": "https://youtube.com/watch?v=..."},
163
+ common_settings=CommonSettings(resolution="1080p")
164
+ )
165
+ ```
166
+
167
+ **Wait & Download:**
168
+ ```python
169
+ status = client.wait_for_task(task_id)
170
+ if status == "ready":
171
+ url = client.get_task_result(task_id)
172
+ print(url)
173
+ ```
174
+
175
+ ### Account Management
176
+
177
+ ```python
178
+ # Usage Statistics
179
+ stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
180
+ print(f"Balance: {stats.balance_gb():.2f} GB")
181
+
182
+ # Proxy Users
183
+ users = client.list_proxy_users()
184
+ print(f"Sub-users: {users.user_count}")
185
+
186
+ # Whitelist IP
187
+ client.add_whitelist_ip("1.2.3.4")
188
+ ```
189
+
190
+ ### Proxy Network
191
+
192
+ ```python
193
+ from thordata import ProxyConfig
194
+
195
+ # Generate Proxy URL
196
+ proxy_url = client.build_proxy_url(
197
+ username="proxy_user",
198
+ password="proxy_pass",
199
+ country="us",
200
+ city="ny"
201
+ )
202
+
203
+ # Use with requests
204
+ import requests
205
+ requests.get("https://httpbin.org/ip", proxies={"http": proxy_url, "https": proxy_url})
206
+ ```
207
+
208
+ ---
209
+
210
+ ## 📄 License
211
+
212
+ MIT License
@@ -0,0 +1,14 @@
1
+ thordata/__init__.py,sha256=yaIxW1T_nsCeiPE6iIHunjRzPrtbiN0BciveICgL4dM,3195
2
+ thordata/_utils.py,sha256=epF-ewHyk7McdejlhHNAfxhIQ8sN3TlIjUJ9H4HOaUE,5254
3
+ thordata/async_client.py,sha256=tC9y1wmcO6RsXCysBo0a0GNRZR3QQjJlCmEwG5HVukQ,53169
4
+ thordata/client.py,sha256=VN5Jm3er7fdZDfT2G9g4siBSYNo0ZWj4WOi6TAiAZcE,59638
5
+ thordata/demo.py,sha256=zmG4I4cHXnbmQfbr063SeRK7_9IXrfof9QFoGqGTVm8,3806
6
+ thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
7
+ thordata/exceptions.py,sha256=IgMsFuh49cPxU5YofsKP1UhP5A_snhtuN6xD1yZWLiI,10018
8
+ thordata/models.py,sha256=NG4wn1bq4-FC4Aex8vwBOldiHovwg0JzhdtBsI1mL_8,36118
9
+ thordata/retry.py,sha256=nkh17ca2TIEcTc-uNo-xcNdJPuxZ_VGlMbC70X6p-_Q,11518
10
+ thordata_sdk-0.8.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
11
+ thordata_sdk-0.8.0.dist-info/METADATA,sha256=IgL554I6mzya9FdbqCxKdvO3r-bywiHJjZi1xdk8W48,5850
12
+ thordata_sdk-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ thordata_sdk-0.8.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
14
+ thordata_sdk-0.8.0.dist-info/RECORD,,
thordata/parameters.py DELETED
@@ -1,53 +0,0 @@
1
- # src/thordata/parameters.py
2
-
3
- from typing import Any, Dict
4
-
5
-
6
- def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
7
- """
8
- Normalizes parameters across different search engines to ensure a unified API surface.
9
-
10
- Args:
11
- engine (str): The search engine to use (e.g., 'google', 'yandex').
12
- query (str): The search query string.
13
- **kwargs: Additional parameters to pass to the API.
14
-
15
- Returns:
16
- Dict[str, Any]: The constructed payload for the API request.
17
- """
18
- # 1. Base parameters
19
- payload = {
20
- "num": str(kwargs.get("num", 10)), # Default to 10 results
21
- "json": "1", # Force JSON response
22
- "engine": engine,
23
- }
24
-
25
- # 2. Handle Query Parameter Differences (Yandex uses 'text', others use 'q')
26
- if engine == "yandex":
27
- payload["text"] = query
28
- # Set default URL for Yandex if not provided
29
- if "url" not in kwargs:
30
- payload["url"] = "yandex.com"
31
- else:
32
- payload["q"] = query
33
-
34
- # 3. Handle Default URLs for other engines
35
- if "url" not in kwargs:
36
- defaults = {
37
- "google": "google.com",
38
- "bing": "bing.com",
39
- "duckduckgo": "duckduckgo.com",
40
- "baidu": "baidu.com",
41
- }
42
- if engine in defaults:
43
- payload["url"] = defaults[engine]
44
-
45
- # 4. Passthrough for all other user-provided arguments
46
- # This allows support for engine-specific parameters (e.g., tbm, uule, gl)
47
- # without explicitly defining them all.
48
- protected_keys = {"num", "engine", "q", "text"}
49
- for key, value in kwargs.items():
50
- if key not in protected_keys:
51
- payload[key] = value
52
-
53
- return payload