thordata-sdk 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/enums.py CHANGED
@@ -1,25 +1,289 @@
1
- # src/thordata/enums.py
1
+ """
2
+ Enumerations for the Thordata Python SDK.
2
3
 
3
- from enum import Enum
4
+ This module provides type-safe enumerations for all Thordata API parameters,
5
+ making it easier to discover available options via IDE autocomplete.
6
+ """
7
+
8
+ from enum import Enum, IntEnum
9
+
10
+
11
+ # =============================================================================
12
+ # Continent Enum
13
+ # =============================================================================
14
+
15
class Continent(str, Enum):
    """Two-letter continent identifiers used for geo-targeting.

    Every member is also a ``str``, so it compares equal to its
    lowercase code (for example ``Continent.ASIA == "as"``).
    """

    AFRICA = "af"
    ANTARCTICA = "an"
    ASIA = "as"
    EUROPE = "eu"
    NORTH_AMERICA = "na"
    OCEANIA = "oc"
    SOUTH_AMERICA = "sa"
26
+
27
+
28
+ # =============================================================================
29
+ # Proxy Host Enum
30
+ # =============================================================================
31
+
32
class ProxyHost(str, Enum):
    """Host names of the proxy gateways known to the SDK.

    Members are ``str`` subclasses whose value is the gateway host name.
    """

    DEFAULT = "pr.thordata.net"
    NORTH_AMERICA = "t.na.thordata.net"
    EUROPE = "t.eu.thordata.net"
    GATE = "gate.thordata.com"
40
+
41
+
42
class ProxyPort(IntEnum):
    """Port numbers of the proxy gateways known to the SDK.

    Being an ``IntEnum``, each member can be used wherever an ``int``
    port number is expected.
    """

    DEFAULT = 9999
    MOBILE = 5555
    DATACENTER = 7777
    ISP = 6666
    ALTERNATIVE = 22225
51
+
52
+
53
+ # =============================================================================
54
+ # Search Engine Enums
55
+ # =============================================================================
4
56
 
5
57
class Engine(str, Enum):
    """Search engines that can be selected for the SERP API.

    Members are ``str`` subclasses; the value is the lowercase engine name.
    """

    GOOGLE = "google"
    BING = "bing"
    YANDEX = "yandex"
    DUCKDUCKGO = "duckduckgo"
    BAIDU = "baidu"
    YAHOO = "yahoo"
    NAVER = "naver"
68
+
14
69
 
15
70
class GoogleSearchType(str, Enum):
    """Search verticals selectable when the engine is Google.

    Members are ``str`` subclasses; the value is the lowercase type name.
    """

    SEARCH = "search"
    MAPS = "maps"
    SHOPPING = "shopping"
    NEWS = "news"
    IMAGES = "images"
    VIDEOS = "videos"
    SCHOLAR = "scholar"
    PATENTS = "patents"
    JOBS = "jobs"
    FLIGHTS = "flights"
    FINANCE = "finance"
85
+
86
+
87
class BingSearchType(str, Enum):
    """Search verticals selectable when the engine is Bing.

    Members are ``str`` subclasses; the value is the lowercase type name.
    """

    SEARCH = "search"
    IMAGES = "images"
    VIDEOS = "videos"
    NEWS = "news"
    MAPS = "maps"
96
+
97
+
98
class Device(str, Enum):
    """Device kinds that can be requested from the SERP API.

    Members are ``str`` subclasses; the value is the lowercase device name.
    """

    DESKTOP = "desktop"
    MOBILE = "mobile"
    TABLET = "tablet"
105
+
106
+
107
class TimeRange(str, Enum):
    """Time-window filters that can be applied to search results.

    Members are ``str`` subclasses; the value is the lowercase unit name.
    """

    HOUR = "hour"
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
116
+
117
+
118
+ # =============================================================================
119
+ # Proxy Enums
120
+ # =============================================================================
121
+
122
class ProxyType(IntEnum):
    """Integer identifiers for the available proxy network types.

    Being an ``IntEnum``, each member can be used wherever the plain
    integer identifier is expected.
    """

    RESIDENTIAL = 1
    UNLIMITED = 2
    DATACENTER = 3
    ISP = 4
    MOBILE = 5
131
+
132
+
133
class SessionType(str, Enum):
    """Proxy session modes controlling connection persistence.

    Members are ``str`` subclasses; the value is the lowercase mode name.
    """

    ROTATING = "rotating"
    STICKY = "sticky"
139
+
140
+
141
+ # =============================================================================
142
+ # Output Format Enums
143
+ # =============================================================================
144
+
145
class OutputFormat(str, Enum):
    """Output formats selectable for the Universal Scraping API.

    Members are ``str`` subclasses; the value is the lowercase format name.
    """

    HTML = "html"
    PNG = "png"
    PDF = "pdf"
    MARKDOWN = "markdown"
    TEXT = "text"
154
+
155
+
156
class DataFormat(str, Enum):
    """File formats selectable when downloading task results.

    Members are ``str`` subclasses; the value is the lowercase format name.
    """

    JSON = "json"
    CSV = "csv"
    XLSX = "xlsx"
163
+
164
+
165
+ # =============================================================================
166
+ # Task Status Enums
167
+ # =============================================================================
168
+
169
class TaskStatus(str, Enum):
    """
    Possible statuses for async scraping tasks.

    The ``is_*`` helpers accept either a ``TaskStatus`` member or the raw
    status string (any letter case). Values that do not correspond to a
    member are treated as "not in the group" and yield ``False``.
    """

    PENDING = "pending"
    RUNNING = "running"
    READY = "ready"
    SUCCESS = "success"
    FINISHED = "finished"
    FAILED = "failed"
    ERROR = "error"
    CANCELLED = "cancelled"
    UNKNOWN = "unknown"

    @classmethod
    def _coerce(cls, status):
        """Return the member matching ``status``, or ``None`` if there is none.

        Coercion is required because ``Enum`` members hash by *name*
        ("READY") while a raw string hashes by its own text ("ready"); a
        plain string would therefore never be found in a set of members
        even though it compares equal to one.
        """
        if isinstance(status, str) and not isinstance(status, cls):
            status = status.lower()
        try:
            return cls(status)
        except ValueError:
            # Not a known status value.
            return None

    @classmethod
    def is_terminal(cls, status: "TaskStatus") -> bool:
        """Check if a status is terminal (no more updates expected).

        Args:
            status: A ``TaskStatus`` member or its raw string value.

        Returns:
            True for READY, SUCCESS, FINISHED, FAILED, ERROR or CANCELLED.
        """
        return cls._coerce(status) in {
            cls.READY, cls.SUCCESS, cls.FINISHED,
            cls.FAILED, cls.ERROR, cls.CANCELLED,
        }

    @classmethod
    def is_success(cls, status: "TaskStatus") -> bool:
        """Check if a status indicates success.

        Args:
            status: A ``TaskStatus`` member or its raw string value.

        Returns:
            True for READY, SUCCESS or FINISHED.
        """
        return cls._coerce(status) in {cls.READY, cls.SUCCESS, cls.FINISHED}

    @classmethod
    def is_failure(cls, status: "TaskStatus") -> bool:
        """Check if a status indicates failure.

        Args:
            status: A ``TaskStatus`` member or its raw string value.

        Returns:
            True for FAILED or ERROR.
        """
        return cls._coerce(status) in {cls.FAILED, cls.ERROR}
200
+
201
+
202
+ # =============================================================================
203
+ # Country Enum (commonly used countries)
204
+ # =============================================================================
205
+
206
class Country(str, Enum):
    """Frequently used two-letter country codes for geo-targeting.

    Each member's name is the uppercase code and its value the lowercase
    code. Members are ``str`` subclasses, so ``Country.US == "us"``.
    """

    # --- North America ---
    US = "us"
    CA = "ca"
    MX = "mx"

    # --- Europe ---
    GB = "gb"
    DE = "de"
    FR = "fr"
    ES = "es"
    IT = "it"
    NL = "nl"
    PL = "pl"
    RU = "ru"
    UA = "ua"
    SE = "se"
    NO = "no"
    DK = "dk"
    FI = "fi"
    CH = "ch"
    AT = "at"
    BE = "be"
    PT = "pt"
    IE = "ie"
    CZ = "cz"
    GR = "gr"

    # --- Asia Pacific ---
    CN = "cn"
    JP = "jp"
    KR = "kr"
    IN = "in"
    AU = "au"
    NZ = "nz"
    SG = "sg"
    HK = "hk"
    TW = "tw"
    TH = "th"
    VN = "vn"
    ID = "id"
    MY = "my"
    PH = "ph"
    PK = "pk"
    BD = "bd"

    # --- South America ---
    BR = "br"
    AR = "ar"
    CL = "cl"
    CO = "co"
    PE = "pe"
    VE = "ve"

    # --- Middle East & Africa ---
    AE = "ae"
    SA = "sa"
    IL = "il"
    TR = "tr"
    ZA = "za"
    EG = "eg"
    NG = "ng"
    KE = "ke"
    MA = "ma"
273
+
274
+
275
+ # =============================================================================
276
+ # Helper Functions
277
+ # =============================================================================
278
+
279
def normalize_enum_value(value, enum_class: type) -> str:
    """
    Reduce an enum member or a plain string to the value sent to the API.

    Args:
        value: Either an instance of ``enum_class`` or a ``str``.
        enum_class: The class whose instances are unwrapped via ``.value``.

    Returns:
        ``value.value`` unchanged when ``value`` is an ``enum_class``
        instance; otherwise the lower-cased string.

    Raises:
        TypeError: If ``value`` is neither an ``enum_class`` instance nor a str.
    """
    # Guard first: anything that is neither accepted type is rejected.
    if not isinstance(value, (enum_class, str)):
        raise TypeError(
            f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
        )
    # The enum check takes precedence, so str-based enum members are
    # unwrapped rather than lower-cased.
    return value.value if isinstance(value, enum_class) else value.lower()
thordata/exceptions.py ADDED
@@ -0,0 +1,315 @@
1
+ """
2
+ Custom exception types for the Thordata Python SDK.
3
+
4
+ Exception Hierarchy:
5
+ ThordataError (base)
6
+ ├── ThordataConfigError - Configuration/initialization issues
7
+ ├── ThordataNetworkError - Network connectivity issues (retryable)
8
+ │ └── ThordataTimeoutError - Request timeout (retryable)
9
+ └── ThordataAPIError - API returned an error
10
+ ├── ThordataAuthError - 401/403 authentication issues
11
+ ├── ThordataRateLimitError - 429/402 rate limit/quota issues
12
+ ├── ThordataServerError - 5xx server errors (retryable)
13
+ └── ThordataValidationError - 400/422 bad request / validation errors
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any, Optional, Set
19
+
20
+
21
+ # =============================================================================
22
+ # Base Exception
23
+ # =============================================================================
24
+
25
class ThordataError(Exception):
    """Root of the Thordata SDK exception hierarchy.

    Attributes:
        message: The human-readable text the error was created with.
    """

    def __init__(self, message: str) -> None:
        # Keep the text on the instance as well as in ``args``.
        self.message = message
        super().__init__(message)
31
+
32
+
33
+ # =============================================================================
34
+ # Configuration Errors
35
+ # =============================================================================
36
+
37
class ThordataConfigError(ThordataError):
    """
    Signals that the SDK has been configured incorrectly.

    Typical situations:
        - A required token is missing
        - Parameters were combined in an invalid way
    """
46
+
47
+
48
+ # =============================================================================
49
+ # Network Errors (Usually Retryable)
50
+ # =============================================================================
51
+
52
class ThordataNetworkError(ThordataError):
    """
    Signals a failure at the network level.

    Usually worth retrying (DNS failures, connection refused, etc.).

    Attributes:
        original_error: The underlying exception, when one was supplied.
    """

    def __init__(
        self,
        message: str,
        *,
        original_error: Optional[Exception] = None,
    ) -> None:
        # Record the wrapped cause, then let the base class store the message.
        self.original_error = original_error
        super().__init__(message)
67
+
68
+
69
class ThordataTimeoutError(ThordataNetworkError):
    """
    Signals that a request timed out.

    Usually worth retrying.
    """
76
+
77
+
78
+ # =============================================================================
79
+ # API Errors
80
+ # =============================================================================
81
+
82
class ThordataAPIError(ThordataError):
    """
    General-purpose error for a non-success code or an unexpected
    response payload coming back from the backend.

    Attributes:
        message: Human-readable error message.
        status_code: HTTP status code of the response (e.g., 401, 500).
        code: Application-level code from the Thordata API JSON response.
        payload: Raw payload (dict/str) returned by the API.
        request_id: Optional request ID for debugging with support.
    """

    # HTTP status codes mapped to this error type; subclasses override it.
    HTTP_STATUS_CODES: Set[int] = set()

    def __init__(
        self,
        message: str,
        *,
        status_code: Optional[int] = None,
        code: Optional[int] = None,
        payload: Any = None,
        request_id: Optional[str] = None,
    ) -> None:
        super().__init__(message)
        self.status_code, self.code = status_code, code
        self.payload, self.request_id = payload, request_id

    def __repr__(self) -> str:
        # The payload is left out of the repr.
        fields = ", ".join(
            (
                f"message={self.message!r}",
                f"status_code={self.status_code}",
                f"code={self.code}",
                f"request_id={self.request_id!r}",
            )
        )
        return f"{self.__class__.__name__}({fields})"

    @property
    def is_retryable(self) -> bool:
        """Whether retrying is typically safe; the generic error says no."""
        return False
126
+
127
+
128
class ThordataAuthError(ThordataAPIError):
    """
    Failure to authenticate or authorize the request.

    HTTP Status: 401, 403
    Frequent causes:
        - Token is invalid or has expired
        - Permissions are insufficient
        - IP is not whitelisted
    """

    HTTP_STATUS_CODES = {401, 403}

    @property
    def is_retryable(self) -> bool:
        """Always False: auth errors shouldn't be retried."""
        return False
144
+
145
+
146
class ThordataRateLimitError(ThordataAPIError):
    """
    Request rejected because of rate limiting or quota/balance problems.

    HTTP Status: 429, 402
    Frequent causes:
        - Too many requests per second
        - Insufficient account balance
        - Quota exceeded

    Attributes:
        retry_after: Suggested seconds to wait before retrying (if provided).
    """

    HTTP_STATUS_CODES = {429, 402}

    def __init__(
        self,
        message: str,
        *,
        retry_after: Optional[int] = None,
        **kwargs: Any,
    ) -> None:
        # Remaining keyword arguments are forwarded to the base class.
        self.retry_after = retry_after
        super().__init__(message, **kwargs)

    @property
    def is_retryable(self) -> bool:
        """Always True: rate limits are retryable after waiting."""
        return True
176
+
177
+
178
class ThordataServerError(ThordataAPIError):
    """
    Failure on the server side (5xx).

    HTTP Status: 500, 502, 503, 504
    Usually transient, so retrying is typically safe.
    """

    HTTP_STATUS_CODES = {500, 502, 503, 504}

    @property
    def is_retryable(self) -> bool:
        """Always True for server errors."""
        return True
191
+
192
+
193
class ThordataValidationError(ThordataAPIError):
    """
    The request failed validation.

    HTTP Status: 400, 422
    Frequent causes:
        - Parameters are invalid
        - Required fields are missing
        - Request body is malformed
    """

    HTTP_STATUS_CODES = {400, 422}

    @property
    def is_retryable(self) -> bool:
        """Always False: bad requests shouldn't be retried."""
        return False
209
+
210
+
211
+ # =============================================================================
212
+ # Exception Factory
213
+ # =============================================================================
214
+
215
def raise_for_code(
    message: str,
    *,
    status_code: Optional[int] = None,
    code: Optional[int] = None,
    payload: Any = None,
    request_id: Optional[str] = None,
) -> None:
    """
    Factory function to raise the appropriate exception based on status/code.

    This centralizes the error-mapping logic that was previously duplicated
    across multiple methods. The function never returns normally.

    The code used for classification is chosen as follows:
        1. ``status_code`` when it is an HTTP error status (>= 400);
        2. otherwise the application-level ``code`` when one is given, so
           that an error reported inside a successful (e.g. 200) HTTP
           response is still classified correctly;
        3. otherwise whatever ``status_code`` is (possibly ``None``).

    Args:
        message: Human-readable error message.
        status_code: HTTP status code (if available).
        code: Application-level code from API response.
        payload: Raw API response payload. When it is a dict, its
            ``"retry_after"`` entry is passed to ThordataRateLimitError.
        request_id: Optional request ID for debugging.

    Raises:
        ThordataAuthError: For 401/403 codes.
        ThordataRateLimitError: For 429/402 codes.
        ThordataServerError: For integer codes in the 500-599 range.
        ThordataValidationError: For 400/422 codes.
        ThordataAPIError: For all other error codes.
    """
    # A non-error HTTP status must not hide an application-level error code.
    if status_code is not None and status_code >= 400:
        effective_code = status_code
    elif code is not None:
        effective_code = code
    else:
        effective_code = status_code

    kwargs = {
        "status_code": status_code,
        "code": code,
        "payload": payload,
        "request_id": request_id,
    }

    # Auth errors
    if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
        raise ThordataAuthError(message, **kwargs)

    # Rate limit errors
    if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
        # Try to extract retry_after from payload
        retry_after = payload.get("retry_after") if isinstance(payload, dict) else None
        raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)

    # Server errors: the whole 5xx range, not only the codes listed on the
    # class. The isinstance guard avoids a TypeError on a non-integer code.
    if isinstance(effective_code, int) and 500 <= effective_code < 600:
        raise ThordataServerError(message, **kwargs)

    # Validation errors
    if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
        raise ThordataValidationError(message, **kwargs)

    # Generic API error
    raise ThordataAPIError(message, **kwargs)
275
+
276
+
277
+ # =============================================================================
278
+ # Retry Helper
279
+ # =============================================================================
280
+
281
def is_retryable_exception(exc: Exception) -> bool:
    """
    Decide whether an exception is typically safe to retry.

    SDK network errors are always retryable; SDK API errors defer to their
    own ``is_retryable`` property. For other exceptions, the ``requests``
    and ``aiohttp`` exception types are consulted when those libraries are
    importable.

    Args:
        exc: The exception to check.

    Returns:
        True if the exception is typically safe to retry.
    """
    # SDK-defined errors carry their own answer.
    if isinstance(exc, ThordataNetworkError):
        return True
    if isinstance(exc, ThordataAPIError):
        return exc.is_retryable

    # requests / aiohttp are imported lazily so that neither library is a
    # hard dependency of this module.
    try:
        import requests
    except ImportError:
        pass
    else:
        if isinstance(exc, (requests.Timeout, requests.ConnectionError)):
            return True

    try:
        import aiohttp
    except ImportError:
        pass
    else:
        if isinstance(exc, (aiohttp.ClientError, aiohttp.ServerTimeoutError)):
            return True

    return False