thordata-sdk 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/enums.py CHANGED
@@ -1,25 +1,315 @@
1
- # src/thordata/enums.py
1
+ """
2
+ Enumerations for the Thordata Python SDK.
3
+
4
+ This module provides type-safe enumerations for all Thordata API parameters,
5
+ making it easier to discover available options via IDE autocomplete.
6
+ """
7
+
8
+ from enum import Enum, IntEnum
9
+
10
+ # =============================================================================
11
+ # Continent Enum
12
+ # =============================================================================
13
+
14
+
15
class Continent(str, Enum):
    """
    Continent codes for geo-targeting.

    Every member is a two-letter lowercase code. The class mixes in ``str``,
    so a member compares equal to its raw string value
    (``Continent.ASIA == "as"``).
    """

    AFRICA = "af"
    ANTARCTICA = "an"
    ASIA = "as"
    EUROPE = "eu"
    NORTH_AMERICA = "na"
    OCEANIA = "oc"
    # NOTE: "sa" is also the raw value of Country.SA in this module; because
    # both enums mix in str, the two members compare equal as strings.
    SOUTH_AMERICA = "sa"
27
+
28
+
29
+ # =============================================================================
30
+ # Proxy Host Enum
31
+ # =============================================================================
32
+
33
+
34
class ProxyHost(str, Enum):
    """
    Available proxy gateway hosts.

    Members are plain host names (no scheme, no port). The class mixes in
    ``str``, so a member compares equal to its raw host string.
    """

    DEFAULT = "pr.thordata.net"
    NORTH_AMERICA = "t.na.thordata.net"
    EUROPE = "t.eu.thordata.net"
    # Unlike the other hosts, this one is under the .com domain.
    GATE = "gate.thordata.com"
43
+
44
+
45
class ProxyPort(IntEnum):
    """
    Available proxy gateway ports.

    Members are ``int`` subclasses (``IntEnum``), so they can be compared
    with and used as ordinary integers.
    """

    DEFAULT = 9999
    MOBILE = 5555
    DATACENTER = 7777
    ISP = 6666
    ALTERNATIVE = 22225
55
+
56
+
57
+ # =============================================================================
58
+ # Search Engine Enums
59
+ # =============================================================================
2
60
 
3
- from enum import Enum
4
61
 
5
62
class Engine(str, Enum):
    """
    Supported search engines for SERP API.

    Values are the lowercase engine names. The class mixes in ``str``, so a
    member compares equal to its raw string value.
    """

    GOOGLE = "google"
    BING = "bing"
    YANDEX = "yandex"
    DUCKDUCKGO = "duckduckgo"
    BAIDU = "baidu"
    YAHOO = "yahoo"
    NAVER = "naver"
74
+
14
75
 
15
76
class GoogleSearchType(str, Enum):
    """
    Search types specific to Google.

    Values are lowercase strings. The class mixes in ``str``, so a member
    compares equal to its raw string value.
    """

    SEARCH = "search"
    MAPS = "maps"
    SHOPPING = "shopping"
    NEWS = "news"
    IMAGES = "images"
    VIDEOS = "videos"
    SCHOLAR = "scholar"
    PATENTS = "patents"
    JOBS = "jobs"
    FLIGHTS = "flights"
    FINANCE = "finance"
92
+
93
+
94
class BingSearchType(str, Enum):
    """
    Search types specific to Bing.

    Values are lowercase strings. The class mixes in ``str``, so a member
    compares equal to its raw string value.
    """

    SEARCH = "search"
    IMAGES = "images"
    VIDEOS = "videos"
    NEWS = "news"
    MAPS = "maps"
104
+
105
+
106
class Device(str, Enum):
    """
    Device types for SERP API.

    The class mixes in ``str``, so a member compares equal to its raw
    string value.
    """

    DESKTOP = "desktop"
    MOBILE = "mobile"
    TABLET = "tablet"
114
+
115
+
116
class TimeRange(str, Enum):
    """
    Time range filters for search results.

    Members are declared from the shortest window (hour) to the longest
    (year). The class mixes in ``str``, so a member compares equal to its
    raw string value.
    """

    HOUR = "hour"
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
126
+
127
+
128
+ # =============================================================================
129
+ # Proxy Enums
130
+ # =============================================================================
131
+
132
+
133
class ProxyType(IntEnum):
    """
    Types of proxy networks available.

    Members are ``int`` subclasses (``IntEnum``) numbered 1 through 5, so
    they can be compared with and used as ordinary integers.
    """

    RESIDENTIAL = 1
    UNLIMITED = 2
    DATACENTER = 3
    ISP = 4
    MOBILE = 5
143
+
144
+
145
class SessionType(str, Enum):
    """
    Proxy session types for connection persistence.

    The class mixes in ``str``, so a member compares equal to its raw
    string value.
    """

    ROTATING = "rotating"
    STICKY = "sticky"
152
+
153
+
154
+ # =============================================================================
155
+ # Output Format Enums
156
+ # =============================================================================
157
+
158
+
159
class OutputFormat(str, Enum):
    """
    Output formats for Universal Scraping API.

    The class mixes in ``str``, so a member compares equal to its raw
    string value.
    """

    HTML = "html"
    PNG = "png"
    PDF = "pdf"
    MARKDOWN = "markdown"
    TEXT = "text"
169
+
170
+
171
class DataFormat(str, Enum):
    """
    Data formats for task result download.

    The class mixes in ``str``, so a member compares equal to its raw
    string value.
    """

    JSON = "json"
    CSV = "csv"
    XLSX = "xlsx"
179
+
180
+
181
+ # =============================================================================
182
+ # Task Status Enums
183
+ # =============================================================================
184
+
185
+
186
class TaskStatus(str, Enum):
    """
    Lifecycle states of an asynchronous scraping task.

    The class mixes in ``str``, so members compare (and hash) like their raw
    string values. The classmethod helpers therefore also accept a plain
    lowercase status string.
    """

    PENDING = "pending"
    RUNNING = "running"
    READY = "ready"
    SUCCESS = "success"
    FINISHED = "finished"
    FAILED = "failed"
    ERROR = "error"
    CANCELLED = "cancelled"
    UNKNOWN = "unknown"

    @classmethod
    def is_terminal(cls, status: "TaskStatus") -> bool:
        """Return True when no further updates are expected for *status*.

        Terminal states are every success state, every failure state, and
        CANCELLED. PENDING, RUNNING and UNKNOWN are not terminal.
        """
        if cls.is_success(status) or cls.is_failure(status):
            return True
        # CANCELLED is terminal but counts as neither success nor failure.
        return status in {cls.CANCELLED}

    @classmethod
    def is_success(cls, status: "TaskStatus") -> bool:
        """Return True when *status* is READY, SUCCESS or FINISHED."""
        successful = {cls.READY, cls.SUCCESS, cls.FINISHED}
        return status in successful

    @classmethod
    def is_failure(cls, status: "TaskStatus") -> bool:
        """Return True when *status* is FAILED or ERROR."""
        failed = {cls.FAILED, cls.ERROR}
        return status in failed
222
+
223
+
224
+ # =============================================================================
225
+ # Country Enum (commonly used countries)
226
+ # =============================================================================
227
+
228
+
229
class Country(str, Enum):
    """
    Common country codes for geo-targeting.

    This is a selection of countries, not an exhaustive list. Member names
    are the uppercase two-letter code and values are the same code in
    lowercase (they appear to follow ISO 3166-1 alpha-2 -- confirm against
    the API documentation). The class mixes in ``str``, so a member compares
    equal to its raw string value.
    """

    # North America
    US = "us"
    CA = "ca"
    MX = "mx"

    # Europe
    GB = "gb"
    DE = "de"
    FR = "fr"
    ES = "es"
    IT = "it"
    NL = "nl"
    PL = "pl"
    RU = "ru"
    UA = "ua"
    SE = "se"
    NO = "no"
    DK = "dk"
    FI = "fi"
    CH = "ch"
    AT = "at"
    BE = "be"
    PT = "pt"
    IE = "ie"
    CZ = "cz"
    GR = "gr"

    # Asia Pacific
    CN = "cn"
    JP = "jp"
    KR = "kr"
    IN = "in"
    AU = "au"
    NZ = "nz"
    SG = "sg"
    HK = "hk"
    TW = "tw"
    TH = "th"
    VN = "vn"
    ID = "id"
    MY = "my"
    PH = "ph"
    PK = "pk"
    BD = "bd"

    # South America
    BR = "br"
    AR = "ar"
    CL = "cl"
    CO = "co"
    PE = "pe"
    VE = "ve"

    # Middle East & Africa
    AE = "ae"
    # NOTE: "sa" is also the raw value of Continent.SOUTH_AMERICA in this
    # module; because both enums mix in str, the two compare equal.
    SA = "sa"
    IL = "il"
    TR = "tr"
    ZA = "za"
    EG = "eg"
    NG = "ng"
    KE = "ke"
    MA = "ma"
297
+
298
+
299
+ # =============================================================================
300
+ # Helper Functions
301
+ # =============================================================================
302
+
303
+
304
def normalize_enum_value(value: object, enum_class: type) -> str:
    """
    Return the lowercase string form of an enum member or a raw string.

    Args:
        value: A member of ``enum_class`` or a plain string.
        enum_class: The enum type that ``value`` is expected to belong to.

    Returns:
        The member's ``.value`` converted with ``str()`` and lowercased, or
        the lowercased string when ``value`` is a ``str``. Strings are not
        checked against the members of ``enum_class``.

    Raises:
        TypeError: If ``value`` is neither an ``enum_class`` instance nor a
            ``str``.
    """
    if isinstance(value, enum_class):
        # Fall back to the object itself if it has no ``.value`` attribute.
        member_value = getattr(value, "value", value)
        return str(member_value).lower()

    if not isinstance(value, str):
        raise TypeError(
            f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
        )

    return value.lower()
thordata/exceptions.py ADDED
@@ -0,0 +1,344 @@
1
+ """
2
+ Custom exception types for the Thordata Python SDK.
3
+
4
+ Exception Hierarchy:
5
+ ThordataError (base)
6
+ ├── ThordataConfigError - Configuration/initialization issues
7
+ ├── ThordataNetworkError - Network connectivity issues (retryable)
8
+ │ └── ThordataTimeoutError - Request timeout (retryable)
9
+ └── ThordataAPIError - API returned an error
10
+ ├── ThordataAuthError - 401/403 authentication issues
11
+ ├── ThordataRateLimitError - 429/402 rate limit/quota issues
12
+ ├── ThordataServerError - 5xx server errors (retryable)
13
+ └── ThordataValidationError - 400 bad request / validation errors
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any, Optional, Set
19
+
20
+ # =============================================================================
21
+ # Base Exception
22
+ # =============================================================================
23
+
24
+
25
class ThordataError(Exception):
    """
    Root of the Thordata SDK exception hierarchy.

    Attributes:
        message: The human-readable message passed to the constructor; it is
            also forwarded to ``Exception`` so ``str(err)`` returns it.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(message)
31
+
32
+
33
+ # =============================================================================
34
+ # Configuration Errors
35
+ # =============================================================================
36
+
37
+
38
class ThordataConfigError(ThordataError):
    """
    Signals that the SDK was set up or called with an invalid configuration.

    Typical situations:
        - A required token is missing
        - Parameters were combined in an unsupported way
    """
48
+
49
+
50
+ # =============================================================================
51
+ # Network Errors (Usually Retryable)
52
+ # =============================================================================
53
+
54
+
55
class ThordataNetworkError(ThordataError):
    """
    Signals a failure at the network level (DNS failure, connection
    refused, and similar). Such failures are usually worth retrying.

    Attributes:
        original_error: The lower-level exception that triggered this error,
            or ``None`` when none was supplied.
    """

    def __init__(
        self,
        message: str,
        *,
        original_error: Optional[Exception] = None,
    ) -> None:
        # Keep the underlying exception available for callers to inspect.
        self.original_error = original_error
        super().__init__(message)
70
+
71
+
72
class ThordataTimeoutError(ThordataNetworkError):
    """
    Signals that a request did not complete before its timeout.

    As a network error subclass, it is usually worth retrying.
    """
80
+
81
+
82
+ # =============================================================================
83
+ # API Errors
84
+ # =============================================================================
85
+
86
+
87
class ThordataAPIError(ThordataError):
    """
    Base class for errors reported by the backend: a non-success code or a
    response payload the SDK did not expect.

    Attributes:
        message: Human-readable error message.
        status_code: HTTP status code of the response (e.g., 401, 500).
        code: Application-level code taken from the API's JSON response.
        payload: Raw payload (dict/str) returned by the API.
        request_id: Optional request ID, useful when contacting support.
    """

    # Status codes that map to this error type; subclasses override it.
    HTTP_STATUS_CODES: Set[int] = set()

    def __init__(
        self,
        message: str,
        *,
        status_code: Optional[int] = None,
        code: Optional[int] = None,
        payload: Any = None,
        request_id: Optional[str] = None,
    ) -> None:
        super().__init__(message)
        self.status_code, self.code = status_code, code
        self.payload, self.request_id = payload, request_id

    def __repr__(self) -> str:
        # The payload is left out on purpose: it can be arbitrarily large.
        details = ", ".join(
            (
                f"message={self.message!r}",
                f"status_code={self.status_code}",
                f"code={self.code}",
                f"request_id={self.request_id!r}",
            )
        )
        return f"{self.__class__.__name__}({details})"

    @property
    def is_retryable(self) -> bool:
        """Whether retrying is typically safe; False unless overridden."""
        return False
131
+
132
+
133
class ThordataAuthError(ThordataAPIError):
    """
    Authentication or authorization failures.

    HTTP Status: 401, 403
    Common causes:
        - Invalid or expired token
        - Insufficient permissions
        - IP not whitelisted
    """

    # Codes that raise_for_code maps to this class.
    HTTP_STATUS_CODES = {401, 403}

    @property
    def is_retryable(self) -> bool:
        """Always False: an auth failure should not be retried."""
        return False  # Auth errors shouldn't be retried
149
+
150
+
151
class ThordataRateLimitError(ThordataAPIError):
    """
    Raised for rate limiting and for quota or balance problems.

    HTTP Status: 429, 402
    Common causes:
        - Too many requests per second
        - Insufficient account balance
        - Quota exceeded

    Attributes:
        retry_after: Suggested seconds to wait before retrying, when the
            caller supplied one; otherwise ``None``.
    """

    HTTP_STATUS_CODES = {429, 402}

    def __init__(
        self,
        message: str,
        *,
        retry_after: Optional[int] = None,
        **kwargs: Any,
    ) -> None:
        self.retry_after = retry_after
        # Remaining keyword arguments (status_code, code, payload,
        # request_id) are handled by the base class.
        super().__init__(message, **kwargs)

    @property
    def is_retryable(self) -> bool:
        """Always True: a rate-limited request can be retried after waiting."""
        # NOTE(review): this is also True for 402 (balance/quota), where
        # waiting may not help -- confirm this is intended.
        return True
181
+
182
+
183
class ThordataServerError(ThordataAPIError):
    """
    Server-side errors (5xx).

    HTTP Status: 500, 502, 503, 504
    These are typically transient and safe to retry.
    """

    # The common 5xx codes. raise_for_code does not consult this set; it
    # maps the whole 500-599 range to this class.
    HTTP_STATUS_CODES = {500, 502, 503, 504}

    @property
    def is_retryable(self) -> bool:
        """Always True: server errors are treated as transient."""
        return True
196
+
197
+
198
class ThordataValidationError(ThordataAPIError):
    """
    Request validation errors.

    HTTP Status: 400, 422
    Common causes:
        - Invalid parameters
        - Missing required fields
        - Malformed request body
    """

    # Codes that raise_for_code maps to this class.
    HTTP_STATUS_CODES = {400, 422}

    @property
    def is_retryable(self) -> bool:
        """Always False: resending the same bad request will fail again."""
        return False  # Bad requests shouldn't be retried
214
+
215
+
216
class ThordataNotCollectedError(ThordataAPIError):
    """
    The request was accepted but no valid data could be collected/parsed.

    API Code: 300
    Billing: Not billed (per Thordata billing rules).
    This error is often transient and typically safe to retry.
    """

    # Despite the attribute name, 300 is documented above as an API code
    # rather than an HTTP status; raise_for_code matches it either way.
    HTTP_STATUS_CODES = {300}

    @property
    def is_retryable(self) -> bool:
        """Always True: a "not collected" result is treated as transient."""
        return True
230
+
231
+
232
+ # =============================================================================
233
+ # Exception Factory
234
+ # =============================================================================
235
+
236
+
237
def raise_for_code(
    message: str,
    *,
    status_code: Optional[int] = None,
    code: Optional[int] = None,
    payload: Any = None,
    request_id: Optional[str] = None,
) -> None:
    """
    Raise the exception class that matches an HTTP status / API code.

    This function always raises; it never returns normally.

    The code used for classification is chosen as follows:

    1. ``status_code``, when it is set and is not a 2xx success status.
    2. Otherwise the application-level ``code``, when it is set. This covers
       error codes delivered in the body of an HTTP 200 response.
    3. Otherwise ``status_code`` (possibly ``None``).

    Args:
        message: Human-readable error message.
        status_code: HTTP status code (if available).
        code: Application-level code from the API response.
        payload: Raw API response payload. When it is a dict, its
            ``"retry_after"`` entry is forwarded to ThordataRateLimitError.
        request_id: Optional request ID for debugging.

    Raises:
        ThordataNotCollectedError: For code 300.
        ThordataAuthError: For 401/403 codes.
        ThordataRateLimitError: For 429/402 codes.
        ThordataServerError: For 5xx codes.
        ThordataValidationError: For 400/422 codes.
        ThordataAPIError: For all other codes, including none at all.
    """
    # A 2xx HTTP status says nothing about an application-level failure, so
    # it must not mask the code carried in the response body. A falsy
    # status_code (None or 0) is treated as "not available".
    if status_code and not 200 <= status_code < 300:
        effective_code = status_code
    elif code is not None:
        effective_code = code
    else:
        effective_code = status_code

    # The original status_code and code are both kept on the exception,
    # whichever one drove the classification.
    kwargs = {
        "status_code": status_code,
        "code": code,
        "payload": payload,
        "request_id": request_id,
    }

    # Not collected (often retryable, not billed)
    if effective_code in ThordataNotCollectedError.HTTP_STATUS_CODES:
        raise ThordataNotCollectedError(message, **kwargs)

    # Auth errors
    if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
        raise ThordataAuthError(message, **kwargs)

    # Rate limit errors
    if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
        # Pass along the server's retry hint when the payload carries one.
        retry_after = None
        if isinstance(payload, dict):
            retry_after = payload.get("retry_after")
        raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)

    # Server errors: the whole 5xx range, not only the codes listed on
    # ThordataServerError.HTTP_STATUS_CODES.
    if effective_code is not None and 500 <= effective_code < 600:
        raise ThordataServerError(message, **kwargs)

    # Validation errors
    if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
        raise ThordataValidationError(message, **kwargs)

    # Generic API error
    raise ThordataAPIError(message, **kwargs)
301
+
302
+
303
+ # =============================================================================
304
+ # Retry Helper
305
+ # =============================================================================
306
+
307
+
308
def is_retryable_exception(exc: Exception) -> bool:
    """
    Decide whether an exception is typically safe to retry.

    SDK network errors are always retryable. SDK API errors report their own
    ``is_retryable`` value. Transport errors from ``requests`` and
    ``aiohttp`` are recognised only when those packages can be imported.

    Args:
        exc: The exception to check.

    Returns:
        True if the exception is typically safe to retry, otherwise False.
    """
    if isinstance(exc, ThordataNetworkError):
        return True

    if isinstance(exc, ThordataAPIError):
        # Each API error class decides for itself.
        return exc.is_retryable

    # The HTTP libraries are imported lazily so that neither becomes a hard
    # dependency of this module.
    try:
        import requests
    except ImportError:
        pass
    else:
        if isinstance(exc, (requests.Timeout, requests.ConnectionError)):
            return True

    try:
        import aiohttp
    except ImportError:
        pass
    else:
        if isinstance(exc, (aiohttp.ClientError, aiohttp.ServerTimeoutError)):
            return True

    return False