thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
thordata/enums.py CHANGED
@@ -7,15 +7,16 @@ making it easier to discover available options via IDE autocomplete.
7
7
 
8
8
  from enum import Enum, IntEnum
9
9
 
10
-
11
10
  # =============================================================================
12
11
  # Continent Enum
13
12
  # =============================================================================
14
13
 
14
+
15
15
  class Continent(str, Enum):
16
16
  """
17
17
  Continent codes for geo-targeting.
18
18
  """
19
+
19
20
  AFRICA = "af"
20
21
  ANTARCTICA = "an"
21
22
  ASIA = "as"
@@ -29,10 +30,12 @@ class Continent(str, Enum):
29
30
  # Proxy Host Enum
30
31
  # =============================================================================
31
32
 
33
+
32
34
  class ProxyHost(str, Enum):
33
35
  """
34
36
  Available proxy gateway hosts.
35
37
  """
38
+
36
39
  DEFAULT = "pr.thordata.net"
37
40
  NORTH_AMERICA = "t.na.thordata.net"
38
41
  EUROPE = "t.eu.thordata.net"
@@ -43,6 +46,7 @@ class ProxyPort(IntEnum):
43
46
  """
44
47
  Available proxy gateway ports.
45
48
  """
49
+
46
50
  DEFAULT = 9999
47
51
  MOBILE = 5555
48
52
  DATACENTER = 7777
@@ -54,10 +58,12 @@ class ProxyPort(IntEnum):
54
58
  # Search Engine Enums
55
59
  # =============================================================================
56
60
 
61
+
57
62
  class Engine(str, Enum):
58
63
  """
59
64
  Supported search engines for SERP API.
60
65
  """
66
+
61
67
  GOOGLE = "google"
62
68
  BING = "bing"
63
69
  YANDEX = "yandex"
@@ -71,6 +77,7 @@ class GoogleSearchType(str, Enum):
71
77
  """
72
78
  Search types specific to Google.
73
79
  """
80
+
74
81
  SEARCH = "search"
75
82
  MAPS = "maps"
76
83
  SHOPPING = "shopping"
@@ -88,6 +95,7 @@ class BingSearchType(str, Enum):
88
95
  """
89
96
  Search types specific to Bing.
90
97
  """
98
+
91
99
  SEARCH = "search"
92
100
  IMAGES = "images"
93
101
  VIDEOS = "videos"
@@ -99,6 +107,7 @@ class Device(str, Enum):
99
107
  """
100
108
  Device types for SERP API.
101
109
  """
110
+
102
111
  DESKTOP = "desktop"
103
112
  MOBILE = "mobile"
104
113
  TABLET = "tablet"
@@ -108,6 +117,7 @@ class TimeRange(str, Enum):
108
117
  """
109
118
  Time range filters for search results.
110
119
  """
120
+
111
121
  HOUR = "hour"
112
122
  DAY = "day"
113
123
  WEEK = "week"
@@ -119,10 +129,12 @@ class TimeRange(str, Enum):
119
129
  # Proxy Enums
120
130
  # =============================================================================
121
131
 
132
+
122
133
  class ProxyType(IntEnum):
123
134
  """
124
135
  Types of proxy networks available.
125
136
  """
137
+
126
138
  RESIDENTIAL = 1
127
139
  UNLIMITED = 2
128
140
  DATACENTER = 3
@@ -134,6 +146,7 @@ class SessionType(str, Enum):
134
146
  """
135
147
  Proxy session types for connection persistence.
136
148
  """
149
+
137
150
  ROTATING = "rotating"
138
151
  STICKY = "sticky"
139
152
 
@@ -142,10 +155,12 @@ class SessionType(str, Enum):
142
155
  # Output Format Enums
143
156
  # =============================================================================
144
157
 
158
+
145
159
  class OutputFormat(str, Enum):
146
160
  """
147
161
  Output formats for Universal Scraping API.
148
162
  """
163
+
149
164
  HTML = "html"
150
165
  PNG = "png"
151
166
  PDF = "pdf"
@@ -157,6 +172,7 @@ class DataFormat(str, Enum):
157
172
  """
158
173
  Data formats for task result download.
159
174
  """
175
+
160
176
  JSON = "json"
161
177
  CSV = "csv"
162
178
  XLSX = "xlsx"
@@ -166,10 +182,12 @@ class DataFormat(str, Enum):
166
182
  # Task Status Enums
167
183
  # =============================================================================
168
184
 
185
+
169
186
  class TaskStatus(str, Enum):
170
187
  """
171
188
  Possible statuses for async scraping tasks.
172
189
  """
190
+
173
191
  PENDING = "pending"
174
192
  RUNNING = "running"
175
193
  READY = "ready"
@@ -184,8 +202,12 @@ class TaskStatus(str, Enum):
184
202
  def is_terminal(cls, status: "TaskStatus") -> bool:
185
203
  """Check if a status is terminal (no more updates expected)."""
186
204
  return status in {
187
- cls.READY, cls.SUCCESS, cls.FINISHED,
188
- cls.FAILED, cls.ERROR, cls.CANCELLED
205
+ cls.READY,
206
+ cls.SUCCESS,
207
+ cls.FINISHED,
208
+ cls.FAILED,
209
+ cls.ERROR,
210
+ cls.CANCELLED,
189
211
  }
190
212
 
191
213
  @classmethod
@@ -203,10 +225,12 @@ class TaskStatus(str, Enum):
203
225
  # Country Enum (常用国家)
204
226
  # =============================================================================
205
227
 
228
+
206
229
  class Country(str, Enum):
207
230
  """
208
231
  Common country codes for geo-targeting.
209
232
  """
233
+
210
234
  # North America
211
235
  US = "us"
212
236
  CA = "ca"
@@ -276,14 +300,16 @@ class Country(str, Enum):
276
300
  # Helper Functions
277
301
  # =============================================================================
278
302
 
279
- def normalize_enum_value(value, enum_class: type) -> str:
303
+
304
+ def normalize_enum_value(value: object, enum_class: type) -> str:
280
305
  """
281
306
  Safely convert an enum or string to its string value.
282
307
  """
283
308
  if isinstance(value, enum_class):
284
- return value.value
309
+ # value is an enum member, get its .value
310
+ return str(getattr(value, "value", value)).lower()
285
311
  if isinstance(value, str):
286
312
  return value.lower()
287
313
  raise TypeError(
288
314
  f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
289
- )
315
+ )
thordata/exceptions.py CHANGED
@@ -17,14 +17,14 @@ from __future__ import annotations
17
17
 
18
18
  from typing import Any, Optional, Set
19
19
 
20
-
21
20
  # =============================================================================
22
21
  # Base Exception
23
22
  # =============================================================================
24
23
 
24
+
25
25
  class ThordataError(Exception):
26
26
  """Base error for all Thordata SDK issues."""
27
-
27
+
28
28
  def __init__(self, message: str) -> None:
29
29
  super().__init__(message)
30
30
  self.message = message
@@ -34,14 +34,16 @@ class ThordataError(Exception):
34
34
  # Configuration Errors
35
35
  # =============================================================================
36
36
 
37
+
37
38
  class ThordataConfigError(ThordataError):
38
39
  """
39
40
  Raised when the SDK is misconfigured.
40
-
41
+
41
42
  Examples:
42
43
  - Missing required tokens
43
44
  - Invalid parameter combinations
44
45
  """
46
+
45
47
  pass
46
48
 
47
49
 
@@ -49,13 +51,14 @@ class ThordataConfigError(ThordataError):
49
51
  # Network Errors (Usually Retryable)
50
52
  # =============================================================================
51
53
 
54
+
52
55
  class ThordataNetworkError(ThordataError):
53
56
  """
54
57
  Raised when a network-level error occurs.
55
-
58
+
56
59
  This is typically retryable (DNS failures, connection refused, etc.)
57
60
  """
58
-
61
+
59
62
  def __init__(
60
63
  self,
61
64
  message: str,
@@ -69,9 +72,10 @@ class ThordataNetworkError(ThordataError):
69
72
  class ThordataTimeoutError(ThordataNetworkError):
70
73
  """
71
74
  Raised when a request times out.
72
-
75
+
73
76
  This is typically retryable.
74
77
  """
78
+
75
79
  pass
76
80
 
77
81
 
@@ -79,6 +83,7 @@ class ThordataTimeoutError(ThordataNetworkError):
79
83
  # API Errors
80
84
  # =============================================================================
81
85
 
86
+
82
87
  class ThordataAPIError(ThordataError):
83
88
  """
84
89
  Generic API error raised when the backend returns a non-success code
@@ -128,14 +133,14 @@ class ThordataAPIError(ThordataError):
128
133
  class ThordataAuthError(ThordataAPIError):
129
134
  """
130
135
  Authentication or authorization failures.
131
-
136
+
132
137
  HTTP Status: 401, 403
133
138
  Common causes:
134
139
  - Invalid or expired token
135
140
  - Insufficient permissions
136
141
  - IP not whitelisted
137
142
  """
138
-
143
+
139
144
  HTTP_STATUS_CODES = {401, 403}
140
145
 
141
146
  @property
@@ -146,17 +151,17 @@ class ThordataAuthError(ThordataAPIError):
146
151
  class ThordataRateLimitError(ThordataAPIError):
147
152
  """
148
153
  Rate limiting or quota/balance issues.
149
-
154
+
150
155
  HTTP Status: 429, 402
151
156
  Common causes:
152
157
  - Too many requests per second
153
158
  - Insufficient account balance
154
159
  - Quota exceeded
155
-
160
+
156
161
  Attributes:
157
162
  retry_after: Suggested seconds to wait before retrying (if provided).
158
163
  """
159
-
164
+
160
165
  HTTP_STATUS_CODES = {429, 402}
161
166
 
162
167
  def __init__(
@@ -178,11 +183,11 @@ class ThordataRateLimitError(ThordataAPIError):
178
183
  class ThordataServerError(ThordataAPIError):
179
184
  """
180
185
  Server-side errors (5xx).
181
-
186
+
182
187
  HTTP Status: 500, 502, 503, 504
183
188
  These are typically transient and safe to retry.
184
189
  """
185
-
190
+
186
191
  HTTP_STATUS_CODES = {500, 502, 503, 504}
187
192
 
188
193
  @property
@@ -193,14 +198,14 @@ class ThordataServerError(ThordataAPIError):
193
198
  class ThordataValidationError(ThordataAPIError):
194
199
  """
195
200
  Request validation errors.
196
-
201
+
197
202
  HTTP Status: 400, 422
198
203
  Common causes:
199
204
  - Invalid parameters
200
205
  - Missing required fields
201
206
  - Malformed request body
202
207
  """
203
-
208
+
204
209
  HTTP_STATUS_CODES = {400, 422}
205
210
 
206
211
  @property
@@ -208,10 +213,27 @@ class ThordataValidationError(ThordataAPIError):
208
213
  return False # Bad requests shouldn't be retried
209
214
 
210
215
 
216
+ class ThordataNotCollectedError(ThordataAPIError):
217
+ """
218
+ The request was accepted but no valid data could be collected/parsed.
219
+
220
+ API Code: 300
221
+ Billing: Not billed (per Thordata billing rules).
222
+ This error is often transient and typically safe to retry.
223
+ """
224
+
225
+ HTTP_STATUS_CODES = {300}
226
+
227
+ @property
228
+ def is_retryable(self) -> bool:
229
+ return True
230
+
231
+
211
232
  # =============================================================================
212
233
  # Exception Factory
213
234
  # =============================================================================
214
235
 
236
+
215
237
  def raise_for_code(
216
238
  message: str,
217
239
  *,
@@ -222,17 +244,17 @@ def raise_for_code(
222
244
  ) -> None:
223
245
  """
224
246
  Factory function to raise the appropriate exception based on status/code.
225
-
247
+
226
248
  This centralizes the error-mapping logic that was previously duplicated
227
249
  across multiple methods.
228
-
250
+
229
251
  Args:
230
252
  message: Human-readable error message.
231
253
  status_code: HTTP status code (if available).
232
254
  code: Application-level code from API response.
233
255
  payload: Raw API response payload.
234
256
  request_id: Optional request ID for debugging.
235
-
257
+
236
258
  Raises:
237
259
  ThordataAuthError: For 401/403 codes.
238
260
  ThordataRateLimitError: For 429/402 codes.
@@ -242,18 +264,22 @@ def raise_for_code(
242
264
  """
243
265
  # Use the code from payload if status_code not available
244
266
  effective_code = status_code or code
245
-
267
+
246
268
  kwargs = {
247
269
  "status_code": status_code,
248
270
  "code": code,
249
271
  "payload": payload,
250
272
  "request_id": request_id,
251
273
  }
252
-
274
+
275
+ # Not collected (often retryable, not billed)
276
+ if effective_code in ThordataNotCollectedError.HTTP_STATUS_CODES:
277
+ raise ThordataNotCollectedError(message, **kwargs)
278
+
253
279
  # Auth errors
254
280
  if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
255
281
  raise ThordataAuthError(message, **kwargs)
256
-
282
+
257
283
  # Rate limit errors
258
284
  if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
259
285
  # Try to extract retry_after from payload
@@ -261,15 +287,15 @@ def raise_for_code(
261
287
  if isinstance(payload, dict):
262
288
  retry_after = payload.get("retry_after")
263
289
  raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
264
-
290
+
265
291
  # Server errors
266
292
  if effective_code is not None and 500 <= effective_code < 600:
267
293
  raise ThordataServerError(message, **kwargs)
268
-
294
+
269
295
  # Validation errors
270
296
  if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
271
297
  raise ThordataValidationError(message, **kwargs)
272
-
298
+
273
299
  # Generic API error
274
300
  raise ThordataAPIError(message, **kwargs)
275
301
 
@@ -278,38 +304,41 @@ def raise_for_code(
278
304
  # Retry Helper
279
305
  # =============================================================================
280
306
 
307
+
281
308
  def is_retryable_exception(exc: Exception) -> bool:
282
309
  """
283
310
  Check if an exception is safe to retry.
284
-
311
+
285
312
  Args:
286
313
  exc: The exception to check.
287
-
314
+
288
315
  Returns:
289
316
  True if the exception is typically safe to retry.
290
317
  """
291
318
  # Network errors are retryable
292
319
  if isinstance(exc, ThordataNetworkError):
293
320
  return True
294
-
321
+
295
322
  # Check API errors with is_retryable property
296
323
  if isinstance(exc, ThordataAPIError):
297
324
  return exc.is_retryable
298
-
325
+
299
326
  # requests/aiohttp specific exceptions
300
327
  # (imported dynamically to avoid hard dependency)
301
328
  try:
302
329
  import requests
330
+
303
331
  if isinstance(exc, (requests.Timeout, requests.ConnectionError)):
304
332
  return True
305
333
  except ImportError:
306
334
  pass
307
-
335
+
308
336
  try:
309
337
  import aiohttp
338
+
310
339
  if isinstance(exc, (aiohttp.ClientError, aiohttp.ServerTimeoutError)):
311
340
  return True
312
341
  except ImportError:
313
342
  pass
314
-
315
- return False
343
+
344
+ return False