thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +33 -36
- thordata/_utils.py +21 -21
- thordata/async_client.py +230 -192
- thordata/client.py +281 -222
- thordata/enums.py +32 -6
- thordata/exceptions.py +60 -31
- thordata/models.py +173 -146
- thordata/parameters.py +7 -6
- thordata/retry.py +109 -111
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/METADATA +228 -10
- thordata_sdk-0.5.0.dist-info/RECORD +14 -0
- thordata_sdk-0.4.0.dist-info/RECORD +0 -14
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/top_level.txt +0 -0
thordata/enums.py
CHANGED
|
@@ -7,15 +7,16 @@ making it easier to discover available options via IDE autocomplete.
|
|
|
7
7
|
|
|
8
8
|
from enum import Enum, IntEnum
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
# =============================================================================
|
|
12
11
|
# Continent Enum
|
|
13
12
|
# =============================================================================
|
|
14
13
|
|
|
14
|
+
|
|
15
15
|
class Continent(str, Enum):
|
|
16
16
|
"""
|
|
17
17
|
Continent codes for geo-targeting.
|
|
18
18
|
"""
|
|
19
|
+
|
|
19
20
|
AFRICA = "af"
|
|
20
21
|
ANTARCTICA = "an"
|
|
21
22
|
ASIA = "as"
|
|
@@ -29,10 +30,12 @@ class Continent(str, Enum):
|
|
|
29
30
|
# Proxy Host Enum
|
|
30
31
|
# =============================================================================
|
|
31
32
|
|
|
33
|
+
|
|
32
34
|
class ProxyHost(str, Enum):
|
|
33
35
|
"""
|
|
34
36
|
Available proxy gateway hosts.
|
|
35
37
|
"""
|
|
38
|
+
|
|
36
39
|
DEFAULT = "pr.thordata.net"
|
|
37
40
|
NORTH_AMERICA = "t.na.thordata.net"
|
|
38
41
|
EUROPE = "t.eu.thordata.net"
|
|
@@ -43,6 +46,7 @@ class ProxyPort(IntEnum):
|
|
|
43
46
|
"""
|
|
44
47
|
Available proxy gateway ports.
|
|
45
48
|
"""
|
|
49
|
+
|
|
46
50
|
DEFAULT = 9999
|
|
47
51
|
MOBILE = 5555
|
|
48
52
|
DATACENTER = 7777
|
|
@@ -54,10 +58,12 @@ class ProxyPort(IntEnum):
|
|
|
54
58
|
# Search Engine Enums
|
|
55
59
|
# =============================================================================
|
|
56
60
|
|
|
61
|
+
|
|
57
62
|
class Engine(str, Enum):
|
|
58
63
|
"""
|
|
59
64
|
Supported search engines for SERP API.
|
|
60
65
|
"""
|
|
66
|
+
|
|
61
67
|
GOOGLE = "google"
|
|
62
68
|
BING = "bing"
|
|
63
69
|
YANDEX = "yandex"
|
|
@@ -71,6 +77,7 @@ class GoogleSearchType(str, Enum):
|
|
|
71
77
|
"""
|
|
72
78
|
Search types specific to Google.
|
|
73
79
|
"""
|
|
80
|
+
|
|
74
81
|
SEARCH = "search"
|
|
75
82
|
MAPS = "maps"
|
|
76
83
|
SHOPPING = "shopping"
|
|
@@ -88,6 +95,7 @@ class BingSearchType(str, Enum):
|
|
|
88
95
|
"""
|
|
89
96
|
Search types specific to Bing.
|
|
90
97
|
"""
|
|
98
|
+
|
|
91
99
|
SEARCH = "search"
|
|
92
100
|
IMAGES = "images"
|
|
93
101
|
VIDEOS = "videos"
|
|
@@ -99,6 +107,7 @@ class Device(str, Enum):
|
|
|
99
107
|
"""
|
|
100
108
|
Device types for SERP API.
|
|
101
109
|
"""
|
|
110
|
+
|
|
102
111
|
DESKTOP = "desktop"
|
|
103
112
|
MOBILE = "mobile"
|
|
104
113
|
TABLET = "tablet"
|
|
@@ -108,6 +117,7 @@ class TimeRange(str, Enum):
|
|
|
108
117
|
"""
|
|
109
118
|
Time range filters for search results.
|
|
110
119
|
"""
|
|
120
|
+
|
|
111
121
|
HOUR = "hour"
|
|
112
122
|
DAY = "day"
|
|
113
123
|
WEEK = "week"
|
|
@@ -119,10 +129,12 @@ class TimeRange(str, Enum):
|
|
|
119
129
|
# Proxy Enums
|
|
120
130
|
# =============================================================================
|
|
121
131
|
|
|
132
|
+
|
|
122
133
|
class ProxyType(IntEnum):
|
|
123
134
|
"""
|
|
124
135
|
Types of proxy networks available.
|
|
125
136
|
"""
|
|
137
|
+
|
|
126
138
|
RESIDENTIAL = 1
|
|
127
139
|
UNLIMITED = 2
|
|
128
140
|
DATACENTER = 3
|
|
@@ -134,6 +146,7 @@ class SessionType(str, Enum):
|
|
|
134
146
|
"""
|
|
135
147
|
Proxy session types for connection persistence.
|
|
136
148
|
"""
|
|
149
|
+
|
|
137
150
|
ROTATING = "rotating"
|
|
138
151
|
STICKY = "sticky"
|
|
139
152
|
|
|
@@ -142,10 +155,12 @@ class SessionType(str, Enum):
|
|
|
142
155
|
# Output Format Enums
|
|
143
156
|
# =============================================================================
|
|
144
157
|
|
|
158
|
+
|
|
145
159
|
class OutputFormat(str, Enum):
|
|
146
160
|
"""
|
|
147
161
|
Output formats for Universal Scraping API.
|
|
148
162
|
"""
|
|
163
|
+
|
|
149
164
|
HTML = "html"
|
|
150
165
|
PNG = "png"
|
|
151
166
|
PDF = "pdf"
|
|
@@ -157,6 +172,7 @@ class DataFormat(str, Enum):
|
|
|
157
172
|
"""
|
|
158
173
|
Data formats for task result download.
|
|
159
174
|
"""
|
|
175
|
+
|
|
160
176
|
JSON = "json"
|
|
161
177
|
CSV = "csv"
|
|
162
178
|
XLSX = "xlsx"
|
|
@@ -166,10 +182,12 @@ class DataFormat(str, Enum):
|
|
|
166
182
|
# Task Status Enums
|
|
167
183
|
# =============================================================================
|
|
168
184
|
|
|
185
|
+
|
|
169
186
|
class TaskStatus(str, Enum):
|
|
170
187
|
"""
|
|
171
188
|
Possible statuses for async scraping tasks.
|
|
172
189
|
"""
|
|
190
|
+
|
|
173
191
|
PENDING = "pending"
|
|
174
192
|
RUNNING = "running"
|
|
175
193
|
READY = "ready"
|
|
@@ -184,8 +202,12 @@ class TaskStatus(str, Enum):
|
|
|
184
202
|
def is_terminal(cls, status: "TaskStatus") -> bool:
|
|
185
203
|
"""Check if a status is terminal (no more updates expected)."""
|
|
186
204
|
return status in {
|
|
187
|
-
cls.READY,
|
|
188
|
-
cls.
|
|
205
|
+
cls.READY,
|
|
206
|
+
cls.SUCCESS,
|
|
207
|
+
cls.FINISHED,
|
|
208
|
+
cls.FAILED,
|
|
209
|
+
cls.ERROR,
|
|
210
|
+
cls.CANCELLED,
|
|
189
211
|
}
|
|
190
212
|
|
|
191
213
|
@classmethod
|
|
@@ -203,10 +225,12 @@ class TaskStatus(str, Enum):
|
|
|
203
225
|
# Country Enum (commonly used countries)
|
|
204
226
|
# =============================================================================
|
|
205
227
|
|
|
228
|
+
|
|
206
229
|
class Country(str, Enum):
|
|
207
230
|
"""
|
|
208
231
|
Common country codes for geo-targeting.
|
|
209
232
|
"""
|
|
233
|
+
|
|
210
234
|
# North America
|
|
211
235
|
US = "us"
|
|
212
236
|
CA = "ca"
|
|
@@ -276,14 +300,16 @@ class Country(str, Enum):
|
|
|
276
300
|
# Helper Functions
|
|
277
301
|
# =============================================================================
|
|
278
302
|
|
|
279
|
-
|
|
303
|
+
|
|
304
|
+
def normalize_enum_value(value: object, enum_class: type) -> str:
    """
    Return the lowercase string form of an enum member or a plain string.

    Args:
        value: Either an instance of ``enum_class`` or a ``str``.
        enum_class: The enum type whose members are accepted.

    Returns:
        For an ``enum_class`` instance, ``str()`` of its ``.value``
        lowercased; for a string, the string lowercased.

    Raises:
        TypeError: If ``value`` is neither an ``enum_class`` instance nor a str.
    """
    if isinstance(value, enum_class):
        # Read the member's payload; fall back to the object itself when it
        # has no ``value`` attribute.
        member_value = getattr(value, "value", value)
        return str(member_value).lower()

    if not isinstance(value, str):
        raise TypeError(
            f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
        )

    return value.lower()
|
thordata/exceptions.py
CHANGED
|
@@ -17,14 +17,14 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
from typing import Any, Optional, Set
|
|
19
19
|
|
|
20
|
-
|
|
21
20
|
# =============================================================================
|
|
22
21
|
# Base Exception
|
|
23
22
|
# =============================================================================
|
|
24
23
|
|
|
24
|
+
|
|
25
25
|
class ThordataError(Exception):
|
|
26
26
|
"""Base error for all Thordata SDK issues."""
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
def __init__(self, message: str) -> None:
|
|
29
29
|
super().__init__(message)
|
|
30
30
|
self.message = message
|
|
@@ -34,14 +34,16 @@ class ThordataError(Exception):
|
|
|
34
34
|
# Configuration Errors
|
|
35
35
|
# =============================================================================
|
|
36
36
|
|
|
37
|
+
|
|
37
38
|
class ThordataConfigError(ThordataError):
    """
    Error signalling that the SDK has been configured incorrectly.

    Examples:
        - Missing required tokens
        - Invalid parameter combinations
    """
|
|
46
48
|
|
|
47
49
|
|
|
@@ -49,13 +51,14 @@ class ThordataConfigError(ThordataError):
|
|
|
49
51
|
# Network Errors (Usually Retryable)
|
|
50
52
|
# =============================================================================
|
|
51
53
|
|
|
54
|
+
|
|
52
55
|
class ThordataNetworkError(ThordataError):
|
|
53
56
|
"""
|
|
54
57
|
Raised when a network-level error occurs.
|
|
55
|
-
|
|
58
|
+
|
|
56
59
|
This is typically retryable (DNS failures, connection refused, etc.)
|
|
57
60
|
"""
|
|
58
|
-
|
|
61
|
+
|
|
59
62
|
def __init__(
|
|
60
63
|
self,
|
|
61
64
|
message: str,
|
|
@@ -69,9 +72,10 @@ class ThordataNetworkError(ThordataError):
|
|
|
69
72
|
class ThordataTimeoutError(ThordataNetworkError):
    """
    Network error raised when a request exceeds its time limit.

    Like other network errors, this is typically retryable.
    """
|
|
76
80
|
|
|
77
81
|
|
|
@@ -79,6 +83,7 @@ class ThordataTimeoutError(ThordataNetworkError):
|
|
|
79
83
|
# API Errors
|
|
80
84
|
# =============================================================================
|
|
81
85
|
|
|
86
|
+
|
|
82
87
|
class ThordataAPIError(ThordataError):
|
|
83
88
|
"""
|
|
84
89
|
Generic API error raised when the backend returns a non-success code
|
|
@@ -128,14 +133,14 @@ class ThordataAPIError(ThordataError):
|
|
|
128
133
|
class ThordataAuthError(ThordataAPIError):
|
|
129
134
|
"""
|
|
130
135
|
Authentication or authorization failures.
|
|
131
|
-
|
|
136
|
+
|
|
132
137
|
HTTP Status: 401, 403
|
|
133
138
|
Common causes:
|
|
134
139
|
- Invalid or expired token
|
|
135
140
|
- Insufficient permissions
|
|
136
141
|
- IP not whitelisted
|
|
137
142
|
"""
|
|
138
|
-
|
|
143
|
+
|
|
139
144
|
HTTP_STATUS_CODES = {401, 403}
|
|
140
145
|
|
|
141
146
|
@property
|
|
@@ -146,17 +151,17 @@ class ThordataAuthError(ThordataAPIError):
|
|
|
146
151
|
class ThordataRateLimitError(ThordataAPIError):
|
|
147
152
|
"""
|
|
148
153
|
Rate limiting or quota/balance issues.
|
|
149
|
-
|
|
154
|
+
|
|
150
155
|
HTTP Status: 429, 402
|
|
151
156
|
Common causes:
|
|
152
157
|
- Too many requests per second
|
|
153
158
|
- Insufficient account balance
|
|
154
159
|
- Quota exceeded
|
|
155
|
-
|
|
160
|
+
|
|
156
161
|
Attributes:
|
|
157
162
|
retry_after: Suggested seconds to wait before retrying (if provided).
|
|
158
163
|
"""
|
|
159
|
-
|
|
164
|
+
|
|
160
165
|
HTTP_STATUS_CODES = {429, 402}
|
|
161
166
|
|
|
162
167
|
def __init__(
|
|
@@ -178,11 +183,11 @@ class ThordataRateLimitError(ThordataAPIError):
|
|
|
178
183
|
class ThordataServerError(ThordataAPIError):
|
|
179
184
|
"""
|
|
180
185
|
Server-side errors (5xx).
|
|
181
|
-
|
|
186
|
+
|
|
182
187
|
HTTP Status: 500, 502, 503, 504
|
|
183
188
|
These are typically transient and safe to retry.
|
|
184
189
|
"""
|
|
185
|
-
|
|
190
|
+
|
|
186
191
|
HTTP_STATUS_CODES = {500, 502, 503, 504}
|
|
187
192
|
|
|
188
193
|
@property
|
|
@@ -193,14 +198,14 @@ class ThordataServerError(ThordataAPIError):
|
|
|
193
198
|
class ThordataValidationError(ThordataAPIError):
|
|
194
199
|
"""
|
|
195
200
|
Request validation errors.
|
|
196
|
-
|
|
201
|
+
|
|
197
202
|
HTTP Status: 400, 422
|
|
198
203
|
Common causes:
|
|
199
204
|
- Invalid parameters
|
|
200
205
|
- Missing required fields
|
|
201
206
|
- Malformed request body
|
|
202
207
|
"""
|
|
203
|
-
|
|
208
|
+
|
|
204
209
|
HTTP_STATUS_CODES = {400, 422}
|
|
205
210
|
|
|
206
211
|
@property
|
|
@@ -208,10 +213,27 @@ class ThordataValidationError(ThordataAPIError):
|
|
|
208
213
|
return False # Bad requests shouldn't be retried
|
|
209
214
|
|
|
210
215
|
|
|
216
|
+
class ThordataNotCollectedError(ThordataAPIError):
    """
    The request was accepted, yet no valid data could be collected/parsed.

    API Code: 300
    Billing: Not billed (per Thordata billing rules).
    The condition is often transient, so retrying is typically safe.
    """

    # Codes that map to this exception class.
    HTTP_STATUS_CODES = {300}

    @property
    def is_retryable(self) -> bool:
        """Always ``True``: this error is treated as safe to retry."""
        return True
|
|
230
|
+
|
|
231
|
+
|
|
211
232
|
# =============================================================================
|
|
212
233
|
# Exception Factory
|
|
213
234
|
# =============================================================================
|
|
214
235
|
|
|
236
|
+
|
|
215
237
|
def raise_for_code(
|
|
216
238
|
message: str,
|
|
217
239
|
*,
|
|
@@ -222,17 +244,17 @@ def raise_for_code(
|
|
|
222
244
|
) -> None:
|
|
223
245
|
"""
|
|
224
246
|
Factory function to raise the appropriate exception based on status/code.
|
|
225
|
-
|
|
247
|
+
|
|
226
248
|
This centralizes the error-mapping logic that was previously duplicated
|
|
227
249
|
across multiple methods.
|
|
228
|
-
|
|
250
|
+
|
|
229
251
|
Args:
|
|
230
252
|
message: Human-readable error message.
|
|
231
253
|
status_code: HTTP status code (if available).
|
|
232
254
|
code: Application-level code from API response.
|
|
233
255
|
payload: Raw API response payload.
|
|
234
256
|
request_id: Optional request ID for debugging.
|
|
235
|
-
|
|
257
|
+
|
|
236
258
|
Raises:
|
|
237
259
|
ThordataAuthError: For 401/403 codes.
|
|
238
260
|
ThordataRateLimitError: For 429/402 codes.
|
|
@@ -242,18 +264,22 @@ def raise_for_code(
|
|
|
242
264
|
"""
|
|
243
265
|
# Use the code from payload if status_code not available
|
|
244
266
|
effective_code = status_code or code
|
|
245
|
-
|
|
267
|
+
|
|
246
268
|
kwargs = {
|
|
247
269
|
"status_code": status_code,
|
|
248
270
|
"code": code,
|
|
249
271
|
"payload": payload,
|
|
250
272
|
"request_id": request_id,
|
|
251
273
|
}
|
|
252
|
-
|
|
274
|
+
|
|
275
|
+
# Not collected (often retryable, not billed)
|
|
276
|
+
if effective_code in ThordataNotCollectedError.HTTP_STATUS_CODES:
|
|
277
|
+
raise ThordataNotCollectedError(message, **kwargs)
|
|
278
|
+
|
|
253
279
|
# Auth errors
|
|
254
280
|
if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
|
|
255
281
|
raise ThordataAuthError(message, **kwargs)
|
|
256
|
-
|
|
282
|
+
|
|
257
283
|
# Rate limit errors
|
|
258
284
|
if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
|
|
259
285
|
# Try to extract retry_after from payload
|
|
@@ -261,15 +287,15 @@ def raise_for_code(
|
|
|
261
287
|
if isinstance(payload, dict):
|
|
262
288
|
retry_after = payload.get("retry_after")
|
|
263
289
|
raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
|
|
264
|
-
|
|
290
|
+
|
|
265
291
|
# Server errors
|
|
266
292
|
if effective_code is not None and 500 <= effective_code < 600:
|
|
267
293
|
raise ThordataServerError(message, **kwargs)
|
|
268
|
-
|
|
294
|
+
|
|
269
295
|
# Validation errors
|
|
270
296
|
if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
|
|
271
297
|
raise ThordataValidationError(message, **kwargs)
|
|
272
|
-
|
|
298
|
+
|
|
273
299
|
# Generic API error
|
|
274
300
|
raise ThordataAPIError(message, **kwargs)
|
|
275
301
|
|
|
@@ -278,38 +304,41 @@ def raise_for_code(
|
|
|
278
304
|
# Retry Helper
|
|
279
305
|
# =============================================================================
|
|
280
306
|
|
|
307
|
+
|
|
281
308
|
def is_retryable_exception(exc: Exception) -> bool:
    """
    Check if an exception is safe to retry.

    SDK exceptions are classified first; exceptions from the optional HTTP
    libraries (``requests`` / ``aiohttp``) are only inspected when the
    corresponding library can be imported.

    Args:
        exc: The exception to check.

    Returns:
        True if the exception is typically safe to retry, False otherwise.
    """
    # Network errors are retryable
    if isinstance(exc, ThordataNetworkError):
        return True

    # API errors decide for themselves via their is_retryable property
    if isinstance(exc, ThordataAPIError):
        return exc.is_retryable

    # requests/aiohttp specific exceptions
    # (imported dynamically to avoid a hard dependency; only the import itself
    # is guarded, so nothing else can be masked by the ImportError handler)
    try:
        import requests
    except ImportError:
        pass
    else:
        if isinstance(exc, (requests.Timeout, requests.ConnectionError)):
            return True

    try:
        import aiohttp
    except ImportError:
        pass
    else:
        import asyncio

        # aiohttp.ClientError already covers ServerTimeoutError (a subclass).
        # aiohttp's overall request timeout surfaces as asyncio.TimeoutError,
        # which is not a ClientError, so it is checked explicitly.
        # NOTE(review): ClientError is broader than the requests branch above
        # (it is the base of all aiohttp client errors); kept as-is for
        # backward compatibility.
        if isinstance(exc, (aiohttp.ClientError, asyncio.TimeoutError)):
            return True

    return False
|