thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +33 -36
- thordata/_utils.py +21 -21
- thordata/async_client.py +230 -192
- thordata/client.py +281 -222
- thordata/enums.py +32 -6
- thordata/exceptions.py +60 -31
- thordata/models.py +173 -146
- thordata/parameters.py +7 -6
- thordata/retry.py +109 -111
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/METADATA +228 -10
- thordata_sdk-0.5.0.dist-info/RECORD +14 -0
- thordata_sdk-0.4.0.dist-info/RECORD +0 -14
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.4.0.dist-info → thordata_sdk-0.5.0.dist-info}/top_level.txt +0 -0
thordata/parameters.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# src/thordata/parameters.py
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
4
5
|
|
|
5
6
|
def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
|
|
6
7
|
"""
|
|
7
8
|
Normalizes parameters across different search engines to ensure a unified API surface.
|
|
8
|
-
|
|
9
|
+
|
|
9
10
|
Args:
|
|
10
11
|
engine (str): The search engine to use (e.g., 'google', 'yandex').
|
|
11
12
|
query (str): The search query string.
|
|
@@ -17,7 +18,7 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
|
|
|
17
18
|
# 1. Base parameters
|
|
18
19
|
payload = {
|
|
19
20
|
"num": str(kwargs.get("num", 10)), # Default to 10 results
|
|
20
|
-
"json": "1",
|
|
21
|
+
"json": "1", # Force JSON response
|
|
21
22
|
"engine": engine,
|
|
22
23
|
}
|
|
23
24
|
|
|
@@ -29,14 +30,14 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
|
|
|
29
30
|
payload["url"] = "yandex.com"
|
|
30
31
|
else:
|
|
31
32
|
payload["q"] = query
|
|
32
|
-
|
|
33
|
+
|
|
33
34
|
# 3. Handle Default URLs for other engines
|
|
34
35
|
if "url" not in kwargs:
|
|
35
36
|
defaults = {
|
|
36
37
|
"google": "google.com",
|
|
37
38
|
"bing": "bing.com",
|
|
38
39
|
"duckduckgo": "duckduckgo.com",
|
|
39
|
-
"baidu": "baidu.com"
|
|
40
|
+
"baidu": "baidu.com",
|
|
40
41
|
}
|
|
41
42
|
if engine in defaults:
|
|
42
43
|
payload["url"] = defaults[engine]
|
|
@@ -49,4 +50,4 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
|
|
|
49
50
|
if key not in protected_keys:
|
|
50
51
|
payload[key] = value
|
|
51
52
|
|
|
52
|
-
return payload
|
|
53
|
+
return payload
|
thordata/retry.py
CHANGED
|
@@ -6,9 +6,9 @@ in API requests, with support for exponential backoff and jitter.
|
|
|
6
6
|
|
|
7
7
|
Example:
|
|
8
8
|
>>> from thordata.retry import RetryConfig, with_retry
|
|
9
|
-
>>>
|
|
9
|
+
>>>
|
|
10
10
|
>>> config = RetryConfig(max_retries=3, backoff_factor=1.0)
|
|
11
|
-
>>>
|
|
11
|
+
>>>
|
|
12
12
|
>>> @with_retry(config)
|
|
13
13
|
>>> def make_request():
|
|
14
14
|
... return requests.get("https://api.example.com")
|
|
@@ -16,33 +16,28 @@ Example:
|
|
|
16
16
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import time
|
|
20
|
-
import random
|
|
21
19
|
import logging
|
|
20
|
+
import random
|
|
21
|
+
import time
|
|
22
22
|
from dataclasses import dataclass, field
|
|
23
|
-
from typing import (
|
|
24
|
-
Callable, TypeVar, Set, Optional, Union, Tuple, Any
|
|
25
|
-
)
|
|
26
23
|
from functools import wraps
|
|
24
|
+
from typing import Any, Callable, Optional, Set, Tuple
|
|
27
25
|
|
|
28
26
|
from .exceptions import (
|
|
29
|
-
ThordataError,
|
|
30
27
|
ThordataNetworkError,
|
|
31
|
-
ThordataServerError,
|
|
32
28
|
ThordataRateLimitError,
|
|
29
|
+
ThordataServerError,
|
|
33
30
|
is_retryable_exception,
|
|
34
31
|
)
|
|
35
32
|
|
|
36
33
|
logger = logging.getLogger(__name__)
|
|
37
34
|
|
|
38
|
-
T = TypeVar("T")
|
|
39
|
-
|
|
40
35
|
|
|
41
36
|
@dataclass
|
|
42
37
|
class RetryConfig:
|
|
43
38
|
"""
|
|
44
39
|
Configuration for retry behavior.
|
|
45
|
-
|
|
40
|
+
|
|
46
41
|
Attributes:
|
|
47
42
|
max_retries: Maximum number of retry attempts (default: 3).
|
|
48
43
|
backoff_factor: Multiplier for exponential backoff (default: 1.0).
|
|
@@ -52,7 +47,7 @@ class RetryConfig:
|
|
|
52
47
|
jitter_factor: Maximum jitter as fraction of wait time (default: 0.1).
|
|
53
48
|
retry_on_status_codes: HTTP status codes to retry on.
|
|
54
49
|
retry_on_exceptions: Exception types to retry on.
|
|
55
|
-
|
|
50
|
+
|
|
56
51
|
Example:
|
|
57
52
|
>>> config = RetryConfig(
|
|
58
53
|
... max_retries=5,
|
|
@@ -60,18 +55,18 @@ class RetryConfig:
|
|
|
60
55
|
... max_backoff=120
|
|
61
56
|
... )
|
|
62
57
|
"""
|
|
63
|
-
|
|
58
|
+
|
|
64
59
|
max_retries: int = 3
|
|
65
60
|
backoff_factor: float = 1.0
|
|
66
61
|
max_backoff: float = 60.0
|
|
67
62
|
jitter: bool = True
|
|
68
63
|
jitter_factor: float = 0.1
|
|
69
|
-
|
|
64
|
+
|
|
70
65
|
# Status codes to retry on (300, 429 rate limit, and 5xx server errors)
|
|
71
66
|
retry_on_status_codes: Set[int] = field(
|
|
72
|
-
default_factory=lambda: {429, 500, 502, 503, 504}
|
|
67
|
+
default_factory=lambda: {300, 429, 500, 502, 503, 504}
|
|
73
68
|
)
|
|
74
|
-
|
|
69
|
+
|
|
75
70
|
# Exception types to always retry on
|
|
76
71
|
retry_on_exceptions: Tuple[type, ...] = field(
|
|
77
72
|
default_factory=lambda: (
|
|
@@ -79,64 +74,61 @@ class RetryConfig:
|
|
|
79
74
|
ThordataServerError,
|
|
80
75
|
)
|
|
81
76
|
)
|
|
82
|
-
|
|
77
|
+
|
|
83
78
|
def calculate_delay(self, attempt: int) -> float:
    """
    Calculate the delay before the next retry attempt.

    The base delay is ``backoff_factor * 2**attempt``, capped at
    ``max_backoff``. When ``jitter`` is enabled, a random offset of up to
    ``jitter_factor * delay`` in either direction is then applied and the
    result is floored at 0.1 seconds. Because the jitter is added after the
    cap, the returned value may exceed ``max_backoff`` by up to that
    fraction.

    Args:
        attempt: Current attempt number (0-indexed).

    Returns:
        Delay in seconds.
    """
    # Exponential backoff. The power is computed in floating point with a
    # clamped exponent so that a very large attempt number saturates to the
    # cap below instead of raising OverflowError (float * huge int).
    delay = self.backoff_factor * 2.0 ** min(attempt, 1023)

    # Apply maximum cap
    delay = min(delay, self.max_backoff)

    # Add jitter if enabled
    if self.jitter:
        jitter_range = delay * self.jitter_factor
        delay += random.uniform(-jitter_range, jitter_range)
        delay = max(0.1, delay)  # Ensure positive delay

    return delay
|
|
106
|
-
|
|
101
|
+
|
|
107
102
|
def should_retry(
    self, exception: Exception, attempt: int, status_code: Optional[int] = None
) -> bool:
    """
    Decide whether a failed request deserves another attempt.

    Args:
        exception: The exception that was raised.
        attempt: Current attempt number.
        status_code: HTTP status code if available.

    Returns:
        True if the request should be retried.
    """
    # The retry budget is a hard limit that overrides every other rule.
    if attempt >= self.max_retries:
        return False

    # Any one of these is enough: a configured status code, a configured
    # exception type, or a rate-limit error.
    matched = (
        (status_code and status_code in self.retry_on_status_codes)
        or isinstance(exception, self.retry_on_exceptions)
        or isinstance(exception, ThordataRateLimitError)
    )
    if matched:
        return True

    # Nothing matched explicitly; defer to the generic retryable check.
    return is_retryable_exception(exception)
|
|
142
134
|
|
|
@@ -144,23 +136,23 @@ class RetryConfig:
|
|
|
144
136
|
def with_retry(
|
|
145
137
|
config: Optional[RetryConfig] = None,
|
|
146
138
|
on_retry: Optional[Callable[[int, Exception, float], None]] = None,
|
|
147
|
-
) -> Callable
|
|
139
|
+
) -> Callable:
|
|
148
140
|
"""
|
|
149
141
|
Decorator to add retry logic to a function.
|
|
150
|
-
|
|
142
|
+
|
|
151
143
|
Args:
|
|
152
144
|
config: Retry configuration. Uses defaults if not provided.
|
|
153
145
|
on_retry: Optional callback called before each retry.
|
|
154
146
|
Receives (attempt, exception, delay).
|
|
155
|
-
|
|
147
|
+
|
|
156
148
|
Returns:
|
|
157
149
|
Decorated function with retry logic.
|
|
158
|
-
|
|
150
|
+
|
|
159
151
|
Example:
|
|
160
152
|
>>> @with_retry(RetryConfig(max_retries=3))
|
|
161
153
|
... def fetch_data():
|
|
162
154
|
... return requests.get("https://api.example.com")
|
|
163
|
-
|
|
155
|
+
|
|
164
156
|
>>> @with_retry()
|
|
165
157
|
... async def async_fetch():
|
|
166
158
|
... async with aiohttp.ClientSession() as session:
|
|
@@ -168,122 +160,132 @@ def with_retry(
|
|
|
168
160
|
"""
|
|
169
161
|
if config is None:
|
|
170
162
|
config = RetryConfig()
|
|
171
|
-
|
|
172
|
-
def decorator(func: Callable
|
|
163
|
+
|
|
164
|
+
def decorator(func: Callable) -> Callable:
|
|
173
165
|
@wraps(func)
|
|
174
|
-
def sync_wrapper(*args: Any, **kwargs: Any) ->
|
|
166
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
175
167
|
last_exception: Optional[Exception] = None
|
|
176
|
-
|
|
168
|
+
|
|
177
169
|
for attempt in range(config.max_retries + 1):
|
|
178
170
|
try:
|
|
179
171
|
return func(*args, **kwargs)
|
|
180
172
|
except Exception as e:
|
|
181
173
|
last_exception = e
|
|
182
|
-
|
|
183
|
-
# Extract status code if available
|
|
174
|
+
|
|
184
175
|
status_code = _extract_status_code(e)
|
|
185
|
-
|
|
176
|
+
|
|
186
177
|
if not config.should_retry(e, attempt, status_code):
|
|
187
178
|
raise
|
|
188
|
-
|
|
179
|
+
|
|
189
180
|
delay = config.calculate_delay(attempt)
|
|
190
|
-
|
|
191
|
-
# Handle rate limit retry_after
|
|
181
|
+
|
|
192
182
|
if isinstance(e, ThordataRateLimitError) and e.retry_after:
|
|
193
183
|
delay = max(delay, e.retry_after)
|
|
194
|
-
|
|
184
|
+
|
|
195
185
|
logger.warning(
|
|
196
186
|
f"Retry attempt {attempt + 1}/{config.max_retries} "
|
|
197
187
|
f"after {delay:.2f}s due to: {e}"
|
|
198
188
|
)
|
|
199
|
-
|
|
189
|
+
|
|
200
190
|
if on_retry:
|
|
201
191
|
on_retry(attempt, e, delay)
|
|
202
|
-
|
|
192
|
+
|
|
203
193
|
time.sleep(delay)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
194
|
+
|
|
195
|
+
if last_exception:
|
|
196
|
+
raise last_exception
|
|
197
|
+
raise RuntimeError("Unexpected retry loop exit")
|
|
198
|
+
|
|
208
199
|
@wraps(func)
|
|
209
|
-
async def async_wrapper(*args: Any, **kwargs: Any) ->
|
|
200
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
210
201
|
import asyncio
|
|
211
|
-
|
|
202
|
+
|
|
212
203
|
last_exception: Optional[Exception] = None
|
|
213
|
-
|
|
204
|
+
|
|
214
205
|
for attempt in range(config.max_retries + 1):
|
|
215
206
|
try:
|
|
216
207
|
return await func(*args, **kwargs)
|
|
217
208
|
except Exception as e:
|
|
218
209
|
last_exception = e
|
|
219
|
-
|
|
210
|
+
|
|
220
211
|
status_code = _extract_status_code(e)
|
|
221
|
-
|
|
212
|
+
|
|
222
213
|
if not config.should_retry(e, attempt, status_code):
|
|
223
214
|
raise
|
|
224
|
-
|
|
215
|
+
|
|
225
216
|
delay = config.calculate_delay(attempt)
|
|
226
|
-
|
|
217
|
+
|
|
227
218
|
if isinstance(e, ThordataRateLimitError) and e.retry_after:
|
|
228
219
|
delay = max(delay, e.retry_after)
|
|
229
|
-
|
|
220
|
+
|
|
230
221
|
logger.warning(
|
|
231
222
|
f"Async retry attempt {attempt + 1}/{config.max_retries} "
|
|
232
223
|
f"after {delay:.2f}s due to: {e}"
|
|
233
224
|
)
|
|
234
|
-
|
|
225
|
+
|
|
235
226
|
if on_retry:
|
|
236
227
|
on_retry(attempt, e, delay)
|
|
237
|
-
|
|
228
|
+
|
|
238
229
|
await asyncio.sleep(delay)
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
230
|
+
|
|
231
|
+
if last_exception:
|
|
232
|
+
raise last_exception
|
|
233
|
+
raise RuntimeError("Unexpected retry loop exit")
|
|
234
|
+
|
|
242
235
|
# Check if the function is async
|
|
243
236
|
import asyncio
|
|
237
|
+
|
|
244
238
|
if asyncio.iscoroutinefunction(func):
|
|
245
|
-
return async_wrapper
|
|
239
|
+
return async_wrapper
|
|
246
240
|
return sync_wrapper
|
|
247
|
-
|
|
241
|
+
|
|
248
242
|
return decorator
|
|
249
243
|
|
|
250
244
|
|
|
251
245
|
def _extract_status_code(exception: Exception) -> Optional[int]:
    """
    Extract HTTP status code from various exception types.

    Wrapped errors are consulted first: the ``original_error`` chain is
    followed to its innermost exception and searched from the inside out.
    On each exception the attributes ``status_code``, ``code``,
    ``response.status_code`` and ``status`` are tried in that order. An
    attribute that is present but ``None`` or not an integer is skipped
    rather than ending the search, so a later attribute can still supply
    the code.

    Args:
        exception: The exception to extract from.

    Returns:
        HTTP status code if found, None otherwise.
    """
    # Follow original_error links, remembering visited objects by identity
    # so a self-referencing chain cannot loop forever.
    chain = [exception]
    visited = {id(exception)}
    nested = getattr(exception, "original_error", None)
    while isinstance(nested, Exception) and id(nested) not in visited:
        visited.add(id(nested))
        chain.append(nested)
        nested = getattr(nested, "original_error", None)

    # Innermost error first, then outwards to the exception we were given.
    for exc in reversed(chain):
        response = getattr(exc, "response", None)
        candidates = (
            getattr(exc, "status_code", None),  # Thordata exceptions
            getattr(exc, "code", None),
            getattr(response, "status_code", None),  # requests exceptions
            getattr(exc, "status", None),  # aiohttp exceptions
        )
        for candidate in candidates:
            # bool is an int subclass but never a meaningful status code.
            if isinstance(candidate, int) and not isinstance(candidate, bool):
                return candidate

    return None
|
|
278
280
|
|
|
279
281
|
|
|
280
282
|
class RetryableRequest:
|
|
281
283
|
"""
|
|
282
284
|
Context manager for retryable requests with detailed control.
|
|
283
|
-
|
|
285
|
+
|
|
284
286
|
This provides more control than the decorator approach, allowing
|
|
285
287
|
you to check retry status during execution.
|
|
286
|
-
|
|
288
|
+
|
|
287
289
|
Example:
|
|
288
290
|
>>> config = RetryConfig(max_retries=3)
|
|
289
291
|
>>> with RetryableRequest(config) as retry:
|
|
@@ -297,86 +299,82 @@ class RetryableRequest:
|
|
|
297
299
|
... raise
|
|
298
300
|
... retry.wait()
|
|
299
301
|
"""
|
|
300
|
-
|
|
302
|
+
|
|
301
303
|
def __init__(self, config: Optional[RetryConfig] = None) -> None:
    """
    Initialise retry state.

    Args:
        config: Retry configuration. A default ``RetryConfig()`` is used
            when this is falsy (e.g. None).
    """
    # No failure has been recorded and no retry has been granted yet.
    self.last_exception: Optional[Exception] = None
    self.attempt = 0
    self.config = config or RetryConfig()
|
|
305
|
-
|
|
306
|
-
def __enter__(self) ->
|
|
307
|
+
|
|
308
|
+
def __enter__(self) -> RetryableRequest:
    """Enter the context and hand back this object for use in the ``with`` body."""
    return self
|
|
308
|
-
|
|
309
|
-
def __exit__(self, exc_type, exc_val, exc_tb) ->
|
|
310
|
-
|
|
311
|
-
|
|
310
|
+
|
|
311
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Leave the context; returns None, so an exception raised in the body propagates."""
    return None
|
|
313
|
+
|
|
312
314
|
def should_continue(
    self, exception: Exception, status_code: Optional[int] = None
) -> bool:
    """
    Record a failure and report whether another attempt is allowed.

    The exception is stored as ``last_exception``. The attempt counter is
    advanced only when a retry is granted.

    Args:
        exception: The exception that occurred.
        status_code: HTTP status code if available. When omitted it is
            derived from the exception.

    Returns:
        True if we should retry, False otherwise.
    """
    self.last_exception = exception

    code = _extract_status_code(exception) if status_code is None else status_code
    verdict = self.config.should_retry(exception, self.attempt, code)

    if not verdict:
        return verdict

    self.attempt += 1
    return verdict
|
|
340
|
-
|
|
338
|
+
|
|
341
339
|
def wait(self) -> float:
    """
    Wait before the next retry attempt.

    Blocks the calling thread with ``time.sleep``. If the last recorded
    exception is a ``ThordataRateLimitError`` with a truthy ``retry_after``,
    the sleep is at least that long.

    Returns:
        The actual delay used.
    """
    # should_continue() advances the counter when it grants a retry, so the
    # retry being waited for is attempt - 1. Clamp at 0 so that calling
    # wait() before any retry was granted never passes a negative attempt
    # number to calculate_delay().
    delay = self.config.calculate_delay(max(0, self.attempt - 1))

    # Handle rate limit retry_after
    last = self.last_exception
    if isinstance(last, ThordataRateLimitError) and last.retry_after:
        delay = max(delay, last.retry_after)

    logger.debug(f"Waiting {delay:.2f}s before retry {self.attempt}")
    time.sleep(delay)

    return delay
|
|
361
|
-
|
|
359
|
+
|
|
362
360
|
async def async_wait(self) -> float:
    """
    Async version of wait().

    Suspends the current coroutine with ``asyncio.sleep`` instead of
    blocking the thread. If the last recorded exception is a
    ``ThordataRateLimitError`` with a truthy ``retry_after``, the sleep is
    at least that long.

    Returns:
        The actual delay used.
    """
    import asyncio

    # should_continue() advances the counter when it grants a retry, so the
    # retry being waited for is attempt - 1. Clamp at 0 so that calling
    # async_wait() before any retry was granted never passes a negative
    # attempt number to calculate_delay().
    delay = self.config.calculate_delay(max(0, self.attempt - 1))

    # Handle rate limit retry_after
    last = self.last_exception
    if isinstance(last, ThordataRateLimitError) and last.retry_after:
        delay = max(delay, last.retry_after)

    logger.debug(f"Async waiting {delay:.2f}s before retry {self.attempt}")
    await asyncio.sleep(delay)

    return delay
|