thordata-sdk 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/parameters.py ADDED
@@ -0,0 +1,53 @@
1
+ # src/thordata/parameters.py
2
+
3
+ from typing import Any, Dict
4
+
5
+
6
def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
    """
    Build a SERP request payload with a uniform shape for every engine.

    Args:
        engine (str): Search engine identifier (e.g., 'google', 'yandex').
        query (str): The search query string.
        **kwargs: Extra request parameters. ``num`` sets the result count
            (default 10); ``url`` overrides the engine's default host; any
            other key is copied into the payload unchanged.

    Returns:
        Dict[str, Any]: The payload to send to the API.
    """
    # Default host per engine; engines missing from this table get no "url".
    default_hosts = {
        "yandex": "yandex.com",
        "google": "google.com",
        "bing": "bing.com",
        "duckduckgo": "duckduckgo.com",
        "baidu": "baidu.com",
    }

    # Yandex carries the query under 'text'; every other engine uses 'q'.
    query_field = "text" if engine == "yandex" else "q"

    request: Dict[str, Any] = {
        "num": str(kwargs.get("num", 10)),  # result count, sent as a string
        "json": "1",  # ask for a JSON response
        "engine": engine,
        query_field: query,
    }

    # Only fill in a host when the caller did not supply one.
    if "url" not in kwargs and engine in default_hosts:
        request["url"] = default_hosts[engine]

    # Forward engine-specific extras (e.g., tbm, uule, gl) untouched, but never
    # let them replace the keys this function derives itself.
    reserved = ("num", "engine", "q", "text")
    request.update(
        {name: value for name, value in kwargs.items() if name not in reserved}
    )

    return request
thordata/retry.py ADDED
@@ -0,0 +1,380 @@
1
+ """
2
+ Retry mechanism for the Thordata Python SDK.
3
+
4
+ This module provides configurable retry logic for handling transient failures
5
+ in API requests, with support for exponential backoff and jitter.
6
+
7
+ Example:
8
+ >>> from thordata.retry import RetryConfig, with_retry
9
+ >>>
10
+ >>> config = RetryConfig(max_retries=3, backoff_factor=1.0)
11
+ >>>
12
+ >>> @with_retry(config)
13
+ ... def make_request():
14
+ ... return requests.get("https://api.example.com")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import random
21
+ import time
22
+ from dataclasses import dataclass, field
23
+ from functools import wraps
24
+ from typing import Any, Callable, Optional, Set, Tuple
25
+
26
+ from .exceptions import (
27
+ ThordataNetworkError,
28
+ ThordataRateLimitError,
29
+ ThordataServerError,
30
+ is_retryable_exception,
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
@dataclass
class RetryConfig:
    """
    Configuration for retry behavior.

    Attributes:
        max_retries: Maximum number of retry attempts (default: 3).
        backoff_factor: Multiplier for exponential backoff (default: 1.0).
            Wait time = backoff_factor * (2 ** attempt_number)
        max_backoff: Maximum wait time in seconds (default: 60). The cap is
            applied both before and after jitter, so it is a true upper bound.
        jitter: Add random jitter to prevent thundering herd (default: True).
        jitter_factor: Maximum jitter as fraction of wait time (default: 0.1).
        retry_on_status_codes: Status codes to retry on.
        retry_on_exceptions: Exception types to retry on.

    Example:
        >>> config = RetryConfig(
        ...     max_retries=5,
        ...     backoff_factor=2.0,
        ...     max_backoff=120
        ... )
    """

    max_retries: int = 3
    backoff_factor: float = 1.0
    max_backoff: float = 60.0
    jitter: bool = True
    jitter_factor: float = 0.1

    # Status codes that trigger a retry: 429 (rate limit) and common 5xx server
    # errors, plus 300.
    # NOTE(review): 300 is presumably an API-level "try again" code rather than
    # the HTTP redirect status - confirm against the API documentation.
    retry_on_status_codes: Set[int] = field(
        default_factory=lambda: {300, 429, 500, 502, 503, 504}
    )

    # Exception types to always retry on
    retry_on_exceptions: Tuple[type, ...] = field(
        default_factory=lambda: (
            ThordataNetworkError,
            ThordataServerError,
        )
    )

    def calculate_delay(self, attempt: int) -> float:
        """
        Calculate the delay before the next retry attempt.

        Args:
            attempt: Current attempt number (0-indexed).

        Returns:
            Delay in seconds, never larger than ``max_backoff``.
        """
        # Exponential backoff. For very large attempt numbers the power no
        # longer fits in a float; the cap would win anyway, so use it directly.
        try:
            delay = self.backoff_factor * (2**attempt)
        except OverflowError:
            delay = self.max_backoff

        # Apply maximum cap
        delay = min(delay, self.max_backoff)

        # Add jitter if enabled
        if self.jitter:
            jitter_range = delay * self.jitter_factor
            delay += random.uniform(-jitter_range, jitter_range)
            delay = max(0.1, delay)  # Ensure positive delay
            # Jitter is symmetric, so re-apply the cap to keep max_backoff a
            # hard upper bound.
            delay = min(delay, self.max_backoff)

        return delay

    def should_retry(
        self, exception: Exception, attempt: int, status_code: Optional[int] = None
    ) -> bool:
        """
        Determine if a request should be retried.

        Args:
            exception: The exception that was raised.
            attempt: Current attempt number.
            status_code: HTTP status code if available.

        Returns:
            True if the request should be retried.
        """
        # The retry budget is checked first so nothing below can exceed it.
        if attempt >= self.max_retries:
            return False

        # Check status code
        if status_code and status_code in self.retry_on_status_codes:
            return True

        # Check exception type
        if isinstance(exception, self.retry_on_exceptions):
            return True

        # Rate-limit errors are always retryable
        if isinstance(exception, ThordataRateLimitError):
            return True

        # Fall back to the package-level retryable check
        return is_retryable_exception(exception)
134
+
135
+
136
def with_retry(
    config: "Optional[RetryConfig]" = None,
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
) -> Callable:
    """
    Decorator to add retry logic to a function.

    Works for both regular and ``async def`` functions; the matching wrapper
    is chosen when the decorator is applied. The wrapped function is always
    called at least once, even if ``max_retries`` is zero or negative.

    Args:
        config: Retry configuration. Uses defaults if not provided.
        on_retry: Optional callback called before each retry.
            Receives (attempt, exception, delay).

    Returns:
        Decorated function with retry logic.

    Raises:
        Exception: The wrapped function's exception is re-raised once the
            configuration decides it must not be retried.

    Example:
        >>> @with_retry(RetryConfig(max_retries=3))
        ... def fetch_data():
        ...     return requests.get("https://api.example.com")

        >>> @with_retry()
        ... async def async_fetch():
        ...     async with aiohttp.ClientSession() as session:
        ...         return await session.get("https://api.example.com")
    """
    retry_config = config if config is not None else RetryConfig()

    def _plan_retry(exc: Exception, attempt: int, label: str) -> Optional[float]:
        # Shared by both wrappers: returns the delay to sleep before the next
        # attempt, or None when the exception must be re-raised.
        status_code = _extract_status_code(exc)
        if not retry_config.should_retry(exc, attempt, status_code):
            return None

        delay = retry_config.calculate_delay(attempt)

        # A server-provided retry_after takes precedence when it is longer.
        if isinstance(exc, ThordataRateLimitError) and exc.retry_after:
            delay = max(delay, exc.retry_after)

        logger.warning(
            "%s %d/%s after %.2fs due to: %s",
            label,
            attempt + 1,
            retry_config.max_retries,
            delay,
            exc,
        )

        if on_retry:
            on_retry(attempt, exc, delay)

        return delay

    def _attempts() -> range:
        # Read max_retries at call time (the config is mutable) and guarantee
        # at least one attempt so the wrapped function always runs.
        return range(max(0, retry_config.max_retries) + 1)

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            last_exception: Optional[Exception] = None

            for attempt in _attempts():
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    last_exception = exc
                    delay = _plan_retry(exc, attempt, "Retry attempt")
                    if delay is None:
                        raise
                    time.sleep(delay)

            # Only reachable if should_retry() approved a retry on the final
            # attempt (e.g. a custom config); surface the last failure.
            if last_exception is not None:
                raise last_exception
            raise RuntimeError("Unexpected retry loop exit")

        @wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
            import asyncio

            last_exception: Optional[Exception] = None

            for attempt in _attempts():
                try:
                    return await func(*args, **kwargs)
                except Exception as exc:
                    last_exception = exc
                    delay = _plan_retry(exc, attempt, "Async retry attempt")
                    if delay is None:
                        raise
                    await asyncio.sleep(delay)

            if last_exception is not None:
                raise last_exception
            raise RuntimeError("Unexpected retry loop exit")

        # asyncio.iscoroutinefunction is deprecated; inspect provides the
        # supported equivalent.
        import inspect

        if inspect.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
243
+
244
+
245
+ def _extract_status_code(exception: Exception) -> Optional[int]:
246
+ """
247
+ Extract HTTP status code from various exception types.
248
+
249
+ Args:
250
+ exception: The exception to extract from.
251
+
252
+ Returns:
253
+ HTTP status code if found, None otherwise.
254
+ """
255
+ # Unwrap nested/original errors (e.g., ThordataNetworkError(original_error=...))
256
+ if hasattr(exception, "original_error") and exception.original_error:
257
+ nested = exception.original_error
258
+ if isinstance(nested, Exception):
259
+ nested_code = _extract_status_code(nested)
260
+ if nested_code is not None:
261
+ return nested_code
262
+
263
+ # Check Thordata exceptions
264
+ if hasattr(exception, "status_code"):
265
+ return exception.status_code
266
+ if hasattr(exception, "code"):
267
+ return exception.code
268
+
269
+ # Check requests exceptions
270
+ if hasattr(exception, "response"):
271
+ response = exception.response
272
+ if response is not None and hasattr(response, "status_code"):
273
+ return response.status_code
274
+
275
+ # Check aiohttp exceptions
276
+ if hasattr(exception, "status"):
277
+ return exception.status
278
+
279
+ return None
280
+
281
+
282
class RetryableRequest:
    """
    Context manager for retryable requests with detailed control.

    This provides more control than the decorator approach, allowing
    you to check retry status during execution. The context manager itself
    does not catch or suppress exceptions; it only tracks the attempt count
    and the last exception seen.

    Example:
        >>> config = RetryConfig(max_retries=3)
        >>> with RetryableRequest(config) as retry:
        ...     while True:
        ...         try:
        ...             response = requests.get("https://api.example.com")
        ...             response.raise_for_status()
        ...             break
        ...         except Exception as e:
        ...             if not retry.should_continue(e):
        ...                 raise
        ...             retry.wait()
    """

    def __init__(self, config: "Optional[RetryConfig]" = None) -> None:
        self.config = config or RetryConfig()
        # Number of retries approved so far by should_continue().
        self.attempt = 0
        self.last_exception: Optional[Exception] = None

    def __enter__(self) -> "RetryableRequest":
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Returns None, so exceptions propagate out of the with-block.
        pass

    def should_continue(
        self, exception: Exception, status_code: Optional[int] = None
    ) -> bool:
        """
        Check if we should continue retrying.

        Records the exception and, when a retry is approved, advances the
        attempt counter.

        Args:
            exception: The exception that occurred.
            status_code: HTTP status code if available. When omitted it is
                extracted from the exception.

        Returns:
            True if we should retry, False otherwise.
        """
        self.last_exception = exception

        if status_code is None:
            status_code = _extract_status_code(exception)

        should_retry = self.config.should_retry(exception, self.attempt, status_code)

        if should_retry:
            self.attempt += 1

        return should_retry

    def _next_delay(self) -> float:
        """Compute the delay for the upcoming retry without sleeping."""
        # should_continue() has already advanced the counter, so the delay is
        # based on the previous attempt index. Clamp at 0 so calling wait()
        # before any should_continue() does not pass a negative attempt.
        delay = self.config.calculate_delay(max(0, self.attempt - 1))

        failure = self.last_exception
        if failure is None:
            return delay

        # Honor a server-provided retry_after when it is longer.
        if isinstance(failure, ThordataRateLimitError) and failure.retry_after:
            delay = max(delay, failure.retry_after)

        return delay

    def wait(self) -> float:
        """
        Wait before the next retry attempt.

        Returns:
            The actual delay used.
        """
        delay = self._next_delay()

        logger.debug("Waiting %.2fs before retry %s", delay, self.attempt)
        time.sleep(delay)

        return delay

    async def async_wait(self) -> float:
        """
        Async version of wait().

        Returns:
            The actual delay used.
        """
        import asyncio

        delay = self._next_delay()

        logger.debug("Async waiting %.2fs before retry %s", delay, self.attempt)
        await asyncio.sleep(delay)

        return delay