thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/retry.py ADDED
@@ -0,0 +1,382 @@
1
+ """
2
+ Retry mechanism for the Thordata Python SDK.
3
+
4
+ This module provides configurable retry logic for handling transient failures
5
+ in API requests, with support for exponential backoff and jitter.
6
+
7
+ Example:
8
+ >>> from thordata.retry import RetryConfig, with_retry
9
+ >>>
10
+ >>> config = RetryConfig(max_retries=3, backoff_factor=1.0)
11
+ >>>
12
+ >>> @with_retry(config)
13
+ ... def make_request():
14
+ ... return requests.get("https://api.example.com")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import inspect
20
+ import logging
21
+ import random
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from functools import wraps
25
+ from typing import Any, Callable
26
+
27
+ from .exceptions import (
28
+ ThordataNetworkError,
29
+ ThordataRateLimitError,
30
+ ThordataServerError,
31
+ is_retryable_exception,
32
+ )
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
@dataclass
class RetryConfig:
    """
    Configuration for retry behavior.

    Attributes:
        max_retries: Maximum number of retry attempts (default: 3).
        backoff_factor: Multiplier for exponential backoff (default: 1.0).
            Wait time = backoff_factor * (2 ** attempt_number)
        max_backoff: Maximum wait time in seconds (default: 60). The cap is
            applied both before and after jitter, so jitter can never push
            the delay above it.
        jitter: Add random jitter to prevent thundering herd (default: True).
        jitter_factor: Maximum jitter as fraction of wait time (default: 0.1).
        retry_on_status_codes: HTTP status codes to retry on.
        retry_on_api_codes: API response body codes (default: {300}).
            Stored on the config only; ``should_retry`` does not read it.
        retry_on_exceptions: Exception types to retry on.

    Example:
        >>> config = RetryConfig(
        ...     max_retries=5,
        ...     backoff_factor=2.0,
        ...     max_backoff=120
        ... )
    """

    max_retries: int = 3
    backoff_factor: float = 1.0
    max_backoff: float = 60.0
    jitter: bool = True
    jitter_factor: float = 0.1

    # Status codes to retry on (5xx server errors + 429 rate limit)
    retry_on_status_codes: set[int] = field(
        default_factory=lambda: {429, 500, 502, 503, 504}
    )
    retry_on_api_codes: set[int] = field(
        default_factory=lambda: {300}  # API response body code
    )

    # Exception types to always retry on
    retry_on_exceptions: tuple[type, ...] = field(
        default_factory=lambda: (
            ThordataNetworkError,
            ThordataServerError,
        )
    )

    def calculate_delay(self, attempt: int) -> float:
        """
        Calculate the delay before the next retry attempt.

        Args:
            attempt: Current attempt number (0-indexed).

        Returns:
            Delay in seconds. When jitter is enabled the result is at least
            0.1 seconds; that floor takes precedence over ``max_backoff``.
        """
        # Exponential backoff. For very large attempts the integer power
        # cannot be converted to float; the cap would win anyway, so fall
        # back to it instead of propagating OverflowError.
        try:
            delay = self.backoff_factor * (2**attempt)
        except OverflowError:
            delay = self.max_backoff if self.backoff_factor > 0 else 0.0

        # Apply maximum cap
        delay = min(delay, self.max_backoff)

        # Add jitter if enabled
        if self.jitter:
            jitter_range = delay * self.jitter_factor
            delay += random.uniform(-jitter_range, jitter_range)
            # Re-apply the cap: positive jitter must not exceed max_backoff.
            delay = min(delay, self.max_backoff)
            delay = max(0.1, delay)  # Ensure positive delay

        return delay

    def should_retry(
        self, exception: Exception, attempt: int, status_code: int | None = None
    ) -> bool:
        """
        Determine if a request should be retried.

        Checks are made in order: retry budget, HTTP status code, configured
        exception types, rate-limit errors, then the generic
        ``is_retryable_exception`` helper.

        Args:
            exception: The exception that was raised.
            attempt: Current attempt number.
            status_code: HTTP status code if available.

        Returns:
            True if the request should be retried.
        """
        # Check if we've exceeded max retries
        if attempt >= self.max_retries:
            return False

        # Check status code
        if status_code and status_code in self.retry_on_status_codes:
            return True

        # Check exception type
        if isinstance(exception, self.retry_on_exceptions):
            return True

        # Rate-limit errors are always retryable
        if isinstance(exception, ThordataRateLimitError):
            return True

        # Use generic retryable check
        return is_retryable_exception(exception)
138
+
139
+
140
def with_retry(
    config: RetryConfig | None = None,
    on_retry: Callable[[int, Exception, float], None] | None = None,
) -> Callable:
    """
    Decorator to add retry logic to a function.

    Works for plain functions and for ``async def`` coroutine functions; the
    matching wrapper is chosen with ``inspect.iscoroutinefunction``. The
    wrapped function is always called at least once, even when
    ``config.max_retries`` is negative.

    Args:
        config: Retry configuration. Uses defaults if not provided.
        on_retry: Optional callback called before each retry.
            Receives (attempt, exception, delay), where ``attempt`` is the
            0-indexed attempt that just failed. It is called synchronously,
            including from the async wrapper.

    Returns:
        Decorated function with retry logic.

    Raises:
        Exception: Whatever the wrapped function raised, re-raised as soon
            as ``config.should_retry`` declines another attempt.

    Example:
        >>> @with_retry(RetryConfig(max_retries=3))
        ... def fetch_data():
        ...     return requests.get("https://api.example.com")

        >>> @with_retry()
        ... async def async_fetch():
        ...     async with aiohttp.ClientSession() as session:
        ...         return await session.get("https://api.example.com")
    """
    # Function-scope import, placed before the wrapper that needs it.
    import asyncio

    retry_config = config if config is not None else RetryConfig()

    def _plan_retry(exc: Exception, attempt: int, label: str) -> float | None:
        """Return the delay before the next attempt, or None to give up."""
        status_code = _extract_status_code(exc)
        if not retry_config.should_retry(exc, attempt, status_code):
            return None

        delay = retry_config.calculate_delay(attempt)

        # A server-provided retry_after wins when it is longer than backoff.
        if isinstance(exc, ThordataRateLimitError) and exc.retry_after:
            delay = max(delay, exc.retry_after)

        logger.warning(
            f"{label} {attempt + 1}/{retry_config.max_retries} "
            f"after {delay:.2f}s due to: {exc}"
        )

        if on_retry:
            on_retry(attempt, exc, delay)

        return delay

    def _attempts() -> range:
        """Attempt indices for one call; never empty."""
        # max_retries is read per call so later changes to the config apply.
        return range(max(retry_config.max_retries, 0) + 1)

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            last_exception: Exception | None = None

            for attempt in _attempts():
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    last_exception = e
                    delay = _plan_retry(e, attempt, "Retry attempt")
                    if delay is None:
                        raise
                    time.sleep(delay)

            # Only reachable if should_retry approved a retry on the final
            # attempt (e.g. a custom config); surface the last failure.
            if last_exception is not None:
                raise last_exception
            raise RuntimeError("Unexpected retry loop exit")

        @wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
            last_exception: Exception | None = None

            for attempt in _attempts():
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    last_exception = e
                    delay = _plan_retry(e, attempt, "Async retry attempt")
                    if delay is None:
                        raise
                    await asyncio.sleep(delay)

            if last_exception is not None:
                raise last_exception
            raise RuntimeError("Unexpected retry loop exit")

        # Pick the wrapper that matches the decorated function's kind.
        if inspect.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
245
+
246
+
247
+ def _extract_status_code(exception: Exception) -> int | None:
248
+ """
249
+ Extract HTTP status code from various exception types.
250
+
251
+ Args:
252
+ exception: The exception to extract from.
253
+
254
+ Returns:
255
+ HTTP status code if found, None otherwise.
256
+ """
257
+ # Unwrap nested/original errors (e.g., ThordataNetworkError(original_error=...))
258
+ if hasattr(exception, "original_error") and exception.original_error:
259
+ nested = exception.original_error
260
+ if isinstance(nested, Exception):
261
+ nested_code = _extract_status_code(nested)
262
+ if nested_code is not None:
263
+ return nested_code
264
+
265
+ # Check Thordata exceptions
266
+ if hasattr(exception, "status_code"):
267
+ return exception.status_code
268
+ if hasattr(exception, "code"):
269
+ return exception.code
270
+
271
+ # Check requests exceptions
272
+ if hasattr(exception, "response"):
273
+ response = exception.response
274
+ if response is not None and hasattr(response, "status_code"):
275
+ return response.status_code
276
+
277
+ # Check aiohttp exceptions
278
+ if hasattr(exception, "status"):
279
+ return exception.status
280
+
281
+ return None
282
+
283
+
284
class RetryableRequest:
    """
    Context manager that tracks retry state for a hand-written retry loop.

    Unlike the decorator, the caller owns the loop: ask ``should_continue``
    after each failure and call ``wait`` (or ``async_wait``) before trying
    again. Exceptions raised inside the ``with`` body are not suppressed.

    Example:
        >>> config = RetryConfig(max_retries=3)
        >>> with RetryableRequest(config) as retry:
        ...     while True:
        ...         try:
        ...             response = requests.get("https://api.example.com")
        ...             response.raise_for_status()
        ...             break
        ...         except Exception as e:
        ...             if not retry.should_continue(e):
        ...                 raise
        ...             retry.wait()
    """

    def __init__(self, config: RetryConfig | None = None) -> None:
        self.config = config or RetryConfig()
        # Number of retries approved so far.
        self.attempt = 0
        # Most recent exception passed to should_continue().
        self.last_exception: Exception | None = None

    def __enter__(self) -> RetryableRequest:
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Nothing to release; returning None lets exceptions propagate.
        return None

    def should_continue(
        self, exception: Exception, status_code: int | None = None
    ) -> bool:
        """
        Record a failure and decide whether another attempt is allowed.

        Args:
            exception: The exception that occurred.
            status_code: HTTP status code if available; when omitted it is
                looked up from the exception.

        Returns:
            True if we should retry, False otherwise. The attempt counter
            is advanced only when a retry is approved.
        """
        self.last_exception = exception

        code = status_code
        if code is None:
            code = _extract_status_code(exception)

        verdict = self.config.should_retry(exception, self.attempt, code)
        if verdict:
            self.attempt += 1
        return verdict

    def _pending_delay(self) -> float:
        """Compute the delay for the retry most recently approved."""
        # attempt was already incremented by should_continue(), hence the -1.
        delay = self.config.calculate_delay(self.attempt - 1)

        failure = self.last_exception
        # Handle rate limit retry_after
        if isinstance(failure, ThordataRateLimitError) and failure.retry_after:
            delay = max(delay, failure.retry_after)

        return delay

    def wait(self) -> float:
        """
        Block until the next retry attempt is due.

        Returns:
            The actual delay used.
        """
        delay = self._pending_delay()
        logger.debug(f"Waiting {delay:.2f}s before retry {self.attempt}")
        time.sleep(delay)
        return delay

    async def async_wait(self) -> float:
        """
        Async version of wait().

        Returns:
            The actual delay used.
        """
        import asyncio

        delay = self._pending_delay()
        logger.debug(f"Async waiting {delay:.2f}s before retry {self.attempt}")
        await asyncio.sleep(delay)
        return delay
@@ -0,0 +1,166 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from .async_client import AsyncThordataClient
7
+ from .client import ThordataClient
8
+
9
+ # --- Sync Engines ---
10
+
11
+
12
class EngineBase:
    """Base class for synchronous engine namespaces; holds the client."""

    def __init__(self, client: ThordataClient) -> None:
        # Stored privately for use by subclass methods.
        self._client = client
15
+
16
+
17
class GoogleEngine(EngineBase):
    """Google-specific SERP helpers for the synchronous client.

    Every public method forwards to ``self._client.serp_search`` with a
    fixed ``engine`` value plus any extra keyword arguments.
    """

    def _dispatch(
        self, engine: str, query: str, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Call ``serp_search`` for ``query`` with ``engine`` and ``params``."""
        return self._client.serp_search(query, engine=engine, **params)

    def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google"``."""
        return self._dispatch("google", query, kwargs)

    def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_news"``."""
        return self._dispatch("google_news", query, kwargs)

    def jobs(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_jobs"``."""
        return self._dispatch("google_jobs", query, kwargs)

    def shopping(
        self, query: str, product_id: str | None = None, **kwargs: Any
    ) -> dict[str, Any]:
        """Search shopping results, or a single product.

        A truthy ``product_id`` is forwarded and switches the engine to
        ``google_product``; otherwise ``google_shopping`` is used.
        """
        if not product_id:
            return self._dispatch("google_shopping", query, kwargs)
        kwargs["product_id"] = product_id
        return self._dispatch("google_product", query, kwargs)

    def maps(
        self, query: str, coordinates: str | None = None, **kwargs: Any
    ) -> dict[str, Any]:
        """Search with ``engine="google_maps"``.

        A truthy ``coordinates`` value is forwarded as the ``ll`` parameter.
        """
        if coordinates:
            kwargs["ll"] = coordinates
        return self._dispatch("google_maps", query, kwargs)

    def flights(
        self,
        query: str = "",
        departure_id: str | None = None,
        arrival_id: str | None = None,
        outbound_date: str | None = None,
        return_date: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Search with ``engine="google_flights"``.

        Each of the four named options is forwarded under its own name, but
        only when its value is truthy.
        """
        optional = {
            "departure_id": departure_id,
            "arrival_id": arrival_id,
            "outbound_date": outbound_date,
            "return_date": return_date,
        }
        kwargs.update({name: value for name, value in optional.items() if value})
        return self._dispatch("google_flights", query, kwargs)

    def patents(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_patents"``."""
        return self._dispatch("google_patents", query, kwargs)

    def trends(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_trends"``."""
        return self._dispatch("google_trends", query, kwargs)
68
+
69
+
70
class BingEngine(EngineBase):
    """Bing-specific SERP helpers for the synchronous client."""

    def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="bing"``; extra keyword arguments pass through."""
        results = self._client.serp_search(query, engine="bing", **kwargs)
        return results

    def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="bing_news"``; extra keyword arguments pass through."""
        results = self._client.serp_search(query, engine="bing_news", **kwargs)
        return results
76
+
77
+
78
class SerpNamespace:
    """Groups the per-engine helpers for the synchronous client.

    Exposes ``google`` and ``bing`` sub-namespaces and a generic ``search``
    that forwards straight to the client.
    """

    def __init__(self, client: ThordataClient) -> None:
        self._client = client
        # Both engine helpers share the same client instance.
        self.google = GoogleEngine(client)
        self.bing = BingEngine(client)

    def search(self, *args: Any, **kwargs: Any):
        """Forward all arguments unchanged to ``client.serp_search``."""
        client = self._client
        return client.serp_search(*args, **kwargs)
86
+
87
+
88
+ # --- Async Engines ---
89
+
90
+
91
class AsyncEngineBase:
    """Base class for asynchronous engine namespaces; holds the client."""

    def __init__(self, client: AsyncThordataClient) -> None:
        # Stored privately for use by subclass methods.
        self._client = client
94
+
95
+
96
class AsyncGoogleEngine(AsyncEngineBase):
    """Google-specific SERP helpers for the asynchronous client.

    Every public coroutine awaits ``self._client.serp_search`` with a fixed
    ``engine`` value plus any extra keyword arguments.
    """

    async def _dispatch(
        self, engine: str, query: str, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Await ``serp_search`` for ``query`` with ``engine`` and ``params``."""
        return await self._client.serp_search(query, engine=engine, **params)

    async def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google"``."""
        return await self._dispatch("google", query, kwargs)

    async def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_news"``."""
        return await self._dispatch("google_news", query, kwargs)

    async def jobs(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_jobs"``."""
        return await self._dispatch("google_jobs", query, kwargs)

    async def shopping(
        self, query: str, product_id: str | None = None, **kwargs: Any
    ) -> dict[str, Any]:
        """Search shopping results, or a single product.

        A truthy ``product_id`` is forwarded and switches the engine to
        ``google_product``; otherwise ``google_shopping`` is used.
        """
        if not product_id:
            return await self._dispatch("google_shopping", query, kwargs)
        kwargs["product_id"] = product_id
        return await self._dispatch("google_product", query, kwargs)

    async def maps(
        self, query: str, coordinates: str | None = None, **kwargs: Any
    ) -> dict[str, Any]:
        """Search with ``engine="google_maps"``.

        A truthy ``coordinates`` value is forwarded as the ``ll`` parameter.
        """
        if coordinates:
            kwargs["ll"] = coordinates
        return await self._dispatch("google_maps", query, kwargs)

    async def flights(
        self,
        query: str = "",
        departure_id: str | None = None,
        arrival_id: str | None = None,
        outbound_date: str | None = None,
        return_date: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Search with ``engine="google_flights"``.

        Each of the four named options is forwarded under its own name, but
        only when its value is truthy.
        """
        optional = {
            "departure_id": departure_id,
            "arrival_id": arrival_id,
            "outbound_date": outbound_date,
            "return_date": return_date,
        }
        kwargs.update({name: value for name, value in optional.items() if value})
        return await self._dispatch("google_flights", query, kwargs)

    async def patents(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_patents"``."""
        return await self._dispatch("google_patents", query, kwargs)

    async def trends(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="google_trends"``."""
        return await self._dispatch("google_trends", query, kwargs)
149
+
150
+
151
class AsyncBingEngine(AsyncEngineBase):
    """Bing-specific SERP helpers for the asynchronous client."""

    async def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="bing"``; extra keyword arguments pass through."""
        results = await self._client.serp_search(query, engine="bing", **kwargs)
        return results

    async def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
        """Search with ``engine="bing_news"``; extra keyword arguments pass through."""
        results = await self._client.serp_search(query, engine="bing_news", **kwargs)
        return results
157
+
158
+
159
class AsyncSerpNamespace:
    """Groups the per-engine helpers for the asynchronous client.

    Exposes ``google`` and ``bing`` sub-namespaces and a generic ``search``
    coroutine that forwards straight to the client.
    """

    def __init__(self, client: AsyncThordataClient) -> None:
        self._client = client
        # Both engine helpers share the same client instance.
        self.google = AsyncGoogleEngine(client)
        self.bing = AsyncBingEngine(client)

    async def search(self, *args: Any, **kwargs: Any):
        """Await ``client.serp_search`` with all arguments unchanged."""
        client = self._client
        return await client.serp_search(*args, **kwargs)