thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
thordata/parameters.py CHANGED
@@ -1,11 +1,12 @@
  # src/thordata/parameters.py

- from typing import Dict, Any, Optional
+ from typing import Any, Dict
+

  def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
      """
      Normalizes parameters across different search engines to ensure a unified API surface.
-
+
      Args:
          engine (str): The search engine to use (e.g., 'google', 'yandex').
          query (str): The search query string.
@@ -17,7 +18,7 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
      # 1. Base parameters
      payload = {
          "num": str(kwargs.get("num", 10)),  # Default to 10 results
-         "json": "1", # Force JSON response
+         "json": "1",  # Force JSON response
          "engine": engine,
      }

@@ -29,14 +30,14 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
          payload["url"] = "yandex.com"
      else:
          payload["q"] = query
-
+
      # 3. Handle Default URLs for other engines
      if "url" not in kwargs:
          defaults = {
              "google": "google.com",
              "bing": "bing.com",
              "duckduckgo": "duckduckgo.com",
-             "baidu": "baidu.com"
+             "baidu": "baidu.com",
          }
          if engine in defaults:
              payload["url"] = defaults[engine]
@@ -49,4 +50,4 @@ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
          if key not in protected_keys:
              payload[key] = value

-     return payload
+     return payload
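
For orientation, here is a minimal usage sketch of the function changed above. It is illustrative only and not part of the package diff; the expected payload is inferred from the defaults visible in these hunks (num defaults to 10 and is stringified, json is forced to "1", and non-Yandex engines get a q key plus a per-engine default url).

# Illustrative sketch only; expected output inferred from the diff above.
from thordata.parameters import normalize_serp_params

payload = normalize_serp_params("google", "web scraping", num=20)
# Inferred result (key order may differ):
# {"num": "20", "json": "1", "engine": "google", "q": "web scraping", "url": "google.com"}
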
thordata/retry.py CHANGED
@@ -6,9 +6,9 @@ in API requests, with support for exponential backoff and jitter.

  Example:
      >>> from thordata.retry import RetryConfig, with_retry
-     >>>
+     >>>
      >>> config = RetryConfig(max_retries=3, backoff_factor=1.0)
-     >>>
+     >>>
      >>> @with_retry(config)
      >>> def make_request():
      ...     return requests.get("https://api.example.com")
@@ -16,33 +16,28 @@ Example:

  from __future__ import annotations

- import time
- import random
  import logging
+ import random
+ import time
  from dataclasses import dataclass, field
- from typing import (
-     Callable, TypeVar, Set, Optional, Union, Tuple, Any
- )
  from functools import wraps
+ from typing import Any, Callable, Optional, Set, Tuple

  from .exceptions import (
-     ThordataError,
      ThordataNetworkError,
-     ThordataServerError,
      ThordataRateLimitError,
+     ThordataServerError,
      is_retryable_exception,
  )

  logger = logging.getLogger(__name__)

- T = TypeVar("T")
-

  @dataclass
  class RetryConfig:
      """
      Configuration for retry behavior.
-
+
      Attributes:
          max_retries: Maximum number of retry attempts (default: 3).
          backoff_factor: Multiplier for exponential backoff (default: 1.0).
@@ -52,7 +47,7 @@ class RetryConfig:
          jitter_factor: Maximum jitter as fraction of wait time (default: 0.1).
          retry_on_status_codes: HTTP status codes to retry on.
          retry_on_exceptions: Exception types to retry on.
-
+
      Example:
          >>> config = RetryConfig(
          ...     max_retries=5,
@@ -60,18 +55,18 @@ class RetryConfig:
          ...     max_backoff=120
          ... )
      """
-
+
      max_retries: int = 3
      backoff_factor: float = 1.0
      max_backoff: float = 60.0
      jitter: bool = True
      jitter_factor: float = 0.1
-
+
      # Status codes to retry on (5xx server errors + 429 rate limit)
      retry_on_status_codes: Set[int] = field(
-         default_factory=lambda: {429, 500, 502, 503, 504}
+         default_factory=lambda: {300, 429, 500, 502, 503, 504}
      )
-
+
      # Exception types to always retry on
      retry_on_exceptions: Tuple[type, ...] = field(
          default_factory=lambda: (
@@ -79,64 +74,61 @@ class RetryConfig:
              ThordataServerError,
          )
      )
-
+
      def calculate_delay(self, attempt: int) -> float:
          """
          Calculate the delay before the next retry attempt.
-
+
          Args:
              attempt: Current attempt number (0-indexed).
-
+
          Returns:
              Delay in seconds.
          """
          # Exponential backoff
-         delay = self.backoff_factor * (2 ** attempt)
-
+         delay = self.backoff_factor * (2**attempt)
+
          # Apply maximum cap
          delay = min(delay, self.max_backoff)
-
+
          # Add jitter if enabled
          if self.jitter:
              jitter_range = delay * self.jitter_factor
              delay += random.uniform(-jitter_range, jitter_range)
              delay = max(0.1, delay)  # Ensure positive delay
-
+
          return delay
-
+
      def should_retry(
-         self,
-         exception: Exception,
-         attempt: int,
-         status_code: Optional[int] = None
+         self, exception: Exception, attempt: int, status_code: Optional[int] = None
      ) -> bool:
          """
          Determine if a request should be retried.
-
+
          Args:
              exception: The exception that was raised.
              attempt: Current attempt number.
              status_code: HTTP status code if available.
-
+
          Returns:
              True if the request should be retried.
          """
          # Check if we've exceeded max retries
          if attempt >= self.max_retries:
              return False
-
+
          # Check status code
          if status_code and status_code in self.retry_on_status_codes:
              return True
-
+
          # Check exception type
          if isinstance(exception, self.retry_on_exceptions):
              return True
-
+
          # Check rate limit with retry_after
          if isinstance(exception, ThordataRateLimitError):
              return True
-
+
          # Use generic retryable check
          return is_retryable_exception(exception)

@@ -144,23 +136,23 @@ class RetryConfig:
  def with_retry(
      config: Optional[RetryConfig] = None,
      on_retry: Optional[Callable[[int, Exception, float], None]] = None,
- ) -> Callable[[Callable[..., T]], Callable[..., T]]:
+ ) -> Callable:
      """
      Decorator to add retry logic to a function.
-
+
      Args:
          config: Retry configuration. Uses defaults if not provided.
          on_retry: Optional callback called before each retry.
              Receives (attempt, exception, delay).
-
+
      Returns:
          Decorated function with retry logic.
-
+
      Example:
          >>> @with_retry(RetryConfig(max_retries=3))
          ... def fetch_data():
          ...     return requests.get("https://api.example.com")
-
+
          >>> @with_retry()
          ... async def async_fetch():
          ...     async with aiohttp.ClientSession() as session:
@@ -168,122 +160,132 @@ def with_retry(
      """
      if config is None:
          config = RetryConfig()
-
-     def decorator(func: Callable[..., T]) -> Callable[..., T]:
+
+     def decorator(func: Callable) -> Callable:
          @wraps(func)
-         def sync_wrapper(*args: Any, **kwargs: Any) -> T:
+         def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
              last_exception: Optional[Exception] = None
-
+
              for attempt in range(config.max_retries + 1):
                  try:
                      return func(*args, **kwargs)
                  except Exception as e:
                      last_exception = e
-
-                     # Extract status code if available
+
                      status_code = _extract_status_code(e)
-
+
                      if not config.should_retry(e, attempt, status_code):
                          raise
-
+
                      delay = config.calculate_delay(attempt)
-
-                     # Handle rate limit retry_after
+
                      if isinstance(e, ThordataRateLimitError) and e.retry_after:
                          delay = max(delay, e.retry_after)
-
+
                      logger.warning(
                          f"Retry attempt {attempt + 1}/{config.max_retries} "
                          f"after {delay:.2f}s due to: {e}"
                      )
-
+
                      if on_retry:
                          on_retry(attempt, e, delay)
-
+
                      time.sleep(delay)
-
-             # Should not reach here, but just in case
-             raise last_exception # type: ignore
-
+
+             if last_exception:
+                 raise last_exception
+             raise RuntimeError("Unexpected retry loop exit")
+
          @wraps(func)
-         async def async_wrapper(*args: Any, **kwargs: Any) -> T:
+         async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
              import asyncio
-
+
              last_exception: Optional[Exception] = None
-
+
              for attempt in range(config.max_retries + 1):
                  try:
                      return await func(*args, **kwargs)
                  except Exception as e:
                      last_exception = e
-
+
                      status_code = _extract_status_code(e)
-
+
                      if not config.should_retry(e, attempt, status_code):
                          raise
-
+
                      delay = config.calculate_delay(attempt)
-
+
                      if isinstance(e, ThordataRateLimitError) and e.retry_after:
                          delay = max(delay, e.retry_after)
-
+
                      logger.warning(
                          f"Async retry attempt {attempt + 1}/{config.max_retries} "
                          f"after {delay:.2f}s due to: {e}"
                      )
-
+
                      if on_retry:
                          on_retry(attempt, e, delay)
-
+
                      await asyncio.sleep(delay)
-
-             raise last_exception # type: ignore
-
+
+             if last_exception:
+                 raise last_exception
+             raise RuntimeError("Unexpected retry loop exit")
+
          # Check if the function is async
          import asyncio
+
          if asyncio.iscoroutinefunction(func):
-             return async_wrapper # type: ignore
+             return async_wrapper
          return sync_wrapper
-
+
      return decorator


  def _extract_status_code(exception: Exception) -> Optional[int]:
      """
      Extract HTTP status code from various exception types.
-
+
      Args:
          exception: The exception to extract from.
-
+
      Returns:
          HTTP status code if found, None otherwise.
      """
+     # Unwrap nested/original errors (e.g., ThordataNetworkError(original_error=...))
+     if hasattr(exception, "original_error") and getattr(exception, "original_error"):
+         nested = getattr(exception, "original_error")
+         if isinstance(nested, Exception):
+             nested_code = _extract_status_code(nested)
+             if nested_code is not None:
+                 return nested_code
+
      # Check Thordata exceptions
      if hasattr(exception, "status_code"):
          return exception.status_code
      if hasattr(exception, "code"):
          return exception.code
-
+
      # Check requests exceptions
      if hasattr(exception, "response"):
          response = exception.response
          if response is not None and hasattr(response, "status_code"):
              return response.status_code
-
+
      # Check aiohttp exceptions
      if hasattr(exception, "status"):
          return exception.status
-
+
      return None


  class RetryableRequest:
      """
      Context manager for retryable requests with detailed control.
-
+
      This provides more control than the decorator approach, allowing
      you to check retry status during execution.
-
+
      Example:
          >>> config = RetryConfig(max_retries=3)
          >>> with RetryableRequest(config) as retry:
@@ -297,86 +299,82 @@ class RetryableRequest:
          ...                 raise
          ...             retry.wait()
      """
-
+
      def __init__(self, config: Optional[RetryConfig] = None) -> None:
          self.config = config or RetryConfig()
          self.attempt = 0
          self.last_exception: Optional[Exception] = None
-
-     def __enter__(self) -> "RetryableRequest":
+
+     def __enter__(self) -> RetryableRequest:
          return self
-
-     def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
-         return False
-
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         pass
+
      def should_continue(
-         self,
-         exception: Exception,
-         status_code: Optional[int] = None
+         self, exception: Exception, status_code: Optional[int] = None
      ) -> bool:
          """
          Check if we should continue retrying.
-
+
          Args:
              exception: The exception that occurred.
              status_code: HTTP status code if available.
-
+
          Returns:
              True if we should retry, False otherwise.
          """
          self.last_exception = exception
-
+
          if status_code is None:
              status_code = _extract_status_code(exception)
-
-         should_retry = self.config.should_retry(
-             exception, self.attempt, status_code
-         )
-
+
+         should_retry = self.config.should_retry(exception, self.attempt, status_code)
+
          if should_retry:
              self.attempt += 1
-
+
          return should_retry
-
+
      def wait(self) -> float:
          """
          Wait before the next retry attempt.
-
+
          Returns:
              The actual delay used.
          """
          delay = self.config.calculate_delay(self.attempt - 1)
-
+
          # Handle rate limit retry_after
          if (
-             isinstance(self.last_exception, ThordataRateLimitError)
+             isinstance(self.last_exception, ThordataRateLimitError)
              and self.last_exception.retry_after
          ):
              delay = max(delay, self.last_exception.retry_after)
-
+
          logger.debug(f"Waiting {delay:.2f}s before retry {self.attempt}")
          time.sleep(delay)
-
+
          return delay
-
+
      async def async_wait(self) -> float:
          """
          Async version of wait().
-
+
          Returns:
              The actual delay used.
          """
          import asyncio
-
+
          delay = self.config.calculate_delay(self.attempt - 1)
-
+
          if (
              isinstance(self.last_exception, ThordataRateLimitError)
              and self.last_exception.retry_after
          ):
              delay = max(delay, self.last_exception.retry_after)
-
+
          logger.debug(f"Async waiting {delay:.2f}s before retry {self.attempt}")
          await asyncio.sleep(delay)
-
-         return delay
+
+         return delay
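
For orientation, a short usage sketch of the retry API touched above, based on the docstring examples visible in this diff. It is illustrative only and not part of the package: it exercises the backoff calculation (backoff_factor * 2**attempt, capped at max_backoff, with optional jitter), the with_retry decorator, and the RetryableRequest context manager. The endpoint URL is a placeholder, and the 0.5.0 behavior it relies on is the widened retry_on_status_codes default (300 now joins 429 and the 5xx codes) and the explicit RuntimeError fallback when the retry loop exits unexpectedly.

# Illustrative sketch only; mirrors the docstring examples shown in the diff above.
import requests

from thordata.retry import RetryableRequest, RetryConfig, with_retry

config = RetryConfig(max_retries=3, backoff_factor=1.0, max_backoff=60.0)

# Backoff schedule: roughly backoff_factor * 2**attempt, capped at max_backoff,
# with +/- jitter_factor noise when jitter is enabled.
for attempt in range(config.max_retries):
    print(f"attempt {attempt}: ~{config.calculate_delay(attempt):.2f}s")


@with_retry(config)
def make_request():
    # Placeholder endpoint, as in the module docstring.
    return requests.get("https://api.example.com")


# Context-manager form for per-attempt control.
with RetryableRequest(config) as retry:
    while True:
        try:
            response = requests.get("https://api.example.com")
            break
        except Exception as exc:
            if not retry.should_continue(exc):
                raise
            retry.wait()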