PyPI - thordata-sdk - Versions diffs - 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl - Mend

thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

thordata/__init__.py +151 -0
thordata/_example_utils.py +77 -0
thordata/_utils.py +190 -0
thordata/async_client.py +1675 -0
thordata/client.py +1644 -0
thordata/demo.py +138 -0
thordata/enums.py +384 -0
thordata/exceptions.py +355 -0
thordata/models.py +1197 -0
thordata/retry.py +382 -0
thordata/serp_engines.py +166 -0
thordata_sdk-1.2.0.dist-info/METADATA +208 -0
thordata_sdk-1.2.0.dist-info/RECORD +16 -0
{thordata_sdk-0.2.4.dist-info → thordata_sdk-1.2.0.dist-info}/WHEEL +1 -1
thordata_sdk-1.2.0.dist-info/licenses/LICENSE +21 -0
thordata_sdk-1.2.0.dist-info/top_level.txt +1 -0
thordata_sdk/__init__.py +0 -9
thordata_sdk/async_client.py +0 -247
thordata_sdk/client.py +0 -303
thordata_sdk/enums.py +0 -20
thordata_sdk/parameters.py +0 -41
thordata_sdk-0.2.4.dist-info/LICENSE +0 -201
thordata_sdk-0.2.4.dist-info/METADATA +0 -113
thordata_sdk-0.2.4.dist-info/RECORD +0 -10
thordata_sdk-0.2.4.dist-info/top_level.txt +0 -1

thordata/retry.py ADDED Viewed

@@ -0,0 +1,382 @@
+"""
+Retry mechanism for the Thordata Python SDK.
+This module provides configurable retry logic for handling transient failures
+in API requests, with support for exponential backoff and jitter.
+Example:
+    >>> from thordata.retry import RetryConfig, with_retry
+    >>>
+    >>> config = RetryConfig(max_retries=3, backoff_factor=1.0)
+    >>>
+    >>> @with_retry(config)
+    ... def make_request():
+    ...     return requests.get("https://api.example.com")
+"""
+from __future__ import annotations
+import inspect
+import logging
+import random
+import time
+from dataclasses import dataclass, field
+from functools import wraps
+from typing import Any, Callable
+from .exceptions import (
+    ThordataNetworkError,
+    ThordataRateLimitError,
+    ThordataServerError,
+    is_retryable_exception,
+)
+logger = logging.getLogger(__name__)
+@dataclass
+class RetryConfig:
+    """
+    Configuration for retry behavior.
+    Attributes:
+        max_retries: Maximum number of retry attempts (default: 3).
+        backoff_factor: Multiplier for exponential backoff (default: 1.0).
+            Base wait time = backoff_factor * (2 ** attempt_number)
+        max_backoff: Maximum wait time in seconds, enforced after jitter
+            has been applied (default: 60).
+        jitter: Add random jitter to prevent thundering herd (default: True).
+        jitter_factor: Maximum jitter as fraction of wait time (default: 0.1).
+        retry_on_status_codes: HTTP status codes to retry on.
+        retry_on_api_codes: API response body codes treated as retryable
+            (default: {300}). NOTE(review): should_retry() below does not
+            read this field; presumably the clients consult it - confirm.
+        retry_on_exceptions: Exception types to retry on.
+    Example:
+        >>> config = RetryConfig(
+        ...     max_retries=5,
+        ...     backoff_factor=2.0,
+        ...     max_backoff=120
+        ... )
+    """
+    max_retries: int = 3
+    backoff_factor: float = 1.0
+    max_backoff: float = 60.0
+    jitter: bool = True
+    jitter_factor: float = 0.1
+    # Status codes to retry on (5xx server errors + 429 rate limit)
+    retry_on_status_codes: set[int] = field(
+        default_factory=lambda: {429, 500, 502, 503, 504}
+    )
+    retry_on_api_codes: set[int] = field(
+        default_factory=lambda: {300}  # API response body code
+    )
+    # Exception types to always retry on
+    retry_on_exceptions: tuple[type, ...] = field(
+        default_factory=lambda: (
+            ThordataNetworkError,
+            ThordataServerError,
+        )
+    )
+    def calculate_delay(self, attempt: int) -> float:
+        """
+        Calculate the delay before the next retry attempt.
+        The result never exceeds max_backoff, with or without jitter.
+        Args:
+            attempt: Current attempt number (0-indexed). Negative values are
+                treated as 0.
+        Returns:
+            Delay in seconds.
+        """
+        # Clamp the exponent: below 0 it would yield a fractional multiplier,
+        # and above 1023 the power no longer fits in a float (OverflowError).
+        exponent = min(max(attempt, 0), 1023)
+        # Exponential backoff (may be inf for huge exponents; capped below)
+        delay = self.backoff_factor * (2.0**exponent)
+        # Apply maximum cap
+        delay = min(delay, self.max_backoff)
+        # Add jitter if enabled
+        if self.jitter:
+            jitter_range = delay * self.jitter_factor
+            delay += random.uniform(-jitter_range, jitter_range)
+            if delay > self.max_backoff:
+                # Reflect the overshoot back under the cap so that capped
+                # delays stay spread out instead of piling up on the cap.
+                delay = 2 * self.max_backoff - delay
+            delay = max(0.1, delay)  # Ensure positive delay
+        # Final guard: the 0.1s floor must not beat a smaller max_backoff
+        return min(delay, self.max_backoff)
+    def should_retry(
+        self, exception: Exception, attempt: int, status_code: int | None = None
+    ) -> bool:
+        """
+        Determine if a request should be retried.
+        Checks run in order: retry budget, HTTP status code, configured
+        exception types, rate-limit errors, then is_retryable_exception().
+        Args:
+            exception: The exception that was raised.
+            attempt: Current attempt number.
+            status_code: HTTP status code if available.
+        Returns:
+            True if the request should be retried.
+        """
+        # Check if we've exceeded max retries
+        if attempt >= self.max_retries:
+            return False
+        # Check status code
+        if status_code and status_code in self.retry_on_status_codes:
+            return True
+        # Check exception type
+        if isinstance(exception, self.retry_on_exceptions):
+            return True
+        # Check rate limit with retry_after
+        if isinstance(exception, ThordataRateLimitError):
+            return True
+        # Use generic retryable check
+        return is_retryable_exception(exception)
+def with_retry(
+    config: RetryConfig | None = None,
+    on_retry: Callable[[int, Exception, float], None] | None = None,
+) -> Callable:
+    """
+    Decorator to add retry logic to a sync or async function.
+    The wrapped function is always called at least once; a max_retries of
+    zero or less simply disables retrying.
+    Args:
+        config: Retry configuration. Uses defaults if not provided.
+        on_retry: Optional callback called before each retry.
+            Receives (attempt, exception, delay).
+    Returns:
+        Decorated function with retry logic.
+    Example:
+        >>> @with_retry(RetryConfig(max_retries=3))
+        ... def fetch_data():
+        ...     return requests.get("https://api.example.com")
+        >>> @with_retry()
+        ... async def async_fetch():
+        ...     async with aiohttp.ClientSession() as session:
+        ...         return await session.get("https://api.example.com")
+    """
+    if config is None:
+        config = RetryConfig()
+    # Always make the initial call: a negative max_retries must mean
+    # "no retries", not "never invoke the function".
+    total_attempts = max(config.max_retries, 0) + 1
+    def plan_retry(error: Exception, attempt: int, label: str) -> float | None:
+        """Return the delay before the next attempt, or None to give up."""
+        status_code = _extract_status_code(error)
+        if not config.should_retry(error, attempt, status_code):
+            return None
+        delay = config.calculate_delay(attempt)
+        # A rate-limit error may carry a retry_after; wait at least that long
+        if isinstance(error, ThordataRateLimitError) and error.retry_after:
+            delay = max(delay, error.retry_after)
+        logger.warning(
+            f"{label} {attempt + 1}/{config.max_retries} "
+            f"after {delay:.2f}s due to: {error}"
+        )
+        if on_retry:
+            on_retry(attempt, error, delay)
+        return delay
+    def decorator(func: Callable) -> Callable:
+        if inspect.iscoroutinefunction(func):
+            # Imported lazily so purely synchronous users never load asyncio
+            import asyncio
+            @wraps(func)
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                last_exception: Exception | None = None
+                for attempt in range(total_attempts):
+                    try:
+                        return await func(*args, **kwargs)
+                    except Exception as e:
+                        last_exception = e
+                        delay = plan_retry(e, attempt, "Async retry attempt")
+                        if delay is None:
+                            raise
+                        await asyncio.sleep(delay)
+                # Reached only if should_retry() approved a retry on the
+                # final attempt (e.g. a custom config); surface the error.
+                if last_exception:
+                    raise last_exception
+                raise RuntimeError("Unexpected retry loop exit")
+            return async_wrapper
+        @wraps(func)
+        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+            last_exception: Exception | None = None
+            for attempt in range(total_attempts):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    last_exception = e
+                    delay = plan_retry(e, attempt, "Retry attempt")
+                    if delay is None:
+                        raise
+                    time.sleep(delay)
+            # Reached only if should_retry() approved a retry on the final
+            # attempt (e.g. a custom config); surface the last error.
+            if last_exception:
+                raise last_exception
+            raise RuntimeError("Unexpected retry loop exit")
+        return sync_wrapper
+    return decorator
+def _extract_status_code(exception: Exception) -> int | None:
+    """
+    Extract HTTP status code from various exception types.
+    Wrapped errors (original_error) are inspected first, innermost first,
+    before falling back to the outer exception's own attributes.
+    Args:
+        exception: The exception to extract from.
+    Returns:
+        HTTP status code if found, None otherwise.
+    """
+    # Unwrap nested/original errors (e.g., ThordataNetworkError(original_error=...))
+    # iteratively; the id() set stops a self-referencing chain from looping.
+    chain = [exception]
+    seen = {id(exception)}
+    nested = getattr(exception, "original_error", None)
+    while isinstance(nested, Exception) and id(nested) not in seen:
+        seen.add(id(nested))
+        chain.append(nested)
+        nested = getattr(nested, "original_error", None)
+    for candidate in reversed(chain):
+        code = _own_status_code(candidate)
+        if code is not None:
+            return code
+    return None
+def _own_status_code(exception: Exception) -> int | None:
+    """Read a status code from the exception's own attributes, ignoring wrapped errors."""
+    response = getattr(exception, "response", None)
+    candidates = (
+        # Thordata exceptions
+        getattr(exception, "status_code", None),
+        getattr(exception, "code", None),
+        # requests exceptions
+        getattr(response, "status_code", None),
+        # aiohttp exceptions
+        getattr(exception, "status", None),
+    )
+    for value in candidates:
+        # An attribute that exists but is None (or is not an int) must not
+        # hide a usable code held by a later attribute.
+        if isinstance(value, int) and not isinstance(value, bool):
+            return value
+    return None
+class RetryableRequest:
+    """
+    Context manager that tracks retry state for a hand-written retry loop.
+    Unlike the decorator, the caller drives the loop and asks this object
+    whether to keep going and how long to pause.
+    Example:
+        >>> config = RetryConfig(max_retries=3)
+        >>> with RetryableRequest(config) as retry:
+        ...     while True:
+        ...         try:
+        ...             response = requests.get("https://api.example.com")
+        ...             response.raise_for_status()
+        ...             break
+        ...         except Exception as e:
+        ...             if not retry.should_continue(e):
+        ...                 raise
+        ...             retry.wait()
+    """
+    def __init__(self, config: RetryConfig | None = None) -> None:
+        self.config = config if config else RetryConfig()
+        self.attempt = 0
+        self.last_exception: Exception | None = None
+    def __enter__(self) -> RetryableRequest:
+        return self
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        # Nothing to release; exceptions propagate to the caller unchanged.
+        return None
+    def should_continue(
+        self, exception: Exception, status_code: int | None = None
+    ) -> bool:
+        """
+        Record the failure and decide whether another attempt is allowed.
+        Args:
+            exception: The exception that occurred.
+            status_code: HTTP status code if available; extracted from the
+                exception when omitted.
+        Returns:
+            True if we should retry, False otherwise.
+        """
+        self.last_exception = exception
+        code = _extract_status_code(exception) if status_code is None else status_code
+        verdict = self.config.should_retry(exception, self.attempt, code)
+        if verdict:
+            # Count the retry that is about to happen
+            self.attempt += 1
+        return verdict
+    def _pending_delay(self) -> float:
+        """Compute the pause for the retry that should_continue() just approved."""
+        delay = self.config.calculate_delay(self.attempt - 1)
+        failure = self.last_exception
+        # Handle rate limit retry_after
+        if isinstance(failure, ThordataRateLimitError) and failure.retry_after:
+            delay = max(delay, failure.retry_after)
+        return delay
+    def wait(self) -> float:
+        """
+        Block for the computed delay before the next retry attempt.
+        Returns:
+            The actual delay used.
+        """
+        pause = self._pending_delay()
+        logger.debug(f"Waiting {pause:.2f}s before retry {self.attempt}")
+        time.sleep(pause)
+        return pause
+    async def async_wait(self) -> float:
+        """
+        Async version of wait().
+        Returns:
+            The actual delay used.
+        """
+        import asyncio
+        pause = self._pending_delay()
+        logger.debug(f"Async waiting {pause:.2f}s before retry {self.attempt}")
+        await asyncio.sleep(pause)
+        return pause

thordata/serp_engines.py ADDED Viewed

@@ -0,0 +1,166 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from .async_client import AsyncThordataClient
+    from .client import ThordataClient
+# --- Sync Engines ---
+class EngineBase:
+    """Base for the sync engine helpers; stores the client that the helpers call."""
+    def __init__(self, client: ThordataClient):
+        # Subclasses reach the API through self._client.serp_search(...)
+        self._client = client
+class GoogleEngine(EngineBase):
+    """Namespaced interface for Google features (Sync)."""
+    def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="google"."""
+        return self._client.serp_search(query, engine="google", **kwargs)
+    def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="google_news"."""
+        return self._client.serp_search(query, engine="google_news", **kwargs)
+    def jobs(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="google_jobs"."""
+        return self._client.serp_search(query, engine="google_jobs", **kwargs)
+    def shopping(
+        self, query: str, product_id: str | None = None, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Use "google_product" when a product_id is given, else "google_shopping"."""
+        if not product_id:
+            return self._client.serp_search(query, engine="google_shopping", **kwargs)
+        kwargs["product_id"] = product_id
+        return self._client.serp_search(query, engine="google_product", **kwargs)
+    def maps(
+        self, query: str, coordinates: str | None = None, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Query "google_maps"; coordinates, when given, are sent as the "ll" parameter."""
+        if coordinates:
+            kwargs.update(ll=coordinates)
+        return self._client.serp_search(query, engine="google_maps", **kwargs)
+    def flights(
+        self,
+        query: str = "",
+        departure_id: str | None = None,
+        arrival_id: str | None = None,
+        outbound_date: str | None = None,
+        return_date: str | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Query "google_flights", passing along only the flight fields that were supplied."""
+        optional_fields = {
+            "departure_id": departure_id,
+            "arrival_id": arrival_id,
+            "outbound_date": outbound_date,
+            "return_date": return_date,
+        }
+        # Empty/None values are omitted rather than sent to the API
+        kwargs.update(
+            {name: value for name, value in optional_fields.items() if value}
+        )
+        return self._client.serp_search(query, engine="google_flights", **kwargs)
+    def patents(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="google_patents"."""
+        return self._client.serp_search(query, engine="google_patents", **kwargs)
+    def trends(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="google_trends"."""
+        return self._client.serp_search(query, engine="google_trends", **kwargs)
+class BingEngine(EngineBase):
+    """Namespaced interface for Bing features (Sync)."""
+    def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="bing"."""
+        return self._client.serp_search(query, engine="bing", **kwargs)
+    def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Forward the query to serp_search with engine="bing_news"."""
+        return self._client.serp_search(query, engine="bing_news", **kwargs)
+class SerpNamespace:
+    """Groups the sync engine helpers as .google and .bing, plus a generic search()."""
+    def __init__(self, client: ThordataClient):
+        self.google = GoogleEngine(client)
+        self.bing = BingEngine(client)
+        self._client = client
+    def search(self, *args, **kwargs):
+        """Pass all arguments straight through to the client's serp_search()."""
+        return self._client.serp_search(*args, **kwargs)
+# --- Async Engines ---
+class AsyncEngineBase:
+    """Base for the async engine helpers; stores the client that the helpers await."""
+    def __init__(self, client: AsyncThordataClient):
+        # Subclasses reach the API through await self._client.serp_search(...)
+        self._client = client
+class AsyncGoogleEngine(AsyncEngineBase):
+    """Namespaced interface for Google features (Async)."""
+    async def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="google"."""
+        return await self._client.serp_search(query, engine="google", **kwargs)
+    async def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="google_news"."""
+        return await self._client.serp_search(query, engine="google_news", **kwargs)
+    async def jobs(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="google_jobs"."""
+        return await self._client.serp_search(query, engine="google_jobs", **kwargs)
+    async def shopping(
+        self, query: str, product_id: str | None = None, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Use "google_product" when a product_id is given, else "google_shopping"."""
+        if not product_id:
+            return await self._client.serp_search(
+                query, engine="google_shopping", **kwargs
+            )
+        kwargs["product_id"] = product_id
+        return await self._client.serp_search(query, engine="google_product", **kwargs)
+    async def maps(
+        self, query: str, coordinates: str | None = None, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Query "google_maps"; coordinates, when given, are sent as the "ll" parameter."""
+        if coordinates:
+            kwargs.update(ll=coordinates)
+        return await self._client.serp_search(query, engine="google_maps", **kwargs)
+    async def flights(
+        self,
+        query: str = "",
+        departure_id: str | None = None,
+        arrival_id: str | None = None,
+        outbound_date: str | None = None,
+        return_date: str | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Query "google_flights", passing along only the flight fields that were supplied."""
+        optional_fields = {
+            "departure_id": departure_id,
+            "arrival_id": arrival_id,
+            "outbound_date": outbound_date,
+            "return_date": return_date,
+        }
+        # Empty/None values are omitted rather than sent to the API
+        kwargs.update(
+            {name: value for name, value in optional_fields.items() if value}
+        )
+        return await self._client.serp_search(query, engine="google_flights", **kwargs)
+    async def patents(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="google_patents"."""
+        return await self._client.serp_search(query, engine="google_patents", **kwargs)
+    async def trends(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="google_trends"."""
+        return await self._client.serp_search(query, engine="google_trends", **kwargs)
+class AsyncBingEngine(AsyncEngineBase):
+    """Namespaced interface for Bing features (Async)."""
+    async def search(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="bing"."""
+        return await self._client.serp_search(query, engine="bing", **kwargs)
+    async def news(self, query: str, **kwargs: Any) -> dict[str, Any]:
+        """Await serp_search with engine="bing_news"."""
+        return await self._client.serp_search(query, engine="bing_news", **kwargs)
+class AsyncSerpNamespace:
+    """Groups the async engine helpers as .google and .bing, plus a generic search()."""
+    def __init__(self, client: AsyncThordataClient):
+        self.google = AsyncGoogleEngine(client)
+        self.bing = AsyncBingEngine(client)
+        self._client = client
+    async def search(self, *args, **kwargs):
+        """Await the client's serp_search() with all arguments passed straight through."""
+        return await self._client.serp_search(*args, **kwargs)

thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl

thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl