thordata-sdk 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -1,16 +1,138 @@
1
- # src/thordata/__init__.py
1
+ """
2
+ Thordata Python SDK
2
3
 
4
+ Official Python client for Thordata's Proxy Network, SERP API,
5
+ Universal Scraping API (Web Unlocker), and Web Scraper API.
6
+
7
+ Basic Usage:
8
+ >>> from thordata import ThordataClient
9
+ >>>
10
+ >>> client = ThordataClient(
11
+ ... scraper_token="your_token",
12
+ ... public_token="your_public_token",
13
+ ... public_key="your_public_key"
14
+ ... )
15
+ >>>
16
+ >>> # Proxy request
17
+ >>> response = client.get("https://httpbin.org/ip")
18
+ >>>
19
+ >>> # SERP search
20
+ >>> results = client.serp_search("python tutorial", engine="google")
21
+ >>>
22
+ >>> # Universal scrape
23
+ >>> html = client.universal_scrape("https://example.com", js_render=True)
24
+
25
+ Async Usage:
26
+ >>> from thordata import AsyncThordataClient
27
+ >>> import asyncio
28
+ >>>
29
+ >>> async def main():
30
+ ... async with AsyncThordataClient(
31
+ ... scraper_token="your_token"
32
+ ... ) as client:
33
+ ... response = await client.get("https://httpbin.org/ip")
34
+ >>>
35
+ >>> asyncio.run(main())
36
+ """
37
+
38
+ __version__ = "0.4.0"
39
+ __author__ = "Thordata Developer Team"
40
+ __email__ = "support@thordata.com"
41
+
42
+ # Main clients
3
43
  from .client import ThordataClient
4
44
  from .async_client import AsyncThordataClient
5
- from .enums import Engine, GoogleSearchType
6
45
 
7
- # Package version
8
- __version__ = "0.3.1"
46
+ # Enums
47
+ from .enums import (
48
+ Engine,
49
+ GoogleSearchType,
50
+ BingSearchType,
51
+ ProxyType,
52
+ SessionType,
53
+ Continent,
54
+ Country,
55
+ OutputFormat,
56
+ DataFormat,
57
+ TaskStatus,
58
+ Device,
59
+ TimeRange,
60
+ ProxyHost,
61
+ ProxyPort,
62
+ )
63
+
64
+ # Models
65
+ from .models import (
66
+ ProxyConfig,
67
+ ProxyProduct,
68
+ StickySession,
69
+ SerpRequest,
70
+ UniversalScrapeRequest,
71
+ ScraperTaskConfig,
72
+ TaskStatusResponse,
73
+ )
74
+
75
+ # Exceptions
76
+ from .exceptions import (
77
+ ThordataError,
78
+ ThordataConfigError,
79
+ ThordataNetworkError,
80
+ ThordataTimeoutError,
81
+ ThordataAPIError,
82
+ ThordataAuthError,
83
+ ThordataRateLimitError,
84
+ ThordataServerError,
85
+ ThordataValidationError,
86
+ )
87
+
88
+ # Retry utilities
89
+ from .retry import RetryConfig
9
90
 
10
- # Explicitly export classes to simplify user imports
91
+ # Public API
11
92
  __all__ = [
12
- "ThordataClient",
13
- "AsyncThordataClient",
14
- "Engine",
15
- "GoogleSearchType"
93
+ # Version
94
+ "__version__",
95
+
96
+ # Clients
97
+ "ThordataClient",
98
+ "AsyncThordataClient",
99
+
100
+ # Enums
101
+ "Engine",
102
+ "GoogleSearchType",
103
+ "BingSearchType",
104
+ "ProxyType",
105
+ "SessionType",
106
+ "Continent",
107
+ "Country",
108
+ "OutputFormat",
109
+ "DataFormat",
110
+ "TaskStatus",
111
+ "Device",
112
+ "TimeRange",
113
+ "ProxyHost",
114
+ "ProxyPort",
115
+
116
+ # Models
117
+ "ProxyConfig",
118
+ "ProxyProduct",
119
+ "StickySession",
120
+ "SerpRequest",
121
+ "UniversalScrapeRequest",
122
+ "ScraperTaskConfig",
123
+ "TaskStatusResponse",
124
+
125
+ # Exceptions
126
+ "ThordataError",
127
+ "ThordataConfigError",
128
+ "ThordataNetworkError",
129
+ "ThordataTimeoutError",
130
+ "ThordataAPIError",
131
+ "ThordataAuthError",
132
+ "ThordataRateLimitError",
133
+ "ThordataServerError",
134
+ "ThordataValidationError",
135
+
136
+ # Retry
137
+ "RetryConfig",
16
138
  ]
thordata/_utils.py ADDED
@@ -0,0 +1,126 @@
1
+ """
2
+ Internal utility functions for the Thordata Python SDK.
3
+
4
+ These are not part of the public API and may change without notice.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import base64
10
+ import json
11
+ import logging
12
+ from typing import Any, Dict, Optional, Union
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def parse_json_response(data: Any) -> Any:
    """
    Decode a response value that may itself be a JSON-encoded string.

    Some API endpoints return JSON as a string inside JSON, so a value
    that arrives as ``str`` is given one pass through ``json.loads``.

    Args:
        data: The response data to parse.

    Returns:
        The decoded object when ``data`` is a string holding valid JSON;
        otherwise ``data`` itself, unchanged.
    """
    # Anything that is not a string is passed through untouched.
    if not isinstance(data, str):
        return data

    try:
        decoded = json.loads(data)
    except json.JSONDecodeError:
        # Not JSON after all: hand the original text back to the caller.
        return data
    return decoded
35
+
36
+
37
def decode_base64_image(png_str: str) -> bytes:
    """
    Decode a base64-encoded PNG image.

    Accepts either a bare base64 string or a Data URI
    (``data:image/png;base64,...``). All whitespace in the payload is
    removed and missing ``=`` padding is restored before decoding.

    Args:
        png_str: Base64-encoded string, possibly with Data URI prefix.

    Returns:
        The decoded bytes. The content is not checked to be a valid PNG.

    Raises:
        ValueError: If the string is empty (either as given, or once the
            Data URI prefix and whitespace are stripped) or cannot be
            decoded.
    """
    if not png_str:
        raise ValueError("Empty PNG data received")

    # The base64 alphabet contains no comma, so a comma can only belong to
    # a Data URI header; keep whatever follows the first one.
    if "," in png_str:
        png_str = png_str.split(",", 1)[1]

    # Remove every whitespace character (tabs included). b64decode would
    # silently skip them anyway, but leaving them in would make the
    # length-based padding calculation below come out wrong.
    png_str = "".join(png_str.split())
    if not png_str:
        # A bare "data:...;base64," header or whitespace-only input carries
        # no image data; report it the same way as an empty argument.
        raise ValueError("Empty PNG data received")

    # Fix Base64 padding: the encoded length must be a multiple of 4.
    missing_padding = len(png_str) % 4
    if missing_padding:
        png_str += "=" * (4 - missing_padding)

    try:
        return base64.b64decode(png_str)
    except ValueError as e:
        # binascii.Error (bad padding/length) is a ValueError subclass, as
        # is the error raised for non-ASCII input.
        raise ValueError(f"Failed to decode base64 image: {e}") from e
71
+
72
+
73
def build_auth_headers(token: str) -> Dict[str, str]:
    """
    Assemble the HTTP headers used for token-authenticated API requests.

    Args:
        token: The scraper token, sent as a Bearer credential.

    Returns:
        A new dict holding the ``Authorization`` and ``Content-Type``
        headers, in that order.
    """
    bearer_value = f"Bearer {token}"
    headers: Dict[str, str] = {}
    headers["Authorization"] = bearer_value
    # Request bodies are sent form-encoded.
    headers["Content-Type"] = "application/x-www-form-urlencoded"
    return headers
87
+
88
+
89
def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
    """
    Assemble the HTTP headers for public API requests (task status,
    locations, etc.).

    Args:
        public_token: The public API token, sent in the ``token`` header.
        public_key: The public API key, sent in the ``key`` header.

    Returns:
        A new dict holding the ``token``, ``key`` and ``Content-Type``
        headers, in that order.
    """
    header_pairs = (
        ("token", public_token),
        ("key", public_key),
        ("Content-Type", "application/x-www-form-urlencoded"),
    )
    return dict(header_pairs)
105
+
106
+
107
def extract_error_message(payload: Any) -> str:
    """
    Extract a human-readable error message from an API response.

    For dict payloads the fields ``msg``, ``message``, ``error``,
    ``detail`` and ``description`` are checked in that order, and the
    first one holding a usable value is returned. Fields that are missing,
    ``None``, or blank strings are skipped so they cannot hide a later,
    informative field.

    Args:
        payload: The API response payload.

    Returns:
        The first usable message field converted with ``str``, or
        ``str(payload)`` when the payload is not a dict or none of the
        known fields holds a usable value.
    """
    if isinstance(payload, dict):
        # Try common error message fields, most specific names first.
        for key in ("msg", "message", "error", "detail", "description"):
            value = payload.get(key)
            # A null or blank field says nothing; keep looking rather than
            # returning the literal text "None" or an empty message.
            if value is None or (isinstance(value, str) and not value.strip()):
                continue
            return str(value)

    # Fall back to the full payload so the caller still sees something.
    return str(payload)