thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -6,83 +6,84 @@ Universal Scraping API (Web Unlocker), and Web Scraper API.
 
 Basic Usage:
     >>> from thordata import ThordataClient
-    >>>
+    >>>
     >>> client = ThordataClient(
    ...     scraper_token="your_token",
    ...     public_token="your_public_token",
    ...     public_key="your_public_key"
    ... )
-    >>>
+    >>>
    >>> # Proxy request
    >>> response = client.get("https://httpbin.org/ip")
-    >>>
+    >>>
    >>> # SERP search
    >>> results = client.serp_search("python tutorial", engine="google")
-    >>>
+    >>>
    >>> # Universal scrape
    >>> html = client.universal_scrape("https://example.com", js_render=True)
 
 Async Usage:
    >>> from thordata import AsyncThordataClient
    >>> import asyncio
-    >>>
+    >>>
    >>> async def main():
    ...     async with AsyncThordataClient(
    ...         scraper_token="your_token"
    ...     ) as client:
    ...         response = await client.get("https://httpbin.org/ip")
-    >>>
+    >>>
    >>> asyncio.run(main())
 """
 
-__version__ = "0.4.0"
+__version__ = "0.5.0"
 __author__ = "Thordata Developer Team"
 __email__ = "support@thordata.com"
 
 # Main clients
-from .client import ThordataClient
 from .async_client import AsyncThordataClient
+from .client import ThordataClient
 
 # Enums
 from .enums import (
-    Engine,
-    GoogleSearchType,
     BingSearchType,
-    ProxyType,
-    SessionType,
     Continent,
     Country,
-    OutputFormat,
     DataFormat,
-    TaskStatus,
     Device,
-    TimeRange,
+    Engine,
+    GoogleSearchType,
+    OutputFormat,
     ProxyHost,
     ProxyPort,
-)
-
-# Models
-from .models import (
-    ProxyConfig,
-    ProxyProduct,
-    StickySession,
-    SerpRequest,
-    UniversalScrapeRequest,
-    ScraperTaskConfig,
-    TaskStatusResponse,
+    ProxyType,
+    SessionType,
+    TaskStatus,
+    TimeRange,
 )
 
 # Exceptions
 from .exceptions import (
-    ThordataError,
-    ThordataConfigError,
-    ThordataNetworkError,
-    ThordataTimeoutError,
     ThordataAPIError,
     ThordataAuthError,
+    ThordataConfigError,
+    ThordataError,
+    ThordataNetworkError,
     ThordataRateLimitError,
     ThordataServerError,
+    ThordataTimeoutError,
     ThordataValidationError,
+    ThordataNotCollectedError,
+)
+
+# Models
+from .models import (
+    ProxyConfig,
+    ProxyProduct,
+    ScraperTaskConfig,
+    SerpRequest,
+    StickySession,
+    TaskStatusResponse,
+    UniversalScrapeRequest,
 )
 
 # Retry utilities
@@ -92,11 +93,9 @@ from .retry import RetryConfig
 __all__ = [
     # Version
     "__version__",
-
     # Clients
     "ThordataClient",
     "AsyncThordataClient",
-
     # Enums
     "Engine",
     "GoogleSearchType",
@@ -112,7 +111,6 @@ __all__ = [
     "TimeRange",
     "ProxyHost",
     "ProxyPort",
-
     # Models
     "ProxyConfig",
     "ProxyProduct",
@@ -121,7 +119,6 @@ __all__ = [
     "UniversalScrapeRequest",
     "ScraperTaskConfig",
     "TaskStatusResponse",
-
     # Exceptions
     "ThordataError",
     "ThordataConfigError",
@@ -132,7 +129,7 @@ __all__ = [
     "ThordataRateLimitError",
     "ThordataServerError",
     "ThordataValidationError",
-
+    "ThordataNotCollectedError",
     # Retry
     "RetryConfig",
-]
+]
thordata/_utils.py CHANGED
@@ -9,7 +9,7 @@ from __future__ import annotations
 import base64
 import json
 import logging
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict
 
 logger = logging.getLogger(__name__)
 
@@ -17,12 +17,12 @@ logger = logging.getLogger(__name__)
 def parse_json_response(data: Any) -> Any:
     """
     Parse a response that might be double-encoded JSON.
-
+
     Some API endpoints return JSON as a string inside JSON.
-
+
     Args:
         data: The response data to parse.
-
+
     Returns:
         Parsed data.
     """
@@ -37,33 +37,33 @@ def parse_json_response(data: Any) -> Any:
 def decode_base64_image(png_str: str) -> bytes:
     """
     Decode a base64-encoded PNG image.
-
+
     Handles Data URI scheme (data:image/png;base64,...) and fixes padding.
-
+
     Args:
         png_str: Base64-encoded string, possibly with Data URI prefix.
-
+
     Returns:
         Decoded PNG bytes.
-
+
     Raises:
         ValueError: If the string is empty or cannot be decoded.
     """
     if not png_str:
         raise ValueError("Empty PNG data received")
-
+
     # Remove Data URI scheme if present
     if "," in png_str:
         png_str = png_str.split(",", 1)[1]
-
+
     # Clean up whitespace
     png_str = png_str.replace("\n", "").replace("\r", "").replace(" ", "")
-
+
     # Fix Base64 padding
     missing_padding = len(png_str) % 4
     if missing_padding:
         png_str += "=" * (4 - missing_padding)
-
+
     try:
         return base64.b64decode(png_str)
     except Exception as e:
@@ -73,10 +73,10 @@ def decode_base64_image(png_str: str) -> bytes:
 def build_auth_headers(token: str) -> Dict[str, str]:
     """
     Build authorization headers for API requests.
-
+
     Args:
         token: The scraper token.
-
+
     Returns:
         Headers dict with Authorization and Content-Type.
     """
@@ -89,11 +89,11 @@ def build_auth_headers(token: str) -> Dict[str, str]:
 def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
     """
     Build headers for public API requests (task status, locations, etc.)
-
+
     Args:
         public_token: The public API token.
         public_key: The public API key.
-
+
     Returns:
         Headers dict with token, key, and Content-Type.
     """
@@ -107,10 +107,10 @@ def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
 def extract_error_message(payload: Any) -> str:
     """
     Extract a human-readable error message from an API response.
-
+
     Args:
         payload: The API response payload.
-
+
     Returns:
         Error message string.
     """
@@ -119,8 +119,8 @@ def extract_error_message(payload: Any) -> str:
         for key in ("msg", "message", "error", "detail", "description"):
             if key in payload:
                 return str(payload[key])
-
+
         # Fall back to full payload
         return str(payload)
-
-    return str(payload)
+
+    return str(payload)
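The _utils.py hunks are housekeeping only: the unused Optional and Union imports are dropped and trailing whitespace on otherwise blank lines is stripped, with no behavioural change. For reference, the cleanup sequence that decode_base64_image applies (Data URI prefix, whitespace, Base64 padding), visible unchanged in the hunk above, works like the standalone sketch below; the sample bytes are made up for illustration and this is not the SDK helper itself.

import base64

# Simulate a Data URI whose Base64 payload lost its padding in transit.
raw = b"\x89PNG\r\n\x1a\n not a real image"
encoded = "data:image/png;base64," + base64.b64encode(raw).decode().rstrip("=")

# Remove the Data URI prefix if present
if "," in encoded:
    encoded = encoded.split(",", 1)[1]

# Strip whitespace that sometimes creeps into transported Base64
encoded = encoded.replace("\n", "").replace("\r", "").replace(" ", "")

# Pad back to a multiple of 4 so base64.b64decode does not raise
missing_padding = len(encoded) % 4
if missing_padding:
    encoded += "=" * (4 - missing_padding)

assert base64.b64decode(encoded) == raw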