thordata-sdk 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. thordata_sdk-1.1.0/PKG-INFO +271 -0
  2. thordata_sdk-1.1.0/README.md +228 -0
  3. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/pyproject.toml +4 -2
  4. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/__init__.py +1 -1
  5. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/_example_utils.py +3 -2
  6. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/_utils.py +20 -17
  7. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/async_client.py +80 -79
  8. thordata_sdk-1.1.0/src/thordata/client.py +977 -0
  9. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/demo.py +1 -3
  10. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/exceptions.py +12 -12
  11. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/models.py +67 -70
  12. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/retry.py +13 -13
  13. thordata_sdk-1.1.0/src/thordata_sdk.egg-info/PKG-INFO +271 -0
  14. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_async_client.py +0 -2
  15. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_async_client_errors.py +4 -4
  16. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_client_errors.py +20 -16
  17. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_examples.py +0 -1
  18. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_user_agent.py +0 -2
  19. thordata_sdk-1.0.0/PKG-INFO +0 -208
  20. thordata_sdk-1.0.0/README.md +0 -165
  21. thordata_sdk-1.0.0/src/thordata/client.py +0 -2012
  22. thordata_sdk-1.0.0/src/thordata_sdk.egg-info/PKG-INFO +0 -208
  23. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/LICENSE +0 -0
  24. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/setup.cfg +0 -0
  25. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata/enums.py +0 -0
  26. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata_sdk.egg-info/SOURCES.txt +0 -0
  27. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  28. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
  29. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  30. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_client.py +0 -0
  31. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_enums.py +0 -0
  32. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_exceptions.py +0 -0
  33. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_models.py +0 -0
  34. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_spec_parity.py +0 -0
  35. {thordata_sdk-1.0.0 → thordata_sdk-1.1.0}/tests/test_task_status_and_wait.py +0 -0
@@ -0,0 +1,271 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 1.1.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
12
+ Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Internet :: Proxy Servers
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: License :: OSI Approved :: MIT License
24
+ Classifier: Operating System :: OS Independent
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: requests>=2.25.0
30
+ Requires-Dist: aiohttp>=3.9.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
33
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
34
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
35
+ Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
36
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
37
+ Requires-Dist: black>=23.0.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
40
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
41
+ Requires-Dist: aioresponses>=0.7.6; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Thordata Python SDK
45
+
46
+ <div align="center">
47
+
48
+ **Official Python Client for Thordata APIs**
49
+
50
+ *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
51
+
52
+ [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
53
+ [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
54
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
55
+
56
+ </div>
57
+
58
+ ---
59
+
60
+ ## 📦 Installation
61
+
62
+ ```bash
63
+ pip install thordata-sdk
64
+ ```
65
+
66
+ Optional dependencies for Scraping Browser examples:
67
+ ```bash
68
+ pip install playwright
69
+ ```
70
+
71
+ ## 🔐 Configuration
72
+
73
+ Set the following environment variables (recommended):
74
+
75
+ ```bash
76
+ # Required for SERP, Universal, and Proxy Network
77
+ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
78
+
79
+ # Required for Web Scraper Tasks & Account Management
80
+ export THORDATA_PUBLIC_TOKEN="your_public_token"
81
+ export THORDATA_PUBLIC_KEY="your_public_key"
82
+
83
+ # Optional: Default Proxy Credentials (for Proxy Network)
84
+ export THORDATA_RESIDENTIAL_USERNAME="user"
85
+ export THORDATA_RESIDENTIAL_PASSWORD="pass"
86
+ ```
87
+
88
+ ## 🚀 Quick Start
89
+
90
+ ```python
91
+ from thordata import ThordataClient
92
+
93
+ # Initialize (credentials loaded from env)
94
+ client = ThordataClient(scraper_token="...")
95
+
96
+ # 1. SERP Search
97
+ print("--- SERP Search ---")
98
+ results = client.serp_search("python tutorial", engine="google")
99
+ print(f"Title: {results['organic'][0]['title']}")
100
+
101
+ # 2. Universal Scrape (Web Unlocker)
102
+ print("\n--- Universal Scrape ---")
103
+ html = client.universal_scrape("https://httpbin.org/html")
104
+ print(f"HTML Length: {len(html)}")
105
+ ```
106
+
107
+ ## 📚 Core Features
108
+
109
+ ### 🌐 Proxy Network
110
+
111
+ Easily generate proxy URLs with geo-targeting and sticky sessions. The SDK handles connection pooling automatically.
112
+
113
+ ```python
114
+ from thordata import ProxyConfig, ProxyProduct
115
+
116
+ # Create a proxy configuration
117
+ proxy = ProxyConfig(
118
+ username="user",
119
+ password="pass",
120
+ product=ProxyProduct.RESIDENTIAL,
121
+ country="us",
122
+ city="new_york",
123
+ session_id="session123",
124
+ session_duration=10 # Sticky for 10 mins
125
+ )
126
+
127
+ # Use with the client (high performance)
128
+ response = client.get("https://httpbin.org/ip", proxy_config=proxy)
129
+ print(response.json())
130
+
131
+ # Or get the URL string for other libs (requests, scrapy, etc.)
132
+ proxy_url = proxy.build_proxy_url()
133
+ print(f"Proxy URL: {proxy_url}")
134
+ ```
135
+
136
+ ### 🔍 SERP API
137
+
138
+ Real-time search results from Google, Bing, Yandex, etc.
139
+
140
+ ```python
141
+ from thordata import SerpRequest, Engine
142
+
143
+ # Simple
144
+ results = client.serp_search(
145
+ query="pizza near me",
146
+ engine=Engine.GOOGLE_MAPS,
147
+ country="us"
148
+ )
149
+
150
+ # Advanced (Strongly Typed)
151
+ request = SerpRequest(
152
+ query="AI news",
153
+ engine="google_news",
154
+ num=50,
155
+ time_filter="week",
156
+ location="San Francisco",
157
+ render_js=True
158
+ )
159
+ results = client.serp_search_advanced(request)
160
+ ```
161
+
162
+ ### 🔓 Universal Scraping API (Web Unlocker)
163
+
164
+ Bypass Cloudflare, CAPTCHAs, and anti-bot systems.
165
+
166
+ ```python
167
+ html = client.universal_scrape(
168
+ url="https://example.com/protected",
169
+ js_render=True,
170
+ wait_for=".content",
171
+ country="gb",
172
+ output_format="html"
173
+ )
174
+ ```
175
+
176
+ ### 🕷️ Web Scraper API (Async Tasks)
177
+
178
+ Manage asynchronous scraping tasks for massive scale.
179
+
180
+ ```python
181
+ # 1. Create Task
182
+ task_id = client.create_scraper_task(
183
+ file_name="my_task",
184
+ spider_id="universal",
185
+ spider_name="universal",
186
+ parameters={"url": "https://example.com"}
187
+ )
188
+ print(f"Task Created: {task_id}")
189
+
190
+ # 2. Wait for Completion
191
+ status = client.wait_for_task(task_id, max_wait=600)
192
+
193
+ # 3. Get Result
194
+ if status == "ready":
195
+ download_url = client.get_task_result(task_id)
196
+ print(f"Result: {download_url}")
197
+ ```
198
+
199
+ ### 📹 Video/Audio Tasks
200
+
201
+ Download content from YouTube and other supported platforms.
202
+
203
+ ```python
204
+ from thordata import CommonSettings
205
+
206
+ task_id = client.create_video_task(
207
+ file_name="video_{{VideoID}}",
208
+ spider_id="youtube_video_by-url",
209
+ spider_name="youtube.com",
210
+ parameters={"url": "https://youtube.com/watch?v=..."},
211
+ common_settings=CommonSettings(resolution="1080p")
212
+ )
213
+ ```
214
+
215
+ ### 📊 Account Management
216
+
217
+ Access usage statistics, manage sub-users, and whitelist IPs.
218
+
219
+ ```python
220
+ # Get Usage Stats
221
+ stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
222
+ print(f"Balance: {stats.balance_gb():.2f} GB")
223
+
224
+ # List Proxy Users
225
+ users = client.list_proxy_users()
226
+ print(f"Active Sub-users: {users.user_count}")
227
+
228
+ # Whitelist IP
229
+ client.add_whitelist_ip("1.2.3.4")
230
+ ```
231
+
232
+ ## ⚙️ Advanced Usage
233
+
234
+ ### Async Client
235
+
236
+ For high-concurrency applications, use `AsyncThordataClient`.
237
+
238
+ ```python
239
+ import asyncio
240
+ from thordata import AsyncThordataClient
241
+
242
+ async def main():
243
+ async with AsyncThordataClient(scraper_token="...") as client:
244
+ # SERP
245
+ results = await client.serp_search("async python")
246
+
247
+ # Universal
248
+ html = await client.universal_scrape("https://example.com")
249
+
250
+ asyncio.run(main())
251
+ ```
252
+
253
+ Note: `AsyncThordataClient` does not support HTTPS proxy tunneling (TLS-in-TLS) due to `aiohttp` limitations. For proxy network requests, use the sync client.
254
+
255
+ ### Custom Retry Configuration
256
+
257
+ ```python
258
+ from thordata import RetryConfig
259
+
260
+ retry = RetryConfig(
261
+ max_retries=5,
262
+ backoff_factor=1.5,
263
+ retry_on_status_codes={429, 500, 502, 503, 504}
264
+ )
265
+
266
+ client = ThordataClient(..., retry_config=retry)
267
+ ```
268
+
269
+ ## 📄 License
270
+
271
+ MIT License
@@ -0,0 +1,228 @@
1
+ # Thordata Python SDK
2
+
3
+ <div align="center">
4
+
5
+ **Official Python Client for Thordata APIs**
6
+
7
+ *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
8
+
9
+ [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
10
+ [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg)](https://pypi.org/project/thordata-sdk/)
11
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
12
+
13
+ </div>
14
+
15
+ ---
16
+
17
+ ## 📦 Installation
18
+
19
+ ```bash
20
+ pip install thordata-sdk
21
+ ```
22
+
23
+ Optional dependencies for Scraping Browser examples:
24
+ ```bash
25
+ pip install playwright
26
+ ```
27
+
28
+ ## 🔐 Configuration
29
+
30
+ Set the following environment variables (recommended):
31
+
32
+ ```bash
33
+ # Required for SERP, Universal, and Proxy Network
34
+ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
35
+
36
+ # Required for Web Scraper Tasks & Account Management
37
+ export THORDATA_PUBLIC_TOKEN="your_public_token"
38
+ export THORDATA_PUBLIC_KEY="your_public_key"
39
+
40
+ # Optional: Default Proxy Credentials (for Proxy Network)
41
+ export THORDATA_RESIDENTIAL_USERNAME="user"
42
+ export THORDATA_RESIDENTIAL_PASSWORD="pass"
43
+ ```
44
+
45
+ ## 🚀 Quick Start
46
+
47
+ ```python
48
+ from thordata import ThordataClient
49
+
50
+ # Initialize (credentials loaded from env)
51
+ client = ThordataClient(scraper_token="...")
52
+
53
+ # 1. SERP Search
54
+ print("--- SERP Search ---")
55
+ results = client.serp_search("python tutorial", engine="google")
56
+ print(f"Title: {results['organic'][0]['title']}")
57
+
58
+ # 2. Universal Scrape (Web Unlocker)
59
+ print("\n--- Universal Scrape ---")
60
+ html = client.universal_scrape("https://httpbin.org/html")
61
+ print(f"HTML Length: {len(html)}")
62
+ ```
63
+
64
+ ## 📚 Core Features
65
+
66
+ ### 🌐 Proxy Network
67
+
68
+ Easily generate proxy URLs with geo-targeting and sticky sessions. The SDK handles connection pooling automatically.
69
+
70
+ ```python
71
+ from thordata import ProxyConfig, ProxyProduct
72
+
73
+ # Create a proxy configuration
74
+ proxy = ProxyConfig(
75
+ username="user",
76
+ password="pass",
77
+ product=ProxyProduct.RESIDENTIAL,
78
+ country="us",
79
+ city="new_york",
80
+ session_id="session123",
81
+ session_duration=10 # Sticky for 10 mins
82
+ )
83
+
84
+ # Use with the client (high performance)
85
+ response = client.get("https://httpbin.org/ip", proxy_config=proxy)
86
+ print(response.json())
87
+
88
+ # Or get the URL string for other libs (requests, scrapy, etc.)
89
+ proxy_url = proxy.build_proxy_url()
90
+ print(f"Proxy URL: {proxy_url}")
91
+ ```
92
+
93
+ ### 🔍 SERP API
94
+
95
+ Real-time search results from Google, Bing, Yandex, etc.
96
+
97
+ ```python
98
+ from thordata import SerpRequest, Engine
99
+
100
+ # Simple
101
+ results = client.serp_search(
102
+ query="pizza near me",
103
+ engine=Engine.GOOGLE_MAPS,
104
+ country="us"
105
+ )
106
+
107
+ # Advanced (Strongly Typed)
108
+ request = SerpRequest(
109
+ query="AI news",
110
+ engine="google_news",
111
+ num=50,
112
+ time_filter="week",
113
+ location="San Francisco",
114
+ render_js=True
115
+ )
116
+ results = client.serp_search_advanced(request)
117
+ ```
118
+
119
+ ### 🔓 Universal Scraping API (Web Unlocker)
120
+
121
+ Bypass Cloudflare, CAPTCHAs, and anti-bot systems.
122
+
123
+ ```python
124
+ html = client.universal_scrape(
125
+ url="https://example.com/protected",
126
+ js_render=True,
127
+ wait_for=".content",
128
+ country="gb",
129
+ output_format="html"
130
+ )
131
+ ```
132
+
133
+ ### 🕷️ Web Scraper API (Async Tasks)
134
+
135
+ Manage asynchronous scraping tasks for massive scale.
136
+
137
+ ```python
138
+ # 1. Create Task
139
+ task_id = client.create_scraper_task(
140
+ file_name="my_task",
141
+ spider_id="universal",
142
+ spider_name="universal",
143
+ parameters={"url": "https://example.com"}
144
+ )
145
+ print(f"Task Created: {task_id}")
146
+
147
+ # 2. Wait for Completion
148
+ status = client.wait_for_task(task_id, max_wait=600)
149
+
150
+ # 3. Get Result
151
+ if status == "ready":
152
+ download_url = client.get_task_result(task_id)
153
+ print(f"Result: {download_url}")
154
+ ```
155
+
156
+ ### 📹 Video/Audio Tasks
157
+
158
+ Download content from YouTube and other supported platforms.
159
+
160
+ ```python
161
+ from thordata import CommonSettings
162
+
163
+ task_id = client.create_video_task(
164
+ file_name="video_{{VideoID}}",
165
+ spider_id="youtube_video_by-url",
166
+ spider_name="youtube.com",
167
+ parameters={"url": "https://youtube.com/watch?v=..."},
168
+ common_settings=CommonSettings(resolution="1080p")
169
+ )
170
+ ```
171
+
172
+ ### 📊 Account Management
173
+
174
+ Access usage statistics, manage sub-users, and whitelist IPs.
175
+
176
+ ```python
177
+ # Get Usage Stats
178
+ stats = client.get_usage_statistics("2024-01-01", "2024-01-31")
179
+ print(f"Balance: {stats.balance_gb():.2f} GB")
180
+
181
+ # List Proxy Users
182
+ users = client.list_proxy_users()
183
+ print(f"Active Sub-users: {users.user_count}")
184
+
185
+ # Whitelist IP
186
+ client.add_whitelist_ip("1.2.3.4")
187
+ ```
188
+
189
+ ## ⚙️ Advanced Usage
190
+
191
+ ### Async Client
192
+
193
+ For high-concurrency applications, use `AsyncThordataClient`.
194
+
195
+ ```python
196
+ import asyncio
197
+ from thordata import AsyncThordataClient
198
+
199
+ async def main():
200
+ async with AsyncThordataClient(scraper_token="...") as client:
201
+ # SERP
202
+ results = await client.serp_search("async python")
203
+
204
+ # Universal
205
+ html = await client.universal_scrape("https://example.com")
206
+
207
+ asyncio.run(main())
208
+ ```
209
+
210
+ Note: `AsyncThordataClient` does not support HTTPS proxy tunneling (TLS-in-TLS) due to `aiohttp` limitations. For proxy network requests, use the sync client.
211
+
212
+ ### Custom Retry Configuration
213
+
214
+ ```python
215
+ from thordata import RetryConfig
216
+
217
+ retry = RetryConfig(
218
+ max_retries=5,
219
+ backoff_factor=1.5,
220
+ retry_on_status_codes={429, 500, 502, 503, 504}
221
+ )
222
+
223
+ client = ThordataClient(..., retry_config=retry)
224
+ ```
225
+
226
+ ## 📄 License
227
+
228
+ MIT License
@@ -1,10 +1,11 @@
1
+ # thordata-python-sdk/pyproject.toml
1
2
  [build-system]
2
3
  requires = ["setuptools>=61.0", "wheel"]
3
4
  build-backend = "setuptools.build_meta"
4
5
 
5
6
  [project]
6
7
  name = "thordata-sdk"
7
- version = "1.0.0"
8
+ version = "1.1.0"
8
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
9
10
  readme = "README.md"
10
11
  requires-python = ">=3.9"
@@ -90,11 +91,12 @@ select = [
90
91
  "F", # pyflakes
91
92
  "I", # isort (import sorting)
92
93
  "B", # flake8-bugbear
94
+ "UP", # pyupgrade
95
+ "SIM", # flake8-simplify
93
96
  ]
94
97
  ignore = [
95
98
  "E501", # line too long (handled by black)
96
99
  "E731", # do not assign a lambda expression
97
- "F401", # imported but unused (we have some intentional re-exports)
98
100
  ]
99
101
 
100
102
  [tool.ruff.lint.isort]
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "1.0.0"
38
+ __version__ = "1.1.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import os
5
+ from collections.abc import Iterable
5
6
  from pathlib import Path
6
- from typing import Any, Iterable, Optional
7
+ from typing import Any
7
8
 
8
9
  try:
9
10
  from dotenv import load_dotenv
@@ -23,7 +24,7 @@ def env(name: str) -> str:
23
24
  return (os.getenv(name) or "").strip()
24
25
 
25
26
 
26
- def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
27
+ def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
27
28
  missing = [k for k in required if not env(k)]
28
29
  if not missing:
29
30
  return False
@@ -9,7 +9,8 @@ from __future__ import annotations
9
9
  import base64
10
10
  import json
11
11
  import logging
12
- from typing import Any, Dict
12
+ import platform
13
+ from typing import Any
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
16
 
@@ -70,7 +71,7 @@ def decode_base64_image(png_str: str) -> bytes:
70
71
  raise ValueError(f"Failed to decode base64 image: {e}") from e
71
72
 
72
73
 
73
- def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
74
+ def build_auth_headers(token: str, mode: str = "bearer") -> dict[str, str]:
74
75
  """
75
76
  Build authorization headers for API requests.
76
77
 
@@ -104,7 +105,7 @@ def build_builder_headers(
104
105
  scraper_token: str,
105
106
  public_token: str,
106
107
  public_key: str,
107
- ) -> Dict[str, str]:
108
+ ) -> dict[str, str]:
108
109
  """
109
110
  Build headers for Web Scraper builder API.
110
111
 
@@ -129,7 +130,7 @@ def build_builder_headers(
129
130
  }
130
131
 
131
132
 
132
- def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
133
+ def build_public_api_headers(public_token: str, public_key: str) -> dict[str, str]:
133
134
  """
134
135
  Build headers for public API requests (task status, locations, etc.)
135
136
 
@@ -171,17 +172,19 @@ def extract_error_message(payload: Any) -> str:
171
172
 
172
173
  def build_user_agent(sdk_version: str, http_client: str) -> str:
173
174
  """
174
- Build a default User-Agent for the SDK.
175
-
176
- Args:
177
- sdk_version: SDK version string.
178
- http_client: "requests" or "aiohttp" (or any identifier).
179
-
180
- Returns:
181
- A User-Agent string.
175
+ Build a standardized User-Agent for the SDK.
176
+ Format: thordata-python-sdk/{version} python/{py_ver} ({system}/{release}; {machine})
182
177
  """
183
- import platform
184
-
185
- py = platform.python_version()
186
- system = platform.system()
187
- return f"thordata-python-sdk/{sdk_version} (python {py}; {system}; {http_client})"
178
+ py_ver = platform.python_version()
179
+ system = platform.system() or "unknown"
180
+ release = platform.release() or "unknown"
181
+ machine = platform.machine() or "unknown"
182
+
183
+ # Clean up strings to avoid UA parsing issues (remove newlines, etc)
184
+ system = system.replace(";", "").strip()
185
+
186
+ return (
187
+ f"thordata-python-sdk/{sdk_version} "
188
+ f"python/{py_ver} "
189
+ f"({system}/{release}; {machine}; {http_client})"
190
+ )