thordata-sdk 1.5.0__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/PKG-INFO +63 -7
  2. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/README.md +62 -6
  3. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/pyproject.toml +4 -3
  4. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/__init__.py +1 -1
  5. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/async_client.py +55 -13
  6. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/client.py +64 -13
  7. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/enums.py +2 -2
  8. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/exceptions.py +80 -20
  9. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/models.py +1 -1
  10. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/retry.py +1 -1
  11. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/tools/__init__.py +11 -1
  12. thordata_sdk-1.7.0/src/thordata/tools/code.py +39 -0
  13. thordata_sdk-1.7.0/src/thordata/tools/ecommerce.py +251 -0
  14. thordata_sdk-1.7.0/src/thordata/tools/professional.py +155 -0
  15. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/tools/search.py +47 -5
  16. thordata_sdk-1.7.0/src/thordata/tools/social.py +374 -0
  17. thordata_sdk-1.7.0/src/thordata/tools/travel.py +100 -0
  18. thordata_sdk-1.7.0/src/thordata/tools/video.py +154 -0
  19. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/serp.py +6 -2
  20. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/task.py +75 -9
  21. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/universal.py +37 -5
  22. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata_sdk.egg-info/PKG-INFO +63 -7
  23. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata_sdk.egg-info/SOURCES.txt +8 -3
  24. thordata_sdk-1.7.0/tests/test_async_client.py +424 -0
  25. thordata_sdk-1.7.0/tests/test_batch_creation.py +116 -0
  26. thordata_sdk-1.7.0/tests/test_client.py +606 -0
  27. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_enums.py +1 -1
  28. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_examples.py +4 -1
  29. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_integration_proxy_protocols.py +2 -3
  30. thordata_sdk-1.7.0/tests/test_retry.py +317 -0
  31. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_spec_parity.py +36 -2
  32. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_tools.py +5 -3
  33. thordata_sdk-1.7.0/tests/test_tools_coverage.py +102 -0
  34. thordata_sdk-1.7.0/tests/test_unlimited.py +184 -0
  35. thordata_sdk-1.7.0/tests/test_utils.py +126 -0
  36. thordata_sdk-1.5.0/src/thordata/_example_utils.py +0 -77
  37. thordata_sdk-1.5.0/src/thordata/demo.py +0 -138
  38. thordata_sdk-1.5.0/src/thordata/tools/code.py +0 -26
  39. thordata_sdk-1.5.0/src/thordata/tools/ecommerce.py +0 -67
  40. thordata_sdk-1.5.0/src/thordata/tools/social.py +0 -190
  41. thordata_sdk-1.5.0/src/thordata/tools/video.py +0 -81
  42. thordata_sdk-1.5.0/tests/test_async_client.py +0 -111
  43. thordata_sdk-1.5.0/tests/test_client.py +0 -121
  44. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/LICENSE +0 -0
  45. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/setup.cfg +0 -0
  46. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/_utils.py +0 -0
  47. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/async_unlimited.py +0 -0
  48. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/core/__init__.py +0 -0
  49. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/core/async_http_client.py +0 -0
  50. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/core/http_client.py +0 -0
  51. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/core/tunnel.py +0 -0
  52. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/serp_engines.py +0 -0
  53. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/tools/base.py +0 -0
  54. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/__init__.py +0 -0
  55. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/common.py +0 -0
  56. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/types/proxy.py +0 -0
  57. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata/unlimited.py +0 -0
  58. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  59. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
  60. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  61. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_async_client_errors.py +0 -0
  62. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_client_errors.py +0 -0
  63. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_exceptions.py +0 -0
  64. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_models.py +0 -0
  65. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_task_status_and_wait.py +0 -0
  66. {thordata_sdk-1.5.0 → thordata_sdk-1.7.0}/tests/test_user_agent.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.5.0
3
+ Version: 1.7.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -63,9 +63,9 @@ Dynamic: license-file
63
63
 
64
64
  ## 📖 Introduction
65
65
 
66
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
66
+ The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
67
67
 
68
- **Why v1.5.0?**
68
+ **Why v1.7.0?**
69
69
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
70
70
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
71
71
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -83,7 +83,7 @@ pip install thordata-sdk
83
83
 
84
84
  ## 🔐 Configuration
85
85
 
86
- Set environment variables to avoid hardcoding credentials.
86
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
87
87
 
88
88
  ```bash
89
89
  # [Scraping APIs]
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
93
93
  export THORDATA_PUBLIC_TOKEN="your_public_token"
94
94
  export THORDATA_PUBLIC_KEY="your_public_key"
95
95
 
96
- # [Proxy Network]
96
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
97
97
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
98
98
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
99
- # Optional: Set upstream proxy for local dev (e.g., Clash)
100
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
99
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
100
+ # export THORDATA_UNLIMITED_USERNAME="..."
101
+ # export THORDATA_UNLIMITED_PASSWORD="..."
102
+
103
+ # Optional: Upstream proxy when behind a firewall (e.g. Clash Verge, port 7897)
104
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
101
105
  ```
102
106
 
107
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
108
+
103
109
  ---
104
110
 
105
111
  ## 🚀 Quick Start
@@ -199,6 +205,48 @@ if status == "finished":
199
205
  print(f"Download: {data_url}")
200
206
  ```
201
207
 
208
+ ### Web Scraper Tools (120+ Pre-built Tools)
209
+
210
+ Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
211
+
212
+ ```python
213
+ from thordata import ThordataClient
214
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
215
+
216
+ client = ThordataClient()
217
+
218
+ # Amazon Product by ASIN
219
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
220
+
221
+ # Google Maps by Place ID
222
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
223
+
224
+ # YouTube Video Download
225
+ from thordata import CommonSettings
226
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
227
+ task_id = client.run_tool(YouTube.VideoDownload(
228
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
229
+ common_settings=settings
230
+ ))
231
+
232
+ # Wait and get results
233
+ status = client.wait_for_task(task_id, max_wait=300)
234
+ if status == "ready":
235
+ download_url = client.get_task_result(task_id)
236
+ print(f"Results: {download_url}")
237
+ ```
238
+
239
+ **Available Platforms:**
240
+ - **E-Commerce**: Amazon, eBay, Walmart
241
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
242
+ - **Search**: Google Maps, Google Shopping, Google Play
243
+ - **Video**: YouTube (download, info, subtitles)
244
+ - **Code**: GitHub
245
+ - **Professional**: Indeed, Glassdoor, Crunchbase
246
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
247
+
248
+ See `examples/tools/` for more examples.
249
+
202
250
  ---
203
251
 
204
252
  ## 🛠️ Management APIs
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
226
274
 
227
275
  ---
228
276
 
277
+ ## 🧪 Development & Testing
278
+
279
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
280
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
281
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
282
+
283
+ ---
284
+
229
285
  ## 📄 License
230
286
 
231
287
  MIT License. See [LICENSE](LICENSE) for details.
@@ -19,9 +19,9 @@
19
19
 
20
20
  ## 📖 Introduction
21
21
 
22
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
22
+ The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
23
23
 
24
- **Why v1.5.0?**
24
+ **Why v1.7.0?**
25
25
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
26
26
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
27
27
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -39,7 +39,7 @@ pip install thordata-sdk
39
39
 
40
40
  ## 🔐 Configuration
41
41
 
42
- Set environment variables to avoid hardcoding credentials.
42
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
43
43
 
44
44
  ```bash
45
45
  # [Scraping APIs]
@@ -49,13 +49,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
49
49
  export THORDATA_PUBLIC_TOKEN="your_public_token"
50
50
  export THORDATA_PUBLIC_KEY="your_public_key"
51
51
 
52
- # [Proxy Network]
52
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
53
53
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
54
54
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
55
- # Optional: Set upstream proxy for local dev (e.g., Clash)
56
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
55
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
56
+ # export THORDATA_UNLIMITED_USERNAME="..."
57
+ # export THORDATA_UNLIMITED_PASSWORD="..."
58
+
59
+ # Optional: Upstream proxy when behind a firewall (e.g. Clash Verge, port 7897)
60
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
57
61
  ```
58
62
 
63
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
64
+
59
65
  ---
60
66
 
61
67
  ## 🚀 Quick Start
@@ -155,6 +161,48 @@ if status == "finished":
155
161
  print(f"Download: {data_url}")
156
162
  ```
157
163
 
164
+ ### Web Scraper Tools (120+ Pre-built Tools)
165
+
166
+ Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
167
+
168
+ ```python
169
+ from thordata import ThordataClient
170
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
171
+
172
+ client = ThordataClient()
173
+
174
+ # Amazon Product by ASIN
175
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
176
+
177
+ # Google Maps by Place ID
178
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
179
+
180
+ # YouTube Video Download
181
+ from thordata import CommonSettings
182
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
183
+ task_id = client.run_tool(YouTube.VideoDownload(
184
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
185
+ common_settings=settings
186
+ ))
187
+
188
+ # Wait and get results
189
+ status = client.wait_for_task(task_id, max_wait=300)
190
+ if status == "ready":
191
+ download_url = client.get_task_result(task_id)
192
+ print(f"Results: {download_url}")
193
+ ```
194
+
195
+ **Available Platforms:**
196
+ - **E-Commerce**: Amazon, eBay, Walmart
197
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
198
+ - **Search**: Google Maps, Google Shopping, Google Play
199
+ - **Video**: YouTube (download, info, subtitles)
200
+ - **Code**: GitHub
201
+ - **Professional**: Indeed, Glassdoor, Crunchbase
202
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
203
+
204
+ See `examples/tools/` for more examples.
205
+
158
206
  ---
159
207
 
160
208
  ## 🛠️ Management APIs
@@ -182,6 +230,14 @@ monitor = client.unlimited.get_server_monitor(
182
230
 
183
231
  ---
184
232
 
233
+ ## 🧪 Development & Testing
234
+
235
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
236
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
237
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
238
+
239
+ ---
240
+
185
241
  ## 📄 License
186
242
 
187
243
  MIT License. See [LICENSE](LICENSE) for details.
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "thordata-sdk"
8
- version = "1.5.0"
8
+ version = "1.7.0"
9
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -115,16 +115,17 @@ check_untyped_defs = false
115
115
  strict_optional = false
116
116
  show_error_codes = true
117
117
  ignore_missing_imports = true
118
+ follow_imports = "skip"
118
119
 
119
120
  [[tool.mypy.overrides]]
120
121
  module = ["aiohttp.*", "requests.*"]
121
122
  ignore_missing_imports = true
122
123
 
123
- # Pytest setup
124
+ # Pytest setup (coverage is run separately via coverage CLI for reliability)
124
125
  [tool.pytest.ini_options]
125
126
  testpaths = ["tests"]
126
127
  asyncio_mode = "auto"
127
- addopts = "-v --cov=thordata --cov-report=term-missing"
128
+ addopts = "-v"
128
129
  markers = ["integration: live tests that require real credentials"]
129
130
 
130
131
  # Coverage setup
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
5
5
  Universal Scraping API (Web Unlocker), and Web Scraper API.
6
6
  """
7
7
 
8
- __version__ = "1.5.0"
8
+ __version__ = "1.6.0"
9
9
  __author__ = "Thordata Developer Team/Kael Odin"
10
10
  __email__ = "support@thordata.com"
11
11
 
@@ -124,10 +124,10 @@ class AsyncThordataClient:
124
124
  ).rstrip("/")
125
125
 
126
126
  self._gateway_base_url = os.getenv(
127
- "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
127
+ "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
128
128
  )
129
129
  self._child_base_url = os.getenv(
130
- "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
130
+ "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
131
131
  )
132
132
 
133
133
  # URL Construction
@@ -145,7 +145,7 @@ class AsyncThordataClient:
145
145
  self._proxy_users_url = f"{shared_api_base}/proxy-users"
146
146
 
147
147
  whitelist_base = os.getenv(
148
- "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
148
+ "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
149
149
  )
150
150
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
151
151
 
@@ -293,28 +293,36 @@ class AsyncThordataClient:
293
293
  url: str,
294
294
  *,
295
295
  js_render: bool = False,
296
- output_format: str = "html",
296
+ output_format: str | list[str] = "html",
297
297
  country: str | None = None,
298
298
  block_resources: str | None = None,
299
+ clean_content: str | None = None,
299
300
  wait: int | None = None,
300
301
  wait_for: str | None = None,
302
+ follow_redirect: bool | None = None,
303
+ headers: list[dict[str, str]] | None = None,
304
+ cookies: list[dict[str, str]] | None = None,
301
305
  **kwargs: Any,
302
- ) -> str | bytes:
306
+ ) -> str | bytes | dict[str, str | bytes]:
303
307
  request = UniversalScrapeRequest(
304
308
  url=url,
305
309
  js_render=js_render,
306
310
  output_format=output_format,
307
311
  country=country,
308
312
  block_resources=block_resources,
313
+ clean_content=clean_content,
309
314
  wait=wait,
310
315
  wait_for=wait_for,
316
+ follow_redirect=follow_redirect,
317
+ headers=headers,
318
+ cookies=cookies,
311
319
  extra_params=kwargs,
312
320
  )
313
321
  return await self.universal_scrape_advanced(request)
314
322
 
315
323
  async def universal_scrape_advanced(
316
324
  self, request: UniversalScrapeRequest
317
- ) -> str | bytes:
325
+ ) -> str | bytes | dict[str, str | bytes]:
318
326
  if not self.scraper_token:
319
327
  raise ThordataConfigError("scraper_token required")
320
328
  payload = request.to_payload()
@@ -327,9 +335,17 @@ class AsyncThordataClient:
327
335
  try:
328
336
  resp_json = await response.json()
329
337
  except ValueError:
330
- if request.output_format.lower() == "png":
331
- return await response.read()
332
- return await response.text()
338
+ # If not JSON, return raw content based on format
339
+ if isinstance(request.output_format, list) or (
340
+ isinstance(request.output_format, str) and "," in request.output_format
341
+ ):
342
+ return {"raw": await response.read()}
343
+ fmt = (
344
+ request.output_format.lower()
345
+ if isinstance(request.output_format, str)
346
+ else str(request.output_format).lower()
347
+ )
348
+ return await response.read() if fmt == "png" else await response.text()
333
349
 
334
350
  if isinstance(resp_json, dict):
335
351
  code = resp_json.get("code")
@@ -337,6 +353,27 @@ class AsyncThordataClient:
337
353
  msg = extract_error_message(resp_json)
338
354
  raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
339
355
 
356
+ # Handle multiple output formats
357
+ if isinstance(request.output_format, list) or (
358
+ isinstance(request.output_format, str) and "," in request.output_format
359
+ ):
360
+ result: dict[str, str | bytes] = {}
361
+ formats = (
362
+ request.output_format
363
+ if isinstance(request.output_format, list)
364
+ else [f.strip() for f in request.output_format.split(",")]
365
+ )
366
+
367
+ for fmt in formats:
368
+ fmt_lower = fmt.lower()
369
+ if fmt_lower == "html" and "html" in resp_json:
370
+ result["html"] = resp_json["html"]
371
+ elif fmt_lower == "png" and "png" in resp_json:
372
+ result["png"] = decode_base64_image(resp_json["png"])
373
+
374
+ if result:
375
+ return result
376
+
340
377
  if "html" in resp_json:
341
378
  return resp_json["html"]
342
379
  if "png" in resp_json:
@@ -352,7 +389,7 @@ class AsyncThordataClient:
352
389
  file_name: str,
353
390
  spider_id: str,
354
391
  spider_name: str,
355
- parameters: dict[str, Any],
392
+ parameters: dict[str, Any] | list[dict[str, Any]],
356
393
  universal_params: dict[str, Any] | None = None,
357
394
  ) -> str:
358
395
  config = ScraperTaskConfig(
@@ -434,7 +471,7 @@ class AsyncThordataClient:
434
471
  file_name: str,
435
472
  spider_id: str,
436
473
  spider_name: str,
437
- parameters: dict[str, Any],
474
+ parameters: dict[str, Any] | list[dict[str, Any]],
438
475
  common_settings: CommonSettings,
439
476
  ) -> str:
440
477
  config = VideoTaskConfig(
@@ -550,7 +587,7 @@ class AsyncThordataClient:
550
587
  file_name: str,
551
588
  spider_id: str,
552
589
  spider_name: str,
553
- parameters: dict[str, Any],
590
+ parameters: dict[str, Any] | list[dict[str, Any]],
554
591
  universal_params: dict[str, Any] | None = None,
555
592
  *,
556
593
  max_wait: float = 600.0,
@@ -971,7 +1008,12 @@ class AsyncThordataClient:
971
1008
  if port:
972
1009
  params["port"] = str(port)
973
1010
 
974
- username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1011
+ if product == "unlimited":
1012
+ username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
1013
+ "THORDATA_RESIDENTIAL_USERNAME"
1014
+ )
1015
+ else:
1016
+ username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
975
1017
  if username:
976
1018
  params["td-customer"] = username
977
1019
 
@@ -53,6 +53,7 @@ from .serp_engines import SerpNamespace
53
53
  # Import Types (Modernized)
54
54
  from .types import (
55
55
  CommonSettings,
56
+ DataFormat,
56
57
  ProxyConfig,
57
58
  ProxyProduct,
58
59
  ProxyServer,
@@ -159,10 +160,10 @@ class ThordataClient:
159
160
  ).rstrip("/")
160
161
 
161
162
  self._gateway_base_url = os.getenv(
162
- "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
163
+ "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
163
164
  )
164
165
  self._child_base_url = os.getenv(
165
- "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
166
+ "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
166
167
  )
167
168
 
168
169
  # URL Construction
@@ -183,7 +184,7 @@ class ThordataClient:
183
184
  self._proxy_users_url = f"{shared_api_base}/proxy-users"
184
185
 
185
186
  whitelist_base = os.getenv(
186
- "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
187
+ "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
187
188
  )
188
189
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
189
190
 
@@ -364,26 +365,36 @@ class ThordataClient:
364
365
  url: str,
365
366
  *,
366
367
  js_render: bool = False,
367
- output_format: str = "html",
368
+ output_format: str | list[str] = "html",
368
369
  country: str | None = None,
369
370
  block_resources: str | None = None,
371
+ clean_content: str | None = None,
370
372
  wait: int | None = None,
371
373
  wait_for: str | None = None,
374
+ follow_redirect: bool | None = None,
375
+ headers: list[dict[str, str]] | None = None,
376
+ cookies: list[dict[str, str]] | None = None,
372
377
  **kwargs: Any,
373
- ) -> str | bytes:
378
+ ) -> str | bytes | dict[str, str | bytes]:
374
379
  request = UniversalScrapeRequest(
375
380
  url=url,
376
381
  js_render=js_render,
377
382
  output_format=output_format,
378
383
  country=country,
379
384
  block_resources=block_resources,
385
+ clean_content=clean_content,
380
386
  wait=wait,
381
387
  wait_for=wait_for,
388
+ follow_redirect=follow_redirect,
389
+ headers=headers,
390
+ cookies=cookies,
382
391
  extra_params=kwargs,
383
392
  )
384
393
  return self.universal_scrape_advanced(request)
385
394
 
386
- def universal_scrape_advanced(self, request: UniversalScrapeRequest) -> str | bytes:
395
+ def universal_scrape_advanced(
396
+ self, request: UniversalScrapeRequest
397
+ ) -> str | bytes | dict[str, str | bytes]:
387
398
  if not self.scraper_token:
388
399
  raise ThordataConfigError("scraper_token required")
389
400
 
@@ -405,7 +416,7 @@ class ThordataClient:
405
416
  file_name: str,
406
417
  spider_id: str,
407
418
  spider_name: str,
408
- parameters: dict[str, Any],
419
+ parameters: dict[str, Any] | list[dict[str, Any]],
409
420
  universal_params: dict[str, Any] | None = None,
410
421
  ) -> str:
411
422
  config = ScraperTaskConfig(
@@ -490,7 +501,7 @@ class ThordataClient:
490
501
  file_name: str,
491
502
  spider_id: str,
492
503
  spider_name: str,
493
- parameters: dict[str, Any],
504
+ parameters: dict[str, Any] | list[dict[str, Any]],
494
505
  common_settings: CommonSettings,
495
506
  ) -> str:
496
507
  config = VideoTaskConfig(
@@ -639,7 +650,7 @@ class ThordataClient:
639
650
  file_name: str,
640
651
  spider_id: str,
641
652
  spider_name: str,
642
- parameters: dict[str, Any],
653
+ parameters: dict[str, Any] | list[dict[str, Any]],
643
654
  universal_params: dict[str, Any] | None = None,
644
655
  *,
645
656
  max_wait: float = 600.0,
@@ -648,6 +659,7 @@ class ThordataClient:
648
659
  include_errors: bool = True,
649
660
  task_type: str = "web",
650
661
  common_settings: CommonSettings | None = None,
662
+ data_format: DataFormat | str | None = None,
651
663
  ) -> str:
652
664
  import time
653
665
 
@@ -671,6 +683,7 @@ class ThordataClient:
671
683
  parameters=parameters,
672
684
  universal_params=universal_params,
673
685
  include_errors=include_errors,
686
+ data_format=data_format,
674
687
  )
675
688
  task_id = self.create_scraper_task_advanced(config)
676
689
 
@@ -862,7 +875,12 @@ class ThordataClient:
862
875
  if port:
863
876
  params["port"] = str(port)
864
877
 
865
- username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
878
+ if product == "unlimited":
879
+ username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
880
+ "THORDATA_RESIDENTIAL_USERNAME"
881
+ )
882
+ else:
883
+ username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
866
884
  if username:
867
885
  params["td-customer"] = username
868
886
 
@@ -1207,12 +1225,22 @@ class ThordataClient:
1207
1225
  # =========================================================================
1208
1226
 
1209
1227
  def _process_universal_response(
1210
- self, response: requests.Response, output_format: str
1211
- ) -> str | bytes:
1228
+ self, response: requests.Response, output_format: str | list[str]
1229
+ ) -> str | bytes | dict[str, str | bytes]:
1230
+ """Process universal scrape response. Returns single value or dict if multiple formats requested."""
1212
1231
  try:
1213
1232
  resp_json = response.json()
1214
1233
  except ValueError:
1215
- return response.content if output_format.lower() == "png" else response.text
1234
+ # If not JSON, return raw content based on format
1235
+ if isinstance(output_format, list):
1236
+ # Multiple formats requested but got non-JSON response
1237
+ return {"raw": response.content}
1238
+ fmt = (
1239
+ output_format.lower()
1240
+ if isinstance(output_format, str)
1241
+ else str(output_format).lower()
1242
+ )
1243
+ return response.content if fmt == "png" else response.text
1216
1244
 
1217
1245
  if isinstance(resp_json, dict):
1218
1246
  code = resp_json.get("code")
@@ -1220,6 +1248,29 @@ class ThordataClient:
1220
1248
  msg = extract_error_message(resp_json)
1221
1249
  raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
1222
1250
 
1251
+ # Handle multiple output formats
1252
+ if isinstance(output_format, list) or (
1253
+ isinstance(output_format, str) and "," in output_format
1254
+ ):
1255
+ result: dict[str, str | bytes] = {}
1256
+ formats = (
1257
+ output_format
1258
+ if isinstance(output_format, list)
1259
+ else [f.strip() for f in output_format.split(",")]
1260
+ )
1261
+
1262
+ for fmt in formats:
1263
+ fmt_lower = fmt.lower()
1264
+ if fmt_lower == "html" and "html" in resp_json:
1265
+ result["html"] = resp_json["html"]
1266
+ elif fmt_lower == "png" and "png" in resp_json:
1267
+ result["png"] = decode_base64_image(resp_json["png"])
1268
+
1269
+ # If we got results, return dict; otherwise return single value for backward compatibility
1270
+ if result:
1271
+ return result
1272
+
1273
+ # Single format (backward compatibility)
1223
1274
  if "html" in resp_json:
1224
1275
  return resp_json["html"]
1225
1276
  if "png" in resp_json:
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Enumerations for the Thordata Python SDK.
3
- Moved to thordata.types in v1.5.0.
3
+ Moved to thordata.types in v1.6.0.
4
4
  This file is kept for backward compatibility.
5
5
  """
6
6
 
@@ -21,7 +21,7 @@ from .types import (
21
21
  SessionType,
22
22
  TaskStatus,
23
23
  TimeRange,
24
- normalize_enum_value, # 新增
24
+ normalize_enum_value,
25
25
  )
26
26
 
27
27
  __all__ = [