thordata-sdk 1.5.0__tar.gz → 1.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/PKG-INFO +63 -7
  2. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/README.md +62 -6
  3. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/pyproject.toml +3 -3
  4. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/__init__.py +1 -1
  5. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/async_client.py +12 -7
  6. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/client.py +12 -7
  7. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/enums.py +2 -2
  8. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/exceptions.py +70 -19
  9. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/models.py +1 -1
  10. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/retry.py +1 -1
  11. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/tools/__init__.py +11 -1
  12. thordata_sdk-1.6.0/src/thordata/tools/code.py +39 -0
  13. thordata_sdk-1.6.0/src/thordata/tools/ecommerce.py +251 -0
  14. thordata_sdk-1.6.0/src/thordata/tools/professional.py +155 -0
  15. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/tools/search.py +47 -5
  16. thordata_sdk-1.6.0/src/thordata/tools/social.py +374 -0
  17. thordata_sdk-1.6.0/src/thordata/tools/travel.py +100 -0
  18. thordata_sdk-1.6.0/src/thordata/tools/video.py +154 -0
  19. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/task.py +16 -4
  20. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/PKG-INFO +63 -7
  21. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/SOURCES.txt +8 -3
  22. thordata_sdk-1.6.0/tests/test_async_client.py +424 -0
  23. thordata_sdk-1.6.0/tests/test_batch_creation.py +116 -0
  24. thordata_sdk-1.6.0/tests/test_client.py +606 -0
  25. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_enums.py +1 -1
  26. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_examples.py +4 -1
  27. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_integration_proxy_protocols.py +2 -3
  28. thordata_sdk-1.6.0/tests/test_retry.py +317 -0
  29. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_spec_parity.py +36 -2
  30. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_tools.py +5 -3
  31. thordata_sdk-1.6.0/tests/test_tools_coverage.py +102 -0
  32. thordata_sdk-1.6.0/tests/test_unlimited.py +184 -0
  33. thordata_sdk-1.6.0/tests/test_utils.py +126 -0
  34. thordata_sdk-1.5.0/src/thordata/_example_utils.py +0 -77
  35. thordata_sdk-1.5.0/src/thordata/demo.py +0 -138
  36. thordata_sdk-1.5.0/src/thordata/tools/code.py +0 -26
  37. thordata_sdk-1.5.0/src/thordata/tools/ecommerce.py +0 -67
  38. thordata_sdk-1.5.0/src/thordata/tools/social.py +0 -190
  39. thordata_sdk-1.5.0/src/thordata/tools/video.py +0 -81
  40. thordata_sdk-1.5.0/tests/test_async_client.py +0 -111
  41. thordata_sdk-1.5.0/tests/test_client.py +0 -121
  42. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/LICENSE +0 -0
  43. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/setup.cfg +0 -0
  44. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/_utils.py +0 -0
  45. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/async_unlimited.py +0 -0
  46. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/core/__init__.py +0 -0
  47. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/core/async_http_client.py +0 -0
  48. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/core/http_client.py +0 -0
  49. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/core/tunnel.py +0 -0
  50. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/serp_engines.py +0 -0
  51. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/tools/base.py +0 -0
  52. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/__init__.py +0 -0
  53. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/common.py +0 -0
  54. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/proxy.py +0 -0
  55. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/serp.py +0 -0
  56. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/types/universal.py +0 -0
  57. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata/unlimited.py +0 -0
  58. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  59. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
  60. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  61. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_async_client_errors.py +0 -0
  62. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_client_errors.py +0 -0
  63. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_exceptions.py +0 -0
  64. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_models.py +0 -0
  65. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_task_status_and_wait.py +0 -0
  66. {thordata_sdk-1.5.0 → thordata_sdk-1.6.0}/tests/test_user_agent.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.5.0
3
+ Version: 1.6.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -63,9 +63,9 @@ Dynamic: license-file
63
63
 
64
64
  ## 📖 Introduction
65
65
 
66
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
66
+ The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
67
67
 
68
- **Why v1.5.0?**
68
+ **Why v1.6.0?**
69
69
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
70
70
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
71
71
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -83,7 +83,7 @@ pip install thordata-sdk
83
83
 
84
84
  ## 🔐 Configuration
85
85
 
86
- Set environment variables to avoid hardcoding credentials.
86
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
87
87
 
88
88
  ```bash
89
89
  # [Scraping APIs]
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
93
93
  export THORDATA_PUBLIC_TOKEN="your_public_token"
94
94
  export THORDATA_PUBLIC_KEY="your_public_key"
95
95
 
96
- # [Proxy Network]
96
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
97
97
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
98
98
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
99
- # Optional: Set upstream proxy for local dev (e.g., Clash)
100
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
99
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
100
+ # export THORDATA_UNLIMITED_USERNAME="..."
101
+ # export THORDATA_UNLIMITED_PASSWORD="..."
102
+
103
+ # Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
104
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
101
105
  ```
102
106
 
107
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
108
+
103
109
  ---
104
110
 
105
111
  ## 🚀 Quick Start
@@ -199,6 +205,48 @@ if status == "finished":
199
205
  print(f"Download: {data_url}")
200
206
  ```
201
207
 
208
+ ### Web Scraper Tools (120+ Pre-built Tools)
209
+
210
+ Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
211
+
212
+ ```python
213
+ from thordata import ThordataClient
214
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
215
+
216
+ client = ThordataClient()
217
+
218
+ # Amazon Product by ASIN
219
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
220
+
221
+ # Google Maps by Place ID
222
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
223
+
224
+ # YouTube Video Download
225
+ from thordata import CommonSettings
226
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
227
+ task_id = client.run_tool(YouTube.VideoDownload(
228
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
229
+ common_settings=settings
230
+ ))
231
+
232
+ # Wait and get results
233
+ status = client.wait_for_task(task_id, max_wait=300)
234
+ if status == "ready":
235
+ download_url = client.get_task_result(task_id)
236
+ print(f"Results: {download_url}")
237
+ ```
238
+
239
+ **Available Platforms:**
240
+ - **E-Commerce**: Amazon, eBay, Walmart
241
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
242
+ - **Search**: Google Maps, Google Shopping, Google Play
243
+ - **Video**: YouTube (download, info, subtitles)
244
+ - **Code**: GitHub
245
+ - **Professional**: Indeed, Glassdoor, Crunchbase
246
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
247
+
248
+ See `examples/tools/` for more examples.
249
+
202
250
  ---
203
251
 
204
252
  ## 🛠️ Management APIs
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
226
274
 
227
275
  ---
228
276
 
277
+ ## 🧪 Development & Testing
278
+
279
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
280
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
281
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
282
+
283
+ ---
284
+
229
285
  ## 📄 License
230
286
 
231
287
  MIT License. See [LICENSE](LICENSE) for details.
@@ -19,9 +19,9 @@
19
19
 
20
20
  ## 📖 Introduction
21
21
 
22
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
22
+ The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
23
23
 
24
- **Why v1.5.0?**
24
+ **Why v1.6.0?**
25
25
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
26
26
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
27
27
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -39,7 +39,7 @@ pip install thordata-sdk
39
39
 
40
40
  ## 🔐 Configuration
41
41
 
42
- Set environment variables to avoid hardcoding credentials.
42
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
43
43
 
44
44
  ```bash
45
45
  # [Scraping APIs]
@@ -49,13 +49,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
49
49
  export THORDATA_PUBLIC_TOKEN="your_public_token"
50
50
  export THORDATA_PUBLIC_KEY="your_public_key"
51
51
 
52
- # [Proxy Network]
52
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
53
53
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
54
54
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
55
- # Optional: Set upstream proxy for local dev (e.g., Clash)
56
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
55
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
56
+ # export THORDATA_UNLIMITED_USERNAME="..."
57
+ # export THORDATA_UNLIMITED_PASSWORD="..."
58
+
59
+ # Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
60
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
57
61
  ```
58
62
 
63
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
64
+
59
65
  ---
60
66
 
61
67
  ## 🚀 Quick Start
@@ -155,6 +161,48 @@ if status == "finished":
155
161
  print(f"Download: {data_url}")
156
162
  ```
157
163
 
164
+ ### Web Scraper Tools (120+ Pre-built Tools)
165
+
166
+ Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
167
+
168
+ ```python
169
+ from thordata import ThordataClient
170
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
171
+
172
+ client = ThordataClient()
173
+
174
+ # Amazon Product by ASIN
175
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
176
+
177
+ # Google Maps by Place ID
178
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
179
+
180
+ # YouTube Video Download
181
+ from thordata import CommonSettings
182
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
183
+ task_id = client.run_tool(YouTube.VideoDownload(
184
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
185
+ common_settings=settings
186
+ ))
187
+
188
+ # Wait and get results
189
+ status = client.wait_for_task(task_id, max_wait=300)
190
+ if status == "ready":
191
+ download_url = client.get_task_result(task_id)
192
+ print(f"Results: {download_url}")
193
+ ```
194
+
195
+ **Available Platforms:**
196
+ - **E-Commerce**: Amazon, eBay, Walmart
197
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
198
+ - **Search**: Google Maps, Google Shopping, Google Play
199
+ - **Video**: YouTube (download, info, subtitles)
200
+ - **Code**: GitHub
201
+ - **Professional**: Indeed, Glassdoor, Crunchbase
202
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
203
+
204
+ See `examples/tools/` for more examples.
205
+
158
206
  ---
159
207
 
160
208
  ## 🛠️ Management APIs
@@ -182,6 +230,14 @@ monitor = client.unlimited.get_server_monitor(
182
230
 
183
231
  ---
184
232
 
233
+ ## 🧪 Development & Testing
234
+
235
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
236
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
237
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
238
+
239
+ ---
240
+
185
241
  ## 📄 License
186
242
 
187
243
  MIT License. See [LICENSE](LICENSE) for details.
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "thordata-sdk"
8
- version = "1.5.0"
8
+ version = "1.6.0"
9
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -120,11 +120,11 @@ ignore_missing_imports = true
120
120
  module = ["aiohttp.*", "requests.*"]
121
121
  ignore_missing_imports = true
122
122
 
123
- # Pytest setup
123
+ # Pytest setup (coverage is run separately via coverage CLI for reliability)
124
124
  [tool.pytest.ini_options]
125
125
  testpaths = ["tests"]
126
126
  asyncio_mode = "auto"
127
- addopts = "-v --cov=thordata --cov-report=term-missing"
127
+ addopts = "-v"
128
128
  markers = ["integration: live tests that require real credentials"]
129
129
 
130
130
  # Coverage setup
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
5
5
  Universal Scraping API (Web Unlocker), and Web Scraper API.
6
6
  """
7
7
 
8
- __version__ = "1.5.0"
8
+ __version__ = "1.6.0"
9
9
  __author__ = "Thordata Developer Team/Kael Odin"
10
10
  __email__ = "support@thordata.com"
11
11
 
@@ -124,10 +124,10 @@ class AsyncThordataClient:
124
124
  ).rstrip("/")
125
125
 
126
126
  self._gateway_base_url = os.getenv(
127
- "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
127
+ "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
128
128
  )
129
129
  self._child_base_url = os.getenv(
130
- "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
130
+ "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
131
131
  )
132
132
 
133
133
  # URL Construction
@@ -145,7 +145,7 @@ class AsyncThordataClient:
145
145
  self._proxy_users_url = f"{shared_api_base}/proxy-users"
146
146
 
147
147
  whitelist_base = os.getenv(
148
- "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
148
+ "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
149
149
  )
150
150
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
151
151
 
@@ -352,7 +352,7 @@ class AsyncThordataClient:
352
352
  file_name: str,
353
353
  spider_id: str,
354
354
  spider_name: str,
355
- parameters: dict[str, Any],
355
+ parameters: dict[str, Any] | list[dict[str, Any]],
356
356
  universal_params: dict[str, Any] | None = None,
357
357
  ) -> str:
358
358
  config = ScraperTaskConfig(
@@ -434,7 +434,7 @@ class AsyncThordataClient:
434
434
  file_name: str,
435
435
  spider_id: str,
436
436
  spider_name: str,
437
- parameters: dict[str, Any],
437
+ parameters: dict[str, Any] | list[dict[str, Any]],
438
438
  common_settings: CommonSettings,
439
439
  ) -> str:
440
440
  config = VideoTaskConfig(
@@ -550,7 +550,7 @@ class AsyncThordataClient:
550
550
  file_name: str,
551
551
  spider_id: str,
552
552
  spider_name: str,
553
- parameters: dict[str, Any],
553
+ parameters: dict[str, Any] | list[dict[str, Any]],
554
554
  universal_params: dict[str, Any] | None = None,
555
555
  *,
556
556
  max_wait: float = 600.0,
@@ -971,7 +971,12 @@ class AsyncThordataClient:
971
971
  if port:
972
972
  params["port"] = str(port)
973
973
 
974
- username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
974
+ if product == "unlimited":
975
+ username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
976
+ "THORDATA_RESIDENTIAL_USERNAME"
977
+ )
978
+ else:
979
+ username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
975
980
  if username:
976
981
  params["td-customer"] = username
977
982
 
@@ -159,10 +159,10 @@ class ThordataClient:
159
159
  ).rstrip("/")
160
160
 
161
161
  self._gateway_base_url = os.getenv(
162
- "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
162
+ "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
163
163
  )
164
164
  self._child_base_url = os.getenv(
165
- "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
165
+ "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
166
166
  )
167
167
 
168
168
  # URL Construction
@@ -183,7 +183,7 @@ class ThordataClient:
183
183
  self._proxy_users_url = f"{shared_api_base}/proxy-users"
184
184
 
185
185
  whitelist_base = os.getenv(
186
- "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
186
+ "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
187
187
  )
188
188
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
189
189
 
@@ -405,7 +405,7 @@ class ThordataClient:
405
405
  file_name: str,
406
406
  spider_id: str,
407
407
  spider_name: str,
408
- parameters: dict[str, Any],
408
+ parameters: dict[str, Any] | list[dict[str, Any]],
409
409
  universal_params: dict[str, Any] | None = None,
410
410
  ) -> str:
411
411
  config = ScraperTaskConfig(
@@ -490,7 +490,7 @@ class ThordataClient:
490
490
  file_name: str,
491
491
  spider_id: str,
492
492
  spider_name: str,
493
- parameters: dict[str, Any],
493
+ parameters: dict[str, Any] | list[dict[str, Any]],
494
494
  common_settings: CommonSettings,
495
495
  ) -> str:
496
496
  config = VideoTaskConfig(
@@ -639,7 +639,7 @@ class ThordataClient:
639
639
  file_name: str,
640
640
  spider_id: str,
641
641
  spider_name: str,
642
- parameters: dict[str, Any],
642
+ parameters: dict[str, Any] | list[dict[str, Any]],
643
643
  universal_params: dict[str, Any] | None = None,
644
644
  *,
645
645
  max_wait: float = 600.0,
@@ -862,7 +862,12 @@ class ThordataClient:
862
862
  if port:
863
863
  params["port"] = str(port)
864
864
 
865
- username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
865
+ if product == "unlimited":
866
+ username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
867
+ "THORDATA_RESIDENTIAL_USERNAME"
868
+ )
869
+ else:
870
+ username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
866
871
  if username:
867
872
  params["td-customer"] = username
868
873
 
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Enumerations for the Thordata Python SDK.
3
- Moved to thordata.types in v1.5.0.
3
+ Moved to thordata.types in v1.6.0.
4
4
  This file is kept for backward compatibility.
5
5
  """
6
6
 
@@ -21,7 +21,7 @@ from .types import (
21
21
  SessionType,
22
22
  TaskStatus,
23
23
  TimeRange,
24
- normalize_enum_value, # 新增
24
+ normalize_enum_value,
25
25
  )
26
26
 
27
27
  __all__ = [
@@ -15,6 +15,7 @@ Exception Hierarchy:
15
15
 
16
16
  from __future__ import annotations
17
17
 
18
+ from collections.abc import Mapping
18
19
  from typing import Any
19
20
 
20
21
  # =============================================================================
@@ -235,6 +236,46 @@ class ThordataNotCollectedError(ThordataAPIError):
235
236
  # =============================================================================
236
237
 
237
238
 
239
+ def _extract_request_id(payload: Any) -> str | None:
240
+ if isinstance(payload, Mapping):
241
+ for key in ("request_id", "requestId", "x_request_id", "x-request-id"):
242
+ val = payload.get(key)
243
+ if val is not None:
244
+ return str(val)
245
+ return None
246
+
247
+
248
+ def _extract_retry_after(payload: Any) -> int | None:
249
+ if isinstance(payload, Mapping):
250
+ for key in ("retry_after", "retryAfter", "retry-after"):
251
+ val = payload.get(key)
252
+ if isinstance(val, int):
253
+ return val
254
+ if isinstance(val, str) and val.isdigit():
255
+ return int(val)
256
+ return None
257
+
258
+
259
+ def _build_error_message(
260
+ message: str,
261
+ *,
262
+ status_code: int | None,
263
+ code: int | None,
264
+ request_id: str | None,
265
+ ) -> str:
266
+ parts: list[str] = [message]
267
+ meta: list[str] = []
268
+ if status_code is not None:
269
+ meta.append(f"http={status_code}")
270
+ if code is not None and code != status_code:
271
+ meta.append(f"code={code}")
272
+ if request_id:
273
+ meta.append(f"request_id={request_id}")
274
+ if meta:
275
+ parts.append("(" + ", ".join(meta) + ")")
276
+ return " ".join(parts)
277
+
278
+
238
279
  def raise_for_code(
239
280
  message: str,
240
281
  *,
@@ -266,49 +307,59 @@ def raise_for_code(
266
307
  # Determine the effective error code.
267
308
  # Prefer payload `code` when present and not success (200),
268
309
  # otherwise fall back to HTTP status when it indicates an error.
310
+ # Determine the effective error code for routing.
269
311
  effective_code: int | None = None
270
-
271
312
  if code is not None and code != 200:
272
313
  effective_code = code
273
- elif status_code is not None and status_code != 200:
314
+ elif status_code is not None and status_code >= 400:
274
315
  effective_code = status_code
275
316
  else:
276
317
  effective_code = code if code is not None else status_code
277
318
 
319
+ # Extract additional context from payload
320
+ final_request_id = request_id or _extract_request_id(payload)
321
+
322
+ # Build a consistent, informative error message
323
+ final_message = _build_error_message(
324
+ message,
325
+ status_code=status_code,
326
+ code=code,
327
+ request_id=final_request_id,
328
+ )
329
+
330
+ # Prepare common arguments for exception constructors
278
331
  kwargs = {
279
332
  "status_code": status_code,
280
333
  "code": code,
281
334
  "payload": payload,
282
- "request_id": request_id,
335
+ "request_id": final_request_id,
283
336
  }
284
337
 
338
+ # --- Route to the correct exception class ---
339
+
285
340
  # Not collected (API payload code 300, often retryable, not billed)
286
- # Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
287
341
  if effective_code in ThordataNotCollectedError.API_CODES:
288
- raise ThordataNotCollectedError(message, **kwargs)
342
+ raise ThordataNotCollectedError(final_message, **kwargs)
289
343
 
290
- # Auth errors
344
+ # Auth errors (401, 403)
291
345
  if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
292
- raise ThordataAuthError(message, **kwargs)
346
+ raise ThordataAuthError(final_message, **kwargs)
293
347
 
294
- # Rate limit errors
348
+ # Rate limit errors (429, 402)
295
349
  if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
296
- # Try to extract retry_after from payload
297
- retry_after = None
298
- if isinstance(payload, dict):
299
- retry_after = payload.get("retry_after")
300
- raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
350
+ retry_after = _extract_retry_after(payload)
351
+ raise ThordataRateLimitError(final_message, retry_after=retry_after, **kwargs)
301
352
 
302
- # Server errors
353
+ # Server errors (5xx)
303
354
  if effective_code is not None and 500 <= effective_code < 600:
304
- raise ThordataServerError(message, **kwargs)
355
+ raise ThordataServerError(final_message, **kwargs)
305
356
 
306
- # Validation errors
357
+ # Validation errors (400, 422)
307
358
  if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
308
- raise ThordataValidationError(message, **kwargs)
359
+ raise ThordataValidationError(final_message, **kwargs)
309
360
 
310
- # Generic API error
311
- raise ThordataAPIError(message, **kwargs)
361
+ # Fallback to generic API error if no specific match
362
+ raise ThordataAPIError(final_message, **kwargs)
312
363
 
313
364
 
314
365
  # =============================================================================
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Data models for the Thordata Python SDK.
3
- Moved to thordata.types in v1.5.0.
3
+ Moved to thordata.types in v1.6.0.
4
4
  This file is kept for backward compatibility.
5
5
  """
6
6
 
@@ -186,7 +186,7 @@ def with_retry(
186
186
  if isinstance(e, ThordataRateLimitError) and e.retry_after:
187
187
  delay = max(delay, e.retry_after)
188
188
 
189
- logger.warning(
189
+ logger.info(
190
190
  f"Retry attempt {attempt + 1}/{config.max_retries} "
191
191
  f"after {delay:.2f}s due to: {e}"
192
192
  )
@@ -5,15 +5,19 @@ High-level abstractions for specific scraping targets.
5
5
 
6
6
  from .base import ToolRequest, VideoToolRequest
7
7
  from .code import GitHub
8
- from .ecommerce import Amazon
8
+ from .ecommerce import Amazon, Walmart, eBay
9
+ from .professional import Crunchbase, Glassdoor, Indeed
9
10
  from .search import GoogleMaps, GooglePlay, GoogleShopping
10
11
  from .social import Facebook, Instagram, LinkedIn, Reddit, TikTok, Twitter
12
+ from .travel import Airbnb, Booking, Zillow
11
13
  from .video import YouTube
12
14
 
13
15
  __all__ = [
14
16
  "ToolRequest",
15
17
  "VideoToolRequest",
16
18
  "Amazon",
19
+ "eBay",
20
+ "Walmart",
17
21
  "GoogleMaps",
18
22
  "GoogleShopping",
19
23
  "GooglePlay",
@@ -25,4 +29,10 @@ __all__ = [
25
29
  "Reddit",
26
30
  "YouTube",
27
31
  "GitHub",
32
+ "Indeed",
33
+ "Glassdoor",
34
+ "Crunchbase",
35
+ "Booking",
36
+ "Zillow",
37
+ "Airbnb",
28
38
  ]
@@ -0,0 +1,39 @@
1
+ """
2
+ Code Repository Scraper Tools (GitHub, etc.)
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from .base import ToolRequest
10
+
11
+
12
+ class GitHub:
13
+ """Namespace for GitHub tools."""
14
+
15
+ @dataclass
16
+ class Repository(ToolRequest):
17
+ """Github Repository Scraper by Repo URL"""
18
+
19
+ SPIDER_ID = "github_repository_by-repo-url"
20
+ SPIDER_NAME = "github.com"
21
+ repo_url: str
22
+
23
+ @dataclass
24
+ class RepositoryBySearchUrl(ToolRequest):
25
+ """Github Repository Scraper by Search URL"""
26
+
27
+ SPIDER_ID = "github_repository_by-search-url"
28
+ SPIDER_NAME = "github.com"
29
+ search_url: str
30
+ page_turning: int | None = None
31
+ max_num: int | None = None
32
+
33
+ @dataclass
34
+ class RepositoryByUrl(ToolRequest):
35
+ """Github Repository Scraper by URL"""
36
+
37
+ SPIDER_ID = "github_repository_by-url"
38
+ SPIDER_NAME = "github.com"
39
+ url: str