thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +55 -13
- thordata/client.py +64 -13
- thordata/enums.py +2 -2
- thordata/exceptions.py +80 -20
- thordata/models.py +1 -1
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +11 -1
- thordata/tools/code.py +17 -4
- thordata/tools/ecommerce.py +194 -10
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +47 -5
- thordata/tools/social.py +225 -41
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +80 -7
- thordata/types/serp.py +6 -2
- thordata/types/task.py +75 -9
- thordata/types/universal.py +37 -5
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/METADATA +63 -7
- thordata_sdk-1.7.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.5.0.dist-info/RECORD +0 -35
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/top_level.txt +0 -0
thordata/types/task.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
|
8
8
|
from dataclasses import dataclass
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from typing import Any
|
|
11
|
+
from urllib.parse import unquote
|
|
11
12
|
|
|
12
13
|
from .common import CommonSettings, ThordataBaseConfig
|
|
13
14
|
|
|
@@ -49,25 +50,90 @@ class DataFormat(str, Enum):
|
|
|
49
50
|
XLSX = "xlsx"
|
|
50
51
|
|
|
51
52
|
|
|
53
|
+
def _normalize_url_value(value: Any) -> Any:
|
|
54
|
+
if not isinstance(value, str):
|
|
55
|
+
return value
|
|
56
|
+
# Decode all percent-encoded characters to match Dashboard format
|
|
57
|
+
# Dashboard expects URLs in their raw/decoded form, not URL-encoded
|
|
58
|
+
# This ensures API/SDK submissions match manual Dashboard input exactly
|
|
59
|
+
try:
|
|
60
|
+
# Check if URL contains any percent-encoded characters
|
|
61
|
+
if "%" in value:
|
|
62
|
+
# Fully decode the URL to match Dashboard format
|
|
63
|
+
decoded = unquote(value)
|
|
64
|
+
# If decoding changed the value, use decoded version
|
|
65
|
+
# This handles cases like %26 -> &, %3A -> :, %2F -> /, etc.
|
|
66
|
+
if decoded != value:
|
|
67
|
+
return decoded
|
|
68
|
+
except Exception:
|
|
69
|
+
# If decoding fails, return original value
|
|
70
|
+
pass
|
|
71
|
+
return value
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _normalize_parameters(params: dict[str, Any]) -> dict[str, Any]:
|
|
75
|
+
# All parameter keys that contain URLs and should be normalized
|
|
76
|
+
# This ensures API/SDK submissions match Dashboard format exactly
|
|
77
|
+
url_keys = {
|
|
78
|
+
"url",
|
|
79
|
+
"domain",
|
|
80
|
+
"profileurl",
|
|
81
|
+
"posturl",
|
|
82
|
+
"seller_url",
|
|
83
|
+
# Additional URL-related keys that may be used
|
|
84
|
+
"link",
|
|
85
|
+
"href",
|
|
86
|
+
"page_url",
|
|
87
|
+
"product_url",
|
|
88
|
+
"category_url",
|
|
89
|
+
}
|
|
90
|
+
out: dict[str, Any] = {}
|
|
91
|
+
for k, v in params.items():
|
|
92
|
+
if k in url_keys:
|
|
93
|
+
out[k] = _normalize_url_value(v)
|
|
94
|
+
else:
|
|
95
|
+
out[k] = v
|
|
96
|
+
return out
|
|
97
|
+
|
|
98
|
+
|
|
52
99
|
@dataclass
class ScraperTaskConfig(ThordataBaseConfig):
    """Settings for one scraper task submission.

    ``parameters`` may be a single parameter dict or a list of them;
    ``to_payload`` always serializes them as a JSON array.
    """

    file_name: str
    spider_id: str
    spider_name: str
    parameters: dict[str, Any] | list[dict[str, Any]]
    universal_params: dict[str, Any] | None = None
    include_errors: bool = True
    # Support json, csv, xlsx output formats
    data_format: DataFormat | str | None = None

    def to_payload(self) -> dict[str, Any]:
        """Build the request payload dict for this task.

        Returns:
            A dict with ``file_name``, ``spider_id``, ``spider_name``,
            ``spider_parameters`` (JSON array string) and ``spider_errors``,
            plus ``spider_universal`` and ``data_format`` when those
            settings are truthy.
        """
        # Normalize parameters: decode percent-encoded URLs to reduce
        # API/Dashboard divergence. A single dict is wrapped in a list.
        raw = self.parameters
        entries = raw if isinstance(raw, list) else [raw]
        spider_parameters = json.dumps(
            [_normalize_parameters(entry) for entry in entries]
        )

        payload: dict[str, Any] = {
            "file_name": self.file_name,
            "spider_id": self.spider_id,
            "spider_name": self.spider_name,
            "spider_parameters": spider_parameters,
            "spider_errors": "true" if self.include_errors else "false",
        }

        if self.universal_params:
            payload["spider_universal"] = json.dumps(self.universal_params)

        # Add data_format if specified (for json/csv/xlsx output)
        if self.data_format:
            if isinstance(self.data_format, DataFormat):
                payload["data_format"] = self.data_format.value
            else:
                payload["data_format"] = str(self.data_format).lower()

        return payload
|
|
72
138
|
|
|
73
139
|
|
|
@@ -76,24 +142,24 @@ class VideoTaskConfig(ThordataBaseConfig):
|
|
|
76
142
|
file_name: str
|
|
77
143
|
spider_id: str
|
|
78
144
|
spider_name: str
|
|
79
|
-
parameters: dict[str, Any]
|
|
145
|
+
parameters: dict[str, Any] | list[dict[str, Any]]
|
|
80
146
|
common_settings: CommonSettings
|
|
81
147
|
include_errors: bool = True
|
|
82
148
|
|
|
83
149
|
def to_payload(self) -> dict[str, Any]:
    """Build the request payload dict for this video task.

    ``parameters`` is serialized as a JSON array: a list is dumped as-is
    and a single dict is wrapped in a one-element list.
    """
    entries = self.parameters
    if not isinstance(entries, list):
        entries = [entries]
    spider_parameters = json.dumps(entries)

    return {
        "file_name": self.file_name,
        "spider_id": self.spider_id,
        "spider_name": self.spider_name,
        "spider_parameters": spider_parameters,
        "spider_errors": "true" if self.include_errors else "false",
        "spider_universal": self.common_settings.to_json(),
    }
|
|
98
164
|
|
|
99
165
|
|
thordata/types/universal.py
CHANGED
|
@@ -15,12 +15,15 @@ from .common import ThordataBaseConfig
|
|
|
15
15
|
class UniversalScrapeRequest(ThordataBaseConfig):
|
|
16
16
|
url: str
|
|
17
17
|
js_render: bool = False
|
|
18
|
-
output_format: str
|
|
18
|
+
output_format: str | list[str] = (
|
|
19
|
+
"html" # 'html', 'png', or ['png', 'html'] for both
|
|
20
|
+
)
|
|
19
21
|
country: str | None = None
|
|
20
|
-
block_resources: str | None = None # 'script,image'
|
|
22
|
+
block_resources: str | None = None # 'script,image,video'
|
|
21
23
|
clean_content: str | None = None # 'js,css'
|
|
22
24
|
wait: int | None = None # ms
|
|
23
25
|
wait_for: str | None = None # selector
|
|
26
|
+
follow_redirect: bool | None = None # Follow redirects
|
|
24
27
|
|
|
25
28
|
# Headers/Cookies must be serialized to JSON in payload
|
|
26
29
|
headers: list[dict[str, str]] | None = None
|
|
@@ -29,12 +32,26 @@ class UniversalScrapeRequest(ThordataBaseConfig):
|
|
|
29
32
|
extra_params: dict[str, Any] = field(default_factory=dict)
|
|
30
33
|
|
|
31
34
|
def __post_init__(self) -> None:
|
|
35
|
+
# Normalize output_format to list for easier handling
|
|
36
|
+
if isinstance(self.output_format, str):
|
|
37
|
+
formats = [f.strip().lower() for f in self.output_format.split(",")]
|
|
38
|
+
else:
|
|
39
|
+
formats = [
|
|
40
|
+
f.lower() if isinstance(f, str) else str(f).lower()
|
|
41
|
+
for f in self.output_format
|
|
42
|
+
]
|
|
43
|
+
|
|
32
44
|
valid_formats = {"html", "png"}
|
|
33
|
-
if
|
|
45
|
+
invalid = [f for f in formats if f not in valid_formats]
|
|
46
|
+
if invalid:
|
|
34
47
|
raise ValueError(
|
|
35
|
-
f"Invalid output_format: {
|
|
48
|
+
f"Invalid output_format: {invalid}. Must be one or more of: {valid_formats}. "
|
|
49
|
+
f"Use comma-separated string like 'png,html' or list ['png', 'html'] for multiple formats."
|
|
36
50
|
)
|
|
37
51
|
|
|
52
|
+
# Store as list for to_payload
|
|
53
|
+
self._output_formats = formats
|
|
54
|
+
|
|
38
55
|
if self.wait is not None and (self.wait < 0 or self.wait > 100000):
|
|
39
56
|
raise ValueError("wait must be between 0 and 100000 milliseconds")
|
|
40
57
|
|
|
@@ -42,9 +59,22 @@ class UniversalScrapeRequest(ThordataBaseConfig):
|
|
|
42
59
|
payload: dict[str, Any] = {
|
|
43
60
|
"url": self.url,
|
|
44
61
|
"js_render": "True" if self.js_render else "False",
|
|
45
|
-
"type": self.output_format.lower(),
|
|
46
62
|
}
|
|
47
63
|
|
|
64
|
+
# Handle output format: support single or multiple formats (e.g., "png,html")
|
|
65
|
+
if hasattr(self, "_output_formats") and self._output_formats:
|
|
66
|
+
if len(self._output_formats) == 1:
|
|
67
|
+
payload["type"] = self._output_formats[0]
|
|
68
|
+
else:
|
|
69
|
+
# Multiple formats: join with comma (e.g., "png,html")
|
|
70
|
+
payload["type"] = ",".join(self._output_formats)
|
|
71
|
+
else:
|
|
72
|
+
# Fallback for backward compatibility
|
|
73
|
+
if isinstance(self.output_format, str):
|
|
74
|
+
payload["type"] = self.output_format.lower()
|
|
75
|
+
else:
|
|
76
|
+
payload["type"] = ",".join([str(f).lower() for f in self.output_format])
|
|
77
|
+
|
|
48
78
|
if self.country:
|
|
49
79
|
payload["country"] = self.country.lower()
|
|
50
80
|
if self.block_resources:
|
|
@@ -55,6 +85,8 @@ class UniversalScrapeRequest(ThordataBaseConfig):
|
|
|
55
85
|
payload["wait"] = str(self.wait)
|
|
56
86
|
if self.wait_for:
|
|
57
87
|
payload["wait_for"] = self.wait_for
|
|
88
|
+
if self.follow_redirect is not None:
|
|
89
|
+
payload["follow_redirect"] = "True" if self.follow_redirect else "False"
|
|
58
90
|
|
|
59
91
|
# Serialize complex objects as JSON strings
|
|
60
92
|
if self.headers:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thordata-sdk
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
5
|
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
6
|
License: MIT
|
|
@@ -63,9 +63,9 @@ Dynamic: license-file
|
|
|
63
63
|
|
|
64
64
|
## 📖 Introduction
|
|
65
65
|
|
|
66
|
-
The **Thordata Python SDK v1.
|
|
66
|
+
The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
67
67
|
|
|
68
|
-
**Why v1.
|
|
68
|
+
**Why v1.6.0?**
|
|
69
69
|
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
70
70
|
* **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
|
|
71
71
|
* **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
|
|
@@ -83,7 +83,7 @@ pip install thordata-sdk
|
|
|
83
83
|
|
|
84
84
|
## 🔐 Configuration
|
|
85
85
|
|
|
86
|
-
Set environment variables to avoid hardcoding credentials.
|
|
86
|
+
Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
|
|
87
87
|
|
|
88
88
|
```bash
|
|
89
89
|
# [Scraping APIs]
|
|
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
|
|
|
93
93
|
export THORDATA_PUBLIC_TOKEN="your_public_token"
|
|
94
94
|
export THORDATA_PUBLIC_KEY="your_public_key"
|
|
95
95
|
|
|
96
|
-
# [Proxy
|
|
96
|
+
# [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
|
|
97
97
|
export THORDATA_RESIDENTIAL_USERNAME="your_username"
|
|
98
98
|
export THORDATA_RESIDENTIAL_PASSWORD="your_password"
|
|
99
|
-
# Optional:
|
|
100
|
-
# export
|
|
99
|
+
# Optional: Unlimited (high-bandwidth) if your plan has separate credentials
|
|
100
|
+
# export THORDATA_UNLIMITED_USERNAME="..."
|
|
101
|
+
# export THORDATA_UNLIMITED_PASSWORD="..."
|
|
102
|
+
|
|
103
|
+
# Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
|
|
104
|
+
# export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
|
|
101
105
|
```
|
|
102
106
|
|
|
107
|
+
Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
|
|
108
|
+
|
|
103
109
|
---
|
|
104
110
|
|
|
105
111
|
## 🚀 Quick Start
|
|
@@ -199,6 +205,48 @@ if status == "finished":
|
|
|
199
205
|
print(f"Download: {data_url}")
|
|
200
206
|
```
|
|
201
207
|
|
|
208
|
+
### Web Scraper Tools (120+ Pre-built Tools)
|
|
209
|
+
|
|
210
|
+
Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from thordata import ThordataClient
|
|
214
|
+
from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
|
|
215
|
+
|
|
216
|
+
client = ThordataClient()
|
|
217
|
+
|
|
218
|
+
# Amazon Product by ASIN
|
|
219
|
+
task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
|
|
220
|
+
|
|
221
|
+
# Google Maps by Place ID
|
|
222
|
+
task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
|
|
223
|
+
|
|
224
|
+
# YouTube Video Download
|
|
225
|
+
from thordata import CommonSettings
|
|
226
|
+
settings = CommonSettings(resolution="<=360p", video_codec="vp9")
|
|
227
|
+
task_id = client.run_tool(YouTube.VideoDownload(
|
|
228
|
+
url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
|
|
229
|
+
common_settings=settings
|
|
230
|
+
))
|
|
231
|
+
|
|
232
|
+
# Wait and get results
|
|
233
|
+
status = client.wait_for_task(task_id, max_wait=300)
|
|
234
|
+
if status == "ready":
|
|
235
|
+
download_url = client.get_task_result(task_id)
|
|
236
|
+
print(f"Results: {download_url}")
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
**Available Platforms:**
|
|
240
|
+
- **E-Commerce**: Amazon, eBay, Walmart
|
|
241
|
+
- **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
|
|
242
|
+
- **Search**: Google Maps, Google Shopping, Google Play
|
|
243
|
+
- **Video**: YouTube (download, info, subtitles)
|
|
244
|
+
- **Code**: GitHub
|
|
245
|
+
- **Professional**: Indeed, Glassdoor, Crunchbase
|
|
246
|
+
- **Travel/Real Estate**: Booking, Airbnb, Zillow
|
|
247
|
+
|
|
248
|
+
See `examples/tools/` for more examples.
|
|
249
|
+
|
|
202
250
|
---
|
|
203
251
|
|
|
204
252
|
## 🛠️ Management APIs
|
|
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
|
|
|
226
274
|
|
|
227
275
|
---
|
|
228
276
|
|
|
277
|
+
## 🧪 Development & Testing
|
|
278
|
+
|
|
279
|
+
- **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
|
|
280
|
+
- **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
|
|
281
|
+
- **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
229
285
|
## 📄 License
|
|
230
286
|
|
|
231
287
|
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
thordata/__init__.py,sha256=FMOku6d17GrFjiJlRhvkx-JmhLLD7VlaADLC3FP6hHg,2287
|
|
2
|
+
thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
|
|
3
|
+
thordata/async_client.py,sha256=akYyUVm7aeUzmuYUKcQOtsgCNreqWw1x3kEShmBL-_c,41363
|
|
4
|
+
thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
|
|
5
|
+
thordata/client.py,sha256=LxLLUfT75_nFSfTK8NxoFPepPXyq8qwvJKdq7bieXkY,58981
|
|
6
|
+
thordata/enums.py,sha256=dO5QWpPFLpYP2GfLAdoFtxMTemhGNdr_NPqBoYfSFkk,764
|
|
7
|
+
thordata/exceptions.py,sha256=ntiq3F5sxAiEDmCnlcfS2GNb3Qa7DpRvMrhmgXhAGIg,11947
|
|
8
|
+
thordata/models.py,sha256=7GshQklo5aqke_ZQ2QIXiz9Ac5v6IRtvjWIjsBKEq6A,853
|
|
9
|
+
thordata/retry.py,sha256=X6Sa5IIb5EWD5fUJjKyhvWJyWQGPVgxLB3-vKoWfa5Q,11453
|
|
10
|
+
thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
|
|
11
|
+
thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
|
|
12
|
+
thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
|
|
13
|
+
thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
|
|
14
|
+
thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
|
|
15
|
+
thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
|
|
16
|
+
thordata/tools/__init__.py,sha256=_Sr042bW-OMMj-WruA93YeQ6FfeIXvWmHoHMAFQ72a8,840
|
|
17
|
+
thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
|
|
18
|
+
thordata/tools/code.py,sha256=fGuLEn_CydIq79XgMw5-EJDcp-nq2fenWVp7hKpsRNw,930
|
|
19
|
+
thordata/tools/ecommerce.py,sha256=8iZ7f46CYovPDfAS3lZhRXpXEyJ9PSFBw9w99-Zw8Qs,6584
|
|
20
|
+
thordata/tools/professional.py,sha256=2RJ76Sx1seftFpwgD4VRfRinoo-HAqYZucTnuIdV4Kw,4350
|
|
21
|
+
thordata/tools/search.py,sha256=2HLQaYK6JiGvzOFF9or9ORXNrzv6nDQUaEt83YbqiQA,2903
|
|
22
|
+
thordata/tools/social.py,sha256=6gcj1GUWJvDALpBMeobohIn6yPVo-LsqDsuUroNpHG8,10465
|
|
23
|
+
thordata/tools/travel.py,sha256=vRJAU-uzFVvLQ5Tc58vp3CY7OPWd2lcWh_9MvWMc1fs,2725
|
|
24
|
+
thordata/tools/video.py,sha256=HUFqdue-dtWmTVlYtmf5ffzuYDIzw5l3wk3Vr7AXQW0,4689
|
|
25
|
+
thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
|
|
26
|
+
thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
|
|
27
|
+
thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
|
|
28
|
+
thordata/types/serp.py,sha256=2jMqims_hmvFfqvvmyyp_SAecupWayAWNJGuHPzPI6o,6150
|
|
29
|
+
thordata/types/task.py,sha256=PVKNyzXZHNunThVb5dwHDtMZ9WmFpbaePDnkeV754CQ,6264
|
|
30
|
+
thordata/types/universal.py,sha256=8OIZs239fBxzSuLEe3VB9qCp1ddN5XKAXbpVm9MJuls,3631
|
|
31
|
+
thordata_sdk-1.7.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
|
|
32
|
+
thordata_sdk-1.7.0.dist-info/METADATA,sha256=0LIpFeY8-83XNvaNSZzZ5w2V8ET3N2puPMBz_4t-A5k,9308
|
|
33
|
+
thordata_sdk-1.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
34
|
+
thordata_sdk-1.7.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
|
|
35
|
+
thordata_sdk-1.7.0.dist-info/RECORD,,
|
thordata/_example_utils.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
from collections.abc import Iterable
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
from dotenv import load_dotenv
|
|
11
|
-
except Exception: # pragma: no cover
|
|
12
|
-
load_dotenv = None
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def load_env() -> None:
    """Load ``.env`` from two directories above this file's folder.

    Does nothing when python-dotenv could not be imported (``load_dotenv``
    is ``None`` in that case).
    """
    if load_dotenv is not None:
        env_file = Path(__file__).resolve().parents[2] / ".env"
        load_dotenv(dotenv_path=env_file)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def env(name: str) -> str:
    """Return environment variable ``name`` stripped of whitespace.

    An unset or empty variable yields the empty string.
    """
    raw = os.getenv(name)
    if not raw:
        return ""
    return raw.strip()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
    """Report whether any required environment variable is unset or blank.

    Args:
        required: Names of the environment variables that must be set.
        tip: Optional hint printed in place of the default one.

    Returns:
        ``True`` (after printing the missing names and a hint) when at
        least one variable is missing, otherwise ``False``.
    """
    absent = [key for key in required if not env(key)]
    if absent:
        print("Skipping live example: missing env:", ", ".join(absent))
        hint = tip if tip else "Tip: copy .env.example to .env and fill values, then re-run."
        print(hint)
        return True
    return False
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def parse_json_env(name: str, default: str = "{}") -> Any:
    """Parse environment variable ``name`` as JSON.

    ``default`` (itself a JSON string) is parsed instead when the variable
    is unset or blank.
    """
    text = env(name)
    if not text:
        text = default
    return json.loads(text)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def normalize_task_parameters(raw: Any) -> dict[str, Any]:
    """Accept ``{..}`` or ``[{..}]`` and return a single parameter dict.

    For a list only its first element is used.

    Raises:
        ValueError: If the list is empty, or the selected value is not a dict.
    """
    candidate = raw
    if isinstance(candidate, list):
        if len(candidate) == 0:
            raise ValueError("Task parameters JSON array must not be empty")
        candidate = candidate[0]
    if isinstance(candidate, dict):
        return candidate
    raise ValueError("Task parameters must be a JSON object (or array of objects)")
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def output_dir() -> Path:
    """Return the output directory for examples, creating it if needed.

    ``THORDATA_OUTPUT_DIR`` is used when set; otherwise the directory is
    ``examples/output`` under the folder two levels above this file's
    folder.
    """
    default_dir = Path(__file__).resolve().parents[2] / "examples" / "output"
    override = env("THORDATA_OUTPUT_DIR")
    target = Path(override) if override else default_dir
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def write_text(filename: str, content: str) -> Path:
    """Write ``content`` as UTF-8 to ``filename`` inside ``output_dir()``.

    Returns:
        The path of the written file.
    """
    destination = output_dir() / filename
    destination.write_text(content, encoding="utf-8", errors="replace")
    return destination
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def write_json(filename: str, data: Any) -> Path:
    """Write ``data`` as indented JSON to ``filename`` inside ``output_dir()``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).

    Returns:
        The path of the written file.
    """
    destination = output_dir() / filename
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8", errors="replace")
    return destination
|
thordata/demo.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Unified demo entrypoint for the Thordata Python SDK.
|
|
3
|
-
|
|
4
|
-
This module runs the example scripts from the repository's `examples/` directory
|
|
5
|
-
using `runpy`, so it does not require `examples/` to be an importable package.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
python -m thordata.demo serp
|
|
9
|
-
python -m thordata.demo universal
|
|
10
|
-
python -m thordata.demo scraper
|
|
11
|
-
python -m thordata.demo concurrency
|
|
12
|
-
|
|
13
|
-
Notes:
|
|
14
|
-
- This entrypoint is primarily intended for repository usage (dev/demo).
|
|
15
|
-
- When installed from PyPI, the `examples/` directory is typically not included.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
from __future__ import annotations
|
|
19
|
-
|
|
20
|
-
import runpy
|
|
21
|
-
import sys
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _configure_stdio() -> None:
    """Switch stdout and stderr to UTF-8 with replacement on encode errors.

    Avoids UnicodeEncodeError on Windows consoles with legacy encodings.
    Streams without a ``reconfigure`` method are left alone.
    """
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[attr-defined]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def _load_env() -> None:
|
|
34
|
-
# Optional .env support for local development
|
|
35
|
-
try:
|
|
36
|
-
from dotenv import load_dotenv
|
|
37
|
-
except ImportError:
|
|
38
|
-
return
|
|
39
|
-
load_dotenv()
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _repo_root() -> Path:
|
|
43
|
-
"""
|
|
44
|
-
Resolve repository root based on src layout:
|
|
45
|
-
<repo>/src/thordata/demo.py -> parents[2] == <repo>
|
|
46
|
-
"""
|
|
47
|
-
return Path(__file__).resolve().parents[2]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _examples_dir() -> Path:
    """Return the ``examples`` directory under the repository root."""
    return _repo_root().joinpath("examples")
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _demo_map() -> dict[str, Path]:
    """Map each demo name to the path of its script in the examples dir."""
    script_names = {
        "serp": "demo_serp_api.py",
        "universal": "demo_universal.py",
        "scraper": "demo_web_scraper_api.py",
        "concurrency": "async_high_concurrency.py",
    }
    base = _examples_dir()
    return {name: base / filename for name, filename in script_names.items()}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _usage() -> str:
    """Return the usage line listing the demo names in sorted order."""
    choices = sorted(_demo_map())
    joined = ", ".join(choices)
    return f"Usage: python -m thordata.demo [{joined}]"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def _run_demo(path: Path) -> int:
|
|
70
|
-
if not path.exists():
|
|
71
|
-
print(f"Error: demo script not found: {path}")
|
|
72
|
-
return 2
|
|
73
|
-
|
|
74
|
-
# Ensure examples dir is on sys.path (helpful if demo imports local helpers).
|
|
75
|
-
examples_dir = str(path.parent.resolve())
|
|
76
|
-
if examples_dir not in sys.path:
|
|
77
|
-
sys.path.insert(0, examples_dir)
|
|
78
|
-
|
|
79
|
-
try:
|
|
80
|
-
# Load without triggering `if __name__ == "__main__": ...`
|
|
81
|
-
ns = runpy.run_path(str(path), run_name="__thordata_demo__")
|
|
82
|
-
|
|
83
|
-
main_func = ns.get("main")
|
|
84
|
-
if callable(main_func):
|
|
85
|
-
return int(main_func()) # type: ignore[arg-type]
|
|
86
|
-
|
|
87
|
-
# Fallback: run as __main__ for scripts without main()
|
|
88
|
-
runpy.run_path(str(path), run_name="__main__")
|
|
89
|
-
return 0
|
|
90
|
-
|
|
91
|
-
except KeyboardInterrupt:
|
|
92
|
-
raise
|
|
93
|
-
except SystemExit as e:
|
|
94
|
-
# In case fallback run as __main__ triggered SystemExit
|
|
95
|
-
code = e.code
|
|
96
|
-
if code is None:
|
|
97
|
-
return 0
|
|
98
|
-
if isinstance(code, int):
|
|
99
|
-
return code
|
|
100
|
-
return 1
|
|
101
|
-
except Exception as e:
|
|
102
|
-
import traceback
|
|
103
|
-
|
|
104
|
-
print()
|
|
105
|
-
print("-" * 60)
|
|
106
|
-
print("[thordata.demo] The demo script raised an exception.")
|
|
107
|
-
print(f"[thordata.demo] Script: {path.name}")
|
|
108
|
-
print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
|
|
109
|
-
print()
|
|
110
|
-
print("Note: This is a failure within the demo script itself,")
|
|
111
|
-
print(" not an issue with the thordata.demo entrypoint.")
|
|
112
|
-
print("-" * 60)
|
|
113
|
-
traceback.print_exc()
|
|
114
|
-
return 1
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def main() -> int:
    """Command-line entry point: run the demo named by the first argument.

    Returns:
        2 when no demo name is given or the name is unknown (the usage line
        is printed in both cases); otherwise the exit code of the demo.
    """
    _configure_stdio()
    _load_env()

    args = sys.argv[1:]
    if not args:
        print(_usage())
        return 2

    name = args[0].strip().lower()
    path = _demo_map().get(name)
    if path is not None:
        return _run_demo(path)

    print(f"Unknown demo: {name}")
    print(_usage())
    return 2
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
thordata/__init__.py,sha256=-2bXx3LckBWrJ_E5HqFTOj7sm45AgrOnSWV4QN6f-7U,2287
|
|
2
|
-
thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
|
|
3
|
-
thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
|
|
4
|
-
thordata/async_client.py,sha256=zN59ZQfFVCuAGnGcyj-C_S9MbHzb17QbUISm46n6gpY,39439
|
|
5
|
-
thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
|
|
6
|
-
thordata/client.py,sha256=eA6jav_aAw2CQdSyrg3P59rELKo13K5tHqmSjEw3L_8,56717
|
|
7
|
-
thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
|
|
8
|
-
thordata/enums.py,sha256=_pahGhcq9Eh2ptL_WiNU2WlqKrydV_6e4U9G4erV9-s,774
|
|
9
|
-
thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
|
|
10
|
-
thordata/models.py,sha256=wozvlpS-Uv1DgkM_CEKOvldQ2InicxhIN0QiezIXPE4,853
|
|
11
|
-
thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
|
|
12
|
-
thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
|
|
13
|
-
thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
|
|
14
|
-
thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
|
|
15
|
-
thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
|
|
16
|
-
thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
|
|
17
|
-
thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
|
|
18
|
-
thordata/tools/__init__.py,sha256=ROryBBlCfq9cydaKXEPtnevjhg6GdFioAjdnp2VTR0M,606
|
|
19
|
-
thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
|
|
20
|
-
thordata/tools/code.py,sha256=opYMG7LdR90VjW5tn8wnRCwDT-zUC0uteMKW01TMPTI,580
|
|
21
|
-
thordata/tools/ecommerce.py,sha256=u-s-RGMSAGifsMnyMrwtJ3yVDgu3n74bv8yyX6TbMNU,1560
|
|
22
|
-
thordata/tools/search.py,sha256=toWMOnnfQXgafyndHs23Yn049vpPlGPHdZA7SpiJJTE,1724
|
|
23
|
-
thordata/tools/social.py,sha256=VbujfbA5Man6Shsik4QYBpf9z2FJhhJkZLNKll09Ots,4886
|
|
24
|
-
thordata/tools/video.py,sha256=WikUOYPSVtHdrS0Z7VVexlUPyFZRv9v7cerkpzzO5jU,2549
|
|
25
|
-
thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
|
|
26
|
-
thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
|
|
27
|
-
thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
|
|
28
|
-
thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
|
|
29
|
-
thordata/types/task.py,sha256=f5xGeH4BrE7sHIgWhRJuMr3iuPooxJlg7ztr8lwcSx8,4139
|
|
30
|
-
thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
|
|
31
|
-
thordata_sdk-1.5.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
|
|
32
|
-
thordata_sdk-1.5.0.dist-info/METADATA,sha256=VqsfaJsguO-KSMOjWjPodO1nIa510qpjNBdVzCMHshQ,7026
|
|
33
|
-
thordata_sdk-1.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
34
|
-
thordata_sdk-1.5.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
|
|
35
|
-
thordata_sdk-1.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|