thordata-sdk 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +12 -7
- thordata/client.py +12 -7
- thordata/enums.py +2 -2
- thordata/exceptions.py +70 -19
- thordata/models.py +1 -1
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +11 -1
- thordata/tools/code.py +17 -4
- thordata/tools/ecommerce.py +194 -10
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +47 -5
- thordata/tools/social.py +225 -41
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +80 -7
- thordata/types/task.py +16 -4
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/METADATA +63 -7
- thordata_sdk-1.6.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.5.0.dist-info/RECORD +0 -35
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/top_level.txt +0 -0
thordata/tools/travel.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from .base import ToolRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Booking:
|
|
13
|
+
"""Namespace for Booking.com tools."""
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class HotelByUrl(ToolRequest):
|
|
17
|
+
"""Booking Hotel Information Scraper by URL"""
|
|
18
|
+
|
|
19
|
+
SPIDER_ID = "booking_hotellist_by-url"
|
|
20
|
+
SPIDER_NAME = "booking.com"
|
|
21
|
+
url: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Zillow:
|
|
25
|
+
"""Namespace for Zillow tools."""
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class PriceByUrl(ToolRequest):
|
|
29
|
+
"""Zillow Property Price History Information Scraper by URL"""
|
|
30
|
+
|
|
31
|
+
SPIDER_ID = "zillow_price_by-url"
|
|
32
|
+
SPIDER_NAME = "zillow.com"
|
|
33
|
+
url: str
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class ProductByUrl(ToolRequest):
|
|
37
|
+
"""Zillow Property Details Information Scraper by URL"""
|
|
38
|
+
|
|
39
|
+
SPIDER_ID = "zillow_product_by-url"
|
|
40
|
+
SPIDER_NAME = "zillow.com"
|
|
41
|
+
url: str
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class ProductByFilter(ToolRequest):
|
|
45
|
+
"""Zillow Property Details Information Scraper by Filter"""
|
|
46
|
+
|
|
47
|
+
SPIDER_ID = "zillow_product_by-filter"
|
|
48
|
+
SPIDER_NAME = "zillow.com"
|
|
49
|
+
keywords_location: str
|
|
50
|
+
listingCategory: str | None = None # For Rent, For Sale
|
|
51
|
+
HomeType: str | None = None # Houses
|
|
52
|
+
days_on_zillow: str | None = None # Any
|
|
53
|
+
maximum: int | None = None
|
|
54
|
+
|
|
55
|
+
@dataclass
|
|
56
|
+
class ProductByListUrl(ToolRequest):
|
|
57
|
+
"""Zillow Property Details Information Scraper by List URL"""
|
|
58
|
+
|
|
59
|
+
SPIDER_ID = "zillow_product_by-listurl"
|
|
60
|
+
SPIDER_NAME = "zillow.com"
|
|
61
|
+
url: str
|
|
62
|
+
maximum: int | None = None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class Airbnb:
|
|
66
|
+
"""Namespace for Airbnb tools."""
|
|
67
|
+
|
|
68
|
+
@dataclass
|
|
69
|
+
class ProductBySearchUrl(ToolRequest):
|
|
70
|
+
"""Airbnb Properties Information Scraper by Search URL"""
|
|
71
|
+
|
|
72
|
+
SPIDER_ID = "airbnb_product_by-searchurl"
|
|
73
|
+
SPIDER_NAME = "airbnb.com"
|
|
74
|
+
searchurl: str
|
|
75
|
+
country: str | None = None
|
|
76
|
+
|
|
77
|
+
@dataclass
|
|
78
|
+
class ProductByLocation(ToolRequest):
|
|
79
|
+
"""Airbnb Properties Information Scraper by Location"""
|
|
80
|
+
|
|
81
|
+
SPIDER_ID = "airbnb_product_by-location"
|
|
82
|
+
SPIDER_NAME = "airbnb.com"
|
|
83
|
+
location: str
|
|
84
|
+
check_in: str | None = None
|
|
85
|
+
check_out: str | None = None
|
|
86
|
+
num_of_adults: str | None = None
|
|
87
|
+
num_of_children: str | None = None
|
|
88
|
+
num_of_infants: str | None = None
|
|
89
|
+
num_of_pets: str | None = None
|
|
90
|
+
country: str | None = None
|
|
91
|
+
currency: str | None = None
|
|
92
|
+
|
|
93
|
+
@dataclass
|
|
94
|
+
class ProductByUrl(ToolRequest):
|
|
95
|
+
"""Airbnb Properties Information Scraper by URL"""
|
|
96
|
+
|
|
97
|
+
SPIDER_ID = "airbnb_product_by-url"
|
|
98
|
+
SPIDER_NAME = "airbnb.com"
|
|
99
|
+
url: str
|
|
100
|
+
country: str | None = None
|
thordata/tools/video.py
CHANGED
|
@@ -46,14 +46,23 @@ class YouTube:
|
|
|
46
46
|
|
|
47
47
|
@dataclass
|
|
48
48
|
class Profile(VideoToolRequest):
|
|
49
|
-
"""YouTube Profile Scraper. Uses video_builder."""
|
|
49
|
+
"""YouTube Profile Scraper by Keyword. Uses video_builder."""
|
|
50
50
|
|
|
51
51
|
SPIDER_ID = "youtube_profiles_by-keyword"
|
|
52
52
|
SPIDER_NAME = "youtube.com"
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
keyword: str
|
|
55
55
|
page_turning: int = 1
|
|
56
|
-
|
|
56
|
+
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
57
|
+
|
|
58
|
+
@dataclass
|
|
59
|
+
class ProfileByUrl(VideoToolRequest):
|
|
60
|
+
"""YouTube Profile Scraper by URL. Uses video_builder."""
|
|
61
|
+
|
|
62
|
+
SPIDER_ID = "youtube_profiles_by-url"
|
|
63
|
+
SPIDER_NAME = "youtube.com"
|
|
64
|
+
|
|
65
|
+
url: str # Channel URL
|
|
57
66
|
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
58
67
|
|
|
59
68
|
@dataclass
|
|
@@ -69,13 +78,77 @@ class YouTube:
|
|
|
69
78
|
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
70
79
|
|
|
71
80
|
@dataclass
|
|
72
|
-
class VideoInfo(
|
|
73
|
-
"""YouTube Video
|
|
81
|
+
class VideoInfo(VideoToolRequest):
|
|
82
|
+
"""YouTube Video Basic Information Scraper. Uses video_builder."""
|
|
83
|
+
|
|
84
|
+
SPIDER_ID = "youtube_product_by-id"
|
|
85
|
+
SPIDER_NAME = "youtube.com"
|
|
86
|
+
|
|
87
|
+
video_id: str
|
|
88
|
+
common_settings: CommonSettings = field(default_factory=CommonSettings)
|
|
89
|
+
|
|
90
|
+
@dataclass
|
|
91
|
+
class VideoPostByUrl(ToolRequest):
|
|
92
|
+
"""YouTube Video Post Scraper by URL. Uses standard builder."""
|
|
74
93
|
|
|
75
|
-
# Note: This one does NOT inherit from VideoToolRequest because it uses the standard builder
|
|
76
|
-
# and doesn't support common_settings in the same way.
|
|
77
94
|
SPIDER_ID = "youtube_video-post_by-url"
|
|
78
95
|
SPIDER_NAME = "youtube.com"
|
|
79
96
|
|
|
80
97
|
url: str # Channel Video URL
|
|
98
|
+
order_by: str | None = None
|
|
99
|
+
start_index: str | None = None
|
|
81
100
|
num_of_posts: str | None = None
|
|
101
|
+
|
|
102
|
+
@dataclass
|
|
103
|
+
class VideoPostBySearchFilters(ToolRequest):
|
|
104
|
+
"""YouTube Video Post Scraper by Search Filters. Uses standard builder."""
|
|
105
|
+
|
|
106
|
+
SPIDER_ID = "youtube_video-post_by-search-filters"
|
|
107
|
+
SPIDER_NAME = "youtube.com"
|
|
108
|
+
|
|
109
|
+
keyword_search: str
|
|
110
|
+
features: str | None = None
|
|
111
|
+
type: str | None = None # Videos
|
|
112
|
+
duration: str | None = None
|
|
113
|
+
upload_date: str | None = None
|
|
114
|
+
num_of_posts: str | None = None
|
|
115
|
+
|
|
116
|
+
@dataclass
|
|
117
|
+
class VideoPostByHashtag(ToolRequest):
|
|
118
|
+
"""YouTube Video Post Scraper by Hashtag. Uses standard builder."""
|
|
119
|
+
|
|
120
|
+
SPIDER_ID = "youtube_video-post_by-hashtag"
|
|
121
|
+
SPIDER_NAME = "youtube.com"
|
|
122
|
+
|
|
123
|
+
hashtag: str
|
|
124
|
+
num_of_posts: str | None = None
|
|
125
|
+
|
|
126
|
+
@dataclass
|
|
127
|
+
class VideoPostByPodcastUrl(ToolRequest):
|
|
128
|
+
"""YouTube Video Post Scraper by Podcast URL. Uses standard builder."""
|
|
129
|
+
|
|
130
|
+
SPIDER_ID = "youtube_video-post_by-podcast-url"
|
|
131
|
+
SPIDER_NAME = "youtube.com"
|
|
132
|
+
|
|
133
|
+
url: str # Playlist URL
|
|
134
|
+
num_of_posts: str | None = None
|
|
135
|
+
|
|
136
|
+
@dataclass
|
|
137
|
+
class VideoPostByKeyword(ToolRequest):
|
|
138
|
+
"""YouTube Video Post Scraper by Keyword. Uses standard builder."""
|
|
139
|
+
|
|
140
|
+
SPIDER_ID = "youtube_video-post_by-keyword"
|
|
141
|
+
SPIDER_NAME = "youtube.com"
|
|
142
|
+
|
|
143
|
+
keyword: str
|
|
144
|
+
num_of_posts: str | None = None
|
|
145
|
+
|
|
146
|
+
@dataclass
|
|
147
|
+
class VideoPostByExplore(ToolRequest):
|
|
148
|
+
"""YouTube Video Post Scraper by Explore URL. Uses standard builder."""
|
|
149
|
+
|
|
150
|
+
SPIDER_ID = "youtube_video-post_by-explore"
|
|
151
|
+
SPIDER_NAME = "youtube.com"
|
|
152
|
+
|
|
153
|
+
url: str
|
|
154
|
+
all_tabs: str | None = None
|
thordata/types/task.py
CHANGED
|
@@ -54,16 +54,22 @@ class ScraperTaskConfig(ThordataBaseConfig):
|
|
|
54
54
|
file_name: str
|
|
55
55
|
spider_id: str
|
|
56
56
|
spider_name: str
|
|
57
|
-
parameters: dict[str, Any]
|
|
57
|
+
parameters: dict[str, Any] | list[dict[str, Any]]
|
|
58
58
|
universal_params: dict[str, Any] | None = None
|
|
59
59
|
include_errors: bool = True
|
|
60
60
|
|
|
61
61
|
def to_payload(self) -> dict[str, Any]:
|
|
62
|
+
# Handle batch parameters: if list, use as is; if dict, wrap in list
|
|
63
|
+
if isinstance(self.parameters, list):
|
|
64
|
+
params_json = json.dumps(self.parameters)
|
|
65
|
+
else:
|
|
66
|
+
params_json = json.dumps([self.parameters])
|
|
67
|
+
|
|
62
68
|
payload: dict[str, Any] = {
|
|
63
69
|
"file_name": self.file_name,
|
|
64
70
|
"spider_id": self.spider_id,
|
|
65
71
|
"spider_name": self.spider_name,
|
|
66
|
-
"spider_parameters":
|
|
72
|
+
"spider_parameters": params_json,
|
|
67
73
|
"spider_errors": "true" if self.include_errors else "false",
|
|
68
74
|
}
|
|
69
75
|
if self.universal_params:
|
|
@@ -76,16 +82,22 @@ class VideoTaskConfig(ThordataBaseConfig):
|
|
|
76
82
|
file_name: str
|
|
77
83
|
spider_id: str
|
|
78
84
|
spider_name: str
|
|
79
|
-
parameters: dict[str, Any]
|
|
85
|
+
parameters: dict[str, Any] | list[dict[str, Any]]
|
|
80
86
|
common_settings: CommonSettings
|
|
81
87
|
include_errors: bool = True
|
|
82
88
|
|
|
83
89
|
def to_payload(self) -> dict[str, Any]:
|
|
90
|
+
# Handle batch parameters
|
|
91
|
+
if isinstance(self.parameters, list):
|
|
92
|
+
params_json = json.dumps(self.parameters)
|
|
93
|
+
else:
|
|
94
|
+
params_json = json.dumps([self.parameters])
|
|
95
|
+
|
|
84
96
|
payload: dict[str, Any] = {
|
|
85
97
|
"file_name": self.file_name,
|
|
86
98
|
"spider_id": self.spider_id,
|
|
87
99
|
"spider_name": self.spider_name,
|
|
88
|
-
"spider_parameters":
|
|
100
|
+
"spider_parameters": params_json,
|
|
89
101
|
"spider_errors": "true" if self.include_errors else "false",
|
|
90
102
|
# v2.0 Doc explicitly requires 'spider_universal' key for video tasks too sometimes,
|
|
91
103
|
# but usually it's passed as 'common_settings' or 'spider_universal'.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thordata-sdk
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.6.0
|
|
4
4
|
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
5
|
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
6
|
License: MIT
|
|
@@ -63,9 +63,9 @@ Dynamic: license-file
|
|
|
63
63
|
|
|
64
64
|
## 📖 Introduction
|
|
65
65
|
|
|
66
|
-
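The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.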
|
|
66
|
+
The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
67
67
|
|
|
68
|
-
**Why v1.5.0?**
|
|
68
|
+
**Why v1.6.0?**
|
|
69
69
|
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
70
70
|
* **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
|
|
71
71
|
* **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
|
|
@@ -83,7 +83,7 @@ pip install thordata-sdk
|
|
|
83
83
|
|
|
84
84
|
## 🔐 Configuration
|
|
85
85
|
|
|
86
|
-
Set environment variables to avoid hardcoding credentials.
|
|
86
|
+
Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
|
|
87
87
|
|
|
88
88
|
```bash
|
|
89
89
|
# [Scraping APIs]
|
|
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
|
|
|
93
93
|
export THORDATA_PUBLIC_TOKEN="your_public_token"
|
|
94
94
|
export THORDATA_PUBLIC_KEY="your_public_key"
|
|
95
95
|
|
|
96
|
-
# [Proxy
|
|
96
|
+
# [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
|
|
97
97
|
export THORDATA_RESIDENTIAL_USERNAME="your_username"
|
|
98
98
|
export THORDATA_RESIDENTIAL_PASSWORD="your_password"
|
|
99
|
-
# Optional:
|
|
100
|
-
# export
|
|
99
|
+
# Optional: Unlimited (high-bandwidth) if your plan has separate credentials
|
|
100
|
+
# export THORDATA_UNLIMITED_USERNAME="..."
|
|
101
|
+
# export THORDATA_UNLIMITED_PASSWORD="..."
|
|
102
|
+
|
|
103
|
+
# Optional: Upstream proxy when behind firewall (e.g. Clash Verge port 7897)
|
|
104
|
+
# export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
|
|
101
105
|
```
|
|
102
106
|
|
|
107
|
+
Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
|
|
108
|
+
|
|
103
109
|
---
|
|
104
110
|
|
|
105
111
|
## 🚀 Quick Start
|
|
@@ -199,6 +205,48 @@ if status == "finished":
|
|
|
199
205
|
print(f"Download: {data_url}")
|
|
200
206
|
```
|
|
201
207
|
|
|
208
|
+
### Web Scraper Tools (120+ Pre-built Tools)
|
|
209
|
+
|
|
210
|
+
Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for full list.
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from thordata import ThordataClient
|
|
214
|
+
from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
|
|
215
|
+
|
|
216
|
+
client = ThordataClient()
|
|
217
|
+
|
|
218
|
+
# Amazon Product by ASIN
|
|
219
|
+
task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
|
|
220
|
+
|
|
221
|
+
# Google Maps by Place ID
|
|
222
|
+
task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
|
|
223
|
+
|
|
224
|
+
# YouTube Video Download
|
|
225
|
+
from thordata import CommonSettings
|
|
226
|
+
settings = CommonSettings(resolution="<=360p", video_codec="vp9")
|
|
227
|
+
task_id = client.run_tool(YouTube.VideoDownload(
|
|
228
|
+
url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
|
|
229
|
+
common_settings=settings
|
|
230
|
+
))
|
|
231
|
+
|
|
232
|
+
# Wait and get results
|
|
233
|
+
status = client.wait_for_task(task_id, max_wait=300)
|
|
234
|
+
if status == "ready":
|
|
235
|
+
download_url = client.get_task_result(task_id)
|
|
236
|
+
print(f"Results: {download_url}")
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
**Available Platforms:**
|
|
240
|
+
- **E-Commerce**: Amazon, eBay, Walmart
|
|
241
|
+
- **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
|
|
242
|
+
- **Search**: Google Maps, Google Shopping, Google Play
|
|
243
|
+
- **Video**: YouTube (download, info, subtitles)
|
|
244
|
+
- **Code**: GitHub
|
|
245
|
+
- **Professional**: Indeed, Glassdoor, Crunchbase
|
|
246
|
+
- **Travel/Real Estate**: Booking, Airbnb, Zillow
|
|
247
|
+
|
|
248
|
+
See `examples/tools/` for more examples.
|
|
249
|
+
|
|
202
250
|
---
|
|
203
251
|
|
|
204
252
|
## 🛠️ Management APIs
|
|
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
|
|
|
226
274
|
|
|
227
275
|
---
|
|
228
276
|
|
|
277
|
+
## 🧪 Development & Testing
|
|
278
|
+
|
|
279
|
+
- **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
|
|
280
|
+
- **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
|
|
281
|
+
- **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
229
285
|
## 📄 License
|
|
230
286
|
|
|
231
287
|
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
thordata/__init__.py,sha256=FMOku6d17GrFjiJlRhvkx-JmhLLD7VlaADLC3FP6hHg,2287
|
|
2
|
+
thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
|
|
3
|
+
thordata/async_client.py,sha256=F_t5EeYUM8BYM9tOQb2lzrcO81whGfO1g53Qagxcyq8,39713
|
|
4
|
+
thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
|
|
5
|
+
thordata/client.py,sha256=fG7X9JpFS0HKlWZl_6R_Phzt_o2hV25rVUUyCXhioYM,56991
|
|
6
|
+
thordata/enums.py,sha256=dO5QWpPFLpYP2GfLAdoFtxMTemhGNdr_NPqBoYfSFkk,764
|
|
7
|
+
thordata/exceptions.py,sha256=foAtH5U2pLUXM6u1C_63AVVh4-afuwt5y5MO7jDF0s8,11585
|
|
8
|
+
thordata/models.py,sha256=7GshQklo5aqke_ZQ2QIXiz9Ac5v6IRtvjWIjsBKEq6A,853
|
|
9
|
+
thordata/retry.py,sha256=X6Sa5IIb5EWD5fUJjKyhvWJyWQGPVgxLB3-vKoWfa5Q,11453
|
|
10
|
+
thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
|
|
11
|
+
thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
|
|
12
|
+
thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
|
|
13
|
+
thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
|
|
14
|
+
thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
|
|
15
|
+
thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
|
|
16
|
+
thordata/tools/__init__.py,sha256=_Sr042bW-OMMj-WruA93YeQ6FfeIXvWmHoHMAFQ72a8,840
|
|
17
|
+
thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
|
|
18
|
+
thordata/tools/code.py,sha256=fGuLEn_CydIq79XgMw5-EJDcp-nq2fenWVp7hKpsRNw,930
|
|
19
|
+
thordata/tools/ecommerce.py,sha256=8iZ7f46CYovPDfAS3lZhRXpXEyJ9PSFBw9w99-Zw8Qs,6584
|
|
20
|
+
thordata/tools/professional.py,sha256=2RJ76Sx1seftFpwgD4VRfRinoo-HAqYZucTnuIdV4Kw,4350
|
|
21
|
+
thordata/tools/search.py,sha256=2HLQaYK6JiGvzOFF9or9ORXNrzv6nDQUaEt83YbqiQA,2903
|
|
22
|
+
thordata/tools/social.py,sha256=6gcj1GUWJvDALpBMeobohIn6yPVo-LsqDsuUroNpHG8,10465
|
|
23
|
+
thordata/tools/travel.py,sha256=vRJAU-uzFVvLQ5Tc58vp3CY7OPWd2lcWh_9MvWMc1fs,2725
|
|
24
|
+
thordata/tools/video.py,sha256=HUFqdue-dtWmTVlYtmf5ffzuYDIzw5l3wk3Vr7AXQW0,4689
|
|
25
|
+
thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
|
|
26
|
+
thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
|
|
27
|
+
thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
|
|
28
|
+
thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
|
|
29
|
+
thordata/types/task.py,sha256=b9TzcFigWUJDsr2t1hvaDv_CU1xk2d2cMrthmwPn7VU,4602
|
|
30
|
+
thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
|
|
31
|
+
thordata_sdk-1.6.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
|
|
32
|
+
thordata_sdk-1.6.0.dist-info/METADATA,sha256=bBy6xzDLWZ9l5bGLu0Jh91X9GtYVjlKCtpp13OZchmU,9308
|
|
33
|
+
thordata_sdk-1.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
34
|
+
thordata_sdk-1.6.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
|
|
35
|
+
thordata_sdk-1.6.0.dist-info/RECORD,,
|
thordata/_example_utils.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
from collections.abc import Iterable
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
from dotenv import load_dotenv
|
|
11
|
-
except Exception: # pragma: no cover
|
|
12
|
-
load_dotenv = None
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def load_env() -> None:
|
|
16
|
-
"""Load .env from repo root if python-dotenv is installed."""
|
|
17
|
-
if load_dotenv is None:
|
|
18
|
-
return
|
|
19
|
-
repo_root = Path(__file__).resolve().parents[2]
|
|
20
|
-
load_dotenv(dotenv_path=repo_root / ".env")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def env(name: str) -> str:
|
|
24
|
-
return (os.getenv(name) or "").strip()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
|
|
28
|
-
missing = [k for k in required if not env(k)]
|
|
29
|
-
if not missing:
|
|
30
|
-
return False
|
|
31
|
-
print("Skipping live example: missing env:", ", ".join(missing))
|
|
32
|
-
if tip:
|
|
33
|
-
print(tip)
|
|
34
|
-
else:
|
|
35
|
-
print("Tip: copy .env.example to .env and fill values, then re-run.")
|
|
36
|
-
return True
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def parse_json_env(name: str, default: str = "{}") -> Any:
|
|
40
|
-
raw = env(name) or default
|
|
41
|
-
return json.loads(raw)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def normalize_task_parameters(raw: Any) -> dict[str, Any]:
|
|
45
|
-
"""Accept {..} or [{..}] and return a single dict for create_scraper_task(parameters=...)."""
|
|
46
|
-
if isinstance(raw, list):
|
|
47
|
-
if not raw:
|
|
48
|
-
raise ValueError("Task parameters JSON array must not be empty")
|
|
49
|
-
raw = raw[0]
|
|
50
|
-
if not isinstance(raw, dict):
|
|
51
|
-
raise ValueError("Task parameters must be a JSON object (or array of objects)")
|
|
52
|
-
return raw
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def output_dir() -> Path:
|
|
56
|
-
"""Return output dir for examples; defaults to examples/output (ignored by git)."""
|
|
57
|
-
repo_root = Path(__file__).resolve().parents[2]
|
|
58
|
-
d = env("THORDATA_OUTPUT_DIR") or str(repo_root / "examples" / "output")
|
|
59
|
-
p = Path(d)
|
|
60
|
-
p.mkdir(parents=True, exist_ok=True)
|
|
61
|
-
return p
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def write_text(filename: str, content: str) -> Path:
|
|
65
|
-
p = output_dir() / filename
|
|
66
|
-
p.write_text(content, encoding="utf-8", errors="replace")
|
|
67
|
-
return p
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def write_json(filename: str, data: Any) -> Path:
|
|
71
|
-
p = output_dir() / filename
|
|
72
|
-
p.write_text(
|
|
73
|
-
json.dumps(data, ensure_ascii=False, indent=2),
|
|
74
|
-
encoding="utf-8",
|
|
75
|
-
errors="replace",
|
|
76
|
-
)
|
|
77
|
-
return p
|
thordata/demo.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Unified demo entrypoint for the Thordata Python SDK.
|
|
3
|
-
|
|
4
|
-
This module runs the example scripts from the repository's `examples/` directory
|
|
5
|
-
using `runpy`, so it does not require `examples/` to be an importable package.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
python -m thordata.demo serp
|
|
9
|
-
python -m thordata.demo universal
|
|
10
|
-
python -m thordata.demo scraper
|
|
11
|
-
python -m thordata.demo concurrency
|
|
12
|
-
|
|
13
|
-
Notes:
|
|
14
|
-
- This entrypoint is primarily intended for repository usage (dev/demo).
|
|
15
|
-
- When installed from PyPI, the `examples/` directory is typically not included.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
from __future__ import annotations
|
|
19
|
-
|
|
20
|
-
import runpy
|
|
21
|
-
import sys
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _configure_stdio() -> None:
|
|
26
|
-
# Avoid UnicodeEncodeError on Windows consoles with legacy encodings.
|
|
27
|
-
if hasattr(sys.stdout, "reconfigure"):
|
|
28
|
-
sys.stdout.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
|
|
29
|
-
if hasattr(sys.stderr, "reconfigure"):
|
|
30
|
-
sys.stderr.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def _load_env() -> None:
|
|
34
|
-
# Optional .env support for local development
|
|
35
|
-
try:
|
|
36
|
-
from dotenv import load_dotenv
|
|
37
|
-
except ImportError:
|
|
38
|
-
return
|
|
39
|
-
load_dotenv()
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _repo_root() -> Path:
|
|
43
|
-
"""
|
|
44
|
-
Resolve repository root based on src layout:
|
|
45
|
-
<repo>/src/thordata/demo.py -> parents[2] == <repo>
|
|
46
|
-
"""
|
|
47
|
-
return Path(__file__).resolve().parents[2]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _examples_dir() -> Path:
|
|
51
|
-
return _repo_root() / "examples"
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _demo_map() -> dict[str, Path]:
|
|
55
|
-
ex = _examples_dir()
|
|
56
|
-
return {
|
|
57
|
-
"serp": ex / "demo_serp_api.py",
|
|
58
|
-
"universal": ex / "demo_universal.py",
|
|
59
|
-
"scraper": ex / "demo_web_scraper_api.py",
|
|
60
|
-
"concurrency": ex / "async_high_concurrency.py",
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _usage() -> str:
|
|
65
|
-
names = ", ".join(sorted(_demo_map().keys()))
|
|
66
|
-
return f"Usage: python -m thordata.demo [{names}]"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def _run_demo(path: Path) -> int:
|
|
70
|
-
if not path.exists():
|
|
71
|
-
print(f"Error: demo script not found: {path}")
|
|
72
|
-
return 2
|
|
73
|
-
|
|
74
|
-
# Ensure examples dir is on sys.path (helpful if demo imports local helpers).
|
|
75
|
-
examples_dir = str(path.parent.resolve())
|
|
76
|
-
if examples_dir not in sys.path:
|
|
77
|
-
sys.path.insert(0, examples_dir)
|
|
78
|
-
|
|
79
|
-
try:
|
|
80
|
-
# Load without triggering `if __name__ == "__main__": ...`
|
|
81
|
-
ns = runpy.run_path(str(path), run_name="__thordata_demo__")
|
|
82
|
-
|
|
83
|
-
main_func = ns.get("main")
|
|
84
|
-
if callable(main_func):
|
|
85
|
-
return int(main_func()) # type: ignore[arg-type]
|
|
86
|
-
|
|
87
|
-
# Fallback: run as __main__ for scripts without main()
|
|
88
|
-
runpy.run_path(str(path), run_name="__main__")
|
|
89
|
-
return 0
|
|
90
|
-
|
|
91
|
-
except KeyboardInterrupt:
|
|
92
|
-
raise
|
|
93
|
-
except SystemExit as e:
|
|
94
|
-
# In case fallback run as __main__ triggered SystemExit
|
|
95
|
-
code = e.code
|
|
96
|
-
if code is None:
|
|
97
|
-
return 0
|
|
98
|
-
if isinstance(code, int):
|
|
99
|
-
return code
|
|
100
|
-
return 1
|
|
101
|
-
except Exception as e:
|
|
102
|
-
import traceback
|
|
103
|
-
|
|
104
|
-
print()
|
|
105
|
-
print("-" * 60)
|
|
106
|
-
print("[thordata.demo] The demo script raised an exception.")
|
|
107
|
-
print(f"[thordata.demo] Script: {path.name}")
|
|
108
|
-
print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
|
|
109
|
-
print()
|
|
110
|
-
print("Note: This is a failure within the demo script itself,")
|
|
111
|
-
print(" not an issue with the thordata.demo entrypoint.")
|
|
112
|
-
print("-" * 60)
|
|
113
|
-
traceback.print_exc()
|
|
114
|
-
return 1
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def main() -> int:
|
|
118
|
-
_configure_stdio()
|
|
119
|
-
_load_env()
|
|
120
|
-
|
|
121
|
-
if len(sys.argv) < 2:
|
|
122
|
-
print(_usage())
|
|
123
|
-
return 2
|
|
124
|
-
|
|
125
|
-
name = sys.argv[1].strip().lower()
|
|
126
|
-
mapping = _demo_map()
|
|
127
|
-
|
|
128
|
-
path = mapping.get(name)
|
|
129
|
-
if path is None:
|
|
130
|
-
print(f"Unknown demo: {name}")
|
|
131
|
-
print(_usage())
|
|
132
|
-
return 2
|
|
133
|
-
|
|
134
|
-
return _run_demo(path)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if __name__ == "__main__":
|
|
138
|
-
raise SystemExit(main())
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
thordata/__init__.py,sha256=-2bXx3LckBWrJ_E5HqFTOj7sm45AgrOnSWV4QN6f-7U,2287
|
|
2
|
-
thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
|
|
3
|
-
thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
|
|
4
|
-
thordata/async_client.py,sha256=zN59ZQfFVCuAGnGcyj-C_S9MbHzb17QbUISm46n6gpY,39439
|
|
5
|
-
thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
|
|
6
|
-
thordata/client.py,sha256=eA6jav_aAw2CQdSyrg3P59rELKo13K5tHqmSjEw3L_8,56717
|
|
7
|
-
thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
|
|
8
|
-
thordata/enums.py,sha256=_pahGhcq9Eh2ptL_WiNU2WlqKrydV_6e4U9G4erV9-s,774
|
|
9
|
-
thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
|
|
10
|
-
thordata/models.py,sha256=wozvlpS-Uv1DgkM_CEKOvldQ2InicxhIN0QiezIXPE4,853
|
|
11
|
-
thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
|
|
12
|
-
thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
|
|
13
|
-
thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
|
|
14
|
-
thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
|
|
15
|
-
thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
|
|
16
|
-
thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
|
|
17
|
-
thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
|
|
18
|
-
thordata/tools/__init__.py,sha256=ROryBBlCfq9cydaKXEPtnevjhg6GdFioAjdnp2VTR0M,606
|
|
19
|
-
thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
|
|
20
|
-
thordata/tools/code.py,sha256=opYMG7LdR90VjW5tn8wnRCwDT-zUC0uteMKW01TMPTI,580
|
|
21
|
-
thordata/tools/ecommerce.py,sha256=u-s-RGMSAGifsMnyMrwtJ3yVDgu3n74bv8yyX6TbMNU,1560
|
|
22
|
-
thordata/tools/search.py,sha256=toWMOnnfQXgafyndHs23Yn049vpPlGPHdZA7SpiJJTE,1724
|
|
23
|
-
thordata/tools/social.py,sha256=VbujfbA5Man6Shsik4QYBpf9z2FJhhJkZLNKll09Ots,4886
|
|
24
|
-
thordata/tools/video.py,sha256=WikUOYPSVtHdrS0Z7VVexlUPyFZRv9v7cerkpzzO5jU,2549
|
|
25
|
-
thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
|
|
26
|
-
thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
|
|
27
|
-
thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
|
|
28
|
-
thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
|
|
29
|
-
thordata/types/task.py,sha256=f5xGeH4BrE7sHIgWhRJuMr3iuPooxJlg7ztr8lwcSx8,4139
|
|
30
|
-
thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
|
|
31
|
-
thordata_sdk-1.5.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
|
|
32
|
-
thordata_sdk-1.5.0.dist-info/METADATA,sha256=VqsfaJsguO-KSMOjWjPodO1nIa510qpjNBdVzCMHshQ,7026
|
|
33
|
-
thordata_sdk-1.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
34
|
-
thordata_sdk-1.5.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
|
|
35
|
-
thordata_sdk-1.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|