thordata-sdk 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ """
2
+ Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from .base import ToolRequest
10
+
11
+
12
class Booking:
    """Grouping class for the Booking.com scraper tool requests."""

    @dataclass
    class HotelByUrl(ToolRequest):
        """Request for the Booking hotel information scraper, addressed by URL."""

        # Unannotated class attributes: the dataclass decorator does not turn
        # these into constructor fields.
        SPIDER_ID = "booking_hotellist_by-url"
        SPIDER_NAME = "booking.com"

        # Required: the URL handed to the spider.
        url: str
22
+
23
+
24
class Zillow:
    """Grouping class for the Zillow scraper tool requests."""

    @dataclass
    class PriceByUrl(ToolRequest):
        """Request for the Zillow property price history scraper, addressed by URL."""

        # Unannotated class attributes are not dataclass fields.
        SPIDER_ID = "zillow_price_by-url"
        SPIDER_NAME = "zillow.com"

        # Required: the URL handed to the spider.
        url: str

    @dataclass
    class ProductByUrl(ToolRequest):
        """Request for the Zillow property details scraper, addressed by URL."""

        SPIDER_ID = "zillow_product_by-url"
        SPIDER_NAME = "zillow.com"

        # Required: the URL handed to the spider.
        url: str

    @dataclass
    class ProductByFilter(ToolRequest):
        """Request for the Zillow property details scraper, driven by search filters."""

        SPIDER_ID = "zillow_product_by-filter"
        SPIDER_NAME = "zillow.com"

        # Required search term.
        keywords_location: str

        # NOTE(review): the mixed-case field names below presumably mirror the
        # remote spider's parameter names -- confirm before renaming them.

        # e.g. "For Rent", "For Sale"
        listingCategory: str | None = None
        # e.g. "Houses"
        HomeType: str | None = None
        # e.g. "Any"
        days_on_zillow: str | None = None
        maximum: int | None = None

    @dataclass
    class ProductByListUrl(ToolRequest):
        """Request for the Zillow property details scraper, addressed by list URL."""

        SPIDER_ID = "zillow_product_by-listurl"
        SPIDER_NAME = "zillow.com"

        # Required: the URL handed to the spider.
        url: str
        maximum: int | None = None
63
+
64
+
65
class Airbnb:
    """Grouping class for the Airbnb scraper tool requests."""

    @dataclass
    class ProductBySearchUrl(ToolRequest):
        """Request for the Airbnb properties scraper, addressed by search URL."""

        # Unannotated class attributes are not dataclass fields.
        SPIDER_ID = "airbnb_product_by-searchurl"
        SPIDER_NAME = "airbnb.com"

        # Required: the search URL handed to the spider.
        searchurl: str
        country: str | None = None

    @dataclass
    class ProductByLocation(ToolRequest):
        """Request for the Airbnb properties scraper, addressed by location."""

        SPIDER_ID = "airbnb_product_by-location"
        SPIDER_NAME = "airbnb.com"

        # Required: the location to search.
        location: str

        # Optional filters. Every one of them, including the guest counts,
        # is declared as a string.
        check_in: str | None = None
        check_out: str | None = None
        num_of_adults: str | None = None
        num_of_children: str | None = None
        num_of_infants: str | None = None
        num_of_pets: str | None = None
        country: str | None = None
        currency: str | None = None

    @dataclass
    class ProductByUrl(ToolRequest):
        """Request for the Airbnb properties scraper, addressed by URL."""

        SPIDER_ID = "airbnb_product_by-url"
        SPIDER_NAME = "airbnb.com"

        # Required: the URL handed to the spider.
        url: str
        country: str | None = None
thordata/tools/video.py CHANGED
@@ -46,14 +46,23 @@ class YouTube:
46
46
 
47
47
  @dataclass
48
48
  class Profile(VideoToolRequest):
49
- """YouTube Profile Scraper. Uses video_builder."""
49
+ """YouTube Profile Scraper by Keyword. Uses video_builder."""
50
50
 
51
51
  SPIDER_ID = "youtube_profiles_by-keyword"
52
52
  SPIDER_NAME = "youtube.com"
53
53
 
54
- url: str # Channel URL
54
+ keyword: str
55
55
  page_turning: int = 1
56
- keyword: str | None = None
56
+ common_settings: CommonSettings = field(default_factory=CommonSettings)
57
+
58
+ @dataclass
59
+ class ProfileByUrl(VideoToolRequest):
60
+ """YouTube Profile Scraper by URL. Uses video_builder."""
61
+
62
+ SPIDER_ID = "youtube_profiles_by-url"
63
+ SPIDER_NAME = "youtube.com"
64
+
65
+ url: str # Channel URL
57
66
  common_settings: CommonSettings = field(default_factory=CommonSettings)
58
67
 
59
68
  @dataclass
@@ -69,13 +78,77 @@ class YouTube:
69
78
  common_settings: CommonSettings = field(default_factory=CommonSettings)
70
79
 
71
80
  @dataclass
72
- class VideoInfo(ToolRequest):
73
- """YouTube Video Post Scraper (Metadata only). Standard builder."""
81
+ class VideoInfo(VideoToolRequest):
82
+ """YouTube Video Basic Information Scraper. Uses video_builder."""
83
+
84
+ SPIDER_ID = "youtube_product_by-id"
85
+ SPIDER_NAME = "youtube.com"
86
+
87
+ video_id: str
88
+ common_settings: CommonSettings = field(default_factory=CommonSettings)
89
+
90
+ @dataclass
91
+ class VideoPostByUrl(ToolRequest):
92
+ """YouTube Video Post Scraper by URL. Uses standard builder."""
74
93
 
75
- # Note: This one does NOT inherit from VideoToolRequest because it uses the standard builder
76
- # and doesn't support common_settings in the same way.
77
94
  SPIDER_ID = "youtube_video-post_by-url"
78
95
  SPIDER_NAME = "youtube.com"
79
96
 
80
97
  url: str # Channel Video URL
98
+ order_by: str | None = None
99
+ start_index: str | None = None
81
100
  num_of_posts: str | None = None
101
+
102
+ @dataclass
103
+ class VideoPostBySearchFilters(ToolRequest):
104
+ """YouTube Video Post Scraper by Search Filters. Uses standard builder."""
105
+
106
+ SPIDER_ID = "youtube_video-post_by-search-filters"
107
+ SPIDER_NAME = "youtube.com"
108
+
109
+ keyword_search: str
110
+ features: str | None = None
111
+ type: str | None = None # Videos
112
+ duration: str | None = None
113
+ upload_date: str | None = None
114
+ num_of_posts: str | None = None
115
+
116
+ @dataclass
117
+ class VideoPostByHashtag(ToolRequest):
118
+ """YouTube Video Post Scraper by Hashtag. Uses standard builder."""
119
+
120
+ SPIDER_ID = "youtube_video-post_by-hashtag"
121
+ SPIDER_NAME = "youtube.com"
122
+
123
+ hashtag: str
124
+ num_of_posts: str | None = None
125
+
126
+ @dataclass
127
+ class VideoPostByPodcastUrl(ToolRequest):
128
+ """YouTube Video Post Scraper by Podcast URL. Uses standard builder."""
129
+
130
+ SPIDER_ID = "youtube_video-post_by-podcast-url"
131
+ SPIDER_NAME = "youtube.com"
132
+
133
+ url: str # Playlist URL
134
+ num_of_posts: str | None = None
135
+
136
+ @dataclass
137
+ class VideoPostByKeyword(ToolRequest):
138
+ """YouTube Video Post Scraper by Keyword. Uses standard builder."""
139
+
140
+ SPIDER_ID = "youtube_video-post_by-keyword"
141
+ SPIDER_NAME = "youtube.com"
142
+
143
+ keyword: str
144
+ num_of_posts: str | None = None
145
+
146
+ @dataclass
147
+ class VideoPostByExplore(ToolRequest):
148
+ """YouTube Video Post Scraper by Explore URL. Uses standard builder."""
149
+
150
+ SPIDER_ID = "youtube_video-post_by-explore"
151
+ SPIDER_NAME = "youtube.com"
152
+
153
+ url: str
154
+ all_tabs: str | None = None
thordata/types/task.py CHANGED
@@ -54,16 +54,22 @@ class ScraperTaskConfig(ThordataBaseConfig):
54
54
  file_name: str
55
55
  spider_id: str
56
56
  spider_name: str
57
- parameters: dict[str, Any]
57
+ parameters: dict[str, Any] | list[dict[str, Any]]
58
58
  universal_params: dict[str, Any] | None = None
59
59
  include_errors: bool = True
60
60
 
61
61
  def to_payload(self) -> dict[str, Any]:
62
+ # Handle batch parameters: if list, use as is; if dict, wrap in list
63
+ if isinstance(self.parameters, list):
64
+ params_json = json.dumps(self.parameters)
65
+ else:
66
+ params_json = json.dumps([self.parameters])
67
+
62
68
  payload: dict[str, Any] = {
63
69
  "file_name": self.file_name,
64
70
  "spider_id": self.spider_id,
65
71
  "spider_name": self.spider_name,
66
- "spider_parameters": json.dumps([self.parameters]),
72
+ "spider_parameters": params_json,
67
73
  "spider_errors": "true" if self.include_errors else "false",
68
74
  }
69
75
  if self.universal_params:
@@ -76,16 +82,22 @@ class VideoTaskConfig(ThordataBaseConfig):
76
82
  file_name: str
77
83
  spider_id: str
78
84
  spider_name: str
79
- parameters: dict[str, Any]
85
+ parameters: dict[str, Any] | list[dict[str, Any]]
80
86
  common_settings: CommonSettings
81
87
  include_errors: bool = True
82
88
 
83
89
  def to_payload(self) -> dict[str, Any]:
90
+ # Handle batch parameters
91
+ if isinstance(self.parameters, list):
92
+ params_json = json.dumps(self.parameters)
93
+ else:
94
+ params_json = json.dumps([self.parameters])
95
+
84
96
  payload: dict[str, Any] = {
85
97
  "file_name": self.file_name,
86
98
  "spider_id": self.spider_id,
87
99
  "spider_name": self.spider_name,
88
- "spider_parameters": json.dumps([self.parameters]),
100
+ "spider_parameters": params_json,
89
101
  "spider_errors": "true" if self.include_errors else "false",
90
102
  # v2.0 Doc explicitly requires 'spider_universal' key for video tasks too sometimes,
91
103
  # but usually it's passed as 'common_settings' or 'spider_universal'.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.5.0
3
+ Version: 1.6.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -63,9 +63,9 @@ Dynamic: license-file
63
63
 
64
64
  ## 📖 Introduction
65
65
 
66
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
66
+ The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
67
67
 
68
- **Why v1.5.0?**
68
+ **Why v1.6.0?**
69
69
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
70
70
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
71
71
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -83,7 +83,7 @@ pip install thordata-sdk
83
83
 
84
84
  ## 🔐 Configuration
85
85
 
86
- Set environment variables to avoid hardcoding credentials.
86
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
87
87
 
88
88
  ```bash
89
89
  # [Scraping APIs]
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
93
93
  export THORDATA_PUBLIC_TOKEN="your_public_token"
94
94
  export THORDATA_PUBLIC_KEY="your_public_key"
95
95
 
96
- # [Proxy Network]
96
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
97
97
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
98
98
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
99
- # Optional: Set upstream proxy for local dev (e.g., Clash)
100
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
99
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
100
+ # export THORDATA_UNLIMITED_USERNAME="..."
101
+ # export THORDATA_UNLIMITED_PASSWORD="..."
102
+
103
+ # Optional: Upstream proxy when behind a firewall (e.g. Clash Verge on port 7897)
104
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
101
105
  ```
102
106
 
107
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
108
+
103
109
  ---
104
110
 
105
111
  ## 🚀 Quick Start
@@ -199,6 +205,48 @@ if status == "finished":
199
205
  print(f"Download: {data_url}")
200
206
  ```
201
207
 
208
+ ### Web Scraper Tools (120+ Pre-built Tools)
209
+
210
+ Use pre-built tools for popular platforms. See the [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for the full list.
211
+
212
+ ```python
213
+ from thordata import ThordataClient
214
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
215
+
216
+ client = ThordataClient()
217
+
218
+ # Amazon Product by ASIN
219
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
220
+
221
+ # Google Maps by Place ID
222
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
223
+
224
+ # YouTube Video Download
225
+ from thordata import CommonSettings
226
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
227
+ task_id = client.run_tool(YouTube.VideoDownload(
228
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
229
+ common_settings=settings
230
+ ))
231
+
232
+ # Wait and get results
233
+ status = client.wait_for_task(task_id, max_wait=300)
234
+ if status == "ready":
235
+ download_url = client.get_task_result(task_id)
236
+ print(f"Results: {download_url}")
237
+ ```
238
+
239
+ **Available Platforms:**
240
+ - **E-Commerce**: Amazon, eBay, Walmart
241
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
242
+ - **Search**: Google Maps, Google Shopping, Google Play
243
+ - **Video**: YouTube (download, info, subtitles)
244
+ - **Code**: GitHub
245
+ - **Professional**: Indeed, Glassdoor, Crunchbase
246
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
247
+
248
+ See `examples/tools/` for more examples.
249
+
202
250
  ---
203
251
 
204
252
  ## 🛠️ Management APIs
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
226
274
 
227
275
  ---
228
276
 
277
+ ## 🧪 Development & Testing
278
+
279
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
280
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
281
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optionally set `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if you are behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
282
+
283
+ ---
284
+
229
285
  ## 📄 License
230
286
 
231
287
  MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,35 @@
1
+ thordata/__init__.py,sha256=FMOku6d17GrFjiJlRhvkx-JmhLLD7VlaADLC3FP6hHg,2287
2
+ thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
3
+ thordata/async_client.py,sha256=F_t5EeYUM8BYM9tOQb2lzrcO81whGfO1g53Qagxcyq8,39713
4
+ thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
5
+ thordata/client.py,sha256=fG7X9JpFS0HKlWZl_6R_Phzt_o2hV25rVUUyCXhioYM,56991
6
+ thordata/enums.py,sha256=dO5QWpPFLpYP2GfLAdoFtxMTemhGNdr_NPqBoYfSFkk,764
7
+ thordata/exceptions.py,sha256=foAtH5U2pLUXM6u1C_63AVVh4-afuwt5y5MO7jDF0s8,11585
8
+ thordata/models.py,sha256=7GshQklo5aqke_ZQ2QIXiz9Ac5v6IRtvjWIjsBKEq6A,853
9
+ thordata/retry.py,sha256=X6Sa5IIb5EWD5fUJjKyhvWJyWQGPVgxLB3-vKoWfa5Q,11453
10
+ thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
11
+ thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
12
+ thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
13
+ thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
14
+ thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
15
+ thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
16
+ thordata/tools/__init__.py,sha256=_Sr042bW-OMMj-WruA93YeQ6FfeIXvWmHoHMAFQ72a8,840
17
+ thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
18
+ thordata/tools/code.py,sha256=fGuLEn_CydIq79XgMw5-EJDcp-nq2fenWVp7hKpsRNw,930
19
+ thordata/tools/ecommerce.py,sha256=8iZ7f46CYovPDfAS3lZhRXpXEyJ9PSFBw9w99-Zw8Qs,6584
20
+ thordata/tools/professional.py,sha256=2RJ76Sx1seftFpwgD4VRfRinoo-HAqYZucTnuIdV4Kw,4350
21
+ thordata/tools/search.py,sha256=2HLQaYK6JiGvzOFF9or9ORXNrzv6nDQUaEt83YbqiQA,2903
22
+ thordata/tools/social.py,sha256=6gcj1GUWJvDALpBMeobohIn6yPVo-LsqDsuUroNpHG8,10465
23
+ thordata/tools/travel.py,sha256=vRJAU-uzFVvLQ5Tc58vp3CY7OPWd2lcWh_9MvWMc1fs,2725
24
+ thordata/tools/video.py,sha256=HUFqdue-dtWmTVlYtmf5ffzuYDIzw5l3wk3Vr7AXQW0,4689
25
+ thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
26
+ thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
27
+ thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
28
+ thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
29
+ thordata/types/task.py,sha256=b9TzcFigWUJDsr2t1hvaDv_CU1xk2d2cMrthmwPn7VU,4602
30
+ thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
31
+ thordata_sdk-1.6.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
32
+ thordata_sdk-1.6.0.dist-info/METADATA,sha256=bBy6xzDLWZ9l5bGLu0Jh91X9GtYVjlKCtpp13OZchmU,9308
33
+ thordata_sdk-1.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
34
+ thordata_sdk-1.6.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
35
+ thordata_sdk-1.6.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,77 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from collections.abc import Iterable
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- try:
10
- from dotenv import load_dotenv
11
- except Exception: # pragma: no cover
12
- load_dotenv = None
13
-
14
-
15
def load_env() -> None:
    """Load ``.env`` from the repo root when python-dotenv is available.

    The repo root is taken to be ``parents[2]`` of this file's resolved path.
    Does nothing when the module-level ``load_dotenv`` is ``None``.
    """
    if load_dotenv is not None:
        dotenv_file = Path(__file__).resolve().parents[2] / ".env"
        load_dotenv(dotenv_path=dotenv_file)
21
-
22
-
23
def env(name: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    An unset or empty variable yields ``""``.
    """
    value = os.getenv(name)
    if not value:
        return ""
    return value.strip()
25
-
26
-
27
def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
    """Report whether a live example should be skipped for lack of configuration.

    Args:
        required: Names of environment variables that must be non-empty
            (as judged by ``env``).
        tip: Optional hint printed after the "missing" message; a generic
            ``.env`` hint is printed when this is falsy.

    Returns:
        ``True`` (after printing which variables are missing) if at least one
        required variable is empty, ``False`` otherwise.
    """
    absent = [key for key in required if not env(key)]
    if absent:
        print("Skipping live example: missing env:", ", ".join(absent))
        hint = tip if tip else "Tip: copy .env.example to .env and fill values, then re-run."
        print(hint)
        return True
    return False
37
-
38
-
39
def parse_json_env(name: str, default: str = "{}") -> Any:
    """Decode the JSON held in environment variable *name*.

    When ``env(name)`` is empty, *default* (itself JSON text) is decoded
    instead. Decoding errors from ``json.loads`` propagate to the caller.
    """
    text = env(name)
    if not text:
        text = default
    return json.loads(text)
42
-
43
-
44
def normalize_task_parameters(raw: Any) -> dict[str, Any]:
    """Reduce ``{..}`` or ``[{..}, ...]`` to one dict for ``create_scraper_task(parameters=...)``.

    For a list, only the first element is used.

    Raises:
        ValueError: If *raw* is an empty list, or if the selected value is
            not a dict.
    """
    candidate = raw
    if isinstance(candidate, list):
        if not candidate:
            raise ValueError("Task parameters JSON array must not be empty")
        candidate = candidate[0]
    if isinstance(candidate, dict):
        return candidate
    raise ValueError("Task parameters must be a JSON object (or array of objects)")
53
-
54
-
55
def output_dir() -> Path:
    """Return the directory examples write into, creating it if necessary.

    A non-empty ``THORDATA_OUTPUT_DIR`` takes precedence; otherwise the
    directory is ``examples/output`` under ``parents[2]`` of this file's
    resolved path.
    """
    fallback = Path(__file__).resolve().parents[2] / "examples" / "output"
    override = env("THORDATA_OUTPUT_DIR")
    target = Path(override) if override else fallback
    target.mkdir(parents=True, exist_ok=True)
    return target
62
-
63
-
64
def write_text(filename: str, content: str) -> Path:
    """Write *content* as UTF-8 to *filename* inside ``output_dir()``.

    Characters that cannot be encoded are replaced rather than raising.

    Returns:
        The path of the written file.
    """
    destination = output_dir().joinpath(filename)
    destination.write_text(content, encoding="utf-8", errors="replace")
    return destination
68
-
69
-
70
def write_json(filename: str, data: Any) -> Path:
    """Serialize *data* as indented JSON into *filename* inside ``output_dir()``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``), and the
    file is encoded as UTF-8 with unencodable characters replaced.

    Returns:
        The path of the written file.
    """
    destination = output_dir() / filename
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    destination.write_text(rendered, encoding="utf-8", errors="replace")
    return destination
thordata/demo.py DELETED
@@ -1,138 +0,0 @@
1
- """
2
- Unified demo entrypoint for the Thordata Python SDK.
3
-
4
- This module runs the example scripts from the repository's `examples/` directory
5
- using `runpy`, so it does not require `examples/` to be an importable package.
6
-
7
- Usage:
8
- python -m thordata.demo serp
9
- python -m thordata.demo universal
10
- python -m thordata.demo scraper
11
- python -m thordata.demo concurrency
12
-
13
- Notes:
14
- - This entrypoint is primarily intended for repository usage (dev/demo).
15
- - When installed from PyPI, the `examples/` directory is typically not included.
16
- """
17
-
18
- from __future__ import annotations
19
-
20
- import runpy
21
- import sys
22
- from pathlib import Path
23
-
24
-
25
- def _configure_stdio() -> None:
26
- # Avoid UnicodeEncodeError on Windows consoles with legacy encodings.
27
- if hasattr(sys.stdout, "reconfigure"):
28
- sys.stdout.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
29
- if hasattr(sys.stderr, "reconfigure"):
30
- sys.stderr.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
31
-
32
-
33
- def _load_env() -> None:
34
- # Optional .env support for local development
35
- try:
36
- from dotenv import load_dotenv
37
- except ImportError:
38
- return
39
- load_dotenv()
40
-
41
-
42
- def _repo_root() -> Path:
43
- """
44
- Resolve repository root based on src layout:
45
- <repo>/src/thordata/demo.py -> parents[2] == <repo>
46
- """
47
- return Path(__file__).resolve().parents[2]
48
-
49
-
50
def _examples_dir() -> Path:
    """Return the ``examples`` directory directly under ``_repo_root()``."""
    return _repo_root().joinpath("examples")
52
-
53
-
54
def _demo_map() -> dict[str, Path]:
    """Map each demo name accepted on the command line to its script path."""
    scripts = (
        ("serp", "demo_serp_api.py"),
        ("universal", "demo_universal.py"),
        ("scraper", "demo_web_scraper_api.py"),
        ("concurrency", "async_high_concurrency.py"),
    )
    root = _examples_dir()
    return {name: root / filename for name, filename in scripts}
62
-
63
-
64
def _usage() -> str:
    """Build the one-line usage message listing the demo names alphabetically."""
    names = ", ".join(sorted(_demo_map()))
    return f"Usage: python -m thordata.demo [{names}]"
67
-
68
-
69
- def _run_demo(path: Path) -> int:
70
- if not path.exists():
71
- print(f"Error: demo script not found: {path}")
72
- return 2
73
-
74
- # Ensure examples dir is on sys.path (helpful if demo imports local helpers).
75
- examples_dir = str(path.parent.resolve())
76
- if examples_dir not in sys.path:
77
- sys.path.insert(0, examples_dir)
78
-
79
- try:
80
- # Load without triggering `if __name__ == "__main__": ...`
81
- ns = runpy.run_path(str(path), run_name="__thordata_demo__")
82
-
83
- main_func = ns.get("main")
84
- if callable(main_func):
85
- return int(main_func()) # type: ignore[arg-type]
86
-
87
- # Fallback: run as __main__ for scripts without main()
88
- runpy.run_path(str(path), run_name="__main__")
89
- return 0
90
-
91
- except KeyboardInterrupt:
92
- raise
93
- except SystemExit as e:
94
- # In case fallback run as __main__ triggered SystemExit
95
- code = e.code
96
- if code is None:
97
- return 0
98
- if isinstance(code, int):
99
- return code
100
- return 1
101
- except Exception as e:
102
- import traceback
103
-
104
- print()
105
- print("-" * 60)
106
- print("[thordata.demo] The demo script raised an exception.")
107
- print(f"[thordata.demo] Script: {path.name}")
108
- print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
109
- print()
110
- print("Note: This is a failure within the demo script itself,")
111
- print(" not an issue with the thordata.demo entrypoint.")
112
- print("-" * 60)
113
- traceback.print_exc()
114
- return 1
115
-
116
-
117
def main() -> int:
    """Command-line entry point: run the demo named by the first argument.

    Returns:
        ``2`` (after printing usage) when no demo name is given or the name
        is unknown; otherwise whatever ``_run_demo`` returns for the script.
    """
    _configure_stdio()
    _load_env()

    args = sys.argv[1:]
    if not args:
        print(_usage())
        return 2

    # Demo names are matched case-insensitively, ignoring stray whitespace.
    name = args[0].strip().lower()
    script = _demo_map().get(name)
    if script is not None:
        return _run_demo(script)

    print(f"Unknown demo: {name}")
    print(_usage())
    return 2


if __name__ == "__main__":
    sys.exit(main())
@@ -1,35 +0,0 @@
1
- thordata/__init__.py,sha256=-2bXx3LckBWrJ_E5HqFTOj7sm45AgrOnSWV4QN6f-7U,2287
2
- thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
3
- thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
4
- thordata/async_client.py,sha256=zN59ZQfFVCuAGnGcyj-C_S9MbHzb17QbUISm46n6gpY,39439
5
- thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
6
- thordata/client.py,sha256=eA6jav_aAw2CQdSyrg3P59rELKo13K5tHqmSjEw3L_8,56717
7
- thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
8
- thordata/enums.py,sha256=_pahGhcq9Eh2ptL_WiNU2WlqKrydV_6e4U9G4erV9-s,774
9
- thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
10
- thordata/models.py,sha256=wozvlpS-Uv1DgkM_CEKOvldQ2InicxhIN0QiezIXPE4,853
11
- thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
12
- thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
13
- thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
14
- thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
15
- thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
16
- thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
17
- thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
18
- thordata/tools/__init__.py,sha256=ROryBBlCfq9cydaKXEPtnevjhg6GdFioAjdnp2VTR0M,606
19
- thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
20
- thordata/tools/code.py,sha256=opYMG7LdR90VjW5tn8wnRCwDT-zUC0uteMKW01TMPTI,580
21
- thordata/tools/ecommerce.py,sha256=u-s-RGMSAGifsMnyMrwtJ3yVDgu3n74bv8yyX6TbMNU,1560
22
- thordata/tools/search.py,sha256=toWMOnnfQXgafyndHs23Yn049vpPlGPHdZA7SpiJJTE,1724
23
- thordata/tools/social.py,sha256=VbujfbA5Man6Shsik4QYBpf9z2FJhhJkZLNKll09Ots,4886
24
- thordata/tools/video.py,sha256=WikUOYPSVtHdrS0Z7VVexlUPyFZRv9v7cerkpzzO5jU,2549
25
- thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
26
- thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
27
- thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
28
- thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
29
- thordata/types/task.py,sha256=f5xGeH4BrE7sHIgWhRJuMr3iuPooxJlg7ztr8lwcSx8,4139
30
- thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
31
- thordata_sdk-1.5.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
32
- thordata_sdk-1.5.0.dist-info/METADATA,sha256=VqsfaJsguO-KSMOjWjPodO1nIa510qpjNBdVzCMHshQ,7026
33
- thordata_sdk-1.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
34
- thordata_sdk-1.5.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
35
- thordata_sdk-1.5.0.dist-info/RECORD,,