thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/types/task.py CHANGED
@@ -8,6 +8,7 @@ import json
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from typing import Any
11
+ from urllib.parse import unquote
11
12
 
12
13
  from .common import CommonSettings, ThordataBaseConfig
13
14
 
@@ -49,25 +50,90 @@ class DataFormat(str, Enum):
49
50
  XLSX = "xlsx"
50
51
 
51
52
 
53
+ def _normalize_url_value(value: Any) -> Any:
54
+ if not isinstance(value, str):
55
+ return value
56
+ # Decode all percent-encoded characters to match Dashboard format
57
+ # Dashboard expects URLs in their raw/decoded form, not URL-encoded
58
+ # This ensures API/SDK submissions match manual Dashboard input exactly
59
+ try:
60
+ # Check if URL contains any percent-encoded characters
61
+ if "%" in value:
62
+ # Fully decode the URL to match Dashboard format
63
+ decoded = unquote(value)
64
+ # If decoding changed the value, use decoded version
65
+ # This handles cases like %26 -> &, %3A -> :, %2F -> /, etc.
66
+ if decoded != value:
67
+ return decoded
68
+ except Exception:
69
+ # If decoding fails, return original value
70
+ pass
71
+ return value
72
+
73
+
74
+ def _normalize_parameters(params: dict[str, Any]) -> dict[str, Any]:
75
+ # All parameter keys that contain URLs and should be normalized
76
+ # This ensures API/SDK submissions match Dashboard format exactly
77
+ url_keys = {
78
+ "url",
79
+ "domain",
80
+ "profileurl",
81
+ "posturl",
82
+ "seller_url",
83
+ # Additional URL-related keys that may be used
84
+ "link",
85
+ "href",
86
+ "page_url",
87
+ "product_url",
88
+ "category_url",
89
+ }
90
+ out: dict[str, Any] = {}
91
+ for k, v in params.items():
92
+ if k in url_keys:
93
+ out[k] = _normalize_url_value(v)
94
+ else:
95
+ out[k] = v
96
+ return out
97
+
98
+
52
99
  @dataclass
53
100
  class ScraperTaskConfig(ThordataBaseConfig):
54
101
  file_name: str
55
102
  spider_id: str
56
103
  spider_name: str
57
- parameters: dict[str, Any]
104
+ parameters: dict[str, Any] | list[dict[str, Any]]
58
105
  universal_params: dict[str, Any] | None = None
59
106
  include_errors: bool = True
107
+ data_format: DataFormat | str | None = (
108
+ None # Support json, csv, xlsx output formats
109
+ )
60
110
 
61
111
  def to_payload(self) -> dict[str, Any]:
112
+ # Normalize parameters: decode percent-encoded URLs to reduce API/Dashboard divergence
113
+ if isinstance(self.parameters, list):
114
+ normalized_list = [_normalize_parameters(p) for p in self.parameters]
115
+ params_json = json.dumps(normalized_list)
116
+ else:
117
+ normalized_one = _normalize_parameters(self.parameters)
118
+ params_json = json.dumps([normalized_one])
119
+
62
120
  payload: dict[str, Any] = {
63
121
  "file_name": self.file_name,
64
122
  "spider_id": self.spider_id,
65
123
  "spider_name": self.spider_name,
66
- "spider_parameters": json.dumps([self.parameters]),
124
+ "spider_parameters": params_json,
67
125
  "spider_errors": "true" if self.include_errors else "false",
68
126
  }
69
127
  if self.universal_params:
70
128
  payload["spider_universal"] = json.dumps(self.universal_params)
129
+ # Add data_format if specified (for json/csv/xlsx output)
130
+ if self.data_format:
131
+ fmt = (
132
+ self.data_format.value
133
+ if isinstance(self.data_format, DataFormat)
134
+ else str(self.data_format).lower()
135
+ )
136
+ payload["data_format"] = fmt
71
137
  return payload
72
138
 
73
139
 
@@ -76,24 +142,24 @@ class VideoTaskConfig(ThordataBaseConfig):
76
142
  file_name: str
77
143
  spider_id: str
78
144
  spider_name: str
79
- parameters: dict[str, Any]
145
+ parameters: dict[str, Any] | list[dict[str, Any]]
80
146
  common_settings: CommonSettings
81
147
  include_errors: bool = True
82
148
 
83
149
  def to_payload(self) -> dict[str, Any]:
150
+ if isinstance(self.parameters, list):
151
+ params_json = json.dumps(self.parameters)
152
+ else:
153
+ params_json = json.dumps([self.parameters])
154
+
84
155
  payload: dict[str, Any] = {
85
156
  "file_name": self.file_name,
86
157
  "spider_id": self.spider_id,
87
158
  "spider_name": self.spider_name,
88
- "spider_parameters": json.dumps([self.parameters]),
159
+ "spider_parameters": params_json,
89
160
  "spider_errors": "true" if self.include_errors else "false",
90
- # v2.0 Doc explicitly requires 'spider_universal' key for video tasks too sometimes,
91
- # but usually it's passed as 'common_settings' or 'spider_universal'.
92
- # Sticking to original models.py key logic for now to ensure stability.
93
161
  "spider_universal": self.common_settings.to_json(),
94
162
  }
95
- # Note: If API expects 'common_settings' key specifically, adjust here.
96
- # Based on v2 context, video builder often uses spider_universal.
97
163
  return payload
98
164
 
99
165
 
@@ -15,12 +15,15 @@ from .common import ThordataBaseConfig
15
15
  class UniversalScrapeRequest(ThordataBaseConfig):
16
16
  url: str
17
17
  js_render: bool = False
18
- output_format: str = "html" # 'html' or 'png'
18
+ output_format: str | list[str] = (
19
+ "html" # 'html', 'png', or ['png', 'html'] for both
20
+ )
19
21
  country: str | None = None
20
- block_resources: str | None = None # 'script,image'
22
+ block_resources: str | None = None # 'script,image,video'
21
23
  clean_content: str | None = None # 'js,css'
22
24
  wait: int | None = None # ms
23
25
  wait_for: str | None = None # selector
26
+ follow_redirect: bool | None = None # Follow redirects
24
27
 
25
28
  # Headers/Cookies must be serialized to JSON in payload
26
29
  headers: list[dict[str, str]] | None = None
@@ -29,12 +32,26 @@ class UniversalScrapeRequest(ThordataBaseConfig):
29
32
  extra_params: dict[str, Any] = field(default_factory=dict)
30
33
 
31
34
  def __post_init__(self) -> None:
35
+ # Normalize output_format to list for easier handling
36
+ if isinstance(self.output_format, str):
37
+ formats = [f.strip().lower() for f in self.output_format.split(",")]
38
+ else:
39
+ formats = [
40
+ f.lower() if isinstance(f, str) else str(f).lower()
41
+ for f in self.output_format
42
+ ]
43
+
32
44
  valid_formats = {"html", "png"}
33
- if self.output_format.lower() not in valid_formats:
45
+ invalid = [f for f in formats if f not in valid_formats]
46
+ if invalid:
34
47
  raise ValueError(
35
- f"Invalid output_format: {self.output_format}. Must be one of: {valid_formats}"
48
+ f"Invalid output_format: {invalid}. Must be one or more of: {valid_formats}. "
49
+ f"Use comma-separated string like 'png,html' or list ['png', 'html'] for multiple formats."
36
50
  )
37
51
 
52
+ # Store as list for to_payload
53
+ self._output_formats = formats
54
+
38
55
  if self.wait is not None and (self.wait < 0 or self.wait > 100000):
39
56
  raise ValueError("wait must be between 0 and 100000 milliseconds")
40
57
 
@@ -42,9 +59,22 @@ class UniversalScrapeRequest(ThordataBaseConfig):
42
59
  payload: dict[str, Any] = {
43
60
  "url": self.url,
44
61
  "js_render": "True" if self.js_render else "False",
45
- "type": self.output_format.lower(),
46
62
  }
47
63
 
64
+ # Handle output format: support single or multiple formats (e.g., "png,html")
65
+ if hasattr(self, "_output_formats") and self._output_formats:
66
+ if len(self._output_formats) == 1:
67
+ payload["type"] = self._output_formats[0]
68
+ else:
69
+ # Multiple formats: join with comma (e.g., "png,html")
70
+ payload["type"] = ",".join(self._output_formats)
71
+ else:
72
+ # Fallback for backward compatibility
73
+ if isinstance(self.output_format, str):
74
+ payload["type"] = self.output_format.lower()
75
+ else:
76
+ payload["type"] = ",".join([str(f).lower() for f in self.output_format])
77
+
48
78
  if self.country:
49
79
  payload["country"] = self.country.lower()
50
80
  if self.block_resources:
@@ -55,6 +85,8 @@ class UniversalScrapeRequest(ThordataBaseConfig):
55
85
  payload["wait"] = str(self.wait)
56
86
  if self.wait_for:
57
87
  payload["wait_for"] = self.wait_for
88
+ if self.follow_redirect is not None:
89
+ payload["follow_redirect"] = "True" if self.follow_redirect else "False"
58
90
 
59
91
  # Serialize complex objects as JSON strings
60
92
  if self.headers:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.5.0
3
+ Version: 1.7.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -63,9 +63,9 @@ Dynamic: license-file
63
63
 
64
64
  ## 📖 Introduction
65
65
 
66
- The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
66
+ The **Thordata Python SDK v1.7.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
67
67
 
68
- **Why v1.5.0?**
68
+ **Why v1.7.0?**
69
69
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
70
70
  * **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
71
71
  * **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
@@ -83,7 +83,7 @@ pip install thordata-sdk
83
83
 
84
84
  ## 🔐 Configuration
85
85
 
86
- Set environment variables to avoid hardcoding credentials.
86
+ Set environment variables to avoid hardcoding credentials. **Full reference:** copy [.env.example](.env.example) to `.env` and fill in values.
87
87
 
88
88
  ```bash
89
89
  # [Scraping APIs]
@@ -93,13 +93,19 @@ export THORDATA_SCRAPER_TOKEN="your_scraper_token"
93
93
  export THORDATA_PUBLIC_TOKEN="your_public_token"
94
94
  export THORDATA_PUBLIC_KEY="your_public_key"
95
95
 
96
- # [Proxy Network]
96
+ # [Proxy: Residential / Unlimited / Datacenter / Mobile / ISP]
97
97
  export THORDATA_RESIDENTIAL_USERNAME="your_username"
98
98
  export THORDATA_RESIDENTIAL_PASSWORD="your_password"
99
- # Optional: Set upstream proxy for local dev (e.g., Clash)
100
- # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
99
+ # Optional: Unlimited (high-bandwidth) if your plan has separate credentials
100
+ # export THORDATA_UNLIMITED_USERNAME="..."
101
+ # export THORDATA_UNLIMITED_PASSWORD="..."
102
+
103
+ # Optional: Upstream proxy when behind a firewall (e.g. Clash Verge port 7897)
104
+ # export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7897"
101
105
  ```
102
106
 
107
+ Default proxy port is **9999** (residential); other products use different ports (see `.env.example`).
108
+
103
109
  ---
104
110
 
105
111
  ## 🚀 Quick Start
@@ -199,6 +205,48 @@ if status == "finished":
199
205
  print(f"Download: {data_url}")
200
206
  ```
201
207
 
208
+ ### Web Scraper Tools (120+ Pre-built Tools)
209
+
210
+ Use pre-built tools for popular platforms. See [Tool Coverage Matrix](docs/TOOL_COVERAGE_MATRIX.md) for the full list.
211
+
212
+ ```python
213
+ from thordata import ThordataClient
214
+ from thordata.tools import Amazon, GoogleMaps, YouTube, TikTok, eBay, Walmart
215
+
216
+ client = ThordataClient()
217
+
218
+ # Amazon Product by ASIN
219
+ task_id = client.run_tool(Amazon.ProductByAsin(asin="B0BZYCJK89"))
220
+
221
+ # Google Maps by Place ID
222
+ task_id = client.run_tool(GoogleMaps.DetailsByPlaceId(place_id="ChIJPTacEpBQwokRKwIlDXelxkA"))
223
+
224
+ # YouTube Video Download
225
+ from thordata import CommonSettings
226
+ settings = CommonSettings(resolution="<=360p", video_codec="vp9")
227
+ task_id = client.run_tool(YouTube.VideoDownload(
228
+ url="https://www.youtube.com/watch?v=jNQXAC9IVRw",
229
+ common_settings=settings
230
+ ))
231
+
232
+ # Wait and get results
233
+ status = client.wait_for_task(task_id, max_wait=300)
234
+ if status == "ready":
235
+ download_url = client.get_task_result(task_id)
236
+ print(f"Results: {download_url}")
237
+ ```
238
+
239
+ **Available Platforms:**
240
+ - **E-Commerce**: Amazon, eBay, Walmart
241
+ - **Social Media**: TikTok, Instagram, Facebook, Twitter/X, Reddit, LinkedIn
242
+ - **Search**: Google Maps, Google Shopping, Google Play
243
+ - **Video**: YouTube (download, info, subtitles)
244
+ - **Code**: GitHub
245
+ - **Professional**: Indeed, Glassdoor, Crunchbase
246
+ - **Travel/Real Estate**: Booking, Airbnb, Zillow
247
+
248
+ See `examples/tools/` for more examples.
249
+
202
250
  ---
203
251
 
204
252
  ## 🛠️ Management APIs
@@ -226,6 +274,14 @@ monitor = client.unlimited.get_server_monitor(
226
274
 
227
275
  ---
228
276
 
277
+ ## 🧪 Development & Testing
278
+
279
+ - **Full env reference**: Copy [.env.example](.env.example) to `.env` and fill in credentials.
280
+ - **Unit tests** (no network): `pytest` or `python -m coverage run -m pytest -p no:cov tests && python -m coverage report -m`
281
+ - **Integration tests** (live API/proxy): Set `THORDATA_INTEGRATION=true` in `.env`; optional `THORDATA_UPSTREAM_PROXY` (e.g. Clash) if behind a firewall. See [CONTRIBUTING.md](CONTRIBUTING.md#-testing-guidelines).
282
+
283
+ ---
284
+
229
285
  ## 📄 License
230
286
 
231
287
  MIT License. See [LICENSE](LICENSE) for details.
@@ -0,0 +1,35 @@
1
+ thordata/__init__.py,sha256=FMOku6d17GrFjiJlRhvkx-JmhLLD7VlaADLC3FP6hHg,2287
2
+ thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
3
+ thordata/async_client.py,sha256=akYyUVm7aeUzmuYUKcQOtsgCNreqWw1x3kEShmBL-_c,41363
4
+ thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
5
+ thordata/client.py,sha256=LxLLUfT75_nFSfTK8NxoFPepPXyq8qwvJKdq7bieXkY,58981
6
+ thordata/enums.py,sha256=dO5QWpPFLpYP2GfLAdoFtxMTemhGNdr_NPqBoYfSFkk,764
7
+ thordata/exceptions.py,sha256=ntiq3F5sxAiEDmCnlcfS2GNb3Qa7DpRvMrhmgXhAGIg,11947
8
+ thordata/models.py,sha256=7GshQklo5aqke_ZQ2QIXiz9Ac5v6IRtvjWIjsBKEq6A,853
9
+ thordata/retry.py,sha256=X6Sa5IIb5EWD5fUJjKyhvWJyWQGPVgxLB3-vKoWfa5Q,11453
10
+ thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
11
+ thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
12
+ thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
13
+ thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
14
+ thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
15
+ thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
16
+ thordata/tools/__init__.py,sha256=_Sr042bW-OMMj-WruA93YeQ6FfeIXvWmHoHMAFQ72a8,840
17
+ thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
18
+ thordata/tools/code.py,sha256=fGuLEn_CydIq79XgMw5-EJDcp-nq2fenWVp7hKpsRNw,930
19
+ thordata/tools/ecommerce.py,sha256=8iZ7f46CYovPDfAS3lZhRXpXEyJ9PSFBw9w99-Zw8Qs,6584
20
+ thordata/tools/professional.py,sha256=2RJ76Sx1seftFpwgD4VRfRinoo-HAqYZucTnuIdV4Kw,4350
21
+ thordata/tools/search.py,sha256=2HLQaYK6JiGvzOFF9or9ORXNrzv6nDQUaEt83YbqiQA,2903
22
+ thordata/tools/social.py,sha256=6gcj1GUWJvDALpBMeobohIn6yPVo-LsqDsuUroNpHG8,10465
23
+ thordata/tools/travel.py,sha256=vRJAU-uzFVvLQ5Tc58vp3CY7OPWd2lcWh_9MvWMc1fs,2725
24
+ thordata/tools/video.py,sha256=HUFqdue-dtWmTVlYtmf5ffzuYDIzw5l3wk3Vr7AXQW0,4689
25
+ thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
26
+ thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
27
+ thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
28
+ thordata/types/serp.py,sha256=2jMqims_hmvFfqvvmyyp_SAecupWayAWNJGuHPzPI6o,6150
29
+ thordata/types/task.py,sha256=PVKNyzXZHNunThVb5dwHDtMZ9WmFpbaePDnkeV754CQ,6264
30
+ thordata/types/universal.py,sha256=8OIZs239fBxzSuLEe3VB9qCp1ddN5XKAXbpVm9MJuls,3631
31
+ thordata_sdk-1.7.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
32
+ thordata_sdk-1.7.0.dist-info/METADATA,sha256=0LIpFeY8-83XNvaNSZzZ5w2V8ET3N2puPMBz_4t-A5k,9308
33
+ thordata_sdk-1.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
34
+ thordata_sdk-1.7.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
35
+ thordata_sdk-1.7.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,77 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from collections.abc import Iterable
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- try:
10
- from dotenv import load_dotenv
11
- except Exception: # pragma: no cover
12
- load_dotenv = None
13
-
14
-
15
- def load_env() -> None:
16
- """Load .env from repo root if python-dotenv is installed."""
17
- if load_dotenv is None:
18
- return
19
- repo_root = Path(__file__).resolve().parents[2]
20
- load_dotenv(dotenv_path=repo_root / ".env")
21
-
22
-
23
- def env(name: str) -> str:
24
- return (os.getenv(name) or "").strip()
25
-
26
-
27
- def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
28
- missing = [k for k in required if not env(k)]
29
- if not missing:
30
- return False
31
- print("Skipping live example: missing env:", ", ".join(missing))
32
- if tip:
33
- print(tip)
34
- else:
35
- print("Tip: copy .env.example to .env and fill values, then re-run.")
36
- return True
37
-
38
-
39
- def parse_json_env(name: str, default: str = "{}") -> Any:
40
- raw = env(name) or default
41
- return json.loads(raw)
42
-
43
-
44
- def normalize_task_parameters(raw: Any) -> dict[str, Any]:
45
- """Accept {..} or [{..}] and return a single dict for create_scraper_task(parameters=...)."""
46
- if isinstance(raw, list):
47
- if not raw:
48
- raise ValueError("Task parameters JSON array must not be empty")
49
- raw = raw[0]
50
- if not isinstance(raw, dict):
51
- raise ValueError("Task parameters must be a JSON object (or array of objects)")
52
- return raw
53
-
54
-
55
- def output_dir() -> Path:
56
- """Return output dir for examples; defaults to examples/output (ignored by git)."""
57
- repo_root = Path(__file__).resolve().parents[2]
58
- d = env("THORDATA_OUTPUT_DIR") or str(repo_root / "examples" / "output")
59
- p = Path(d)
60
- p.mkdir(parents=True, exist_ok=True)
61
- return p
62
-
63
-
64
- def write_text(filename: str, content: str) -> Path:
65
- p = output_dir() / filename
66
- p.write_text(content, encoding="utf-8", errors="replace")
67
- return p
68
-
69
-
70
- def write_json(filename: str, data: Any) -> Path:
71
- p = output_dir() / filename
72
- p.write_text(
73
- json.dumps(data, ensure_ascii=False, indent=2),
74
- encoding="utf-8",
75
- errors="replace",
76
- )
77
- return p
thordata/demo.py DELETED
@@ -1,138 +0,0 @@
1
- """
2
- Unified demo entrypoint for the Thordata Python SDK.
3
-
4
- This module runs the example scripts from the repository's `examples/` directory
5
- using `runpy`, so it does not require `examples/` to be an importable package.
6
-
7
- Usage:
8
- python -m thordata.demo serp
9
- python -m thordata.demo universal
10
- python -m thordata.demo scraper
11
- python -m thordata.demo concurrency
12
-
13
- Notes:
14
- - This entrypoint is primarily intended for repository usage (dev/demo).
15
- - When installed from PyPI, the `examples/` directory is typically not included.
16
- """
17
-
18
- from __future__ import annotations
19
-
20
- import runpy
21
- import sys
22
- from pathlib import Path
23
-
24
-
25
- def _configure_stdio() -> None:
26
- # Avoid UnicodeEncodeError on Windows consoles with legacy encodings.
27
- if hasattr(sys.stdout, "reconfigure"):
28
- sys.stdout.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
29
- if hasattr(sys.stderr, "reconfigure"):
30
- sys.stderr.reconfigure(encoding="utf-8", errors="replace") # type: ignore[attr-defined]
31
-
32
-
33
- def _load_env() -> None:
34
- # Optional .env support for local development
35
- try:
36
- from dotenv import load_dotenv
37
- except ImportError:
38
- return
39
- load_dotenv()
40
-
41
-
42
- def _repo_root() -> Path:
43
- """
44
- Resolve repository root based on src layout:
45
- <repo>/src/thordata/demo.py -> parents[2] == <repo>
46
- """
47
- return Path(__file__).resolve().parents[2]
48
-
49
-
50
- def _examples_dir() -> Path:
51
- return _repo_root() / "examples"
52
-
53
-
54
- def _demo_map() -> dict[str, Path]:
55
- ex = _examples_dir()
56
- return {
57
- "serp": ex / "demo_serp_api.py",
58
- "universal": ex / "demo_universal.py",
59
- "scraper": ex / "demo_web_scraper_api.py",
60
- "concurrency": ex / "async_high_concurrency.py",
61
- }
62
-
63
-
64
- def _usage() -> str:
65
- names = ", ".join(sorted(_demo_map().keys()))
66
- return f"Usage: python -m thordata.demo [{names}]"
67
-
68
-
69
- def _run_demo(path: Path) -> int:
70
- if not path.exists():
71
- print(f"Error: demo script not found: {path}")
72
- return 2
73
-
74
- # Ensure examples dir is on sys.path (helpful if demo imports local helpers).
75
- examples_dir = str(path.parent.resolve())
76
- if examples_dir not in sys.path:
77
- sys.path.insert(0, examples_dir)
78
-
79
- try:
80
- # Load without triggering `if __name__ == "__main__": ...`
81
- ns = runpy.run_path(str(path), run_name="__thordata_demo__")
82
-
83
- main_func = ns.get("main")
84
- if callable(main_func):
85
- return int(main_func()) # type: ignore[arg-type]
86
-
87
- # Fallback: run as __main__ for scripts without main()
88
- runpy.run_path(str(path), run_name="__main__")
89
- return 0
90
-
91
- except KeyboardInterrupt:
92
- raise
93
- except SystemExit as e:
94
- # In case fallback run as __main__ triggered SystemExit
95
- code = e.code
96
- if code is None:
97
- return 0
98
- if isinstance(code, int):
99
- return code
100
- return 1
101
- except Exception as e:
102
- import traceback
103
-
104
- print()
105
- print("-" * 60)
106
- print("[thordata.demo] The demo script raised an exception.")
107
- print(f"[thordata.demo] Script: {path.name}")
108
- print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
109
- print()
110
- print("Note: This is a failure within the demo script itself,")
111
- print(" not an issue with the thordata.demo entrypoint.")
112
- print("-" * 60)
113
- traceback.print_exc()
114
- return 1
115
-
116
-
117
- def main() -> int:
118
- _configure_stdio()
119
- _load_env()
120
-
121
- if len(sys.argv) < 2:
122
- print(_usage())
123
- return 2
124
-
125
- name = sys.argv[1].strip().lower()
126
- mapping = _demo_map()
127
-
128
- path = mapping.get(name)
129
- if path is None:
130
- print(f"Unknown demo: {name}")
131
- print(_usage())
132
- return 2
133
-
134
- return _run_demo(path)
135
-
136
-
137
- if __name__ == "__main__":
138
- raise SystemExit(main())
@@ -1,35 +0,0 @@
1
- thordata/__init__.py,sha256=-2bXx3LckBWrJ_E5HqFTOj7sm45AgrOnSWV4QN6f-7U,2287
2
- thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
3
- thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
4
- thordata/async_client.py,sha256=zN59ZQfFVCuAGnGcyj-C_S9MbHzb17QbUISm46n6gpY,39439
5
- thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
6
- thordata/client.py,sha256=eA6jav_aAw2CQdSyrg3P59rELKo13K5tHqmSjEw3L_8,56717
7
- thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
8
- thordata/enums.py,sha256=_pahGhcq9Eh2ptL_WiNU2WlqKrydV_6e4U9G4erV9-s,774
9
- thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
10
- thordata/models.py,sha256=wozvlpS-Uv1DgkM_CEKOvldQ2InicxhIN0QiezIXPE4,853
11
- thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
12
- thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
13
- thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
14
- thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
15
- thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
16
- thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
17
- thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
18
- thordata/tools/__init__.py,sha256=ROryBBlCfq9cydaKXEPtnevjhg6GdFioAjdnp2VTR0M,606
19
- thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
20
- thordata/tools/code.py,sha256=opYMG7LdR90VjW5tn8wnRCwDT-zUC0uteMKW01TMPTI,580
21
- thordata/tools/ecommerce.py,sha256=u-s-RGMSAGifsMnyMrwtJ3yVDgu3n74bv8yyX6TbMNU,1560
22
- thordata/tools/search.py,sha256=toWMOnnfQXgafyndHs23Yn049vpPlGPHdZA7SpiJJTE,1724
23
- thordata/tools/social.py,sha256=VbujfbA5Man6Shsik4QYBpf9z2FJhhJkZLNKll09Ots,4886
24
- thordata/tools/video.py,sha256=WikUOYPSVtHdrS0Z7VVexlUPyFZRv9v7cerkpzzO5jU,2549
25
- thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
26
- thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
27
- thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
28
- thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
29
- thordata/types/task.py,sha256=f5xGeH4BrE7sHIgWhRJuMr3iuPooxJlg7ztr8lwcSx8,4139
30
- thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
31
- thordata_sdk-1.5.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
32
- thordata_sdk-1.5.0.dist-info/METADATA,sha256=VqsfaJsguO-KSMOjWjPodO1nIa510qpjNBdVzCMHshQ,7026
33
- thordata_sdk-1.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
34
- thordata_sdk-1.5.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
35
- thordata_sdk-1.5.0.dist-info/RECORD,,