thordata-sdk 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,77 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from collections.abc import Iterable
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- try:
10
- from dotenv import load_dotenv
11
- except Exception: # pragma: no cover
12
- load_dotenv = None
13
-
14
-
15
def load_env() -> None:
    """Load the repo-root ``.env`` file when python-dotenv is available.

    Does nothing when the optional ``dotenv`` import failed, i.e. when the
    module-level ``load_dotenv`` is ``None``. The repo root is taken to be
    ``parents[2]`` of this module's resolved path.
    """
    if load_dotenv is not None:
        dotenv_file = Path(__file__).resolve().parents[2] / ".env"
        load_dotenv(dotenv_path=dotenv_file)
21
-
22
-
23
def env(name: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    An unset or empty variable yields the empty string.
    """
    value = os.getenv(name)
    if not value:
        return ""
    return value.strip()
25
-
26
-
27
def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
    """Tell the caller whether a live example must be skipped.

    Args:
        required: Names of environment variables that must be non-empty.
        tip: Optional hint printed in place of the default ``.env`` advice.

    Returns:
        ``True`` (after printing the missing names and a hint) when at least
        one variable is unset or blank; ``False`` when all are present.
    """
    absent = []
    for key in required:
        if not env(key):
            absent.append(key)

    if absent:
        print("Skipping live example: missing env:", ", ".join(absent))
        print(tip if tip else "Tip: copy .env.example to .env and fill values, then re-run.")
        return True
    return False
37
-
38
-
39
def parse_json_env(name: str, default: str = "{}") -> Any:
    """Decode the JSON text stored in environment variable *name*.

    Args:
        name: Environment variable to read.
        default: JSON text used when the variable is unset or blank.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON. The message names
            the environment variable so a bad setting is easy to locate.
    """
    raw = env(name) or default
    try:
        return json.loads(raw)
    except json.JSONDecodeError as exc:
        # Re-raise the same exception type (callers may catch it) but say
        # which variable held the malformed text.
        raise json.JSONDecodeError(
            f"Invalid JSON for environment variable {name!r}: {exc.msg}",
            exc.doc,
            exc.pos,
        ) from exc
42
-
43
-
44
def normalize_task_parameters(raw: Any) -> dict[str, Any]:
    """Reduce ``{..}`` or ``[{..}]`` to the single dict passed as ``parameters=``.

    A list contributes only its first element.

    Raises:
        ValueError: If the list is empty, or the selected value is not a dict.
    """
    candidate = raw
    if isinstance(candidate, list):
        if not candidate:
            raise ValueError("Task parameters JSON array must not be empty")
        candidate = candidate[0]

    if isinstance(candidate, dict):
        return candidate
    raise ValueError("Task parameters must be a JSON object (or array of objects)")
53
-
54
-
55
def output_dir() -> Path:
    """Return (creating it if needed) the directory where examples write output.

    ``THORDATA_OUTPUT_DIR`` takes precedence; otherwise the directory is
    ``examples/output`` under the repo root (ignored by git).
    """
    repo_root = Path(__file__).resolve().parents[2]
    override = env("THORDATA_OUTPUT_DIR")
    target = Path(override) if override else repo_root / "examples" / "output"
    target.mkdir(parents=True, exist_ok=True)
    return target
62
-
63
-
64
def write_text(filename: str, content: str) -> Path:
    """Write *content* as UTF-8 to *filename* inside the output directory.

    Characters that cannot be encoded are replaced rather than raising.

    Returns:
        The path of the file that was written.
    """
    target = output_dir().joinpath(filename)
    target.write_text(content, encoding="utf-8", errors="replace")
    return target
68
-
69
-
70
def write_json(filename: str, data: Any) -> Path:
    """Serialize *data* as indented JSON into *filename* in the output directory.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) in UTF-8.

    Returns:
        The path of the file that was written.
    """
    target = output_dir() / filename
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8", errors="replace")
    return target
thordata/demo.py DELETED
@@ -1,138 +0,0 @@
1
- """
2
- Unified demo entrypoint for the Thordata Python SDK.
3
-
4
- This module runs the example scripts from the repository's `examples/` directory
5
- using `runpy`, so it does not require `examples/` to be an importable package.
6
-
7
- Usage:
8
- python -m thordata.demo serp
9
- python -m thordata.demo universal
10
- python -m thordata.demo scraper
11
- python -m thordata.demo concurrency
12
-
13
- Notes:
14
- - This entrypoint is primarily intended for repository usage (dev/demo).
15
- - When installed from PyPI, the `examples/` directory is typically not included.
16
- """
17
-
18
- from __future__ import annotations
19
-
20
- import runpy
21
- import sys
22
- from pathlib import Path
23
-
24
-
25
def _configure_stdio() -> None:
    """Switch stdout and stderr to UTF-8 with replacement of unencodable text.

    Streams that lack a ``reconfigure`` method are left untouched.
    """
    # Avoid UnicodeEncodeError on Windows consoles with legacy encodings.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[attr-defined]
31
-
32
-
33
- def _load_env() -> None:
34
- # Optional .env support for local development
35
- try:
36
- from dotenv import load_dotenv
37
- except ImportError:
38
- return
39
- load_dotenv()
40
-
41
-
42
- def _repo_root() -> Path:
43
- """
44
- Resolve repository root based on src layout:
45
- <repo>/src/thordata/demo.py -> parents[2] == <repo>
46
- """
47
- return Path(__file__).resolve().parents[2]
48
-
49
-
50
def _examples_dir() -> Path:
    """Return the path of the ``examples`` directory under the repo root."""
    return _repo_root().joinpath("examples")
52
-
53
-
54
def _demo_map() -> dict[str, Path]:
    """Map each demo name accepted on the command line to its script path."""
    scripts = (
        ("serp", "demo_serp_api.py"),
        ("universal", "demo_universal.py"),
        ("scraper", "demo_web_scraper_api.py"),
        ("concurrency", "async_high_concurrency.py"),
    )
    base = _examples_dir()
    return {label: base / script for label, script in scripts}
62
-
63
-
64
def _usage() -> str:
    """Build the one-line usage text listing the demo names alphabetically."""
    available = sorted(_demo_map())
    return "Usage: python -m thordata.demo [" + ", ".join(available) + "]"
67
-
68
-
69
- def _run_demo(path: Path) -> int:
70
- if not path.exists():
71
- print(f"Error: demo script not found: {path}")
72
- return 2
73
-
74
- # Ensure examples dir is on sys.path (helpful if demo imports local helpers).
75
- examples_dir = str(path.parent.resolve())
76
- if examples_dir not in sys.path:
77
- sys.path.insert(0, examples_dir)
78
-
79
- try:
80
- # Load without triggering `if __name__ == "__main__": ...`
81
- ns = runpy.run_path(str(path), run_name="__thordata_demo__")
82
-
83
- main_func = ns.get("main")
84
- if callable(main_func):
85
- return int(main_func()) # type: ignore[arg-type]
86
-
87
- # Fallback: run as __main__ for scripts without main()
88
- runpy.run_path(str(path), run_name="__main__")
89
- return 0
90
-
91
- except KeyboardInterrupt:
92
- raise
93
- except SystemExit as e:
94
- # In case fallback run as __main__ triggered SystemExit
95
- code = e.code
96
- if code is None:
97
- return 0
98
- if isinstance(code, int):
99
- return code
100
- return 1
101
- except Exception as e:
102
- import traceback
103
-
104
- print()
105
- print("-" * 60)
106
- print("[thordata.demo] The demo script raised an exception.")
107
- print(f"[thordata.demo] Script: {path.name}")
108
- print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
109
- print()
110
- print("Note: This is a failure within the demo script itself,")
111
- print(" not an issue with the thordata.demo entrypoint.")
112
- print("-" * 60)
113
- traceback.print_exc()
114
- return 1
115
-
116
-
117
def main() -> int:
    """Run the demo named by the first command-line argument.

    Returns:
        ``2`` when no demo name was given or the name is unknown (usage is
        printed in both cases); otherwise the exit code from ``_run_demo``.
    """
    _configure_stdio()
    _load_env()

    args = sys.argv[1:]
    if not args:
        print(_usage())
        return 2

    requested = args[0].strip().lower()
    script = _demo_map().get(requested)
    if script is not None:
        return _run_demo(script)

    print(f"Unknown demo: {requested}")
    print(_usage())
    return 2
135
-
136
-
137
if __name__ == "__main__":
    # Exit the interpreter with main()'s return value as the process status.
    sys.exit(main())
@@ -1,208 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: thordata-sdk
3
- Version: 1.4.0
4
- Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
- Author-email: Thordata Developer Team <support@thordata.com>
6
- License: MIT
7
- Project-URL: Homepage, https://www.thordata.com
8
- Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
- Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
- Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
- Project-URL: Changelog, https://github.com/Thordata/thordata-python-sdk/blob/main/CHANGELOG.md
12
- Keywords: web scraping,proxy,residential proxy,datacenter proxy,ai,llm,data-mining,serp,thordata,web scraper,anti-bot bypass
13
- Classifier: Development Status :: 4 - Beta
14
- Classifier: Intended Audience :: Developers
15
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
- Classifier: Topic :: Internet :: WWW/HTTP
17
- Classifier: Topic :: Internet :: Proxy Servers
18
- Classifier: Programming Language :: Python :: 3
19
- Classifier: Programming Language :: Python :: 3.9
20
- Classifier: Programming Language :: Python :: 3.10
21
- Classifier: Programming Language :: Python :: 3.11
22
- Classifier: Programming Language :: Python :: 3.12
23
- Classifier: License :: OSI Approved :: MIT License
24
- Classifier: Operating System :: OS Independent
25
- Classifier: Typing :: Typed
26
- Requires-Python: >=3.9
27
- Description-Content-Type: text/markdown
28
- License-File: LICENSE
29
- Requires-Dist: requests>=2.25.0
30
- Requires-Dist: aiohttp>=3.9.0
31
- Requires-Dist: PySocks>=1.7.1
32
- Provides-Extra: dev
33
- Requires-Dist: pytest>=7.0.0; extra == "dev"
34
- Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
35
- Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
36
- Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
37
- Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
38
- Requires-Dist: black>=25.11.0; extra == "dev"
39
- Requires-Dist: ruff>=0.1.0; extra == "dev"
40
- Requires-Dist: mypy>=1.0.0; extra == "dev"
41
- Requires-Dist: types-requests>=2.28.0; extra == "dev"
42
- Requires-Dist: aioresponses>=0.7.6; extra == "dev"
43
- Dynamic: license-file
44
-
45
- # Thordata Python SDK
46
-
47
- <div align="center">
48
-
49
- <img src="https://img.shields.io/badge/Thordata-AI%20Infrastructure-blue?style=for-the-badge" alt="Thordata Logo">
50
-
51
- **The Official Python Client for Thordata APIs**
52
-
53
- *Proxy Network • SERP API • Web Unlocker • Web Scraper API*
54
-
55
- [![PyPI version](https://img.shields.io/pypi/v/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
56
- [![Python Versions](https://img.shields.io/pypi/pyversions/thordata-sdk.svg?style=flat-square)](https://pypi.org/project/thordata-sdk/)
57
- [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
58
- [![CI Status](https://img.shields.io/github/actions/workflow/status/Thordata/thordata-python-sdk/ci.yml?branch=main&style=flat-square)](https://github.com/Thordata/thordata-python-sdk/actions)
59
-
60
- </div>
61
-
62
- ---
63
-
64
- ## 📖 Introduction
65
-
66
- This SDK provides a robust, high-performance interface to Thordata's AI data infrastructure. It is designed for high-concurrency scraping, reliable proxy tunneling, and seamless data extraction.
67
-
68
- **Key Features:**
69
- * **🚀 Production Ready:** Built on `urllib3` connection pooling for low-latency proxy requests.
70
- * **⚡ Async Support:** Native `aiohttp` client for high-concurrency SERP/Universal scraping.
71
- * **🛡️ Robust:** Handles TLS-in-TLS tunneling, retries, and error parsing automatically.
72
- * **✨ Developer Experience:** Fully typed (`mypy` compatible) with intuitive IDE autocomplete.
73
- * **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
74
-
75
- ---
76
-
77
- ## 📦 Installation
78
-
79
- ```bash
80
- pip install thordata-sdk
81
- ```
82
-
83
- ---
84
-
85
- ## 🔐 Configuration
86
-
87
- Set environment variables to avoid hardcoding credentials. You only need to set the variables for the features you use.
88
-
89
- ```bash
90
- # [Required for SERP & Web Unlocker]
91
- export THORDATA_SCRAPER_TOKEN="your_token_here"
92
-
93
- # [Required for Proxy Network]
94
- export THORDATA_RESIDENTIAL_USERNAME="your_username"
95
- export THORDATA_RESIDENTIAL_PASSWORD="your_password"
96
- export THORDATA_PROXY_HOST="vpnXXXX.pr.thordata.net"
97
-
98
- # [Required for Task Management]
99
- export THORDATA_PUBLIC_TOKEN="public_token"
100
- export THORDATA_PUBLIC_KEY="public_key"
101
- ```
102
-
103
- ---
104
-
105
- ## 🚀 Quick Start
106
-
107
- ### 1. SERP Search (Google/Bing/Yandex)
108
-
109
- ```python
110
- from thordata import ThordataClient, Engine
111
-
112
- client = ThordataClient() # Loads THORDATA_SCRAPER_TOKEN from env
113
-
114
- # Simple Search
115
- print("Searching...")
116
- results = client.serp_search("latest AI trends", engine=Engine.GOOGLE_NEWS)
117
-
118
- for news in results.get("news_results", [])[:3]:
119
- print(f"- {news['title']} ({news['source']})")
120
- ```
121
-
122
- ### 2. Universal Scrape (Web Unlocker)
123
-
124
- Bypass Cloudflare/Akamai and render JavaScript automatically.
125
-
126
- ```python
127
- html = client.universal_scrape(
128
- url="https://example.com/protected-page",
129
- js_render=True,
130
- wait_for=".content-loaded",
131
- country="us"
132
- )
133
- print(f"Scraped {len(html)} bytes")
134
- ```
135
-
136
- ### 3. High-Performance Proxy
137
-
138
- Use Thordata's residential IPs with automatic connection pooling.
139
-
140
- ```python
141
- from thordata import ProxyConfig, ProxyProduct
142
-
143
- # Config is optional if env vars are set, but allows granular control
144
- proxy = ProxyConfig(
145
- product=ProxyProduct.RESIDENTIAL,
146
- country="jp",
147
- city="tokyo",
148
- session_id="session-001",
149
- session_duration=10 # Sticky IP for 10 mins
150
- )
151
-
152
- # Use the client to make requests (Reuses TCP connections)
153
- response = client.get("https://httpbin.org/ip", proxy_config=proxy)
154
- print(response.json())
155
- ```
156
-
157
- ---
158
-
159
- ## ⚙️ Advanced Usage
160
-
161
- ### Async Client (High Concurrency)
162
-
163
- For building AI agents or high-throughput spiders.
164
-
165
- ```python
166
- import asyncio
167
- from thordata import AsyncThordataClient
168
-
169
- async def main():
170
- async with AsyncThordataClient() as client:
171
- # Fire off multiple requests in parallel
172
- tasks = [
173
- client.serp_search(f"query {i}")
174
- for i in range(5)
175
- ]
176
- results = await asyncio.gather(*tasks)
177
- print(f"Completed {len(results)} searches")
178
-
179
- asyncio.run(main())
180
- ```
181
-
182
- ### Web Scraper API (Task Management)
183
-
184
- Create and manage large-scale scraping tasks asynchronously.
185
-
186
- ```python
187
- # 1. Create a task
188
- task_id = client.create_scraper_task(
189
- file_name="daily_scrape",
190
- spider_id="universal",
191
- spider_name="universal",
192
- parameters={"url": "https://example.com"}
193
- )
194
-
195
- # 2. Wait for completion (Polling)
196
- status = client.wait_for_task(task_id)
197
-
198
- # 3. Get results
199
- if status == "ready":
200
- url = client.get_task_result(task_id)
201
- print(f"Download Data: {url}")
202
- ```
203
-
204
- ---
205
-
206
- ## 📄 License
207
-
208
- MIT License. See [LICENSE](LICENSE) for details.
@@ -1,18 +0,0 @@
1
- thordata/__init__.py,sha256=WVs3pZALETqLZDoAWyUoYNEjTAtC9FPvBxJEvDHnu04,3195
2
- thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
3
- thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
4
- thordata/async_client.py,sha256=U6VgjsaX3-dzaIwEHtPborwYVrw-K1flTLg9LP9Xozk,80378
5
- thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
6
- thordata/client.py,sha256=79XzN4VxyY9EgYSYi5FhicHzo7ASGLb4hoEWig0nT7w,84500
7
- thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
8
- thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
9
- thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
10
- thordata/models.py,sha256=qtB7jE0v5zNEQfSpmOqdiacB5DgM2QfVR2PaYs-DisM,38206
11
- thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
12
- thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
13
- thordata/unlimited.py,sha256=U0MEHeowNe6js8aezPFD8vGOft0VA8cpL-TUsLNyv2k,3855
14
- thordata_sdk-1.4.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
15
- thordata_sdk-1.4.0.dist-info/METADATA,sha256=RJvTjs-N-ZaI7X9KfBI1ST5lcNTCeNpTZ92Cz7Ky9wk,6601
16
- thordata_sdk-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
- thordata_sdk-1.4.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
18
- thordata_sdk-1.4.0.dist-info/RECORD,,