thordata-sdk 0.2.3__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: thordata-sdk
3
+ Version: 0.3.0
4
+ Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
+ Author-email: Thordata Developer Team <support@thordata.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://www.thordata.com
8
+ Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
9
+ Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
10
+ Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
11
+ Keywords: web scraping,proxy,ai,llm,data-mining,serp,thordata
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Classifier: Topic :: Internet :: WWW/HTTP
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: License :: OSI Approved :: Apache Software License
22
+ Classifier: Operating System :: OS Independent
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: requests>=2.25.0
27
+ Requires-Dist: aiohttp>=3.8.0
28
+ Dynamic: license-file
29
+
30
+ # Thordata Python SDK
31
+
32
+ <h4 align="center">
33
+ Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
34
+ <br>
35
+ <i>Async-ready, built for AI agents and large-scale data collection.</i>
36
+ </h4>
37
+
38
+ <p align="center">
39
+ <a href="https://pypi.org/project/thordata-sdk/">
40
+ <img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
41
+ </a>
42
+ <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
43
+ <img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
44
+ </a>
45
+ <a href="https://python.org">
46
+ <img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
47
+ </a>
48
+ </p>
49
+
50
+ ---
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install thordata-sdk
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ All examples below use the unified client:
61
+
62
+ ```python
63
+ from thordata import ThordataClient, AsyncThordataClient
64
+ ```
65
+
66
+ You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
67
+
68
+ ### 1. Proxy Network (Simple GET)
69
+
70
+ ```python
71
+ import os
72
+ from dotenv import load_dotenv
73
+ from thordata import ThordataClient
74
+
75
+ load_dotenv()
76
+
77
+ client = ThordataClient(
78
+ scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
79
+ public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
80
+ public_key=os.getenv("THORDATA_PUBLIC_KEY"),
81
+ )
82
+
83
+ resp = client.get("http://httpbin.org/ip")
84
+ print(resp.json())
85
+ ```
86
+
87
+ ### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
88
+
89
+ ```python
90
+ from thordata import ThordataClient, Engine
91
+
92
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
93
+
94
+ results = client.serp_search(
95
+ query="Thordata technology",
96
+ engine=Engine.GOOGLE,
97
+ num=10,
98
+ # Any engine-specific parameters are passed via **kwargs
99
+ # e.g. type="shopping", location="United States"
100
+ )
101
+
102
+ print(len(results.get("organic", [])))
103
+ ```
104
+
105
+ ### 3. Universal Scraping API
106
+
107
+ ```python
108
+ from thordata import ThordataClient
109
+
110
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
111
+
112
+ html = client.universal_scrape(
113
+ url="https://www.google.com",
114
+ js_render=True,
115
+ output_format="HTML",
116
+ )
117
+ print(html[:200])
118
+ ```
119
+
120
+ ### 4. Web Scraper API (Task-based)
121
+
122
+ ```python
123
+ import time
124
+ from thordata import ThordataClient
125
+
126
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
127
+
128
+ task_id = client.create_scraper_task(
129
+ file_name="demo_youtube_data",
130
+ spider_id="youtube_video-post_by-url",
131
+ spider_name="youtube.com",
132
+ individual_params={
133
+ "url": "https://www.youtube.com/@stephcurry/videos",
134
+ "order_by": "",
135
+ "num_of_posts": ""
136
+ },
137
+ )
138
+
139
+ for _ in range(10):
140
+ status = client.get_task_status(task_id)
141
+ print("Status:", status)
142
+ if status in ["Ready", "Success"]:
143
+ break
144
+ if status == "Failed":
145
+ raise RuntimeError("Task failed")
146
+ time.sleep(3)
147
+
148
+ download_url = client.get_task_result(task_id)
149
+ print("Download URL:", download_url)
150
+ ```
151
+
152
+ ### 5. Asynchronous Usage (High Concurrency)
153
+
154
+ ```python
155
+ import asyncio
156
+ from thordata import AsyncThordataClient
157
+
158
+ async def main():
159
+ async with AsyncThordataClient(
160
+ scraper_token="SCRAPER_TOKEN",
161
+ public_token="PUBLIC_TOKEN",
162
+ public_key="PUBLIC_KEY",
163
+ ) as client:
164
+ resp = await client.get("http://httpbin.org/ip")
165
+ print(await resp.json())
166
+
167
+ asyncio.run(main())
168
+ ```
169
+
170
+ More examples are available in the `examples/` directory.
171
+
172
+ ---
173
+
174
+ ## Features
175
+
176
+ | Feature | Status | Description |
177
+ |---------|--------|-------------|
178
+ | Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
179
+ | SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
180
+ | Universal Scraping API | Stable | JS rendering, HTML / PNG output, antibot bypass. |
181
+ | Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
182
+ | Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
183
+
184
+ ---
185
+
186
+ ## Development & Contributing
187
+
188
+ See `CONTRIBUTING.md` for local development and contribution guidelines.
189
+
190
+ ## License
191
+
192
+ This project is licensed under the Apache License 2.0.
193
+
194
+ ## Support
195
+
196
+ For technical support, please contact support@thordata.com
197
+ or verify your tokens and quotas in the Thordata Dashboard.
@@ -0,0 +1,168 @@
1
+ # Thordata Python SDK
2
+
3
+ <h4 align="center">
4
+ Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
5
+ <br>
6
+ <i>Async-ready, built for AI agents and large-scale data collection.</i>
7
+ </h4>
8
+
9
+ <p align="center">
10
+ <a href="https://pypi.org/project/thordata-sdk/">
11
+ <img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
12
+ </a>
13
+ <a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
14
+ <img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
15
+ </a>
16
+ <a href="https://python.org">
17
+ <img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
18
+ </a>
19
+ </p>
20
+
21
+ ---
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install thordata-sdk
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ All examples below use the unified client:
32
+
33
+ ```python
34
+ from thordata import ThordataClient, AsyncThordataClient
35
+ ```
36
+
37
+ You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
38
+
39
+ ### 1. Proxy Network (Simple GET)
40
+
41
+ ```python
42
+ import os
43
+ from dotenv import load_dotenv
44
+ from thordata import ThordataClient
45
+
46
+ load_dotenv()
47
+
48
+ client = ThordataClient(
49
+ scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
50
+ public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
51
+ public_key=os.getenv("THORDATA_PUBLIC_KEY"),
52
+ )
53
+
54
+ resp = client.get("http://httpbin.org/ip")
55
+ print(resp.json())
56
+ ```
57
+
58
+ ### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
59
+
60
+ ```python
61
+ from thordata import ThordataClient, Engine
62
+
63
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
64
+
65
+ results = client.serp_search(
66
+ query="Thordata technology",
67
+ engine=Engine.GOOGLE,
68
+ num=10,
69
+ # Any engine-specific parameters are passed via **kwargs
70
+ # e.g. type="shopping", location="United States"
71
+ )
72
+
73
+ print(len(results.get("organic", [])))
74
+ ```
75
+
76
+ ### 3. Universal Scraping API
77
+
78
+ ```python
79
+ from thordata import ThordataClient
80
+
81
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
82
+
83
+ html = client.universal_scrape(
84
+ url="https://www.google.com",
85
+ js_render=True,
86
+ output_format="HTML",
87
+ )
88
+ print(html[:200])
89
+ ```
90
+
91
+ ### 4. Web Scraper API (Task-based)
92
+
93
+ ```python
94
+ import time
95
+ from thordata import ThordataClient
96
+
97
+ client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
98
+
99
+ task_id = client.create_scraper_task(
100
+ file_name="demo_youtube_data",
101
+ spider_id="youtube_video-post_by-url",
102
+ spider_name="youtube.com",
103
+ individual_params={
104
+ "url": "https://www.youtube.com/@stephcurry/videos",
105
+ "order_by": "",
106
+ "num_of_posts": ""
107
+ },
108
+ )
109
+
110
+ for _ in range(10):
111
+ status = client.get_task_status(task_id)
112
+ print("Status:", status)
113
+ if status in ["Ready", "Success"]:
114
+ break
115
+ if status == "Failed":
116
+ raise RuntimeError("Task failed")
117
+ time.sleep(3)
118
+
119
+ download_url = client.get_task_result(task_id)
120
+ print("Download URL:", download_url)
121
+ ```
122
+
123
+ ### 5. Asynchronous Usage (High Concurrency)
124
+
125
+ ```python
126
+ import asyncio
127
+ from thordata import AsyncThordataClient
128
+
129
+ async def main():
130
+ async with AsyncThordataClient(
131
+ scraper_token="SCRAPER_TOKEN",
132
+ public_token="PUBLIC_TOKEN",
133
+ public_key="PUBLIC_KEY",
134
+ ) as client:
135
+ resp = await client.get("http://httpbin.org/ip")
136
+ print(await resp.json())
137
+
138
+ asyncio.run(main())
139
+ ```
140
+
141
+ More examples are available in the `examples/` directory.
142
+
143
+ ---
144
+
145
+ ## Features
146
+
147
+ | Feature | Status | Description |
148
+ |---------|--------|-------------|
149
+ | Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
150
+ | SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
151
+ | Universal Scraping API | Stable | JS rendering, HTML / PNG output, antibot bypass. |
152
+ | Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
153
+ | Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
154
+
155
+ ---
156
+
157
+ ## Development & Contributing
158
+
159
+ See `CONTRIBUTING.md` for local development and contribution guidelines.
160
+
161
+ ## License
162
+
163
+ This project is licensed under the Apache License 2.0.
164
+
165
+ ## Support
166
+
167
+ For technical support, please contact support@thordata.com
168
+ or verify your tokens and quotas in the Thordata Dashboard.
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "thordata-sdk"
7
+ version = "0.3.0" # Major structure change = Minor version bump
8
+ description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "Apache-2.0"}
12
+ authors = [
13
+ {name = "Thordata Developer Team", email = "support@thordata.com"}
14
+ ]
15
+ keywords = ["web scraping", "proxy", "ai", "llm", "data-mining", "serp", "thordata"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Topic :: Internet :: WWW/HTTP",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.8",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "License :: OSI Approved :: Apache Software License",
27
+ "Operating System :: OS Independent",
28
+ ]
29
+ dependencies = [
30
+ "requests>=2.25.0",
31
+ "aiohttp>=3.8.0", # For the async client
32
+ ]
33
+
34
+ [project.urls]
35
+ "Homepage" = "https://www.thordata.com"
36
+ "Documentation" = "https://github.com/Thordata/thordata-python-sdk#readme"
37
+ "Source" = "https://github.com/Thordata/thordata-python-sdk"
38
+ "Tracker" = "https://github.com/Thordata/thordata-python-sdk/issues"
39
+
40
+ # Key configuration: tell setuptools that the source code lives under the src directory
41
+ [tool.setuptools.packages.find]
42
+ where = ["src"]
@@ -0,0 +1,16 @@
1
+ # src/thordata/__init__.py
2
+
3
+ from .client import ThordataClient
4
+ from .async_client import AsyncThordataClient
5
+ from .enums import Engine, GoogleSearchType
6
+
7
+ # Package version
8
+ __version__ = "0.3.0"
9
+
10
+ # Explicitly export classes to simplify user imports
11
+ __all__ = [
12
+ "ThordataClient",
13
+ "AsyncThordataClient",
14
+ "Engine",
15
+ "GoogleSearchType"
16
+ ]