thordata-sdk 0.2.3__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_sdk-0.3.0/PKG-INFO +197 -0
- thordata_sdk-0.3.0/README.md +168 -0
- thordata_sdk-0.3.0/pyproject.toml +42 -0
- thordata_sdk-0.3.0/src/thordata/__init__.py +16 -0
- {thordata_sdk-0.2.3/thordata_sdk → thordata_sdk-0.3.0/src/thordata}/async_client.py +88 -41
- {thordata_sdk-0.2.3/thordata_sdk → thordata_sdk-0.3.0/src/thordata}/client.py +106 -51
- thordata_sdk-0.3.0/src/thordata/enums.py +25 -0
- thordata_sdk-0.3.0/src/thordata/parameters.py +52 -0
- thordata_sdk-0.3.0/src/thordata_sdk.egg-info/PKG-INFO +197 -0
- thordata_sdk-0.3.0/src/thordata_sdk.egg-info/SOURCES.txt +15 -0
- thordata_sdk-0.3.0/src/thordata_sdk.egg-info/top_level.txt +1 -0
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0}/tests/test_async_client.py +1 -1
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0}/tests/test_client.py +1 -1
- thordata_sdk-0.2.3/PKG-INFO +0 -125
- thordata_sdk-0.2.3/README.md +0 -86
- thordata_sdk-0.2.3/setup.py +0 -38
- thordata_sdk-0.2.3/thordata_sdk/__init__.py +0 -8
- thordata_sdk-0.2.3/thordata_sdk.egg-info/PKG-INFO +0 -125
- thordata_sdk-0.2.3/thordata_sdk.egg-info/SOURCES.txt +0 -13
- thordata_sdk-0.2.3/thordata_sdk.egg-info/top_level.txt +0 -1
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0}/LICENSE +0 -0
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0}/setup.cfg +0 -0
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0/src}/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-0.2.3 → thordata_sdk-0.3.0/src}/thordata_sdk.egg-info/requires.txt +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thordata-sdk
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
|
+
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://www.thordata.com
|
|
8
|
+
Project-URL: Documentation, https://github.com/Thordata/thordata-python-sdk#readme
|
|
9
|
+
Project-URL: Source, https://github.com/Thordata/thordata-python-sdk
|
|
10
|
+
Project-URL: Tracker, https://github.com/Thordata/thordata-python-sdk/issues
|
|
11
|
+
Keywords: web scraping,proxy,ai,llm,data-mining,serp,thordata
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
22
|
+
Classifier: Operating System :: OS Independent
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: requests>=2.25.0
|
|
27
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# Thordata Python SDK
|
|
31
|
+
|
|
32
|
+
<h4 align="center">
|
|
33
|
+
Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
|
|
34
|
+
<br>
|
|
35
|
+
<i>Async-ready, built for AI agents and large-scale data collection.</i>
|
|
36
|
+
</h4>
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<a href="https://pypi.org/project/thordata-sdk/">
|
|
40
|
+
<img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
|
|
41
|
+
</a>
|
|
42
|
+
<a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
|
|
43
|
+
<img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
|
|
44
|
+
</a>
|
|
45
|
+
<a href="https://python.org">
|
|
46
|
+
<img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
|
|
47
|
+
</a>
|
|
48
|
+
</p>
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install thordata-sdk
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
All examples below use the unified client:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from thordata import ThordataClient, AsyncThordataClient
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
|
|
67
|
+
|
|
68
|
+
### 1. Proxy Network (Simple GET)
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import os
|
|
72
|
+
from dotenv import load_dotenv  # not installed with thordata-sdk: pip install python-dotenv
|
|
73
|
+
from thordata import ThordataClient
|
|
74
|
+
|
|
75
|
+
load_dotenv()
|
|
76
|
+
|
|
77
|
+
client = ThordataClient(
|
|
78
|
+
scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
|
|
79
|
+
public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
|
|
80
|
+
public_key=os.getenv("THORDATA_PUBLIC_KEY"),
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
resp = client.get("http://httpbin.org/ip")
|
|
84
|
+
print(resp.json())
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from thordata import ThordataClient, Engine
|
|
91
|
+
|
|
92
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
93
|
+
|
|
94
|
+
results = client.serp_search(
|
|
95
|
+
query="Thordata technology",
|
|
96
|
+
engine=Engine.GOOGLE,
|
|
97
|
+
num=10,
|
|
98
|
+
# Any engine-specific parameters are passed via **kwargs
|
|
99
|
+
# e.g. type="shopping", location="United States"
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
print(len(results.get("organic", [])))
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### 3. Universal Scraping API
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from thordata import ThordataClient
|
|
109
|
+
|
|
110
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
111
|
+
|
|
112
|
+
html = client.universal_scrape(
|
|
113
|
+
url="https://www.google.com",
|
|
114
|
+
js_render=True,
|
|
115
|
+
output_format="HTML",
|
|
116
|
+
)
|
|
117
|
+
print(html[:200])
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### 4. Web Scraper API (Task-based)
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import time
|
|
124
|
+
from thordata import ThordataClient
|
|
125
|
+
|
|
126
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
127
|
+
|
|
128
|
+
task_id = client.create_scraper_task(
|
|
129
|
+
file_name="demo_youtube_data",
|
|
130
|
+
spider_id="youtube_video-post_by-url",
|
|
131
|
+
spider_name="youtube.com",
|
|
132
|
+
individual_params={
|
|
133
|
+
"url": "https://www.youtube.com/@stephcurry/videos",
|
|
134
|
+
"order_by": "",
|
|
135
|
+
"num_of_posts": ""
|
|
136
|
+
},
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
for _ in range(10):
|
|
140
|
+
status = client.get_task_status(task_id)
|
|
141
|
+
print("Status:", status)
|
|
142
|
+
if status in ["Ready", "Success"]:
|
|
143
|
+
break
|
|
144
|
+
if status == "Failed":
|
|
145
|
+
raise RuntimeError("Task failed")
|
|
146
|
+
time.sleep(3)
|
|
147
|
+
|
|
148
|
+
download_url = client.get_task_result(task_id)
|
|
149
|
+
print("Download URL:", download_url)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### 5. Asynchronous Usage (High Concurrency)
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
import asyncio
|
|
156
|
+
from thordata import AsyncThordataClient
|
|
157
|
+
|
|
158
|
+
async def main():
|
|
159
|
+
async with AsyncThordataClient(
|
|
160
|
+
scraper_token="SCRAPER_TOKEN",
|
|
161
|
+
public_token="PUBLIC_TOKEN",
|
|
162
|
+
public_key="PUBLIC_KEY",
|
|
163
|
+
) as client:
|
|
164
|
+
resp = await client.get("http://httpbin.org/ip")
|
|
165
|
+
print(await resp.json())
|
|
166
|
+
|
|
167
|
+
asyncio.run(main())
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
More examples are available in the `examples/` directory.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Features
|
|
175
|
+
|
|
176
|
+
| Feature | Status | Description |
|
|
177
|
+
|---------|--------|-------------|
|
|
178
|
+
| Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
|
|
179
|
+
| SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
|
|
180
|
+
| Universal Scraping API | Stable | JS rendering, HTML / PNG output, anti-bot bypass. |
|
|
181
|
+
| Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
|
|
182
|
+
| Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Development & Contributing
|
|
187
|
+
|
|
188
|
+
See `CONTRIBUTING.md` for local development and contribution guidelines.
|
|
189
|
+
|
|
190
|
+
## License
|
|
191
|
+
|
|
192
|
+
This project is licensed under the Apache License 2.0.
|
|
193
|
+
|
|
194
|
+
## Support
|
|
195
|
+
|
|
196
|
+
For technical support, please contact support@thordata.com
|
|
197
|
+
or verify your tokens and quotas in the Thordata Dashboard.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Thordata Python SDK
|
|
2
|
+
|
|
3
|
+
<h4 align="center">
|
|
4
|
+
Official Python client for Thordata's Proxy Network, SERP API, Universal Scraping API, and Web Scraper API.
|
|
5
|
+
<br>
|
|
6
|
+
<i>Async-ready, built for AI agents and large-scale data collection.</i>
|
|
7
|
+
</h4>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<a href="https://pypi.org/project/thordata-sdk/">
|
|
11
|
+
<img src="https://img.shields.io/pypi/v/thordata-sdk?color=blue" alt="PyPI version">
|
|
12
|
+
</a>
|
|
13
|
+
<a href="https://github.com/Thordata/thordata-python-sdk/blob/main/LICENSE">
|
|
14
|
+
<img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License">
|
|
15
|
+
</a>
|
|
16
|
+
<a href="https://python.org">
|
|
17
|
+
<img src="https://img.shields.io/badge/python-3.8+-blue" alt="Python Versions">
|
|
18
|
+
</a>
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install thordata-sdk
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
All examples below use the unified client:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from thordata import ThordataClient, AsyncThordataClient
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
You can copy `examples/.env.example` to `.env` and fill in your tokens from the Thordata Dashboard.
|
|
38
|
+
|
|
39
|
+
### 1. Proxy Network (Simple GET)
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import os
|
|
43
|
+
from dotenv import load_dotenv  # not installed with thordata-sdk: pip install python-dotenv
|
|
44
|
+
from thordata import ThordataClient
|
|
45
|
+
|
|
46
|
+
load_dotenv()
|
|
47
|
+
|
|
48
|
+
client = ThordataClient(
|
|
49
|
+
scraper_token=os.getenv("THORDATA_SCRAPER_TOKEN"),
|
|
50
|
+
public_token=os.getenv("THORDATA_PUBLIC_TOKEN"),
|
|
51
|
+
public_key=os.getenv("THORDATA_PUBLIC_KEY"),
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
resp = client.get("http://httpbin.org/ip")
|
|
55
|
+
print(resp.json())
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. SERP API (Google, Bing, Yandex, DuckDuckGo)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from thordata import ThordataClient, Engine
|
|
62
|
+
|
|
63
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
64
|
+
|
|
65
|
+
results = client.serp_search(
|
|
66
|
+
query="Thordata technology",
|
|
67
|
+
engine=Engine.GOOGLE,
|
|
68
|
+
num=10,
|
|
69
|
+
# Any engine-specific parameters are passed via **kwargs
|
|
70
|
+
# e.g. type="shopping", location="United States"
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
print(len(results.get("organic", [])))
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 3. Universal Scraping API
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from thordata import ThordataClient
|
|
80
|
+
|
|
81
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
82
|
+
|
|
83
|
+
html = client.universal_scrape(
|
|
84
|
+
url="https://www.google.com",
|
|
85
|
+
js_render=True,
|
|
86
|
+
output_format="HTML",
|
|
87
|
+
)
|
|
88
|
+
print(html[:200])
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 4. Web Scraper API (Task-based)
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import time
|
|
95
|
+
from thordata import ThordataClient
|
|
96
|
+
|
|
97
|
+
client = ThordataClient("SCRAPER_TOKEN", "PUBLIC_TOKEN", "PUBLIC_KEY")
|
|
98
|
+
|
|
99
|
+
task_id = client.create_scraper_task(
|
|
100
|
+
file_name="demo_youtube_data",
|
|
101
|
+
spider_id="youtube_video-post_by-url",
|
|
102
|
+
spider_name="youtube.com",
|
|
103
|
+
individual_params={
|
|
104
|
+
"url": "https://www.youtube.com/@stephcurry/videos",
|
|
105
|
+
"order_by": "",
|
|
106
|
+
"num_of_posts": ""
|
|
107
|
+
},
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
for _ in range(10):
|
|
111
|
+
status = client.get_task_status(task_id)
|
|
112
|
+
print("Status:", status)
|
|
113
|
+
if status in ["Ready", "Success"]:
|
|
114
|
+
break
|
|
115
|
+
if status == "Failed":
|
|
116
|
+
raise RuntimeError("Task failed")
|
|
117
|
+
time.sleep(3)
|
|
118
|
+
|
|
119
|
+
download_url = client.get_task_result(task_id)
|
|
120
|
+
print("Download URL:", download_url)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 5. Asynchronous Usage (High Concurrency)
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
import asyncio
|
|
127
|
+
from thordata import AsyncThordataClient
|
|
128
|
+
|
|
129
|
+
async def main():
|
|
130
|
+
async with AsyncThordataClient(
|
|
131
|
+
scraper_token="SCRAPER_TOKEN",
|
|
132
|
+
public_token="PUBLIC_TOKEN",
|
|
133
|
+
public_key="PUBLIC_KEY",
|
|
134
|
+
) as client:
|
|
135
|
+
resp = await client.get("http://httpbin.org/ip")
|
|
136
|
+
print(await resp.json())
|
|
137
|
+
|
|
138
|
+
asyncio.run(main())
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
More examples are available in the `examples/` directory.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Features
|
|
146
|
+
|
|
147
|
+
| Feature | Status | Description |
|
|
148
|
+
|---------|--------|-------------|
|
|
149
|
+
| Proxy Network | Stable | Residential, ISP, Mobile, Datacenter via HTTP/HTTPS gateway. |
|
|
150
|
+
| SERP API | Stable | Google / Bing / Yandex / DuckDuckGo, flexible parameters. |
|
|
151
|
+
| Universal Scraping API | Stable | JS rendering, HTML / PNG output, anti-bot bypass. |
|
|
152
|
+
| Web Scraper API | Stable | Task-based scraping for complex sites (YouTube, E-commerce). |
|
|
153
|
+
| Async Client | Stable | aiohttp-based client for high-concurrency workloads. |
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Development & Contributing
|
|
158
|
+
|
|
159
|
+
See `CONTRIBUTING.md` for local development and contribution guidelines.
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
This project is licensed under the Apache License 2.0.
|
|
164
|
+
|
|
165
|
+
## Support
|
|
166
|
+
|
|
167
|
+
For technical support, please contact support@thordata.com
|
|
168
|
+
or verify your tokens and quotas in the Thordata Dashboard.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "thordata-sdk"
|
|
7
|
+
version = "0.3.0" # Package restructured (src layout; import name thordata_sdk -> thordata), hence the minor version bump
|
|
8
|
+
description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "Apache-2.0"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Thordata Developer Team", email = "support@thordata.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["web scraping", "proxy", "ai", "llm", "data-mining", "serp", "thordata"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
20
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.8",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"License :: OSI Approved :: Apache Software License",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"requests>=2.25.0",
|
|
31
|
+
"aiohttp>=3.8.0", # For the async client
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
"Homepage" = "https://www.thordata.com"
|
|
36
|
+
"Documentation" = "https://github.com/Thordata/thordata-python-sdk#readme"
|
|
37
|
+
"Source" = "https://github.com/Thordata/thordata-python-sdk"
|
|
38
|
+
"Tracker" = "https://github.com/Thordata/thordata-python-sdk/issues"
|
|
39
|
+
|
|
40
|
+
# Key setting: tell setuptools that the source code lives under the src directory
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
where = ["src"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# src/thordata/__init__.py
|
|
2
|
+
|
|
3
|
+
from .client import ThordataClient
|
|
4
|
+
from .async_client import AsyncThordataClient
|
|
5
|
+
from .enums import Engine, GoogleSearchType
|
|
6
|
+
|
|
7
|
+
# Package version
|
|
8
|
+
__version__ = "0.3.0"
|
|
9
|
+
|
|
10
|
+
# Explicitly export classes to simplify user imports
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ThordataClient",
|
|
13
|
+
"AsyncThordataClient",
|
|
14
|
+
"Engine",
|
|
15
|
+
"GoogleSearchType"
|
|
16
|
+
]
|