thordata-sdk 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +4 -40
- thordata/async_client.py +572 -1241
- thordata/async_unlimited.py +130 -0
- thordata/client.py +1184 -1309
- thordata/core/__init__.py +23 -0
- thordata/core/async_http_client.py +91 -0
- thordata/core/http_client.py +79 -0
- thordata/core/tunnel.py +287 -0
- thordata/demo.py +2 -2
- thordata/enums.py +41 -380
- thordata/models.py +37 -1193
- thordata/tools/__init__.py +28 -0
- thordata/tools/base.py +42 -0
- thordata/tools/code.py +26 -0
- thordata/tools/ecommerce.py +67 -0
- thordata/tools/search.py +73 -0
- thordata/tools/social.py +190 -0
- thordata/tools/video.py +81 -0
- thordata/types/__init__.py +77 -0
- thordata/types/common.py +141 -0
- thordata/types/proxy.py +340 -0
- thordata/types/serp.py +224 -0
- thordata/types/task.py +144 -0
- thordata/types/universal.py +66 -0
- thordata/unlimited.py +169 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/METADATA +74 -51
- thordata_sdk-1.5.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/WHEEL +1 -1
- thordata_sdk-1.3.0.dist-info/RECORD +0 -16
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/top_level.txt +0 -0
thordata/types/task.py
ADDED

@@ -0,0 +1,144 @@
+"""
+Web Scraper Task types.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any
+
+from .common import CommonSettings, ThordataBaseConfig
+
+
+class TaskStatus(str, Enum):
+    PENDING = "pending"
+    RUNNING = "running"
+    READY = "ready"
+    SUCCESS = "success"
+    FINISHED = "finished"
+    FAILED = "failed"
+    ERROR = "error"
+    CANCELLED = "cancelled"
+    UNKNOWN = "unknown"
+
+    @classmethod
+    def is_terminal(cls, status: TaskStatus) -> bool:
+        return status in {
+            cls.READY,
+            cls.SUCCESS,
+            cls.FINISHED,
+            cls.FAILED,
+            cls.ERROR,
+            cls.CANCELLED,
+        }
+
+    @classmethod
+    def is_success(cls, status: TaskStatus) -> bool:
+        return status in {cls.READY, cls.SUCCESS, cls.FINISHED}
+
+    @classmethod
+    def is_failure(cls, status: TaskStatus) -> bool:
+        return status in {cls.FAILED, cls.ERROR}
+
+
+class DataFormat(str, Enum):
+    JSON = "json"
+    CSV = "csv"
+    XLSX = "xlsx"
+
+
+@dataclass
+class ScraperTaskConfig(ThordataBaseConfig):
+    file_name: str
+    spider_id: str
+    spider_name: str
+    parameters: dict[str, Any]
+    universal_params: dict[str, Any] | None = None
+    include_errors: bool = True
+
+    def to_payload(self) -> dict[str, Any]:
+        payload: dict[str, Any] = {
+            "file_name": self.file_name,
+            "spider_id": self.spider_id,
+            "spider_name": self.spider_name,
+            "spider_parameters": json.dumps([self.parameters]),
+            "spider_errors": "true" if self.include_errors else "false",
+        }
+        if self.universal_params:
+            payload["spider_universal"] = json.dumps(self.universal_params)
+        return payload
+
+
+@dataclass
+class VideoTaskConfig(ThordataBaseConfig):
+    file_name: str
+    spider_id: str
+    spider_name: str
+    parameters: dict[str, Any]
+    common_settings: CommonSettings
+    include_errors: bool = True
+
+    def to_payload(self) -> dict[str, Any]:
+        payload: dict[str, Any] = {
+            "file_name": self.file_name,
+            "spider_id": self.spider_id,
+            "spider_name": self.spider_name,
+            "spider_parameters": json.dumps([self.parameters]),
+            "spider_errors": "true" if self.include_errors else "false",
+            # v2.0 Doc explicitly requires 'spider_universal' key for video tasks too sometimes,
+            # but usually it's passed as 'common_settings' or 'spider_universal'.
+            # Sticking to original models.py key logic for now to ensure stability.
+            "spider_universal": self.common_settings.to_json(),
+        }
+        # Note: If API expects 'common_settings' key specifically, adjust here.
+        # Based on v2 context, video builder often uses spider_universal.
+        return payload
+
+
+@dataclass
+class TaskStatusResponse:
+    task_id: str
+    status: str
+    progress: int | None = None
+    message: str | None = None
+
+    def is_complete(self) -> bool:
+        terminal_statuses = {
+            "ready",
+            "success",
+            "finished",
+            "failed",
+            "error",
+            "cancelled",
+        }
+        return self.status.lower() in terminal_statuses
+
+    def is_success(self) -> bool:
+        return self.status.lower() in {"ready", "success", "finished"}
+
+
+@dataclass
+class UsageStatistics:
+    total_usage_traffic: float
+    traffic_balance: float
+    query_days: int
+    range_usage_traffic: float
+    data: list[dict[str, Any]]
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> UsageStatistics:
+        return cls(
+            total_usage_traffic=float(data.get("total_usage_traffic", 0)),
+            traffic_balance=float(data.get("traffic_balance", 0)),
+            query_days=int(data.get("query_days", 0)),
+            range_usage_traffic=float(data.get("range_usage_traffic", 0)),
+            data=data.get("data", []),
+        )
+
+    def total_usage_gb(self) -> float:
+        return self.total_usage_traffic / (1024 * 1024)
+
+    def balance_gb(self) -> float:
+        return self.traffic_balance / (1024 * 1024)
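For orientation, a minimal usage sketch of the new task types (illustrative only, not taken from the package). It assumes `ThordataBaseConfig` adds no required constructor fields of its own; the spider ID and name are placeholders.

```python
from thordata.types.task import ScraperTaskConfig, TaskStatus

# Placeholder spider ID and name, not real Thordata identifiers.
config = ScraperTaskConfig(
    file_name="products",
    spider_id="spider-123",
    spider_name="example_spider",
    parameters={"url": "https://example.com"},
    universal_params={"js_render": True},
)

payload = config.to_payload()
# to_payload() wraps `parameters` in a list and JSON-encodes it:
#   payload["spider_parameters"] == '[{"url": "https://example.com"}]'
#   payload["spider_universal"]  == '{"js_render": true}'
#   payload["spider_errors"]     == "true"

# TaskStatus helpers classify status values reported by the API:
assert TaskStatus.is_terminal(TaskStatus.FINISHED)
assert TaskStatus.is_success(TaskStatus.READY)
```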
thordata/types/universal.py
ADDED

@@ -0,0 +1,66 @@
+"""
+Universal Scraping (Web Unlocker) types.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any
+
+from .common import ThordataBaseConfig
+
+
+@dataclass
+class UniversalScrapeRequest(ThordataBaseConfig):
+    url: str
+    js_render: bool = False
+    output_format: str = "html"  # 'html' or 'png'
+    country: str | None = None
+    block_resources: str | None = None  # 'script,image'
+    clean_content: str | None = None  # 'js,css'
+    wait: int | None = None  # ms
+    wait_for: str | None = None  # selector
+
+    # Headers/Cookies must be serialized to JSON in payload
+    headers: list[dict[str, str]] | None = None
+    cookies: list[dict[str, str]] | None = None
+
+    extra_params: dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        valid_formats = {"html", "png"}
+        if self.output_format.lower() not in valid_formats:
+            raise ValueError(
+                f"Invalid output_format: {self.output_format}. Must be one of: {valid_formats}"
+            )
+
+        if self.wait is not None and (self.wait < 0 or self.wait > 100000):
+            raise ValueError("wait must be between 0 and 100000 milliseconds")
+
+    def to_payload(self) -> dict[str, Any]:
+        payload: dict[str, Any] = {
+            "url": self.url,
+            "js_render": "True" if self.js_render else "False",
+            "type": self.output_format.lower(),
+        }
+
+        if self.country:
+            payload["country"] = self.country.lower()
+        if self.block_resources:
+            payload["block_resources"] = self.block_resources
+        if self.clean_content:
+            payload["clean_content"] = self.clean_content
+        if self.wait is not None:
+            payload["wait"] = str(self.wait)
+        if self.wait_for:
+            payload["wait_for"] = self.wait_for
+
+        # Serialize complex objects as JSON strings
+        if self.headers:
+            payload["headers"] = json.dumps(self.headers)
+        if self.cookies:
+            payload["cookies"] = json.dumps(self.cookies)
+
+        payload.update(self.extra_params)
+        return payload
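Likewise, a small illustrative sketch of building a Web Unlocker payload with `UniversalScrapeRequest` (not taken from the package; same assumption that `ThordataBaseConfig` adds no required fields of its own):

```python
from thordata.types.universal import UniversalScrapeRequest

req = UniversalScrapeRequest(
    url="https://example.com",
    js_render=True,
    output_format="png",
    country="US",
    wait=2000,  # milliseconds; __post_init__ rejects values outside 0-100000
)

payload = req.to_payload()
# Scalars are stringified for the form-style payload:
#   payload["js_render"] == "True"
#   payload["type"]      == "png"
#   payload["country"]   == "us"
#   payload["wait"]      == "2000"
```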
thordata/unlimited.py
ADDED

@@ -0,0 +1,169 @@
+"""
+Sync interface for Unlimited Residential Proxy management.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from ._utils import build_public_api_headers
+from .exceptions import raise_for_code
+
+if TYPE_CHECKING:
+    from .client import ThordataClient
+
+
+class UnlimitedNamespace:
+    """
+    Namespace for Unlimited Residential Proxy operations.
+    """
+
+    def __init__(self, client: ThordataClient):
+        self._client = client
+        # Base URL for unlimited endpoints (usually same as public API base)
+        # We can reuse _locations_base_url logic to find the API root
+        # locations_base: .../api/locations -> root: .../api
+        self._api_base = client._locations_base_url.replace("/locations", "")
+
+    def list_servers(self) -> list[dict[str, Any]]:
+        """
+        Get the list of unlimited proxy servers.
+
+        Returns:
+            List of server objects.
+        """
+        self._client._require_public_credentials()
+        params = {
+            "token": self._client.public_token,
+            "key": self._client.public_key,
+        }
+        response = self._client._api_request_with_retry(
+            "GET", f"{self._api_base}/unlimited/server-list", params=params
+        )
+        response.raise_for_status()
+        data = response.json()
+        if data.get("code") != 200:
+            raise_for_code("List servers failed", code=data.get("code"), payload=data)
+
+        # API returns { "data": [...] } OR { "data": { "list": [...] } } sometimes
+        # Assuming standard list return
+        return data.get("data") or []
+
+    def restart_server(self, plan_name: str) -> dict[str, Any]:
+        """Restart an unlimited proxy server."""
+        return self._post_action("/unlimited/restart-server", {"plan_name": plan_name})
+
+    def renew(self, plan_name: str, month: int) -> dict[str, Any]:
+        """Renew an unlimited plan."""
+        return self._post_action(
+            "/unlimited/renew", {"plan_name": plan_name, "month": str(month)}
+        )
+
+    def upgrade(self, plan_name: str, target_plan: str) -> dict[str, Any]:
+        """Upgrade an unlimited plan."""
+        return self._post_action(
+            "/unlimited/upgrade",
+            {"plan_name": plan_name, "target_plan": target_plan},
+        )
+
+    def list_bound_users(self, ip: str) -> list[dict[str, Any]]:
+        """List users bound to a specific unlimited server IP."""
+        data = self._post_action("/get_unlimited_servers_bind_user", {"ip": ip})
+        # Assuming data structure similar to other lists
+        return data.get("list") or [] if isinstance(data, dict) else []
+
+    def bind_user(self, ip: str, username: str) -> dict[str, Any]:
+        """Bind a sub-user to an unlimited server IP."""
+        return self._post_action(
+            "/add_unlimited_servers_bind_user", {"ip": ip, "username": username}
+        )
+
+    def unbind_user(self, ip: str, username: str) -> dict[str, Any]:
+        """Unbind a sub-user from an unlimited server IP."""
+        return self._post_action(
+            "/del_unlimited_servers_bind_user", {"ip": ip, "username": username}
+        )
+
+    def _post_action(self, endpoint: str, payload: dict[str, Any]) -> dict[str, Any]:
+        """Helper for POST actions."""
+        self._client._require_public_credentials()
+        headers = build_public_api_headers(
+            self._client.public_token or "", self._client.public_key or ""
+        )
+        response = self._client._api_request_with_retry(
+            "POST", f"{self._api_base}{endpoint}", data=payload, headers=headers
+        )
+        response.raise_for_status()
+        data = response.json()
+        if data.get("code") != 200:
+            raise_for_code(
+                f"Action {endpoint} failed", code=data.get("code"), payload=data
+            )
+        return data.get("data", {})
+
+    def get_server_monitor(
+        self,
+        ins_id: str,
+        region: str,
+        start_time: int,
+        end_time: int,
+        period: int = 300,
+    ) -> dict[str, Any]:
+        """
+        Get Unlimited Proxy Server monitor (CPU, Mem, TCP, Bandwidth).
+        """
+        self._client._require_public_credentials()
+        params = {
+            "token": self._client.public_token,
+            "key": self._client.public_key,
+            "ins_id": ins_id,
+            "region": region,
+            "start_time": str(start_time),
+            "end_time": str(end_time),
+            "period": str(period),
+        }
+        # Note: Endpoint is /api/unlimited/server-monitor
+        response = self._client._api_request_with_retry(
+            "GET", f"{self._api_base}/unlimited/server-monitor", params=params
+        )
+        response.raise_for_status()
+        data = response.json()
+        if data.get("code") != 200:
+            raise_for_code(
+                "Get server monitor failed", code=data.get("code"), payload=data
+            )
+        return data.get("data", {})
+
+    def get_balancing_monitor(
+        self,
+        ins_id: str,
+        region: str,
+        start_time: int,
+        end_time: int,
+        period: int = 300,
+    ) -> dict[str, Any]:
+        """
+        Get Unlimited Residential Proxy Load Balancing Machine Monitoring.
+        """
+        self._client._require_public_credentials()
+        params = {
+            "token": self._client.public_token,
+            "key": self._client.public_key,
+            "ins_id": ins_id,
+            "region": region,
+            "start_time": str(start_time),
+            "end_time": str(end_time),
+            "period": str(period),
+        }
+        response = self._client._api_request_with_retry(
+            "GET", f"{self._api_base}/unlimited/balancing-monitor", params=params
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        # Note: This endpoint uses 'status_code' instead of 'code' in the root
+        code = data.get("status_code", data.get("code"))
+        if code != 200:
+            raise_for_code("Get balancing monitor failed", code=code, payload=data)
+
+        return data.get("payload", {})
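For orientation, an illustrative sketch of driving this namespace from the sync client (not taken from the package). It assumes the client exposes the namespace as `client.unlimited`, as the README's Management APIs snippet below does, and that `THORDATA_PUBLIC_TOKEN` / `THORDATA_PUBLIC_KEY` are set; the plan name, IP, and username are placeholders.

```python
from thordata import ThordataClient

# Public API credentials are read from the environment (see the README below).
client = ThordataClient()

# Enumerate unlimited servers, then restart one plan (placeholder name).
servers = client.unlimited.list_servers()
print(f"{len(servers)} unlimited server(s) on the account")

client.unlimited.restart_server(plan_name="my-unlimited-plan")

# Bind and unbind a sub-user on a server IP (placeholder values).
client.unlimited.bind_user(ip="203.0.113.10", username="sub_user_01")
client.unlimited.unbind_user(ip="203.0.113.10", username="sub_user_01")
```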
{thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thordata-sdk
-Version: 1.3.0
+Version: 1.5.0
 Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
 Author-email: Thordata Developer Team <support@thordata.com>
 License: MIT
@@ -35,7 +35,7 @@ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
 Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
 Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
-Requires-Dist: black>=
+Requires-Dist: black>=25.11.0; extra == "dev"
 Requires-Dist: ruff>=0.1.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
 Requires-Dist: types-requests>=2.28.0; extra == "dev"
@@ -63,14 +63,13 @@ Dynamic: license-file
 
 ## 📖 Introduction
 
-
+The **Thordata Python SDK v1.5.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
 
-**
-*
-* **⚡ Async
-*
-*
-* **🧩 Lazy Validation:** Only validate credentials for the features you actually use.
+**Why v1.5.0?**
+* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
+* **⚡ Async First**: First-class `asyncio` support with `aiohttp` for high-concurrency scraping (1000+ RPS).
+* **🧩 100% API Coverage**: Every endpoint documented by Thordata (including Hourly Usage, Server Monitor, and Task Management) is implemented.
+* **🤖 Type Safe**: Fully typed (`mypy` strict) for excellent IDE autocompletion and error checking.
 
 ---
 
@@ -84,72 +83,74 @@ pip install thordata-sdk
 
 ## 🔐 Configuration
 
-Set environment variables to avoid hardcoding credentials.
+Set environment variables to avoid hardcoding credentials.
 
 ```bash
-# [
-export THORDATA_SCRAPER_TOKEN="
+# [Scraping APIs]
+export THORDATA_SCRAPER_TOKEN="your_scraper_token"
 
-# [
+# [Management APIs]
+export THORDATA_PUBLIC_TOKEN="your_public_token"
+export THORDATA_PUBLIC_KEY="your_public_key"
+
+# [Proxy Network]
 export THORDATA_RESIDENTIAL_USERNAME="your_username"
 export THORDATA_RESIDENTIAL_PASSWORD="your_password"
-
-
-# [Required for Task Management]
-export THORDATA_PUBLIC_TOKEN="public_token"
-export THORDATA_PUBLIC_KEY="public_key"
+# Optional: Set upstream proxy for local dev (e.g., Clash)
+# export THORDATA_UPSTREAM_PROXY="http://127.0.0.1:7890"
 ```
 
 ---
 
 ## 🚀 Quick Start
 
-### 1. SERP Search (Google/Bing
+### 1. SERP Search (Google/Bing)
 
 ```python
 from thordata import ThordataClient, Engine
 
-client = ThordataClient()
+client = ThordataClient()
 
-#
-
-
+# Search Google
+results = client.serp_search(
+    query="latest AI trends",
+    engine=Engine.GOOGLE,
+    num=10,
+    location="United States"
+)
 
-for
-    print(f"
+for item in results.get("organic", []):
+    print(f"{item['title']} - {item['link']}")
 ```
 
 ### 2. Universal Scrape (Web Unlocker)
 
-
+Automatically handles JS rendering, CAPTCHAs, and fingerprinting.
 
 ```python
 html = client.universal_scrape(
-    url="https://example.com
+    url="https://example.com",
     js_render=True,
-
-
+    country="us",
+    wait_for=".content-loaded" # Smart waiting
 )
-print(f"Scraped {len(html)} bytes")
 ```
 
-### 3. High-Performance Proxy
+### 3. High-Performance Proxy Tunneling
 
-Use Thordata's residential IPs with
+Use Thordata's residential IPs directly with `requests` (Sync) or `aiohttp` (Async). The SDK handles the complex authentication and rotation logic.
 
 ```python
 from thordata import ProxyConfig, ProxyProduct
 
-# Config is optional if env vars are set
+# Config is optional if env vars are set
 proxy = ProxyConfig(
     product=ProxyProduct.RESIDENTIAL,
     country="jp",
-    city="tokyo",
-    session_id="session-001",
     session_duration=10 # Sticky IP for 10 mins
 )
 
-#
+# The client automatically routes this through Thordata's network
 response = client.get("https://httpbin.org/ip", proxy_config=proxy)
 print(response.json())
 ```
@@ -158,9 +159,9 @@ print(response.json())
 
 ## ⚙️ Advanced Usage
 
-### Async
+### Async High-Concurrency
 
-
+Perfect for building high-throughput AI agents.
 
 ```python
 import asyncio
@@ -168,20 +169,17 @@ from thordata import AsyncThordataClient
 
 async def main():
     async with AsyncThordataClient() as client:
-        # Fire off
-        tasks = [
-            client.serp_search(f"query {i}")
-            for i in range(5)
-        ]
+        # Fire off 10 requests in parallel
+        tasks = [client.serp_search(f"query {i}") for i in range(10)]
         results = await asyncio.gather(*tasks)
         print(f"Completed {len(results)} searches")
 
 asyncio.run(main())
 ```
 
-###
+### Task Management (Batch Scraping)
 
-
+Handle large-scale scraping jobs asynchronously.
 
 ```python
 # 1. Create a task
@@ -192,13 +190,38 @@ task_id = client.create_scraper_task(
     parameters={"url": "https://example.com"}
 )
 
-# 2.
-status = client.wait_for_task(task_id)
+# 2. Poll for completion (Helper method)
+status = client.wait_for_task(task_id, max_wait=600)
+
+# 3. Download results
+if status == "finished":
+    data_url = client.get_task_result(task_id)
+    print(f"Download: {data_url}")
+```
+
+---
+
+## 🛠️ Management APIs
+
+Manage your infrastructure programmatically.
+
+```python
+# Check Balance
+balance = client.get_traffic_balance()
 
-#
-
-
-
+# Manage Whitelist
+client.add_whitelist_ip("1.2.3.4")
+
+# Create Sub-users
+client.create_proxy_user("new_user", "pass123", traffic_limit=500)
+
+# Monitor Unlimited Proxies
+monitor = client.unlimited.get_server_monitor(
+    ins_id="ins-123",
+    region="us",
+    start_time=1700000000,
+    end_time=1700003600
+)
 ```
 
 ---
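A side note on the "SOCKS5h (Remote DNS)" wording in the README above: the trailing `h` means the hostname is sent to the proxy and resolved there rather than locally. A generic `requests` illustration of the scheme (this is not the SDK's own tunnel code from `thordata/core/tunnel.py`; the proxy host, port, and credentials are placeholders, and `requests[socks]` must be installed):

```python
import requests  # plus PySocks: pip install "requests[socks]"

# socks5://  -> the hostname is resolved locally, only the IP reaches the proxy
# socks5h:// -> the hostname itself reaches the proxy, which resolves it remotely
proxies = {
    "http": "socks5h://user:pass@proxy.example.com:1080",
    "https": "socks5h://user:pass@proxy.example.com:1080",
}

resp = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=30)
print(resp.json())
```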
thordata_sdk-1.5.0.dist-info/RECORD
ADDED

@@ -0,0 +1,35 @@
+thordata/__init__.py,sha256=-2bXx3LckBWrJ_E5HqFTOj7sm45AgrOnSWV4QN6f-7U,2287
+thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
+thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
+thordata/async_client.py,sha256=zN59ZQfFVCuAGnGcyj-C_S9MbHzb17QbUISm46n6gpY,39439
+thordata/async_unlimited.py,sha256=kzTksFkN21rDM21Pwy3hcayjfyGYNGGyGR3fRLtZC6I,4510
+thordata/client.py,sha256=eA6jav_aAw2CQdSyrg3P59rELKo13K5tHqmSjEw3L_8,56717
+thordata/demo.py,sha256=DojJRFqUm9XAMBkjmk03WGeiUdLCbXguMIwtMOzfN6M,3822
+thordata/enums.py,sha256=_pahGhcq9Eh2ptL_WiNU2WlqKrydV_6e4U9G4erV9-s,774
+thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
+thordata/models.py,sha256=wozvlpS-Uv1DgkM_CEKOvldQ2InicxhIN0QiezIXPE4,853
+thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
+thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
+thordata/unlimited.py,sha256=RzrtwcotYlbOWuSLysDyI75IkMVL7ygdfE9HKNoe02M,6087
+thordata/core/__init__.py,sha256=EFT6mZpSdec_7uFUpSpDDHVwbTxy314uxJC_uprR6J4,500
+thordata/core/async_http_client.py,sha256=KKsmhXN6bWRTDFvqa0H-WRf4R-TWH8WSgpDBRv6TEvg,3052
+thordata/core/http_client.py,sha256=8lSwclmVweM-Go1qMW36zYnMKAUT_9RyDdPF7qMS4-Y,2280
+thordata/core/tunnel.py,sha256=rbM_4zGwY4FXqdxYmCOURQw2s1EuAWFBVBM-1joNjGI,8373
+thordata/tools/__init__.py,sha256=ROryBBlCfq9cydaKXEPtnevjhg6GdFioAjdnp2VTR0M,606
+thordata/tools/base.py,sha256=fHuCp53y8eB59DuCdA1wHcbMVmsd5ikL9KlT5m_jJn0,1006
+thordata/tools/code.py,sha256=opYMG7LdR90VjW5tn8wnRCwDT-zUC0uteMKW01TMPTI,580
+thordata/tools/ecommerce.py,sha256=u-s-RGMSAGifsMnyMrwtJ3yVDgu3n74bv8yyX6TbMNU,1560
+thordata/tools/search.py,sha256=toWMOnnfQXgafyndHs23Yn049vpPlGPHdZA7SpiJJTE,1724
+thordata/tools/social.py,sha256=VbujfbA5Man6Shsik4QYBpf9z2FJhhJkZLNKll09Ots,4886
+thordata/tools/video.py,sha256=WikUOYPSVtHdrS0Z7VVexlUPyFZRv9v7cerkpzzO5jU,2549
+thordata/types/__init__.py,sha256=hlLt5UCVm7QdeOCN5_YWXS4Vy8tJUhIp0XbWjAoQiQg,1357
+thordata/types/common.py,sha256=hkTZ1QtokpE1yT9BvTmYfQz9AUjeCIIPvjib2pnq_Ag,2818
+thordata/types/proxy.py,sha256=IU45wQHCBOIlbdcCN9veypAkDT0q9NIikLu674CudOU,10438
+thordata/types/serp.py,sha256=NO52I1NprjVBgKQe4o2xEp82a3Oy9wCBYG-2Q0oegnU,5817
+thordata/types/task.py,sha256=f5xGeH4BrE7sHIgWhRJuMr3iuPooxJlg7ztr8lwcSx8,4139
+thordata/types/universal.py,sha256=Kw8lf_2ElXIfylsNfVosLE1MvlEQkryv4fWEaQw6ecg,2161
+thordata_sdk-1.5.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
+thordata_sdk-1.5.0.dist-info/METADATA,sha256=VqsfaJsguO-KSMOjWjPodO1nIa510qpjNBdVzCMHshQ,7026
+thordata_sdk-1.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+thordata_sdk-1.5.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
+thordata_sdk-1.5.0.dist-info/RECORD,,
thordata_sdk-1.3.0.dist-info/RECORD

@@ -1,16 +0,0 @@
-thordata/__init__.py,sha256=O9R2zY6qQXWIkQQ8Hcqqcwshymk4-2Bo_Pmm-Ma3SeI,3195
-thordata/_example_utils.py,sha256=T9QtVq9BHhubOShgtGp2GSusYYd-ZFUJFJAw7ubIsa4,2199
-thordata/_utils.py,sha256=Acr_6sHgdZXU7SQozd6FEYTZV6iHw__nlhpBTDwb66U,4917
-thordata/async_client.py,sha256=9swh6AoAgvLRavFzXaM1rUz9Zm66r7GKjThjfILLiSI,58082
-thordata/client.py,sha256=IvkPjm4v9ViiNMUM4uUbUOY3kNtJdw3mzSd3BT3yD0A,59039
-thordata/demo.py,sha256=HQzgaUM33bWD7mBQ6HEkK5K6zqFnSAHLvaam6BwPgFA,3762
-thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
-thordata/exceptions.py,sha256=P9czrxkFhT439DxW3LE5W-koS595ObH4-mAQOfaDM18,9976
-thordata/models.py,sha256=qtB7jE0v5zNEQfSpmOqdiacB5DgM2QfVR2PaYs-DisM,38206
-thordata/retry.py,sha256=5kRwULl3X68Nx8PlSzr9benfyCL0nRSpVQXrwjWr45M,11456
-thordata/serp_engines.py,sha256=iuMWncelcGOskCHXFzpcPMMTL5qfiLkazHB1uj3zpZo,5985
-thordata_sdk-1.3.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
-thordata_sdk-1.3.0.dist-info/METADATA,sha256=x7p4JY94WCbvVmLiSaPDcUiKKiSw_4bRn3sLr1PRBPM,6600
-thordata_sdk-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-thordata_sdk-1.3.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
-thordata_sdk-1.3.0.dist-info/RECORD,,
{thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/licenses/LICENSE
File without changes

{thordata_sdk-1.3.0.dist-info → thordata_sdk-1.5.0.dist-info}/top_level.txt
File without changes