thordata-sdk 0.7.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +13 -1
- thordata/_example_utils.py +76 -0
- thordata/_utils.py +46 -3
- thordata/async_client.py +863 -23
- thordata/client.py +1023 -51
- thordata/enums.py +3 -3
- thordata/exceptions.py +16 -5
- thordata/models.py +351 -7
- thordata/retry.py +6 -4
- thordata_sdk-1.0.0.dist-info/METADATA +208 -0
- thordata_sdk-1.0.0.dist-info/RECORD +15 -0
- thordata/parameters.py +0 -53
- thordata_sdk-0.7.0.dist-info/METADATA +0 -1053
- thordata_sdk-0.7.0.dist-info/RECORD +0 -15
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/top_level.txt +0 -0
thordata/__init__.py
CHANGED
@@ -35,7 +35,7 @@ Async Usage:
     >>> asyncio.run(main())
 """
 
-__version__ = "0.7.0"
+__version__ = "1.0.0"
 __author__ = "Thordata Developer Team"
 __email__ = "support@thordata.com"
 
@@ -78,14 +78,20 @@ from .exceptions import (
 
 # Models
 from .models import (
+    CommonSettings,
     ProxyConfig,
     ProxyProduct,
+    ProxyServer,
+    ProxyUser,
+    ProxyUserList,
     ScraperTaskConfig,
     SerpRequest,
     StaticISPProxy,
     StickySession,
     TaskStatusResponse,
     UniversalScrapeRequest,
+    UsageStatistics,
+    VideoTaskConfig,
 )
 
 # Retry utilities
@@ -117,11 +123,17 @@ __all__ = [
     # Models
     "ProxyConfig",
     "ProxyProduct",
+    "ProxyServer",
+    "ProxyUser",
+    "ProxyUserList",
+    "UsageStatistics",
     "StaticISPProxy",
     "StickySession",
     "SerpRequest",
     "UniversalScrapeRequest",
     "ScraperTaskConfig",
+    "CommonSettings",
+    "VideoTaskConfig",
     "TaskStatusResponse",
     # Exceptions
     "ThordataError",
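
For orientation, a minimal sketch (not part of the package) of the new 1.0.0 top-level exports, using only names visible in the hunks above:

# Illustrative only: the symbols added to thordata/__init__.py in 1.0.0.
import thordata
from thordata import (
    CommonSettings,   # new in 1.0.0
    ProxyServer,      # new in 1.0.0
    ProxyUser,        # new in 1.0.0
    ProxyUserList,    # new in 1.0.0
    UsageStatistics,  # new in 1.0.0
    VideoTaskConfig,  # new in 1.0.0
)

assert thordata.__version__ == "1.0.0"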
thordata/_example_utils.py
ADDED
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+try:
+    from dotenv import load_dotenv
+except Exception:  # pragma: no cover
+    load_dotenv = None
+
+
+def load_env() -> None:
+    """Load .env from repo root if python-dotenv is installed."""
+    if load_dotenv is None:
+        return
+    repo_root = Path(__file__).resolve().parents[2]
+    load_dotenv(dotenv_path=repo_root / ".env")
+
+
+def env(name: str) -> str:
+    return (os.getenv(name) or "").strip()
+
+
+def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
+    missing = [k for k in required if not env(k)]
+    if not missing:
+        return False
+    print("Skipping live example: missing env:", ", ".join(missing))
+    if tip:
+        print(tip)
+    else:
+        print("Tip: copy .env.example to .env and fill values, then re-run.")
+    return True
+
+
+def parse_json_env(name: str, default: str = "{}") -> Any:
+    raw = env(name) or default
+    return json.loads(raw)
+
+
+def normalize_task_parameters(raw: Any) -> dict[str, Any]:
+    """Accept {..} or [{..}] and return a single dict for create_scraper_task(parameters=...)."""
+    if isinstance(raw, list):
+        if not raw:
+            raise ValueError("Task parameters JSON array must not be empty")
+        raw = raw[0]
+    if not isinstance(raw, dict):
+        raise ValueError("Task parameters must be a JSON object (or array of objects)")
+    return raw
+
+
+def output_dir() -> Path:
+    """Return output dir for examples; defaults to examples/output (ignored by git)."""
+    repo_root = Path(__file__).resolve().parents[2]
+    d = env("THORDATA_OUTPUT_DIR") or str(repo_root / "examples" / "output")
+    p = Path(d)
+    p.mkdir(parents=True, exist_ok=True)
+    return p
+
+
+def write_text(filename: str, content: str) -> Path:
+    p = output_dir() / filename
+    p.write_text(content, encoding="utf-8", errors="replace")
+    return p
+
+
+def write_json(filename: str, data: Any) -> Path:
+    p = output_dir() / filename
+    p.write_text(
+        json.dumps(data, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+        errors="replace",
+    )
+    return p
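
A short usage sketch for the helper module above; it is private (underscore-prefixed) and intended for the repository's example scripts. The environment variable names THORDATA_SCRAPER_TOKEN and TASK_PARAMS are assumptions for illustration, not part of the module.

# Hypothetical example-script usage of thordata._example_utils.
from thordata._example_utils import (
    load_env,
    normalize_task_parameters,
    parse_json_env,
    skip_if_missing,
    write_json,
)

load_env()  # silently does nothing if python-dotenv is not installed
if not skip_if_missing(["THORDATA_SCRAPER_TOKEN"]):  # assumed variable name
    # parse_json_env falls back to the default JSON string when TASK_PARAMS is unset;
    # normalize_task_parameters accepts {...} or [{...}] and reduces a list to its first object.
    raw = parse_json_env("TASK_PARAMS", '{"url": "https://example.com"}')
    params = normalize_task_parameters(raw)
    print("Saved to", write_json("task_params.json", params))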
thordata/_utils.py
CHANGED
@@ -70,18 +70,61 @@ def decode_base64_image(png_str: str) -> bytes:
         raise ValueError(f"Failed to decode base64 image: {e}") from e
 
 
-def build_auth_headers(token: str) -> Dict[str, str]:
+def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
     """
     Build authorization headers for API requests.
 
+    Supports two modes:
+    - bearer: Authorization: Bearer <token> (Thordata Docs examples)
+    - header_token: token: <token> (Interface documentation)
+
     Args:
         token: The scraper token.
+        mode: Authentication mode ("bearer" or "header_token").
+
+    Returns:
+        Headers dict with Authorization/token and Content-Type.
+    """
+    headers = {
+        "Content-Type": "application/x-www-form-urlencoded",
+    }
+
+    if mode == "bearer":
+        headers["Authorization"] = f"Bearer {token}"
+    elif mode == "header_token":
+        headers["token"] = token
+    else:
+        # Fallback to bearer for compatibility
+        headers["Authorization"] = f"Bearer {token}"
+
+    return headers
+
+
+def build_builder_headers(
+    scraper_token: str,
+    public_token: str,
+    public_key: str,
+) -> Dict[str, str]:
+    """
+    Build headers for Web Scraper builder API.
+
+    Builder requires THREE auth headers per official docs:
+    - token: public token
+    - key: public key
+    - Authorization: Bearer scraper_token
+
+    Args:
+        scraper_token: The scraper API token.
+        public_token: The public API token.
+        public_key: The public API key.
 
     Returns:
-        Headers dict with
+        Headers dict with all required auth headers.
     """
     return {
-        "
+        "token": public_token,
+        "key": public_key,
+        "Authorization": f"Bearer {scraper_token}",
         "Content-Type": "application/x-www-form-urlencoded",
     }
 
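
To make the two header styles concrete, a small sketch (placeholder tokens) of what the helpers above return, based on the function bodies in this hunk:

# Placeholder tokens; dict shapes follow the code shown above.
from thordata._utils import build_auth_headers, build_builder_headers

build_auth_headers("SCRAPER_TOKEN")
# {'Content-Type': 'application/x-www-form-urlencoded', 'Authorization': 'Bearer SCRAPER_TOKEN'}

build_auth_headers("SCRAPER_TOKEN", mode="header_token")
# {'Content-Type': 'application/x-www-form-urlencoded', 'token': 'SCRAPER_TOKEN'}

build_builder_headers(
    scraper_token="SCRAPER_TOKEN",
    public_token="PUBLIC_TOKEN",
    public_key="PUBLIC_KEY",
)
# {'token': 'PUBLIC_TOKEN', 'key': 'PUBLIC_KEY',
#  'Authorization': 'Bearer SCRAPER_TOKEN',
#  'Content-Type': 'application/x-www-form-urlencoded'}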