thordata-sdk 0.7.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "0.7.0"
38
+ __version__ = "1.0.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -78,14 +78,20 @@ from .exceptions import (
78
78
 
79
79
  # Models
80
80
  from .models import (
81
+ CommonSettings,
81
82
  ProxyConfig,
82
83
  ProxyProduct,
84
+ ProxyServer,
85
+ ProxyUser,
86
+ ProxyUserList,
83
87
  ScraperTaskConfig,
84
88
  SerpRequest,
85
89
  StaticISPProxy,
86
90
  StickySession,
87
91
  TaskStatusResponse,
88
92
  UniversalScrapeRequest,
93
+ UsageStatistics,
94
+ VideoTaskConfig,
89
95
  )
90
96
 
91
97
  # Retry utilities
@@ -117,11 +123,17 @@ __all__ = [
117
123
  # Models
118
124
  "ProxyConfig",
119
125
  "ProxyProduct",
126
+ "ProxyServer",
127
+ "ProxyUser",
128
+ "ProxyUserList",
129
+ "UsageStatistics",
120
130
  "StaticISPProxy",
121
131
  "StickySession",
122
132
  "SerpRequest",
123
133
  "UniversalScrapeRequest",
124
134
  "ScraperTaskConfig",
135
+ "CommonSettings",
136
+ "VideoTaskConfig",
125
137
  "TaskStatusResponse",
126
138
  # Exceptions
127
139
  "ThordataError",
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Iterable, Optional
7
+
8
+ try:
9
+ from dotenv import load_dotenv
10
+ except Exception: # pragma: no cover
11
+ load_dotenv = None
12
+
13
+
14
+ def load_env() -> None:
15
+ """Load .env from repo root if python-dotenv is installed."""
16
+ if load_dotenv is None:
17
+ return
18
+ repo_root = Path(__file__).resolve().parents[2]
19
+ load_dotenv(dotenv_path=repo_root / ".env")
20
+
21
+
22
+ def env(name: str) -> str:
23
+ return (os.getenv(name) or "").strip()
24
+
25
+
26
+ def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
27
+ missing = [k for k in required if not env(k)]
28
+ if not missing:
29
+ return False
30
+ print("Skipping live example: missing env:", ", ".join(missing))
31
+ if tip:
32
+ print(tip)
33
+ else:
34
+ print("Tip: copy .env.example to .env and fill values, then re-run.")
35
+ return True
36
+
37
+
38
+ def parse_json_env(name: str, default: str = "{}") -> Any:
39
+ raw = env(name) or default
40
+ return json.loads(raw)
41
+
42
+
43
+ def normalize_task_parameters(raw: Any) -> dict[str, Any]:
44
+ """Accept {..} or [{..}] and return a single dict for create_scraper_task(parameters=...)."""
45
+ if isinstance(raw, list):
46
+ if not raw:
47
+ raise ValueError("Task parameters JSON array must not be empty")
48
+ raw = raw[0]
49
+ if not isinstance(raw, dict):
50
+ raise ValueError("Task parameters must be a JSON object (or array of objects)")
51
+ return raw
52
+
53
+
54
+ def output_dir() -> Path:
55
+ """Return output dir for examples; defaults to examples/output (ignored by git)."""
56
+ repo_root = Path(__file__).resolve().parents[2]
57
+ d = env("THORDATA_OUTPUT_DIR") or str(repo_root / "examples" / "output")
58
+ p = Path(d)
59
+ p.mkdir(parents=True, exist_ok=True)
60
+ return p
61
+
62
+
63
+ def write_text(filename: str, content: str) -> Path:
64
+ p = output_dir() / filename
65
+ p.write_text(content, encoding="utf-8", errors="replace")
66
+ return p
67
+
68
+
69
+ def write_json(filename: str, data: Any) -> Path:
70
+ p = output_dir() / filename
71
+ p.write_text(
72
+ json.dumps(data, ensure_ascii=False, indent=2),
73
+ encoding="utf-8",
74
+ errors="replace",
75
+ )
76
+ return p
thordata/_utils.py CHANGED
@@ -70,18 +70,61 @@ def decode_base64_image(png_str: str) -> bytes:
70
70
  raise ValueError(f"Failed to decode base64 image: {e}") from e
71
71
 
72
72
 
73
- def build_auth_headers(token: str) -> Dict[str, str]:
73
+ def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
74
74
  """
75
75
  Build authorization headers for API requests.
76
76
 
77
+ Supports two modes:
78
+ - bearer: Authorization: Bearer <token> (Thordata Docs examples)
79
+ - header_token: token: <token> (Interface documentation)
80
+
77
81
  Args:
78
82
  token: The scraper token.
83
+ mode: Authentication mode ("bearer" or "header_token").
84
+
85
+ Returns:
86
+ Headers dict with Authorization/token and Content-Type.
87
+ """
88
+ headers = {
89
+ "Content-Type": "application/x-www-form-urlencoded",
90
+ }
91
+
92
+ if mode == "bearer":
93
+ headers["Authorization"] = f"Bearer {token}"
94
+ elif mode == "header_token":
95
+ headers["token"] = token
96
+ else:
97
+ # Fallback to bearer for compatibility
98
+ headers["Authorization"] = f"Bearer {token}"
99
+
100
+ return headers
101
+
102
+
103
+ def build_builder_headers(
104
+ scraper_token: str,
105
+ public_token: str,
106
+ public_key: str,
107
+ ) -> Dict[str, str]:
108
+ """
109
+ Build headers for Web Scraper builder API.
110
+
111
+ Builder requires THREE auth headers per official docs:
112
+ - token: public token
113
+ - key: public key
114
+ - Authorization: Bearer scraper_token
115
+
116
+ Args:
117
+ scraper_token: The scraper API token.
118
+ public_token: The public API token.
119
+ public_key: The public API key.
79
120
 
80
121
  Returns:
81
- Headers dict with Authorization and Content-Type.
122
+ Headers dict with all required auth headers.
82
123
  """
83
124
  return {
84
- "Authorization": f"Bearer {token}",
125
+ "token": public_token,
126
+ "key": public_key,
127
+ "Authorization": f"Bearer {scraper_token}",
85
128
  "Content-Type": "application/x-www-form-urlencoded",
86
129
  }
87
130