thordata-sdk 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/demo.py ADDED
@@ -0,0 +1,140 @@
1
+ """
2
+ Unified demo entrypoint for the Thordata Python SDK.
3
+
4
+ This module runs the example scripts from the repository's `examples/` directory
5
+ using `runpy`, so it does not require `examples/` to be an importable package.
6
+
7
+ Usage:
8
+ python -m thordata.demo serp
9
+ python -m thordata.demo universal
10
+ python -m thordata.demo scraper
11
+ python -m thordata.demo concurrency
12
+
13
+ Notes:
14
+ - This entrypoint is primarily intended for repository usage (dev/demo).
15
+ - When installed from PyPI, the `examples/` directory is typically not included.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import runpy
22
+ import sys
23
+ from pathlib import Path
24
+ from typing import Callable, Dict
25
+
26
+
27
def _configure_stdio() -> None:
    """Switch stdout and stderr to UTF-8 with ``errors="replace"``.

    This avoids ``UnicodeEncodeError`` on Windows consoles that use a legacy
    encoding. A stream that does not expose ``reconfigure`` is left untouched.
    """
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")
33
+
34
+
35
def _load_env() -> None:
    """Load a local ``.env`` file if the optional ``dotenv`` package is present.

    This is a convenience for local development; when ``dotenv`` cannot be
    imported the function silently does nothing.
    """
    try:
        from dotenv import load_dotenv
    except ImportError:
        # The dependency is optional, so its absence is not an error.
        pass
    else:
        load_dotenv()
42
+
43
+
44
def _repo_root() -> Path:
    """Return the repository root, assuming the ``src`` layout.

    With this module located at ``<repo>/src/thordata/demo.py``, the entry
    ``parents[2]`` of its resolved path is ``<repo>``.
    """
    module_file = Path(__file__).resolve()
    return module_file.parents[2]
50
+
51
+
52
def _examples_dir() -> Path:
    """Return the path of the ``examples`` directory under the repository root."""
    return _repo_root().joinpath("examples")
54
+
55
+
56
def _demo_map() -> Dict[str, Path]:
    """Map each demo name accepted on the command line to its script path.

    All scripts are looked up inside the directory returned by
    ``_examples_dir()``; existence of the files is not checked here.
    """
    examples = _examples_dir()
    scripts = (
        ("serp", "demo_serp_api.py"),
        ("universal", "demo_universal.py"),
        ("scraper", "demo_web_scraper_api.py"),
        ("concurrency", "async_high_concurrency.py"),
    )
    return {demo: examples / filename for demo, filename in scripts}
64
+
65
+
66
def _usage() -> str:
    """Build the one-line usage message listing the demo names alphabetically."""
    demo_names = sorted(_demo_map())
    choices = ", ".join(demo_names)
    return f"Usage: python -m thordata.demo [{choices}]"
69
+
70
+
71
def _run_demo(path: Path) -> int:
    """Run a single demo script and translate its outcome into an exit code.

    The script is first loaded under a run name other than ``"__main__"`` so
    that its ``if __name__ == "__main__":`` block does not fire. If the loaded
    namespace exposes a callable ``main``, that callable is invoked; a
    coroutine result (``async def main``) is driven to completion with
    ``asyncio.run``. Scripts without a callable ``main`` are executed again
    with ``run_name="__main__"``.

    Args:
        path: Location of the demo script to execute.

    Returns:
        ``2`` when ``path`` does not exist. Otherwise ``0`` when ``main()``
        returns ``None`` or the fallback run finishes, ``int(result)`` for any
        other ``main()`` result, the code carried by a ``SystemExit`` (``0``
        for ``None``, ``1`` for non-integer codes), or ``1`` when the script
        raises any other exception.

    Raises:
        KeyboardInterrupt: Re-raised unchanged so Ctrl+C still aborts the run.
    """
    if not path.exists():
        print(f"Error: demo script not found: {path}")
        return 2

    # Ensure examples dir is on sys.path (helpful if demo imports local helpers).
    examples_dir = str(path.parent.resolve())
    if examples_dir not in sys.path:
        sys.path.insert(0, examples_dir)

    try:
        # Load without triggering `if __name__ == "__main__": ...`
        ns = runpy.run_path(str(path), run_name="__thordata_demo__")

        main_func = ns.get("main")
        if callable(main_func):
            import inspect

            result = main_func()
            if inspect.iscoroutine(result):
                # `async def main()` only hands back a coroutine object; it
                # has to be run on an event loop to actually execute the demo.
                import asyncio

                result = asyncio.run(result)
            # A main() that finishes without an explicit return value has
            # succeeded; int(None) would otherwise raise TypeError and be
            # misreported below as a demo failure.
            return 0 if result is None else int(result)

        # Fallback: run as __main__ for scripts without main().
        # NOTE: module-level code of such scripts executes a second time here.
        runpy.run_path(str(path), run_name="__main__")
        return 0

    except KeyboardInterrupt:
        raise
    except SystemExit as e:
        # In case fallback run as __main__ triggered SystemExit
        code = e.code
        if code is None:
            return 0
        if isinstance(code, int):
            return code
        return 1
    except Exception as e:
        import traceback

        print()
        print("-" * 60)
        print("[thordata.demo] The demo script raised an exception.")
        print(f"[thordata.demo] Script: {path.name}")
        print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
        print()
        print("Note: This is a failure within the demo script itself,")
        print(" not an issue with the thordata.demo entrypoint.")
        print("-" * 60)
        traceback.print_exc()
        return 1
117
+
118
+
119
def main() -> int:
    """Command-line entry point: pick a demo by name and run it.

    The first command-line argument (stripped and lower-cased) selects the
    demo. Returns ``2`` when no argument is given or the name is unknown,
    otherwise whatever ``_run_demo`` returns for the selected script.
    """
    _configure_stdio()
    _load_env()

    cli_args = sys.argv[1:]
    if not cli_args:
        print(_usage())
        return 2

    name = cli_args[0].strip().lower()
    demos = _demo_map()
    if name not in demos:
        print(f"Unknown demo: {name}")
        print(_usage())
        return 2

    return _run_demo(demos[name])
137
+
138
+
139
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
thordata/enums.py ADDED
@@ -0,0 +1,315 @@
1
+ """
2
+ Enumerations for the Thordata Python SDK.
3
+
4
+ This module provides type-safe enumerations for all Thordata API parameters,
5
+ making it easier to discover available options via IDE autocomplete.
6
+ """
7
+
8
+ from enum import Enum, IntEnum
9
+
10
+ # =============================================================================
11
+ # Continent Enum
12
+ # =============================================================================
13
+
14
+
15
class Continent(str, Enum):
    """Two-letter continent codes used for geo-targeting.

    Members are also ``str`` instances, so they compare equal to their code.
    """

    AFRICA = "af"
    ANTARCTICA = "an"
    ASIA = "as"
    EUROPE = "eu"
    NORTH_AMERICA = "na"
    OCEANIA = "oc"
    SOUTH_AMERICA = "sa"
27
+
28
+
29
+ # =============================================================================
30
+ # Proxy Host Enum
31
+ # =============================================================================
32
+
33
+
34
class ProxyHost(str, Enum):
    """Host names of the available proxy gateways.

    Members are also ``str`` instances, so they compare equal to the host name.
    """

    DEFAULT = "pr.thordata.net"
    NORTH_AMERICA = "t.na.thordata.net"
    EUROPE = "t.eu.thordata.net"
    GATE = "gate.thordata.com"
43
+
44
+
45
class ProxyPort(IntEnum):
    """Port numbers of the available proxy gateways.

    Members are also ``int`` instances and can be used wherever a port
    number is expected.
    """

    DEFAULT = 9999
    MOBILE = 5555
    DATACENTER = 7777
    ISP = 6666
    ALTERNATIVE = 22225
55
+
56
+
57
+ # =============================================================================
58
+ # Search Engine Enums
59
+ # =============================================================================
60
+
61
+
62
class Engine(str, Enum):
    """Search engines supported by the SERP API.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    GOOGLE = "google"
    BING = "bing"
    YANDEX = "yandex"
    DUCKDUCKGO = "duckduckgo"
    BAIDU = "baidu"
    YAHOO = "yahoo"
    NAVER = "naver"
74
+
75
+
76
class GoogleSearchType(str, Enum):
    """Search types that apply to Google specifically.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    SEARCH = "search"
    MAPS = "maps"
    SHOPPING = "shopping"
    NEWS = "news"
    IMAGES = "images"
    VIDEOS = "videos"
    SCHOLAR = "scholar"
    PATENTS = "patents"
    JOBS = "jobs"
    FLIGHTS = "flights"
    FINANCE = "finance"
92
+
93
+
94
class BingSearchType(str, Enum):
    """Search types that apply to Bing specifically.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    SEARCH = "search"
    IMAGES = "images"
    VIDEOS = "videos"
    NEWS = "news"
    MAPS = "maps"
104
+
105
+
106
class Device(str, Enum):
    """Device types accepted by the SERP API.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    DESKTOP = "desktop"
    MOBILE = "mobile"
    TABLET = "tablet"
114
+
115
+
116
class TimeRange(str, Enum):
    """Time-range filters that can be applied to search results.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    HOUR = "hour"
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
126
+
127
+
128
+ # =============================================================================
129
+ # Proxy Enums
130
+ # =============================================================================
131
+
132
+
133
class ProxyType(IntEnum):
    """Numeric identifiers for the available proxy network types.

    Members are also ``int`` instances, so they compare equal to their number.
    """

    RESIDENTIAL = 1
    UNLIMITED = 2
    DATACENTER = 3
    ISP = 4
    MOBILE = 5
143
+
144
+
145
class SessionType(str, Enum):
    """Proxy session types, which govern connection persistence.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    ROTATING = "rotating"
    STICKY = "sticky"
152
+
153
+
154
+ # =============================================================================
155
+ # Output Format Enums
156
+ # =============================================================================
157
+
158
+
159
class OutputFormat(str, Enum):
    """Output formats offered by the Universal Scraping API.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    HTML = "html"
    PNG = "png"
    PDF = "pdf"
    MARKDOWN = "markdown"
    TEXT = "text"
169
+
170
+
171
class DataFormat(str, Enum):
    """File formats in which task results can be downloaded.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    JSON = "json"
    CSV = "csv"
    XLSX = "xlsx"
179
+
180
+
181
+ # =============================================================================
182
+ # Task Status Enums
183
+ # =============================================================================
184
+
185
+
186
class TaskStatus(str, Enum):
    """Statuses an asynchronous scraping task can be in.

    Members are also ``str`` instances. The class-method helpers sort a
    status into the success, failure and terminal groups.
    """

    PENDING = "pending"
    RUNNING = "running"
    READY = "ready"
    SUCCESS = "success"
    FINISHED = "finished"
    FAILED = "failed"
    ERROR = "error"
    CANCELLED = "cancelled"
    UNKNOWN = "unknown"

    @classmethod
    def is_terminal(cls, status: "TaskStatus") -> bool:
        """Return True if no further updates are expected for ``status``.

        The terminal group is every success status, every failure status,
        and ``CANCELLED``.
        """
        return (
            cls.is_success(status)
            or cls.is_failure(status)
            or status in {cls.CANCELLED}
        )

    @classmethod
    def is_success(cls, status: "TaskStatus") -> bool:
        """Return True if ``status`` is READY, SUCCESS or FINISHED."""
        successful = {cls.READY, cls.SUCCESS, cls.FINISHED}
        return status in successful

    @classmethod
    def is_failure(cls, status: "TaskStatus") -> bool:
        """Return True if ``status`` is FAILED or ERROR."""
        failed = {cls.FAILED, cls.ERROR}
        return status in failed
222
+
223
+
224
+ # =============================================================================
225
+ # Country Enum (common countries)
226
+ # =============================================================================
227
+
228
+
229
class Country(str, Enum):
    """Two-letter codes of commonly used countries for geo-targeting.

    Each member's value is its own name in lower case. Members are also
    ``str`` instances, so they compare equal to that code.
    """

    # --- North America ---
    US = "us"
    CA = "ca"
    MX = "mx"

    # --- Europe ---
    GB = "gb"
    DE = "de"
    FR = "fr"
    ES = "es"
    IT = "it"
    NL = "nl"
    PL = "pl"
    RU = "ru"
    UA = "ua"
    SE = "se"
    NO = "no"
    DK = "dk"
    FI = "fi"
    CH = "ch"
    AT = "at"
    BE = "be"
    PT = "pt"
    IE = "ie"
    CZ = "cz"
    GR = "gr"

    # --- Asia Pacific ---
    CN = "cn"
    JP = "jp"
    KR = "kr"
    IN = "in"
    AU = "au"
    NZ = "nz"
    SG = "sg"
    HK = "hk"
    TW = "tw"
    TH = "th"
    VN = "vn"
    ID = "id"
    MY = "my"
    PH = "ph"
    PK = "pk"
    BD = "bd"

    # --- South America ---
    BR = "br"
    AR = "ar"
    CL = "cl"
    CO = "co"
    PE = "pe"
    VE = "ve"

    # --- Middle East & Africa ---
    AE = "ae"
    SA = "sa"
    IL = "il"
    TR = "tr"
    ZA = "za"
    EG = "eg"
    NG = "ng"
    KE = "ke"
    MA = "ma"
297
+
298
+
299
+ # =============================================================================
300
+ # Helper Functions
301
+ # =============================================================================
302
+
303
+
304
def normalize_enum_value(value: object, enum_class: type) -> str:
    """Return the lower-cased string form of an enum member or plain string.

    Args:
        value: A member of ``enum_class`` or a ``str``.
        enum_class: The enum type that ``value`` may be an instance of.

    Returns:
        For a member of ``enum_class``, ``str()`` of its ``value`` attribute
        in lower case; for a string, the string in lower case. Strings are
        not checked against the members of ``enum_class``.

    Raises:
        TypeError: If ``value`` is neither an ``enum_class`` instance nor a
            ``str``.
    """
    if isinstance(value, enum_class):
        # Fall back to the object itself if it carries no ``value`` attribute.
        text = str(getattr(value, "value", value))
    elif isinstance(value, str):
        text = value
    else:
        raise TypeError(
            f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
        )
    return text.lower()