thordata-sdk 0.2.4__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/demo.py ADDED
@@ -0,0 +1,138 @@
1
+ """
2
+ Unified demo entrypoint for the Thordata Python SDK.
3
+
4
+ This module runs the example scripts from the repository's `examples/` directory
5
+ using `runpy`, so it does not require `examples/` to be an importable package.
6
+
7
+ Usage:
8
+ python -m thordata.demo serp
9
+ python -m thordata.demo universal
10
+ python -m thordata.demo scraper
11
+ python -m thordata.demo concurrency
12
+
13
+ Notes:
14
+ - This entrypoint is primarily intended for repository usage (dev/demo).
15
+ - When installed from PyPI, the `examples/` directory is typically not included.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import runpy
21
+ import sys
22
+ from pathlib import Path
23
+
24
+
25
def _configure_stdio() -> None:
    """
    Switch stdout and stderr to UTF-8, replacing unencodable characters.

    This avoids UnicodeEncodeError on Windows consoles with legacy encodings.
    A stream that has no ``reconfigure`` method is left untouched.
    """
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")
31
+
32
+
33
def _load_env() -> None:
    """
    Load environment variables via ``dotenv.load_dotenv()`` when available.

    This is optional ``.env`` support for local development: if the
    ``dotenv`` package cannot be imported, the function returns silently.
    """
    try:
        from dotenv import load_dotenv
    except ImportError:
        return
    load_dotenv()
40
+
41
+
42
def _repo_root() -> Path:
    """
    Resolve repository root based on src layout:
    <repo>/src/thordata/demo.py -> parents[2] == <repo>

    The result is simply two directories above the directory containing this
    file, so it only points at the repository when the module is run from a
    src-layout checkout.
    """
    return Path(__file__).resolve().parents[2]
48
+
49
+
50
def _examples_dir() -> Path:
    """Return the ``examples`` directory located under the repository root."""
    return _repo_root() / "examples"
52
+
53
+
54
def _demo_map() -> dict[str, Path]:
    """
    Map each demo name accepted on the command line to its script path.

    The paths are built under the examples directory; their existence is not
    checked here.
    """
    ex = _examples_dir()
    return {
        "serp": ex / "demo_serp_api.py",
        "universal": ex / "demo_universal.py",
        "scraper": ex / "demo_web_scraper_api.py",
        "concurrency": ex / "async_high_concurrency.py",
    }
62
+
63
+
64
def _usage() -> str:
    """Return the one-line usage string listing the demo names alphabetically."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    names = ", ".join(sorted(_demo_map()))
    return f"Usage: python -m thordata.demo [{names}]"
67
+
68
+
69
def _run_demo(path: Path) -> int:
    """
    Execute a demo script and translate its outcome into a process exit code.

    The script is first loaded with ``runpy`` under the module name
    ``__thordata_demo__`` so that an ``if __name__ == "__main__":`` guard is
    not triggered. If the loaded namespace exposes a callable ``main``, it is
    called and its result becomes the exit code. Otherwise the script is run
    again as ``__main__`` (note: such a script is therefore executed twice).

    Args:
        path: Path to the demo script.

    Returns:
        2 if the script does not exist; the integer result of ``main()``
        (``None`` counts as 0, and a coroutine is awaited first); 0 after the
        ``__main__`` fallback; the ``SystemExit`` code if one is raised (0 for
        ``None``, 1 for a non-integer code); 1 if the script raises any other
        exception.

    ``KeyboardInterrupt`` is not caught and propagates to the caller.
    """
    if not path.exists():
        print(f"Error: demo script not found: {path}")
        return 2

    # Ensure examples dir is on sys.path (helpful if demo imports local helpers).
    examples_dir = str(path.parent.resolve())
    if examples_dir not in sys.path:
        sys.path.insert(0, examples_dir)

    try:
        # Load without triggering `if __name__ == "__main__": ...`
        ns = runpy.run_path(str(path), run_name="__thordata_demo__")

        main_func = ns.get("main")
        if callable(main_func):
            result = main_func()

            # An `async def main()` returns a coroutine; drive it to completion
            # instead of failing on int(<coroutine>).
            import inspect

            if inspect.iscoroutine(result):
                import asyncio

                result = asyncio.run(result)

            # A main() that simply falls off the end returns None == success.
            if result is None:
                return 0
            return int(result)  # type: ignore[arg-type]

        # Fallback: run as __main__ for scripts without main()
        runpy.run_path(str(path), run_name="__main__")
        return 0

    except SystemExit as e:
        # Raised by sys.exit() inside main() or by the __main__ fallback.
        code = e.code
        if code is None:
            return 0
        if isinstance(code, int):
            return code
        # Mirror the interpreter: a non-integer code is printed to stderr
        # and the exit status is 1.
        print(code, file=sys.stderr)
        return 1
    except Exception as e:
        import traceback

        print()
        print("-" * 60)
        print("[thordata.demo] The demo script raised an exception.")
        print(f"[thordata.demo] Script: {path.name}")
        print(f"[thordata.demo] Error: {type(e).__name__}: {e}")
        print()
        print("Note: This is a failure within the demo script itself,")
        print("      not an issue with the thordata.demo entrypoint.")
        print("-" * 60)
        traceback.print_exc()
        return 1
115
+
116
+
117
def main(argv: list[str] | None = None) -> int:
    """
    Command-line entrypoint: run the demo named by the first argument.

    Args:
        argv: Arguments excluding the program name. When ``None`` (the
            default), ``sys.argv[1:]`` is used.

    Returns:
        2 when no demo name is given or the name is unknown (usage is
        printed); otherwise the exit code produced by running the demo.
    """
    _configure_stdio()
    _load_env()

    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(_usage())
        return 2

    # Demo names are matched case-insensitively and ignoring outer whitespace.
    name = args[0].strip().lower()
    path = _demo_map().get(name)
    if path is None:
        print(f"Unknown demo: {name}")
        print(_usage())
        return 2

    return _run_demo(path)


if __name__ == "__main__":
    raise SystemExit(main())
thordata/enums.py ADDED
@@ -0,0 +1,384 @@
1
+ """
2
+ Enumerations for the Thordata Python SDK.
3
+
4
+ This module provides type-safe enumerations for all Thordata API parameters,
5
+ making it easier to discover available options via IDE autocomplete.
6
+ """
7
+
8
+ from enum import Enum, IntEnum
9
+
10
+ # =============================================================================
11
+ # Continent Enum
12
+ # =============================================================================
13
+
14
+
15
class Continent(str, Enum):
    """
    Continent codes for geo-targeting.

    Members are ``str`` subclasses, so each compares equal to its
    two-letter lowercase code.
    """

    AFRICA = "af"
    ANTARCTICA = "an"
    ASIA = "as"
    EUROPE = "eu"
    NORTH_AMERICA = "na"
    OCEANIA = "oc"
    SOUTH_AMERICA = "sa"
27
+
28
+
29
+ # =============================================================================
30
+ # Proxy Host Enum
31
+ # =============================================================================
32
+
33
+
34
class ProxyHost(str, Enum):
    """
    Available proxy gateway hosts.

    Note: Dashboard provides user-specific hosts like {shard}.{region}.thordata.net
    """

    DEFAULT = "pr.thordata.net"
    NORTH_AMERICA = "t.na.thordata.net"
    EUROPE = "t.eu.thordata.net"
44
+
45
+
46
class ProxyPort(IntEnum):
    """
    Available proxy gateway ports.

    Members are ``int`` subclasses and can be used wherever a port number
    is expected.
    """

    RESIDENTIAL = 9999
    MOBILE = 5555
    DATACENTER = 7777
    ISP = 6666
55
+
56
+
57
+ # =============================================================================
58
+ # Search Engine Enums
59
+ # =============================================================================
60
+
61
+
62
class Engine(str, Enum):
    """
    Supported search engines for SERP API.

    Engine naming convention:
    - Base search: {engine} for basic web search (google, bing, yandex, duckduckgo)
    - Verticals: {engine}_{vertical} (e.g., google_news, bing_images)
    - Sub-verticals: {engine}_{vertical}_{sub} (e.g., google_scholar_cite)
    """

    # ===================
    # Google
    # ===================
    GOOGLE = "google"
    GOOGLE_SEARCH = "google_search"
    GOOGLE_AI_MODE = "google_ai_mode"
    GOOGLE_WEB = "google_web"
    GOOGLE_SHOPPING = "google_shopping"
    GOOGLE_LOCAL = "google_local"
    GOOGLE_VIDEOS = "google_videos"
    GOOGLE_NEWS = "google_news"
    GOOGLE_FLIGHTS = "google_flights"
    GOOGLE_IMAGES = "google_images"
    GOOGLE_LENS = "google_lens"
    GOOGLE_TRENDS = "google_trends"
    GOOGLE_HOTELS = "google_hotels"
    GOOGLE_PLAY = "google_play"
    GOOGLE_JOBS = "google_jobs"
    GOOGLE_SCHOLAR = "google_scholar"
    GOOGLE_SCHOLAR_CITE = "google_scholar_cite"
    GOOGLE_SCHOLAR_AUTHOR = "google_scholar_author"
    GOOGLE_MAPS = "google_maps"
    GOOGLE_FINANCE = "google_finance"
    GOOGLE_FINANCE_MARKETS = "google_finance_markets"
    GOOGLE_PATENTS = "google_patents"
    GOOGLE_PATENTS_DETAILS = "google_patents_details"

    # ===================
    # Bing
    # ===================
    BING = "bing"
    BING_SEARCH = "bing_search"
    BING_IMAGES = "bing_images"
    BING_VIDEOS = "bing_videos"
    BING_NEWS = "bing_news"
    BING_MAPS = "bing_maps"
    BING_SHOPPING = "bing_shopping"

    # ===================
    # Yandex
    # ===================
    YANDEX = "yandex"
    YANDEX_SEARCH = "yandex_search"

    # ===================
    # DuckDuckGo
    # ===================
    DUCKDUCKGO = "duckduckgo"
    DUCKDUCKGO_SEARCH = "duckduckgo_search"
121
+
122
+
123
class GoogleSearchType(str, Enum):
    """
    Search types specific to Google.

    These map to the second part of Google engine names.
    For example, GOOGLE + NEWS = google_news
    """

    SEARCH = "search"
    AI_MODE = "ai_mode"
    WEB = "web"
    SHOPPING = "shopping"
    LOCAL = "local"
    VIDEOS = "videos"
    NEWS = "news"
    FLIGHTS = "flights"
    IMAGES = "images"
    LENS = "lens"
    TRENDS = "trends"
    HOTELS = "hotels"
    PLAY = "play"
    JOBS = "jobs"
    SCHOLAR = "scholar"
    MAPS = "maps"
    FINANCE = "finance"
    PATENTS = "patents"
149
+
150
+
151
class BingSearchType(str, Enum):
    """
    Search types specific to Bing.
    """

    SEARCH = "search"
    IMAGES = "images"
    VIDEOS = "videos"
    NEWS = "news"
    MAPS = "maps"
    SHOPPING = "shopping"
162
+
163
+
164
class GoogleTbm(str, Enum):
    """
    Google tbm (to be matched) parameter values.

    Only available when using specific Google engines that support tbm.
    """

    NEWS = "nws"
    SHOPPING = "shop"
    IMAGES = "isch"
    VIDEOS = "vid"
175
+
176
+
177
class Device(str, Enum):
    """
    Device types for SERP API.
    """

    DESKTOP = "desktop"
    MOBILE = "mobile"
    TABLET = "tablet"
185
+
186
+
187
class TimeRange(str, Enum):
    """
    Time range filters for search results.
    """

    HOUR = "hour"
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
197
+
198
+
199
+ # =============================================================================
200
+ # Proxy Enums
201
+ # =============================================================================
202
+
203
+
204
class ProxyType(IntEnum):
    """
    Types of proxy networks available.

    Members are ``int`` subclasses whose values are the numeric type codes.
    """

    RESIDENTIAL = 1
    UNLIMITED = 2
    DATACENTER = 3
    ISP = 4
    MOBILE = 5
214
+
215
+
216
class SessionType(str, Enum):
    """
    Proxy session types for connection persistence.
    """

    ROTATING = "rotating"
    STICKY = "sticky"
223
+
224
+
225
+ # =============================================================================
226
+ # Output Format Enums
227
+ # =============================================================================
228
+
229
+
230
class OutputFormat(str, Enum):
    """
    Output formats for Universal Scraping API.

    Currently supported: html, png
    """

    HTML = "html"
    PNG = "png"
239
+
240
+
241
class DataFormat(str, Enum):
    """
    Data formats for task result download.
    """

    JSON = "json"
    CSV = "csv"
    XLSX = "xlsx"
249
+
250
+
251
+ # =============================================================================
252
+ # Task Status Enums
253
+ # =============================================================================
254
+
255
+
256
class TaskStatus(str, Enum):
    """
    Possible statuses for async scraping tasks.

    The classmethod predicates below group the members:
    - success:  READY, SUCCESS, FINISHED
    - failure:  FAILED, ERROR
    - terminal: success + failure + CANCELLED
    PENDING, RUNNING and UNKNOWN belong to none of these groups.
    """

    PENDING = "pending"
    RUNNING = "running"
    READY = "ready"
    SUCCESS = "success"
    FINISHED = "finished"
    FAILED = "failed"
    ERROR = "error"
    CANCELLED = "cancelled"
    UNKNOWN = "unknown"

    @classmethod
    def is_terminal(cls, status: "TaskStatus") -> bool:
        """Check if a status is terminal (no more updates expected)."""
        return status in {
            cls.READY,
            cls.SUCCESS,
            cls.FINISHED,
            cls.FAILED,
            cls.ERROR,
            cls.CANCELLED,
        }

    @classmethod
    def is_success(cls, status: "TaskStatus") -> bool:
        """Check if a status indicates success."""
        return status in {cls.READY, cls.SUCCESS, cls.FINISHED}

    @classmethod
    def is_failure(cls, status: "TaskStatus") -> bool:
        """Check if a status indicates failure."""
        return status in {cls.FAILED, cls.ERROR}
292
+
293
+
294
+ # =============================================================================
295
+ # Country Enum (Common Countries)
296
+ # =============================================================================
297
+
298
+
299
class Country(str, Enum):
    """
    Common country codes for geo-targeting.

    Each member's value is its two-letter name in lowercase.
    """

    # North America
    US = "us"
    CA = "ca"
    MX = "mx"

    # Europe
    GB = "gb"
    DE = "de"
    FR = "fr"
    ES = "es"
    IT = "it"
    NL = "nl"
    PL = "pl"
    RU = "ru"
    UA = "ua"
    SE = "se"
    NO = "no"
    DK = "dk"
    FI = "fi"
    CH = "ch"
    AT = "at"
    BE = "be"
    PT = "pt"
    IE = "ie"
    CZ = "cz"
    GR = "gr"

    # Asia Pacific
    CN = "cn"
    JP = "jp"
    KR = "kr"
    IN = "in"
    AU = "au"
    NZ = "nz"
    SG = "sg"
    HK = "hk"
    TW = "tw"
    TH = "th"
    VN = "vn"
    ID = "id"
    MY = "my"
    PH = "ph"
    PK = "pk"
    BD = "bd"

    # South America
    BR = "br"
    AR = "ar"
    CL = "cl"
    CO = "co"
    PE = "pe"
    VE = "ve"

    # Middle East & Africa
    AE = "ae"
    SA = "sa"
    IL = "il"
    TR = "tr"
    ZA = "za"
    EG = "eg"
    NG = "ng"
    KE = "ke"
    MA = "ma"
367
+
368
+
369
+ # =============================================================================
370
+ # Helper Functions
371
+ # =============================================================================
372
+
373
+
374
def normalize_enum_value(value: object, enum_class: type) -> str:
    """
    Safely convert an enum or string to its string value.

    Args:
        value: A member of ``enum_class`` or a plain string.
        enum_class: The enum type that ``value`` is allowed to be.

    Returns:
        The lowercased string form of the member's ``value`` attribute, or
        the lowercased input string.

    Raises:
        TypeError: If ``value`` is neither an ``enum_class`` instance nor a
            ``str``.
    """
    if isinstance(value, enum_class):
        # Fall back to the object itself if it has no `.value` attribute.
        return str(getattr(value, "value", value)).lower()
    if isinstance(value, str):
        return value.lower()
    raise TypeError(
        f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
    )