thordata-sdk 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +4 -40
- thordata/async_client.py +492 -1790
- thordata/client.py +432 -1315
- thordata/core/__init__.py +23 -0
- thordata/core/async_http_client.py +91 -0
- thordata/core/http_client.py +79 -0
- thordata/core/tunnel.py +287 -0
- thordata/enums.py +41 -380
- thordata/models.py +37 -1193
- thordata/tools/__init__.py +28 -0
- thordata/tools/base.py +42 -0
- thordata/tools/code.py +26 -0
- thordata/tools/ecommerce.py +67 -0
- thordata/tools/search.py +73 -0
- thordata/tools/social.py +190 -0
- thordata/tools/video.py +81 -0
- thordata/types/__init__.py +77 -0
- thordata/types/common.py +141 -0
- thordata/types/proxy.py +340 -0
- thordata/types/serp.py +224 -0
- thordata/types/task.py +144 -0
- thordata/types/universal.py +66 -0
- thordata/unlimited.py +67 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/METADATA +73 -50
- thordata_sdk-1.5.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/WHEEL +1 -1
- thordata_sdk-1.4.0.dist-info/RECORD +0 -18
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.4.0.dist-info → thordata_sdk-1.5.0.dist-info}/top_level.txt +0 -0
thordata/enums.py
CHANGED
|
@@ -1,384 +1,45 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Enumerations for the Thordata Python SDK.
|
|
3
|
-
|
|
4
|
-
This
|
|
5
|
-
making it easier to discover available options via IDE autocomplete.
|
|
3
|
+
Moved to thordata.types in v1.5.0.
|
|
4
|
+
This file is kept for backward compatibility.
|
|
6
5
|
"""
|
|
7
6
|
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
""
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
"""
|
|
48
|
-
Available proxy gateway ports.
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
RESIDENTIAL = 9999
|
|
52
|
-
MOBILE = 5555
|
|
53
|
-
DATACENTER = 7777
|
|
54
|
-
ISP = 6666
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
# =============================================================================
|
|
58
|
-
# Search Engine Enums
|
|
59
|
-
# =============================================================================
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
class Engine(str, Enum):
|
|
63
|
-
"""
|
|
64
|
-
Supported search engines for SERP API.
|
|
65
|
-
|
|
66
|
-
Engine naming convention:
|
|
67
|
-
- Base search: {engine} for basic web search (google, bing, yandex, duckduckgo)
|
|
68
|
-
- Verticals: {engine}_{vertical} (e.g., google_news, bing_images)
|
|
69
|
-
- Sub-verticals: {engine}_{vertical}_{sub} (e.g., google_scholar_cite)
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
# ===================
|
|
73
|
-
# Google
|
|
74
|
-
# ===================
|
|
75
|
-
GOOGLE = "google"
|
|
76
|
-
GOOGLE_SEARCH = "google_search"
|
|
77
|
-
GOOGLE_AI_MODE = "google_ai_mode"
|
|
78
|
-
GOOGLE_WEB = "google_web"
|
|
79
|
-
GOOGLE_SHOPPING = "google_shopping"
|
|
80
|
-
GOOGLE_LOCAL = "google_local"
|
|
81
|
-
GOOGLE_VIDEOS = "google_videos"
|
|
82
|
-
GOOGLE_NEWS = "google_news"
|
|
83
|
-
GOOGLE_FLIGHTS = "google_flights"
|
|
84
|
-
GOOGLE_IMAGES = "google_images"
|
|
85
|
-
GOOGLE_LENS = "google_lens"
|
|
86
|
-
GOOGLE_TRENDS = "google_trends"
|
|
87
|
-
GOOGLE_HOTELS = "google_hotels"
|
|
88
|
-
GOOGLE_PLAY = "google_play"
|
|
89
|
-
GOOGLE_JOBS = "google_jobs"
|
|
90
|
-
GOOGLE_SCHOLAR = "google_scholar"
|
|
91
|
-
GOOGLE_SCHOLAR_CITE = "google_scholar_cite"
|
|
92
|
-
GOOGLE_SCHOLAR_AUTHOR = "google_scholar_author"
|
|
93
|
-
GOOGLE_MAPS = "google_maps"
|
|
94
|
-
GOOGLE_FINANCE = "google_finance"
|
|
95
|
-
GOOGLE_FINANCE_MARKETS = "google_finance_markets"
|
|
96
|
-
GOOGLE_PATENTS = "google_patents"
|
|
97
|
-
GOOGLE_PATENTS_DETAILS = "google_patents_details"
|
|
98
|
-
|
|
99
|
-
# ===================
|
|
100
|
-
# Bing
|
|
101
|
-
# ===================
|
|
102
|
-
BING = "bing"
|
|
103
|
-
BING_SEARCH = "bing_search"
|
|
104
|
-
BING_IMAGES = "bing_images"
|
|
105
|
-
BING_VIDEOS = "bing_videos"
|
|
106
|
-
BING_NEWS = "bing_news"
|
|
107
|
-
BING_MAPS = "bing_maps"
|
|
108
|
-
BING_SHOPPING = "bing_shopping"
|
|
109
|
-
|
|
110
|
-
# ===================
|
|
111
|
-
# Yandex
|
|
112
|
-
# ===================
|
|
113
|
-
YANDEX = "yandex"
|
|
114
|
-
YANDEX_SEARCH = "yandex_search"
|
|
115
|
-
|
|
116
|
-
# ===================
|
|
117
|
-
# DuckDuckGo
|
|
118
|
-
# ===================
|
|
119
|
-
DUCKDUCKGO = "duckduckgo"
|
|
120
|
-
DUCKDUCKGO_SEARCH = "duckduckgo_search"
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
class GoogleSearchType(str, Enum):
|
|
124
|
-
"""
|
|
125
|
-
Search types specific to Google.
|
|
126
|
-
|
|
127
|
-
These map to the second part of Google engine names.
|
|
128
|
-
For example, GOOGLE + NEWS = google_news
|
|
129
|
-
"""
|
|
130
|
-
|
|
131
|
-
SEARCH = "search"
|
|
132
|
-
AI_MODE = "ai_mode"
|
|
133
|
-
WEB = "web"
|
|
134
|
-
SHOPPING = "shopping"
|
|
135
|
-
LOCAL = "local"
|
|
136
|
-
VIDEOS = "videos"
|
|
137
|
-
NEWS = "news"
|
|
138
|
-
FLIGHTS = "flights"
|
|
139
|
-
IMAGES = "images"
|
|
140
|
-
LENS = "lens"
|
|
141
|
-
TRENDS = "trends"
|
|
142
|
-
HOTELS = "hotels"
|
|
143
|
-
PLAY = "play"
|
|
144
|
-
JOBS = "jobs"
|
|
145
|
-
SCHOLAR = "scholar"
|
|
146
|
-
MAPS = "maps"
|
|
147
|
-
FINANCE = "finance"
|
|
148
|
-
PATENTS = "patents"
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
class BingSearchType(str, Enum):
|
|
152
|
-
"""
|
|
153
|
-
Search types specific to Bing.
|
|
154
|
-
"""
|
|
155
|
-
|
|
156
|
-
SEARCH = "search"
|
|
157
|
-
IMAGES = "images"
|
|
158
|
-
VIDEOS = "videos"
|
|
159
|
-
NEWS = "news"
|
|
160
|
-
MAPS = "maps"
|
|
161
|
-
SHOPPING = "shopping"
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
class GoogleTbm(str, Enum):
|
|
165
|
-
"""
|
|
166
|
-
Google tbm (to be matched) parameter values.
|
|
167
|
-
|
|
168
|
-
Only available when using specific Google engines that support tbm.
|
|
169
|
-
"""
|
|
170
|
-
|
|
171
|
-
NEWS = "nws"
|
|
172
|
-
SHOPPING = "shop"
|
|
173
|
-
IMAGES = "isch"
|
|
174
|
-
VIDEOS = "vid"
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
class Device(str, Enum):
|
|
178
|
-
"""
|
|
179
|
-
Device types for SERP API.
|
|
180
|
-
"""
|
|
181
|
-
|
|
182
|
-
DESKTOP = "desktop"
|
|
183
|
-
MOBILE = "mobile"
|
|
184
|
-
TABLET = "tablet"
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
class TimeRange(str, Enum):
|
|
188
|
-
"""
|
|
189
|
-
Time range filters for search results.
|
|
190
|
-
"""
|
|
191
|
-
|
|
192
|
-
HOUR = "hour"
|
|
193
|
-
DAY = "day"
|
|
194
|
-
WEEK = "week"
|
|
195
|
-
MONTH = "month"
|
|
196
|
-
YEAR = "year"
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
# =============================================================================
|
|
200
|
-
# Proxy Enums
|
|
201
|
-
# =============================================================================
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
class ProxyType(IntEnum):
|
|
205
|
-
"""
|
|
206
|
-
Types of proxy networks available.
|
|
207
|
-
"""
|
|
208
|
-
|
|
209
|
-
RESIDENTIAL = 1
|
|
210
|
-
UNLIMITED = 2
|
|
211
|
-
DATACENTER = 3
|
|
212
|
-
ISP = 4
|
|
213
|
-
MOBILE = 5
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
class SessionType(str, Enum):
|
|
217
|
-
"""
|
|
218
|
-
Proxy session types for connection persistence.
|
|
219
|
-
"""
|
|
220
|
-
|
|
221
|
-
ROTATING = "rotating"
|
|
222
|
-
STICKY = "sticky"
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
# =============================================================================
|
|
226
|
-
# Output Format Enums
|
|
227
|
-
# =============================================================================
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
class OutputFormat(str, Enum):
|
|
231
|
-
"""
|
|
232
|
-
Output formats for Universal Scraping API.
|
|
233
|
-
|
|
234
|
-
Currently supported: html, png
|
|
235
|
-
"""
|
|
236
|
-
|
|
237
|
-
HTML = "html"
|
|
238
|
-
PNG = "png"
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
class DataFormat(str, Enum):
|
|
242
|
-
"""
|
|
243
|
-
Data formats for task result download.
|
|
244
|
-
"""
|
|
245
|
-
|
|
246
|
-
JSON = "json"
|
|
247
|
-
CSV = "csv"
|
|
248
|
-
XLSX = "xlsx"
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
# =============================================================================
|
|
252
|
-
# Task Status Enums
|
|
253
|
-
# =============================================================================
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
class TaskStatus(str, Enum):
|
|
257
|
-
"""
|
|
258
|
-
Possible statuses for async scraping tasks.
|
|
259
|
-
"""
|
|
260
|
-
|
|
261
|
-
PENDING = "pending"
|
|
262
|
-
RUNNING = "running"
|
|
263
|
-
READY = "ready"
|
|
264
|
-
SUCCESS = "success"
|
|
265
|
-
FINISHED = "finished"
|
|
266
|
-
FAILED = "failed"
|
|
267
|
-
ERROR = "error"
|
|
268
|
-
CANCELLED = "cancelled"
|
|
269
|
-
UNKNOWN = "unknown"
|
|
270
|
-
|
|
271
|
-
@classmethod
|
|
272
|
-
def is_terminal(cls, status: "TaskStatus") -> bool:
|
|
273
|
-
"""Check if a status is terminal (no more updates expected)."""
|
|
274
|
-
return status in {
|
|
275
|
-
cls.READY,
|
|
276
|
-
cls.SUCCESS,
|
|
277
|
-
cls.FINISHED,
|
|
278
|
-
cls.FAILED,
|
|
279
|
-
cls.ERROR,
|
|
280
|
-
cls.CANCELLED,
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
@classmethod
|
|
284
|
-
def is_success(cls, status: "TaskStatus") -> bool:
|
|
285
|
-
"""Check if a status indicates success."""
|
|
286
|
-
return status in {cls.READY, cls.SUCCESS, cls.FINISHED}
|
|
287
|
-
|
|
288
|
-
@classmethod
|
|
289
|
-
def is_failure(cls, status: "TaskStatus") -> bool:
|
|
290
|
-
"""Check if a status indicates failure."""
|
|
291
|
-
return status in {cls.FAILED, cls.ERROR}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
# =============================================================================
|
|
295
|
-
# Country Enum (Common Countries)
|
|
296
|
-
# =============================================================================
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
class Country(str, Enum):
|
|
300
|
-
"""
|
|
301
|
-
Common country codes for geo-targeting.
|
|
302
|
-
"""
|
|
303
|
-
|
|
304
|
-
# North America
|
|
305
|
-
US = "us"
|
|
306
|
-
CA = "ca"
|
|
307
|
-
MX = "mx"
|
|
308
|
-
|
|
309
|
-
# Europe
|
|
310
|
-
GB = "gb"
|
|
311
|
-
DE = "de"
|
|
312
|
-
FR = "fr"
|
|
313
|
-
ES = "es"
|
|
314
|
-
IT = "it"
|
|
315
|
-
NL = "nl"
|
|
316
|
-
PL = "pl"
|
|
317
|
-
RU = "ru"
|
|
318
|
-
UA = "ua"
|
|
319
|
-
SE = "se"
|
|
320
|
-
NO = "no"
|
|
321
|
-
DK = "dk"
|
|
322
|
-
FI = "fi"
|
|
323
|
-
CH = "ch"
|
|
324
|
-
AT = "at"
|
|
325
|
-
BE = "be"
|
|
326
|
-
PT = "pt"
|
|
327
|
-
IE = "ie"
|
|
328
|
-
CZ = "cz"
|
|
329
|
-
GR = "gr"
|
|
330
|
-
|
|
331
|
-
# Asia Pacific
|
|
332
|
-
CN = "cn"
|
|
333
|
-
JP = "jp"
|
|
334
|
-
KR = "kr"
|
|
335
|
-
IN = "in"
|
|
336
|
-
AU = "au"
|
|
337
|
-
NZ = "nz"
|
|
338
|
-
SG = "sg"
|
|
339
|
-
HK = "hk"
|
|
340
|
-
TW = "tw"
|
|
341
|
-
TH = "th"
|
|
342
|
-
VN = "vn"
|
|
343
|
-
ID = "id"
|
|
344
|
-
MY = "my"
|
|
345
|
-
PH = "ph"
|
|
346
|
-
PK = "pk"
|
|
347
|
-
BD = "bd"
|
|
348
|
-
|
|
349
|
-
# South America
|
|
350
|
-
BR = "br"
|
|
351
|
-
AR = "ar"
|
|
352
|
-
CL = "cl"
|
|
353
|
-
CO = "co"
|
|
354
|
-
PE = "pe"
|
|
355
|
-
VE = "ve"
|
|
356
|
-
|
|
357
|
-
# Middle East & Africa
|
|
358
|
-
AE = "ae"
|
|
359
|
-
SA = "sa"
|
|
360
|
-
IL = "il"
|
|
361
|
-
TR = "tr"
|
|
362
|
-
ZA = "za"
|
|
363
|
-
EG = "eg"
|
|
364
|
-
NG = "ng"
|
|
365
|
-
KE = "ke"
|
|
366
|
-
MA = "ma"
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
# =============================================================================
|
|
370
|
-
# Helper Functions
|
|
371
|
-
# =============================================================================
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
def normalize_enum_value(value: object, enum_class: type) -> str:
|
|
375
|
-
"""
|
|
376
|
-
Safely convert an enum or string to its string value.
|
|
377
|
-
"""
|
|
378
|
-
if isinstance(value, enum_class):
|
|
379
|
-
return str(getattr(value, "value", value)).lower()
|
|
380
|
-
if isinstance(value, str):
|
|
381
|
-
return value.lower()
|
|
382
|
-
raise TypeError(
|
|
383
|
-
f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
|
|
384
|
-
)
|
|
7
|
+
from .types import (
|
|
8
|
+
BingSearchType,
|
|
9
|
+
Continent,
|
|
10
|
+
Country,
|
|
11
|
+
DataFormat,
|
|
12
|
+
Device,
|
|
13
|
+
Engine,
|
|
14
|
+
GoogleSearchType,
|
|
15
|
+
GoogleTbm,
|
|
16
|
+
OutputFormat,
|
|
17
|
+
ProxyHost,
|
|
18
|
+
ProxyPort,
|
|
19
|
+
ProxyProduct,
|
|
20
|
+
ProxyType,
|
|
21
|
+
SessionType,
|
|
22
|
+
TaskStatus,
|
|
23
|
+
TimeRange,
|
|
24
|
+
normalize_enum_value, # newly added
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"Continent",
|
|
29
|
+
"ProxyHost",
|
|
30
|
+
"ProxyPort",
|
|
31
|
+
"Engine",
|
|
32
|
+
"GoogleSearchType",
|
|
33
|
+
"BingSearchType",
|
|
34
|
+
"GoogleTbm",
|
|
35
|
+
"Device",
|
|
36
|
+
"TimeRange",
|
|
37
|
+
"ProxyType",
|
|
38
|
+
"SessionType",
|
|
39
|
+
"OutputFormat",
|
|
40
|
+
"DataFormat",
|
|
41
|
+
"TaskStatus",
|
|
42
|
+
"Country",
|
|
43
|
+
"ProxyProduct",
|
|
44
|
+
"normalize_enum_value",
|
|
45
|
+
]
|