thordata-sdk 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/enums.py CHANGED
@@ -1,384 +1,45 @@
1
1
  """
2
2
  Enumerations for the Thordata Python SDK.
3
-
4
- This module provides type-safe enumerations for all Thordata API parameters,
5
- making it easier to discover available options via IDE autocomplete.
3
+ Moved to thordata.types in v1.6.0.
4
+ This file is kept for backward compatibility.
6
5
  """
7
6
 
8
- from enum import Enum, IntEnum
9
-
10
- # =============================================================================
11
- # Continent Enum
12
- # =============================================================================
13
-
14
-
15
- class Continent(str, Enum):
16
- """
17
- Continent codes for geo-targeting.
18
- """
19
-
20
- AFRICA = "af"
21
- ANTARCTICA = "an"
22
- ASIA = "as"
23
- EUROPE = "eu"
24
- NORTH_AMERICA = "na"
25
- OCEANIA = "oc"
26
- SOUTH_AMERICA = "sa"
27
-
28
-
29
- # =============================================================================
30
- # Proxy Host Enum
31
- # =============================================================================
32
-
33
-
34
- class ProxyHost(str, Enum):
35
- """
36
- Available proxy gateway hosts.
37
-
38
- Note: Dashboard provides user-specific hosts like {shard}.{region}.thordata.net
39
- """
40
-
41
- DEFAULT = "pr.thordata.net"
42
- NORTH_AMERICA = "t.na.thordata.net"
43
- EUROPE = "t.eu.thordata.net"
44
-
45
-
46
- class ProxyPort(IntEnum):
47
- """
48
- Available proxy gateway ports.
49
- """
50
-
51
- RESIDENTIAL = 9999
52
- MOBILE = 5555
53
- DATACENTER = 7777
54
- ISP = 6666
55
-
56
-
57
- # =============================================================================
58
- # Search Engine Enums
59
- # =============================================================================
60
-
61
-
62
- class Engine(str, Enum):
63
- """
64
- Supported search engines for SERP API.
65
-
66
- Engine naming convention:
67
- - Base search: {engine} for basic web search (google, bing, yandex, duckduckgo)
68
- - Verticals: {engine}_{vertical} (e.g., google_news, bing_images)
69
- - Sub-verticals: {engine}_{vertical}_{sub} (e.g., google_scholar_cite)
70
- """
71
-
72
- # ===================
73
- # Google
74
- # ===================
75
- GOOGLE = "google"
76
- GOOGLE_SEARCH = "google_search"
77
- GOOGLE_AI_MODE = "google_ai_mode"
78
- GOOGLE_WEB = "google_web"
79
- GOOGLE_SHOPPING = "google_shopping"
80
- GOOGLE_LOCAL = "google_local"
81
- GOOGLE_VIDEOS = "google_videos"
82
- GOOGLE_NEWS = "google_news"
83
- GOOGLE_FLIGHTS = "google_flights"
84
- GOOGLE_IMAGES = "google_images"
85
- GOOGLE_LENS = "google_lens"
86
- GOOGLE_TRENDS = "google_trends"
87
- GOOGLE_HOTELS = "google_hotels"
88
- GOOGLE_PLAY = "google_play"
89
- GOOGLE_JOBS = "google_jobs"
90
- GOOGLE_SCHOLAR = "google_scholar"
91
- GOOGLE_SCHOLAR_CITE = "google_scholar_cite"
92
- GOOGLE_SCHOLAR_AUTHOR = "google_scholar_author"
93
- GOOGLE_MAPS = "google_maps"
94
- GOOGLE_FINANCE = "google_finance"
95
- GOOGLE_FINANCE_MARKETS = "google_finance_markets"
96
- GOOGLE_PATENTS = "google_patents"
97
- GOOGLE_PATENTS_DETAILS = "google_patents_details"
98
-
99
- # ===================
100
- # Bing
101
- # ===================
102
- BING = "bing"
103
- BING_SEARCH = "bing_search"
104
- BING_IMAGES = "bing_images"
105
- BING_VIDEOS = "bing_videos"
106
- BING_NEWS = "bing_news"
107
- BING_MAPS = "bing_maps"
108
- BING_SHOPPING = "bing_shopping"
109
-
110
- # ===================
111
- # Yandex
112
- # ===================
113
- YANDEX = "yandex"
114
- YANDEX_SEARCH = "yandex_search"
115
-
116
- # ===================
117
- # DuckDuckGo
118
- # ===================
119
- DUCKDUCKGO = "duckduckgo"
120
- DUCKDUCKGO_SEARCH = "duckduckgo_search"
121
-
122
-
123
- class GoogleSearchType(str, Enum):
124
- """
125
- Search types specific to Google.
126
-
127
- These map to the second part of Google engine names.
128
- For example, GOOGLE + NEWS = google_news
129
- """
130
-
131
- SEARCH = "search"
132
- AI_MODE = "ai_mode"
133
- WEB = "web"
134
- SHOPPING = "shopping"
135
- LOCAL = "local"
136
- VIDEOS = "videos"
137
- NEWS = "news"
138
- FLIGHTS = "flights"
139
- IMAGES = "images"
140
- LENS = "lens"
141
- TRENDS = "trends"
142
- HOTELS = "hotels"
143
- PLAY = "play"
144
- JOBS = "jobs"
145
- SCHOLAR = "scholar"
146
- MAPS = "maps"
147
- FINANCE = "finance"
148
- PATENTS = "patents"
149
-
150
-
151
- class BingSearchType(str, Enum):
152
- """
153
- Search types specific to Bing.
154
- """
155
-
156
- SEARCH = "search"
157
- IMAGES = "images"
158
- VIDEOS = "videos"
159
- NEWS = "news"
160
- MAPS = "maps"
161
- SHOPPING = "shopping"
162
-
163
-
164
- class GoogleTbm(str, Enum):
165
- """
166
- Google tbm (to be matched) parameter values.
167
-
168
- Only available when using specific Google engines that support tbm.
169
- """
170
-
171
- NEWS = "nws"
172
- SHOPPING = "shop"
173
- IMAGES = "isch"
174
- VIDEOS = "vid"
175
-
176
-
177
- class Device(str, Enum):
178
- """
179
- Device types for SERP API.
180
- """
181
-
182
- DESKTOP = "desktop"
183
- MOBILE = "mobile"
184
- TABLET = "tablet"
185
-
186
-
187
- class TimeRange(str, Enum):
188
- """
189
- Time range filters for search results.
190
- """
191
-
192
- HOUR = "hour"
193
- DAY = "day"
194
- WEEK = "week"
195
- MONTH = "month"
196
- YEAR = "year"
197
-
198
-
199
- # =============================================================================
200
- # Proxy Enums
201
- # =============================================================================
202
-
203
-
204
- class ProxyType(IntEnum):
205
- """
206
- Types of proxy networks available.
207
- """
208
-
209
- RESIDENTIAL = 1
210
- UNLIMITED = 2
211
- DATACENTER = 3
212
- ISP = 4
213
- MOBILE = 5
214
-
215
-
216
- class SessionType(str, Enum):
217
- """
218
- Proxy session types for connection persistence.
219
- """
220
-
221
- ROTATING = "rotating"
222
- STICKY = "sticky"
223
-
224
-
225
- # =============================================================================
226
- # Output Format Enums
227
- # =============================================================================
228
-
229
-
230
- class OutputFormat(str, Enum):
231
- """
232
- Output formats for Universal Scraping API.
233
-
234
- Currently supported: html, png
235
- """
236
-
237
- HTML = "html"
238
- PNG = "png"
239
-
240
-
241
- class DataFormat(str, Enum):
242
- """
243
- Data formats for task result download.
244
- """
245
-
246
- JSON = "json"
247
- CSV = "csv"
248
- XLSX = "xlsx"
249
-
250
-
251
- # =============================================================================
252
- # Task Status Enums
253
- # =============================================================================
254
-
255
-
256
- class TaskStatus(str, Enum):
257
- """
258
- Possible statuses for async scraping tasks.
259
- """
260
-
261
- PENDING = "pending"
262
- RUNNING = "running"
263
- READY = "ready"
264
- SUCCESS = "success"
265
- FINISHED = "finished"
266
- FAILED = "failed"
267
- ERROR = "error"
268
- CANCELLED = "cancelled"
269
- UNKNOWN = "unknown"
270
-
271
- @classmethod
272
- def is_terminal(cls, status: "TaskStatus") -> bool:
273
- """Check if a status is terminal (no more updates expected)."""
274
- return status in {
275
- cls.READY,
276
- cls.SUCCESS,
277
- cls.FINISHED,
278
- cls.FAILED,
279
- cls.ERROR,
280
- cls.CANCELLED,
281
- }
282
-
283
- @classmethod
284
- def is_success(cls, status: "TaskStatus") -> bool:
285
- """Check if a status indicates success."""
286
- return status in {cls.READY, cls.SUCCESS, cls.FINISHED}
287
-
288
- @classmethod
289
- def is_failure(cls, status: "TaskStatus") -> bool:
290
- """Check if a status indicates failure."""
291
- return status in {cls.FAILED, cls.ERROR}
292
-
293
-
294
- # =============================================================================
295
- # Country Enum (Common Countries)
296
- # =============================================================================
297
-
298
-
299
- class Country(str, Enum):
300
- """
301
- Common country codes for geo-targeting.
302
- """
303
-
304
- # North America
305
- US = "us"
306
- CA = "ca"
307
- MX = "mx"
308
-
309
- # Europe
310
- GB = "gb"
311
- DE = "de"
312
- FR = "fr"
313
- ES = "es"
314
- IT = "it"
315
- NL = "nl"
316
- PL = "pl"
317
- RU = "ru"
318
- UA = "ua"
319
- SE = "se"
320
- NO = "no"
321
- DK = "dk"
322
- FI = "fi"
323
- CH = "ch"
324
- AT = "at"
325
- BE = "be"
326
- PT = "pt"
327
- IE = "ie"
328
- CZ = "cz"
329
- GR = "gr"
330
-
331
- # Asia Pacific
332
- CN = "cn"
333
- JP = "jp"
334
- KR = "kr"
335
- IN = "in"
336
- AU = "au"
337
- NZ = "nz"
338
- SG = "sg"
339
- HK = "hk"
340
- TW = "tw"
341
- TH = "th"
342
- VN = "vn"
343
- ID = "id"
344
- MY = "my"
345
- PH = "ph"
346
- PK = "pk"
347
- BD = "bd"
348
-
349
- # South America
350
- BR = "br"
351
- AR = "ar"
352
- CL = "cl"
353
- CO = "co"
354
- PE = "pe"
355
- VE = "ve"
356
-
357
- # Middle East & Africa
358
- AE = "ae"
359
- SA = "sa"
360
- IL = "il"
361
- TR = "tr"
362
- ZA = "za"
363
- EG = "eg"
364
- NG = "ng"
365
- KE = "ke"
366
- MA = "ma"
367
-
368
-
369
- # =============================================================================
370
- # Helper Functions
371
- # =============================================================================
372
-
373
-
374
- def normalize_enum_value(value: object, enum_class: type) -> str:
375
- """
376
- Safely convert an enum or string to its string value.
377
- """
378
- if isinstance(value, enum_class):
379
- return str(getattr(value, "value", value)).lower()
380
- if isinstance(value, str):
381
- return value.lower()
382
- raise TypeError(
383
- f"Expected {enum_class.__name__} or str, got {type(value).__name__}"
384
- )
7
+ from .types import (
8
+ BingSearchType,
9
+ Continent,
10
+ Country,
11
+ DataFormat,
12
+ Device,
13
+ Engine,
14
+ GoogleSearchType,
15
+ GoogleTbm,
16
+ OutputFormat,
17
+ ProxyHost,
18
+ ProxyPort,
19
+ ProxyProduct,
20
+ ProxyType,
21
+ SessionType,
22
+ TaskStatus,
23
+ TimeRange,
24
+ normalize_enum_value,
25
+ )
26
+
27
+ __all__ = [
28
+ "Continent",
29
+ "ProxyHost",
30
+ "ProxyPort",
31
+ "Engine",
32
+ "GoogleSearchType",
33
+ "BingSearchType",
34
+ "GoogleTbm",
35
+ "Device",
36
+ "TimeRange",
37
+ "ProxyType",
38
+ "SessionType",
39
+ "OutputFormat",
40
+ "DataFormat",
41
+ "TaskStatus",
42
+ "Country",
43
+ "ProxyProduct",
44
+ "normalize_enum_value",
45
+ ]
thordata/exceptions.py CHANGED
@@ -15,6 +15,7 @@ Exception Hierarchy:
15
15
 
16
16
  from __future__ import annotations
17
17
 
18
+ from collections.abc import Mapping
18
19
  from typing import Any
19
20
 
20
21
  # =============================================================================
@@ -235,6 +236,46 @@ class ThordataNotCollectedError(ThordataAPIError):
235
236
  # =============================================================================
236
237
 
237
238
 
239
def _extract_request_id(payload: Any) -> str | None:
    """Return the request identifier carried by an API payload, if any.

    The payload is probed for the keys ``request_id``, ``requestId``,
    ``x_request_id`` and ``x-request-id``, in that order. Values that are
    ``None`` or that stringify to an empty/whitespace-only string are
    skipped, so a blank field under one spelling cannot hide a usable
    identifier stored under another spelling.

    Args:
        payload: Decoded response body. Anything that is not a mapping
            yields ``None``.

    Returns:
        The first usable identifier, stringified and stripped of
        surrounding whitespace, or ``None`` when no key holds one.
    """
    if not isinstance(payload, Mapping):
        return None
    for key in ("request_id", "requestId", "x_request_id", "x-request-id"):
        val = payload.get(key)
        if val is None:
            continue
        # Non-string identifiers (e.g. integers) are stringified as before.
        text = str(val).strip()
        if text:
            return text
    return None
246
+
247
+
248
def _extract_retry_after(payload: Any) -> int | None:
    """Return the retry delay advertised by an API payload, if any.

    The payload is probed for the keys ``retry_after``, ``retryAfter`` and
    ``retry-after``, in that order. The first key holding a usable,
    non-negative, finite number wins; unusable values are skipped so a
    later key can still supply the delay.

    Accepted values:
        * ``int`` -- returned unchanged.
        * ``float`` -- rounded up, so the caller never retries too early.
        * ``str`` -- surrounding whitespace is ignored and the text is
          parsed as an integer, falling back to a decimal number.

    Rejected values (skipped, never raised on):
        * ``bool`` -- it subclasses ``int`` but is not a delay.
        * negative numbers, ``nan`` and ``inf``.
        * text that is not a number. This includes characters such as
          ``"²"`` that ``str.isdigit`` accepts but ``int`` cannot parse.

    NOTE(review): the unit is presumably seconds, as with the HTTP
    ``Retry-After`` header -- confirm against the API documentation.

    Args:
        payload: Decoded response body. Anything that is not a mapping
            yields ``None``.

    Returns:
        The delay as a non-negative ``int``, or ``None`` when absent.
    """
    import math  # local import: only this helper needs it

    if not isinstance(payload, Mapping):
        return None
    for key in ("retry_after", "retryAfter", "retry-after"):
        val = payload.get(key)
        if isinstance(val, bool):
            # Checked first because isinstance(True, int) is True.
            continue
        if isinstance(val, str):
            text = val.strip()
            try:
                val = int(text)
            except ValueError:
                try:
                    val = float(text)
                except ValueError:
                    continue
        if not isinstance(val, (int, float)):
            continue
        if isinstance(val, float) and not math.isfinite(val):
            continue
        if val < 0:
            continue
        return math.ceil(val)
    return None
257
+
258
+
259
def _build_error_message(
    message: str,
    *,
    status_code: int | None,
    code: int | None,
    request_id: str | None,
) -> str:
    """Append diagnostic metadata to an error message.

    The metadata is rendered as a parenthesised, comma-separated suffix,
    e.g. ``"boom (http=500, code=300, request_id=abc)"``.

    Args:
        message: Human-readable error text. May be empty.
        status_code: HTTP status; rendered as ``http=<n>`` when not ``None``.
        code: Payload-level code; rendered as ``code=<n>`` when not ``None``
            and different from ``status_code`` (avoids repeating a value).
        request_id: Rendered as ``request_id=<id>`` when truthy.

    Returns:
        ``message`` unchanged when there is no metadata. Otherwise
        ``message`` followed by a space and the suffix, or the suffix
        alone when ``message`` is empty (no leading space).
    """
    meta: list[str] = []
    if status_code is not None:
        meta.append(f"http={status_code}")
    if code is not None and code != status_code:
        meta.append(f"code={code}")
    if request_id:
        meta.append(f"request_id={request_id}")
    if not meta:
        return message
    suffix = "(" + ", ".join(meta) + ")"
    # An empty message must not produce a dangling leading space.
    return f"{message} {suffix}" if message else suffix
277
+
278
+
238
279
  def raise_for_code(
239
280
  message: str,
240
281
  *,
@@ -266,49 +307,59 @@ def raise_for_code(
266
307
  # Determine the effective error code.
267
308
  # Prefer payload `code` when present and not success (200),
268
309
  # otherwise fall back to HTTP status when it indicates an error.
310
+ # Determine the effective error code for routing.
269
311
  effective_code: int | None = None
270
-
271
312
  if code is not None and code != 200:
272
313
  effective_code = code
273
- elif status_code is not None and status_code != 200:
314
+ elif status_code is not None and status_code >= 400:
274
315
  effective_code = status_code
275
316
  else:
276
317
  effective_code = code if code is not None else status_code
277
318
 
319
+ # Extract additional context from payload
320
+ final_request_id = request_id or _extract_request_id(payload)
321
+
322
+ # Build a consistent, informative error message
323
+ final_message = _build_error_message(
324
+ message,
325
+ status_code=status_code,
326
+ code=code,
327
+ request_id=final_request_id,
328
+ )
329
+
330
+ # Prepare common arguments for exception constructors
278
331
  kwargs = {
279
332
  "status_code": status_code,
280
333
  "code": code,
281
334
  "payload": payload,
282
- "request_id": request_id,
335
+ "request_id": final_request_id,
283
336
  }
284
337
 
338
+ # --- Route to the correct exception class ---
339
+
285
340
  # Not collected (API payload code 300, often retryable, not billed)
286
- # Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
287
341
  if effective_code in ThordataNotCollectedError.API_CODES:
288
- raise ThordataNotCollectedError(message, **kwargs)
342
+ raise ThordataNotCollectedError(final_message, **kwargs)
289
343
 
290
- # Auth errors
344
+ # Auth errors (401, 403)
291
345
  if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
292
- raise ThordataAuthError(message, **kwargs)
346
+ raise ThordataAuthError(final_message, **kwargs)
293
347
 
294
- # Rate limit errors
348
+ # Rate limit errors (429, 402)
295
349
  if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
296
- # Try to extract retry_after from payload
297
- retry_after = None
298
- if isinstance(payload, dict):
299
- retry_after = payload.get("retry_after")
300
- raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
350
+ retry_after = _extract_retry_after(payload)
351
+ raise ThordataRateLimitError(final_message, retry_after=retry_after, **kwargs)
301
352
 
302
- # Server errors
353
+ # Server errors (5xx)
303
354
  if effective_code is not None and 500 <= effective_code < 600:
304
- raise ThordataServerError(message, **kwargs)
355
+ raise ThordataServerError(final_message, **kwargs)
305
356
 
306
- # Validation errors
357
+ # Validation errors (400, 422)
307
358
  if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
308
- raise ThordataValidationError(message, **kwargs)
359
+ raise ThordataValidationError(final_message, **kwargs)
309
360
 
310
- # Generic API error
311
- raise ThordataAPIError(message, **kwargs)
361
+ # Fallback to generic API error if no specific match
362
+ raise ThordataAPIError(final_message, **kwargs)
312
363
 
313
364
 
314
365
  # =============================================================================