thordata-sdk 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +15 -1
- thordata/_utils.py +66 -3
- thordata/async_client.py +787 -8
- thordata/client.py +851 -33
- thordata/enums.py +85 -16
- thordata/exceptions.py +16 -5
- thordata/models.py +294 -0
- thordata/retry.py +4 -1
- thordata_sdk-0.8.0.dist-info/METADATA +212 -0
- thordata_sdk-0.8.0.dist-info/RECORD +14 -0
- thordata/parameters.py +0 -53
- thordata_sdk-0.6.0.dist-info/METADATA +0 -1053
- thordata_sdk-0.6.0.dist-info/RECORD +0 -15
- {thordata_sdk-0.6.0.dist-info → thordata_sdk-0.8.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.6.0.dist-info → thordata_sdk-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.6.0.dist-info → thordata_sdk-0.8.0.dist-info}/top_level.txt +0 -0
thordata/enums.py
CHANGED
|
@@ -34,12 +34,13 @@ class Continent(str, Enum):
|
|
|
34
34
|
class ProxyHost(str, Enum):
|
|
35
35
|
"""
|
|
36
36
|
Available proxy gateway hosts.
|
|
37
|
+
|
|
38
|
+
Note: Dashboard provides user-specific hosts like {shard}.{region}.thordata.net
|
|
37
39
|
"""
|
|
38
40
|
|
|
39
41
|
DEFAULT = "pr.thordata.net"
|
|
40
42
|
NORTH_AMERICA = "t.na.thordata.net"
|
|
41
43
|
EUROPE = "t.eu.thordata.net"
|
|
42
|
-
GATE = "gate.thordata.com"
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
class ProxyPort(IntEnum):
|
|
@@ -47,11 +48,10 @@ class ProxyPort(IntEnum):
|
|
|
47
48
|
Available proxy gateway ports.
|
|
48
49
|
"""
|
|
49
50
|
|
|
50
|
-
|
|
51
|
+
RESIDENTIAL = 9999
|
|
51
52
|
MOBILE = 5555
|
|
52
53
|
DATACENTER = 7777
|
|
53
54
|
ISP = 6666
|
|
54
|
-
ALTERNATIVE = 22225
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
# =============================================================================
|
|
@@ -62,33 +62,90 @@ class ProxyPort(IntEnum):
|
|
|
62
62
|
class Engine(str, Enum):
|
|
63
63
|
"""
|
|
64
64
|
Supported search engines for SERP API.
|
|
65
|
+
|
|
66
|
+
Engine naming convention:
|
|
67
|
+
- Base search: {engine} for basic web search (google, bing, yandex, duckduckgo)
|
|
68
|
+
- Verticals: {engine}_{vertical} (e.g., google_news, bing_images)
|
|
69
|
+
- Sub-verticals: {engine}_{vertical}_{sub} (e.g., google_scholar_cite)
|
|
65
70
|
"""
|
|
66
71
|
|
|
72
|
+
# ===================
|
|
73
|
+
# Google
|
|
74
|
+
# ===================
|
|
67
75
|
GOOGLE = "google"
|
|
76
|
+
GOOGLE_SEARCH = "google_search"
|
|
77
|
+
GOOGLE_AI_MODE = "google_ai_mode"
|
|
78
|
+
GOOGLE_WEB = "google_web"
|
|
79
|
+
GOOGLE_SHOPPING = "google_shopping"
|
|
80
|
+
GOOGLE_LOCAL = "google_local"
|
|
81
|
+
GOOGLE_VIDEOS = "google_videos"
|
|
82
|
+
GOOGLE_NEWS = "google_news"
|
|
83
|
+
GOOGLE_FLIGHTS = "google_flights"
|
|
84
|
+
GOOGLE_IMAGES = "google_images"
|
|
85
|
+
GOOGLE_LENS = "google_lens"
|
|
86
|
+
GOOGLE_TRENDS = "google_trends"
|
|
87
|
+
GOOGLE_HOTELS = "google_hotels"
|
|
88
|
+
GOOGLE_PLAY = "google_play"
|
|
89
|
+
GOOGLE_JOBS = "google_jobs"
|
|
90
|
+
GOOGLE_SCHOLAR = "google_scholar"
|
|
91
|
+
GOOGLE_SCHOLAR_CITE = "google_scholar_cite"
|
|
92
|
+
GOOGLE_SCHOLAR_AUTHOR = "google_scholar_author"
|
|
93
|
+
GOOGLE_MAPS = "google_maps"
|
|
94
|
+
GOOGLE_FINANCE = "google_finance"
|
|
95
|
+
GOOGLE_FINANCE_MARKETS = "google_finance_markets"
|
|
96
|
+
GOOGLE_PATENTS = "google_patents"
|
|
97
|
+
GOOGLE_PATENTS_DETAILS = "google_patents_details"
|
|
98
|
+
|
|
99
|
+
# ===================
|
|
100
|
+
# Bing
|
|
101
|
+
# ===================
|
|
68
102
|
BING = "bing"
|
|
103
|
+
BING_SEARCH = "bing_search"
|
|
104
|
+
BING_IMAGES = "bing_images"
|
|
105
|
+
BING_VIDEOS = "bing_videos"
|
|
106
|
+
BING_NEWS = "bing_news"
|
|
107
|
+
BING_MAPS = "bing_maps"
|
|
108
|
+
BING_SHOPPING = "bing_shopping"
|
|
109
|
+
|
|
110
|
+
# ===================
|
|
111
|
+
# Yandex
|
|
112
|
+
# ===================
|
|
69
113
|
YANDEX = "yandex"
|
|
114
|
+
YANDEX_SEARCH = "yandex_search"
|
|
115
|
+
|
|
116
|
+
# ===================
|
|
117
|
+
# DuckDuckGo
|
|
118
|
+
# ===================
|
|
70
119
|
DUCKDUCKGO = "duckduckgo"
|
|
71
|
-
|
|
72
|
-
YAHOO = "yahoo"
|
|
73
|
-
NAVER = "naver"
|
|
120
|
+
DUCKDUCKGO_SEARCH = "duckduckgo_search"
|
|
74
121
|
|
|
75
122
|
|
|
76
123
|
class GoogleSearchType(str, Enum):
|
|
77
124
|
"""
|
|
78
125
|
Search types specific to Google.
|
|
126
|
+
|
|
127
|
+
These map to the second part of Google engine names.
|
|
128
|
+
For example, GOOGLE + NEWS = google_news
|
|
79
129
|
"""
|
|
80
130
|
|
|
81
131
|
SEARCH = "search"
|
|
82
|
-
|
|
132
|
+
AI_MODE = "ai_mode"
|
|
133
|
+
WEB = "web"
|
|
83
134
|
SHOPPING = "shopping"
|
|
135
|
+
LOCAL = "local"
|
|
136
|
+
VIDEOS = "videos"
|
|
84
137
|
NEWS = "news"
|
|
138
|
+
FLIGHTS = "flights"
|
|
85
139
|
IMAGES = "images"
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
140
|
+
LENS = "lens"
|
|
141
|
+
TRENDS = "trends"
|
|
142
|
+
HOTELS = "hotels"
|
|
143
|
+
PLAY = "play"
|
|
89
144
|
JOBS = "jobs"
|
|
90
|
-
|
|
145
|
+
SCHOLAR = "scholar"
|
|
146
|
+
MAPS = "maps"
|
|
91
147
|
FINANCE = "finance"
|
|
148
|
+
PATENTS = "patents"
|
|
92
149
|
|
|
93
150
|
|
|
94
151
|
class BingSearchType(str, Enum):
|
|
@@ -101,6 +158,20 @@ class BingSearchType(str, Enum):
|
|
|
101
158
|
VIDEOS = "videos"
|
|
102
159
|
NEWS = "news"
|
|
103
160
|
MAPS = "maps"
|
|
161
|
+
SHOPPING = "shopping"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class GoogleTbm(str, Enum):
|
|
165
|
+
"""
|
|
166
|
+
Google tbm (to be matched) parameter values.
|
|
167
|
+
|
|
168
|
+
Only available when using specific Google engines that support tbm.
|
|
169
|
+
"""
|
|
170
|
+
|
|
171
|
+
NEWS = "nws"
|
|
172
|
+
SHOPPING = "shop"
|
|
173
|
+
IMAGES = "isch"
|
|
174
|
+
VIDEOS = "vid"
|
|
104
175
|
|
|
105
176
|
|
|
106
177
|
class Device(str, Enum):
|
|
@@ -159,13 +230,12 @@ class SessionType(str, Enum):
|
|
|
159
230
|
class OutputFormat(str, Enum):
|
|
160
231
|
"""
|
|
161
232
|
Output formats for Universal Scraping API.
|
|
233
|
+
|
|
234
|
+
Currently supported: html, png
|
|
162
235
|
"""
|
|
163
236
|
|
|
164
237
|
HTML = "html"
|
|
165
238
|
PNG = "png"
|
|
166
|
-
PDF = "pdf"
|
|
167
|
-
MARKDOWN = "markdown"
|
|
168
|
-
TEXT = "text"
|
|
169
239
|
|
|
170
240
|
|
|
171
241
|
class DataFormat(str, Enum):
|
|
@@ -222,7 +292,7 @@ class TaskStatus(str, Enum):
|
|
|
222
292
|
|
|
223
293
|
|
|
224
294
|
# =============================================================================
|
|
225
|
-
# Country Enum (
|
|
295
|
+
# Country Enum (Common Countries)
|
|
226
296
|
# =============================================================================
|
|
227
297
|
|
|
228
298
|
|
|
@@ -306,7 +376,6 @@ def normalize_enum_value(value: object, enum_class: type) -> str:
|
|
|
306
376
|
Safely convert an enum or string to its string value.
|
|
307
377
|
"""
|
|
308
378
|
if isinstance(value, enum_class):
|
|
309
|
-
# value is an enum member, get its .value
|
|
310
379
|
return str(getattr(value, "value", value)).lower()
|
|
311
380
|
if isinstance(value, str):
|
|
312
381
|
return value.lower()
|
thordata/exceptions.py
CHANGED
|
@@ -222,7 +222,8 @@ class ThordataNotCollectedError(ThordataAPIError):
|
|
|
222
222
|
This error is often transient and typically safe to retry.
|
|
223
223
|
"""
|
|
224
224
|
|
|
225
|
-
|
|
225
|
+
API_CODES = {300}
|
|
226
|
+
HTTP_STATUS_CODES: Set[int] = set()
|
|
226
227
|
|
|
227
228
|
@property
|
|
228
229
|
def is_retryable(self) -> bool:
|
|
@@ -262,8 +263,17 @@ def raise_for_code(
|
|
|
262
263
|
ThordataValidationError: For 400/422 codes.
|
|
263
264
|
ThordataAPIError: For all other error codes.
|
|
264
265
|
"""
|
|
265
|
-
#
|
|
266
|
-
|
|
266
|
+
# Determine the effective error code.
|
|
267
|
+
# Prefer payload `code` when present and not success (200),
|
|
268
|
+
# otherwise fall back to HTTP status when it indicates an error.
|
|
269
|
+
effective_code: Optional[int] = None
|
|
270
|
+
|
|
271
|
+
if code is not None and code != 200:
|
|
272
|
+
effective_code = code
|
|
273
|
+
elif status_code is not None and status_code != 200:
|
|
274
|
+
effective_code = status_code
|
|
275
|
+
else:
|
|
276
|
+
effective_code = code if code is not None else status_code
|
|
267
277
|
|
|
268
278
|
kwargs = {
|
|
269
279
|
"status_code": status_code,
|
|
@@ -272,8 +282,9 @@ def raise_for_code(
|
|
|
272
282
|
"request_id": request_id,
|
|
273
283
|
}
|
|
274
284
|
|
|
275
|
-
# Not collected (often retryable, not billed)
|
|
276
|
-
|
|
285
|
+
# Not collected (API payload code 300, often retryable, not billed)
|
|
286
|
+
# Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
|
|
287
|
+
if effective_code in ThordataNotCollectedError.API_CODES:
|
|
277
288
|
raise ThordataNotCollectedError(message, **kwargs)
|
|
278
289
|
|
|
279
290
|
# Auth errors
|
thordata/models.py
CHANGED
|
@@ -795,6 +795,126 @@ class ScraperTaskConfig:
|
|
|
795
795
|
return payload
|
|
796
796
|
|
|
797
797
|
|
|
798
|
+
@dataclass
|
|
799
|
+
class CommonSettings:
|
|
800
|
+
"""
|
|
801
|
+
Common settings for YouTube video/audio downloads.
|
|
802
|
+
|
|
803
|
+
Used by /video_builder endpoint as `common_settings` parameter.
|
|
804
|
+
Also known as `spider_universal` in some documentation.
|
|
805
|
+
|
|
806
|
+
Args:
|
|
807
|
+
resolution: Video resolution (360p/480p/720p/1080p/1440p/2160p).
|
|
808
|
+
audio_format: Audio format (opus/mp3).
|
|
809
|
+
bitrate: Audio bitrate (48/64/128/160/256/320 or with Kbps suffix).
|
|
810
|
+
is_subtitles: Whether to download subtitles ("true"/"false").
|
|
811
|
+
subtitles_language: Subtitle language code (e.g., "en", "zh-Hans").
|
|
812
|
+
|
|
813
|
+
Example for video:
|
|
814
|
+
>>> settings = CommonSettings(
|
|
815
|
+
... resolution="1080p",
|
|
816
|
+
... is_subtitles="true",
|
|
817
|
+
... subtitles_language="en"
|
|
818
|
+
... )
|
|
819
|
+
|
|
820
|
+
Example for audio:
|
|
821
|
+
>>> settings = CommonSettings(
|
|
822
|
+
... audio_format="mp3",
|
|
823
|
+
... bitrate="320",
|
|
824
|
+
... is_subtitles="true",
|
|
825
|
+
... subtitles_language="en"
|
|
826
|
+
... )
|
|
827
|
+
"""
|
|
828
|
+
|
|
829
|
+
# Video settings
|
|
830
|
+
resolution: Optional[str] = None
|
|
831
|
+
|
|
832
|
+
# Audio settings
|
|
833
|
+
audio_format: Optional[str] = None
|
|
834
|
+
bitrate: Optional[str] = None
|
|
835
|
+
|
|
836
|
+
# Subtitle settings (used by both video and audio)
|
|
837
|
+
is_subtitles: Optional[str] = None
|
|
838
|
+
subtitles_language: Optional[str] = None
|
|
839
|
+
|
|
840
|
+
# Valid values for validation
|
|
841
|
+
VALID_RESOLUTIONS = {"360p", "480p", "720p", "1080p", "1440p", "2160p"}
|
|
842
|
+
VALID_AUDIO_FORMATS = {"opus", "mp3"}
|
|
843
|
+
|
|
844
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
845
|
+
"""Convert to dictionary, excluding None values."""
|
|
846
|
+
result = {}
|
|
847
|
+
if self.resolution is not None:
|
|
848
|
+
result["resolution"] = self.resolution
|
|
849
|
+
if self.audio_format is not None:
|
|
850
|
+
result["audio_format"] = self.audio_format
|
|
851
|
+
if self.bitrate is not None:
|
|
852
|
+
result["bitrate"] = self.bitrate
|
|
853
|
+
if self.is_subtitles is not None:
|
|
854
|
+
result["is_subtitles"] = self.is_subtitles
|
|
855
|
+
if self.subtitles_language is not None:
|
|
856
|
+
result["subtitles_language"] = self.subtitles_language
|
|
857
|
+
return result
|
|
858
|
+
|
|
859
|
+
def to_json(self) -> str:
|
|
860
|
+
"""Convert to JSON string for form submission."""
|
|
861
|
+
return json.dumps(self.to_dict())
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
@dataclass
|
|
865
|
+
class VideoTaskConfig:
|
|
866
|
+
"""
|
|
867
|
+
Configuration for creating a YouTube video/audio download task.
|
|
868
|
+
|
|
869
|
+
Uses the /video_builder endpoint.
|
|
870
|
+
|
|
871
|
+
Args:
|
|
872
|
+
file_name: Name for the output file. Supports {{TasksID}}, {{VideoID}}.
|
|
873
|
+
spider_id: Spider identifier (e.g., "youtube_video_by-url", "youtube_audio_by-url").
|
|
874
|
+
spider_name: Spider name (typically "youtube.com").
|
|
875
|
+
parameters: Spider-specific parameters (e.g., video URL).
|
|
876
|
+
common_settings: Video/audio settings (resolution, format, subtitles).
|
|
877
|
+
include_errors: Include error details in output.
|
|
878
|
+
|
|
879
|
+
Example:
|
|
880
|
+
>>> config = VideoTaskConfig(
|
|
881
|
+
... file_name="{{VideoID}}",
|
|
882
|
+
... spider_id="youtube_video_by-url",
|
|
883
|
+
... spider_name="youtube.com",
|
|
884
|
+
... parameters={"url": "https://www.youtube.com/watch?v=xxx"},
|
|
885
|
+
... common_settings=CommonSettings(
|
|
886
|
+
... resolution="1080p",
|
|
887
|
+
... is_subtitles="true",
|
|
888
|
+
... subtitles_language="en"
|
|
889
|
+
... )
|
|
890
|
+
... )
|
|
891
|
+
"""
|
|
892
|
+
|
|
893
|
+
file_name: str
|
|
894
|
+
spider_id: str
|
|
895
|
+
spider_name: str
|
|
896
|
+
parameters: Dict[str, Any]
|
|
897
|
+
common_settings: CommonSettings
|
|
898
|
+
include_errors: bool = True
|
|
899
|
+
|
|
900
|
+
def to_payload(self) -> Dict[str, Any]:
|
|
901
|
+
"""
|
|
902
|
+
Convert to API request payload.
|
|
903
|
+
|
|
904
|
+
Returns:
|
|
905
|
+
Dictionary ready to be sent to the video_builder API.
|
|
906
|
+
"""
|
|
907
|
+
payload: Dict[str, Any] = {
|
|
908
|
+
"file_name": self.file_name,
|
|
909
|
+
"spider_id": self.spider_id,
|
|
910
|
+
"spider_name": self.spider_name,
|
|
911
|
+
"spider_parameters": json.dumps([self.parameters]),
|
|
912
|
+
"spider_errors": "true" if self.include_errors else "false",
|
|
913
|
+
"common_settings": self.common_settings.to_json(),
|
|
914
|
+
}
|
|
915
|
+
return payload
|
|
916
|
+
|
|
917
|
+
|
|
798
918
|
# =============================================================================
|
|
799
919
|
# Response Models
|
|
800
920
|
# =============================================================================
|
|
@@ -838,3 +958,177 @@ class TaskStatusResponse:
|
|
|
838
958
|
"""Check if the task failed."""
|
|
839
959
|
failure_statuses = {"failed", "error"}
|
|
840
960
|
return self.status.lower() in failure_statuses
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
@dataclass
|
|
964
|
+
class UsageStatistics:
|
|
965
|
+
"""
|
|
966
|
+
Response model for account usage statistics.
|
|
967
|
+
|
|
968
|
+
Attributes:
|
|
969
|
+
total_usage_traffic: Total traffic used (KB).
|
|
970
|
+
traffic_balance: Remaining traffic balance (KB).
|
|
971
|
+
query_days: Number of days in the query range.
|
|
972
|
+
range_usage_traffic: Traffic used in the specified date range (KB).
|
|
973
|
+
data: Daily usage breakdown.
|
|
974
|
+
"""
|
|
975
|
+
|
|
976
|
+
total_usage_traffic: float
|
|
977
|
+
traffic_balance: float
|
|
978
|
+
query_days: int
|
|
979
|
+
range_usage_traffic: float
|
|
980
|
+
data: List[Dict[str, Any]]
|
|
981
|
+
|
|
982
|
+
@classmethod
|
|
983
|
+
def from_dict(cls, data: Dict[str, Any]) -> "UsageStatistics":
|
|
984
|
+
"""Create from API response dict."""
|
|
985
|
+
return cls(
|
|
986
|
+
total_usage_traffic=float(data.get("total_usage_traffic", 0)),
|
|
987
|
+
traffic_balance=float(data.get("traffic_balance", 0)),
|
|
988
|
+
query_days=int(data.get("query_days", 0)),
|
|
989
|
+
range_usage_traffic=float(data.get("range_usage_traffic", 0)),
|
|
990
|
+
data=data.get("data", []),
|
|
991
|
+
)
|
|
992
|
+
|
|
993
|
+
def total_usage_gb(self) -> float:
|
|
994
|
+
"""Get total usage in GB."""
|
|
995
|
+
return self.total_usage_traffic / (1024 * 1024)
|
|
996
|
+
|
|
997
|
+
def balance_gb(self) -> float:
|
|
998
|
+
"""Get balance in GB."""
|
|
999
|
+
return self.traffic_balance / (1024 * 1024)
|
|
1000
|
+
|
|
1001
|
+
def range_usage_gb(self) -> float:
|
|
1002
|
+
"""Get range usage in GB."""
|
|
1003
|
+
return self.range_usage_traffic / (1024 * 1024)
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
@dataclass
|
|
1007
|
+
class ProxyUser:
|
|
1008
|
+
"""
|
|
1009
|
+
Proxy user (sub-account) information.
|
|
1010
|
+
|
|
1011
|
+
Attributes:
|
|
1012
|
+
username: User's username.
|
|
1013
|
+
password: User's password.
|
|
1014
|
+
status: User status (True=enabled, False=disabled).
|
|
1015
|
+
traffic_limit: Traffic limit in MB (0 = unlimited).
|
|
1016
|
+
usage_traffic: Traffic used in KB.
|
|
1017
|
+
"""
|
|
1018
|
+
|
|
1019
|
+
username: str
|
|
1020
|
+
password: str
|
|
1021
|
+
status: bool
|
|
1022
|
+
traffic_limit: int
|
|
1023
|
+
usage_traffic: float
|
|
1024
|
+
|
|
1025
|
+
@classmethod
|
|
1026
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyUser":
|
|
1027
|
+
"""Create from API response dict."""
|
|
1028
|
+
return cls(
|
|
1029
|
+
username=data.get("username", ""),
|
|
1030
|
+
password=data.get("password", ""),
|
|
1031
|
+
status=data.get("status") in (True, "true", 1),
|
|
1032
|
+
traffic_limit=int(data.get("traffic_limit", 0)),
|
|
1033
|
+
usage_traffic=float(data.get("usage_traffic", 0)),
|
|
1034
|
+
)
|
|
1035
|
+
|
|
1036
|
+
def usage_gb(self) -> float:
|
|
1037
|
+
"""Get usage in GB."""
|
|
1038
|
+
return self.usage_traffic / (1024 * 1024)
|
|
1039
|
+
|
|
1040
|
+
def limit_gb(self) -> float:
|
|
1041
|
+
"""Get limit in GB (0 means unlimited)."""
|
|
1042
|
+
if self.traffic_limit == 0:
|
|
1043
|
+
return 0
|
|
1044
|
+
return self.traffic_limit / 1024
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
@dataclass
|
|
1048
|
+
class ProxyUserList:
|
|
1049
|
+
"""
|
|
1050
|
+
Response model for proxy user list.
|
|
1051
|
+
|
|
1052
|
+
Attributes:
|
|
1053
|
+
limit: Total traffic limit (KB).
|
|
1054
|
+
remaining_limit: Remaining traffic limit (KB).
|
|
1055
|
+
user_count: Number of users.
|
|
1056
|
+
users: List of proxy users.
|
|
1057
|
+
"""
|
|
1058
|
+
|
|
1059
|
+
limit: float
|
|
1060
|
+
remaining_limit: float
|
|
1061
|
+
user_count: int
|
|
1062
|
+
users: List[ProxyUser]
|
|
1063
|
+
|
|
1064
|
+
@classmethod
|
|
1065
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyUserList":
|
|
1066
|
+
"""Create from API response dict."""
|
|
1067
|
+
user_list = data.get("list", [])
|
|
1068
|
+
users = [ProxyUser.from_dict(u) for u in user_list]
|
|
1069
|
+
|
|
1070
|
+
return cls(
|
|
1071
|
+
limit=float(data.get("limit", 0)),
|
|
1072
|
+
remaining_limit=float(data.get("remaining_limit", 0)),
|
|
1073
|
+
user_count=int(data.get("user_count", len(users))),
|
|
1074
|
+
users=users,
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
|
|
1078
|
+
@dataclass
|
|
1079
|
+
class ProxyServer:
|
|
1080
|
+
"""
|
|
1081
|
+
ISP or Datacenter proxy server information.
|
|
1082
|
+
|
|
1083
|
+
Attributes:
|
|
1084
|
+
ip: Proxy server IP address.
|
|
1085
|
+
port: Proxy server port.
|
|
1086
|
+
username: Authentication username.
|
|
1087
|
+
password: Authentication password.
|
|
1088
|
+
expiration_time: Expiration timestamp (Unix timestamp or datetime string).
|
|
1089
|
+
region: Server region (optional).
|
|
1090
|
+
"""
|
|
1091
|
+
|
|
1092
|
+
ip: str
|
|
1093
|
+
port: int
|
|
1094
|
+
username: str
|
|
1095
|
+
password: str
|
|
1096
|
+
expiration_time: Optional[Union[int, str]] = None
|
|
1097
|
+
region: Optional[str] = None
|
|
1098
|
+
|
|
1099
|
+
@classmethod
|
|
1100
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyServer":
|
|
1101
|
+
"""Create from API response dict."""
|
|
1102
|
+
return cls(
|
|
1103
|
+
ip=data.get("ip", ""),
|
|
1104
|
+
port=int(data.get("port", 0)),
|
|
1105
|
+
username=data.get("username", data.get("user", "")),
|
|
1106
|
+
password=data.get("password", data.get("pwd", "")),
|
|
1107
|
+
expiration_time=data.get("expiration_time", data.get("expireTime")),
|
|
1108
|
+
region=data.get("region"),
|
|
1109
|
+
)
|
|
1110
|
+
|
|
1111
|
+
def to_proxy_url(self, protocol: str = "http") -> str:
|
|
1112
|
+
"""
|
|
1113
|
+
Build proxy URL for this server.
|
|
1114
|
+
|
|
1115
|
+
Args:
|
|
1116
|
+
protocol: Proxy protocol (http/https/socks5).
|
|
1117
|
+
|
|
1118
|
+
Returns:
|
|
1119
|
+
Complete proxy URL.
|
|
1120
|
+
"""
|
|
1121
|
+
return f"{protocol}://{self.username}:{self.password}@{self.ip}:{self.port}"
|
|
1122
|
+
|
|
1123
|
+
def is_expired(self) -> bool:
|
|
1124
|
+
"""Check if proxy has expired (if expiration_time is available)."""
|
|
1125
|
+
if self.expiration_time is None:
|
|
1126
|
+
return False
|
|
1127
|
+
|
|
1128
|
+
import time
|
|
1129
|
+
|
|
1130
|
+
if isinstance(self.expiration_time, int):
|
|
1131
|
+
return time.time() > self.expiration_time
|
|
1132
|
+
|
|
1133
|
+
# String timestamp handling would need datetime parsing
|
|
1134
|
+
return False
|
thordata/retry.py
CHANGED
|
@@ -64,7 +64,10 @@ class RetryConfig:
|
|
|
64
64
|
|
|
65
65
|
# Status codes to retry on (5xx server errors + 429 rate limit)
|
|
66
66
|
retry_on_status_codes: Set[int] = field(
|
|
67
|
-
default_factory=lambda: {
|
|
67
|
+
default_factory=lambda: {429, 500, 502, 503, 504}
|
|
68
|
+
)
|
|
69
|
+
retry_on_api_codes: Set[int] = field(
|
|
70
|
+
default_factory=lambda: {300} # API response body code
|
|
68
71
|
)
|
|
69
72
|
|
|
70
73
|
# Exception types to always retry on
|