thordata-sdk 0.7.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +13 -1
- thordata/_example_utils.py +76 -0
- thordata/_utils.py +46 -3
- thordata/async_client.py +863 -23
- thordata/client.py +1023 -51
- thordata/enums.py +3 -3
- thordata/exceptions.py +16 -5
- thordata/models.py +351 -7
- thordata/retry.py +6 -4
- thordata_sdk-1.0.0.dist-info/METADATA +208 -0
- thordata_sdk-1.0.0.dist-info/RECORD +15 -0
- thordata/parameters.py +0 -53
- thordata_sdk-0.7.0.dist-info/METADATA +0 -1053
- thordata_sdk-0.7.0.dist-info/RECORD +0 -15
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-0.7.0.dist-info → thordata_sdk-1.0.0.dist-info}/top_level.txt +0 -0
thordata/enums.py
CHANGED
|
@@ -34,12 +34,13 @@ class Continent(str, Enum):
|
|
|
34
34
|
class ProxyHost(str, Enum):
|
|
35
35
|
"""
|
|
36
36
|
Available proxy gateway hosts.
|
|
37
|
+
|
|
38
|
+
Note: Dashboard provides user-specific hosts like {shard}.{region}.thordata.net
|
|
37
39
|
"""
|
|
38
40
|
|
|
39
41
|
DEFAULT = "pr.thordata.net"
|
|
40
42
|
NORTH_AMERICA = "t.na.thordata.net"
|
|
41
43
|
EUROPE = "t.eu.thordata.net"
|
|
42
|
-
GATE = "gate.thordata.com"
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
class ProxyPort(IntEnum):
|
|
@@ -47,11 +48,10 @@ class ProxyPort(IntEnum):
|
|
|
47
48
|
Available proxy gateway ports.
|
|
48
49
|
"""
|
|
49
50
|
|
|
50
|
-
|
|
51
|
+
RESIDENTIAL = 9999
|
|
51
52
|
MOBILE = 5555
|
|
52
53
|
DATACENTER = 7777
|
|
53
54
|
ISP = 6666
|
|
54
|
-
ALTERNATIVE = 22225
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
# =============================================================================
|
thordata/exceptions.py
CHANGED
|
@@ -222,7 +222,8 @@ class ThordataNotCollectedError(ThordataAPIError):
|
|
|
222
222
|
This error is often transient and typically safe to retry.
|
|
223
223
|
"""
|
|
224
224
|
|
|
225
|
-
|
|
225
|
+
API_CODES = {300}
|
|
226
|
+
HTTP_STATUS_CODES: Set[int] = set()
|
|
226
227
|
|
|
227
228
|
@property
|
|
228
229
|
def is_retryable(self) -> bool:
|
|
@@ -262,8 +263,17 @@ def raise_for_code(
|
|
|
262
263
|
ThordataValidationError: For 400/422 codes.
|
|
263
264
|
ThordataAPIError: For all other error codes.
|
|
264
265
|
"""
|
|
265
|
-
#
|
|
266
|
-
|
|
266
|
+
# Determine the effective error code.
|
|
267
|
+
# Prefer payload `code` when present and not success (200),
|
|
268
|
+
# otherwise fall back to HTTP status when it indicates an error.
|
|
269
|
+
effective_code: Optional[int] = None
|
|
270
|
+
|
|
271
|
+
if code is not None and code != 200:
|
|
272
|
+
effective_code = code
|
|
273
|
+
elif status_code is not None and status_code != 200:
|
|
274
|
+
effective_code = status_code
|
|
275
|
+
else:
|
|
276
|
+
effective_code = code if code is not None else status_code
|
|
267
277
|
|
|
268
278
|
kwargs = {
|
|
269
279
|
"status_code": status_code,
|
|
@@ -272,8 +282,9 @@ def raise_for_code(
|
|
|
272
282
|
"request_id": request_id,
|
|
273
283
|
}
|
|
274
284
|
|
|
275
|
-
# Not collected (often retryable, not billed)
|
|
276
|
-
|
|
285
|
+
# Not collected (API payload code 300, often retryable, not billed)
|
|
286
|
+
# Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
|
|
287
|
+
if effective_code in ThordataNotCollectedError.API_CODES:
|
|
277
288
|
raise ThordataNotCollectedError(message, **kwargs)
|
|
278
289
|
|
|
279
290
|
# Auth errors
|
thordata/models.py
CHANGED
|
@@ -26,11 +26,14 @@ from __future__ import annotations
|
|
|
26
26
|
|
|
27
27
|
import json
|
|
28
28
|
import re
|
|
29
|
+
import ssl
|
|
29
30
|
import uuid
|
|
30
31
|
from dataclasses import dataclass, field
|
|
31
32
|
from enum import Enum
|
|
32
33
|
from typing import Any, Dict, List, Optional, Union
|
|
33
34
|
|
|
35
|
+
import urllib3
|
|
36
|
+
|
|
34
37
|
# =============================================================================
|
|
35
38
|
# Proxy Product Types
|
|
36
39
|
# =============================================================================
|
|
@@ -137,6 +140,7 @@ class ProxyConfig:
|
|
|
137
140
|
if self.host is None:
|
|
138
141
|
# Set host based on product type
|
|
139
142
|
host_map = {
|
|
143
|
+
# User&Pass auth entry (docs examples use t.pr.thordata.net for authenticated proxy)
|
|
140
144
|
ProxyProduct.RESIDENTIAL: "t.pr.thordata.net",
|
|
141
145
|
ProxyProduct.DATACENTER: "dc.pr.thordata.net",
|
|
142
146
|
ProxyProduct.MOBILE: "m.pr.thordata.net",
|
|
@@ -233,6 +237,14 @@ class ProxyConfig:
|
|
|
233
237
|
username = self.build_username()
|
|
234
238
|
return f"{self.protocol}://{username}:{self.password}@{self.host}:{self.port}"
|
|
235
239
|
|
|
240
|
+
def build_proxy_endpoint(self) -> str:
|
|
241
|
+
"""Proxy endpoint without credentials, for HTTPS proxy managers."""
|
|
242
|
+
return f"{self.protocol}://{self.host}:{self.port}"
|
|
243
|
+
|
|
244
|
+
def build_proxy_basic_auth(self) -> str:
|
|
245
|
+
"""Basic auth string 'username:password' for Proxy-Authorization."""
|
|
246
|
+
return f"{self.build_username()}:{self.password}"
|
|
247
|
+
|
|
236
248
|
def to_proxies_dict(self) -> Dict[str, str]:
|
|
237
249
|
"""
|
|
238
250
|
Build a proxies dict suitable for the requests library.
|
|
@@ -264,6 +276,39 @@ class ProxyConfig:
|
|
|
264
276
|
) from e
|
|
265
277
|
|
|
266
278
|
|
|
279
|
+
@dataclass
|
|
280
|
+
class WhitelistProxyConfig:
|
|
281
|
+
"""
|
|
282
|
+
Proxy config for IP-whitelist authentication mode (no username/password).
|
|
283
|
+
|
|
284
|
+
In whitelist mode, you do NOT pass proxy auth.
|
|
285
|
+
You only connect to the proxy entry node (host:port).
|
|
286
|
+
|
|
287
|
+
Examples (from docs):
|
|
288
|
+
- Global random: pr.thordata.net:9999
|
|
289
|
+
- Country nodes: us-pr.thordata.net:10000, etc.
|
|
290
|
+
"""
|
|
291
|
+
|
|
292
|
+
host: str = "pr.thordata.net"
|
|
293
|
+
port: int = 9999
|
|
294
|
+
protocol: str = "http" # use http for proxy scheme; target URL can still be https
|
|
295
|
+
|
|
296
|
+
def __post_init__(self) -> None:
|
|
297
|
+
if self.protocol not in ("http", "https"):
|
|
298
|
+
raise ValueError("protocol must be 'http' or 'https'")
|
|
299
|
+
|
|
300
|
+
def build_proxy_url(self) -> str:
|
|
301
|
+
return f"{self.protocol}://{self.host}:{self.port}"
|
|
302
|
+
|
|
303
|
+
def to_proxies_dict(self) -> Dict[str, str]:
|
|
304
|
+
url = self.build_proxy_url()
|
|
305
|
+
return {"http": url, "https": url}
|
|
306
|
+
|
|
307
|
+
def to_aiohttp_config(self) -> tuple:
|
|
308
|
+
# aiohttp: proxy_auth should be None in whitelist mode
|
|
309
|
+
return self.build_proxy_url(), None
|
|
310
|
+
|
|
311
|
+
|
|
267
312
|
@dataclass
|
|
268
313
|
class StaticISPProxy:
|
|
269
314
|
"""
|
|
@@ -545,23 +590,28 @@ class SerpRequest:
|
|
|
545
590
|
payload: Dict[str, Any] = {
|
|
546
591
|
"engine": engine,
|
|
547
592
|
"num": str(self.num),
|
|
548
|
-
# output_format: json=1 for JSON, json=0 for raw HTML
|
|
549
|
-
"json": "1" if self.output_format.lower() == "json" else "0",
|
|
550
593
|
}
|
|
551
594
|
|
|
595
|
+
fmt = self.output_format.lower()
|
|
596
|
+
if fmt == "json":
|
|
597
|
+
payload["json"] = "1"
|
|
598
|
+
elif fmt == "html":
|
|
599
|
+
# omit "json" to get raw HTML (per docs: no json -> HTML)
|
|
600
|
+
pass
|
|
601
|
+
else:
|
|
602
|
+
# keep backward compatibility: if user passes "2"/"both"/etc.
|
|
603
|
+
if fmt in ("2", "both", "json+html", "json_html"):
|
|
604
|
+
payload["json"] = "2"
|
|
605
|
+
|
|
552
606
|
# Handle query parameter (Yandex uses 'text', others use 'q')
|
|
553
607
|
if engine == "yandex":
|
|
554
608
|
payload["text"] = self.query
|
|
555
609
|
else:
|
|
556
610
|
payload["q"] = self.query
|
|
557
611
|
|
|
558
|
-
#
|
|
612
|
+
# Domain overrides (preferred by docs)
|
|
559
613
|
if self.google_domain:
|
|
560
|
-
# 显式设置 google_domain 参数,同时设置 url
|
|
561
614
|
payload["google_domain"] = self.google_domain
|
|
562
|
-
payload["url"] = self.google_domain
|
|
563
|
-
elif engine in self.ENGINE_URLS:
|
|
564
|
-
payload["url"] = self.ENGINE_URLS[engine]
|
|
565
615
|
|
|
566
616
|
# Pagination
|
|
567
617
|
if self.start > 0:
|
|
@@ -795,6 +845,126 @@ class ScraperTaskConfig:
|
|
|
795
845
|
return payload
|
|
796
846
|
|
|
797
847
|
|
|
848
|
+
@dataclass
|
|
849
|
+
class CommonSettings:
|
|
850
|
+
"""
|
|
851
|
+
Common settings for YouTube video/audio downloads.
|
|
852
|
+
|
|
853
|
+
Used by /video_builder endpoint as `common_settings` parameter.
|
|
854
|
+
Also known as `spider_universal` in some documentation.
|
|
855
|
+
|
|
856
|
+
Args:
|
|
857
|
+
resolution: Video resolution (360p/480p/720p/1080p/1440p/2160p).
|
|
858
|
+
audio_format: Audio format (opus/mp3).
|
|
859
|
+
bitrate: Audio bitrate (48/64/128/160/256/320 or with Kbps suffix).
|
|
860
|
+
is_subtitles: Whether to download subtitles ("true"/"false").
|
|
861
|
+
subtitles_language: Subtitle language code (e.g., "en", "zh-Hans").
|
|
862
|
+
|
|
863
|
+
Example for video:
|
|
864
|
+
>>> settings = CommonSettings(
|
|
865
|
+
... resolution="1080p",
|
|
866
|
+
... is_subtitles="true",
|
|
867
|
+
... subtitles_language="en"
|
|
868
|
+
... )
|
|
869
|
+
|
|
870
|
+
Example for audio:
|
|
871
|
+
>>> settings = CommonSettings(
|
|
872
|
+
... audio_format="mp3",
|
|
873
|
+
... bitrate="320",
|
|
874
|
+
... is_subtitles="true",
|
|
875
|
+
... subtitles_language="en"
|
|
876
|
+
... )
|
|
877
|
+
"""
|
|
878
|
+
|
|
879
|
+
# Video settings
|
|
880
|
+
resolution: Optional[str] = None
|
|
881
|
+
|
|
882
|
+
# Audio settings
|
|
883
|
+
audio_format: Optional[str] = None
|
|
884
|
+
bitrate: Optional[str] = None
|
|
885
|
+
|
|
886
|
+
# Subtitle settings (used by both video and audio)
|
|
887
|
+
is_subtitles: Optional[str] = None
|
|
888
|
+
subtitles_language: Optional[str] = None
|
|
889
|
+
|
|
890
|
+
# Valid values for validation
|
|
891
|
+
VALID_RESOLUTIONS = {"360p", "480p", "720p", "1080p", "1440p", "2160p"}
|
|
892
|
+
VALID_AUDIO_FORMATS = {"opus", "mp3"}
|
|
893
|
+
|
|
894
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
895
|
+
"""Convert to dictionary, excluding None values."""
|
|
896
|
+
result = {}
|
|
897
|
+
if self.resolution is not None:
|
|
898
|
+
result["resolution"] = self.resolution
|
|
899
|
+
if self.audio_format is not None:
|
|
900
|
+
result["audio_format"] = self.audio_format
|
|
901
|
+
if self.bitrate is not None:
|
|
902
|
+
result["bitrate"] = self.bitrate
|
|
903
|
+
if self.is_subtitles is not None:
|
|
904
|
+
result["is_subtitles"] = self.is_subtitles
|
|
905
|
+
if self.subtitles_language is not None:
|
|
906
|
+
result["subtitles_language"] = self.subtitles_language
|
|
907
|
+
return result
|
|
908
|
+
|
|
909
|
+
def to_json(self) -> str:
|
|
910
|
+
"""Convert to JSON string for form submission."""
|
|
911
|
+
return json.dumps(self.to_dict())
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
@dataclass
|
|
915
|
+
class VideoTaskConfig:
|
|
916
|
+
"""
|
|
917
|
+
Configuration for creating a YouTube video/audio download task.
|
|
918
|
+
|
|
919
|
+
Uses the /video_builder endpoint.
|
|
920
|
+
|
|
921
|
+
Args:
|
|
922
|
+
file_name: Name for the output file. Supports {{TasksID}}, {{VideoID}}.
|
|
923
|
+
spider_id: Spider identifier (e.g., "youtube_video_by-url", "youtube_audio_by-url").
|
|
924
|
+
spider_name: Spider name (typically "youtube.com").
|
|
925
|
+
parameters: Spider-specific parameters (e.g., video URL).
|
|
926
|
+
common_settings: Video/audio settings (resolution, format, subtitles).
|
|
927
|
+
include_errors: Include error details in output.
|
|
928
|
+
|
|
929
|
+
Example:
|
|
930
|
+
>>> config = VideoTaskConfig(
|
|
931
|
+
... file_name="{{VideoID}}",
|
|
932
|
+
... spider_id="youtube_video_by-url",
|
|
933
|
+
... spider_name="youtube.com",
|
|
934
|
+
... parameters={"url": "https://www.youtube.com/watch?v=xxx"},
|
|
935
|
+
... common_settings=CommonSettings(
|
|
936
|
+
... resolution="1080p",
|
|
937
|
+
... is_subtitles="true",
|
|
938
|
+
... subtitles_language="en"
|
|
939
|
+
... )
|
|
940
|
+
... )
|
|
941
|
+
"""
|
|
942
|
+
|
|
943
|
+
file_name: str
|
|
944
|
+
spider_id: str
|
|
945
|
+
spider_name: str
|
|
946
|
+
parameters: Dict[str, Any]
|
|
947
|
+
common_settings: CommonSettings
|
|
948
|
+
include_errors: bool = True
|
|
949
|
+
|
|
950
|
+
def to_payload(self) -> Dict[str, Any]:
|
|
951
|
+
"""
|
|
952
|
+
Convert to API request payload.
|
|
953
|
+
|
|
954
|
+
Returns:
|
|
955
|
+
Dictionary ready to be sent to the video_builder API.
|
|
956
|
+
"""
|
|
957
|
+
payload: Dict[str, Any] = {
|
|
958
|
+
"file_name": self.file_name,
|
|
959
|
+
"spider_id": self.spider_id,
|
|
960
|
+
"spider_name": self.spider_name,
|
|
961
|
+
"spider_parameters": json.dumps([self.parameters]),
|
|
962
|
+
"spider_errors": "true" if self.include_errors else "false",
|
|
963
|
+
"common_settings": self.common_settings.to_json(),
|
|
964
|
+
}
|
|
965
|
+
return payload
|
|
966
|
+
|
|
967
|
+
|
|
798
968
|
# =============================================================================
|
|
799
969
|
# Response Models
|
|
800
970
|
# =============================================================================
|
|
@@ -838,3 +1008,177 @@ class TaskStatusResponse:
|
|
|
838
1008
|
"""Check if the task failed."""
|
|
839
1009
|
failure_statuses = {"failed", "error"}
|
|
840
1010
|
return self.status.lower() in failure_statuses
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
@dataclass
|
|
1014
|
+
class UsageStatistics:
|
|
1015
|
+
"""
|
|
1016
|
+
Response model for account usage statistics.
|
|
1017
|
+
|
|
1018
|
+
Attributes:
|
|
1019
|
+
total_usage_traffic: Total traffic used (KB).
|
|
1020
|
+
traffic_balance: Remaining traffic balance (KB).
|
|
1021
|
+
query_days: Number of days in the query range.
|
|
1022
|
+
range_usage_traffic: Traffic used in the specified date range (KB).
|
|
1023
|
+
data: Daily usage breakdown.
|
|
1024
|
+
"""
|
|
1025
|
+
|
|
1026
|
+
total_usage_traffic: float
|
|
1027
|
+
traffic_balance: float
|
|
1028
|
+
query_days: int
|
|
1029
|
+
range_usage_traffic: float
|
|
1030
|
+
data: List[Dict[str, Any]]
|
|
1031
|
+
|
|
1032
|
+
@classmethod
|
|
1033
|
+
def from_dict(cls, data: Dict[str, Any]) -> "UsageStatistics":
|
|
1034
|
+
"""Create from API response dict."""
|
|
1035
|
+
return cls(
|
|
1036
|
+
total_usage_traffic=float(data.get("total_usage_traffic", 0)),
|
|
1037
|
+
traffic_balance=float(data.get("traffic_balance", 0)),
|
|
1038
|
+
query_days=int(data.get("query_days", 0)),
|
|
1039
|
+
range_usage_traffic=float(data.get("range_usage_traffic", 0)),
|
|
1040
|
+
data=data.get("data", []),
|
|
1041
|
+
)
|
|
1042
|
+
|
|
1043
|
+
def total_usage_gb(self) -> float:
|
|
1044
|
+
"""Get total usage in GB."""
|
|
1045
|
+
return self.total_usage_traffic / (1024 * 1024)
|
|
1046
|
+
|
|
1047
|
+
def balance_gb(self) -> float:
|
|
1048
|
+
"""Get balance in GB."""
|
|
1049
|
+
return self.traffic_balance / (1024 * 1024)
|
|
1050
|
+
|
|
1051
|
+
def range_usage_gb(self) -> float:
|
|
1052
|
+
"""Get range usage in GB."""
|
|
1053
|
+
return self.range_usage_traffic / (1024 * 1024)
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
@dataclass
|
|
1057
|
+
class ProxyUser:
|
|
1058
|
+
"""
|
|
1059
|
+
Proxy user (sub-account) information.
|
|
1060
|
+
|
|
1061
|
+
Attributes:
|
|
1062
|
+
username: User's username.
|
|
1063
|
+
password: User's password.
|
|
1064
|
+
status: User status (True=enabled, False=disabled).
|
|
1065
|
+
traffic_limit: Traffic limit in MB (0 = unlimited).
|
|
1066
|
+
usage_traffic: Traffic used in KB.
|
|
1067
|
+
"""
|
|
1068
|
+
|
|
1069
|
+
username: str
|
|
1070
|
+
password: str
|
|
1071
|
+
status: bool
|
|
1072
|
+
traffic_limit: int
|
|
1073
|
+
usage_traffic: float
|
|
1074
|
+
|
|
1075
|
+
@classmethod
|
|
1076
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyUser":
|
|
1077
|
+
"""Create from API response dict."""
|
|
1078
|
+
return cls(
|
|
1079
|
+
username=data.get("username", ""),
|
|
1080
|
+
password=data.get("password", ""),
|
|
1081
|
+
status=data.get("status") in (True, "true", 1),
|
|
1082
|
+
traffic_limit=int(data.get("traffic_limit", 0)),
|
|
1083
|
+
usage_traffic=float(data.get("usage_traffic", 0)),
|
|
1084
|
+
)
|
|
1085
|
+
|
|
1086
|
+
def usage_gb(self) -> float:
|
|
1087
|
+
"""Get usage in GB."""
|
|
1088
|
+
return self.usage_traffic / (1024 * 1024)
|
|
1089
|
+
|
|
1090
|
+
def limit_gb(self) -> float:
|
|
1091
|
+
"""Get limit in GB (0 means unlimited)."""
|
|
1092
|
+
if self.traffic_limit == 0:
|
|
1093
|
+
return 0
|
|
1094
|
+
return self.traffic_limit / 1024
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
@dataclass
|
|
1098
|
+
class ProxyUserList:
|
|
1099
|
+
"""
|
|
1100
|
+
Response model for proxy user list.
|
|
1101
|
+
|
|
1102
|
+
Attributes:
|
|
1103
|
+
limit: Total traffic limit (KB).
|
|
1104
|
+
remaining_limit: Remaining traffic limit (KB).
|
|
1105
|
+
user_count: Number of users.
|
|
1106
|
+
users: List of proxy users.
|
|
1107
|
+
"""
|
|
1108
|
+
|
|
1109
|
+
limit: float
|
|
1110
|
+
remaining_limit: float
|
|
1111
|
+
user_count: int
|
|
1112
|
+
users: List[ProxyUser]
|
|
1113
|
+
|
|
1114
|
+
@classmethod
|
|
1115
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyUserList":
|
|
1116
|
+
"""Create from API response dict."""
|
|
1117
|
+
user_list = data.get("list", [])
|
|
1118
|
+
users = [ProxyUser.from_dict(u) for u in user_list]
|
|
1119
|
+
|
|
1120
|
+
return cls(
|
|
1121
|
+
limit=float(data.get("limit", 0)),
|
|
1122
|
+
remaining_limit=float(data.get("remaining_limit", 0)),
|
|
1123
|
+
user_count=int(data.get("user_count", len(users))),
|
|
1124
|
+
users=users,
|
|
1125
|
+
)
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
@dataclass
|
|
1129
|
+
class ProxyServer:
|
|
1130
|
+
"""
|
|
1131
|
+
ISP or Datacenter proxy server information.
|
|
1132
|
+
|
|
1133
|
+
Attributes:
|
|
1134
|
+
ip: Proxy server IP address.
|
|
1135
|
+
port: Proxy server port.
|
|
1136
|
+
username: Authentication username.
|
|
1137
|
+
password: Authentication password.
|
|
1138
|
+
expiration_time: Expiration timestamp (Unix timestamp or datetime string).
|
|
1139
|
+
region: Server region (optional).
|
|
1140
|
+
"""
|
|
1141
|
+
|
|
1142
|
+
ip: str
|
|
1143
|
+
port: int
|
|
1144
|
+
username: str
|
|
1145
|
+
password: str
|
|
1146
|
+
expiration_time: Optional[Union[int, str]] = None
|
|
1147
|
+
region: Optional[str] = None
|
|
1148
|
+
|
|
1149
|
+
@classmethod
|
|
1150
|
+
def from_dict(cls, data: Dict[str, Any]) -> "ProxyServer":
|
|
1151
|
+
"""Create from API response dict."""
|
|
1152
|
+
return cls(
|
|
1153
|
+
ip=data.get("ip", ""),
|
|
1154
|
+
port=int(data.get("port", 0)),
|
|
1155
|
+
username=data.get("username", data.get("user", "")),
|
|
1156
|
+
password=data.get("password", data.get("pwd", "")),
|
|
1157
|
+
expiration_time=data.get("expiration_time", data.get("expireTime")),
|
|
1158
|
+
region=data.get("region"),
|
|
1159
|
+
)
|
|
1160
|
+
|
|
1161
|
+
def to_proxy_url(self, protocol: str = "http") -> str:
|
|
1162
|
+
"""
|
|
1163
|
+
Build proxy URL for this server.
|
|
1164
|
+
|
|
1165
|
+
Args:
|
|
1166
|
+
protocol: Proxy protocol (http/https/socks5).
|
|
1167
|
+
|
|
1168
|
+
Returns:
|
|
1169
|
+
Complete proxy URL.
|
|
1170
|
+
"""
|
|
1171
|
+
return f"{protocol}://{self.username}:{self.password}@{self.ip}:{self.port}"
|
|
1172
|
+
|
|
1173
|
+
def is_expired(self) -> bool:
|
|
1174
|
+
"""Check if proxy has expired (if expiration_time is available)."""
|
|
1175
|
+
if self.expiration_time is None:
|
|
1176
|
+
return False
|
|
1177
|
+
|
|
1178
|
+
import time
|
|
1179
|
+
|
|
1180
|
+
if isinstance(self.expiration_time, int):
|
|
1181
|
+
return time.time() > self.expiration_time
|
|
1182
|
+
|
|
1183
|
+
# String timestamp handling would need datetime parsing
|
|
1184
|
+
return False
|
thordata/retry.py
CHANGED
|
@@ -16,6 +16,7 @@ Example:
|
|
|
16
16
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
+
import inspect
|
|
19
20
|
import logging
|
|
20
21
|
import random
|
|
21
22
|
import time
|
|
@@ -64,7 +65,10 @@ class RetryConfig:
|
|
|
64
65
|
|
|
65
66
|
# Status codes to retry on (5xx server errors + 429 rate limit)
|
|
66
67
|
retry_on_status_codes: Set[int] = field(
|
|
67
|
-
default_factory=lambda: {
|
|
68
|
+
default_factory=lambda: {429, 500, 502, 503, 504}
|
|
69
|
+
)
|
|
70
|
+
retry_on_api_codes: Set[int] = field(
|
|
71
|
+
default_factory=lambda: {300} # API response body code
|
|
68
72
|
)
|
|
69
73
|
|
|
70
74
|
# Exception types to always retry on
|
|
@@ -198,8 +202,6 @@ def with_retry(
|
|
|
198
202
|
|
|
199
203
|
@wraps(func)
|
|
200
204
|
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
201
|
-
import asyncio
|
|
202
|
-
|
|
203
205
|
last_exception: Optional[Exception] = None
|
|
204
206
|
|
|
205
207
|
for attempt in range(config.max_retries + 1):
|
|
@@ -235,7 +237,7 @@ def with_retry(
|
|
|
235
237
|
# Check if the function is async
|
|
236
238
|
import asyncio
|
|
237
239
|
|
|
238
|
-
if
|
|
240
|
+
if inspect.iscoroutinefunction(func):
|
|
239
241
|
return async_wrapper
|
|
240
242
|
return sync_wrapper
|
|
241
243
|
|