thordata-sdk 0.7.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -25,13 +25,18 @@ from __future__ import annotations
 
 import logging
 import os
+import ssl
+from datetime import date
 from typing import Any, Dict, List, Optional, Union
+from urllib.parse import urlencode
 
 import requests
+import urllib3
 
 from . import __version__ as _sdk_version
 from ._utils import (
     build_auth_headers,
+    build_builder_headers,
     build_public_api_headers,
     build_user_agent,
     decode_base64_image,
@@ -46,11 +51,17 @@ from .exceptions import (
     raise_for_code,
 )
 from .models import (
+    CommonSettings,
     ProxyConfig,
     ProxyProduct,
+    ProxyServer,
+    ProxyUserList,
     ScraperTaskConfig,
     SerpRequest,
     UniversalScrapeRequest,
+    UsageStatistics,
+    VideoTaskConfig,
+    WhitelistProxyConfig,
 )
 from .retry import RetryConfig, with_retry
 
@@ -87,8 +98,8 @@ class ThordataClient:
     # API Endpoints
     BASE_URL = "https://scraperapi.thordata.com"
     UNIVERSAL_URL = "https://universalapi.thordata.com"
-    API_URL = "https://api.thordata.com/api/web-scraper-api"
-    LOCATIONS_URL = "https://api.thordata.com/api/locations"
+    API_URL = "https://openapi.thordata.com/api/web-scraper-api"
+    LOCATIONS_URL = "https://openapi.thordata.com/api/locations"
 
     def __init__(
         self,
@@ -98,7 +109,9 @@ class ThordataClient:
         proxy_host: str = "pr.thordata.net",
         proxy_port: int = 9999,
         timeout: int = 30,
+        api_timeout: int = 60,
         retry_config: Optional[RetryConfig] = None,
+        auth_mode: str = "bearer",
         scraperapi_base_url: Optional[str] = None,
         universalapi_base_url: Optional[str] = None,
         web_scraper_api_base_url: Optional[str] = None,
@@ -108,6 +121,7 @@ class ThordataClient:
         if not scraper_token:
             raise ThordataConfigError("scraper_token is required")
 
+        # Core credentials
         self.scraper_token = scraper_token
         self.public_token = public_token
         self.public_key = public_key
@@ -115,17 +129,22 @@ class ThordataClient:
         # Proxy configuration
         self._proxy_host = proxy_host
         self._proxy_port = proxy_port
+
+        # Timeout configuration
         self._default_timeout = timeout
+        self._api_timeout = api_timeout
 
         # Retry configuration
         self._retry_config = retry_config or RetryConfig()
 
-        # Build default proxy URL (for basic usage)
-        self._default_proxy_url = (
-            f"http://td-customer-{self.scraper_token}:@{proxy_host}:{proxy_port}"
-        )
+        # Authentication mode (for scraping APIs)
+        self._auth_mode = auth_mode.lower()
+        if self._auth_mode not in ("bearer", "header_token"):
+            raise ThordataConfigError(
+                f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
+            )
 
-        # Sessions:
+        # NOTE:
         # - _proxy_session: used for proxy network traffic to target sites
         # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
         #
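The new auth_mode and api_timeout constructor arguments are validated in the hunk above. A minimal construction sketch, assuming ThordataClient is exported from the top-level thordata package (as the CommonSettings import in the docstrings below suggests); all credential strings are placeholders:

from thordata import ThordataClient

# Placeholder credentials, illustrative only.
client = ThordataClient(
    scraper_token="YOUR_SCRAPER_TOKEN",
    public_token="YOUR_PUBLIC_TOKEN",
    public_key="YOUR_PUBLIC_KEY",
    auth_mode="bearer",  # or "header_token"; any other value raises ThordataConfigError
    api_timeout=60,      # seconds, applied to Thordata API calls
)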
@@ -133,14 +152,9 @@ class ThordataClient:
         # so developers can rely on system proxy settings (e.g., Clash) via env vars.
         self._proxy_session = requests.Session()
         self._proxy_session.trust_env = False
-        self._proxy_session.proxies = {
-            "http": self._default_proxy_url,
-            "https": self._default_proxy_url,
-        }
 
         self._api_session = requests.Session()
         self._api_session.trust_env = True
-
         self._api_session.headers.update(
             {"User-Agent": build_user_agent(_sdk_version, "requests")}
         )
@@ -170,17 +184,49 @@ class ThordataClient:
             or self.LOCATIONS_URL
         ).rstrip("/")
 
+        # These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
+        gateway_base = os.getenv(
+            "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
+        )
+        child_base = os.getenv(
+            "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
+        )
+        self._gateway_base_url = gateway_base
+        self._child_base_url = child_base
+
         self._serp_url = f"{scraperapi_base}/request"
         self._builder_url = f"{scraperapi_base}/builder"
+        self._video_builder_url = f"{scraperapi_base}/video_builder"
         self._universal_url = f"{universalapi_base}/request"
+
         self._status_url = f"{web_scraper_api_base}/tasks-status"
         self._download_url = f"{web_scraper_api_base}/tasks-download"
+        self._list_url = f"{web_scraper_api_base}/tasks-list"
+
         self._locations_base_url = locations_base
 
+        # These 2 lines keep your existing behavior (derive account endpoints from locations_base)
+        self._usage_stats_url = (
+            f"{locations_base.replace('/locations', '')}/account/usage-statistics"
+        )
+        self._proxy_users_url = (
+            f"{locations_base.replace('/locations', '')}/proxy-users"
+        )
+
+        whitelist_base = os.getenv(
+            "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
+        )
+        self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
+
+        proxy_api_base = os.getenv(
+            "THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
+        )
+        self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
+        self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
+
     # =========================================================================
-    # Proxy Network Methods
+    # Proxy Network Methods (Pure proxy network request functions)
     # =========================================================================
-
     def get(
         self,
         url: str,
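The gateway, whitelist, and proxy-list base URLs introduced above are read from environment variables with api.thordata.com defaults. A sketch of overriding them before constructing the client (the override host is a placeholder, not a real endpoint):

import os

# Placeholder override; any reachable base URL is handled the same way.
os.environ["THORDATA_GATEWAY_BASE_URL"] = "https://api.example.internal/api/gateway"
os.environ["THORDATA_WHITELIST_BASE_URL"] = "https://api.example.internal/api"
os.environ["THORDATA_PROXY_API_BASE_URL"] = "https://api.example.internal/api"

from thordata import ThordataClient
client = ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN")  # env vars are read at init time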
@@ -219,11 +265,37 @@ class ThordataClient:
 
         timeout = timeout or self._default_timeout
 
-        if proxy_config:
-            proxies = proxy_config.to_proxies_dict()
-            kwargs["proxies"] = proxies
+        if proxy_config is None:
+            proxy_config = self._get_default_proxy_config_from_env()
+
+        if proxy_config is None:
+            raise ThordataConfigError(
+                "Proxy credentials are missing. "
+                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
+                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
+            )
+
+        kwargs["proxies"] = proxy_config.to_proxies_dict()
+
+        @with_retry(self._retry_config)
+        def _do() -> requests.Response:
+            return self._proxy_request_with_proxy_manager(
+                "GET",
+                url,
+                proxy_config=proxy_config,
+                timeout=timeout,
+                headers=kwargs.pop("headers", None),
+                params=kwargs.pop("params", None),
+            )
 
-        return self._request_with_retry("GET", url, timeout=timeout, **kwargs)
+        try:
+            return _do()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"Request timed out: {e}", original_error=e
+            ) from e
+        except Exception as e:
+            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
 
     def post(
         self,
@@ -249,14 +321,43 @@ class ThordataClient:
 
         timeout = timeout or self._default_timeout
 
-        if proxy_config:
-            proxies = proxy_config.to_proxies_dict()
-            kwargs["proxies"] = proxies
+        if proxy_config is None:
+            proxy_config = self._get_default_proxy_config_from_env()
+
+        if proxy_config is None:
+            raise ThordataConfigError(
+                "Proxy credentials are missing. "
+                "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
+                "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
+            )
 
-        return self._request_with_retry("POST", url, timeout=timeout, **kwargs)
+        kwargs["proxies"] = proxy_config.to_proxies_dict()
+
+        @with_retry(self._retry_config)
+        def _do() -> requests.Response:
+            return self._proxy_request_with_proxy_manager(
+                "POST",
+                url,
+                proxy_config=proxy_config,
+                timeout=timeout,
+                headers=kwargs.pop("headers", None),
+                params=kwargs.pop("params", None),
+                data=kwargs.pop("data", None),
+            )
+
+        try:
+            return _do()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"Request timed out: {e}", original_error=e
+            ) from e
+        except Exception as e:
+            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
 
     def build_proxy_url(
         self,
+        username: str,  # Required
+        password: str,  # Required
         *,
         country: Optional[str] = None,
         state: Optional[str] = None,
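With the changes above, get() and post() no longer fall back to a token-derived proxy URL: they need either an explicit ProxyConfig or the residential/datacenter/mobile credential environment variables. A usage sketch with placeholder credentials, assuming ProxyConfig and ProxyProduct are re-exported at package level:

from thordata import ThordataClient, ProxyConfig, ProxyProduct

client = ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN")

# Option 1: pass credentials explicitly (placeholder values).
cfg = ProxyConfig(
    username="PROXY_USERNAME",
    password="PROXY_PASSWORD",
    product=ProxyProduct.RESIDENTIAL,
)
resp = client.get("https://httpbin.org/ip", proxy_config=cfg)

# Option 2: rely on THORDATA_RESIDENTIAL_USERNAME / THORDATA_RESIDENTIAL_PASSWORD
# (or the DATACENTER/MOBILE variants), picked up by _get_default_proxy_config_from_env().
resp = client.get("https://httpbin.org/ip")
print(resp.status_code)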
@@ -288,8 +389,8 @@ class ThordataClient:
             >>> requests.get("https://example.com", proxies=proxies)
         """
         config = ProxyConfig(
-            username=self.scraper_token,
-            password="",
+            username=username,
+            password=password,
             host=self._proxy_host,
             port=self._proxy_port,
             product=product,
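build_proxy_url() now takes the proxy username and password explicitly instead of reusing scraper_token. A short sketch with placeholder credentials (the country value is illustrative; use whatever code format the API accepts):

import requests

proxy_url = client.build_proxy_url(
    "PROXY_USERNAME",   # placeholder
    "PROXY_PASSWORD",   # placeholder
    country="us",
)
proxies = {"http": proxy_url, "https": proxy_url}
requests.get("https://example.com", proxies=proxies, timeout=30)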
@@ -302,9 +403,44 @@ class ThordataClient:
         return config.build_proxy_url()
 
     # =========================================================================
-    # SERP API Methods
+    # Internal API Request Retry Helper (For all API calls)
     # =========================================================================
+    def _api_request_with_retry(
+        self,
+        method: str,
+        url: str,
+        *,
+        data: Optional[Dict[str, Any]] = None,
+        headers: Optional[Dict[str, str]] = None,
+        params: Optional[Dict[str, Any]] = None,
+    ) -> requests.Response:
+        """Make an API request with automatic retry on transient failures."""
+
+        @with_retry(self._retry_config)
+        def _do_request() -> requests.Response:
+            return self._api_session.request(
+                method,
+                url,
+                data=data,
+                headers=headers,
+                params=params,
+                timeout=self._api_timeout,
+            )
+
+        try:
+            return _do_request()
+        except requests.Timeout as e:
+            raise ThordataTimeoutError(
+                f"API request timed out: {e}", original_error=e
+            ) from e
+        except requests.RequestException as e:
+            raise ThordataNetworkError(
+                f"API request failed: {e}", original_error=e
+            ) from e
 
+    # =========================================================================
+    # SERP API Methods (Search Engine Results Page functions)
+    # =========================================================================
     def serp_search(
         self,
         query: str,
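All Thordata API calls are now funneled through _api_request_with_retry, which applies the shared api_timeout and the client's RetryConfig instead of per-call timeouts. A construction sketch; RetryConfig() is shown with defaults only, since its tuning options live in thordata.retry and are not part of this hunk:

from thordata import ThordataClient
from thordata.retry import RetryConfig

client = ThordataClient(
    scraper_token="YOUR_SCRAPER_TOKEN",
    api_timeout=120,             # seconds shared by SERP/Universal/task/account calls
    retry_config=RetryConfig(),  # defaults; see thordata.retry for options
)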
@@ -375,16 +511,18 @@ class ThordataClient:
         )
 
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
-        logger.info(f"SERP Search: {engine_str} - {query}")
+        logger.info(
+            f"SERP Search: {engine_str} - {query[:50]}{'...' if len(query) > 50 else ''}"
+        )
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._serp_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -445,16 +583,18 @@ class ThordataClient:
             >>> results = client.serp_search_advanced(request)
         """
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
-        logger.info(f"SERP Advanced Search: {request.engine} - {request.query}")
+        logger.info(
+            f"SERP Advanced Search: {request.engine} - {request.query[:50]}{'...' if len(request.query) > 50 else ''}"
+        )
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._serp_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -487,9 +627,8 @@ class ThordataClient:
             ) from e
 
     # =========================================================================
-    # Universal Scraping API (Web Unlocker) Methods
+    # Universal Scraping API Methods (Web Unlocker functions)
     # =========================================================================
-
     def universal_scrape(
         self,
         url: str,
@@ -559,18 +698,18 @@ class ThordataClient:
             HTML string or PNG bytes.
         """
         payload = request.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+        headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
 
         logger.info(
             f"Universal Scrape: {request.url} (format: {request.output_format})"
         )
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._universal_url,
                 data=payload,
                 headers=headers,
-                timeout=60,
             )
             response.raise_for_status()
 
@@ -619,9 +758,8 @@ class ThordataClient:
         return str(resp_json)
 
     # =========================================================================
-    # Web Scraper API (Task-based) Methods
+    # Web Scraper API Methods (Only async task management functions)
     # =========================================================================
-
     def create_scraper_task(
         self,
         file_name: str,
@@ -673,17 +811,25 @@ class ThordataClient:
         Returns:
             The created task_id.
         """
+        self._require_public_credentials()
+
         payload = config.to_payload()
-        headers = build_auth_headers(self.scraper_token)
+
+        # Builder needs 3 headers: token, key, Authorization Bearer
+        headers = build_builder_headers(
+            self.scraper_token,
+            self.public_token or "",
+            self.public_key or "",
+        )
 
         logger.info(f"Creating Scraper Task: {config.spider_name}")
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._builder_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
 
@@ -701,6 +847,94 @@ class ThordataClient:
             f"Task creation failed: {e}", original_error=e
         ) from e
 
+    def create_video_task(
+        self,
+        file_name: str,
+        spider_id: str,
+        spider_name: str,
+        parameters: Dict[str, Any],
+        common_settings: "CommonSettings",
+    ) -> str:
+        """
+        Create a YouTube video/audio download task.
+
+        Uses the /video_builder endpoint.
+
+        Args:
+            file_name: Output file name. Supports {{TasksID}}, {{VideoID}}.
+            spider_id: Spider identifier (e.g., "youtube_video_by-url").
+            spider_name: Spider name (typically "youtube.com").
+            parameters: Spider parameters (e.g., {"url": "..."}).
+            common_settings: Video/audio settings.
+
+        Returns:
+            The created task_id.
+
+        Example:
+            >>> from thordata import CommonSettings
+            >>> task_id = client.create_video_task(
+            ...     file_name="{{VideoID}}",
+            ...     spider_id="youtube_video_by-url",
+            ...     spider_name="youtube.com",
+            ...     parameters={"url": "https://youtube.com/watch?v=xxx"},
+            ...     common_settings=CommonSettings(
+            ...         resolution="1080p",
+            ...         is_subtitles="true"
+            ...     )
+            ... )
+        """
+
+        config = VideoTaskConfig(
+            file_name=file_name,
+            spider_id=spider_id,
+            spider_name=spider_name,
+            parameters=parameters,
+            common_settings=common_settings,
+        )
+
+        return self.create_video_task_advanced(config)
+
+    def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
+        """
+        Create a video task using VideoTaskConfig object.
+
+        Args:
+            config: Video task configuration.
+
+        Returns:
+            The created task_id.
+        """
+
+        self._require_public_credentials()
+
+        payload = config.to_payload()
+        headers = build_builder_headers(
+            self.scraper_token,
+            self.public_token or "",
+            self.public_key or "",
+        )
+
+        logger.info(f"Creating Video Task: {config.spider_name} - {config.spider_id}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            self._video_builder_url,
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(
+                f"Video task creation failed: {msg}", code=code, payload=data
+            )
+
+        return data["data"]["task_id"]
+
     def get_task_status(self, task_id: str) -> str:
         """
         Check the status of an asynchronous scraping task.
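An end-to-end sketch of the new video builder flow, combining the docstring example above with the polling helpers; wait_for_task's extra options are left at their defaults since they are not shown in this hunk, and the video URL is the same placeholder used in the docstring:

from thordata import CommonSettings

task_id = client.create_video_task(
    file_name="{{VideoID}}",
    spider_id="youtube_video_by-url",
    spider_name="youtube.com",
    parameters={"url": "https://youtube.com/watch?v=xxx"},  # placeholder video URL
    common_settings=CommonSettings(resolution="1080p", is_subtitles="true"),
)
client.wait_for_task(task_id)            # raises TimeoutError if it never completes
print(client.get_task_status(task_id))   # -> status string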
@@ -721,11 +955,11 @@ class ThordataClient:
         payload = {"tasks_ids": task_id}
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._status_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
             data = response.json()
@@ -788,11 +1022,11 @@ class ThordataClient:
         logger.info(f"Getting result URL for Task: {task_id}")
 
         try:
-            response = self._api_session.post(
+            response = self._api_request_with_retry(
+                "POST",
                 self._download_url,
                 data=payload,
                 headers=headers,
-                timeout=30,
             )
             response.raise_for_status()
 
@@ -812,6 +1046,57 @@ class ThordataClient:
             f"Get result failed: {e}", original_error=e
         ) from e
 
+    def list_tasks(
+        self,
+        page: int = 1,
+        size: int = 20,
+    ) -> Dict[str, Any]:
+        """
+        List all Web Scraper tasks.
+
+        Args:
+            page: Page number (starts from 1).
+            size: Number of tasks per page.
+
+        Returns:
+            Dict containing 'count' and 'list' of tasks.
+
+        Example:
+            >>> result = client.list_tasks(page=1, size=10)
+            >>> print(f"Total tasks: {result['count']}")
+            >>> for task in result['list']:
+            ...     print(f"Task {task['task_id']}: {task['status']}")
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+        payload: Dict[str, Any] = {}
+        if page:
+            payload["page"] = str(page)
+        if size:
+            payload["size"] = str(size)
+
+        logger.info(f"Listing tasks: page={page}, size={size}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            self._list_url,
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"List tasks failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {"count": 0, "list": []})
+
     def wait_for_task(
         self,
         task_id: str,
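A pagination sketch built on the count/list shape documented above, assuming 'count' is the total number of tasks across all pages:

page, size = 1, 50
while True:
    result = client.list_tasks(page=page, size=size)
    for task in result["list"]:
        print(task["task_id"], task["status"])
    if page * size >= result["count"]:
        break
    page += 1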
@@ -865,9 +1150,514 @@ class ThordataClient:
         raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
 
     # =========================================================================
-    # Location API Methods
+    # Proxy Account Management Methods (Proxy balance, user, whitelist functions)
     # =========================================================================
+    def get_usage_statistics(
+        self,
+        from_date: Union[str, date],
+        to_date: Union[str, date],
+    ) -> UsageStatistics:
+        """
+        Get account usage statistics for a date range.
+
+        Args:
+            from_date: Start date (YYYY-MM-DD string or date object).
+            to_date: End date (YYYY-MM-DD string or date object).
+
+        Returns:
+            UsageStatistics object with traffic data.
+
+        Raises:
+            ValueError: If date range exceeds 180 days.
+
+        Example:
+            >>> from datetime import date, timedelta
+            >>> today = date.today()
+            >>> week_ago = today - timedelta(days=7)
+            >>> stats = client.get_usage_statistics(week_ago, today)
+            >>> print(f"Used: {stats.range_usage_gb():.2f} GB")
+            >>> print(f"Balance: {stats.balance_gb():.2f} GB")
+        """
+
+        self._require_public_credentials()
+
+        # Convert dates to strings
+        if isinstance(from_date, date):
+            from_date = from_date.strftime("%Y-%m-%d")
+        if isinstance(to_date, date):
+            to_date = to_date.strftime("%Y-%m-%d")
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "from_date": from_date,
+            "to_date": to_date,
+        }
+
+        logger.info(f"Getting usage statistics: {from_date} to {to_date}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._usage_stats_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"Usage statistics error: {msg}",
+                    code=code,
+                    payload=data,
+                )
+
+            # Extract data field
+            usage_data = data.get("data", data)
+            return UsageStatistics.from_dict(usage_data)
+
+        raise ThordataNetworkError(
+            f"Unexpected usage statistics response: {type(data).__name__}",
+            original_error=None,
+        )
+
+    def get_residential_balance(self) -> Dict[str, Any]:
+        """
+        Get residential proxy balance.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting residential proxy balance")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getFlowBalance",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get balance failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def get_residential_usage(
+        self,
+        start_time: Union[str, int],
+        end_time: Union[str, int],
+    ) -> Dict[str, Any]:
+        """
+        Get residential proxy usage records.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+        payload = {"start_time": str(start_time), "end_time": str(end_time)}
+
+        logger.info(f"Getting residential usage: {start_time} to {end_time}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/usageRecord",
+            headers=headers,
+            data=payload,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
 
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get usage failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def list_proxy_users(
+        self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+    ) -> ProxyUserList:
+        """
+        List all proxy users (sub-accounts).
+
+        Args:
+            proxy_type: Proxy type (1=Residential, 2=Unlimited).
+
+        Returns:
+            ProxyUserList with user details.
+
+        Example:
+            >>> users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
+            >>> print(f"Total users: {users.user_count}")
+            >>> for user in users.users:
+            ...     print(f"{user.username}: {user.usage_gb():.2f} GB used")
+        """
+
+        self._require_public_credentials()
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+        }
+
+        logger.info(f"Listing proxy users: type={params['proxy_type']}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            f"{self._proxy_users_url}/user-list",
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"List proxy users error: {msg}", code=code, payload=data
+                )
+
+            user_data = data.get("data", data)
+            return ProxyUserList.from_dict(user_data)
+
+        raise ThordataNetworkError(
+            f"Unexpected proxy users response: {type(data).__name__}",
+            original_error=None,
+        )
+
+    def create_proxy_user(
+        self,
+        username: str,
+        password: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+        traffic_limit: int = 0,
+        status: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Create a new proxy user (sub-account).
+
+        Args:
+            username: Username for the new user.
+            password: Password for the new user.
+            proxy_type: Proxy type (1=Residential, 2=Unlimited).
+            traffic_limit: Traffic limit in MB (0 = unlimited, min 100).
+            status: Enable/disable user (True/False).
+
+        Returns:
+            API response data.
+
+        Example:
+            >>> result = client.create_proxy_user(
+            ...     username="subuser1",
+            ...     password="securepass",
+            ...     traffic_limit=5120,  # 5GB
+            ...     status=True
+            ... )
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+
+        payload = {
+            "proxy_type": str(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            "username": username,
+            "password": password,
+            "traffic_limit": str(traffic_limit),
+            "status": "true" if status else "false",
+        }
+
+        logger.info(f"Creating proxy user: {username}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._proxy_users_url}/create-user",
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Create proxy user failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def add_whitelist_ip(
+        self,
+        ip: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+        status: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Add an IP to the whitelist for IP authentication.
+
+        Args:
+            ip: IP address to whitelist.
+            proxy_type: Proxy type (1=Residential, 2=Unlimited, 9=Mobile).
+            status: Enable/disable the IP (True/False).
+
+        Returns:
+            API response data.
+
+        Example:
+            >>> result = client.add_whitelist_ip(
+            ...     ip="123.45.67.89",
+            ...     proxy_type=ProxyType.RESIDENTIAL,
+            ...     status=True
+            ... )
+        """
+        self._require_public_credentials()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+
+        # Convert ProxyType to int
+        proxy_type_int = (
+            int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+        )
+
+        payload = {
+            "proxy_type": str(proxy_type_int),
+            "ip": ip,
+            "status": "true" if status else "false",
+        }
+
+        logger.info(f"Adding whitelist IP: {ip}")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._whitelist_url}/add-ip",
+            data=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Add whitelist IP failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def list_proxy_servers(
+        self,
+        proxy_type: int,
+    ) -> List[ProxyServer]:
+        """
+        List ISP or Datacenter proxy servers.
+
+        Args:
+            proxy_type: Proxy type (1=ISP, 2=Datacenter).
+
+        Returns:
+            List of ProxyServer objects.
+
+        Example:
+            >>> servers = client.list_proxy_servers(proxy_type=1)  # ISP proxies
+            >>> for server in servers:
+            ...     print(f"{server.ip}:{server.port} - expires: {server.expiration_time}")
+        """
+
+        self._require_public_credentials()
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(proxy_type),
+        }
+
+        logger.info(f"Listing proxy servers: type={proxy_type}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._proxy_list_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(
+                    f"List proxy servers error: {msg}", code=code, payload=data
+                )
+
+            # Extract list from data field
+            server_list = data.get("data", data.get("list", []))
+        elif isinstance(data, list):
+            server_list = data
+        else:
+            raise ThordataNetworkError(
+                f"Unexpected proxy list response: {type(data).__name__}",
+                original_error=None,
+            )
+
+        return [ProxyServer.from_dict(s) for s in server_list]
+
+    def get_isp_regions(self) -> List[Dict[str, Any]]:
+        """
+        Get available ISP proxy regions.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting ISP regions")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getRegionIsp",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get ISP regions failed: {msg}", code=code, payload=data)
+
+        return data.get("data", [])
+
+    def list_isp_proxies(self) -> List[Dict[str, Any]]:
+        """
+        List ISP proxies.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Listing ISP proxies")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/queryListIsp",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"List ISP proxies failed: {msg}", code=code, payload=data)
+
+        return data.get("data", [])
+
+    def get_wallet_balance(self) -> Dict[str, Any]:
+        """
+        Get wallet balance for ISP proxies.
+
+        Uses public_token/public_key (Dashboard -> My account -> API).
+        """
+        headers = self._build_gateway_headers()
+
+        logger.info("Getting wallet balance")
+
+        response = self._api_request_with_retry(
+            "POST",
+            f"{self._gateway_base_url}/getBalance",
+            headers=headers,
+            data={},
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        code = data.get("code")
+
+        if code != 200:
+            msg = extract_error_message(data)
+            raise_for_code(f"Get wallet balance failed: {msg}", code=code, payload=data)
+
+        return data.get("data", {})
+
+    def get_proxy_expiration(
+        self,
+        ips: Union[str, List[str]],
+        proxy_type: int,
+    ) -> Dict[str, Any]:
+        """
+        Get expiration time for specific proxy IPs.
+
+        Args:
+            ips: Single IP or list of IPs to check.
+            proxy_type: Proxy type (1=ISP, 2=Datacenter).
+
+        Returns:
+            Dict with expiration information.
+
+        Example:
+            >>> result = client.get_proxy_expiration("123.45.67.89", proxy_type=1)
+            >>> print(result)
+        """
+        self._require_public_credentials()
+
+        # Convert list to comma-separated string
+        if isinstance(ips, list):
+            ips = ",".join(ips)
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+            "proxy_type": str(proxy_type),
+            "ips": ips,
+        }
+
+        logger.info(f"Getting proxy expiration: {ips}")
+
+        response = self._api_request_with_retry(
+            "GET",
+            self._proxy_expiration_url,
+            params=params,
+        )
+        response.raise_for_status()
+
+        data = response.json()
+
+        if isinstance(data, dict):
+            code = data.get("code")
+            if code is not None and code != 200:
+                msg = extract_error_message(data)
+                raise_for_code(f"Get expiration error: {msg}", code=code, payload=data)
+
+            return data.get("data", data)
+
+        return data
+
+    # =========================================================================
+    # Location API Methods (Country/State/City/ASN functions)
+    # =========================================================================
     def list_countries(
         self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
     ) -> List[Dict[str, Any]]:
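A short sketch exercising a few of the new account-management calls shown above, on an already-constructed client; the ProxyType import is assumed to be a top-level export (the docstring examples use it directly), and the whitelisted IP is a documentation placeholder:

from datetime import date, timedelta
from thordata import ProxyType  # assumed top-level export

today = date.today()
stats = client.get_usage_statistics(today - timedelta(days=7), today)
print(f"Used: {stats.range_usage_gb():.2f} GB, balance: {stats.balance_gb():.2f} GB")

users = client.list_proxy_users(proxy_type=ProxyType.RESIDENTIAL)
print(f"Sub-accounts: {users.user_count}")

client.add_whitelist_ip(ip="203.0.113.10", proxy_type=ProxyType.RESIDENTIAL, status=True)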
@@ -978,7 +1768,11 @@ class ThordataClient:
         logger.debug(f"Locations API request: {url}")
 
         # Use requests.get directly (no proxy needed for this API)
-        response = self._api_session.get(url, params=params, timeout=30)
+        response = self._api_request_with_retry(
+            "GET",
+            url,
+            params=params,
+        )
         response.raise_for_status()
 
         data = response.json()
@@ -998,9 +1792,8 @@ class ThordataClient:
         return []
 
     # =========================================================================
-    # Helper Methods
+    # Helper Methods (Internal utility functions)
     # =========================================================================
-
     def _require_public_credentials(self) -> None:
         """Ensure public API credentials are available."""
        if not self.public_token or not self.public_key:
@@ -1009,6 +1802,185 @@ class ThordataClient:
                 "Please provide them when initializing ThordataClient."
             )
 
+    def _get_proxy_endpoint_overrides(
+        self, product: ProxyProduct
+    ) -> tuple[Optional[str], Optional[int], str]:
+        """
+        Read proxy endpoint overrides from env.
+
+        Priority:
+            1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
+            2) THORDATA_PROXY_HOST/PORT/PROTOCOL
+            3) defaults (host/port None => ProxyConfig will use its product defaults)
+        """
+        prefix = product.value.upper()  # RESIDENTIAL / DATACENTER / MOBILE / ISP
+
+        host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
+            "THORDATA_PROXY_HOST"
+        )
+        port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
+            "THORDATA_PROXY_PORT"
+        )
+        protocol = (
+            os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
+            or os.getenv("THORDATA_PROXY_PROTOCOL")
+            or "http"
+        )
+
+        port: Optional[int] = None
+        if port_raw:
+            try:
+                port = int(port_raw)
+            except ValueError:
+                port = None
+
+        return host or None, port, protocol
+
+    def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
+        """
+        Try to build a default ProxyConfig from env vars.
+
+        Priority order:
+            1) Residential
+            2) Datacenter
+            3) Mobile
+        """
+        # Residential
+        u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
+        p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.RESIDENTIAL
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.RESIDENTIAL,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        # Datacenter
+        u = os.getenv("THORDATA_DATACENTER_USERNAME")
+        p = os.getenv("THORDATA_DATACENTER_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.DATACENTER
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.DATACENTER,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        # Mobile
+        u = os.getenv("THORDATA_MOBILE_USERNAME")
+        p = os.getenv("THORDATA_MOBILE_PASSWORD")
+        if u and p:
+            host, port, protocol = self._get_proxy_endpoint_overrides(
+                ProxyProduct.MOBILE
+            )
+            return ProxyConfig(
+                username=u,
+                password=p,
+                product=ProxyProduct.MOBILE,
+                host=host,
+                port=port,
+                protocol=protocol,
+            )
+
+        return None
+
+    def _build_gateway_headers(self) -> Dict[str, str]:
+        """
+        Build headers for legacy gateway-style endpoints.
+
+        IMPORTANT:
+        - SDK does NOT expose "sign/apiKey" as a separate credential model.
+        - Values ALWAYS come from public_token/public_key.
+        - Some backend endpoints may still expect header field names "sign" and "apiKey".
+        """
+        self._require_public_credentials()
+        return {
+            "sign": self.public_token or "",
+            "apiKey": self.public_key or "",
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+
+    def _proxy_request_with_proxy_manager(
+        self,
+        method: str,
+        url: str,
+        *,
+        proxy_config: ProxyConfig,
+        timeout: int,
+        headers: Optional[Dict[str, str]] = None,
+        params: Optional[Dict[str, Any]] = None,
+        data: Any = None,
+    ) -> requests.Response:
+        """
+        Proxy Network request implemented via urllib3.ProxyManager.
+
+        This is required to reliably support HTTPS proxy endpoints like:
+            https://<endpoint>.pr.thordata.net:9999
+        """
+        # Build final URL (include query params)
+        req = requests.Request(method=method.upper(), url=url, params=params)
+        prepped = self._proxy_session.prepare_request(req)
+        final_url = prepped.url or url
+
+        proxy_url = proxy_config.build_proxy_endpoint()
+        proxy_headers = urllib3.make_headers(
+            proxy_basic_auth=proxy_config.build_proxy_basic_auth()
+        )
+
+        pm = urllib3.ProxyManager(
+            proxy_url,
+            proxy_headers=proxy_headers,
+            proxy_ssl_context=(
+                ssl.create_default_context()
+                if proxy_url.startswith("https://")
+                else None
+            ),
+        )
+
+        # Encode form data if dict
+        body = None
+        req_headers = dict(headers or {})
+        if data is not None:
+            if isinstance(data, dict):
+                # form-urlencoded
+                body = urlencode({k: str(v) for k, v in data.items()})
+                req_headers.setdefault(
+                    "Content-Type", "application/x-www-form-urlencoded"
+                )
+            else:
+                body = data
+
+        http_resp = pm.request(
+            method.upper(),
+            final_url,
+            body=body,
+            headers=req_headers or None,
+            timeout=urllib3.Timeout(connect=timeout, read=timeout),
+            retries=False,
+            preload_content=True,
+        )
+
+        # Convert urllib3 response -> requests.Response (keep your API stable)
+        r = requests.Response()
+        r.status_code = int(getattr(http_resp, "status", 0) or 0)
+        r._content = http_resp.data or b""
+        r.url = final_url
+        r.headers = requests.structures.CaseInsensitiveDict(
+            dict(http_resp.headers or {})
+        )
+        return r
+
     def _request_with_retry(
         self, method: str, url: str, **kwargs: Any
     ) -> requests.Response:
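The helpers above resolve default proxy credentials and endpoint overrides from the environment. A sketch of the variables involved (all values are placeholders); with these set, client.get() and client.post() work without an explicit proxy_config:

import os

os.environ["THORDATA_RESIDENTIAL_USERNAME"] = "PROXY_USERNAME"   # placeholder
os.environ["THORDATA_RESIDENTIAL_PASSWORD"] = "PROXY_PASSWORD"   # placeholder
# Optional endpoint overrides; when unset, ProxyConfig falls back to its product defaults.
os.environ["THORDATA_RESIDENTIAL_PROXY_HOST"] = "pr.thordata.net"
os.environ["THORDATA_RESIDENTIAL_PROXY_PORT"] = "9999"
os.environ["THORDATA_RESIDENTIAL_PROXY_PROTOCOL"] = "http"

resp = client.get("https://httpbin.org/ip")  # resolved via _get_default_proxy_config_from_env()
print(resp.text)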