thordata-sdk 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/client.py CHANGED
@@ -27,7 +27,7 @@ import logging
27
27
  import os
28
28
  import ssl
29
29
  from datetime import date
30
- from typing import Any, Dict, List, Optional, Union
30
+ from typing import Any
31
31
  from urllib.parse import urlencode
32
32
 
33
33
  import requests
@@ -61,7 +61,6 @@ from .models import (
61
61
  UniversalScrapeRequest,
62
62
  UsageStatistics,
63
63
  VideoTaskConfig,
64
- WhitelistProxyConfig,
65
64
  )
66
65
  from .retry import RetryConfig, with_retry
67
66
 
@@ -78,18 +77,18 @@ class ThordataClient:
78
77
  def __init__(
79
78
  self,
80
79
  scraper_token: str,
81
- public_token: Optional[str] = None,
82
- public_key: Optional[str] = None,
80
+ public_token: str | None = None,
81
+ public_key: str | None = None,
83
82
  proxy_host: str = "pr.thordata.net",
84
83
  proxy_port: int = 9999,
85
84
  timeout: int = 30,
86
85
  api_timeout: int = 60,
87
- retry_config: Optional[RetryConfig] = None,
86
+ retry_config: RetryConfig | None = None,
88
87
  auth_mode: str = "bearer",
89
- scraperapi_base_url: Optional[str] = None,
90
- universalapi_base_url: Optional[str] = None,
91
- web_scraper_api_base_url: Optional[str] = None,
92
- locations_base_url: Optional[str] = None,
88
+ scraperapi_base_url: str | None = None,
89
+ universalapi_base_url: str | None = None,
90
+ web_scraper_api_base_url: str | None = None,
91
+ locations_base_url: str | None = None,
93
92
  ) -> None:
94
93
  """Initialize the Thordata Client."""
95
94
  if not scraper_token:
@@ -124,7 +123,7 @@ class ThordataClient:
124
123
 
125
124
  # Cache for ProxyManagers (Connection Pooling Fix)
126
125
  # Key: proxy_url (str), Value: urllib3.ProxyManager
127
- self._proxy_managers: Dict[str, urllib3.ProxyManager] = {}
126
+ self._proxy_managers: dict[str, urllib3.ProxyManager] = {}
128
127
 
129
128
  self._api_session = requests.Session()
130
129
  self._api_session.trust_env = True
@@ -201,8 +200,8 @@ class ThordataClient:
201
200
  self,
202
201
  url: str,
203
202
  *,
204
- proxy_config: Optional[ProxyConfig] = None,
205
- timeout: Optional[int] = None,
203
+ proxy_config: ProxyConfig | None = None,
204
+ timeout: int | None = None,
206
205
  **kwargs: Any,
207
206
  ) -> requests.Response:
208
207
  logger.debug(f"Proxy GET request: {url}")
@@ -212,8 +211,8 @@ class ThordataClient:
212
211
  self,
213
212
  url: str,
214
213
  *,
215
- proxy_config: Optional[ProxyConfig] = None,
216
- timeout: Optional[int] = None,
214
+ proxy_config: ProxyConfig | None = None,
215
+ timeout: int | None = None,
217
216
  **kwargs: Any,
218
217
  ) -> requests.Response:
219
218
  logger.debug(f"Proxy POST request: {url}")
@@ -223,8 +222,8 @@ class ThordataClient:
223
222
  self,
224
223
  method: str,
225
224
  url: str,
226
- proxy_config: Optional[ProxyConfig],
227
- timeout: Optional[int],
225
+ proxy_config: ProxyConfig | None,
226
+ timeout: int | None,
228
227
  **kwargs: Any,
229
228
  ) -> requests.Response:
230
229
  timeout = timeout or self._default_timeout
@@ -269,12 +268,12 @@ class ThordataClient:
269
268
  username: str,
270
269
  password: str,
271
270
  *,
272
- country: Optional[str] = None,
273
- state: Optional[str] = None,
274
- city: Optional[str] = None,
275
- session_id: Optional[str] = None,
276
- session_duration: Optional[int] = None,
277
- product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL,
271
+ country: str | None = None,
272
+ state: str | None = None,
273
+ city: str | None = None,
274
+ session_id: str | None = None,
275
+ session_duration: int | None = None,
276
+ product: ProxyProduct | str = ProxyProduct.RESIDENTIAL,
278
277
  ) -> str:
279
278
  config = ProxyConfig(
280
279
  username=username,
@@ -298,9 +297,9 @@ class ThordataClient:
298
297
  method: str,
299
298
  url: str,
300
299
  *,
301
- data: Optional[Dict[str, Any]] = None,
302
- headers: Optional[Dict[str, str]] = None,
303
- params: Optional[Dict[str, Any]] = None,
300
+ data: dict[str, Any] | None = None,
301
+ headers: dict[str, str] | None = None,
302
+ params: dict[str, Any] | None = None,
304
303
  ) -> requests.Response:
305
304
  @with_retry(self._retry_config)
306
305
  def _do_request() -> requests.Response:
@@ -347,8 +346,8 @@ class ThordataClient:
347
346
  *,
348
347
  proxy_config: ProxyConfig,
349
348
  timeout: int,
350
- headers: Optional[Dict[str, str]] = None,
351
- params: Optional[Dict[str, Any]] = None,
349
+ headers: dict[str, str] | None = None,
350
+ params: dict[str, Any] | None = None,
352
351
  data: Any = None,
353
352
  ) -> requests.Response:
354
353
  # 1. Prepare URL and Body
@@ -406,17 +405,17 @@ class ThordataClient:
406
405
  self,
407
406
  query: str,
408
407
  *,
409
- engine: Union[Engine, str] = Engine.GOOGLE,
408
+ engine: Engine | str = Engine.GOOGLE,
410
409
  num: int = 10,
411
- country: Optional[str] = None,
412
- language: Optional[str] = None,
413
- search_type: Optional[str] = None,
414
- device: Optional[str] = None,
415
- render_js: Optional[bool] = None,
416
- no_cache: Optional[bool] = None,
410
+ country: str | None = None,
411
+ language: str | None = None,
412
+ search_type: str | None = None,
413
+ device: str | None = None,
414
+ render_js: bool | None = None,
415
+ no_cache: bool | None = None,
417
416
  output_format: str = "json",
418
417
  **kwargs: Any,
419
- ) -> Dict[str, Any]:
418
+ ) -> dict[str, Any]:
420
419
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
421
420
 
422
421
  request = SerpRequest(
@@ -435,7 +434,7 @@ class ThordataClient:
435
434
 
436
435
  return self.serp_search_advanced(request)
437
436
 
438
- def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
437
+ def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
439
438
  payload = request.to_payload()
440
439
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
441
440
 
@@ -475,12 +474,12 @@ class ThordataClient:
475
474
  *,
476
475
  js_render: bool = False,
477
476
  output_format: str = "html",
478
- country: Optional[str] = None,
479
- block_resources: Optional[str] = None,
480
- wait: Optional[int] = None,
481
- wait_for: Optional[str] = None,
477
+ country: str | None = None,
478
+ block_resources: str | None = None,
479
+ wait: int | None = None,
480
+ wait_for: str | None = None,
482
481
  **kwargs: Any,
483
- ) -> Union[str, bytes]:
482
+ ) -> str | bytes:
484
483
  request = UniversalScrapeRequest(
485
484
  url=url,
486
485
  js_render=js_render,
@@ -493,9 +492,7 @@ class ThordataClient:
493
492
  )
494
493
  return self.universal_scrape_advanced(request)
495
494
 
496
- def universal_scrape_advanced(
497
- self, request: UniversalScrapeRequest
498
- ) -> Union[str, bytes]:
495
+ def universal_scrape_advanced(self, request: UniversalScrapeRequest) -> str | bytes:
499
496
  payload = request.to_payload()
500
497
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
501
498
 
@@ -522,7 +519,7 @@ class ThordataClient:
522
519
 
523
520
  def _process_universal_response(
524
521
  self, response: requests.Response, output_format: str
525
- ) -> Union[str, bytes]:
522
+ ) -> str | bytes:
526
523
  try:
527
524
  resp_json = response.json()
528
525
  except ValueError:
@@ -549,8 +546,8 @@ class ThordataClient:
549
546
  file_name: str,
550
547
  spider_id: str,
551
548
  spider_name: str,
552
- parameters: Dict[str, Any],
553
- universal_params: Optional[Dict[str, Any]] = None,
549
+ parameters: dict[str, Any],
550
+ universal_params: dict[str, Any] | None = None,
554
551
  ) -> str:
555
552
  config = ScraperTaskConfig(
556
553
  file_name=file_name,
@@ -589,8 +586,8 @@ class ThordataClient:
589
586
  file_name: str,
590
587
  spider_id: str,
591
588
  spider_name: str,
592
- parameters: Dict[str, Any],
593
- common_settings: "CommonSettings",
589
+ parameters: dict[str, Any],
590
+ common_settings: CommonSettings,
594
591
  ) -> str:
595
592
  config = VideoTaskConfig(
596
593
  file_name=file_name,
@@ -675,7 +672,7 @@ class ThordataClient:
675
672
  f"Get result failed: {e}", original_error=e
676
673
  ) from e
677
674
 
678
- def list_tasks(self, page: int = 1, size: int = 20) -> Dict[str, Any]:
675
+ def list_tasks(self, page: int = 1, size: int = 20) -> dict[str, Any]:
679
676
  self._require_public_credentials()
680
677
  headers = build_public_api_headers(
681
678
  self.public_token or "", self.public_key or ""
@@ -721,8 +718,8 @@ class ThordataClient:
721
718
  # =========================================================================
722
719
  def get_usage_statistics(
723
720
  self,
724
- from_date: Union[str, date],
725
- to_date: Union[str, date],
721
+ from_date: str | date,
722
+ to_date: str | date,
726
723
  ) -> UsageStatistics:
727
724
  self._require_public_credentials()
728
725
  if isinstance(from_date, date):
@@ -746,7 +743,7 @@ class ThordataClient:
746
743
  return UsageStatistics.from_dict(data.get("data", data))
747
744
 
748
745
  def list_proxy_users(
749
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
746
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
750
747
  ) -> ProxyUserList:
751
748
  self._require_public_credentials()
752
749
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
@@ -768,10 +765,10 @@ class ThordataClient:
768
765
  self,
769
766
  username: str,
770
767
  password: str,
771
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
768
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
772
769
  traffic_limit: int = 0,
773
770
  status: bool = True,
774
- ) -> Dict[str, Any]:
771
+ ) -> dict[str, Any]:
775
772
  self._require_public_credentials()
776
773
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
777
774
  headers = build_public_api_headers(
@@ -799,9 +796,9 @@ class ThordataClient:
799
796
  def add_whitelist_ip(
800
797
  self,
801
798
  ip: str,
802
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
799
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
803
800
  status: bool = True,
804
- ) -> Dict[str, Any]:
801
+ ) -> dict[str, Any]:
805
802
  self._require_public_credentials()
806
803
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
807
804
  headers = build_public_api_headers(
@@ -823,7 +820,7 @@ class ThordataClient:
823
820
  )
824
821
  return data.get("data", {})
825
822
 
826
- def list_proxy_servers(self, proxy_type: int) -> List[ProxyServer]:
823
+ def list_proxy_servers(self, proxy_type: int) -> list[ProxyServer]:
827
824
  self._require_public_credentials()
828
825
  params = {
829
826
  "token": self.public_token,
@@ -849,8 +846,8 @@ class ThordataClient:
849
846
  return [ProxyServer.from_dict(s) for s in server_list]
850
847
 
851
848
  def get_proxy_expiration(
852
- self, ips: Union[str, List[str]], proxy_type: int
853
- ) -> Dict[str, Any]:
849
+ self, ips: str | list[str], proxy_type: int
850
+ ) -> dict[str, Any]:
854
851
  self._require_public_credentials()
855
852
  if isinstance(ips, list):
856
853
  ips = ",".join(ips)
@@ -870,25 +867,25 @@ class ThordataClient:
870
867
  return data.get("data", data)
871
868
 
872
869
  def list_countries(
873
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
874
- ) -> List[Dict[str, Any]]:
870
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
871
+ ) -> list[dict[str, Any]]:
875
872
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
876
873
  return self._get_locations("countries", proxy_type=pt)
877
874
 
878
875
  def list_states(
879
876
  self,
880
877
  country_code: str,
881
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
882
- ) -> List[Dict[str, Any]]:
878
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
879
+ ) -> list[dict[str, Any]]:
883
880
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
884
881
  return self._get_locations("states", proxy_type=pt, country_code=country_code)
885
882
 
886
883
  def list_cities(
887
884
  self,
888
885
  country_code: str,
889
- state_code: Optional[str] = None,
890
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
891
- ) -> List[Dict[str, Any]]:
886
+ state_code: str | None = None,
887
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
888
+ ) -> list[dict[str, Any]]:
892
889
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
893
890
  kwargs = {"proxy_type": pt, "country_code": country_code}
894
891
  if state_code:
@@ -898,12 +895,12 @@ class ThordataClient:
898
895
  def list_asn(
899
896
  self,
900
897
  country_code: str,
901
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
902
- ) -> List[Dict[str, Any]]:
898
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
899
+ ) -> list[dict[str, Any]]:
903
900
  pt = int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
904
901
  return self._get_locations("asn", proxy_type=pt, country_code=country_code)
905
902
 
906
- def _get_locations(self, endpoint: str, **kwargs: Any) -> List[Dict[str, Any]]:
903
+ def _get_locations(self, endpoint: str, **kwargs: Any) -> list[dict[str, Any]]:
907
904
  self._require_public_credentials()
908
905
  params = {"token": self.public_token, "key": self.public_key}
909
906
  for k, v in kwargs.items():
@@ -928,7 +925,7 @@ class ThordataClient:
928
925
 
929
926
  def _get_proxy_endpoint_overrides(
930
927
  self, product: ProxyProduct
931
- ) -> tuple[Optional[str], Optional[int], str]:
928
+ ) -> tuple[str | None, int | None, str]:
932
929
  prefix = product.value.upper()
933
930
  host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
934
931
  "THORDATA_PROXY_HOST"
@@ -944,7 +941,7 @@ class ThordataClient:
944
941
  port = int(port_raw) if port_raw and port_raw.isdigit() else None
945
942
  return host or None, port, protocol
946
943
 
947
- def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
944
+ def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
948
945
  for prod in [
949
946
  ProxyProduct.RESIDENTIAL,
950
947
  ProxyProduct.DATACENTER,
thordata/demo.py CHANGED
@@ -17,11 +17,9 @@ Notes:
17
17
 
18
18
  from __future__ import annotations
19
19
 
20
- import os
21
20
  import runpy
22
21
  import sys
23
22
  from pathlib import Path
24
- from typing import Callable, Dict
25
23
 
26
24
 
27
25
  def _configure_stdio() -> None:
@@ -53,7 +51,7 @@ def _examples_dir() -> Path:
53
51
  return _repo_root() / "examples"
54
52
 
55
53
 
56
- def _demo_map() -> Dict[str, Path]:
54
+ def _demo_map() -> dict[str, Path]:
57
55
  ex = _examples_dir()
58
56
  return {
59
57
  "serp": ex / "demo_serp_api.py",
thordata/exceptions.py CHANGED
@@ -15,7 +15,7 @@ Exception Hierarchy:
15
15
 
16
16
  from __future__ import annotations
17
17
 
18
- from typing import Any, Optional, Set
18
+ from typing import Any
19
19
 
20
20
  # =============================================================================
21
21
  # Base Exception
@@ -63,7 +63,7 @@ class ThordataNetworkError(ThordataError):
63
63
  self,
64
64
  message: str,
65
65
  *,
66
- original_error: Optional[Exception] = None,
66
+ original_error: Exception | None = None,
67
67
  ) -> None:
68
68
  super().__init__(message)
69
69
  self.original_error = original_error
@@ -98,16 +98,16 @@ class ThordataAPIError(ThordataError):
98
98
  """
99
99
 
100
100
  # HTTP status codes that indicate this error type
101
- HTTP_STATUS_CODES: Set[int] = set()
101
+ HTTP_STATUS_CODES: set[int] = set()
102
102
 
103
103
  def __init__(
104
104
  self,
105
105
  message: str,
106
106
  *,
107
- status_code: Optional[int] = None,
108
- code: Optional[int] = None,
107
+ status_code: int | None = None,
108
+ code: int | None = None,
109
109
  payload: Any = None,
110
- request_id: Optional[str] = None,
110
+ request_id: str | None = None,
111
111
  ) -> None:
112
112
  super().__init__(message)
113
113
  self.status_code = status_code
@@ -168,7 +168,7 @@ class ThordataRateLimitError(ThordataAPIError):
168
168
  self,
169
169
  message: str,
170
170
  *,
171
- retry_after: Optional[int] = None,
171
+ retry_after: int | None = None,
172
172
  **kwargs: Any,
173
173
  ) -> None:
174
174
  super().__init__(message, **kwargs)
@@ -223,7 +223,7 @@ class ThordataNotCollectedError(ThordataAPIError):
223
223
  """
224
224
 
225
225
  API_CODES = {300}
226
- HTTP_STATUS_CODES: Set[int] = set()
226
+ HTTP_STATUS_CODES: set[int] = set()
227
227
 
228
228
  @property
229
229
  def is_retryable(self) -> bool:
@@ -238,10 +238,10 @@ class ThordataNotCollectedError(ThordataAPIError):
238
238
  def raise_for_code(
239
239
  message: str,
240
240
  *,
241
- status_code: Optional[int] = None,
242
- code: Optional[int] = None,
241
+ status_code: int | None = None,
242
+ code: int | None = None,
243
243
  payload: Any = None,
244
- request_id: Optional[str] = None,
244
+ request_id: str | None = None,
245
245
  ) -> None:
246
246
  """
247
247
  Factory function to raise the appropriate exception based on status/code.
@@ -266,7 +266,7 @@ def raise_for_code(
266
266
  # Determine the effective error code.
267
267
  # Prefer payload `code` when present and not success (200),
268
268
  # otherwise fall back to HTTP status when it indicates an error.
269
- effective_code: Optional[int] = None
269
+ effective_code: int | None = None
270
270
 
271
271
  if code is not None and code != 200:
272
272
  effective_code = code