thordata-sdk 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "1.0.0"
38
+ __version__ = "1.1.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import os
5
+ from collections.abc import Iterable
5
6
  from pathlib import Path
6
- from typing import Any, Iterable, Optional
7
+ from typing import Any
7
8
 
8
9
  try:
9
10
  from dotenv import load_dotenv
@@ -23,7 +24,7 @@ def env(name: str) -> str:
23
24
  return (os.getenv(name) or "").strip()
24
25
 
25
26
 
26
- def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
27
+ def skip_if_missing(required: Iterable[str], *, tip: str | None = None) -> bool:
27
28
  missing = [k for k in required if not env(k)]
28
29
  if not missing:
29
30
  return False
thordata/_utils.py CHANGED
@@ -9,7 +9,8 @@ from __future__ import annotations
9
9
  import base64
10
10
  import json
11
11
  import logging
12
- from typing import Any, Dict
12
+ import platform
13
+ from typing import Any
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
16
 
@@ -70,7 +71,7 @@ def decode_base64_image(png_str: str) -> bytes:
70
71
  raise ValueError(f"Failed to decode base64 image: {e}") from e
71
72
 
72
73
 
73
- def build_auth_headers(token: str, mode: str = "bearer") -> Dict[str, str]:
74
+ def build_auth_headers(token: str, mode: str = "bearer") -> dict[str, str]:
74
75
  """
75
76
  Build authorization headers for API requests.
76
77
 
@@ -104,7 +105,7 @@ def build_builder_headers(
104
105
  scraper_token: str,
105
106
  public_token: str,
106
107
  public_key: str,
107
- ) -> Dict[str, str]:
108
+ ) -> dict[str, str]:
108
109
  """
109
110
  Build headers for Web Scraper builder API.
110
111
 
@@ -129,7 +130,7 @@ def build_builder_headers(
129
130
  }
130
131
 
131
132
 
132
- def build_public_api_headers(public_token: str, public_key: str) -> Dict[str, str]:
133
+ def build_public_api_headers(public_token: str, public_key: str) -> dict[str, str]:
133
134
  """
134
135
  Build headers for public API requests (task status, locations, etc.)
135
136
 
@@ -171,17 +172,19 @@ def extract_error_message(payload: Any) -> str:
171
172
 
172
173
  def build_user_agent(sdk_version: str, http_client: str) -> str:
173
174
  """
174
- Build a default User-Agent for the SDK.
175
-
176
- Args:
177
- sdk_version: SDK version string.
178
- http_client: "requests" or "aiohttp" (or any identifier).
179
-
180
- Returns:
181
- A User-Agent string.
175
+ Build a standardized User-Agent for the SDK.
176
+ Format: thordata-python-sdk/{version} python/{py_ver} ({system}/{release}; {machine}; {http_client})
182
177
  """
183
- import platform
184
-
185
- py = platform.python_version()
186
- system = platform.system()
187
- return f"thordata-python-sdk/{sdk_version} (python {py}; {system}; {http_client})"
178
+ py_ver = platform.python_version()
179
+ system = platform.system() or "unknown"
180
+ release = platform.release() or "unknown"
181
+ machine = platform.machine() or "unknown"
182
+
183
+ # Strip ";" and surrounding whitespace from the system name so it cannot break the ";"-separated UA comment
184
+ system = system.replace(";", "").strip()
185
+
186
+ return (
187
+ f"thordata-python-sdk/{sdk_version} "
188
+ f"python/{py_ver} "
189
+ f"({system}/{release}; {machine}; {http_client})"
190
+ )
thordata/async_client.py CHANGED
@@ -26,7 +26,7 @@ import asyncio
26
26
  import logging
27
27
  import os
28
28
  from datetime import date
29
- from typing import Any, Dict, List, Optional, Union
29
+ from typing import Any
30
30
 
31
31
  import aiohttp
32
32
 
@@ -58,7 +58,6 @@ from .models import (
58
58
  UniversalScrapeRequest,
59
59
  UsageStatistics,
60
60
  VideoTaskConfig,
61
- WhitelistProxyConfig,
62
61
  )
63
62
  from .retry import RetryConfig
64
63
 
@@ -98,18 +97,18 @@ class AsyncThordataClient:
98
97
  def __init__(
99
98
  self,
100
99
  scraper_token: str,
101
- public_token: Optional[str] = None,
102
- public_key: Optional[str] = None,
100
+ public_token: str | None = None,
101
+ public_key: str | None = None,
103
102
  proxy_host: str = "pr.thordata.net",
104
103
  proxy_port: int = 9999,
105
104
  timeout: int = 30,
106
105
  api_timeout: int = 60,
107
- retry_config: Optional[RetryConfig] = None,
106
+ retry_config: RetryConfig | None = None,
108
107
  auth_mode: str = "bearer",
109
- scraperapi_base_url: Optional[str] = None,
110
- universalapi_base_url: Optional[str] = None,
111
- web_scraper_api_base_url: Optional[str] = None,
112
- locations_base_url: Optional[str] = None,
108
+ scraperapi_base_url: str | None = None,
109
+ universalapi_base_url: str | None = None,
110
+ web_scraper_api_base_url: str | None = None,
111
+ locations_base_url: str | None = None,
113
112
  ) -> None:
114
113
  """Initialize the Async Thordata Client."""
115
114
  if not scraper_token:
@@ -202,7 +201,7 @@ class AsyncThordataClient:
202
201
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
203
202
 
204
203
  # Session initialized lazily
205
- self._session: Optional[aiohttp.ClientSession] = None
204
+ self._session: aiohttp.ClientSession | None = None
206
205
 
207
206
  async def __aenter__(self) -> AsyncThordataClient:
208
207
  """Async context manager entry."""
@@ -241,7 +240,7 @@ class AsyncThordataClient:
241
240
  self,
242
241
  url: str,
243
242
  *,
244
- proxy_config: Optional[ProxyConfig] = None,
243
+ proxy_config: ProxyConfig | None = None,
245
244
  **kwargs: Any,
246
245
  ) -> aiohttp.ClientResponse:
247
246
  """
@@ -297,7 +296,7 @@ class AsyncThordataClient:
297
296
  self,
298
297
  url: str,
299
298
  *,
300
- proxy_config: Optional[ProxyConfig] = None,
299
+ proxy_config: ProxyConfig | None = None,
301
300
  **kwargs: Any,
302
301
  ) -> aiohttp.ClientResponse:
303
302
  """
@@ -357,17 +356,17 @@ class AsyncThordataClient:
357
356
  self,
358
357
  query: str,
359
358
  *,
360
- engine: Union[Engine, str] = Engine.GOOGLE,
359
+ engine: Engine | str = Engine.GOOGLE,
361
360
  num: int = 10,
362
- country: Optional[str] = None,
363
- language: Optional[str] = None,
364
- search_type: Optional[str] = None,
365
- device: Optional[str] = None,
366
- render_js: Optional[bool] = None,
367
- no_cache: Optional[bool] = None,
361
+ country: str | None = None,
362
+ language: str | None = None,
363
+ search_type: str | None = None,
364
+ device: str | None = None,
365
+ render_js: bool | None = None,
366
+ no_cache: bool | None = None,
368
367
  output_format: str = "json",
369
368
  **kwargs: Any,
370
- ) -> Dict[str, Any]:
369
+ ) -> dict[str, Any]:
371
370
  """
372
371
  Execute an async SERP search.
373
372
 
@@ -447,7 +446,7 @@ class AsyncThordataClient:
447
446
  original_error=e,
448
447
  ) from e
449
448
 
450
- async def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
449
+ async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
451
450
  """
452
451
  Execute an async SERP search using a SerpRequest object.
453
452
  """
@@ -505,12 +504,12 @@ class AsyncThordataClient:
505
504
  *,
506
505
  js_render: bool = False,
507
506
  output_format: str = "html",
508
- country: Optional[str] = None,
509
- block_resources: Optional[str] = None,
510
- wait: Optional[int] = None,
511
- wait_for: Optional[str] = None,
507
+ country: str | None = None,
508
+ block_resources: str | None = None,
509
+ wait: int | None = None,
510
+ wait_for: str | None = None,
512
511
  **kwargs: Any,
513
- ) -> Union[str, bytes]:
512
+ ) -> str | bytes:
514
513
  """
515
514
  Async scrape using Universal API (Web Unlocker).
516
515
 
@@ -541,7 +540,7 @@ class AsyncThordataClient:
541
540
 
542
541
  async def universal_scrape_advanced(
543
542
  self, request: UniversalScrapeRequest
544
- ) -> Union[str, bytes]:
543
+ ) -> str | bytes:
545
544
  """
546
545
  Async scrape using a UniversalScrapeRequest object.
547
546
  """
@@ -600,8 +599,8 @@ class AsyncThordataClient:
600
599
  file_name: str,
601
600
  spider_id: str,
602
601
  spider_name: str,
603
- parameters: Dict[str, Any],
604
- universal_params: Optional[Dict[str, Any]] = None,
602
+ parameters: dict[str, Any],
603
+ universal_params: dict[str, Any] | None = None,
605
604
  ) -> str:
606
605
  """
607
606
  Create an async Web Scraper task.
@@ -659,7 +658,7 @@ class AsyncThordataClient:
659
658
  file_name: str,
660
659
  spider_id: str,
661
660
  spider_name: str,
662
- parameters: Dict[str, Any],
661
+ parameters: dict[str, Any],
663
662
  common_settings: CommonSettings,
664
663
  ) -> str:
665
664
  """
@@ -828,7 +827,7 @@ class AsyncThordataClient:
828
827
  self,
829
828
  page: int = 1,
830
829
  size: int = 20,
831
- ) -> Dict[str, Any]:
830
+ ) -> dict[str, Any]:
832
831
  """
833
832
  List all Web Scraper tasks.
834
833
 
@@ -845,7 +844,7 @@ class AsyncThordataClient:
845
844
  headers = build_public_api_headers(
846
845
  self.public_token or "", self.public_key or ""
847
846
  )
848
- payload: Dict[str, Any] = {}
847
+ payload: dict[str, Any] = {}
849
848
  if page:
850
849
  payload["page"] = str(page)
851
850
  if size:
@@ -921,8 +920,8 @@ class AsyncThordataClient:
921
920
 
922
921
  async def get_usage_statistics(
923
922
  self,
924
- from_date: Union[str, date],
925
- to_date: Union[str, date],
923
+ from_date: str | date,
924
+ to_date: str | date,
926
925
  ) -> UsageStatistics:
927
926
  """
928
927
  Get account usage statistics for a date range.
@@ -989,7 +988,7 @@ class AsyncThordataClient:
989
988
  f"Usage statistics failed: {e}", original_error=e
990
989
  ) from e
991
990
 
992
- async def get_residential_balance(self) -> Dict[str, Any]:
991
+ async def get_residential_balance(self) -> dict[str, Any]:
993
992
  """
994
993
  Get residential proxy balance.
995
994
 
@@ -1030,9 +1029,9 @@ class AsyncThordataClient:
1030
1029
 
1031
1030
  async def get_residential_usage(
1032
1031
  self,
1033
- start_time: Union[str, int],
1034
- end_time: Union[str, int],
1035
- ) -> Dict[str, Any]:
1032
+ start_time: str | int,
1033
+ end_time: str | int,
1034
+ ) -> dict[str, Any]:
1036
1035
  """
1037
1036
  Get residential proxy usage records.
1038
1037
 
@@ -1071,7 +1070,7 @@ class AsyncThordataClient:
1071
1070
  ) from e
1072
1071
 
1073
1072
  async def list_proxy_users(
1074
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
1073
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
1075
1074
  ) -> ProxyUserList:
1076
1075
  """List all proxy users (sub-accounts)."""
1077
1076
 
@@ -1126,10 +1125,10 @@ class AsyncThordataClient:
1126
1125
  self,
1127
1126
  username: str,
1128
1127
  password: str,
1129
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1128
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1130
1129
  traffic_limit: int = 0,
1131
1130
  status: bool = True,
1132
- ) -> Dict[str, Any]:
1131
+ ) -> dict[str, Any]:
1133
1132
  """Create a new proxy user (sub-account)."""
1134
1133
  self._require_public_credentials()
1135
1134
  session = self._get_session()
@@ -1181,9 +1180,9 @@ class AsyncThordataClient:
1181
1180
  async def add_whitelist_ip(
1182
1181
  self,
1183
1182
  ip: str,
1184
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1183
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1185
1184
  status: bool = True,
1186
- ) -> Dict[str, Any]:
1185
+ ) -> dict[str, Any]:
1187
1186
  """
1188
1187
  Add an IP to the whitelist for IP authentication.
1189
1188
  """
@@ -1237,7 +1236,7 @@ class AsyncThordataClient:
1237
1236
  async def list_proxy_servers(
1238
1237
  self,
1239
1238
  proxy_type: int,
1240
- ) -> List[ProxyServer]:
1239
+ ) -> list[ProxyServer]:
1241
1240
  """
1242
1241
  List ISP or Datacenter proxy servers.
1243
1242
  """
@@ -1290,7 +1289,7 @@ class AsyncThordataClient:
1290
1289
  f"List servers failed: {e}", original_error=e
1291
1290
  ) from e
1292
1291
 
1293
- async def get_isp_regions(self) -> List[Dict[str, Any]]:
1292
+ async def get_isp_regions(self) -> list[dict[str, Any]]:
1294
1293
  """
1295
1294
  Get available ISP proxy regions.
1296
1295
 
@@ -1329,7 +1328,7 @@ class AsyncThordataClient:
1329
1328
  f"Get ISP regions failed: {e}", original_error=e
1330
1329
  ) from e
1331
1330
 
1332
- async def list_isp_proxies(self) -> List[Dict[str, Any]]:
1331
+ async def list_isp_proxies(self) -> list[dict[str, Any]]:
1333
1332
  """
1334
1333
  List ISP proxies.
1335
1334
 
@@ -1368,7 +1367,7 @@ class AsyncThordataClient:
1368
1367
  f"List ISP proxies failed: {e}", original_error=e
1369
1368
  ) from e
1370
1369
 
1371
- async def get_wallet_balance(self) -> Dict[str, Any]:
1370
+ async def get_wallet_balance(self) -> dict[str, Any]:
1372
1371
  """
1373
1372
  Get wallet balance for ISP proxies.
1374
1373
 
@@ -1409,9 +1408,9 @@ class AsyncThordataClient:
1409
1408
 
1410
1409
  async def get_proxy_expiration(
1411
1410
  self,
1412
- ips: Union[str, List[str]],
1411
+ ips: str | list[str],
1413
1412
  proxy_type: int,
1414
- ) -> Dict[str, Any]:
1413
+ ) -> dict[str, Any]:
1415
1414
  """
1416
1415
  Get expiration time for specific proxy IPs.
1417
1416
  """
@@ -1465,8 +1464,8 @@ class AsyncThordataClient:
1465
1464
  # =========================================================================
1466
1465
 
1467
1466
  async def list_countries(
1468
- self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
1469
- ) -> List[Dict[str, Any]]:
1467
+ self, proxy_type: ProxyType | int = ProxyType.RESIDENTIAL
1468
+ ) -> list[dict[str, Any]]:
1470
1469
  """List supported countries."""
1471
1470
  return await self._get_locations(
1472
1471
  "countries",
@@ -1478,8 +1477,8 @@ class AsyncThordataClient:
1478
1477
  async def list_states(
1479
1478
  self,
1480
1479
  country_code: str,
1481
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1482
- ) -> List[Dict[str, Any]]:
1480
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1481
+ ) -> list[dict[str, Any]]:
1483
1482
  """List supported states for a country."""
1484
1483
  return await self._get_locations(
1485
1484
  "states",
@@ -1492,9 +1491,9 @@ class AsyncThordataClient:
1492
1491
  async def list_cities(
1493
1492
  self,
1494
1493
  country_code: str,
1495
- state_code: Optional[str] = None,
1496
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1497
- ) -> List[Dict[str, Any]]:
1494
+ state_code: str | None = None,
1495
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1496
+ ) -> list[dict[str, Any]]:
1498
1497
  """List supported cities."""
1499
1498
  kwargs = {
1500
1499
  "proxy_type": (
@@ -1510,8 +1509,8 @@ class AsyncThordataClient:
1510
1509
  async def list_asn(
1511
1510
  self,
1512
1511
  country_code: str,
1513
- proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
1514
- ) -> List[Dict[str, Any]]:
1512
+ proxy_type: ProxyType | int = ProxyType.RESIDENTIAL,
1513
+ ) -> list[dict[str, Any]]:
1515
1514
  """List supported ASNs."""
1516
1515
  return await self._get_locations(
1517
1516
  "asn",
@@ -1523,7 +1522,7 @@ class AsyncThordataClient:
1523
1522
 
1524
1523
  async def _get_locations(
1525
1524
  self, endpoint: str, **kwargs: Any
1526
- ) -> List[Dict[str, Any]]:
1525
+ ) -> list[dict[str, Any]]:
1527
1526
  """Internal async locations API call."""
1528
1527
  self._require_public_credentials()
1529
1528
 
@@ -1540,24 +1539,26 @@ class AsyncThordataClient:
1540
1539
  logger.debug(f"Async Locations API: {url}")
1541
1540
 
1542
1541
  # Create temporary session for this request (no proxy needed)
1543
- async with aiohttp.ClientSession(trust_env=True) as temp_session:
1544
- async with temp_session.get(url, params=params) as response:
1545
- response.raise_for_status()
1546
- data = await response.json()
1547
-
1548
- if isinstance(data, dict):
1549
- code = data.get("code")
1550
- if code is not None and code != 200:
1551
- msg = data.get("msg", "")
1552
- raise RuntimeError(
1553
- f"Locations API error ({endpoint}): code={code}, msg={msg}"
1554
- )
1555
- return data.get("data") or []
1542
+ async with (
1543
+ aiohttp.ClientSession(trust_env=True) as temp_session,
1544
+ temp_session.get(url, params=params) as response,
1545
+ ):
1546
+ response.raise_for_status()
1547
+ data = await response.json()
1548
+
1549
+ if isinstance(data, dict):
1550
+ code = data.get("code")
1551
+ if code is not None and code != 200:
1552
+ msg = data.get("msg", "")
1553
+ raise RuntimeError(
1554
+ f"Locations API error ({endpoint}): code={code}, msg={msg}"
1555
+ )
1556
+ return data.get("data") or []
1556
1557
 
1557
- if isinstance(data, list):
1558
- return data
1558
+ if isinstance(data, list):
1559
+ return data
1559
1560
 
1560
- return []
1561
+ return []
1561
1562
 
1562
1563
  # =========================================================================
1563
1564
  # Helper Methods
@@ -1573,7 +1574,7 @@ class AsyncThordataClient:
1573
1574
 
1574
1575
  def _get_proxy_endpoint_overrides(
1575
1576
  self, product: ProxyProduct
1576
- ) -> tuple[Optional[str], Optional[int], str]:
1577
+ ) -> tuple[str | None, int | None, str]:
1577
1578
  prefix = product.value.upper()
1578
1579
 
1579
1580
  host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
@@ -1588,7 +1589,7 @@ class AsyncThordataClient:
1588
1589
  or "http"
1589
1590
  )
1590
1591
 
1591
- port: Optional[int] = None
1592
+ port: int | None = None
1592
1593
  if port_raw:
1593
1594
  try:
1594
1595
  port = int(port_raw)
@@ -1597,7 +1598,7 @@ class AsyncThordataClient:
1597
1598
 
1598
1599
  return host or None, port, protocol
1599
1600
 
1600
- def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
1601
+ def _get_default_proxy_config_from_env(self) -> ProxyConfig | None:
1601
1602
  u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1602
1603
  p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
1603
1604
  if u and p:
@@ -1645,7 +1646,7 @@ class AsyncThordataClient:
1645
1646
 
1646
1647
  return None
1647
1648
 
1648
- def _build_gateway_headers(self) -> Dict[str, str]:
1649
+ def _build_gateway_headers(self) -> dict[str, str]:
1649
1650
  """
1650
1651
  Headers for gateway-style endpoints.
1651
1652