thordata-sdk 0.8.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/__init__.py CHANGED
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "0.8.0"
38
+ __version__ = "1.0.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Iterable, Optional
7
+
8
+ try:
9
+ from dotenv import load_dotenv
10
+ except Exception: # pragma: no cover
11
+ load_dotenv = None
12
+
13
+
14
+ def load_env() -> None:
15
+ """Load .env from repo root if python-dotenv is installed."""
16
+ if load_dotenv is None:
17
+ return
18
+ repo_root = Path(__file__).resolve().parents[2]
19
+ load_dotenv(dotenv_path=repo_root / ".env")
20
+
21
+
22
+ def env(name: str) -> str:
23
+ return (os.getenv(name) or "").strip()
24
+
25
+
26
+ def skip_if_missing(required: Iterable[str], *, tip: Optional[str] = None) -> bool:
27
+ missing = [k for k in required if not env(k)]
28
+ if not missing:
29
+ return False
30
+ print("Skipping live example: missing env:", ", ".join(missing))
31
+ if tip:
32
+ print(tip)
33
+ else:
34
+ print("Tip: copy .env.example to .env and fill values, then re-run.")
35
+ return True
36
+
37
+
38
+ def parse_json_env(name: str, default: str = "{}") -> Any:
39
+ raw = env(name) or default
40
+ return json.loads(raw)
41
+
42
+
43
+ def normalize_task_parameters(raw: Any) -> dict[str, Any]:
44
+ """Accept {..} or [{..}] and return a single dict for create_scraper_task(parameters=...)."""
45
+ if isinstance(raw, list):
46
+ if not raw:
47
+ raise ValueError("Task parameters JSON array must not be empty")
48
+ raw = raw[0]
49
+ if not isinstance(raw, dict):
50
+ raise ValueError("Task parameters must be a JSON object (or array of objects)")
51
+ return raw
52
+
53
+
54
+ def output_dir() -> Path:
55
+ """Return output dir for examples; defaults to examples/output (ignored by git)."""
56
+ repo_root = Path(__file__).resolve().parents[2]
57
+ d = env("THORDATA_OUTPUT_DIR") or str(repo_root / "examples" / "output")
58
+ p = Path(d)
59
+ p.mkdir(parents=True, exist_ok=True)
60
+ return p
61
+
62
+
63
+ def write_text(filename: str, content: str) -> Path:
64
+ p = output_dir() / filename
65
+ p.write_text(content, encoding="utf-8", errors="replace")
66
+ return p
67
+
68
+
69
+ def write_json(filename: str, data: Any) -> Path:
70
+ p = output_dir() / filename
71
+ p.write_text(
72
+ json.dumps(data, ensure_ascii=False, indent=2),
73
+ encoding="utf-8",
74
+ errors="replace",
75
+ )
76
+ return p
thordata/_utils.py CHANGED
@@ -185,23 +185,3 @@ def build_user_agent(sdk_version: str, http_client: str) -> str:
185
185
  py = platform.python_version()
186
186
  system = platform.system()
187
187
  return f"thordata-python-sdk/{sdk_version} (python {py}; {system}; {http_client})"
188
-
189
-
190
- def build_sign_headers(sign: str, api_key: str) -> Dict[str, str]:
191
- """
192
- Build headers for Public API NEW (sign + apiKey authentication).
193
-
194
- This is a different authentication system from token+key.
195
-
196
- Args:
197
- sign: The sign value from Dashboard (immutable).
198
- api_key: The apiKey value from Dashboard (can be changed).
199
-
200
- Returns:
201
- Headers dict with sign, apiKey, and Content-Type.
202
- """
203
- return {
204
- "sign": sign,
205
- "apiKey": api_key,
206
- "Content-Type": "application/x-www-form-urlencoded",
207
- }
thordata/async_client.py CHANGED
@@ -25,7 +25,7 @@ from __future__ import annotations
25
25
  import asyncio
26
26
  import logging
27
27
  import os
28
- from datetime import date, datetime
28
+ from datetime import date
29
29
  from typing import Any, Dict, List, Optional, Union
30
30
 
31
31
  import aiohttp
@@ -35,7 +35,6 @@ from ._utils import (
35
35
  build_auth_headers,
36
36
  build_builder_headers,
37
37
  build_public_api_headers,
38
- build_sign_headers,
39
38
  build_user_agent,
40
39
  decode_base64_image,
41
40
  extract_error_message,
@@ -51,14 +50,15 @@ from .exceptions import (
51
50
  from .models import (
52
51
  CommonSettings,
53
52
  ProxyConfig,
53
+ ProxyProduct,
54
54
  ProxyServer,
55
- ProxyUser,
56
55
  ProxyUserList,
57
56
  ScraperTaskConfig,
58
57
  SerpRequest,
59
58
  UniversalScrapeRequest,
60
59
  UsageStatistics,
61
60
  VideoTaskConfig,
61
+ WhitelistProxyConfig,
62
62
  )
63
63
  from .retry import RetryConfig
64
64
 
@@ -100,8 +100,6 @@ class AsyncThordataClient:
100
100
  scraper_token: str,
101
101
  public_token: Optional[str] = None,
102
102
  public_key: Optional[str] = None,
103
- sign: Optional[str] = None,
104
- api_key: Optional[str] = None,
105
103
  proxy_host: str = "pr.thordata.net",
106
104
  proxy_port: int = 9999,
107
105
  timeout: int = 30,
@@ -121,18 +119,9 @@ class AsyncThordataClient:
121
119
  self.public_token = public_token
122
120
  self.public_key = public_key
123
121
 
124
- # Automatic Fallback Logic: If sign/api_key is not provided, try using public_token/key
125
- self.sign = sign or os.getenv("THORDATA_SIGN") or self.public_token
126
- self.api_key = api_key or os.getenv("THORDATA_API_KEY") or self.public_key
127
-
128
- # Public API authentication
129
- self.sign = sign or os.getenv("THORDATA_SIGN")
130
- self.api_key = api_key or os.getenv("THORDATA_API_KEY")
131
-
132
122
  # Proxy configuration
133
123
  self._proxy_host = proxy_host
134
124
  self._proxy_port = proxy_port
135
- self._default_timeout = aiohttp.ClientTimeout(total=timeout)
136
125
 
137
126
  # Timeout configuration
138
127
  self._default_timeout = aiohttp.ClientTimeout(total=timeout)
@@ -141,19 +130,13 @@ class AsyncThordataClient:
141
130
  # Retry configuration
142
131
  self._retry_config = retry_config or RetryConfig()
143
132
 
144
- # Authentication mode
133
+ # Authentication mode (for scraping APIs)
145
134
  self._auth_mode = auth_mode.lower()
146
135
  if self._auth_mode not in ("bearer", "header_token"):
147
136
  raise ThordataConfigError(
148
137
  f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
149
138
  )
150
139
 
151
- # Pre-calculate proxy auth
152
- self._proxy_url = f"http://{proxy_host}:{proxy_port}"
153
- self._proxy_auth = aiohttp.BasicAuth(
154
- login=f"td-customer-{scraper_token}", password=""
155
- )
156
-
157
140
  # Base URLs (allow override via args or env vars for testing and custom routing)
158
141
  scraperapi_base = (
159
142
  scraperapi_base_url
@@ -179,6 +162,7 @@ class AsyncThordataClient:
179
162
  or self.LOCATIONS_URL
180
163
  ).rstrip("/")
181
164
 
165
+ # Keep these env overrides for now
182
166
  gateway_base = os.getenv(
183
167
  "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
184
168
  )
@@ -193,8 +177,11 @@ class AsyncThordataClient:
193
177
  self._builder_url = f"{scraperapi_base}/builder"
194
178
  self._video_builder_url = f"{scraperapi_base}/video_builder"
195
179
  self._universal_url = f"{universalapi_base}/request"
180
+
196
181
  self._status_url = f"{web_scraper_api_base}/tasks-status"
197
182
  self._download_url = f"{web_scraper_api_base}/tasks-download"
183
+ self._list_url = f"{web_scraper_api_base}/tasks-list"
184
+
198
185
  self._locations_base_url = locations_base
199
186
  self._usage_stats_url = (
200
187
  f"{locations_base.replace('/locations', '')}/account/usage-statistics"
@@ -202,16 +189,17 @@ class AsyncThordataClient:
202
189
  self._proxy_users_url = (
203
190
  f"{locations_base.replace('/locations', '')}/proxy-users"
204
191
  )
192
+
205
193
  whitelist_base = os.getenv(
206
194
  "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
207
195
  )
208
196
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
197
+
209
198
  proxy_api_base = os.getenv(
210
199
  "THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
211
200
  )
212
201
  self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
213
202
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
214
- self._list_url = f"{web_scraper_api_base}/tasks-list"
215
203
 
216
204
  # Session initialized lazily
217
205
  self._session: Optional[aiohttp.ClientSession] = None
@@ -271,11 +259,26 @@ class AsyncThordataClient:
271
259
 
272
260
  logger.debug(f"Async Proxy GET: {url}")
273
261
 
274
- if proxy_config:
275
- proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
276
- else:
277
- proxy_url = self._proxy_url
278
- proxy_auth = self._proxy_auth
262
+ if proxy_config is None:
263
+ proxy_config = self._get_default_proxy_config_from_env()
264
+
265
+ if proxy_config is None:
266
+ raise ThordataConfigError(
267
+ "Proxy credentials are missing. "
268
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
269
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
270
+ )
271
+
272
+ # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
273
+ # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
274
+ # to avoid confusing "it always fails" behavior.
275
+ if getattr(proxy_config, "protocol", "http").lower() == "https":
276
+ raise ThordataConfigError(
277
+ "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
278
+ "aiohttp support for 'https://' proxies is limited and may fail. "
279
+ "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
280
+ )
281
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
279
282
 
280
283
  try:
281
284
  return await session.get(
@@ -312,11 +315,26 @@ class AsyncThordataClient:
312
315
 
313
316
  logger.debug(f"Async Proxy POST: {url}")
314
317
 
315
- if proxy_config:
316
- proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
317
- else:
318
- proxy_url = self._proxy_url
319
- proxy_auth = self._proxy_auth
318
+ if proxy_config is None:
319
+ proxy_config = self._get_default_proxy_config_from_env()
320
+
321
+ if proxy_config is None:
322
+ raise ThordataConfigError(
323
+ "Proxy credentials are missing. "
324
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
325
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
326
+ )
327
+
328
+ # aiohttp has limited support for "https://" proxies (TLS to proxy / TLS-in-TLS).
329
+ # Your account's proxy endpoint requires HTTPS proxy, so we explicitly block here
330
+ # to avoid confusing "it always fails" behavior.
331
+ if getattr(proxy_config, "protocol", "http").lower() == "https":
332
+ raise ThordataConfigError(
333
+ "Proxy Network requires an HTTPS proxy endpoint (TLS to proxy) for your account. "
334
+ "aiohttp support for 'https://' proxies is limited and may fail. "
335
+ "Please use ThordataClient.get/post (sync client) for Proxy Network requests."
336
+ )
337
+ proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
320
338
 
321
339
  try:
322
340
  return await session.post(
@@ -973,23 +991,14 @@ class AsyncThordataClient:
973
991
 
974
992
  async def get_residential_balance(self) -> Dict[str, Any]:
975
993
  """
976
- Get residential proxy balance (Public API NEW).
977
-
978
- Requires sign and apiKey credentials.
994
+ Get residential proxy balance.
979
995
 
980
- Returns:
981
- Dict with 'balance' (bytes) and 'expire_time' (timestamp).
996
+ Uses public_token/public_key.
982
997
  """
983
- if not self.sign or not self.api_key:
984
- raise ThordataConfigError(
985
- "sign and api_key are required for Public API NEW. "
986
- "Set THORDATA_SIGN and THORDATA_API_KEY environment variables."
987
- )
988
-
989
998
  session = self._get_session()
990
- headers = build_sign_headers(self.sign, self.api_key)
999
+ headers = self._build_gateway_headers()
991
1000
 
992
- logger.info("Async getting residential proxy balance (API NEW)")
1001
+ logger.info("Async getting residential proxy balance")
993
1002
 
994
1003
  try:
995
1004
  async with session.post(
@@ -1025,26 +1034,13 @@ class AsyncThordataClient:
1025
1034
  end_time: Union[str, int],
1026
1035
  ) -> Dict[str, Any]:
1027
1036
  """
1028
- Get residential proxy usage records (Public API NEW).
1029
-
1030
- Args:
1031
- start_time: Start timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1032
- end_time: End timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1037
+ Get residential proxy usage records.
1033
1038
 
1034
- Returns:
1035
- Dict with usage data including 'all_flow', 'all_used_flow', 'data' list.
1039
+ Uses public_token/public_key.
1036
1040
  """
1037
- if not self.sign or not self.api_key:
1038
- raise ThordataConfigError(
1039
- "sign and api_key are required for Public API NEW."
1040
- )
1041
-
1042
1041
  session = self._get_session()
1043
- headers = build_sign_headers(self.sign, self.api_key)
1044
- payload = {
1045
- "start_time": str(start_time),
1046
- "end_time": str(end_time),
1047
- }
1042
+ headers = self._build_gateway_headers()
1043
+ payload = {"start_time": str(start_time), "end_time": str(end_time)}
1048
1044
 
1049
1045
  logger.info(f"Async getting residential usage: {start_time} to {end_time}")
1050
1046
 
@@ -1296,20 +1292,14 @@ class AsyncThordataClient:
1296
1292
 
1297
1293
  async def get_isp_regions(self) -> List[Dict[str, Any]]:
1298
1294
  """
1299
- Get available ISP proxy regions (Public API NEW).
1295
+ Get available ISP proxy regions.
1300
1296
 
1301
- Returns:
1302
- List of regions with id, continent, country, city, num, pricing.
1297
+ Uses public_token/public_key.
1303
1298
  """
1304
- if not self.sign or not self.api_key:
1305
- raise ThordataConfigError(
1306
- "sign and api_key are required for Public API NEW."
1307
- )
1308
-
1309
1299
  session = self._get_session()
1310
- headers = build_sign_headers(self.sign, self.api_key)
1300
+ headers = self._build_gateway_headers()
1311
1301
 
1312
- logger.info("Async getting ISP regions (API NEW)")
1302
+ logger.info("Async getting ISP regions")
1313
1303
 
1314
1304
  try:
1315
1305
  async with session.post(
@@ -1341,20 +1331,14 @@ class AsyncThordataClient:
1341
1331
 
1342
1332
  async def list_isp_proxies(self) -> List[Dict[str, Any]]:
1343
1333
  """
1344
- List ISP proxies (Public API NEW).
1334
+ List ISP proxies.
1345
1335
 
1346
- Returns:
1347
- List of ISP proxies with ip, port, user, pwd, startTime, expireTime.
1336
+ Uses public_token/public_key.
1348
1337
  """
1349
- if not self.sign or not self.api_key:
1350
- raise ThordataConfigError(
1351
- "sign and api_key are required for Public API NEW."
1352
- )
1353
-
1354
1338
  session = self._get_session()
1355
- headers = build_sign_headers(self.sign, self.api_key)
1339
+ headers = self._build_gateway_headers()
1356
1340
 
1357
- logger.info("Async listing ISP proxies (API NEW)")
1341
+ logger.info("Async listing ISP proxies")
1358
1342
 
1359
1343
  try:
1360
1344
  async with session.post(
@@ -1386,20 +1370,14 @@ class AsyncThordataClient:
1386
1370
 
1387
1371
  async def get_wallet_balance(self) -> Dict[str, Any]:
1388
1372
  """
1389
- Get wallet balance for ISP proxies (Public API NEW).
1373
+ Get wallet balance for ISP proxies.
1390
1374
 
1391
- Returns:
1392
- Dict with 'walletBalance'.
1375
+ Uses public_token/public_key.
1393
1376
  """
1394
- if not self.sign or not self.api_key:
1395
- raise ThordataConfigError(
1396
- "sign and api_key are required for Public API NEW."
1397
- )
1398
-
1399
1377
  session = self._get_session()
1400
- headers = build_sign_headers(self.sign, self.api_key)
1378
+ headers = self._build_gateway_headers()
1401
1379
 
1402
- logger.info("Async getting wallet balance (API NEW)")
1380
+ logger.info("Async getting wallet balance")
1403
1381
 
1404
1382
  try:
1405
1383
  async with session.post(
@@ -1592,3 +1570,86 @@ class AsyncThordataClient:
1592
1570
  "public_token and public_key are required for this operation. "
1593
1571
  "Please provide them when initializing AsyncThordataClient."
1594
1572
  )
1573
+
1574
+ def _get_proxy_endpoint_overrides(
1575
+ self, product: ProxyProduct
1576
+ ) -> tuple[Optional[str], Optional[int], str]:
1577
+ prefix = product.value.upper()
1578
+
1579
+ host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
1580
+ "THORDATA_PROXY_HOST"
1581
+ )
1582
+ port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
1583
+ "THORDATA_PROXY_PORT"
1584
+ )
1585
+ protocol = (
1586
+ os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
1587
+ or os.getenv("THORDATA_PROXY_PROTOCOL")
1588
+ or "http"
1589
+ )
1590
+
1591
+ port: Optional[int] = None
1592
+ if port_raw:
1593
+ try:
1594
+ port = int(port_raw)
1595
+ except ValueError:
1596
+ port = None
1597
+
1598
+ return host or None, port, protocol
1599
+
1600
+ def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
1601
+ u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1602
+ p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
1603
+ if u and p:
1604
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1605
+ ProxyProduct.RESIDENTIAL
1606
+ )
1607
+ return ProxyConfig(
1608
+ username=u,
1609
+ password=p,
1610
+ product=ProxyProduct.RESIDENTIAL,
1611
+ host=host,
1612
+ port=port,
1613
+ protocol=protocol,
1614
+ )
1615
+
1616
+ u = os.getenv("THORDATA_DATACENTER_USERNAME")
1617
+ p = os.getenv("THORDATA_DATACENTER_PASSWORD")
1618
+ if u and p:
1619
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1620
+ ProxyProduct.DATACENTER
1621
+ )
1622
+ return ProxyConfig(
1623
+ username=u,
1624
+ password=p,
1625
+ product=ProxyProduct.DATACENTER,
1626
+ host=host,
1627
+ port=port,
1628
+ protocol=protocol,
1629
+ )
1630
+
1631
+ u = os.getenv("THORDATA_MOBILE_USERNAME")
1632
+ p = os.getenv("THORDATA_MOBILE_PASSWORD")
1633
+ if u and p:
1634
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1635
+ ProxyProduct.MOBILE
1636
+ )
1637
+ return ProxyConfig(
1638
+ username=u,
1639
+ password=p,
1640
+ product=ProxyProduct.MOBILE,
1641
+ host=host,
1642
+ port=port,
1643
+ protocol=protocol,
1644
+ )
1645
+
1646
+ return None
1647
+
1648
+ def _build_gateway_headers(self) -> Dict[str, str]:
1649
+ """
1650
+ Headers for gateway-style endpoints.
1651
+
1652
+ Per our SDK rule: ONLY public_token/public_key exist.
1653
+ """
1654
+ self._require_public_credentials()
1655
+ return build_public_api_headers(self.public_token or "", self.public_key or "")
thordata/client.py CHANGED
@@ -25,17 +25,19 @@ from __future__ import annotations
25
25
 
26
26
  import logging
27
27
  import os
28
- from datetime import date, datetime
28
+ import ssl
29
+ from datetime import date
29
30
  from typing import Any, Dict, List, Optional, Union
31
+ from urllib.parse import urlencode
30
32
 
31
33
  import requests
34
+ import urllib3
32
35
 
33
36
  from . import __version__ as _sdk_version
34
37
  from ._utils import (
35
38
  build_auth_headers,
36
39
  build_builder_headers,
37
40
  build_public_api_headers,
38
- build_sign_headers,
39
41
  build_user_agent,
40
42
  decode_base64_image,
41
43
  extract_error_message,
@@ -53,13 +55,13 @@ from .models import (
53
55
  ProxyConfig,
54
56
  ProxyProduct,
55
57
  ProxyServer,
56
- ProxyUser,
57
58
  ProxyUserList,
58
59
  ScraperTaskConfig,
59
60
  SerpRequest,
60
61
  UniversalScrapeRequest,
61
62
  UsageStatistics,
62
63
  VideoTaskConfig,
64
+ WhitelistProxyConfig,
63
65
  )
64
66
  from .retry import RetryConfig, with_retry
65
67
 
@@ -104,8 +106,6 @@ class ThordataClient:
104
106
  scraper_token: str,
105
107
  public_token: Optional[str] = None,
106
108
  public_key: Optional[str] = None,
107
- sign: Optional[str] = None,
108
- api_key: Optional[str] = None,
109
109
  proxy_host: str = "pr.thordata.net",
110
110
  proxy_port: int = 9999,
111
111
  timeout: int = 30,
@@ -121,22 +121,14 @@ class ThordataClient:
121
121
  if not scraper_token:
122
122
  raise ThordataConfigError("scraper_token is required")
123
123
 
124
+ # Core credentials
124
125
  self.scraper_token = scraper_token
125
126
  self.public_token = public_token
126
127
  self.public_key = public_key
127
128
 
128
- # Automatic Fallback Logic: If sign/api_key is not provided, try using public_token/key
129
- self.sign = sign or os.getenv("THORDATA_SIGN") or self.public_token
130
- self.api_key = api_key or os.getenv("THORDATA_API_KEY") or self.public_key
131
-
132
- # Public API authentication
133
- self.sign = sign or os.getenv("THORDATA_SIGN")
134
- self.api_key = api_key or os.getenv("THORDATA_API_KEY")
135
-
136
129
  # Proxy configuration
137
130
  self._proxy_host = proxy_host
138
131
  self._proxy_port = proxy_port
139
- self._default_timeout = timeout
140
132
 
141
133
  # Timeout configuration
142
134
  self._default_timeout = timeout
@@ -145,19 +137,14 @@ class ThordataClient:
145
137
  # Retry configuration
146
138
  self._retry_config = retry_config or RetryConfig()
147
139
 
148
- # Authentication mode
140
+ # Authentication mode (for scraping APIs)
149
141
  self._auth_mode = auth_mode.lower()
150
142
  if self._auth_mode not in ("bearer", "header_token"):
151
143
  raise ThordataConfigError(
152
144
  f"Invalid auth_mode: {auth_mode}. Must be 'bearer' or 'header_token'."
153
145
  )
154
146
 
155
- # Build default proxy URL (for basic usage)
156
- self._default_proxy_url = (
157
- f"http://td-customer-{self.scraper_token}:@{proxy_host}:{proxy_port}"
158
- )
159
-
160
- # Sessions:
147
+ # NOTE:
161
148
  # - _proxy_session: used for proxy network traffic to target sites
162
149
  # - _api_session: used for Thordata APIs (SERP/Universal/Tasks/Locations)
163
150
  #
@@ -165,14 +152,9 @@ class ThordataClient:
165
152
  # so developers can rely on system proxy settings (e.g., Clash) via env vars.
166
153
  self._proxy_session = requests.Session()
167
154
  self._proxy_session.trust_env = False
168
- self._proxy_session.proxies = {
169
- "http": self._default_proxy_url,
170
- "https": self._default_proxy_url,
171
- }
172
155
 
173
156
  self._api_session = requests.Session()
174
157
  self._api_session.trust_env = True
175
-
176
158
  self._api_session.headers.update(
177
159
  {"User-Agent": build_user_agent(_sdk_version, "requests")}
178
160
  )
@@ -202,13 +184,13 @@ class ThordataClient:
202
184
  or self.LOCATIONS_URL
203
185
  ).rstrip("/")
204
186
 
187
+ # These URLs exist in your codebase; keep them for now (even if your org later migrates fully to openapi)
205
188
  gateway_base = os.getenv(
206
189
  "THORDATA_GATEWAY_BASE_URL", "https://api.thordata.com/api/gateway"
207
190
  )
208
191
  child_base = os.getenv(
209
192
  "THORDATA_CHILD_BASE_URL", "https://api.thordata.com/api/child"
210
193
  )
211
-
212
194
  self._gateway_base_url = gateway_base
213
195
  self._child_base_url = child_base
214
196
 
@@ -216,25 +198,31 @@ class ThordataClient:
216
198
  self._builder_url = f"{scraperapi_base}/builder"
217
199
  self._video_builder_url = f"{scraperapi_base}/video_builder"
218
200
  self._universal_url = f"{universalapi_base}/request"
201
+
219
202
  self._status_url = f"{web_scraper_api_base}/tasks-status"
220
203
  self._download_url = f"{web_scraper_api_base}/tasks-download"
204
+ self._list_url = f"{web_scraper_api_base}/tasks-list"
205
+
221
206
  self._locations_base_url = locations_base
207
+
208
+ # These 2 lines keep your existing behavior (derive account endpoints from locations_base)
222
209
  self._usage_stats_url = (
223
210
  f"{locations_base.replace('/locations', '')}/account/usage-statistics"
224
211
  )
225
212
  self._proxy_users_url = (
226
213
  f"{locations_base.replace('/locations', '')}/proxy-users"
227
214
  )
215
+
228
216
  whitelist_base = os.getenv(
229
217
  "THORDATA_WHITELIST_BASE_URL", "https://api.thordata.com/api"
230
218
  )
231
219
  self._whitelist_url = f"{whitelist_base}/whitelisted-ips"
220
+
232
221
  proxy_api_base = os.getenv(
233
222
  "THORDATA_PROXY_API_BASE_URL", "https://api.thordata.com/api"
234
223
  )
235
224
  self._proxy_list_url = f"{proxy_api_base}/proxy/proxy-list"
236
225
  self._proxy_expiration_url = f"{proxy_api_base}/proxy/expiration-time"
237
- self._list_url = f"{web_scraper_api_base}/tasks-list"
238
226
 
239
227
  # =========================================================================
240
228
  # Proxy Network Methods (Pure proxy network request functions)
@@ -277,11 +265,37 @@ class ThordataClient:
277
265
 
278
266
  timeout = timeout or self._default_timeout
279
267
 
280
- if proxy_config:
281
- proxies = proxy_config.to_proxies_dict()
282
- kwargs["proxies"] = proxies
268
+ if proxy_config is None:
269
+ proxy_config = self._get_default_proxy_config_from_env()
283
270
 
284
- return self._request_with_retry("GET", url, timeout=timeout, **kwargs)
271
+ if proxy_config is None:
272
+ raise ThordataConfigError(
273
+ "Proxy credentials are missing. "
274
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
275
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
276
+ )
277
+
278
+ kwargs["proxies"] = proxy_config.to_proxies_dict()
279
+
280
+ @with_retry(self._retry_config)
281
+ def _do() -> requests.Response:
282
+ return self._proxy_request_with_proxy_manager(
283
+ "GET",
284
+ url,
285
+ proxy_config=proxy_config,
286
+ timeout=timeout,
287
+ headers=kwargs.pop("headers", None),
288
+ params=kwargs.pop("params", None),
289
+ )
290
+
291
+ try:
292
+ return _do()
293
+ except requests.Timeout as e:
294
+ raise ThordataTimeoutError(
295
+ f"Request timed out: {e}", original_error=e
296
+ ) from e
297
+ except Exception as e:
298
+ raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
285
299
 
286
300
  def post(
287
301
  self,
@@ -307,11 +321,38 @@ class ThordataClient:
307
321
 
308
322
  timeout = timeout or self._default_timeout
309
323
 
310
- if proxy_config:
311
- proxies = proxy_config.to_proxies_dict()
312
- kwargs["proxies"] = proxies
324
+ if proxy_config is None:
325
+ proxy_config = self._get_default_proxy_config_from_env()
313
326
 
314
- return self._request_with_retry("POST", url, timeout=timeout, **kwargs)
327
+ if proxy_config is None:
328
+ raise ThordataConfigError(
329
+ "Proxy credentials are missing. "
330
+ "Pass proxy_config=ProxyConfig(username=..., password=..., product=...) "
331
+ "or set THORDATA_RESIDENTIAL_USERNAME/THORDATA_RESIDENTIAL_PASSWORD (or DATACENTER/MOBILE)."
332
+ )
333
+
334
+ kwargs["proxies"] = proxy_config.to_proxies_dict()
335
+
336
+ @with_retry(self._retry_config)
337
+ def _do() -> requests.Response:
338
+ return self._proxy_request_with_proxy_manager(
339
+ "POST",
340
+ url,
341
+ proxy_config=proxy_config,
342
+ timeout=timeout,
343
+ headers=kwargs.pop("headers", None),
344
+ params=kwargs.pop("params", None),
345
+ data=kwargs.pop("data", None),
346
+ )
347
+
348
+ try:
349
+ return _do()
350
+ except requests.Timeout as e:
351
+ raise ThordataTimeoutError(
352
+ f"Request timed out: {e}", original_error=e
353
+ ) from e
354
+ except Exception as e:
355
+ raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
315
356
 
316
357
  def build_proxy_url(
317
358
  self,
@@ -1185,27 +1226,13 @@ class ThordataClient:
1185
1226
 
1186
1227
  def get_residential_balance(self) -> Dict[str, Any]:
1187
1228
  """
1188
- Get residential proxy balance (Public API NEW).
1189
-
1190
- Requires sign and apiKey credentials.
1229
+ Get residential proxy balance.
1191
1230
 
1192
- Returns:
1193
- Dict with 'balance' (bytes) and 'expire_time' (timestamp).
1194
-
1195
- Example:
1196
- >>> result = client.get_residential_balance()
1197
- >>> balance_gb = result['balance'] / (1024**3)
1198
- >>> print(f"Balance: {balance_gb:.2f} GB")
1231
+ Uses public_token/public_key (Dashboard -> My account -> API).
1199
1232
  """
1200
- if not self.sign or not self.api_key:
1201
- raise ThordataConfigError(
1202
- "sign and api_key are required for Public API NEW. "
1203
- "Set THORDATA_SIGN and THORDATA_API_KEY environment variables."
1204
- )
1205
-
1206
- headers = build_sign_headers(self.sign, self.api_key)
1233
+ headers = self._build_gateway_headers()
1207
1234
 
1208
- logger.info("Getting residential proxy balance (API NEW)")
1235
+ logger.info("Getting residential proxy balance")
1209
1236
 
1210
1237
  response = self._api_request_with_retry(
1211
1238
  "POST",
@@ -1230,32 +1257,12 @@ class ThordataClient:
1230
1257
  end_time: Union[str, int],
1231
1258
  ) -> Dict[str, Any]:
1232
1259
  """
1233
- Get residential proxy usage records (Public API NEW).
1234
-
1235
- Args:
1236
- start_time: Start timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1237
- end_time: End timestamp (Unix timestamp or YYYY-MM-DD HH:MM:SS).
1260
+ Get residential proxy usage records.
1238
1261
 
1239
- Returns:
1240
- Dict with usage data including 'all_flow', 'all_used_flow', 'data' list.
1241
-
1242
- Example:
1243
- >>> import time
1244
- >>> end = int(time.time())
1245
- >>> start = end - 7*24*3600 # Last 7 days
1246
- >>> usage = client.get_residential_usage(start, end)
1247
- >>> print(f"Total used: {usage['all_used_flow'] / (1024**3):.2f} GB")
1262
+ Uses public_token/public_key (Dashboard -> My account -> API).
1248
1263
  """
1249
- if not self.sign or not self.api_key:
1250
- raise ThordataConfigError(
1251
- "sign and api_key are required for Public API NEW."
1252
- )
1253
-
1254
- headers = build_sign_headers(self.sign, self.api_key)
1255
- payload = {
1256
- "start_time": str(start_time),
1257
- "end_time": str(end_time),
1258
- }
1264
+ headers = self._build_gateway_headers()
1265
+ payload = {"start_time": str(start_time), "end_time": str(end_time)}
1259
1266
 
1260
1267
  logger.info(f"Getting residential usage: {start_time} to {end_time}")
1261
1268
 
@@ -1516,24 +1523,13 @@ class ThordataClient:
1516
1523
 
1517
1524
  def get_isp_regions(self) -> List[Dict[str, Any]]:
1518
1525
  """
1519
- Get available ISP proxy regions (Public API NEW).
1526
+ Get available ISP proxy regions.
1520
1527
 
1521
- Returns:
1522
- List of regions with id, continent, country, city, num, pricing.
1523
-
1524
- Example:
1525
- >>> regions = client.get_isp_regions()
1526
- >>> for region in regions:
1527
- ... print(f"{region['country']}/{region['city']}: {region['num']} IPs")
1528
+ Uses public_token/public_key (Dashboard -> My account -> API).
1528
1529
  """
1529
- if not self.sign or not self.api_key:
1530
- raise ThordataConfigError(
1531
- "sign and api_key are required for Public API NEW."
1532
- )
1533
-
1534
- headers = build_sign_headers(self.sign, self.api_key)
1530
+ headers = self._build_gateway_headers()
1535
1531
 
1536
- logger.info("Getting ISP regions (API NEW)")
1532
+ logger.info("Getting ISP regions")
1537
1533
 
1538
1534
  response = self._api_request_with_retry(
1539
1535
  "POST",
@@ -1554,24 +1550,13 @@ class ThordataClient:
1554
1550
 
1555
1551
  def list_isp_proxies(self) -> List[Dict[str, Any]]:
1556
1552
  """
1557
- List ISP proxies (Public API NEW).
1558
-
1559
- Returns:
1560
- List of ISP proxies with ip, port, user, pwd, startTime, expireTime.
1553
+ List ISP proxies.
1561
1554
 
1562
- Example:
1563
- >>> proxies = client.list_isp_proxies()
1564
- >>> for proxy in proxies:
1565
- ... print(f"{proxy['ip']}:{proxy['port']} - expires: {proxy['expireTime']}")
1555
+ Uses public_token/public_key (Dashboard -> My account -> API).
1566
1556
  """
1567
- if not self.sign or not self.api_key:
1568
- raise ThordataConfigError(
1569
- "sign and api_key are required for Public API NEW."
1570
- )
1571
-
1572
- headers = build_sign_headers(self.sign, self.api_key)
1557
+ headers = self._build_gateway_headers()
1573
1558
 
1574
- logger.info("Listing ISP proxies (API NEW)")
1559
+ logger.info("Listing ISP proxies")
1575
1560
 
1576
1561
  response = self._api_request_with_retry(
1577
1562
  "POST",
@@ -1592,23 +1577,13 @@ class ThordataClient:
1592
1577
 
1593
1578
  def get_wallet_balance(self) -> Dict[str, Any]:
1594
1579
  """
1595
- Get wallet balance for ISP proxies (Public API NEW).
1580
+ Get wallet balance for ISP proxies.
1596
1581
 
1597
- Returns:
1598
- Dict with 'walletBalance'.
1599
-
1600
- Example:
1601
- >>> result = client.get_wallet_balance()
1602
- >>> print(f"Wallet: ${result['walletBalance']}")
1582
+ Uses public_token/public_key (Dashboard -> My account -> API).
1603
1583
  """
1604
- if not self.sign or not self.api_key:
1605
- raise ThordataConfigError(
1606
- "sign and api_key are required for Public API NEW."
1607
- )
1608
-
1609
- headers = build_sign_headers(self.sign, self.api_key)
1584
+ headers = self._build_gateway_headers()
1610
1585
 
1611
- logger.info("Getting wallet balance (API NEW)")
1586
+ logger.info("Getting wallet balance")
1612
1587
 
1613
1588
  response = self._api_request_with_retry(
1614
1589
  "POST",
@@ -1827,6 +1802,185 @@ class ThordataClient:
1827
1802
  "Please provide them when initializing ThordataClient."
1828
1803
  )
1829
1804
 
1805
+ def _get_proxy_endpoint_overrides(
1806
+ self, product: ProxyProduct
1807
+ ) -> tuple[Optional[str], Optional[int], str]:
1808
+ """
1809
+ Read proxy endpoint overrides from env.
1810
+
1811
+ Priority:
1812
+ 1) THORDATA_<PRODUCT>_PROXY_HOST/PORT/PROTOCOL
1813
+ 2) THORDATA_PROXY_HOST/PORT/PROTOCOL
1814
+ 3) defaults (host/port None => ProxyConfig will use its product defaults)
1815
+ """
1816
+ prefix = product.value.upper() # RESIDENTIAL / DATACENTER / MOBILE / ISP
1817
+
1818
+ host = os.getenv(f"THORDATA_{prefix}_PROXY_HOST") or os.getenv(
1819
+ "THORDATA_PROXY_HOST"
1820
+ )
1821
+ port_raw = os.getenv(f"THORDATA_{prefix}_PROXY_PORT") or os.getenv(
1822
+ "THORDATA_PROXY_PORT"
1823
+ )
1824
+ protocol = (
1825
+ os.getenv(f"THORDATA_{prefix}_PROXY_PROTOCOL")
1826
+ or os.getenv("THORDATA_PROXY_PROTOCOL")
1827
+ or "http"
1828
+ )
1829
+
1830
+ port: Optional[int] = None
1831
+ if port_raw:
1832
+ try:
1833
+ port = int(port_raw)
1834
+ except ValueError:
1835
+ port = None
1836
+
1837
+ return host or None, port, protocol
1838
+
1839
+ def _get_default_proxy_config_from_env(self) -> Optional[ProxyConfig]:
1840
+ """
1841
+ Try to build a default ProxyConfig from env vars.
1842
+
1843
+ Priority order:
1844
+ 1) Residential
1845
+ 2) Datacenter
1846
+ 3) Mobile
1847
+ """
1848
+ # Residential
1849
+ u = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
1850
+ p = os.getenv("THORDATA_RESIDENTIAL_PASSWORD")
1851
+ if u and p:
1852
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1853
+ ProxyProduct.RESIDENTIAL
1854
+ )
1855
+ return ProxyConfig(
1856
+ username=u,
1857
+ password=p,
1858
+ product=ProxyProduct.RESIDENTIAL,
1859
+ host=host,
1860
+ port=port,
1861
+ protocol=protocol,
1862
+ )
1863
+
1864
+ # Datacenter
1865
+ u = os.getenv("THORDATA_DATACENTER_USERNAME")
1866
+ p = os.getenv("THORDATA_DATACENTER_PASSWORD")
1867
+ if u and p:
1868
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1869
+ ProxyProduct.DATACENTER
1870
+ )
1871
+ return ProxyConfig(
1872
+ username=u,
1873
+ password=p,
1874
+ product=ProxyProduct.DATACENTER,
1875
+ host=host,
1876
+ port=port,
1877
+ protocol=protocol,
1878
+ )
1879
+
1880
+ # Mobile
1881
+ u = os.getenv("THORDATA_MOBILE_USERNAME")
1882
+ p = os.getenv("THORDATA_MOBILE_PASSWORD")
1883
+ if u and p:
1884
+ host, port, protocol = self._get_proxy_endpoint_overrides(
1885
+ ProxyProduct.MOBILE
1886
+ )
1887
+ return ProxyConfig(
1888
+ username=u,
1889
+ password=p,
1890
+ product=ProxyProduct.MOBILE,
1891
+ host=host,
1892
+ port=port,
1893
+ protocol=protocol,
1894
+ )
1895
+
1896
+ return None
1897
+
1898
+ def _build_gateway_headers(self) -> Dict[str, str]:
1899
+ """
1900
+ Build headers for legacy gateway-style endpoints.
1901
+
1902
+ IMPORTANT:
1903
+ - SDK does NOT expose "sign/apiKey" as a separate credential model.
1904
+ - Values ALWAYS come from public_token/public_key.
1905
+ - Some backend endpoints may still expect header field names "sign" and "apiKey".
1906
+ """
1907
+ self._require_public_credentials()
1908
+ return {
1909
+ "sign": self.public_token or "",
1910
+ "apiKey": self.public_key or "",
1911
+ "Content-Type": "application/x-www-form-urlencoded",
1912
+ }
1913
+
1914
+ def _proxy_request_with_proxy_manager(
1915
+ self,
1916
+ method: str,
1917
+ url: str,
1918
+ *,
1919
+ proxy_config: ProxyConfig,
1920
+ timeout: int,
1921
+ headers: Optional[Dict[str, str]] = None,
1922
+ params: Optional[Dict[str, Any]] = None,
1923
+ data: Any = None,
1924
+ ) -> requests.Response:
1925
+ """
1926
+ Proxy Network request implemented via urllib3.ProxyManager.
1927
+
1928
+ This is required to reliably support HTTPS proxy endpoints like:
1929
+ https://<endpoint>.pr.thordata.net:9999
1930
+ """
1931
+ # Build final URL (include query params)
1932
+ req = requests.Request(method=method.upper(), url=url, params=params)
1933
+ prepped = self._proxy_session.prepare_request(req)
1934
+ final_url = prepped.url or url
1935
+
1936
+ proxy_url = proxy_config.build_proxy_endpoint()
1937
+ proxy_headers = urllib3.make_headers(
1938
+ proxy_basic_auth=proxy_config.build_proxy_basic_auth()
1939
+ )
1940
+
1941
+ pm = urllib3.ProxyManager(
1942
+ proxy_url,
1943
+ proxy_headers=proxy_headers,
1944
+ proxy_ssl_context=(
1945
+ ssl.create_default_context()
1946
+ if proxy_url.startswith("https://")
1947
+ else None
1948
+ ),
1949
+ )
1950
+
1951
+ # Encode form data if dict
1952
+ body = None
1953
+ req_headers = dict(headers or {})
1954
+ if data is not None:
1955
+ if isinstance(data, dict):
1956
+ # form-urlencoded
1957
+ body = urlencode({k: str(v) for k, v in data.items()})
1958
+ req_headers.setdefault(
1959
+ "Content-Type", "application/x-www-form-urlencoded"
1960
+ )
1961
+ else:
1962
+ body = data
1963
+
1964
+ http_resp = pm.request(
1965
+ method.upper(),
1966
+ final_url,
1967
+ body=body,
1968
+ headers=req_headers or None,
1969
+ timeout=urllib3.Timeout(connect=timeout, read=timeout),
1970
+ retries=False,
1971
+ preload_content=True,
1972
+ )
1973
+
1974
+ # Convert urllib3 response -> requests.Response (keep your API stable)
1975
+ r = requests.Response()
1976
+ r.status_code = int(getattr(http_resp, "status", 0) or 0)
1977
+ r._content = http_resp.data or b""
1978
+ r.url = final_url
1979
+ r.headers = requests.structures.CaseInsensitiveDict(
1980
+ dict(http_resp.headers or {})
1981
+ )
1982
+ return r
1983
+
1830
1984
  def _request_with_retry(
1831
1985
  self, method: str, url: str, **kwargs: Any
1832
1986
  ) -> requests.Response:
thordata/models.py CHANGED
@@ -26,11 +26,14 @@ from __future__ import annotations
26
26
 
27
27
  import json
28
28
  import re
29
+ import ssl
29
30
  import uuid
30
31
  from dataclasses import dataclass, field
31
32
  from enum import Enum
32
33
  from typing import Any, Dict, List, Optional, Union
33
34
 
35
+ import urllib3
36
+
34
37
  # =============================================================================
35
38
  # Proxy Product Types
36
39
  # =============================================================================
@@ -137,6 +140,7 @@ class ProxyConfig:
137
140
  if self.host is None:
138
141
  # Set host based on product type
139
142
  host_map = {
143
+ # User&Pass auth entry (docs examples use t.pr.thordata.net for authenticated proxy)
140
144
  ProxyProduct.RESIDENTIAL: "t.pr.thordata.net",
141
145
  ProxyProduct.DATACENTER: "dc.pr.thordata.net",
142
146
  ProxyProduct.MOBILE: "m.pr.thordata.net",
@@ -233,6 +237,14 @@ class ProxyConfig:
233
237
  username = self.build_username()
234
238
  return f"{self.protocol}://{username}:{self.password}@{self.host}:{self.port}"
235
239
 
240
+ def build_proxy_endpoint(self) -> str:
241
+ """Proxy endpoint without credentials, for HTTPS proxy managers."""
242
+ return f"{self.protocol}://{self.host}:{self.port}"
243
+
244
+ def build_proxy_basic_auth(self) -> str:
245
+ """Basic auth string 'username:password' for Proxy-Authorization."""
246
+ return f"{self.build_username()}:{self.password}"
247
+
236
248
  def to_proxies_dict(self) -> Dict[str, str]:
237
249
  """
238
250
  Build a proxies dict suitable for the requests library.
@@ -264,6 +276,39 @@ class ProxyConfig:
264
276
  ) from e
265
277
 
266
278
 
279
+ @dataclass
280
+ class WhitelistProxyConfig:
281
+ """
282
+ Proxy config for IP-whitelist authentication mode (no username/password).
283
+
284
+ In whitelist mode, you do NOT pass proxy auth.
285
+ You only connect to the proxy entry node (host:port).
286
+
287
+ Examples (from docs):
288
+ - Global random: pr.thordata.net:9999
289
+ - Country nodes: us-pr.thordata.net:10000, etc.
290
+ """
291
+
292
+ host: str = "pr.thordata.net"
293
+ port: int = 9999
294
+ protocol: str = "http" # use http for proxy scheme; target URL can still be https
295
+
296
+ def __post_init__(self) -> None:
297
+ if self.protocol not in ("http", "https"):
298
+ raise ValueError("protocol must be 'http' or 'https'")
299
+
300
+ def build_proxy_url(self) -> str:
301
+ return f"{self.protocol}://{self.host}:{self.port}"
302
+
303
+ def to_proxies_dict(self) -> Dict[str, str]:
304
+ url = self.build_proxy_url()
305
+ return {"http": url, "https": url}
306
+
307
+ def to_aiohttp_config(self) -> tuple:
308
+ # aiohttp: proxy_auth should be None in whitelist mode
309
+ return self.build_proxy_url(), None
310
+
311
+
267
312
  @dataclass
268
313
  class StaticISPProxy:
269
314
  """
@@ -545,23 +590,28 @@ class SerpRequest:
545
590
  payload: Dict[str, Any] = {
546
591
  "engine": engine,
547
592
  "num": str(self.num),
548
- # output_format: json=1 for JSON, json=0 for raw HTML
549
- "json": "1" if self.output_format.lower() == "json" else "0",
550
593
  }
551
594
 
595
+ fmt = self.output_format.lower()
596
+ if fmt == "json":
597
+ payload["json"] = "1"
598
+ elif fmt == "html":
599
+ # omit "json" to get raw HTML (per docs: no json -> HTML)
600
+ pass
601
+ else:
602
+ # keep backward compatibility: if user passes "2"/"both"/etc.
603
+ if fmt in ("2", "both", "json+html", "json_html"):
604
+ payload["json"] = "2"
605
+
552
606
  # Handle query parameter (Yandex uses 'text', others use 'q')
553
607
  if engine == "yandex":
554
608
  payload["text"] = self.query
555
609
  else:
556
610
  payload["q"] = self.query
557
611
 
558
- # Set URL / domain based on google_domain or engine default
612
+ # Domain overrides (preferred by docs)
559
613
  if self.google_domain:
560
- # 显式设置 google_domain 参数,同时设置 url
561
614
  payload["google_domain"] = self.google_domain
562
- payload["url"] = self.google_domain
563
- elif engine in self.ENGINE_URLS:
564
- payload["url"] = self.ENGINE_URLS[engine]
565
615
 
566
616
  # Pagination
567
617
  if self.start > 0:
thordata/retry.py CHANGED
@@ -16,6 +16,7 @@ Example:
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
+ import inspect
19
20
  import logging
20
21
  import random
21
22
  import time
@@ -201,8 +202,6 @@ def with_retry(
201
202
 
202
203
  @wraps(func)
203
204
  async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
204
- import asyncio
205
-
206
205
  last_exception: Optional[Exception] = None
207
206
 
208
207
  for attempt in range(config.max_retries + 1):
@@ -238,7 +237,7 @@ def with_retry(
238
237
  # Check if the function is async
239
238
  import asyncio
240
239
 
241
- if asyncio.iscoroutinefunction(func):
240
+ if inspect.iscoroutinefunction(func):
242
241
  return async_wrapper
243
242
  return sync_wrapper
244
243
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 0.8.0
3
+ Version: 1.0.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -87,14 +87,10 @@ Set environment variables:
87
87
  # Required for Scraper APIs (SERP, Universal, Tasks)
88
88
  export THORDATA_SCRAPER_TOKEN=your_token
89
89
 
90
- # Required for Public/Location APIs (Dashboard -> My Account)
90
+ # Public/Location APIs (Dashboard -> My account -> API)
91
91
  export THORDATA_PUBLIC_TOKEN=your_public_token
92
92
  export THORDATA_PUBLIC_KEY=your_public_key
93
93
 
94
- # Required for Public API NEW (Dashboard -> Public API NEW)
95
- # If not set, SDK falls back to PUBLIC_TOKEN/KEY
96
- export THORDATA_SIGN=your_sign
97
- export THORDATA_API_KEY=your_api_key
98
94
  ```
99
95
 
100
96
  ---
@@ -0,0 +1,15 @@
1
+ thordata/__init__.py,sha256=MILcOkXK1A3U7pCNZxGqVGFMEvdfMJC-Eki2QaNlCdc,3195
2
+ thordata/_example_utils.py,sha256=a7hSQwxS9OqOatvRYm2NkZRakSJ186tNWbHpJ7WskFc,2185
3
+ thordata/_utils.py,sha256=oMPjR6wSmNonU5dJP6NHgYyV0BDO8b_0zxtKuck1htg,4701
4
+ thordata/async_client.py,sha256=XSsT3ariDWSvP32ny_aqf5dTN56T7yMTraaJOgeDi-g,55416
5
+ thordata/client.py,sha256=mUp6pwZKLca9tNljV_Gv4leOrMJ4LQsveiqdKroYp3I,64494
6
+ thordata/demo.py,sha256=zmG4I4cHXnbmQfbr063SeRK7_9IXrfof9QFoGqGTVm8,3806
7
+ thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
8
+ thordata/exceptions.py,sha256=IgMsFuh49cPxU5YofsKP1UhP5A_snhtuN6xD1yZWLiI,10018
9
+ thordata/models.py,sha256=X7wxS6kk18OGX-OLvCdxl0rhpQrAAM3WYLRnjzrWcOM,37690
10
+ thordata/retry.py,sha256=vb05YexCHjiiZTSm8_eK-_3BsCecplGpFjQ3XBwQ8FY,11505
11
+ thordata_sdk-1.0.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
12
+ thordata_sdk-1.0.0.dist-info/METADATA,sha256=qI0Weaz5DOzafF6CWv8gKBkozlhX7HVc4-qFQC66sgY,5667
13
+ thordata_sdk-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ thordata_sdk-1.0.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
15
+ thordata_sdk-1.0.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- thordata/__init__.py,sha256=yaIxW1T_nsCeiPE6iIHunjRzPrtbiN0BciveICgL4dM,3195
2
- thordata/_utils.py,sha256=epF-ewHyk7McdejlhHNAfxhIQ8sN3TlIjUJ9H4HOaUE,5254
3
- thordata/async_client.py,sha256=tC9y1wmcO6RsXCysBo0a0GNRZR3QQjJlCmEwG5HVukQ,53169
4
- thordata/client.py,sha256=VN5Jm3er7fdZDfT2G9g4siBSYNo0ZWj4WOi6TAiAZcE,59638
5
- thordata/demo.py,sha256=zmG4I4cHXnbmQfbr063SeRK7_9IXrfof9QFoGqGTVm8,3806
6
- thordata/enums.py,sha256=MpZnS9_8sg2vtcFqM6UicB94cKZm5R1t83L3ejNSbLs,8502
7
- thordata/exceptions.py,sha256=IgMsFuh49cPxU5YofsKP1UhP5A_snhtuN6xD1yZWLiI,10018
8
- thordata/models.py,sha256=NG4wn1bq4-FC4Aex8vwBOldiHovwg0JzhdtBsI1mL_8,36118
9
- thordata/retry.py,sha256=nkh17ca2TIEcTc-uNo-xcNdJPuxZ_VGlMbC70X6p-_Q,11518
10
- thordata_sdk-0.8.0.dist-info/licenses/LICENSE,sha256=bAxpWgQIzb-5jl3nhLdOwOJ_vlbHLtSG7yev2B7vioY,1088
11
- thordata_sdk-0.8.0.dist-info/METADATA,sha256=IgL554I6mzya9FdbqCxKdvO3r-bywiHJjZi1xdk8W48,5850
12
- thordata_sdk-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- thordata_sdk-0.8.0.dist-info/top_level.txt,sha256=Z8R_07m0lXCCSb1hapL9_nxMtyO3rf_9wOvq4n9u2Hg,9
14
- thordata_sdk-0.8.0.dist-info/RECORD,,