thordata-sdk 1.6.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {thordata_sdk-1.6.0/src/thordata_sdk.egg-info → thordata_sdk-1.8.0}/PKG-INFO +4 -2
  2. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/README.md +1 -1
  3. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/pyproject.toml +5 -1
  4. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/__init__.py +1 -1
  5. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/async_client.py +76 -10
  6. thordata_sdk-1.8.0/src/thordata/browser/__init__.py +16 -0
  7. thordata_sdk-1.8.0/src/thordata/browser/exceptions.py +23 -0
  8. thordata_sdk-1.8.0/src/thordata/browser/session.py +469 -0
  9. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/client.py +55 -7
  10. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/exceptions.py +10 -1
  11. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/serp.py +35 -3
  12. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/task.py +63 -9
  13. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/universal.py +37 -5
  14. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0/src/thordata_sdk.egg-info}/PKG-INFO +4 -2
  15. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/SOURCES.txt +4 -0
  16. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/requires.txt +3 -0
  17. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_async_client.py +3 -1
  18. thordata_sdk-1.8.0/tests/test_browser.py +104 -0
  19. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/LICENSE +0 -0
  20. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/setup.cfg +0 -0
  21. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/_utils.py +0 -0
  22. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/async_unlimited.py +0 -0
  23. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/__init__.py +0 -0
  24. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/async_http_client.py +0 -0
  25. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/http_client.py +0 -0
  26. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/tunnel.py +0 -0
  27. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/enums.py +0 -0
  28. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/models.py +0 -0
  29. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/retry.py +0 -0
  30. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/serp_engines.py +0 -0
  31. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/__init__.py +0 -0
  32. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/base.py +0 -0
  33. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/code.py +0 -0
  34. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/ecommerce.py +0 -0
  35. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/professional.py +0 -0
  36. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/search.py +0 -0
  37. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/social.py +0 -0
  38. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/travel.py +0 -0
  39. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/video.py +0 -0
  40. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/__init__.py +0 -0
  41. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/common.py +0 -0
  42. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/proxy.py +0 -0
  43. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/unlimited.py +0 -0
  44. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  45. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  46. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_async_client_errors.py +0 -0
  47. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_batch_creation.py +0 -0
  48. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_client.py +0 -0
  49. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_client_errors.py +0 -0
  50. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_enums.py +0 -0
  51. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_examples.py +0 -0
  52. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_exceptions.py +0 -0
  53. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_integration_proxy_protocols.py +0 -0
  54. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_models.py +0 -0
  55. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_retry.py +0 -0
  56. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_spec_parity.py +0 -0
  57. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_task_status_and_wait.py +0 -0
  58. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_tools.py +0 -0
  59. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_tools_coverage.py +0 -0
  60. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_unlimited.py +0 -0
  61. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_user_agent.py +0 -0
  62. {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.6.0
3
+ Version: 1.8.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -40,6 +40,8 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
40
40
  Requires-Dist: mypy>=1.0.0; extra == "dev"
41
41
  Requires-Dist: types-requests>=2.28.0; extra == "dev"
42
42
  Requires-Dist: aioresponses>=0.7.6; extra == "dev"
43
+ Provides-Extra: browser
44
+ Requires-Dist: playwright>=1.40.0; extra == "browser"
43
45
  Dynamic: license-file
44
46
 
45
47
  # Thordata Python SDK
@@ -63,7 +65,7 @@ Dynamic: license-file
63
65
 
64
66
  ## 📖 Introduction
65
67
 
66
- The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
68
+ The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
67
69
 
68
70
  **Why v1.8.0?**
69
71
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
@@ -19,7 +19,7 @@
19
19
 
20
20
  ## 📖 Introduction
21
21
 
22
- The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
22
+ The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
23
23
 
24
24
  **Why v1.8.0?**
25
25
  * **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "thordata-sdk"
8
- version = "1.6.0"
8
+ version = "1.8.0"
9
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -61,6 +61,9 @@ dev = [
61
61
  "types-requests>=2.28.0",
62
62
  "aioresponses>=0.7.6",
63
63
  ]
64
+ browser = [
65
+ "playwright>=1.40.0",
66
+ ]
64
67
 
65
68
  [project.urls]
66
69
  "Homepage" = "https://www.thordata.com"
@@ -115,6 +118,7 @@ check_untyped_defs = false
115
118
  strict_optional = false
116
119
  show_error_codes = true
117
120
  ignore_missing_imports = true
121
+ follow_imports = "skip"
118
122
 
119
123
  [[tool.mypy.overrides]]
120
124
  module = ["aiohttp.*", "requests.*"]
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
5
5
  Universal Scraping API (Web Unlocker), and Web Scraper API.
6
6
  """
7
7
 
8
- __version__ = "1.6.0"
8
+ __version__ = "1.8.0"
9
9
  __author__ = "Thordata Developer Team/Kael Odin"
10
10
  __email__ = "support@thordata.com"
11
11
 
@@ -244,6 +244,7 @@ class AsyncThordataClient:
244
244
  render_js: bool | None = None,
245
245
  no_cache: bool | None = None,
246
246
  output_format: str = "json",
247
+ ai_overview: bool = False,
247
248
  **kwargs: Any,
248
249
  ) -> dict[str, Any]:
249
250
  engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
@@ -258,13 +259,14 @@ class AsyncThordataClient:
258
259
  render_js=render_js,
259
260
  no_cache=no_cache,
260
261
  output_format=output_format,
262
+ ai_overview=ai_overview,
261
263
  extra_params=kwargs,
262
264
  )
263
265
  return await self.serp_search_advanced(request)
264
266
 
265
267
  async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
266
268
  if not self.scraper_token:
267
- raise ThordataConfigError("scraper_token required")
269
+ raise ThordataConfigError("scraper_token is required for SERP API")
268
270
  payload = request.to_payload()
269
271
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
270
272
  logger.info(f"Async SERP: {request.engine} - {request.query}")
@@ -293,30 +295,38 @@ class AsyncThordataClient:
293
295
  url: str,
294
296
  *,
295
297
  js_render: bool = False,
296
- output_format: str = "html",
298
+ output_format: str | list[str] = "html",
297
299
  country: str | None = None,
298
300
  block_resources: str | None = None,
301
+ clean_content: str | None = None,
299
302
  wait: int | None = None,
300
303
  wait_for: str | None = None,
304
+ follow_redirect: bool | None = None,
305
+ headers: list[dict[str, str]] | None = None,
306
+ cookies: list[dict[str, str]] | None = None,
301
307
  **kwargs: Any,
302
- ) -> str | bytes:
308
+ ) -> str | bytes | dict[str, str | bytes]:
303
309
  request = UniversalScrapeRequest(
304
310
  url=url,
305
311
  js_render=js_render,
306
312
  output_format=output_format,
307
313
  country=country,
308
314
  block_resources=block_resources,
315
+ clean_content=clean_content,
309
316
  wait=wait,
310
317
  wait_for=wait_for,
318
+ follow_redirect=follow_redirect,
319
+ headers=headers,
320
+ cookies=cookies,
311
321
  extra_params=kwargs,
312
322
  )
313
323
  return await self.universal_scrape_advanced(request)
314
324
 
315
325
  async def universal_scrape_advanced(
316
326
  self, request: UniversalScrapeRequest
317
- ) -> str | bytes:
327
+ ) -> str | bytes | dict[str, str | bytes]:
318
328
  if not self.scraper_token:
319
- raise ThordataConfigError("scraper_token required")
329
+ raise ThordataConfigError("scraper_token is required for Universal API")
320
330
  payload = request.to_payload()
321
331
  headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
322
332
 
@@ -327,9 +337,17 @@ class AsyncThordataClient:
327
337
  try:
328
338
  resp_json = await response.json()
329
339
  except ValueError:
330
- if request.output_format.lower() == "png":
331
- return await response.read()
332
- return await response.text()
340
+ # If not JSON, return raw content based on format
341
+ if isinstance(request.output_format, list) or (
342
+ isinstance(request.output_format, str) and "," in request.output_format
343
+ ):
344
+ return {"raw": await response.read()}
345
+ fmt = (
346
+ request.output_format.lower()
347
+ if isinstance(request.output_format, str)
348
+ else str(request.output_format).lower()
349
+ )
350
+ return await response.read() if fmt == "png" else await response.text()
333
351
 
334
352
  if isinstance(resp_json, dict):
335
353
  code = resp_json.get("code")
@@ -337,6 +355,27 @@ class AsyncThordataClient:
337
355
  msg = extract_error_message(resp_json)
338
356
  raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
339
357
 
358
+ # Handle multiple output formats
359
+ if isinstance(request.output_format, list) or (
360
+ isinstance(request.output_format, str) and "," in request.output_format
361
+ ):
362
+ result: dict[str, str | bytes] = {}
363
+ formats = (
364
+ request.output_format
365
+ if isinstance(request.output_format, list)
366
+ else [f.strip() for f in request.output_format.split(",")]
367
+ )
368
+
369
+ for fmt in formats:
370
+ fmt_lower = fmt.lower()
371
+ if fmt_lower == "html" and "html" in resp_json:
372
+ result["html"] = resp_json["html"]
373
+ elif fmt_lower == "png" and "png" in resp_json:
374
+ result["png"] = decode_base64_image(resp_json["png"])
375
+
376
+ if result:
377
+ return result
378
+
340
379
  if "html" in resp_json:
341
380
  return resp_json["html"]
342
381
  if "png" in resp_json:
@@ -411,7 +450,7 @@ class AsyncThordataClient:
411
450
  async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
412
451
  self._require_public_credentials()
413
452
  if not self.scraper_token:
414
- raise ThordataConfigError("scraper_token required")
453
+ raise ThordataConfigError("scraper_token is required for Task Builder")
415
454
  payload = config.to_payload()
416
455
  headers = build_builder_headers(
417
456
  self.scraper_token, str(self.public_token), str(self.public_key)
@@ -449,7 +488,9 @@ class AsyncThordataClient:
449
488
  async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
450
489
  self._require_public_credentials()
451
490
  if not self.scraper_token:
452
- raise ThordataConfigError("scraper_token required")
491
+ raise ThordataConfigError(
492
+ "scraper_token is required for Video Task Builder"
493
+ )
453
494
  payload = config.to_payload()
454
495
  headers = build_builder_headers(
455
496
  self.scraper_token, str(self.public_token), str(self.public_key)
@@ -1067,3 +1108,28 @@ class AsyncThordataClient:
1067
1108
  safe_user = quote(final_user, safe="")
1068
1109
  safe_pass = quote(pwd, safe="")
1069
1110
  return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
1111
+
1112
+ @property
1113
+ def browser(self):
1114
+ """Get a browser session for automation.
1115
+
1116
+ Requires playwright: pip install thordata-sdk[browser]
1117
+
1118
+ Returns:
1119
+ BrowserSession instance
1120
+
1121
+ Example:
1122
+ async with AsyncThordataClient() as client:
1123
+ session = client.browser
1124
+ await session.navigate("https://example.com")
1125
+ snapshot = await session.snapshot()
1126
+ """
1127
+ try:
1128
+ from .browser import BrowserSession
1129
+
1130
+ return BrowserSession(self)
1131
+ except ImportError as e:
1132
+ raise ImportError(
1133
+ "Playwright is required for browser automation. "
1134
+ "Install it with: pip install thordata-sdk[browser]"
1135
+ ) from e
@@ -0,0 +1,16 @@
1
+ """Browser automation module for Thordata Scraping Browser.
2
+
3
+ This module provides high-level browser automation capabilities using Playwright.
4
+ Requires optional dependency: pip install thordata-sdk[browser]
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ try:
10
+ from .exceptions import BrowserConnectionError, BrowserError
11
+ from .session import BrowserSession
12
+
13
+ __all__ = ["BrowserSession", "BrowserError", "BrowserConnectionError"]
14
+ except ImportError:
15
+ # Playwright not installed
16
+ __all__ = []
@@ -0,0 +1,23 @@
1
+ """Browser automation exceptions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..exceptions import ThordataError
6
+
7
+
8
+ class BrowserError(ThordataError):
9
+ """Base exception for browser automation errors."""
10
+
11
+ pass
12
+
13
+
14
+ class BrowserConnectionError(BrowserError):
15
+ """Raised when browser connection fails."""
16
+
17
+ pass
18
+
19
+
20
+ class BrowserSessionError(BrowserError):
21
+ """Raised when browser session operations fail."""
22
+
23
+ pass