thordata-sdk 1.6.0__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thordata_sdk-1.6.0/src/thordata_sdk.egg-info → thordata_sdk-1.8.0}/PKG-INFO +4 -2
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/README.md +1 -1
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/pyproject.toml +5 -1
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/__init__.py +1 -1
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/async_client.py +76 -10
- thordata_sdk-1.8.0/src/thordata/browser/__init__.py +16 -0
- thordata_sdk-1.8.0/src/thordata/browser/exceptions.py +23 -0
- thordata_sdk-1.8.0/src/thordata/browser/session.py +469 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/client.py +55 -7
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/exceptions.py +10 -1
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/serp.py +35 -3
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/task.py +63 -9
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/universal.py +37 -5
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0/src/thordata_sdk.egg-info}/PKG-INFO +4 -2
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/SOURCES.txt +4 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/requires.txt +3 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_async_client.py +3 -1
- thordata_sdk-1.8.0/tests/test_browser.py +104 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/LICENSE +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/setup.cfg +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/_utils.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/async_unlimited.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/__init__.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/async_http_client.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/http_client.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/core/tunnel.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/enums.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/models.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/retry.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/serp_engines.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/__init__.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/base.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/code.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/ecommerce.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/professional.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/search.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/social.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/travel.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/tools/video.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/__init__.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/common.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/types/proxy.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata/unlimited.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_async_client_errors.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_batch_creation.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_client.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_client_errors.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_enums.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_examples.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_integration_proxy_protocols.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_retry.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_spec_parity.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_task_status_and_wait.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_tools.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_tools_coverage.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_unlimited.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_user_agent.py +0 -0
- {thordata_sdk-1.6.0 → thordata_sdk-1.8.0}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thordata-sdk
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
5
|
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
6
|
License: MIT
|
|
@@ -40,6 +40,8 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
|
40
40
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
41
41
|
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
42
42
|
Requires-Dist: aioresponses>=0.7.6; extra == "dev"
|
|
43
|
+
Provides-Extra: browser
|
|
44
|
+
Requires-Dist: playwright>=1.40.0; extra == "browser"
|
|
43
45
|
Dynamic: license-file
|
|
44
46
|
|
|
45
47
|
# Thordata Python SDK
|
|
@@ -63,7 +65,7 @@ Dynamic: license-file
|
|
|
63
65
|
|
|
64
66
|
## 📖 Introduction
|
|
65
67
|
|
|
66
|
-
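The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.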
|
|
68
|
+
The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
67
69
|
|
|
68
70
|
**Why v1.6.0?**
|
|
69
71
|
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
## 📖 Introduction
|
|
21
21
|
|
|
22
|
-
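The **Thordata Python SDK v1.6.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.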
|
|
22
|
+
The **Thordata Python SDK v1.8.0** is a production-ready wrapper for Thordata's AI data infrastructure. It is architected for high reliability, strict type safety, and maximum performance.
|
|
23
23
|
|
|
24
24
|
**Why v1.6.0?**
|
|
25
25
|
* **🛡️ Bulletproof Networking**: Custom core handles `HTTP`, `HTTPS`, and `SOCKS5h` (Remote DNS) tunneling, solving common SSL/TLS handshake issues in complex network environments.
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "thordata-sdk"
|
|
8
|
-
version = "1.6.0"
|
|
8
|
+
version = "1.8.0"
|
|
9
9
|
description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
@@ -61,6 +61,9 @@ dev = [
|
|
|
61
61
|
"types-requests>=2.28.0",
|
|
62
62
|
"aioresponses>=0.7.6",
|
|
63
63
|
]
|
|
64
|
+
browser = [
|
|
65
|
+
"playwright>=1.40.0",
|
|
66
|
+
]
|
|
64
67
|
|
|
65
68
|
[project.urls]
|
|
66
69
|
"Homepage" = "https://www.thordata.com"
|
|
@@ -115,6 +118,7 @@ check_untyped_defs = false
|
|
|
115
118
|
strict_optional = false
|
|
116
119
|
show_error_codes = true
|
|
117
120
|
ignore_missing_imports = true
|
|
121
|
+
follow_imports = "skip"
|
|
118
122
|
|
|
119
123
|
[[tool.mypy.overrides]]
|
|
120
124
|
module = ["aiohttp.*", "requests.*"]
|
|
@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
|
|
|
5
5
|
Universal Scraping API (Web Unlocker), and Web Scraper API.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "1.6.0"
|
|
8
|
+
__version__ = "1.8.0"
|
|
9
9
|
__author__ = "Thordata Developer Team/Kael Odin"
|
|
10
10
|
__email__ = "support@thordata.com"
|
|
11
11
|
|
|
@@ -244,6 +244,7 @@ class AsyncThordataClient:
|
|
|
244
244
|
render_js: bool | None = None,
|
|
245
245
|
no_cache: bool | None = None,
|
|
246
246
|
output_format: str = "json",
|
|
247
|
+
ai_overview: bool = False,
|
|
247
248
|
**kwargs: Any,
|
|
248
249
|
) -> dict[str, Any]:
|
|
249
250
|
engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
|
|
@@ -258,13 +259,14 @@ class AsyncThordataClient:
|
|
|
258
259
|
render_js=render_js,
|
|
259
260
|
no_cache=no_cache,
|
|
260
261
|
output_format=output_format,
|
|
262
|
+
ai_overview=ai_overview,
|
|
261
263
|
extra_params=kwargs,
|
|
262
264
|
)
|
|
263
265
|
return await self.serp_search_advanced(request)
|
|
264
266
|
|
|
265
267
|
async def serp_search_advanced(self, request: SerpRequest) -> dict[str, Any]:
|
|
266
268
|
if not self.scraper_token:
|
|
267
|
-
raise ThordataConfigError("scraper_token required")
|
|
269
|
+
raise ThordataConfigError("scraper_token is required for SERP API")
|
|
268
270
|
payload = request.to_payload()
|
|
269
271
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
270
272
|
logger.info(f"Async SERP: {request.engine} - {request.query}")
|
|
@@ -293,30 +295,38 @@ class AsyncThordataClient:
|
|
|
293
295
|
url: str,
|
|
294
296
|
*,
|
|
295
297
|
js_render: bool = False,
|
|
296
|
-
output_format: str = "html",
|
|
298
|
+
output_format: str | list[str] = "html",
|
|
297
299
|
country: str | None = None,
|
|
298
300
|
block_resources: str | None = None,
|
|
301
|
+
clean_content: str | None = None,
|
|
299
302
|
wait: int | None = None,
|
|
300
303
|
wait_for: str | None = None,
|
|
304
|
+
follow_redirect: bool | None = None,
|
|
305
|
+
headers: list[dict[str, str]] | None = None,
|
|
306
|
+
cookies: list[dict[str, str]] | None = None,
|
|
301
307
|
**kwargs: Any,
|
|
302
|
-
) -> str | bytes:
|
|
308
|
+
) -> str | bytes | dict[str, str | bytes]:
|
|
303
309
|
request = UniversalScrapeRequest(
|
|
304
310
|
url=url,
|
|
305
311
|
js_render=js_render,
|
|
306
312
|
output_format=output_format,
|
|
307
313
|
country=country,
|
|
308
314
|
block_resources=block_resources,
|
|
315
|
+
clean_content=clean_content,
|
|
309
316
|
wait=wait,
|
|
310
317
|
wait_for=wait_for,
|
|
318
|
+
follow_redirect=follow_redirect,
|
|
319
|
+
headers=headers,
|
|
320
|
+
cookies=cookies,
|
|
311
321
|
extra_params=kwargs,
|
|
312
322
|
)
|
|
313
323
|
return await self.universal_scrape_advanced(request)
|
|
314
324
|
|
|
315
325
|
async def universal_scrape_advanced(
|
|
316
326
|
self, request: UniversalScrapeRequest
|
|
317
|
-
) -> str | bytes:
|
|
327
|
+
) -> str | bytes | dict[str, str | bytes]:
|
|
318
328
|
if not self.scraper_token:
|
|
319
|
-
raise ThordataConfigError("scraper_token required")
|
|
329
|
+
raise ThordataConfigError("scraper_token is required for Universal API")
|
|
320
330
|
payload = request.to_payload()
|
|
321
331
|
headers = build_auth_headers(self.scraper_token, mode=self._auth_mode)
|
|
322
332
|
|
|
@@ -327,9 +337,17 @@ class AsyncThordataClient:
|
|
|
327
337
|
try:
|
|
328
338
|
resp_json = await response.json()
|
|
329
339
|
except ValueError:
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
340
|
+
# If not JSON, return raw content based on format
|
|
341
|
+
if isinstance(request.output_format, list) or (
|
|
342
|
+
isinstance(request.output_format, str) and "," in request.output_format
|
|
343
|
+
):
|
|
344
|
+
return {"raw": await response.read()}
|
|
345
|
+
fmt = (
|
|
346
|
+
request.output_format.lower()
|
|
347
|
+
if isinstance(request.output_format, str)
|
|
348
|
+
else str(request.output_format).lower()
|
|
349
|
+
)
|
|
350
|
+
return await response.read() if fmt == "png" else await response.text()
|
|
333
351
|
|
|
334
352
|
if isinstance(resp_json, dict):
|
|
335
353
|
code = resp_json.get("code")
|
|
@@ -337,6 +355,27 @@ class AsyncThordataClient:
|
|
|
337
355
|
msg = extract_error_message(resp_json)
|
|
338
356
|
raise_for_code(f"Universal Error: {msg}", code=code, payload=resp_json)
|
|
339
357
|
|
|
358
|
+
# Handle multiple output formats
|
|
359
|
+
if isinstance(request.output_format, list) or (
|
|
360
|
+
isinstance(request.output_format, str) and "," in request.output_format
|
|
361
|
+
):
|
|
362
|
+
result: dict[str, str | bytes] = {}
|
|
363
|
+
formats = (
|
|
364
|
+
request.output_format
|
|
365
|
+
if isinstance(request.output_format, list)
|
|
366
|
+
else [f.strip() for f in request.output_format.split(",")]
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
for fmt in formats:
|
|
370
|
+
fmt_lower = fmt.lower()
|
|
371
|
+
if fmt_lower == "html" and "html" in resp_json:
|
|
372
|
+
result["html"] = resp_json["html"]
|
|
373
|
+
elif fmt_lower == "png" and "png" in resp_json:
|
|
374
|
+
result["png"] = decode_base64_image(resp_json["png"])
|
|
375
|
+
|
|
376
|
+
if result:
|
|
377
|
+
return result
|
|
378
|
+
|
|
340
379
|
if "html" in resp_json:
|
|
341
380
|
return resp_json["html"]
|
|
342
381
|
if "png" in resp_json:
|
|
@@ -411,7 +450,7 @@ class AsyncThordataClient:
|
|
|
411
450
|
async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
|
|
412
451
|
self._require_public_credentials()
|
|
413
452
|
if not self.scraper_token:
|
|
414
|
-
raise ThordataConfigError("scraper_token required")
|
|
453
|
+
raise ThordataConfigError("scraper_token is required for Task Builder")
|
|
415
454
|
payload = config.to_payload()
|
|
416
455
|
headers = build_builder_headers(
|
|
417
456
|
self.scraper_token, str(self.public_token), str(self.public_key)
|
|
@@ -449,7 +488,9 @@ class AsyncThordataClient:
|
|
|
449
488
|
async def create_video_task_advanced(self, config: VideoTaskConfig) -> str:
|
|
450
489
|
self._require_public_credentials()
|
|
451
490
|
if not self.scraper_token:
|
|
452
|
-
raise ThordataConfigError("scraper_token required")
|
|
491
|
+
raise ThordataConfigError(
|
|
492
|
+
"scraper_token is required for Video Task Builder"
|
|
493
|
+
)
|
|
453
494
|
payload = config.to_payload()
|
|
454
495
|
headers = build_builder_headers(
|
|
455
496
|
self.scraper_token, str(self.public_token), str(self.public_key)
|
|
@@ -1067,3 +1108,28 @@ class AsyncThordataClient:
|
|
|
1067
1108
|
safe_user = quote(final_user, safe="")
|
|
1068
1109
|
safe_pass = quote(pwd, safe="")
|
|
1069
1110
|
return f"wss://{safe_user}:{safe_pass}@ws-browser.thordata.com"
|
|
1111
|
+
|
|
1112
|
+
@property
|
|
1113
|
+
def browser(self):
|
|
1114
|
+
"""Get a browser session for automation.
|
|
1115
|
+
|
|
1116
|
+
Requires playwright: pip install thordata[browser]
|
|
1117
|
+
|
|
1118
|
+
Returns:
|
|
1119
|
+
BrowserSession instance
|
|
1120
|
+
|
|
1121
|
+
Example:
|
|
1122
|
+
async with AsyncThordataClient() as client:
|
|
1123
|
+
session = client.browser
|
|
1124
|
+
await session.navigate("https://example.com")
|
|
1125
|
+
snapshot = await session.snapshot()
|
|
1126
|
+
"""
|
|
1127
|
+
try:
|
|
1128
|
+
from .browser import BrowserSession
|
|
1129
|
+
|
|
1130
|
+
return BrowserSession(self)
|
|
1131
|
+
except ImportError as e:
|
|
1132
|
+
raise ImportError(
|
|
1133
|
+
"Playwright is required for browser automation. "
|
|
1134
|
+
"Install it with: pip install thordata[browser]"
|
|
1135
|
+
) from e
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Browser automation module for Thordata Scraping Browser.
|
|
2
|
+
|
|
3
|
+
This module provides high-level browser automation capabilities using Playwright.
|
|
4
|
+
Requires optional dependency: pip install thordata[browser]
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from .exceptions import BrowserConnectionError, BrowserError
|
|
11
|
+
from .session import BrowserSession
|
|
12
|
+
|
|
13
|
+
__all__ = ["BrowserSession", "BrowserError", "BrowserConnectionError"]
|
|
14
|
+
except ImportError:
|
|
15
|
+
# Playwright not installed
|
|
16
|
+
__all__ = []
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Browser automation exceptions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ..exceptions import ThordataError
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BrowserError(ThordataError):
|
|
9
|
+
"""Base exception for browser automation errors."""
|
|
10
|
+
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BrowserConnectionError(BrowserError):
|
|
15
|
+
"""Raised when browser connection fails."""
|
|
16
|
+
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BrowserSessionError(BrowserError):
|
|
21
|
+
"""Raised when browser session operations fail."""
|
|
22
|
+
|
|
23
|
+
pass
|