thordata-sdk 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thordata_sdk-1.2.0/src/thordata_sdk.egg-info → thordata_sdk-1.3.0}/PKG-INFO +1 -1
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/pyproject.toml +1 -1
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/__init__.py +1 -1
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/async_client.py +62 -4
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/client.py +77 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0/src/thordata_sdk.egg-info}/PKG-INFO +1 -1
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_async_client_errors.py +1 -1
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/LICENSE +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/README.md +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/setup.cfg +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/_example_utils.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/_utils.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/demo.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/enums.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/exceptions.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/models.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/retry.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/serp_engines.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/SOURCES.txt +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_async_client.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_client.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_client_errors.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_enums.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_examples.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_exceptions.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_integration_proxy_protocols.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_models.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_spec_parity.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_task_status_and_wait.py +0 -0
- {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_user_agent.py +0 -0
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "thordata-sdk"
|
|
8
|
-
version = "1.2.0"
|
|
8
|
+
version = "1.3.0"
|
|
9
9
|
description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
@@ -652,7 +652,7 @@ class AsyncThordataClient:
|
|
|
652
652
|
self._builder_url, data=payload, headers=headers
|
|
653
653
|
) as response:
|
|
654
654
|
response.raise_for_status()
|
|
655
|
-
data = await response.json()
|
|
655
|
+
data = await response.json(content_type=None)
|
|
656
656
|
|
|
657
657
|
code = data.get("code")
|
|
658
658
|
if code != 200:
|
|
@@ -763,7 +763,7 @@ class AsyncThordataClient:
|
|
|
763
763
|
self._status_url, data=payload, headers=headers
|
|
764
764
|
) as response:
|
|
765
765
|
response.raise_for_status()
|
|
766
|
-
data = await response.json()
|
|
766
|
+
data = await response.json(content_type=None)
|
|
767
767
|
|
|
768
768
|
if isinstance(data, dict):
|
|
769
769
|
code = data.get("code")
|
|
@@ -826,7 +826,7 @@ class AsyncThordataClient:
|
|
|
826
826
|
async with session.post(
|
|
827
827
|
self._download_url, data=payload, headers=headers
|
|
828
828
|
) as response:
|
|
829
|
-
data = await response.json()
|
|
829
|
+
data = await response.json(content_type=None)
|
|
830
830
|
code = data.get("code")
|
|
831
831
|
|
|
832
832
|
if code == 200 and data.get("data"):
|
|
@@ -879,7 +879,7 @@ class AsyncThordataClient:
|
|
|
879
879
|
timeout=self._api_timeout,
|
|
880
880
|
) as response:
|
|
881
881
|
response.raise_for_status()
|
|
882
|
-
data = await response.json()
|
|
882
|
+
data = await response.json(content_type=None)
|
|
883
883
|
|
|
884
884
|
code = data.get("code")
|
|
885
885
|
if code != 200:
|
|
@@ -933,6 +933,64 @@ class AsyncThordataClient:
|
|
|
933
933
|
|
|
934
934
|
raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
|
|
935
935
|
|
|
936
|
+
async def run_task(
|
|
937
|
+
self,
|
|
938
|
+
file_name: str,
|
|
939
|
+
spider_id: str,
|
|
940
|
+
spider_name: str,
|
|
941
|
+
parameters: dict[str, Any],
|
|
942
|
+
universal_params: dict[str, Any] | None = None,
|
|
943
|
+
*,
|
|
944
|
+
max_wait: float = 600.0,
|
|
945
|
+
initial_poll_interval: float = 2.0,
|
|
946
|
+
max_poll_interval: float = 10.0,
|
|
947
|
+
include_errors: bool = True,
|
|
948
|
+
) -> str:
|
|
949
|
+
"""
|
|
950
|
+
Async high-level wrapper that runs a Web Scraper task and waits for the result download URL.
|
|
951
|
+
|
|
952
|
+
Lifecycle: Create -> Poll (Backoff) -> Get Download URL.
|
|
953
|
+
|
|
954
|
+
Returns:
|
|
955
|
+
str: The download URL.
|
|
956
|
+
"""
|
|
957
|
+
# 1. Create Task
|
|
958
|
+
config = ScraperTaskConfig(
|
|
959
|
+
file_name=file_name,
|
|
960
|
+
spider_id=spider_id,
|
|
961
|
+
spider_name=spider_name,
|
|
962
|
+
parameters=parameters,
|
|
963
|
+
universal_params=universal_params,
|
|
964
|
+
include_errors=include_errors,
|
|
965
|
+
)
|
|
966
|
+
task_id = await self.create_scraper_task_advanced(config)
|
|
967
|
+
logger.info(f"Async Task created: {task_id}. Polling...")
|
|
968
|
+
|
|
969
|
+
# 2. Poll Status
|
|
970
|
+
import time
|
|
971
|
+
|
|
972
|
+
start_time = time.monotonic()
|
|
973
|
+
current_poll = initial_poll_interval
|
|
974
|
+
|
|
975
|
+
while (time.monotonic() - start_time) < max_wait:
|
|
976
|
+
status = await self.get_task_status(task_id)
|
|
977
|
+
status_lower = status.lower()
|
|
978
|
+
|
|
979
|
+
if status_lower in {"ready", "success", "finished"}:
|
|
980
|
+
logger.info(f"Task {task_id} ready.")
|
|
981
|
+
# 3. Get Result
|
|
982
|
+
return await self.get_task_result(task_id)
|
|
983
|
+
|
|
984
|
+
if status_lower in {"failed", "error", "cancelled"}:
|
|
985
|
+
raise ThordataNetworkError(
|
|
986
|
+
f"Task {task_id} failed with status: {status}"
|
|
987
|
+
)
|
|
988
|
+
|
|
989
|
+
await asyncio.sleep(current_poll)
|
|
990
|
+
current_poll = min(current_poll * 1.5, max_poll_interval)
|
|
991
|
+
|
|
992
|
+
raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
|
|
993
|
+
|
|
936
994
|
# =========================================================================
|
|
937
995
|
# Proxy Account Management Methods
|
|
938
996
|
# =========================================================================
|
|
@@ -1380,6 +1380,83 @@ class ThordataClient:
|
|
|
1380
1380
|
time.sleep(poll_interval)
|
|
1381
1381
|
raise TimeoutError(f"Task {task_id} timeout")
|
|
1382
1382
|
|
|
1383
|
+
def run_task(
|
|
1384
|
+
self,
|
|
1385
|
+
file_name: str,
|
|
1386
|
+
spider_id: str,
|
|
1387
|
+
spider_name: str,
|
|
1388
|
+
parameters: dict[str, Any],
|
|
1389
|
+
universal_params: dict[str, Any] | None = None,
|
|
1390
|
+
*,
|
|
1391
|
+
max_wait: float = 600.0,
|
|
1392
|
+
initial_poll_interval: float = 2.0,
|
|
1393
|
+
max_poll_interval: float = 10.0,
|
|
1394
|
+
include_errors: bool = True,
|
|
1395
|
+
) -> str:
|
|
1396
|
+
"""
|
|
1397
|
+
High-level wrapper that runs a Web Scraper task and waits for the result download URL.
|
|
1398
|
+
|
|
1399
|
+
This method handles the entire lifecycle:
|
|
1400
|
+
1. Create Task
|
|
1401
|
+
2. Poll status (with exponential backoff)
|
|
1402
|
+
3. Get download URL when ready
|
|
1403
|
+
|
|
1404
|
+
Args:
|
|
1405
|
+
file_name: Name for the output file.
|
|
1406
|
+
spider_id: Spider identifier from Dashboard.
|
|
1407
|
+
spider_name: Spider name (target domain).
|
|
1408
|
+
parameters: Spider-specific parameters.
|
|
1409
|
+
universal_params: Global spider settings.
|
|
1410
|
+
max_wait: Maximum seconds to wait for task completion (default 600).
|
|
1411
|
+
initial_poll_interval: Starting poll interval in seconds.
|
|
1412
|
+
max_poll_interval: Maximum poll interval cap.
|
|
1413
|
+
include_errors: Whether to include error logs in the task result.
|
|
1414
|
+
|
|
1415
|
+
Returns:
|
|
1416
|
+
str: The download URL for the task result (default JSON).
|
|
1417
|
+
|
|
1418
|
+
Raises:
|
|
1419
|
+
ThordataTimeoutError: If task takes longer than max_wait.
|
|
1420
|
+
ThordataNetworkError: If the task ends with a failed, error, or cancelled status.
|
|
1421
|
+
"""
|
|
1422
|
+
import time
|
|
1423
|
+
|
|
1424
|
+
# 1. Create Task
|
|
1425
|
+
config = ScraperTaskConfig(
|
|
1426
|
+
file_name=file_name,
|
|
1427
|
+
spider_id=spider_id,
|
|
1428
|
+
spider_name=spider_name,
|
|
1429
|
+
parameters=parameters,
|
|
1430
|
+
universal_params=universal_params,
|
|
1431
|
+
include_errors=include_errors,
|
|
1432
|
+
)
|
|
1433
|
+
task_id = self.create_scraper_task_advanced(config)
|
|
1434
|
+
logger.info(f"Task created successfully: {task_id}. Waiting for completion...")
|
|
1435
|
+
|
|
1436
|
+
# 2. Poll Status (Smart Backoff)
|
|
1437
|
+
start_time = time.monotonic()
|
|
1438
|
+
current_poll = initial_poll_interval
|
|
1439
|
+
|
|
1440
|
+
while (time.monotonic() - start_time) < max_wait:
|
|
1441
|
+
status = self.get_task_status(task_id)
|
|
1442
|
+
status_lower = status.lower()
|
|
1443
|
+
|
|
1444
|
+
if status_lower in {"ready", "success", "finished"}:
|
|
1445
|
+
logger.info(f"Task {task_id} finished. Status: {status}")
|
|
1446
|
+
# 3. Get Result
|
|
1447
|
+
return self.get_task_result(task_id)
|
|
1448
|
+
|
|
1449
|
+
if status_lower in {"failed", "error", "cancelled"}:
|
|
1450
|
+
raise ThordataNetworkError(
|
|
1451
|
+
f"Task {task_id} ended with failed status: {status}"
|
|
1452
|
+
)
|
|
1453
|
+
|
|
1454
|
+
# Wait and increase interval (capped)
|
|
1455
|
+
time.sleep(current_poll)
|
|
1456
|
+
current_poll = min(current_poll * 1.5, max_poll_interval)
|
|
1457
|
+
|
|
1458
|
+
raise ThordataTimeoutError(f"Task {task_id} timed out after {max_wait} seconds")
|
|
1459
|
+
|
|
1383
1460
|
# =========================================================================
|
|
1384
1461
|
# Account / Locations / Utils
|
|
1385
1462
|
# =========================================================================
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|