thordata-sdk 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {thordata_sdk-1.2.0/src/thordata_sdk.egg-info → thordata_sdk-1.3.0}/PKG-INFO +1 -1
  2. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/pyproject.toml +1 -1
  3. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/__init__.py +1 -1
  4. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/async_client.py +62 -4
  5. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/client.py +77 -0
  6. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0/src/thordata_sdk.egg-info}/PKG-INFO +1 -1
  7. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_async_client_errors.py +1 -1
  8. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/LICENSE +0 -0
  9. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/README.md +0 -0
  10. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/setup.cfg +0 -0
  11. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/_example_utils.py +0 -0
  12. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/_utils.py +0 -0
  13. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/demo.py +0 -0
  14. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/enums.py +0 -0
  15. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/exceptions.py +0 -0
  16. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/models.py +0 -0
  17. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/retry.py +0 -0
  18. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata/serp_engines.py +0 -0
  19. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/SOURCES.txt +0 -0
  20. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
  21. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/requires.txt +0 -0
  22. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
  23. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_async_client.py +0 -0
  24. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_client.py +0 -0
  25. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_client_errors.py +0 -0
  26. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_enums.py +0 -0
  27. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_examples.py +0 -0
  28. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_exceptions.py +0 -0
  29. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_integration_proxy_protocols.py +0 -0
  30. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_models.py +0 -0
  31. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_spec_parity.py +0 -0
  32. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_task_status_and_wait.py +0 -0
  33. {thordata_sdk-1.2.0 → thordata_sdk-1.3.0}/tests/test_user_agent.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "thordata-sdk"
8
- version = "1.2.0"
8
+ version = "1.3.0"
9
9
  description = "The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network."
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -35,7 +35,7 @@ Async Usage:
35
35
  >>> asyncio.run(main())
36
36
  """
37
37
 
38
- __version__ = "1.2.0"
38
+ __version__ = "1.3.0"
39
39
  __author__ = "Thordata Developer Team"
40
40
  __email__ = "support@thordata.com"
41
41
 
@@ -652,7 +652,7 @@ class AsyncThordataClient:
652
652
  self._builder_url, data=payload, headers=headers
653
653
  ) as response:
654
654
  response.raise_for_status()
655
- data = await response.json()
655
+ data = await response.json(content_type=None)
656
656
 
657
657
  code = data.get("code")
658
658
  if code != 200:
@@ -763,7 +763,7 @@ class AsyncThordataClient:
763
763
  self._status_url, data=payload, headers=headers
764
764
  ) as response:
765
765
  response.raise_for_status()
766
- data = await response.json()
766
+ data = await response.json(content_type=None)
767
767
 
768
768
  if isinstance(data, dict):
769
769
  code = data.get("code")
@@ -826,7 +826,7 @@ class AsyncThordataClient:
826
826
  async with session.post(
827
827
  self._download_url, data=payload, headers=headers
828
828
  ) as response:
829
- data = await response.json()
829
+ data = await response.json(content_type=None)
830
830
  code = data.get("code")
831
831
 
832
832
  if code == 200 and data.get("data"):
@@ -879,7 +879,7 @@ class AsyncThordataClient:
879
879
  timeout=self._api_timeout,
880
880
  ) as response:
881
881
  response.raise_for_status()
882
- data = await response.json()
882
+ data = await response.json(content_type=None)
883
883
 
884
884
  code = data.get("code")
885
885
  if code != 200:
@@ -933,6 +933,64 @@ class AsyncThordataClient:
933
933
 
934
934
  raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
935
935
 
936
+ async def run_task(
937
+ self,
938
+ file_name: str,
939
+ spider_id: str,
940
+ spider_name: str,
941
+ parameters: dict[str, Any],
942
+ universal_params: dict[str, Any] | None = None,
943
+ *,
944
+ max_wait: float = 600.0,
945
+ initial_poll_interval: float = 2.0,
946
+ max_poll_interval: float = 10.0,
947
+ include_errors: bool = True,
948
+ ) -> str:
949
+ """
950
+ Async high-level wrapper to Run a Web Scraper task and wait for result.
951
+
952
+ Lifecycle: Create -> Poll (Backoff) -> Get Download URL.
953
+
954
+ Returns:
955
+ str: The download URL.
956
+ """
957
+ # 1. Create Task
958
+ config = ScraperTaskConfig(
959
+ file_name=file_name,
960
+ spider_id=spider_id,
961
+ spider_name=spider_name,
962
+ parameters=parameters,
963
+ universal_params=universal_params,
964
+ include_errors=include_errors,
965
+ )
966
+ task_id = await self.create_scraper_task_advanced(config)
967
+ logger.info(f"Async Task created: {task_id}. Polling...")
968
+
969
+ # 2. Poll Status
970
+ import time
971
+
972
+ start_time = time.monotonic()
973
+ current_poll = initial_poll_interval
974
+
975
+ while (time.monotonic() - start_time) < max_wait:
976
+ status = await self.get_task_status(task_id)
977
+ status_lower = status.lower()
978
+
979
+ if status_lower in {"ready", "success", "finished"}:
980
+ logger.info(f"Task {task_id} ready.")
981
+ # 3. Get Result
982
+ return await self.get_task_result(task_id)
983
+
984
+ if status_lower in {"failed", "error", "cancelled"}:
985
+ raise ThordataNetworkError(
986
+ f"Task {task_id} failed with status: {status}"
987
+ )
988
+
989
+ await asyncio.sleep(current_poll)
990
+ current_poll = min(current_poll * 1.5, max_poll_interval)
991
+
992
+ raise ThordataTimeoutError(f"Async Task {task_id} timed out after {max_wait}s")
993
+
936
994
  # =========================================================================
937
995
  # Proxy Account Management Methods
938
996
  # =========================================================================
@@ -1380,6 +1380,83 @@ class ThordataClient:
1380
1380
  time.sleep(poll_interval)
1381
1381
  raise TimeoutError(f"Task {task_id} timeout")
1382
1382
 
1383
+ def run_task(
1384
+ self,
1385
+ file_name: str,
1386
+ spider_id: str,
1387
+ spider_name: str,
1388
+ parameters: dict[str, Any],
1389
+ universal_params: dict[str, Any] | None = None,
1390
+ *,
1391
+ max_wait: float = 600.0,
1392
+ initial_poll_interval: float = 2.0,
1393
+ max_poll_interval: float = 10.0,
1394
+ include_errors: bool = True,
1395
+ ) -> str:
1396
+ """
1397
+ High-level wrapper to Run a Web Scraper task and wait for the result download URL.
1398
+
1399
+ This method handles the entire lifecycle:
1400
+ 1. Create Task
1401
+ 2. Poll status (with exponential backoff)
1402
+ 3. Get download URL when ready
1403
+
1404
+ Args:
1405
+ file_name: Name for the output file.
1406
+ spider_id: Spider identifier from Dashboard.
1407
+ spider_name: Spider name (target domain).
1408
+ parameters: Spider-specific parameters.
1409
+ universal_params: Global spider settings.
1410
+ max_wait: Maximum seconds to wait for task completion (default 600).
1411
+ initial_poll_interval: Starting poll interval in seconds.
1412
+ max_poll_interval: Maximum poll interval cap.
1413
+ include_errors: Whether to include error logs in the task result.
1414
+
1415
+ Returns:
1416
+ str: The download URL for the task result (default JSON).
1417
+
1418
+ Raises:
1419
+ ThordataTimeoutError: If task takes longer than max_wait.
1420
+ ThordataAPIError: If task fails or is cancelled.
1421
+ """
1422
+ import time
1423
+
1424
+ # 1. Create Task
1425
+ config = ScraperTaskConfig(
1426
+ file_name=file_name,
1427
+ spider_id=spider_id,
1428
+ spider_name=spider_name,
1429
+ parameters=parameters,
1430
+ universal_params=universal_params,
1431
+ include_errors=include_errors,
1432
+ )
1433
+ task_id = self.create_scraper_task_advanced(config)
1434
+ logger.info(f"Task created successfully: {task_id}. Waiting for completion...")
1435
+
1436
+ # 2. Poll Status (Smart Backoff)
1437
+ start_time = time.monotonic()
1438
+ current_poll = initial_poll_interval
1439
+
1440
+ while (time.monotonic() - start_time) < max_wait:
1441
+ status = self.get_task_status(task_id)
1442
+ status_lower = status.lower()
1443
+
1444
+ if status_lower in {"ready", "success", "finished"}:
1445
+ logger.info(f"Task {task_id} finished. Status: {status}")
1446
+ # 3. Get Result
1447
+ return self.get_task_result(task_id)
1448
+
1449
+ if status_lower in {"failed", "error", "cancelled"}:
1450
+ raise ThordataNetworkError(
1451
+ f"Task {task_id} ended with failed status: {status}"
1452
+ )
1453
+
1454
+ # Wait and increase interval (capped)
1455
+ time.sleep(current_poll)
1456
+ current_poll = min(current_poll * 1.5, max_poll_interval)
1457
+
1458
+ raise ThordataTimeoutError(f"Task {task_id} timed out after {max_wait} seconds")
1459
+
1383
1460
  # =========================================================================
1384
1461
  # Account / Locations / Utils
1385
1462
  # =========================================================================
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thordata-sdk
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
5
5
  Author-email: Thordata Developer Team <support@thordata.com>
6
6
  License: MIT
@@ -28,7 +28,7 @@ class DummyAsyncResponse:
28
28
  def raise_for_status(self) -> None:
29
29
  pass
30
30
 
31
- async def json(self) -> dict[str, Any]:
31
+ async def json(self, **kwargs: Any) -> dict[str, Any]:
32
32
  return self._json_data
33
33
 
34
34
  async def read(self) -> bytes:
File without changes
File without changes
File without changes