PyPI - webcrawlerapi - Versions diffs - 2.0.11__tar.gz → 2.0.12__tar.gz - Mend

webcrawlerapi 2.0.11 (tar.gz) → 2.0.12 (tar.gz)

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.11
+Version: 2.0.12
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 setup(
     name="webcrawlerapi",
-    version="2.0.11",
+    version="2.0.12",
     packages=find_packages(),
     install_requires=[
         "requests>=2.25.0",

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/webcrawlerapi/__init__.py RENAMED Viewed

@@ -28,6 +28,7 @@ from .models import (
     CrawlResponse,
     Job,
     JobItem,
+    JobMarkdownResponse,
     ScrapeId,
     ScrapeResponse,
     ScrapeResponseError,
@@ -39,6 +40,7 @@ __all__ = [
     "WebCrawlerAPI",
     "Job",
     "JobItem",
+    "JobMarkdownResponse",
     "CrawlResponse",
     "ScrapeId",
     "ScrapeResponse",

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/webcrawlerapi/client.py RENAMED Viewed

@@ -8,6 +8,7 @@ from .models import (
     Action,
     CrawlResponse,
     Job,
+    JobMarkdownResponse,
     ScrapeId,
     ScrapeResponse,
     ScrapeResponseError,
@@ -123,15 +124,15 @@ class WebCrawlerAPI:
         response.raise_for_status()
         return Job(response.json())
-    def get_job_markdown(self, job_id: str) -> str:
+    def get_job_markdown(self, job_id: str) -> JobMarkdownResponse:
         """
-        Get combined markdown content for a completed markdown job.
+        Get the URL to the combined markdown file for a completed markdown job.
         Args:
             job_id (str): The unique identifier of the job
         Returns:
-            str: Combined markdown content
+            JobMarkdownResponse: Response containing the content_url to the markdown file
         Raises:
             requests.exceptions.RequestException: If the API request fails
@@ -139,6 +140,26 @@ class WebCrawlerAPI:
         response = self.session.get(
             urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}/markdown")
         )
+        response.raise_for_status()
+        data = response.json()
+        return JobMarkdownResponse(content_url=data["content_url"])
+    def get_job_markdown_content(self, job_id: str) -> str:
+        """
+        Download the combined markdown content for a completed markdown job.
+        Args:
+            job_id (str): The unique identifier of the job
+        Returns:
+            str: Combined markdown content as plain text
+        Raises:
+            requests.exceptions.RequestException: If the API request fails
+        """
+        response = self.session.get(
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}/markdown/content")
+        )
         if not response.ok:
             try:
@@ -303,7 +324,7 @@ class WebCrawlerAPI:
                 f"Job finished with status {job.status}"
             )
-        return self.get_job_markdown(job.id)
+        return self.get_job_markdown_content(job.id)
     def scrape_async(
         self,

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/webcrawlerapi/models.py RENAMED Viewed

@@ -38,6 +38,13 @@ class CrawlResponse:
     id: str
+@dataclass
+class JobMarkdownResponse:
+    """Response from the get_job_markdown endpoint."""
+    content_url: str
 @dataclass
 class ScrapeId:
     """Response from an asynchronous scrape request."""

{webcrawlerapi-2.0.11 → webcrawlerapi-2.0.12}/webcrawlerapi.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.11
+Version: 2.0.12
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew