PyPI - webcrawlerapi - Versions diffs - 2.0.1__tar.gz → 2.0.4__tar.gz - Mend

webcrawlerapi 2.0.1.tar.gz → 2.0.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.1
+Version: 2.0.4
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="webcrawlerapi",
-    version="2.0.1",
+    version="2.0.4",
     packages=find_packages(),
     install_requires=[
         "requests>=2.25.0",

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi/client.py RENAMED Viewed

@@ -12,24 +12,25 @@ from .models import (
     Action,
 )
+CRAWLER_VERSION = "v1"
+SCRAPER_VERSION = "v2"
 class WebCrawlerAPI:
     """Python SDK for WebCrawler API."""
     DEFAULT_POLL_DELAY_SECONDS = 5
-    def __init__(self, api_key: str, base_url: str = "https://api.webcrawlerapi.com", version: str = "v1"):
+    def __init__(self, api_key: str, base_url: str = "https://api.webcrawlerapi.com"):
         """
         Initialize the WebCrawler API client.
         Args:
             api_key (str): Your API key for authentication
             base_url (str): The base URL of the API (optional)
-            version (str): API version to use (optional, defaults to 'v2')
+            version (str): API version to use (optional, defaults to 'v1')
         """
         self.api_key = api_key
         self.base_url = base_url.rstrip('/')
-        self.version = version
         self.session = requests.Session()
         self.session.headers.update({
             'Authorization': f'Bearer {api_key}',
@@ -86,7 +87,7 @@ class WebCrawlerAPI:
             payload["actions"] = [vars(action) for action in action_list]
         response = self.session.post(
-            urljoin(self.base_url, f"/{self.version}/crawl"),
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/crawl"),
             json=payload
         )
         response.raise_for_status()
@@ -106,7 +107,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.get(
-            urljoin(self.base_url, f"/{self.version}/job/{job_id}")
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}")
         )
         response.raise_for_status()
         return Job(response.json())
@@ -126,7 +127,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.put(
-            urljoin(self.base_url, f"/{self.version}/job/{job_id}/cancel")
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}/cancel")
         )
         response.raise_for_status()
         return response.json()
@@ -208,6 +209,7 @@ class WebCrawlerAPI:
         output_format: str = "markdown",
         webhook_url: Optional[str] = None,
         clean_selectors: Optional[str] = None,
+        prompt: Optional[str] = None,
         actions: Optional[Union[Action, List[Action]]] = None
     ) -> ScrapeId:
         """
@@ -218,6 +220,7 @@ class WebCrawlerAPI:
             output_format (str): Output format (markdown, cleaned, html)
             webhook_url (str, optional): URL to receive a POST request when scraping is complete
             clean_selectors (str, optional): CSS selectors to clean from the content
+            prompt (str, optional): Prompt to guide the AI response
             actions (Action or List[Action], optional): Actions to perform after scraping (for example S3 upload)
         Returns:
@@ -235,6 +238,8 @@ class WebCrawlerAPI:
             payload["webhook_url"] = webhook_url
         if clean_selectors:
             payload["clean_selectors"] = clean_selectors
+        if prompt:
+            payload["prompt"] = prompt
         if actions:
             # Convert single action to list if needed
             action_list = [actions] if not isinstance(actions, list) else actions
@@ -242,7 +247,7 @@ class WebCrawlerAPI:
             payload["actions"] = [vars(action) for action in action_list]
         response = self.session.post(
-            urljoin(self.base_url, f"/{self.version}/scrape?async=true"),
+            urljoin(self.base_url, f"/{SCRAPER_VERSION}/scrape?async=true"),
             json=payload
         )
@@ -275,7 +280,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.get(
-            urljoin(self.base_url, f"/{self.version}/scrape/{scrape_id}")
+            urljoin(self.base_url, f"/{SCRAPER_VERSION}/scrape/{scrape_id}")
         )
         response.raise_for_status()
@@ -291,7 +296,8 @@ class WebCrawlerAPI:
                 cleaned_content=response_data.get("cleaned_content"),
                 raw_content=response_data.get("raw_content"),
                 page_status_code=response_data.get("page_status_code", 0),
-                page_title=response_data.get("page_title")
+                page_title=response_data.get("page_title"),
+                structured_data=response_data.get("structured_data")
             )
         elif status == "error":
             return ScrapeResponseError(
@@ -312,6 +318,7 @@ class WebCrawlerAPI:
         output_format: str = "markdown",
         webhook_url: Optional[str] = None,
         clean_selectors: Optional[str] = None,
+        prompt: Optional[str] = None,
         actions: Optional[Union[Action, List[Action]]] = None,
         max_polls: int = 100
     ) -> Union[ScrapeResponse, ScrapeResponseError]:
@@ -327,6 +334,7 @@ class WebCrawlerAPI:
             output_format (str): Output format (markdown, cleaned, html)
             webhook_url (str, optional): URL to receive a POST request when scraping is complete
             clean_selectors (str, optional): CSS selectors to clean from the content
+            prompt (str, optional): Prompt to guide the AI response
             actions (Action or List[Action], optional): Actions to perform during scraping
             max_polls (int): Maximum number of status checks before returning (default: 100)
@@ -342,6 +350,7 @@ class WebCrawlerAPI:
             output_format=output_format,
             webhook_url=webhook_url,
             clean_selectors=clean_selectors,
+            prompt=prompt,
             actions=actions
         )

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi/models.py RENAMED Viewed

@@ -25,6 +25,7 @@ class ScrapeResponse:
     raw_content: Optional[str] = None
     page_status_code: int = 0
     page_title: Optional[str] = None
+    structured_data: Optional[Dict[str, Any]] = None
 @dataclass

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.1
+Version: 2.0.4
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/README.md RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/setup.cfg RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi/__init__.py RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/requires.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.1 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/top_level.txt RENAMED Viewed

File without changes

webcrawlerapi 2.0.1__tar.gz → 2.0.4__tar.gz

webcrawlerapi 2.0.1.tar.gz → 2.0.4.tar.gz