webcrawlerapi 2.0.3.tar.gz → 2.0.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/PKG-INFO +1 -1
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/setup.py +1 -1
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi/client.py +8 -7
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi/models.py +6 -34
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/PKG-INFO +1 -1
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/README.md +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/setup.cfg +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi/__init__.py +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/SOURCES.txt +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/dependency_links.txt +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/requires.txt +0 -0
- {webcrawlerapi-2.0.3 → webcrawlerapi-2.0.4}/webcrawlerapi.egg-info/top_level.txt +0 -0
webcrawlerapi/client.py

@@ -12,13 +12,15 @@ from .models import (
     Action,
 )
 
+CRAWLER_VERSION = "v1"
+SCRAPER_VERSION = "v2"
 
 class WebCrawlerAPI:
     """Python SDK for WebCrawler API."""
 
     DEFAULT_POLL_DELAY_SECONDS = 5
 
-    def __init__(self, api_key: str, base_url: str = "https://api.webcrawlerapi.com"
+    def __init__(self, api_key: str, base_url: str = "https://api.webcrawlerapi.com"):
         """
         Initialize the WebCrawler API client.
 
@@ -29,7 +31,6 @@ class WebCrawlerAPI:
         """
         self.api_key = api_key
         self.base_url = base_url.rstrip('/')
-        self.version = version
         self.session = requests.Session()
         self.session.headers.update({
             'Authorization': f'Bearer {api_key}',
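With the per-instance `version` attribute gone, the API version is now fixed per endpoint group by the module-level CRAWLER_VERSION and SCRAPER_VERSION constants. A hedged sketch of what client construction looks like after this change (the key value is a placeholder and the top-level import path is assumed from the package layout, not shown in this diff):

# Hedged sketch, not part of the diff itself.
from webcrawlerapi import WebCrawlerAPI

# No `version` argument any more: crawl/job endpoints are pinned to v1 and
# scrape endpoints to v2 by the module constants added above.
client = WebCrawlerAPI(api_key="YOUR_API_KEY")

# A trailing slash on base_url is harmless: the constructor strips it with rstrip('/').
client = WebCrawlerAPI(api_key="YOUR_API_KEY",
                       base_url="https://api.webcrawlerapi.com/")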
@@ -86,7 +87,7 @@ class WebCrawlerAPI:
         payload["actions"] = [vars(action) for action in action_list]
 
         response = self.session.post(
-            urljoin(self.base_url, f"/{
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/crawl"),
             json=payload
         )
         response.raise_for_status()
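Every rewritten request URL in this file follows the same pattern: a leading-slash path built from one of the version constants, joined onto base_url with urllib.parse.urljoin. A standard-library-only illustration of what that resolves to (values below are illustrative, not taken from the SDK):

# Standalone illustration of the URL building used in these hunks.
from urllib.parse import urljoin

CRAWLER_VERSION = "v1"
base_url = "https://api.webcrawlerapi.com"

# Because the second argument starts with "/", urljoin replaces any existing
# path on base_url instead of appending to it.
print(urljoin(base_url, f"/{CRAWLER_VERSION}/crawl"))
# -> https://api.webcrawlerapi.com/v1/crawl
print(urljoin(base_url + "/some/old/path", f"/{CRAWLER_VERSION}/crawl"))
# -> https://api.webcrawlerapi.com/v1/crawl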
@@ -106,7 +107,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.get(
-            urljoin(self.base_url, f"/{
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}")
         )
         response.raise_for_status()
         return Job(response.json())
@@ -126,7 +127,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.put(
-            urljoin(self.base_url, f"/{
+            urljoin(self.base_url, f"/{CRAWLER_VERSION}/job/{job_id}/cancel")
         )
         response.raise_for_status()
         return response.json()
@@ -246,7 +247,7 @@ class WebCrawlerAPI:
         payload["actions"] = [vars(action) for action in action_list]
 
         response = self.session.post(
-            urljoin(self.base_url, f"/{
+            urljoin(self.base_url, f"/{SCRAPER_VERSION}/scrape?async=true"),
             json=payload
         )
 
@@ -279,7 +280,7 @@ class WebCrawlerAPI:
             requests.exceptions.RequestException: If the API request fails
         """
         response = self.session.get(
-            urljoin(self.base_url, f"/{
+            urljoin(self.base_url, f"/{SCRAPER_VERSION}/scrape/{scrape_id}")
         )
 
         response.raise_for_status()
webcrawlerapi/models.py

@@ -1,34 +1,6 @@
 from typing import Optional, Dict, Any, List
 from datetime import datetime
 from dataclasses import dataclass
-import re
-
-
-def parse_datetime(datetime_str: str) -> datetime:
-    """
-    Parse datetime string from API response, handling various microsecond formats.
-
-    Args:
-        datetime_str (str): Datetime string from API
-
-    Returns:
-        datetime: Parsed datetime object
-    """
-    # Replace 'Z' with '+00:00' for timezone
-    datetime_str = datetime_str.replace('Z', '+00:00')
-
-    # Handle microseconds - pad to 6 digits or remove if present
-    # Pattern matches: YYYY-MM-DDTHH:MM:SS.microseconds followed by timezone or end
-    pattern = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)(.*)'
-    match = re.match(pattern, datetime_str)
-
-    if match:
-        base_time, microseconds, timezone_part = match.groups()
-        # Pad microseconds to 6 digits or truncate if longer
-        microseconds = microseconds.ljust(6, '0')[:6]
-        datetime_str = f"{base_time}.{microseconds}{timezone_part}"
-
-    return datetime.fromisoformat(datetime_str)
 
 
 @dataclass
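The removed parse_datetime helper did two things: it swapped the trailing "Z" for an explicit "+00:00" offset and normalized fractional seconds to six digits before calling datetime.fromisoformat. The model classes now inline only the first step. A standard-library sketch of that inlined parsing (the timestamp value is invented, not taken from the API):

# Illustration of the parsing now inlined in the models.
from datetime import datetime

raw = "2024-01-15T10:30:00.123456Z"
parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))
print(parsed)  # 2024-01-15 10:30:00.123456+00:00

Note that before Python 3.11, fromisoformat() only accepts three- or six-digit fractional seconds, so timestamps with other digit counts (which the removed regex-based helper padded or truncated) would raise ValueError on older interpreters.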
@@ -106,8 +78,8 @@ class JobItem:
         self.page_status_code: int = data["page_status_code"]
         self.status: str = data["status"]
         self.title: str = data["title"]
-        self.created_at: datetime =
-        self.updated_at: datetime =
+        self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
+        self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
         self.cost: int = data.get("cost", 0)
         self.referred_url: Optional[str] = data.get("referred_url")
         self.last_error: Optional[str] = data.get("last_error")
@@ -185,17 +157,17 @@ class Job:
         self.scrape_type: str = data["scrape_type"]
         self.whitelist_regexp: Optional[str] = data.get("whitelist_regexp")
         self.blacklist_regexp: Optional[str] = data.get("blacklist_regexp")
-        self.allow_subdomains: bool = data
+        self.allow_subdomains: bool = data["allow_subdomains"]
         self.items_limit: int = data["items_limit"]
-        self.created_at: datetime =
-        self.updated_at: datetime =
+        self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
+        self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
         self.webhook_url: Optional[str] = data.get("webhook_url")
         self.recommended_pull_delay_ms: int = data.get("recommended_pull_delay_ms", 0)
 
         # Optional fields
         self.finished_at: Optional[datetime] = None
         if data.get("finished_at"):
-            self.finished_at =
+            self.finished_at = datetime.fromisoformat(data["finished_at"].replace('Z', '+00:00'))
 
         self.webhook_status: Optional[str] = data.get("webhook_status")
         self.webhook_error: Optional[str] = data.get("webhook_error")
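Because created_at, updated_at, and finished_at are now parsed with an explicit +00:00 offset, they are timezone-aware datetimes. A hedged consumer-side sketch using only the Job fields visible in this hunk (the describe helper is hypothetical, and the Job instance is assumed to come from the client rather than being constructed here):

# Hypothetical helper, not part of the SDK.
from datetime import datetime, timezone

def describe(job) -> str:
    """Summarize a Job using only fields visible in this hunk."""
    # Aware datetimes must be compared against an aware "now".
    age = datetime.now(timezone.utc) - job.created_at
    finished = job.finished_at.isoformat() if job.finished_at else "still running"
    return (f"created {age.total_seconds():.0f}s ago, "
            f"items_limit={job.items_limit}, finished: {finished}")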