PyPI - webcrawlerapi - Versions diffs - 2.0.4__tar.gz → 2.0.5__tar.gz - Mend

webcrawlerapi 2.0.4 tar.gz → 2.0.5 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.4
+Version: 2.0.5
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew
@@ -101,11 +101,11 @@ print(f"Cancellation response: {cancel_response['message']}")
 ```
 ### Scraping
+Check a working code example of [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt)
 ```python
 # Returns structured data directly
 response = crawler.scrape(
-    "url": "https://webcrawlerapi.com"
+    url="https://webcrawlerapi.com"
 )
 if response.success:
     print(response.markdown)

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/README.md RENAMED Viewed

@@ -80,11 +80,11 @@ print(f"Cancellation response: {cancel_response['message']}")
 ```
 ### Scraping
+Check a working code example of [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt)
 ```python
 # Returns structured data directly
 response = crawler.scrape(
-    "url": "https://webcrawlerapi.com"
+    url="https://webcrawlerapi.com"
 )
 if response.success:
     print(response.markdown)

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="webcrawlerapi",
-    version="2.0.4",
+    version="2.0.5",
     packages=find_packages(),
     install_requires=[
         "requests>=2.25.0",

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi/models.py RENAMED Viewed

@@ -1,6 +1,34 @@
 from typing import Optional, Dict, Any, List
 from datetime import datetime
 from dataclasses import dataclass
+import re
+def parse_datetime(datetime_str: str) -> datetime:
+    """
+    Parse datetime string from API response, handling various microsecond formats.
+    Args:
+        datetime_str (str): Datetime string from API
+    Returns:
+        datetime: Parsed datetime object
+    """
+    # Replace 'Z' with '+00:00' for timezone
+    datetime_str = datetime_str.replace('Z', '+00:00')
+    # Handle microseconds - pad to 6 digits or remove if present
+    # Pattern matches: YYYY-MM-DDTHH:MM:SS.microseconds followed by timezone or end
+    pattern = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)(.*)'
+    match = re.match(pattern, datetime_str)
+    if match:
+        base_time, microseconds, timezone_part = match.groups()
+        # Pad microseconds to 6 digits or truncate if longer
+        microseconds = microseconds.ljust(6, '0')[:6]
+        datetime_str = f"{base_time}.{microseconds}{timezone_part}"
+    return datetime.fromisoformat(datetime_str)
 @dataclass
@@ -78,8 +106,8 @@ class JobItem:
         self.page_status_code: int = data["page_status_code"]
         self.status: str = data["status"]
         self.title: str = data["title"]
-        self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
-        self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
+        self.created_at: datetime = parse_datetime(data["created_at"])
+        self.updated_at: datetime = parse_datetime(data["updated_at"])
         self.cost: int = data.get("cost", 0)
         self.referred_url: Optional[str] = data.get("referred_url")
         self.last_error: Optional[str] = data.get("last_error")
@@ -157,17 +185,17 @@ class Job:
         self.scrape_type: str = data["scrape_type"]
         self.whitelist_regexp: Optional[str] = data.get("whitelist_regexp")
         self.blacklist_regexp: Optional[str] = data.get("blacklist_regexp")
-        self.allow_subdomains: bool = data["allow_subdomains"]
+        self.allow_subdomains: bool = data.get("allow_subdomains", False)
         self.items_limit: int = data["items_limit"]
-        self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
-        self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
+        self.created_at: datetime = parse_datetime(data["created_at"])
+        self.updated_at: datetime = parse_datetime(data["updated_at"])
         self.webhook_url: Optional[str] = data.get("webhook_url")
         self.recommended_pull_delay_ms: int = data.get("recommended_pull_delay_ms", 0)
         # Optional fields
         self.finished_at: Optional[datetime] = None
         if data.get("finished_at"):
-            self.finished_at = datetime.fromisoformat(data["finished_at"].replace('Z', '+00:00'))
+            self.finished_at = parse_datetime(data["finished_at"])
         self.webhook_status: Optional[str] = data.get("webhook_status")
         self.webhook_error: Optional[str] = data.get("webhook_error")

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webcrawlerapi
-Version: 2.0.4
+Version: 2.0.5
 Summary: Python SDK for WebCrawler API
 Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
 Author: Andrew
@@ -101,11 +101,11 @@ print(f"Cancellation response: {cancel_response['message']}")
 ```
 ### Scraping
+Check a working code example of [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt)
 ```python
 # Returns structured data directly
 response = crawler.scrape(
-    "url": "https://webcrawlerapi.com"
+    url="https://webcrawlerapi.com"
 )
 if response.success:
     print(response.markdown)

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/setup.cfg RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi/__init__.py RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi/client.py RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi.egg-info/requires.txt RENAMED Viewed

File without changes

{webcrawlerapi-2.0.4 → webcrawlerapi-2.0.5}/webcrawlerapi.egg-info/top_level.txt RENAMED Viewed

File without changes

webcrawlerapi 2.0.4__tar.gz → 2.0.5__tar.gz

webcrawlerapi 2.0.4 tar.gz → 2.0.5 tar.gz