webcrawlerapi 2.0.4__tar.gz → 2.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: webcrawlerapi
3
- Version: 2.0.4
3
+ Version: 2.0.5
4
4
  Summary: Python SDK for WebCrawler API
5
5
  Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
6
6
  Author: Andrew
@@ -101,11 +101,11 @@ print(f"Cancellation response: {cancel_response['message']}")
101
101
  ```
102
102
 
103
103
  ### Scraping
104
-
104
+ See the working code examples for [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt).
105
105
  ```python
106
106
  # Returns structured data directly
107
107
  response = crawler.scrape(
108
- "url": "https://webcrawlerapi.com"
108
+ url="https://webcrawlerapi.com"
109
109
  )
110
110
  if response.success:
111
111
  print(response.markdown)
@@ -80,11 +80,11 @@ print(f"Cancellation response: {cancel_response['message']}")
80
80
  ```
81
81
 
82
82
  ### Scraping
83
-
83
+ See the working code examples for [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt).
84
84
  ```python
85
85
  # Returns structured data directly
86
86
  response = crawler.scrape(
87
- "url": "https://webcrawlerapi.com"
87
+ url="https://webcrawlerapi.com"
88
88
  )
89
89
  if response.success:
90
90
  print(response.markdown)
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="webcrawlerapi",
5
- version="2.0.4",
5
+ version="2.0.5",
6
6
  packages=find_packages(),
7
7
  install_requires=[
8
8
  "requests>=2.25.0",
@@ -1,6 +1,34 @@
1
1
  from typing import Optional, Dict, Any, List
2
2
  from datetime import datetime
3
3
  from dataclasses import dataclass
4
+ import re
5
+
6
+
7
+ def parse_datetime(datetime_str: str) -> datetime:
8
+ """
9
+ Parse datetime string from API response, handling various microsecond formats.
10
+
11
+ Args:
12
+ datetime_str (str): Datetime string from API
13
+
14
+ Returns:
15
+ datetime: Parsed datetime object
16
+ """
17
+ # Replace 'Z' with '+00:00' for timezone
18
+ datetime_str = datetime_str.replace('Z', '+00:00')
19
+
20
+ # Normalize fractional seconds to exactly 6 digits: pad shorter values with zeros, truncate longer ones
21
+ # Pattern matches: YYYY-MM-DDTHH:MM:SS.microseconds followed by timezone or end
22
+ pattern = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)(.*)'
23
+ match = re.match(pattern, datetime_str)
24
+
25
+ if match:
26
+ base_time, microseconds, timezone_part = match.groups()
27
+ # Pad microseconds to 6 digits or truncate if longer
28
+ microseconds = microseconds.ljust(6, '0')[:6]
29
+ datetime_str = f"{base_time}.{microseconds}{timezone_part}"
30
+
31
+ return datetime.fromisoformat(datetime_str)
4
32
 
5
33
 
6
34
  @dataclass
@@ -78,8 +106,8 @@ class JobItem:
78
106
  self.page_status_code: int = data["page_status_code"]
79
107
  self.status: str = data["status"]
80
108
  self.title: str = data["title"]
81
- self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
82
- self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
109
+ self.created_at: datetime = parse_datetime(data["created_at"])
110
+ self.updated_at: datetime = parse_datetime(data["updated_at"])
83
111
  self.cost: int = data.get("cost", 0)
84
112
  self.referred_url: Optional[str] = data.get("referred_url")
85
113
  self.last_error: Optional[str] = data.get("last_error")
@@ -157,17 +185,17 @@ class Job:
157
185
  self.scrape_type: str = data["scrape_type"]
158
186
  self.whitelist_regexp: Optional[str] = data.get("whitelist_regexp")
159
187
  self.blacklist_regexp: Optional[str] = data.get("blacklist_regexp")
160
- self.allow_subdomains: bool = data["allow_subdomains"]
188
+ self.allow_subdomains: bool = data.get("allow_subdomains", False)
161
189
  self.items_limit: int = data["items_limit"]
162
- self.created_at: datetime = datetime.fromisoformat(data["created_at"].replace('Z', '+00:00'))
163
- self.updated_at: datetime = datetime.fromisoformat(data["updated_at"].replace('Z', '+00:00'))
190
+ self.created_at: datetime = parse_datetime(data["created_at"])
191
+ self.updated_at: datetime = parse_datetime(data["updated_at"])
164
192
  self.webhook_url: Optional[str] = data.get("webhook_url")
165
193
  self.recommended_pull_delay_ms: int = data.get("recommended_pull_delay_ms", 0)
166
194
 
167
195
  # Optional fields
168
196
  self.finished_at: Optional[datetime] = None
169
197
  if data.get("finished_at"):
170
- self.finished_at = datetime.fromisoformat(data["finished_at"].replace('Z', '+00:00'))
198
+ self.finished_at = parse_datetime(data["finished_at"])
171
199
 
172
200
  self.webhook_status: Optional[str] = data.get("webhook_status")
173
201
  self.webhook_error: Optional[str] = data.get("webhook_error")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: webcrawlerapi
3
- Version: 2.0.4
3
+ Version: 2.0.5
4
4
  Summary: Python SDK for WebCrawler API
5
5
  Home-page: https://github.com/webcrawlerapi/webcrawlerapi-python-sdk
6
6
  Author: Andrew
@@ -101,11 +101,11 @@ print(f"Cancellation response: {cancel_response['message']}")
101
101
  ```
102
102
 
103
103
  ### Scraping
104
-
104
+ See the working code examples for [scraping](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping) and [scraping with a prompt](https://github.com/WebCrawlerAPI/webcrawlerapi-examples/tree/master/python/scraping_prompt).
105
105
  ```python
106
106
  # Returns structured data directly
107
107
  response = crawler.scrape(
108
- "url": "https://webcrawlerapi.com"
108
+ url="https://webcrawlerapi.com"
109
109
  )
110
110
  if response.success:
111
111
  print(response.markdown)
File without changes