spiderforce4ai 0.1.6__tar.gz → 0.1.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "0.1.6"
7
+ version = "0.1.7"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service"
9
9
  readme = "README.md"
10
10
  authors = [{name = "Piotr Tamulewicz", email = "pt@petertam.pro"}]
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
3
3
 
4
4
  setup(
5
5
  name="spiderforce4ai",
6
- version="0.1.6",
6
+ version="0.1.7",
7
7
  author="Piotr Tamulewicz",
8
8
  author_email="pt@petertam.pro",
9
9
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service",
@@ -86,6 +86,31 @@ class CrawlConfig:
86
86
  payload["remove_selectors_regex"] = self.remove_selectors_regex
87
87
  return payload
88
88
 
89
+
90
+ def _send_webhook_sync(result: CrawlResult, config: CrawlConfig) -> None:
91
+ """Synchronous version of webhook sender for parallel processing."""
92
+ if not config.webhook_url:
93
+ return
94
+
95
+ payload = {
96
+ "url": result.url,
97
+ "status": result.status,
98
+ "markdown": result.markdown if result.status == "success" else None,
99
+ "error": result.error if result.status == "failed" else None,
100
+ "timestamp": result.timestamp,
101
+ "config": config.to_dict()
102
+ }
103
+
104
+ try:
105
+ response = requests.post(
106
+ config.webhook_url,
107
+ json=payload,
108
+ timeout=config.webhook_timeout
109
+ )
110
+ response.raise_for_status()
111
+ except Exception as e:
112
+ print(f"Warning: Failed to send webhook for {result.url}: {str(e)}")
113
+
89
114
  # Module level function for multiprocessing
90
115
  def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
91
116
  """Process a single URL for parallel processing."""
@@ -99,12 +124,15 @@ def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
99
124
 
100
125
  response = requests.post(endpoint, json=payload, timeout=config.timeout)
101
126
  if response.status_code != 200:
102
- return CrawlResult(
127
+ result = CrawlResult(
103
128
  url=url,
104
129
  status="failed",
105
130
  error=f"HTTP {response.status_code}: {response.text}",
106
131
  config=config.to_dict()
107
132
  )
133
+ # Send webhook for failed result
134
+ _send_webhook_sync(result, config)
135
+ return result
108
136
 
109
137
  markdown = response.text
110
138
 
@@ -114,24 +142,32 @@ def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
114
142
  with open(filepath, 'w', encoding='utf-8') as f:
115
143
  f.write(markdown)
116
144
 
117
- # Add delay if configured
118
- if config.request_delay:
119
- time.sleep(config.request_delay)
120
-
121
- return CrawlResult(
145
+ result = CrawlResult(
122
146
  url=url,
123
147
  status="success",
124
148
  markdown=markdown,
125
149
  config=config.to_dict()
126
150
  )
151
+
152
+ # Send webhook for successful result
153
+ _send_webhook_sync(result, config)
154
+
155
+ # Add delay if configured
156
+ if config.request_delay:
157
+ time.sleep(config.request_delay)
158
+
159
+ return result
127
160
 
128
161
  except Exception as e:
129
- return CrawlResult(
162
+ result = CrawlResult(
130
163
  url=url,
131
164
  status="failed",
132
165
  error=str(e),
133
166
  config=config.to_dict()
134
167
  )
168
+ # Send webhook for error result
169
+ _send_webhook_sync(result, config)
170
+ return result
135
171
 
136
172
  class SpiderForce4AI:
137
173
  """Main class for interacting with SpiderForce4AI service."""
@@ -424,4 +460,5 @@ class SpiderForce4AI:
424
460
 
425
461
  def __exit__(self, exc_type, exc_val, exc_tb):
426
462
  """Sync context manager exit."""
427
- self._executor.shutdown(wait=True)
463
+ self._executor.shutdown(wait=True)
464
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes