spiderforce4ai 0.1.6.tar.gz → 0.1.7.tar.gz
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/PKG-INFO +1 -1
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/pyproject.toml +1 -1
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/setup.py +1 -1
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai/__init__.py +45 -8
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai.egg-info/PKG-INFO +1 -1
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/README.md +0 -0
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/setup.cfg +0 -0
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai.egg-info/SOURCES.txt +0 -0
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai.egg-info/dependency_links.txt +0 -0
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai.egg-info/requires.txt +0 -0
- {spiderforce4ai-0.1.6 → spiderforce4ai-0.1.7}/spiderforce4ai.egg-info/top_level.txt +0 -0
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spiderforce4ai"
-version = "0.1.6"
+version = "0.1.7"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service"
 readme = "README.md"
 authors = [{name = "Piotr Tamulewicz", email = "pt@petertam.pro"}]

spiderforce4ai/__init__.py

@@ -86,6 +86,31 @@ class CrawlConfig:
             payload["remove_selectors_regex"] = self.remove_selectors_regex
         return payload
 
+
+def _send_webhook_sync(result: CrawlResult, config: CrawlConfig) -> None:
+    """Synchronous version of webhook sender for parallel processing."""
+    if not config.webhook_url:
+        return
+
+    payload = {
+        "url": result.url,
+        "status": result.status,
+        "markdown": result.markdown if result.status == "success" else None,
+        "error": result.error if result.status == "failed" else None,
+        "timestamp": result.timestamp,
+        "config": config.to_dict()
+    }
+
+    try:
+        response = requests.post(
+            config.webhook_url,
+            json=payload,
+            timeout=config.webhook_timeout
+        )
+        response.raise_for_status()
+    except Exception as e:
+        print(f"Warning: Failed to send webhook for {result.url}: {str(e)}")
+
 # Module level function for multiprocessing
 def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
     """Process a single URL for parallel processing."""
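
The payload fields above (url, status, markdown, error, timestamp, config) define the webhook contract. As a rough sketch of the receiving end, the stand-alone handler below accepts that POST and answers 200 so raise_for_status() succeeds; the port and route are arbitrary choices for illustration and are not part of the package.

import json
from http.server import BaseHTTPRequestHandler, HTTPServer

class WebhookHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        # Read and decode the JSON body posted by _send_webhook_sync
        length = int(self.headers.get("Content-Length", 0))
        payload = json.loads(self.rfile.read(length) or b"{}")
        # One POST arrives per crawled URL: markdown on success, error on failure
        print(f"{payload.get('status')}: {payload.get('url')}")
        self.send_response(200)  # a non-2xx reply would trigger the warning path above
        self.end_headers()

if __name__ == "__main__":
    HTTPServer(("0.0.0.0", 9000), WebhookHandler).serve_forever()
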
@@ -99,12 +124,15 @@ def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
 
         response = requests.post(endpoint, json=payload, timeout=config.timeout)
         if response.status_code != 200:
-            return CrawlResult(
+            result = CrawlResult(
                 url=url,
                 status="failed",
                 error=f"HTTP {response.status_code}: {response.text}",
                 config=config.to_dict()
             )
+            # Send webhook for failed result
+            _send_webhook_sync(result, config)
+            return result
 
         markdown = response.text
 
@@ -114,24 +142,32 @@ def _process_url_parallel(args: Tuple[str, str, CrawlConfig]) -> CrawlResult:
             with open(filepath, 'w', encoding='utf-8') as f:
                 f.write(markdown)
 
-
-        if config.request_delay:
-            time.sleep(config.request_delay)
-
-        return CrawlResult(
+        result = CrawlResult(
             url=url,
             status="success",
             markdown=markdown,
             config=config.to_dict()
         )
+
+        # Send webhook for successful result
+        _send_webhook_sync(result, config)
+
+        # Add delay if configured
+        if config.request_delay:
+            time.sleep(config.request_delay)
+
+        return result
 
     except Exception as e:
-        return CrawlResult(
+        result = CrawlResult(
             url=url,
             status="failed",
             error=str(e),
             config=config.to_dict()
         )
+        # Send webhook for error result
+        _send_webhook_sync(result, config)
+        return result
 
 class SpiderForce4AI:
     """Main class for interacting with SpiderForce4AI service."""
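
With the webhook now wired into every exit path of _process_url_parallel (success, HTTP failure, and exception), per-URL notifications only require a webhook_url on the config. A hedged usage sketch, assuming CrawlConfig accepts these fields as keyword arguments and that any other fields it requires have defaults:

from spiderforce4ai import CrawlConfig

config = CrawlConfig(
    webhook_url="http://localhost:9000/webhook",  # e.g. the receiver sketched earlier
    webhook_timeout=10,   # seconds to wait for the webhook endpoint
    timeout=30,           # per-URL conversion timeout
    request_delay=0.5,    # pause between requests, applied after the webhook is sent
)
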
@@ -424,4 +460,5 @@ class SpiderForce4AI:
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Sync context manager exit."""
-        self._executor.shutdown(wait=True)
+        self._executor.shutdown(wait=True)
+