spiderforce4ai-2.4.3-py3-none-any.whl → spiderforce4ai-2.4.5-py3-none-any.whl
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +1 -1
- spiderforce4ai/post_extraction_agent.py +24 -11
- {spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.5.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.3.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -586,7 +586,7 @@ class SpiderForce4AI:
|
|
586
586
|
for result in results:
|
587
587
|
if result.status == "success":
|
588
588
|
try:
|
589
|
-
result.extraction_result =
|
589
|
+
result.extraction_result = agent.process_content(result.url, result.markdown)
|
590
590
|
progress.update(llm_task, advance=1)
|
591
591
|
except Exception as e:
|
592
592
|
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
@@ -164,12 +164,9 @@ class PostExtractionAgent:
|
|
164
164
|
self.config.output_file.rename(backup_path)
|
165
165
|
self.config.output_file.touch()
|
166
166
|
|
167
|
-
|
167
|
+
def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
|
168
168
|
"""Process a single piece of content through the LLM."""
|
169
169
|
try:
|
170
|
-
# Apply rate limiting
|
171
|
-
await self.rate_limiter.acquire()
|
172
|
-
|
173
170
|
# Replace placeholder in messages with actual content
|
174
171
|
messages = [
|
175
172
|
{**msg, 'content': msg['content'].replace('{here_markdown_content}', content)}
|
@@ -183,7 +180,8 @@ class PostExtractionAgent:
|
|
183
180
|
|
184
181
|
for attempt in range(max_retries):
|
185
182
|
try:
|
186
|
-
|
183
|
+
# Call completion synchronously
|
184
|
+
response = completion(
|
187
185
|
model=self.config.model,
|
188
186
|
messages=messages,
|
189
187
|
max_tokens=self.config.max_tokens,
|
@@ -200,11 +198,11 @@ class PostExtractionAgent:
|
|
200
198
|
except json.JSONDecodeError as e:
|
201
199
|
last_error = f"Invalid JSON response from LLM: {e}"
|
202
200
|
if attempt < max_retries - 1:
|
203
|
-
|
201
|
+
time.sleep(retry_delay * (attempt + 1))
|
204
202
|
except Exception as e:
|
205
203
|
last_error = str(e)
|
206
204
|
if attempt < max_retries - 1:
|
207
|
-
|
205
|
+
time.sleep(retry_delay * (attempt + 1))
|
208
206
|
|
209
207
|
# If we get here, all retries failed
|
210
208
|
raise Exception(last_error)
|
@@ -214,6 +212,20 @@ class PostExtractionAgent:
|
|
214
212
|
self.buffer.add_failed_request(url, content, str(e))
|
215
213
|
return None
|
216
214
|
|
215
|
+
def _save_result_sync(self, url: str, result: Dict) -> None:
|
216
|
+
"""Save individual or combined results synchronously."""
|
217
|
+
try:
|
218
|
+
if self.config.combine_output and self.config.output_file:
|
219
|
+
self.results[url] = result
|
220
|
+
with open(self.config.output_file, 'w') as f:
|
221
|
+
json.dump(self.results, f, indent=2)
|
222
|
+
elif not self.config.combine_output and self.config.output_file:
|
223
|
+
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
224
|
+
with open(individual_file, 'w') as f:
|
225
|
+
json.dump(result, f, indent=2)
|
226
|
+
except Exception as e:
|
227
|
+
logger.error(f"Error saving results for {url}: {str(e)}")
|
228
|
+
|
217
229
|
async def _save_result(self, url: str, result: Dict) -> None:
|
218
230
|
"""Save individual or combined results."""
|
219
231
|
try:
|
@@ -228,10 +240,10 @@ class PostExtractionAgent:
|
|
228
240
|
except Exception as e:
|
229
241
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
230
242
|
|
231
|
-
|
243
|
+
def process_content(self, url: str, content: str) -> Optional[Dict]:
|
232
244
|
"""Process content with retry mechanism."""
|
233
245
|
for attempt in range(self.config.max_retries):
|
234
|
-
result =
|
246
|
+
result = self._process_single_content(url, content)
|
235
247
|
if result:
|
236
248
|
# Apply custom transformation if provided
|
237
249
|
if self.config.custom_transform_function:
|
@@ -240,12 +252,13 @@ class PostExtractionAgent:
|
|
240
252
|
except Exception as e:
|
241
253
|
logger.error(f"Error in custom transform for {url}: {str(e)}")
|
242
254
|
|
243
|
-
|
255
|
+
# Save result synchronously
|
256
|
+
self._save_result_sync(url, result)
|
244
257
|
return result
|
245
258
|
|
246
259
|
# Wait before retry
|
247
260
|
if attempt < self.config.max_retries - 1:
|
248
|
-
|
261
|
+
time.sleep(self.config.retry_delay)
|
249
262
|
|
250
263
|
return None
|
251
264
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
|
3
|
+
spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
|
4
|
+
spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.5.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=iwCLSvooHtFAo-rU52-nsFgyn99Dflpt_OpSrIW-PqA,42273
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
|
3
|
-
spiderforce4ai-2.4.3.dist-info/METADATA,sha256=-i_vH6DDs4xVFVdDfaFG_Xka0pqXCSQdCrKgym5r5b0,9012
|
4
|
-
spiderforce4ai-2.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.3.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.3.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|