spiderforce4ai 2.5.7__py3-none-any.whl → 2.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +31 -4
- {spiderforce4ai-2.5.7.dist-info → spiderforce4ai-2.5.9.dist-info}/METADATA +1 -1
- spiderforce4ai-2.5.9.dist-info/RECORD +7 -0
- spiderforce4ai-2.5.7.dist-info/RECORD +0 -7
- {spiderforce4ai-2.5.7.dist-info → spiderforce4ai-2.5.9.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.5.7.dist-info → spiderforce4ai-2.5.9.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.5.7.dist-info → spiderforce4ai-2.5.9.dist-info}/top_level.txt +0 -0
@@ -167,6 +167,8 @@ class PostExtractionAgent:
|
|
167
167
|
backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
|
168
168
|
self.config.output_file.rename(backup_path)
|
169
169
|
self.config.output_file.touch()
|
170
|
+
# Initialize empty results dictionary
|
171
|
+
self.results = {}
|
170
172
|
|
171
173
|
def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
|
172
174
|
"""Process a single piece of content through the LLM."""
|
@@ -258,13 +260,33 @@ class PostExtractionAgent:
|
|
258
260
|
"""Save individual or combined results synchronously."""
|
259
261
|
try:
|
260
262
|
if self.config.combine_output and self.config.output_file:
|
263
|
+
# Update the results dictionary
|
261
264
|
self.results[url] = result
|
262
|
-
|
265
|
+
|
266
|
+
# Ensure output directory exists
|
267
|
+
self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
|
268
|
+
|
269
|
+
# Save combined results atomically
|
270
|
+
temp_file = self.config.output_file.with_suffix('.tmp')
|
271
|
+
with open(temp_file, 'w') as f:
|
263
272
|
json.dump(self.results, f, indent=2)
|
273
|
+
temp_file.replace(self.config.output_file)
|
274
|
+
|
275
|
+
logger.info(f"Updated combined results file with {url}")
|
276
|
+
|
277
|
+
# Cleanup backup files after successful save
|
278
|
+
for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
|
279
|
+
try:
|
280
|
+
backup_file.unlink()
|
281
|
+
logger.info(f"Cleaned up backup file: {backup_file}")
|
282
|
+
except Exception as e:
|
283
|
+
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
284
|
+
|
264
285
|
elif not self.config.combine_output and self.config.output_file:
|
265
286
|
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
266
287
|
with open(individual_file, 'w') as f:
|
267
288
|
json.dump(result, f, indent=2)
|
289
|
+
logger.info(f"Saved individual result file for {url}")
|
268
290
|
except Exception as e:
|
269
291
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
270
292
|
|
@@ -311,14 +333,19 @@ class PostExtractionAgent:
|
|
311
333
|
|
312
334
|
# Save result synchronously
|
313
335
|
try:
|
314
|
-
|
315
|
-
|
336
|
+
# Save the transformed result when a custom transform function is configured; otherwise save the original result
|
337
|
+
if self.config.custom_transform_function:
|
338
|
+
self._save_result_sync(url, transformed_result)
|
339
|
+
logger.info(f"Saved transformed results for {url}")
|
340
|
+
else:
|
341
|
+
self._save_result_sync(url, result)
|
342
|
+
logger.info(f"Saved original results for {url}")
|
316
343
|
except Exception as e:
|
317
344
|
error_msg = f"Error saving results for {url}: {str(e)}"
|
318
345
|
logger.error(error_msg)
|
319
346
|
console.print(f"[red]{error_msg}[/red]")
|
320
347
|
|
321
|
-
return result
|
348
|
+
return transformed_result if self.config.custom_transform_function else result
|
322
349
|
|
323
350
|
# Wait before retry
|
324
351
|
if attempt < self.config.max_retries - 1:
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=K6AGqeDO_MZ4pQMlkmnfK6Y5Sa1BWkUWv9u7_LMxsuM,17314
|
3
|
+
spiderforce4ai-2.5.9.dist-info/METADATA,sha256=4qXFZ6sEYnqsjULabDNc0ez0ZTuTPa1FuUTXpGuXG0I,9012
|
4
|
+
spiderforce4ai-2.5.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.5.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.5.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.5.9.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=e7_f5Fd1BYCvqYAQ9D2jypVt8ScXw55FkX9SOeRA8Co,15812
|
3
|
-
spiderforce4ai-2.5.7.dist-info/METADATA,sha256=F4raYf8aoAlbV7f-YFkCNWfoOlhv2q7voSLzQF9k6s0,9012
|
4
|
-
spiderforce4ai-2.5.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.5.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.5.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.5.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|