spiderforce4ai 2.5.8__py3-none-any.whl → 2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +34 -7
- {spiderforce4ai-2.5.8.dist-info → spiderforce4ai-2.6.dist-info}/METADATA +1 -1
- spiderforce4ai-2.6.dist-info/RECORD +7 -0
- spiderforce4ai-2.5.8.dist-info/RECORD +0 -7
- {spiderforce4ai-2.5.8.dist-info → spiderforce4ai-2.6.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.5.8.dist-info → spiderforce4ai-2.6.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.5.8.dist-info → spiderforce4ai-2.6.dist-info}/top_level.txt +0 -0
@@ -260,13 +260,34 @@ class PostExtractionAgent:
|
|
260
260
|
"""Save individual or combined results synchronously."""
|
261
261
|
try:
|
262
262
|
if self.config.combine_output and self.config.output_file:
|
263
|
+
# Convert Path to string if needed
|
264
|
+
output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
|
265
|
+
|
266
|
+
# Load existing results if file exists
|
267
|
+
if Path(output_file).exists():
|
268
|
+
try:
|
269
|
+
with open(output_file, 'r') as f:
|
270
|
+
self.results = json.load(f)
|
271
|
+
except json.JSONDecodeError:
|
272
|
+
self.results = {}
|
273
|
+
|
274
|
+
# Update results with new data
|
263
275
|
self.results[url] = result
|
264
|
-
|
265
|
-
|
276
|
+
|
277
|
+
# Ensure output directory exists
|
278
|
+
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
279
|
+
|
280
|
+
# Save combined results atomically
|
281
|
+
temp_file = f"{output_file}.tmp"
|
282
|
+
with open(temp_file, 'w') as f:
|
266
283
|
json.dump(self.results, f, indent=2)
|
267
284
|
|
268
|
-
#
|
269
|
-
|
285
|
+
# Atomic replace
|
286
|
+
Path(temp_file).replace(output_file)
|
287
|
+
logger.info(f"Updated combined results file with {url}")
|
288
|
+
|
289
|
+
# Cleanup backup files
|
290
|
+
for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
|
270
291
|
try:
|
271
292
|
backup_file.unlink()
|
272
293
|
logger.info(f"Cleaned up backup file: {backup_file}")
|
@@ -277,6 +298,7 @@ class PostExtractionAgent:
|
|
277
298
|
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
278
299
|
with open(individual_file, 'w') as f:
|
279
300
|
json.dump(result, f, indent=2)
|
301
|
+
logger.info(f"Saved individual result file for {url}")
|
280
302
|
except Exception as e:
|
281
303
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
282
304
|
|
@@ -323,14 +345,19 @@ class PostExtractionAgent:
|
|
323
345
|
|
324
346
|
# Save result synchronously
|
325
347
|
try:
|
326
|
-
|
327
|
-
|
348
|
+
# Save the transformed result when a custom transform function is configured; otherwise save the original result
|
349
|
+
if self.config.custom_transform_function:
|
350
|
+
self._save_result_sync(url, transformed_result)
|
351
|
+
logger.info(f"Saved transformed results for {url}")
|
352
|
+
else:
|
353
|
+
self._save_result_sync(url, result)
|
354
|
+
logger.info(f"Saved original results for {url}")
|
328
355
|
except Exception as e:
|
329
356
|
error_msg = f"Error saving results for {url}: {str(e)}"
|
330
357
|
logger.error(error_msg)
|
331
358
|
console.print(f"[red]{error_msg}[/red]")
|
332
359
|
|
333
|
-
return result
|
360
|
+
return transformed_result if self.config.custom_transform_function else result
|
334
361
|
|
335
362
|
# Wait before retry
|
336
363
|
if attempt < self.config.max_retries - 1:
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=AysrHIoD-IreqbvWqCDxyN7v8EPSdLOG9yxABamTZSg,17827
|
3
|
+
spiderforce4ai-2.6.dist-info/METADATA,sha256=JtIZ1-ojRvfm773-yF1a_M_x6eB5kbnb6WT5XT04KDA,9010
|
4
|
+
spiderforce4ai-2.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.6.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
|
3
|
-
spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
|
4
|
-
spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.5.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|