spiderforce4ai-2.6.3-py3-none-any.whl → spiderforce4ai-2.6.4-py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +27 -47
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/METADATA +1 -1
- spiderforce4ai-2.6.4.dist-info/RECORD +7 -0
- spiderforce4ai-2.6.3.dist-info/RECORD +0 -7
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/top_level.txt +0 -0
@@ -159,6 +159,12 @@ class PostExtractionAgent:
|
|
159
159
|
# Convert string path to Path object if needed
|
160
160
|
if isinstance(self.config.output_file, str):
|
161
161
|
self.config.output_file = Path(self.config.output_file)
|
162
|
+
# Ensure parent directory exists
|
163
|
+
self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
|
164
|
+
# Create empty JSON file if it doesn't exist
|
165
|
+
if not self.config.output_file.exists():
|
166
|
+
with open(self.config.output_file, 'w') as f:
|
167
|
+
json.dump({}, f)
|
162
168
|
self._setup_output()
|
163
169
|
|
164
170
|
def _setup_output(self) -> None:
|
@@ -272,58 +278,35 @@ class PostExtractionAgent:
|
|
272
278
|
return None
|
273
279
|
|
274
280
|
def _save_result_sync(self, url: str, result: Dict) -> None:
|
275
|
-
"""Save
|
281
|
+
"""Save results synchronously to combined output file."""
|
276
282
|
try:
|
277
|
-
if self.config.
|
278
|
-
#
|
279
|
-
output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
|
280
|
-
|
281
|
-
# Ensure output directory exists
|
282
|
-
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
283
|
-
|
284
|
-
# Load existing results or create new
|
283
|
+
if self.config.output_file:
|
284
|
+
# Load existing results
|
285
285
|
try:
|
286
|
-
|
287
|
-
|
288
|
-
self.results = json.load(f)
|
289
|
-
else:
|
290
|
-
self.results = {}
|
286
|
+
with open(self.config.output_file, 'r', encoding='utf-8') as f:
|
287
|
+
current_results = json.load(f)
|
291
288
|
except (json.JSONDecodeError, FileNotFoundError):
|
292
|
-
|
293
|
-
|
294
|
-
# Update
|
295
|
-
|
296
|
-
|
297
|
-
# Save
|
298
|
-
temp_file =
|
289
|
+
current_results = {}
|
290
|
+
|
291
|
+
# Update with new result
|
292
|
+
current_results[url] = result
|
293
|
+
|
294
|
+
# Save atomically using temporary file
|
295
|
+
temp_file = self.config.output_file.with_suffix('.tmp')
|
299
296
|
with open(temp_file, 'w', encoding='utf-8') as f:
|
300
|
-
json.dump(
|
301
|
-
|
302
|
-
# Atomic replace
|
303
|
-
|
304
|
-
logger.info(f"Updated combined results file with {url}
|
305
|
-
|
306
|
-
# Cleanup all backup files
|
307
|
-
for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
|
308
|
-
try:
|
309
|
-
backup_file.unlink()
|
310
|
-
logger.info(f"Cleaned up backup file: {backup_file}")
|
311
|
-
except Exception as e:
|
312
|
-
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
313
|
-
|
297
|
+
json.dump(current_results, f, indent=2, ensure_ascii=False)
|
298
|
+
|
299
|
+
# Atomic replace
|
300
|
+
temp_file.replace(self.config.output_file)
|
301
|
+
logger.info(f"Updated combined results file with {url}")
|
302
|
+
|
314
303
|
# Cleanup backup files
|
315
|
-
for backup_file in
|
304
|
+
for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
|
316
305
|
try:
|
317
306
|
backup_file.unlink()
|
318
307
|
logger.info(f"Cleaned up backup file: {backup_file}")
|
319
308
|
except Exception as e:
|
320
309
|
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
321
|
-
|
322
|
-
elif not self.config.combine_output and self.config.output_file:
|
323
|
-
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
324
|
-
with open(individual_file, 'w') as f:
|
325
|
-
json.dump(result, f, indent=2)
|
326
|
-
logger.info(f"Saved individual result file for {url}")
|
327
310
|
except Exception as e:
|
328
311
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
329
312
|
|
@@ -372,12 +355,9 @@ class PostExtractionAgent:
|
|
372
355
|
try:
|
373
356
|
# Always save the result, whether transformed or original
|
374
357
|
result_to_save = transformed_result if self.config.custom_transform_function else result
|
375
|
-
if self.config.
|
376
|
-
self._save_result_sync(url, result_to_save)
|
377
|
-
logger.info(f"Saved results for {url} to combined output")
|
378
|
-
else:
|
358
|
+
if self.config.output_file:
|
379
359
|
self._save_result_sync(url, result_to_save)
|
380
|
-
logger.info(f"Saved
|
360
|
+
logger.info(f"Saved results for {url} to {self.config.output_file}")
|
381
361
|
except Exception as e:
|
382
362
|
error_msg = f"Error saving results for {url}: {str(e)}"
|
383
363
|
logger.error(error_msg)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=GJXV-qTi9xAwil8YSUBaB0OUDDPDzWfcYScldxKsenw,18121
|
3
|
+
spiderforce4ai-2.6.4.dist-info/METADATA,sha256=pzdSGVryHkPAOuf3UcBrimThEi4paBzYKFoWCM-wVvY,9012
|
4
|
+
spiderforce4ai-2.6.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.6.4.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.6.4.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.6.4.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=3HB54YrPCbQTUoZWINr7yHvwXwQywmq0f_RBJwKr2gg,19355
|
3
|
-
spiderforce4ai-2.6.3.dist-info/METADATA,sha256=L5GCJHggqks18Z31ru5DbDdXT3mdS8pYEDhfdR9igms,9012
|
4
|
-
spiderforce4ai-2.6.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.6.3.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.6.3.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.6.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|