spiderforce4ai-2.6.3-py3-none-any.whl → spiderforce4ai-2.6.4-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/post_extraction_agent.py +27 -47
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/METADATA +1 -1
- spiderforce4ai-2.6.4.dist-info/RECORD +7 -0
- spiderforce4ai-2.6.3.dist-info/RECORD +0 -7
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.6.3.dist-info → spiderforce4ai-2.6.4.dist-info}/top_level.txt +0 -0
@@ -159,6 +159,12 @@ class PostExtractionAgent:
|
|
159
159
|
# Convert string path to Path object if needed
|
160
160
|
if isinstance(self.config.output_file, str):
|
161
161
|
self.config.output_file = Path(self.config.output_file)
|
162
|
+
# Ensure parent directory exists
|
163
|
+
self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
|
164
|
+
# Create empty JSON file if it doesn't exist
|
165
|
+
if not self.config.output_file.exists():
|
166
|
+
with open(self.config.output_file, 'w') as f:
|
167
|
+
json.dump({}, f)
|
162
168
|
self._setup_output()
|
163
169
|
|
164
170
|
def _setup_output(self) -> None:
|
@@ -272,58 +278,35 @@ class PostExtractionAgent:
|
|
272
278
|
return None
|
273
279
|
|
274
280
|
def _save_result_sync(self, url: str, result: Dict) -> None:
|
275
|
-
"""Save
|
281
|
+
"""Save results synchronously to combined output file."""
|
276
282
|
try:
|
277
|
-
if self.config.
|
278
|
-
#
|
279
|
-
output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
|
280
|
-
|
281
|
-
# Ensure output directory exists
|
282
|
-
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
283
|
-
|
284
|
-
# Load existing results or create new
|
283
|
+
if self.config.output_file:
|
284
|
+
# Load existing results
|
285
285
|
try:
|
286
|
-
|
287
|
-
|
288
|
-
self.results = json.load(f)
|
289
|
-
else:
|
290
|
-
self.results = {}
|
286
|
+
with open(self.config.output_file, 'r', encoding='utf-8') as f:
|
287
|
+
current_results = json.load(f)
|
291
288
|
except (json.JSONDecodeError, FileNotFoundError):
|
292
|
-
|
293
|
-
|
294
|
-
# Update
|
295
|
-
|
296
|
-
|
297
|
-
# Save
|
298
|
-
temp_file =
|
289
|
+
current_results = {}
|
290
|
+
|
291
|
+
# Update with new result
|
292
|
+
current_results[url] = result
|
293
|
+
|
294
|
+
# Save atomically using temporary file
|
295
|
+
temp_file = self.config.output_file.with_suffix('.tmp')
|
299
296
|
with open(temp_file, 'w', encoding='utf-8') as f:
|
300
|
-
json.dump(
|
301
|
-
|
302
|
-
# Atomic replace
|
303
|
-
|
304
|
-
logger.info(f"Updated combined results file with {url}
|
305
|
-
|
306
|
-
# Cleanup all backup files
|
307
|
-
for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
|
308
|
-
try:
|
309
|
-
backup_file.unlink()
|
310
|
-
logger.info(f"Cleaned up backup file: {backup_file}")
|
311
|
-
except Exception as e:
|
312
|
-
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
313
|
-
|
297
|
+
json.dump(current_results, f, indent=2, ensure_ascii=False)
|
298
|
+
|
299
|
+
# Atomic replace
|
300
|
+
temp_file.replace(self.config.output_file)
|
301
|
+
logger.info(f"Updated combined results file with {url}")
|
302
|
+
|
314
303
|
# Cleanup backup files
|
315
|
-
for backup_file in
|
304
|
+
for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
|
316
305
|
try:
|
317
306
|
backup_file.unlink()
|
318
307
|
logger.info(f"Cleaned up backup file: {backup_file}")
|
319
308
|
except Exception as e:
|
320
309
|
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
321
|
-
|
322
|
-
elif not self.config.combine_output and self.config.output_file:
|
323
|
-
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
324
|
-
with open(individual_file, 'w') as f:
|
325
|
-
json.dump(result, f, indent=2)
|
326
|
-
logger.info(f"Saved individual result file for {url}")
|
327
310
|
except Exception as e:
|
328
311
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
329
312
|
|
@@ -372,12 +355,9 @@ class PostExtractionAgent:
|
|
372
355
|
try:
|
373
356
|
# Always save the result, whether transformed or original
|
374
357
|
result_to_save = transformed_result if self.config.custom_transform_function else result
|
375
|
-
if self.config.
|
376
|
-
self._save_result_sync(url, result_to_save)
|
377
|
-
logger.info(f"Saved results for {url} to combined output")
|
378
|
-
else:
|
358
|
+
if self.config.output_file:
|
379
359
|
self._save_result_sync(url, result_to_save)
|
380
|
-
logger.info(f"Saved
|
360
|
+
logger.info(f"Saved results for {url} to {self.config.output_file}")
|
381
361
|
except Exception as e:
|
382
362
|
error_msg = f"Error saving results for {url}: {str(e)}"
|
383
363
|
logger.error(error_msg)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=GJXV-qTi9xAwil8YSUBaB0OUDDPDzWfcYScldxKsenw,18121
|
3
|
+
spiderforce4ai-2.6.4.dist-info/METADATA,sha256=pzdSGVryHkPAOuf3UcBrimThEi4paBzYKFoWCM-wVvY,9012
|
4
|
+
spiderforce4ai-2.6.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.6.4.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.6.4.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.6.4.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=3HB54YrPCbQTUoZWINr7yHvwXwQywmq0f_RBJwKr2gg,19355
|
3
|
-
spiderforce4ai-2.6.3.dist-info/METADATA,sha256=L5GCJHggqks18Z31ru5DbDdXT3mdS8pYEDhfdR9igms,9012
|
4
|
-
spiderforce4ai-2.6.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.6.3.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.6.3.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.6.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|