spiderforce4ai 2.5.8__py3-none-any.whl → 2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -260,13 +260,34 @@ class PostExtractionAgent:
260
260
  """Save individual or combined results synchronously."""
261
261
  try:
262
262
  if self.config.combine_output and self.config.output_file:
263
+ # Convert Path to string if needed
264
+ output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
265
+
266
+ # Load existing results if file exists
267
+ if Path(output_file).exists():
268
+ try:
269
+ with open(output_file, 'r') as f:
270
+ self.results = json.load(f)
271
+ except json.JSONDecodeError:
272
+ self.results = {}
273
+
274
+ # Update results with new data
263
275
  self.results[url] = result
264
- # Save combined results
265
- with open(self.config.output_file, 'w') as f:
276
+
277
+ # Ensure output directory exists
278
+ Path(output_file).parent.mkdir(parents=True, exist_ok=True)
279
+
280
+ # Save combined results atomically
281
+ temp_file = f"{output_file}.tmp"
282
+ with open(temp_file, 'w') as f:
266
283
  json.dump(self.results, f, indent=2)
267
284
 
268
- # Cleanup backup files after successful save
269
- for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
285
+ # Atomic replace
286
+ Path(temp_file).replace(output_file)
287
+ logger.info(f"Updated combined results file with {url}")
288
+
289
+ # Cleanup backup files
290
+ for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
270
291
  try:
271
292
  backup_file.unlink()
272
293
  logger.info(f"Cleaned up backup file: {backup_file}")
@@ -277,6 +298,7 @@ class PostExtractionAgent:
277
298
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
278
299
  with open(individual_file, 'w') as f:
279
300
  json.dump(result, f, indent=2)
301
+ logger.info(f"Saved individual result file for {url}")
280
302
  except Exception as e:
281
303
  logger.error(f"Error saving results for {url}: {str(e)}")
282
304
 
@@ -323,14 +345,19 @@ class PostExtractionAgent:
323
345
 
324
346
  # Save result synchronously
325
347
  try:
326
- self._save_result_sync(url, result)
327
- logger.info(f"Saved results for {url}")
348
+ # Save both original and transformed result
349
+ if self.config.custom_transform_function:
350
+ self._save_result_sync(url, transformed_result)
351
+ logger.info(f"Saved transformed results for {url}")
352
+ else:
353
+ self._save_result_sync(url, result)
354
+ logger.info(f"Saved original results for {url}")
328
355
  except Exception as e:
329
356
  error_msg = f"Error saving results for {url}: {str(e)}"
330
357
  logger.error(error_msg)
331
358
  console.print(f"[red]{error_msg}[/red]")
332
359
 
333
- return result
360
+ return transformed_result if self.config.custom_transform_function else result
334
361
 
335
362
  # Wait before retry
336
363
  if attempt < self.config.max_retries - 1:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.8
3
+ Version: 2.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=AysrHIoD-IreqbvWqCDxyN7v8EPSdLOG9yxABamTZSg,17827
3
+ spiderforce4ai-2.6.dist-info/METADATA,sha256=JtIZ1-ojRvfm773-yF1a_M_x6eB5kbnb6WT5XT04KDA,9010
4
+ spiderforce4ai-2.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.6.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
3
- spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
4
- spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.8.dist-info/RECORD,,