spiderforce4ai 2.5.8__py3-none-any.whl → 2.6__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -260,13 +260,34 @@ class PostExtractionAgent:
260
260
  """Save individual or combined results synchronously."""
261
261
  try:
262
262
  if self.config.combine_output and self.config.output_file:
263
+ # Convert Path to string if needed
264
+ output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
265
+
266
+ # Load existing results if file exists
267
+ if Path(output_file).exists():
268
+ try:
269
+ with open(output_file, 'r') as f:
270
+ self.results = json.load(f)
271
+ except json.JSONDecodeError:
272
+ self.results = {}
273
+
274
+ # Update results with new data
263
275
  self.results[url] = result
264
- # Save combined results
265
- with open(self.config.output_file, 'w') as f:
276
+
277
+ # Ensure output directory exists
278
+ Path(output_file).parent.mkdir(parents=True, exist_ok=True)
279
+
280
+ # Save combined results atomically
281
+ temp_file = f"{output_file}.tmp"
282
+ with open(temp_file, 'w') as f:
266
283
  json.dump(self.results, f, indent=2)
267
284
 
268
- # Cleanup backup files after successful save
269
- for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
285
+ # Atomic replace
286
+ Path(temp_file).replace(output_file)
287
+ logger.info(f"Updated combined results file with {url}")
288
+
289
+ # Cleanup backup files
290
+ for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
270
291
  try:
271
292
  backup_file.unlink()
272
293
  logger.info(f"Cleaned up backup file: {backup_file}")
@@ -277,6 +298,7 @@ class PostExtractionAgent:
277
298
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
278
299
  with open(individual_file, 'w') as f:
279
300
  json.dump(result, f, indent=2)
301
+ logger.info(f"Saved individual result file for {url}")
280
302
  except Exception as e:
281
303
  logger.error(f"Error saving results for {url}: {str(e)}")
282
304
 
@@ -323,14 +345,19 @@ class PostExtractionAgent:
323
345
 
324
346
  # Save result synchronously
325
347
  try:
326
- self._save_result_sync(url, result)
327
- logger.info(f"Saved results for {url}")
348
+ # Save the transformed result when a custom transform function is configured, otherwise the original result
349
+ if self.config.custom_transform_function:
350
+ self._save_result_sync(url, transformed_result)
351
+ logger.info(f"Saved transformed results for {url}")
352
+ else:
353
+ self._save_result_sync(url, result)
354
+ logger.info(f"Saved original results for {url}")
328
355
  except Exception as e:
329
356
  error_msg = f"Error saving results for {url}: {str(e)}"
330
357
  logger.error(error_msg)
331
358
  console.print(f"[red]{error_msg}[/red]")
332
359
 
333
- return result
360
+ return transformed_result if self.config.custom_transform_function else result
334
361
 
335
362
  # Wait before retry
336
363
  if attempt < self.config.max_retries - 1:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.8
3
+ Version: 2.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=AysrHIoD-IreqbvWqCDxyN7v8EPSdLOG9yxABamTZSg,17827
3
+ spiderforce4ai-2.6.dist-info/METADATA,sha256=JtIZ1-ojRvfm773-yF1a_M_x6eB5kbnb6WT5XT04KDA,9010
4
+ spiderforce4ai-2.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.6.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
3
- spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
4
- spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.8.dist-info/RECORD,,