spiderforce4ai 2.5.7__py3-none-any.whl → 2.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -167,6 +167,8 @@ class PostExtractionAgent:
167
167
  backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
168
168
  self.config.output_file.rename(backup_path)
169
169
  self.config.output_file.touch()
170
+ # Initialize empty results dictionary
171
+ self.results = {}
170
172
 
171
173
  def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
172
174
  """Process a single piece of content through the LLM."""
@@ -258,13 +260,33 @@ class PostExtractionAgent:
258
260
  """Save individual or combined results synchronously."""
259
261
  try:
260
262
  if self.config.combine_output and self.config.output_file:
263
+ # Update the results dictionary
261
264
  self.results[url] = result
262
- with open(self.config.output_file, 'w') as f:
265
+
266
+ # Ensure output directory exists
267
+ self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
268
+
269
+ # Save combined results atomically
270
+ temp_file = self.config.output_file.with_suffix('.tmp')
271
+ with open(temp_file, 'w') as f:
263
272
  json.dump(self.results, f, indent=2)
273
+ temp_file.replace(self.config.output_file)
274
+
275
+ logger.info(f"Updated combined results file with {url}")
276
+
277
+ # Cleanup backup files after successful save
278
+ for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
279
+ try:
280
+ backup_file.unlink()
281
+ logger.info(f"Cleaned up backup file: {backup_file}")
282
+ except Exception as e:
283
+ logger.warning(f"Failed to remove backup file {backup_file}: {e}")
284
+
264
285
  elif not self.config.combine_output and self.config.output_file:
265
286
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
266
287
  with open(individual_file, 'w') as f:
267
288
  json.dump(result, f, indent=2)
289
+ logger.info(f"Saved individual result file for {url}")
268
290
  except Exception as e:
269
291
  logger.error(f"Error saving results for {url}: {str(e)}")
270
292
 
@@ -311,14 +333,19 @@ class PostExtractionAgent:
311
333
 
312
334
  # Save result synchronously
313
335
  try:
314
- self._save_result_sync(url, result)
315
- logger.info(f"Saved results for {url}")
336
+ # Save both original and transformed result
337
+ if self.config.custom_transform_function:
338
+ self._save_result_sync(url, transformed_result)
339
+ logger.info(f"Saved transformed results for {url}")
340
+ else:
341
+ self._save_result_sync(url, result)
342
+ logger.info(f"Saved original results for {url}")
316
343
  except Exception as e:
317
344
  error_msg = f"Error saving results for {url}: {str(e)}"
318
345
  logger.error(error_msg)
319
346
  console.print(f"[red]{error_msg}[/red]")
320
347
 
321
- return result
348
+ return transformed_result if self.config.custom_transform_function else result
322
349
 
323
350
  # Wait before retry
324
351
  if attempt < self.config.max_retries - 1:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.7
3
+ Version: 2.5.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=K6AGqeDO_MZ4pQMlkmnfK6Y5Sa1BWkUWv9u7_LMxsuM,17314
3
+ spiderforce4ai-2.5.9.dist-info/METADATA,sha256=4qXFZ6sEYnqsjULabDNc0ez0ZTuTPa1FuUTXpGuXG0I,9012
4
+ spiderforce4ai-2.5.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.5.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.5.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.5.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=e7_f5Fd1BYCvqYAQ9D2jypVt8ScXw55FkX9SOeRA8Co,15812
3
- spiderforce4ai-2.5.7.dist-info/METADATA,sha256=F4raYf8aoAlbV7f-YFkCNWfoOlhv2q7voSLzQF9k6s0,9012
4
- spiderforce4ai-2.5.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.7.dist-info/RECORD,,