spiderforce4ai 2.5.6__py3-none-any.whl → 2.5.8__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -167,6 +167,8 @@ class PostExtractionAgent:
167
167
  backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
168
168
  self.config.output_file.rename(backup_path)
169
169
  self.config.output_file.touch()
170
+ # Initialize empty results dictionary
171
+ self.results = {}
170
172
 
171
173
  def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
172
174
  """Process a single piece of content through the LLM."""
@@ -259,8 +261,18 @@ class PostExtractionAgent:
259
261
  try:
260
262
  if self.config.combine_output and self.config.output_file:
261
263
  self.results[url] = result
264
+ # Save combined results
262
265
  with open(self.config.output_file, 'w') as f:
263
266
  json.dump(self.results, f, indent=2)
267
+
268
+ # Cleanup backup files after successful save
269
+ for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
270
+ try:
271
+ backup_file.unlink()
272
+ logger.info(f"Cleaned up backup file: {backup_file}")
273
+ except Exception as e:
274
+ logger.warning(f"Failed to remove backup file {backup_file}: {e}")
275
+
264
276
  elif not self.config.combine_output and self.config.output_file:
265
277
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
266
278
  with open(individual_file, 'w') as f:
@@ -299,13 +311,6 @@ class PostExtractionAgent:
299
311
  # Add URL to result before transformation
300
312
  result['url'] = url
301
313
 
302
- # Check for required fields and set to None if missing
303
- required_fields = ['ContactInformation', 'CallToAction', 'KeyPoints', 'Title', 'Description', 'CanonicalUrl']
304
- for field in required_fields:
305
- if field not in result:
306
- logger.warning(f"Missing field '{field}' in LLM response for {url}, setting to None")
307
- result[field] = None
308
-
309
314
  logger.info(f"Executing custom transformer function for {url}")
310
315
  transformed_result = self.config.custom_transform_function(result)
311
316
  logger.info(f"Successfully applied custom transformation for {url}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.6
3
+ Version: 2.5.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
3
+ spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
4
+ spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.5.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=OP8_gVWzWdQ3B6d0vd54IbcopHSVpAkWrUmnrDVT5_k,16327
3
- spiderforce4ai-2.5.6.dist-info/METADATA,sha256=TjgWAC0iHPrSaedmZL-1PZWCoLEMZ7NJUhd74tfWj6A,9012
4
- spiderforce4ai-2.5.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.6.dist-info/RECORD,,