spiderforce4ai-2.5.6-py3-none-any.whl → spiderforce4ai-2.5.8-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -167,6 +167,8 @@ class PostExtractionAgent:
167
167
  backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
168
168
  self.config.output_file.rename(backup_path)
169
169
  self.config.output_file.touch()
170
+ # Initialize empty results dictionary
171
+ self.results = {}
170
172
 
171
173
  def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
172
174
  """Process a single piece of content through the LLM."""
@@ -259,8 +261,18 @@ class PostExtractionAgent:
259
261
  try:
260
262
  if self.config.combine_output and self.config.output_file:
261
263
  self.results[url] = result
264
+ # Save combined results
262
265
  with open(self.config.output_file, 'w') as f:
263
266
  json.dump(self.results, f, indent=2)
267
+
268
+ # Cleanup backup files after successful save
269
+ for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
270
+ try:
271
+ backup_file.unlink()
272
+ logger.info(f"Cleaned up backup file: {backup_file}")
273
+ except Exception as e:
274
+ logger.warning(f"Failed to remove backup file {backup_file}: {e}")
275
+
264
276
  elif not self.config.combine_output and self.config.output_file:
265
277
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
266
278
  with open(individual_file, 'w') as f:
@@ -299,13 +311,6 @@ class PostExtractionAgent:
299
311
  # Add URL to result before transformation
300
312
  result['url'] = url
301
313
 
302
- # Check for required fields and set to None if missing
303
- required_fields = ['ContactInformation', 'CallToAction', 'KeyPoints', 'Title', 'Description', 'CanonicalUrl']
304
- for field in required_fields:
305
- if field not in result:
306
- logger.warning(f"Missing field '{field}' in LLM response for {url}, setting to None")
307
- result[field] = None
308
-
309
314
  logger.info(f"Executing custom transformer function for {url}")
310
315
  transformed_result = self.config.custom_transform_function(result)
311
316
  logger.info(f"Successfully applied custom transformation for {url}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.6
3
+ Version: 2.5.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
3
+ spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
4
+ spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.5.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=OP8_gVWzWdQ3B6d0vd54IbcopHSVpAkWrUmnrDVT5_k,16327
3
- spiderforce4ai-2.5.6.dist-info/METADATA,sha256=TjgWAC0iHPrSaedmZL-1PZWCoLEMZ7NJUhd74tfWj6A,9012
4
- spiderforce4ai-2.5.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.6.dist-info/RECORD,,