spiderforce4ai 2.5.7__py3-none-any.whl → 2.5.9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -167,6 +167,8 @@ class PostExtractionAgent:
167
167
  backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
168
168
  self.config.output_file.rename(backup_path)
169
169
  self.config.output_file.touch()
170
+ # Initialize empty results dictionary
171
+ self.results = {}
170
172
 
171
173
  def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
172
174
  """Process a single piece of content through the LLM."""
@@ -258,13 +260,33 @@ class PostExtractionAgent:
258
260
  """Save individual or combined results synchronously."""
259
261
  try:
260
262
  if self.config.combine_output and self.config.output_file:
263
+ # Update the results dictionary
261
264
  self.results[url] = result
262
- with open(self.config.output_file, 'w') as f:
265
+
266
+ # Ensure output directory exists
267
+ self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
268
+
269
+ # Save combined results atomically
270
+ temp_file = self.config.output_file.with_suffix('.tmp')
271
+ with open(temp_file, 'w') as f:
263
272
  json.dump(self.results, f, indent=2)
273
+ temp_file.replace(self.config.output_file)
274
+
275
+ logger.info(f"Updated combined results file with {url}")
276
+
277
+ # Cleanup backup files after successful save
278
+ for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
279
+ try:
280
+ backup_file.unlink()
281
+ logger.info(f"Cleaned up backup file: {backup_file}")
282
+ except Exception as e:
283
+ logger.warning(f"Failed to remove backup file {backup_file}: {e}")
284
+
264
285
  elif not self.config.combine_output and self.config.output_file:
265
286
  individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
266
287
  with open(individual_file, 'w') as f:
267
288
  json.dump(result, f, indent=2)
289
+ logger.info(f"Saved individual result file for {url}")
268
290
  except Exception as e:
269
291
  logger.error(f"Error saving results for {url}: {str(e)}")
270
292
 
@@ -311,14 +333,19 @@ class PostExtractionAgent:
311
333
 
312
334
  # Save result synchronously
313
335
  try:
314
- self._save_result_sync(url, result)
315
- logger.info(f"Saved results for {url}")
336
+ # Save the transformed result when a custom transform is configured, otherwise the original result
337
+ if self.config.custom_transform_function:
338
+ self._save_result_sync(url, transformed_result)
339
+ logger.info(f"Saved transformed results for {url}")
340
+ else:
341
+ self._save_result_sync(url, result)
342
+ logger.info(f"Saved original results for {url}")
316
343
  except Exception as e:
317
344
  error_msg = f"Error saving results for {url}: {str(e)}"
318
345
  logger.error(error_msg)
319
346
  console.print(f"[red]{error_msg}[/red]")
320
347
 
321
- return result
348
+ return transformed_result if self.config.custom_transform_function else result
322
349
 
323
350
  # Wait before retry
324
351
  if attempt < self.config.max_retries - 1:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.7
3
+ Version: 2.5.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
+ spiderforce4ai/post_extraction_agent.py,sha256=K6AGqeDO_MZ4pQMlkmnfK6Y5Sa1BWkUWv9u7_LMxsuM,17314
3
+ spiderforce4ai-2.5.9.dist-info/METADATA,sha256=4qXFZ6sEYnqsjULabDNc0ez0ZTuTPa1FuUTXpGuXG0I,9012
4
+ spiderforce4ai-2.5.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.5.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.5.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.5.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
2
- spiderforce4ai/post_extraction_agent.py,sha256=e7_f5Fd1BYCvqYAQ9D2jypVt8ScXw55FkX9SOeRA8Co,15812
3
- spiderforce4ai-2.5.7.dist-info/METADATA,sha256=F4raYf8aoAlbV7f-YFkCNWfoOlhv2q7voSLzQF9k6s0,9012
4
- spiderforce4ai-2.5.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.5.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.5.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.5.7.dist-info/RECORD,,