spiderforce4ai 1.2__py3-none-any.whl → 1.4__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -350,17 +350,23 @@ class SpiderForce4AI:
350
350
 
351
351
  def _save_report_sync(self, results: List[CrawlResult], config: CrawlConfig) -> None:
352
352
  """Save crawl report synchronously."""
353
+ # Separate successful and failed results
354
+ successful_results = [r for r in results if r.status == "success"]
355
+ failed_results = [r for r in results if r.status == "failed"]
356
+
357
+ # Create report with only final state
353
358
  report = {
354
359
  "timestamp": datetime.now().isoformat(),
355
360
  "config": config.to_dict(),
356
361
  "results": {
357
- "successful": [asdict(r) for r in results if r.status == "success"],
358
- "failed": [asdict(r) for r in results if r.status == "failed"]
362
+ "successful": [asdict(r) for r in successful_results],
363
+ "failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
359
364
  },
360
365
  "summary": {
361
366
  "total": len(results),
362
- "successful": len([r for r in results if r.status == "success"]),
363
- "failed": len([r for r in results if r.status == "failed"])
367
+ "successful": len(successful_results),
368
+ "failed": len(failed_results),
369
+ "retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
364
370
  }
365
371
  }
366
372
 
@@ -372,17 +378,22 @@ class SpiderForce4AI:
372
378
  if not config.report_file:
373
379
  return
374
380
 
381
+ # Separate successful and failed results
382
+ successful_results = [r for r in self.crawl_results if r.status == "success"]
383
+ failed_results = [r for r in self.crawl_results if r.status == "failed"]
384
+
375
385
  report = {
376
386
  "timestamp": datetime.now().isoformat(),
377
387
  "config": config.to_dict(),
378
388
  "results": {
379
- "successful": [asdict(r) for r in self.crawl_results if r.status == "success"],
380
- "failed": [asdict(r) for r in self.crawl_results if r.status == "failed"]
389
+ "successful": [asdict(r) for r in successful_results],
390
+ "failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
381
391
  },
382
392
  "summary": {
383
393
  "total": len(self.crawl_results),
384
- "successful": len([r for r in self.crawl_results if r.status == "success"]),
385
- "failed": len([r for r in self.crawl_results if r.status == "failed"])
394
+ "successful": len(successful_results),
395
+ "failed": len(failed_results),
396
+ "retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
386
397
  }
387
398
  }
388
399
 
@@ -545,10 +556,7 @@ class SpiderForce4AI:
545
556
  else:
546
557
  results = initial_results
547
558
 
548
- # Save final report
549
- await self._save_report(config)
550
-
551
- # Calculate final statistics
559
+ # Calculate final statistics before saving report
552
560
  final_successful = len([r for r in results if r.status == "success"])
553
561
  final_failed = len([r for r in results if r.status == "failed"])
554
562
 
@@ -564,7 +572,15 @@ class SpiderForce4AI:
564
572
  retry_successful = initial_failed - final_failed
565
573
  console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
566
574
 
567
- if config.report_file:
575
+ # Save final report after all retries are complete
576
+ if config.save_reports:
577
+ self._retry_stats = {
578
+ "initial_failures": initial_failed,
579
+ "failure_ratio": failure_ratio,
580
+ "retry_successful": retry_successful if initial_failed > 0 else 0,
581
+ "retry_failed": final_failed
582
+ }
583
+ await self._save_report(config)
568
584
  console.print(f"📊 Report saved to: {config.report_file}")
569
585
 
570
586
  return results
@@ -645,11 +661,6 @@ class SpiderForce4AI:
645
661
  status = "✓" if result.status == "success" else "✗"
646
662
  progress.description = f"Last: {status} {result.url}"
647
663
 
648
- # Save final report
649
- if config.report_file:
650
- self._save_report_sync(results, config)
651
- print(f"\nReport saved to: {config.report_file}")
652
-
653
664
  # Calculate initial failure statistics
654
665
  failed_results = [r for r in results if r.status == "failed"]
655
666
  initial_failed = len(failed_results)
@@ -704,6 +715,17 @@ class SpiderForce4AI:
704
715
  retry_successful = initial_failed - final_failed
705
716
  console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
706
717
 
718
+ # Save final report after all retries are complete
719
+ if config.save_reports:
720
+ self._retry_stats = {
721
+ "initial_failures": initial_failed,
722
+ "failure_ratio": failure_ratio,
723
+ "retry_successful": retry_successful if initial_failed > 0 else 0,
724
+ "retry_failed": final_failed
725
+ }
726
+ self._save_report_sync(results, config)
727
+ console.print(f"📊 Report saved to: {config.report_file}")
728
+
707
729
  return results
708
730
 
709
731
  async def __aenter__(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.2
3
+ Version: 1.4
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=7EMEEfFY3WLq58-vnK1Yhcb1trF2ZXU-Ny3licz45Yk,30585
2
+ spiderforce4ai-1.4.dist-info/METADATA,sha256=7GRBz_bTtXOQ2N-gHRPJFEWW8mmOB_1gwrJCf-el8LM,7183
3
+ spiderforce4ai-1.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.4.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.4.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=BHsdGGxEyS4RHbHyTnYRBE4oRy2i1pGSrEt_LT4vKWc,29384
2
- spiderforce4ai-1.2.dist-info/METADATA,sha256=I5gmglzuRXSKwRc0lWk2Vslnx_4PIffIwjJ-SOTeYpU,7183
3
- spiderforce4ai-1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-1.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-1.2.dist-info/RECORD,,