spiderforce4ai-0.1.9-py3-none-any.whl → spiderforce4ai-1.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -519,24 +519,41 @@ class SpiderForce4AI:
519
519
  # Identify failed URLs
520
520
  failed_results = [r for r in initial_results if r.status == "failed"]
521
521
 
522
- # Retry failed URLs
522
+ # Calculate initial failure ratio
523
+ initial_failed = len(failed_results)
524
+ total_urls = len(urls)
525
+ failure_ratio = (initial_failed / total_urls) * 100
526
+
527
+ # Retry failed URLs if ratio is acceptable
523
528
  if failed_results:
524
- retry_results = await self._retry_failed_urls(failed_results, config, progress)
525
-
526
- # Replace failed results with retry results
527
- results = [r for r in initial_results if r.status == "success"] + retry_results
529
+ if failure_ratio > 20:
530
+ console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
531
+ results = initial_results
532
+ else:
533
+ retry_results = await self._retry_failed_urls(failed_results, config, progress)
534
+ # Replace failed results with retry results
535
+ results = [r for r in initial_results if r.status == "success"] + retry_results
528
536
  else:
529
537
  results = initial_results
530
538
 
531
539
  # Save final report
532
540
  await self._save_report(config)
533
541
 
534
- # Print final summary
535
- successful = len([r for r in results if r.status == "success"])
536
- failed = len([r for r in results if r.status == "failed"])
537
- console.print(f"\n[green]Final crawling results:[/green]")
538
- console.print(f"✓ Successful: {successful}")
539
- console.print(f" Failed: {failed}")
542
+ # Calculate final statistics
543
+ final_successful = len([r for r in results if r.status == "success"])
544
+ final_failed = len([r for r in results if r.status == "failed"])
545
+
546
+ # Print detailed summary
547
+ console.print(f"\n[green]Crawling Summary:[/green]")
548
+ console.print(f"Total URLs processed: {total_urls}")
549
+ console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
550
+ console.print(f"Final results:")
551
+ console.print(f" ✓ Successful: {final_successful}")
552
+ console.print(f" ✗ Failed: {final_failed}")
553
+
554
+ if initial_failed > 0:
555
+ retry_successful = initial_failed - final_failed
556
+ console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
540
557
 
541
558
  if config.report_file:
542
559
  console.print(f"📊 Report saved to: {config.report_file}")
@@ -624,25 +641,42 @@ class SpiderForce4AI:
624
641
  self._save_report_sync(results, config)
625
642
  print(f"\nReport saved to: {config.report_file}")
626
643
 
627
- # Identify failed URLs and retry them
644
+ # Calculate initial failure statistics
628
645
  failed_results = [r for r in results if r.status == "failed"]
629
- if failed_results:
630
- console.print("\n[yellow]Retrying failed URLs...[/yellow]")
631
- for result in failed_results:
632
- new_result = _process_url_parallel((result.url, self.base_url, config))
633
- if new_result.status == "success":
634
- console.print(f"[green]✓ Retry successful: {result.url}[/green]")
635
- # Replace the failed result with the successful retry
636
- results[results.index(result)] = new_result
637
- else:
638
- console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
646
+ initial_failed = len(failed_results)
647
+ total_urls = len(urls)
648
+ failure_ratio = (initial_failed / total_urls) * 100
639
649
 
640
- # Print final summary
641
- successful = len([r for r in results if r.status == "success"])
642
- failed = len([r for r in results if r.status == "failed"])
643
- console.print(f"\n[green]Final crawling results:[/green]")
644
- console.print(f"✓ Successful: {successful}")
645
- console.print(f" Failed: {failed}")
650
+ # Retry failed URLs if ratio is acceptable
651
+ if failed_results:
652
+ if failure_ratio > 20:
653
+ console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
654
+ else:
655
+ console.print("\n[yellow]Retrying failed URLs...[/yellow]")
656
+ for result in failed_results:
657
+ new_result = _process_url_parallel((result.url, self.base_url, config))
658
+ if new_result.status == "success":
659
+ console.print(f"[green]✓ Retry successful: {result.url}[/green]")
660
+ # Replace the failed result with the successful retry
661
+ results[results.index(result)] = new_result
662
+ else:
663
+ console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
664
+
665
+ # Calculate final statistics
666
+ final_successful = len([r for r in results if r.status == "success"])
667
+ final_failed = len([r for r in results if r.status == "failed"])
668
+
669
+ # Print detailed summary
670
+ console.print(f"\n[green]Crawling Summary:[/green]")
671
+ console.print(f"Total URLs processed: {total_urls}")
672
+ console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
673
+ console.print(f"Final results:")
674
+ console.print(f" ✓ Successful: {final_successful}")
675
+ console.print(f" ✗ Failed: {final_failed}")
676
+
677
+ if initial_failed > 0:
678
+ retry_successful = initial_failed - final_failed
679
+ console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
646
680
 
647
681
  return results
648
682
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 0.1.9
3
+ Version: 1.0
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=8WEcryB8fckf5yIvH55s7a5FtxvK_AhXdi_dyaqqing,27929
2
+ spiderforce4ai-1.0.dist-info/METADATA,sha256=VqydJoQcHkzvIhYTPeH3j8ZSHK-lGbo1xmZwQZk6w2s,7769
3
+ spiderforce4ai-1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.0.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.0.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=oU_UIdzsQxExaVgD7NCaVm4G-9zMtKGnREfY6xL1uFY,26041
2
- spiderforce4ai-0.1.9.dist-info/METADATA,sha256=poV1i_-H3AgzFhs9juRDJSfaWO0gVePb5JXN7ynL4Y4,7771
3
- spiderforce4ai-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-0.1.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-0.1.9.dist-info/RECORD,,