spiderforce4ai 0.1.9__py3-none-any.whl → 1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -519,24 +519,41 @@ class SpiderForce4AI:
519
519
  # Identify failed URLs
520
520
  failed_results = [r for r in initial_results if r.status == "failed"]
521
521
 
522
- # Retry failed URLs
522
+ # Calculate initial failure ratio
523
+ initial_failed = len(failed_results)
524
+ total_urls = len(urls)
525
+ failure_ratio = (initial_failed / total_urls) * 100
526
+
527
+ # Retry failed URLs if ratio is acceptable
523
528
  if failed_results:
524
- retry_results = await self._retry_failed_urls(failed_results, config, progress)
525
-
526
- # Replace failed results with retry results
527
- results = [r for r in initial_results if r.status == "success"] + retry_results
529
+ if failure_ratio > 20:
530
+ console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
531
+ results = initial_results
532
+ else:
533
+ retry_results = await self._retry_failed_urls(failed_results, config, progress)
534
+ # Replace failed results with retry results
535
+ results = [r for r in initial_results if r.status == "success"] + retry_results
528
536
  else:
529
537
  results = initial_results
530
538
 
531
539
  # Save final report
532
540
  await self._save_report(config)
533
541
 
534
- # Print final summary
535
- successful = len([r for r in results if r.status == "success"])
536
- failed = len([r for r in results if r.status == "failed"])
537
- console.print(f"\n[green]Final crawling results:[/green]")
538
- console.print(f"✓ Successful: {successful}")
539
- console.print(f" Failed: {failed}")
542
+ # Calculate final statistics
543
+ final_successful = len([r for r in results if r.status == "success"])
544
+ final_failed = len([r for r in results if r.status == "failed"])
545
+
546
+ # Print detailed summary
547
+ console.print(f"\n[green]Crawling Summary:[/green]")
548
+ console.print(f"Total URLs processed: {total_urls}")
549
+ console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
550
+ console.print(f"Final results:")
551
+ console.print(f" ✓ Successful: {final_successful}")
552
+ console.print(f" ✗ Failed: {final_failed}")
553
+
554
+ if initial_failed > 0:
555
+ retry_successful = initial_failed - final_failed
556
+ console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
540
557
 
541
558
  if config.report_file:
542
559
  console.print(f"📊 Report saved to: {config.report_file}")
@@ -624,25 +641,42 @@ class SpiderForce4AI:
624
641
  self._save_report_sync(results, config)
625
642
  print(f"\nReport saved to: {config.report_file}")
626
643
 
627
- # Identify failed URLs and retry them
644
+ # Calculate initial failure statistics
628
645
  failed_results = [r for r in results if r.status == "failed"]
629
- if failed_results:
630
- console.print("\n[yellow]Retrying failed URLs...[/yellow]")
631
- for result in failed_results:
632
- new_result = _process_url_parallel((result.url, self.base_url, config))
633
- if new_result.status == "success":
634
- console.print(f"[green]✓ Retry successful: {result.url}[/green]")
635
- # Replace the failed result with the successful retry
636
- results[results.index(result)] = new_result
637
- else:
638
- console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
646
+ initial_failed = len(failed_results)
647
+ total_urls = len(urls)
648
+ failure_ratio = (initial_failed / total_urls) * 100
639
649
 
640
- # Print final summary
641
- successful = len([r for r in results if r.status == "success"])
642
- failed = len([r for r in results if r.status == "failed"])
643
- console.print(f"\n[green]Final crawling results:[/green]")
644
- console.print(f"✓ Successful: {successful}")
645
- console.print(f" Failed: {failed}")
650
+ # Retry failed URLs if ratio is acceptable
651
+ if failed_results:
652
+ if failure_ratio > 20:
653
+ console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
654
+ else:
655
+ console.print("\n[yellow]Retrying failed URLs...[/yellow]")
656
+ for result in failed_results:
657
+ new_result = _process_url_parallel((result.url, self.base_url, config))
658
+ if new_result.status == "success":
659
+ console.print(f"[green]✓ Retry successful: {result.url}[/green]")
660
+ # Replace the failed result with the successful retry
661
+ results[results.index(result)] = new_result
662
+ else:
663
+ console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
664
+
665
+ # Calculate final statistics
666
+ final_successful = len([r for r in results if r.status == "success"])
667
+ final_failed = len([r for r in results if r.status == "failed"])
668
+
669
+ # Print detailed summary
670
+ console.print(f"\n[green]Crawling Summary:[/green]")
671
+ console.print(f"Total URLs processed: {total_urls}")
672
+ console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
673
+ console.print(f"Final results:")
674
+ console.print(f" ✓ Successful: {final_successful}")
675
+ console.print(f" ✗ Failed: {final_failed}")
676
+
677
+ if initial_failed > 0:
678
+ retry_successful = initial_failed - final_failed
679
+ console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
646
680
 
647
681
  return results
648
682
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 0.1.9
3
+ Version: 1.0
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=8WEcryB8fckf5yIvH55s7a5FtxvK_AhXdi_dyaqqing,27929
2
+ spiderforce4ai-1.0.dist-info/METADATA,sha256=VqydJoQcHkzvIhYTPeH3j8ZSHK-lGbo1xmZwQZk6w2s,7769
3
+ spiderforce4ai-1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.0.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.0.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=oU_UIdzsQxExaVgD7NCaVm4G-9zMtKGnREfY6xL1uFY,26041
2
- spiderforce4ai-0.1.9.dist-info/METADATA,sha256=poV1i_-H3AgzFhs9juRDJSfaWO0gVePb5JXN7ynL4Y4,7771
3
- spiderforce4ai-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-0.1.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-0.1.9.dist-info/RECORD,,