spiderforce4ai 0.1.9__py3-none-any.whl → 1.1__py3-none-any.whl

@@ -445,7 +445,11 @@ class SpiderForce4AI:
         if not failed_results:
             return []

-        console.print("\n[yellow]Retrying failed URLs...[/yellow]")
+        failed_count = len(failed_results)
+        total_count = len([r for r in self.crawl_results])
+        failure_ratio = (failed_count / total_count) * 100
+
+        console.print(f"\n[yellow]Retrying failed URLs: {failed_count} ({failure_ratio:.1f}% failed)[/yellow]")
         retry_results = []

         # Create a new progress bar if one wasn't provided
@@ -519,24 +523,41 @@ class SpiderForce4AI:
         # Identify failed URLs
         failed_results = [r for r in initial_results if r.status == "failed"]

-        # Retry failed URLs
+        # Calculate initial failure ratio
+        initial_failed = len(failed_results)
+        total_urls = len(urls)
+        failure_ratio = (initial_failed / total_urls) * 100
+
+        # Retry failed URLs if ratio is acceptable
         if failed_results:
-            retry_results = await self._retry_failed_urls(failed_results, config, progress)
-
-            # Replace failed results with retry results
-            results = [r for r in initial_results if r.status == "success"] + retry_results
+            if failure_ratio > 20:
+                console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
+                results = initial_results
+            else:
+                retry_results = await self._retry_failed_urls(failed_results, config, progress)
+                # Replace failed results with retry results
+                results = [r for r in initial_results if r.status == "success"] + retry_results
         else:
             results = initial_results

         # Save final report
         await self._save_report(config)

-        # Print final summary
-        successful = len([r for r in results if r.status == "success"])
-        failed = len([r for r in results if r.status == "failed"])
-        console.print(f"\n[green]Final crawling results:[/green]")
-        console.print(f"✓ Successful: {successful}")
-        console.print(f" Failed: {failed}")
+        # Calculate final statistics
+        final_successful = len([r for r in results if r.status == "success"])
+        final_failed = len([r for r in results if r.status == "failed"])
+
+        # Print detailed summary
+        console.print(f"\n[green]Crawling Summary:[/green]")
+        console.print(f"Total URLs processed: {total_urls}")
+        console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
+        console.print(f"Final results:")
+        console.print(f" ✓ Successful: {final_successful}")
+        console.print(f" ✗ Failed: {final_failed}")
+
+        if initial_failed > 0:
+            retry_successful = initial_failed - final_failed
+            console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")

         if config.report_file:
             console.print(f"📊 Report saved to: {config.report_file}")
@@ -624,25 +645,44 @@ class SpiderForce4AI:
             self._save_report_sync(results, config)
             print(f"\nReport saved to: {config.report_file}")

-        # Identify failed URLs and retry them
+        # Calculate initial failure statistics
         failed_results = [r for r in results if r.status == "failed"]
-        if failed_results:
-            console.print("\n[yellow]Retrying failed URLs...[/yellow]")
-            for result in failed_results:
-                new_result = _process_url_parallel((result.url, self.base_url, config))
-                if new_result.status == "success":
-                    console.print(f"[green]✓ Retry successful: {result.url}[/green]")
-                    # Replace the failed result with the successful retry
-                    results[results.index(result)] = new_result
-                else:
-                    console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
+        initial_failed = len(failed_results)
+        total_urls = len(urls)
+        failure_ratio = (initial_failed / total_urls) * 100

-        # Print final summary
-        successful = len([r for r in results if r.status == "success"])
-        failed = len([r for r in results if r.status == "failed"])
-        console.print(f"\n[green]Final crawling results:[/green]")
-        console.print(f"✓ Successful: {successful}")
-        console.print(f"✗ Failed: {failed}")
+        # Retry failed URLs if ratio is acceptable
+        if failed_results:
+            if failure_ratio > 20:
+                console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
+            else:
+                failed_count = len(failed_results)
+                failure_ratio = (failed_count / total_urls) * 100
+                console.print(f"\n[yellow]Retrying failed URLs: {failed_count} ({failure_ratio:.1f}% failed)[/yellow]")
+                for result in failed_results:
+                    new_result = _process_url_parallel((result.url, self.base_url, config))
+                    if new_result.status == "success":
+                        console.print(f"[green]✓ Retry successful: {result.url}[/green]")
+                        # Replace the failed result with the successful retry
+                        results[results.index(result)] = new_result
+                    else:
+                        console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
+
+        # Calculate final statistics
+        final_successful = len([r for r in results if r.status == "success"])
+        final_failed = len([r for r in results if r.status == "failed"])
+
+        # Print detailed summary
+        console.print(f"\n[green]Crawling Summary:[/green]")
+        console.print(f"Total URLs processed: {total_urls}")
+        console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
+        console.print(f"Final results:")
+        console.print(f" ✓ Successful: {final_successful}")
+        console.print(f" ✗ Failed: {final_failed}")
+
+        if initial_failed > 0:
+            retry_successful = initial_failed - final_failed
+            console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")

         return results

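Taken together, the __init__.py hunks gate both the async and the parallel retry passes on the share of URLs that failed on the first pass: above 20% the retry is skipped as a likely sign of server overload, otherwise the failed URLs are re-crawled and the summary now reports totals, initial failures, and the retry success rate. A minimal standalone sketch of that gating decision, assuming a hypothetical helper name (should_retry is not part of the package; only the 20% threshold and the ratio arithmetic come from the diff):

def should_retry(failed_count: int, total_count: int, max_failure_pct: float = 20.0) -> bool:
    # Hypothetical helper, not in spiderforce4ai: mirrors the 1.1 retry gate.
    # Retry only when something failed and the failure ratio is at or below the threshold.
    if total_count == 0 or failed_count == 0:
        return False
    failure_ratio = (failed_count / total_count) * 100
    return failure_ratio <= max_failure_pct

For example, 3 failures out of 50 URLs (6.0%) would be retried, while 15 out of 50 (30.0%) would abort the retry pass, matching the new console messages above.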
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 0.1.9
+Version: 1.1
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
+spiderforce4ai/__init__.py,sha256=lCviRhfLngSMehFJZwyK4LirPwbWEyZ0RJjCt5FkBcY,28304
+spiderforce4ai-1.1.dist-info/METADATA,sha256=lQfqXn0ifJOmOmLkgr8YTSYUFiu6-HS3YsRD0togylo,7769
+spiderforce4ai-1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+spiderforce4ai-1.1.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
+spiderforce4ai-1.1.dist-info/RECORD,,
@@ -1,5 +0,0 @@
-spiderforce4ai/__init__.py,sha256=oU_UIdzsQxExaVgD7NCaVm4G-9zMtKGnREfY6xL1uFY,26041
-spiderforce4ai-0.1.9.dist-info/METADATA,sha256=poV1i_-H3AgzFhs9juRDJSfaWO0gVePb5JXN7ynL4Y4,7771
-spiderforce4ai-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-spiderforce4ai-0.1.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
-spiderforce4ai-0.1.9.dist-info/RECORD,,