spiderforce4ai-0.1.9-py3-none-any.whl → spiderforce4ai-1.0-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/__init__.py +62 -28
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/METADATA +1 -1
- spiderforce4ai-1.0.dist-info/RECORD +5 -0
- spiderforce4ai-0.1.9.dist-info/RECORD +0 -5
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/WHEEL +0 -0
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -519,24 +519,41 @@ class SpiderForce4AI:
|
|
519
519
|
# Identify failed URLs
|
520
520
|
failed_results = [r for r in initial_results if r.status == "failed"]
|
521
521
|
|
522
|
-
#
|
522
|
+
# Calculate initial failure ratio
|
523
|
+
initial_failed = len(failed_results)
|
524
|
+
total_urls = len(urls)
|
525
|
+
failure_ratio = (initial_failed / total_urls) * 100
|
526
|
+
|
527
|
+
# Retry failed URLs if ratio is acceptable
|
523
528
|
if failed_results:
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
529
|
+
if failure_ratio > 20:
|
530
|
+
console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
|
531
|
+
results = initial_results
|
532
|
+
else:
|
533
|
+
retry_results = await self._retry_failed_urls(failed_results, config, progress)
|
534
|
+
# Replace failed results with retry results
|
535
|
+
results = [r for r in initial_results if r.status == "success"] + retry_results
|
528
536
|
else:
|
529
537
|
results = initial_results
|
530
538
|
|
531
539
|
# Save final report
|
532
540
|
await self._save_report(config)
|
533
541
|
|
534
|
-
#
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
console.print(f"
|
542
|
+
# Calculate final statistics
|
543
|
+
final_successful = len([r for r in results if r.status == "success"])
|
544
|
+
final_failed = len([r for r in results if r.status == "failed"])
|
545
|
+
|
546
|
+
# Print detailed summary
|
547
|
+
console.print(f"\n[green]Crawling Summary:[/green]")
|
548
|
+
console.print(f"Total URLs processed: {total_urls}")
|
549
|
+
console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
|
550
|
+
console.print(f"Final results:")
|
551
|
+
console.print(f" ✓ Successful: {final_successful}")
|
552
|
+
console.print(f" ✗ Failed: {final_failed}")
|
553
|
+
|
554
|
+
if initial_failed > 0:
|
555
|
+
retry_successful = initial_failed - final_failed
|
556
|
+
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
540
557
|
|
541
558
|
if config.report_file:
|
542
559
|
console.print(f"📊 Report saved to: {config.report_file}")
|
@@ -624,25 +641,42 @@ class SpiderForce4AI:
|
|
624
641
|
self._save_report_sync(results, config)
|
625
642
|
print(f"\nReport saved to: {config.report_file}")
|
626
643
|
|
627
|
-
#
|
644
|
+
# Calculate initial failure statistics
|
628
645
|
failed_results = [r for r in results if r.status == "failed"]
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
new_result = _process_url_parallel((result.url, self.base_url, config))
|
633
|
-
if new_result.status == "success":
|
634
|
-
console.print(f"[green]✓ Retry successful: {result.url}[/green]")
|
635
|
-
# Replace the failed result with the successful retry
|
636
|
-
results[results.index(result)] = new_result
|
637
|
-
else:
|
638
|
-
console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
|
646
|
+
initial_failed = len(failed_results)
|
647
|
+
total_urls = len(urls)
|
648
|
+
failure_ratio = (initial_failed / total_urls) * 100
|
639
649
|
|
640
|
-
#
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
650
|
+
# Retry failed URLs if ratio is acceptable
|
651
|
+
if failed_results:
|
652
|
+
if failure_ratio > 20:
|
653
|
+
console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
|
654
|
+
else:
|
655
|
+
console.print("\n[yellow]Retrying failed URLs...[/yellow]")
|
656
|
+
for result in failed_results:
|
657
|
+
new_result = _process_url_parallel((result.url, self.base_url, config))
|
658
|
+
if new_result.status == "success":
|
659
|
+
console.print(f"[green]✓ Retry successful: {result.url}[/green]")
|
660
|
+
# Replace the failed result with the successful retry
|
661
|
+
results[results.index(result)] = new_result
|
662
|
+
else:
|
663
|
+
console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
|
664
|
+
|
665
|
+
# Calculate final statistics
|
666
|
+
final_successful = len([r for r in results if r.status == "success"])
|
667
|
+
final_failed = len([r for r in results if r.status == "failed"])
|
668
|
+
|
669
|
+
# Print detailed summary
|
670
|
+
console.print(f"\n[green]Crawling Summary:[/green]")
|
671
|
+
console.print(f"Total URLs processed: {total_urls}")
|
672
|
+
console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
|
673
|
+
console.print(f"Final results:")
|
674
|
+
console.print(f" ✓ Successful: {final_successful}")
|
675
|
+
console.print(f" ✗ Failed: {final_failed}")
|
676
|
+
|
677
|
+
if initial_failed > 0:
|
678
|
+
retry_successful = initial_failed - final_failed
|
679
|
+
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
646
680
|
|
647
681
|
return results
|
648
682
|
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=8WEcryB8fckf5yIvH55s7a5FtxvK_AhXdi_dyaqqing,27929
|
2
|
+
spiderforce4ai-1.0.dist-info/METADATA,sha256=VqydJoQcHkzvIhYTPeH3j8ZSHK-lGbo1xmZwQZk6w2s,7769
|
3
|
+
spiderforce4ai-1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.0.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.0.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=oU_UIdzsQxExaVgD7NCaVm4G-9zMtKGnREfY6xL1uFY,26041
|
2
|
-
spiderforce4ai-0.1.9.dist-info/METADATA,sha256=poV1i_-H3AgzFhs9juRDJSfaWO0gVePb5JXN7ynL4Y4,7771
|
3
|
-
spiderforce4ai-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-0.1.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-0.1.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|