spiderforce4ai-0.1.9-py3-none-any.whl → spiderforce4ai-1.0-py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +62 -28
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/METADATA +1 -1
- spiderforce4ai-1.0.dist-info/RECORD +5 -0
- spiderforce4ai-0.1.9.dist-info/RECORD +0 -5
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/WHEEL +0 -0
- {spiderforce4ai-0.1.9.dist-info → spiderforce4ai-1.0.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -519,24 +519,41 @@ class SpiderForce4AI:
|
|
519
519
|
# Identify failed URLs
|
520
520
|
failed_results = [r for r in initial_results if r.status == "failed"]
|
521
521
|
|
522
|
-
#
|
522
|
+
# Calculate initial failure ratio
|
523
|
+
initial_failed = len(failed_results)
|
524
|
+
total_urls = len(urls)
|
525
|
+
failure_ratio = (initial_failed / total_urls) * 100
|
526
|
+
|
527
|
+
# Retry failed URLs if ratio is acceptable
|
523
528
|
if failed_results:
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
529
|
+
if failure_ratio > 20:
|
530
|
+
console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
|
531
|
+
results = initial_results
|
532
|
+
else:
|
533
|
+
retry_results = await self._retry_failed_urls(failed_results, config, progress)
|
534
|
+
# Replace failed results with retry results
|
535
|
+
results = [r for r in initial_results if r.status == "success"] + retry_results
|
528
536
|
else:
|
529
537
|
results = initial_results
|
530
538
|
|
531
539
|
# Save final report
|
532
540
|
await self._save_report(config)
|
533
541
|
|
534
|
-
#
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
console.print(f"
|
542
|
+
# Calculate final statistics
|
543
|
+
final_successful = len([r for r in results if r.status == "success"])
|
544
|
+
final_failed = len([r for r in results if r.status == "failed"])
|
545
|
+
|
546
|
+
# Print detailed summary
|
547
|
+
console.print(f"\n[green]Crawling Summary:[/green]")
|
548
|
+
console.print(f"Total URLs processed: {total_urls}")
|
549
|
+
console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
|
550
|
+
console.print(f"Final results:")
|
551
|
+
console.print(f" ✓ Successful: {final_successful}")
|
552
|
+
console.print(f" ✗ Failed: {final_failed}")
|
553
|
+
|
554
|
+
if initial_failed > 0:
|
555
|
+
retry_successful = initial_failed - final_failed
|
556
|
+
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
540
557
|
|
541
558
|
if config.report_file:
|
542
559
|
console.print(f"📊 Report saved to: {config.report_file}")
|
@@ -624,25 +641,42 @@ class SpiderForce4AI:
|
|
624
641
|
self._save_report_sync(results, config)
|
625
642
|
print(f"\nReport saved to: {config.report_file}")
|
626
643
|
|
627
|
-
#
|
644
|
+
# Calculate initial failure statistics
|
628
645
|
failed_results = [r for r in results if r.status == "failed"]
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
new_result = _process_url_parallel((result.url, self.base_url, config))
|
633
|
-
if new_result.status == "success":
|
634
|
-
console.print(f"[green]✓ Retry successful: {result.url}[/green]")
|
635
|
-
# Replace the failed result with the successful retry
|
636
|
-
results[results.index(result)] = new_result
|
637
|
-
else:
|
638
|
-
console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
|
646
|
+
initial_failed = len(failed_results)
|
647
|
+
total_urls = len(urls)
|
648
|
+
failure_ratio = (initial_failed / total_urls) * 100
|
639
649
|
|
640
|
-
#
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
650
|
+
# Retry failed URLs if ratio is acceptable
|
651
|
+
if failed_results:
|
652
|
+
if failure_ratio > 20:
|
653
|
+
console.print(f"\n[red]Failure ratio too high ({failure_ratio:.1f}%) - aborting retry due to possible server overload[/red]")
|
654
|
+
else:
|
655
|
+
console.print("\n[yellow]Retrying failed URLs...[/yellow]")
|
656
|
+
for result in failed_results:
|
657
|
+
new_result = _process_url_parallel((result.url, self.base_url, config))
|
658
|
+
if new_result.status == "success":
|
659
|
+
console.print(f"[green]✓ Retry successful: {result.url}[/green]")
|
660
|
+
# Replace the failed result with the successful retry
|
661
|
+
results[results.index(result)] = new_result
|
662
|
+
else:
|
663
|
+
console.print(f"[red]✗ Retry failed: {result.url} - {new_result.error}[/red]")
|
664
|
+
|
665
|
+
# Calculate final statistics
|
666
|
+
final_successful = len([r for r in results if r.status == "success"])
|
667
|
+
final_failed = len([r for r in results if r.status == "failed"])
|
668
|
+
|
669
|
+
# Print detailed summary
|
670
|
+
console.print(f"\n[green]Crawling Summary:[/green]")
|
671
|
+
console.print(f"Total URLs processed: {total_urls}")
|
672
|
+
console.print(f"Initial failures: {initial_failed} ({failure_ratio:.1f}%)")
|
673
|
+
console.print(f"Final results:")
|
674
|
+
console.print(f" ✓ Successful: {final_successful}")
|
675
|
+
console.print(f" ✗ Failed: {final_failed}")
|
676
|
+
|
677
|
+
if initial_failed > 0:
|
678
|
+
retry_successful = initial_failed - final_failed
|
679
|
+
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
646
680
|
|
647
681
|
return results
|
648
682
|
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=8WEcryB8fckf5yIvH55s7a5FtxvK_AhXdi_dyaqqing,27929
|
2
|
+
spiderforce4ai-1.0.dist-info/METADATA,sha256=VqydJoQcHkzvIhYTPeH3j8ZSHK-lGbo1xmZwQZk6w2s,7769
|
3
|
+
spiderforce4ai-1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.0.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.0.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=oU_UIdzsQxExaVgD7NCaVm4G-9zMtKGnREfY6xL1uFY,26041
|
2
|
-
spiderforce4ai-0.1.9.dist-info/METADATA,sha256=poV1i_-H3AgzFhs9juRDJSfaWO0gVePb5JXN7ynL4Y4,7771
|
3
|
-
spiderforce4ai-0.1.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-0.1.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-0.1.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|