spiderforce4ai 1.2__py3-none-any.whl → 1.4__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/__init__.py +40 -18
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/METADATA +1 -1
- spiderforce4ai-1.4.dist-info/RECORD +5 -0
- spiderforce4ai-1.2.dist-info/RECORD +0 -5
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/WHEEL +0 -0
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -350,17 +350,23 @@ class SpiderForce4AI:
|
|
350
350
|
|
351
351
|
def _save_report_sync(self, results: List[CrawlResult], config: CrawlConfig) -> None:
|
352
352
|
"""Save crawl report synchronously."""
|
353
|
+
# Separate successful and failed results
|
354
|
+
successful_results = [r for r in results if r.status == "success"]
|
355
|
+
failed_results = [r for r in results if r.status == "failed"]
|
356
|
+
|
357
|
+
# Create report with only final state
|
353
358
|
report = {
|
354
359
|
"timestamp": datetime.now().isoformat(),
|
355
360
|
"config": config.to_dict(),
|
356
361
|
"results": {
|
357
|
-
"successful": [asdict(r) for r in results if r.status == "success"],
|
358
|
-
"failed": [asdict(r) for r in results if r.status == "failed"]
|
362
|
+
"successful": [asdict(r) for r in successful_results],
|
363
|
+
"failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
|
359
364
|
},
|
360
365
|
"summary": {
|
361
366
|
"total": len(results),
|
362
|
-
"successful": len([r for r in results if r.status == "success"]),
|
363
|
-
"failed": len([r for r in results if r.status == "failed"])
|
367
|
+
"successful": len(successful_results),
|
368
|
+
"failed": len(failed_results),
|
369
|
+
"retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
|
364
370
|
}
|
365
371
|
}
|
366
372
|
|
@@ -372,17 +378,22 @@ class SpiderForce4AI:
|
|
372
378
|
if not config.report_file:
|
373
379
|
return
|
374
380
|
|
381
|
+
# Separate successful and failed results
|
382
|
+
successful_results = [r for r in self.crawl_results if r.status == "success"]
|
383
|
+
failed_results = [r for r in self.crawl_results if r.status == "failed"]
|
384
|
+
|
375
385
|
report = {
|
376
386
|
"timestamp": datetime.now().isoformat(),
|
377
387
|
"config": config.to_dict(),
|
378
388
|
"results": {
|
379
|
-
"successful": [asdict(r) for r in self.crawl_results if r.status == "success"],
|
380
|
-
"failed": [asdict(r) for r in self.crawl_results if r.status == "failed"]
|
389
|
+
"successful": [asdict(r) for r in successful_results],
|
390
|
+
"failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
|
381
391
|
},
|
382
392
|
"summary": {
|
383
393
|
"total": len(self.crawl_results),
|
384
|
-
"successful": len([r for r in self.crawl_results if r.status == "success"]),
|
385
|
-
"failed": len([r for r in self.crawl_results if r.status == "failed"])
|
394
|
+
"successful": len(successful_results),
|
395
|
+
"failed": len(failed_results),
|
396
|
+
"retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
|
386
397
|
}
|
387
398
|
}
|
388
399
|
|
@@ -545,10 +556,7 @@ class SpiderForce4AI:
|
|
545
556
|
else:
|
546
557
|
results = initial_results
|
547
558
|
|
548
|
-
#
|
549
|
-
await self._save_report(config)
|
550
|
-
|
551
|
-
# Calculate final statistics
|
559
|
+
# Calculate final statistics before saving report
|
552
560
|
final_successful = len([r for r in results if r.status == "success"])
|
553
561
|
final_failed = len([r for r in results if r.status == "failed"])
|
554
562
|
|
@@ -564,7 +572,15 @@ class SpiderForce4AI:
|
|
564
572
|
retry_successful = initial_failed - final_failed
|
565
573
|
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
566
574
|
|
567
|
-
|
575
|
+
# Save final report after all retries are complete
|
576
|
+
if config.save_reports:
|
577
|
+
self._retry_stats = {
|
578
|
+
"initial_failures": initial_failed,
|
579
|
+
"failure_ratio": failure_ratio,
|
580
|
+
"retry_successful": retry_successful if initial_failed > 0 else 0,
|
581
|
+
"retry_failed": final_failed
|
582
|
+
}
|
583
|
+
await self._save_report(config)
|
568
584
|
console.print(f"📊 Report saved to: {config.report_file}")
|
569
585
|
|
570
586
|
return results
|
@@ -645,11 +661,6 @@ class SpiderForce4AI:
|
|
645
661
|
status = "✓" if result.status == "success" else "✗"
|
646
662
|
progress.description = f"Last: {status} {result.url}"
|
647
663
|
|
648
|
-
# Save final report
|
649
|
-
if config.report_file:
|
650
|
-
self._save_report_sync(results, config)
|
651
|
-
print(f"\nReport saved to: {config.report_file}")
|
652
|
-
|
653
664
|
# Calculate initial failure statistics
|
654
665
|
failed_results = [r for r in results if r.status == "failed"]
|
655
666
|
initial_failed = len(failed_results)
|
@@ -704,6 +715,17 @@ class SpiderForce4AI:
|
|
704
715
|
retry_successful = initial_failed - final_failed
|
705
716
|
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
706
717
|
|
718
|
+
# Save final report after all retries are complete
|
719
|
+
if config.save_reports:
|
720
|
+
self._retry_stats = {
|
721
|
+
"initial_failures": initial_failed,
|
722
|
+
"failure_ratio": failure_ratio,
|
723
|
+
"retry_successful": retry_successful if initial_failed > 0 else 0,
|
724
|
+
"retry_failed": final_failed
|
725
|
+
}
|
726
|
+
self._save_report_sync(results, config)
|
727
|
+
console.print(f"📊 Report saved to: {config.report_file}")
|
728
|
+
|
707
729
|
return results
|
708
730
|
|
709
731
|
async def __aenter__(self):
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=7EMEEfFY3WLq58-vnK1Yhcb1trF2ZXU-Ny3licz45Yk,30585
|
2
|
+
spiderforce4ai-1.4.dist-info/METADATA,sha256=7GRBz_bTtXOQ2N-gHRPJFEWW8mmOB_1gwrJCf-el8LM,7183
|
3
|
+
spiderforce4ai-1.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.4.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.4.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BHsdGGxEyS4RHbHyTnYRBE4oRy2i1pGSrEt_LT4vKWc,29384
|
2
|
-
spiderforce4ai-1.2.dist-info/METADATA,sha256=I5gmglzuRXSKwRc0lWk2Vslnx_4PIffIwjJ-SOTeYpU,7183
|
3
|
-
spiderforce4ai-1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-1.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|