spiderforce4ai-1.2-py3-none-any.whl → spiderforce4ai-1.4-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +40 -18
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/METADATA +1 -1
- spiderforce4ai-1.4.dist-info/RECORD +5 -0
- spiderforce4ai-1.2.dist-info/RECORD +0 -5
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/WHEEL +0 -0
- {spiderforce4ai-1.2.dist-info → spiderforce4ai-1.4.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -350,17 +350,23 @@ class SpiderForce4AI:
|
|
350
350
|
|
351
351
|
def _save_report_sync(self, results: List[CrawlResult], config: CrawlConfig) -> None:
|
352
352
|
"""Save crawl report synchronously."""
|
353
|
+
# Separate successful and failed results
|
354
|
+
successful_results = [r for r in results if r.status == "success"]
|
355
|
+
failed_results = [r for r in results if r.status == "failed"]
|
356
|
+
|
357
|
+
# Create report with only final state
|
353
358
|
report = {
|
354
359
|
"timestamp": datetime.now().isoformat(),
|
355
360
|
"config": config.to_dict(),
|
356
361
|
"results": {
|
357
|
-
"successful": [asdict(r) for r in
|
358
|
-
"failed": [asdict(r) for r in
|
362
|
+
"successful": [asdict(r) for r in successful_results],
|
363
|
+
"failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
|
359
364
|
},
|
360
365
|
"summary": {
|
361
366
|
"total": len(results),
|
362
|
-
"successful": len(
|
363
|
-
"failed": len(
|
367
|
+
"successful": len(successful_results),
|
368
|
+
"failed": len(failed_results),
|
369
|
+
"retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
|
364
370
|
}
|
365
371
|
}
|
366
372
|
|
@@ -372,17 +378,22 @@ class SpiderForce4AI:
|
|
372
378
|
if not config.report_file:
|
373
379
|
return
|
374
380
|
|
381
|
+
# Separate successful and failed results
|
382
|
+
successful_results = [r for r in self.crawl_results if r.status == "success"]
|
383
|
+
failed_results = [r for r in self.crawl_results if r.status == "failed"]
|
384
|
+
|
375
385
|
report = {
|
376
386
|
"timestamp": datetime.now().isoformat(),
|
377
387
|
"config": config.to_dict(),
|
378
388
|
"results": {
|
379
|
-
"successful": [asdict(r) for r in
|
380
|
-
"failed": [asdict(r) for r in
|
389
|
+
"successful": [asdict(r) for r in successful_results],
|
390
|
+
"failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
|
381
391
|
},
|
382
392
|
"summary": {
|
383
393
|
"total": len(self.crawl_results),
|
384
|
-
"successful": len(
|
385
|
-
"failed": len(
|
394
|
+
"successful": len(successful_results),
|
395
|
+
"failed": len(failed_results),
|
396
|
+
"retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
|
386
397
|
}
|
387
398
|
}
|
388
399
|
|
@@ -545,10 +556,7 @@ class SpiderForce4AI:
|
|
545
556
|
else:
|
546
557
|
results = initial_results
|
547
558
|
|
548
|
-
#
|
549
|
-
await self._save_report(config)
|
550
|
-
|
551
|
-
# Calculate final statistics
|
559
|
+
# Calculate final statistics before saving report
|
552
560
|
final_successful = len([r for r in results if r.status == "success"])
|
553
561
|
final_failed = len([r for r in results if r.status == "failed"])
|
554
562
|
|
@@ -564,7 +572,15 @@ class SpiderForce4AI:
|
|
564
572
|
retry_successful = initial_failed - final_failed
|
565
573
|
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
566
574
|
|
567
|
-
|
575
|
+
# Save final report after all retries are complete
|
576
|
+
if config.save_reports:
|
577
|
+
self._retry_stats = {
|
578
|
+
"initial_failures": initial_failed,
|
579
|
+
"failure_ratio": failure_ratio,
|
580
|
+
"retry_successful": retry_successful if initial_failed > 0 else 0,
|
581
|
+
"retry_failed": final_failed
|
582
|
+
}
|
583
|
+
await self._save_report(config)
|
568
584
|
console.print(f"📊 Report saved to: {config.report_file}")
|
569
585
|
|
570
586
|
return results
|
@@ -645,11 +661,6 @@ class SpiderForce4AI:
|
|
645
661
|
status = "✓" if result.status == "success" else "✗"
|
646
662
|
progress.description = f"Last: {status} {result.url}"
|
647
663
|
|
648
|
-
# Save final report
|
649
|
-
if config.report_file:
|
650
|
-
self._save_report_sync(results, config)
|
651
|
-
print(f"\nReport saved to: {config.report_file}")
|
652
|
-
|
653
664
|
# Calculate initial failure statistics
|
654
665
|
failed_results = [r for r in results if r.status == "failed"]
|
655
666
|
initial_failed = len(failed_results)
|
@@ -704,6 +715,17 @@ class SpiderForce4AI:
|
|
704
715
|
retry_successful = initial_failed - final_failed
|
705
716
|
console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
|
706
717
|
|
718
|
+
# Save final report after all retries are complete
|
719
|
+
if config.save_reports:
|
720
|
+
self._retry_stats = {
|
721
|
+
"initial_failures": initial_failed,
|
722
|
+
"failure_ratio": failure_ratio,
|
723
|
+
"retry_successful": retry_successful if initial_failed > 0 else 0,
|
724
|
+
"retry_failed": final_failed
|
725
|
+
}
|
726
|
+
self._save_report_sync(results, config)
|
727
|
+
console.print(f"📊 Report saved to: {config.report_file}")
|
728
|
+
|
707
729
|
return results
|
708
730
|
|
709
731
|
async def __aenter__(self):
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=7EMEEfFY3WLq58-vnK1Yhcb1trF2ZXU-Ny3licz45Yk,30585
|
2
|
+
spiderforce4ai-1.4.dist-info/METADATA,sha256=7GRBz_bTtXOQ2N-gHRPJFEWW8mmOB_1gwrJCf-el8LM,7183
|
3
|
+
spiderforce4ai-1.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.4.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.4.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BHsdGGxEyS4RHbHyTnYRBE4oRy2i1pGSrEt_LT4vKWc,29384
|
2
|
-
spiderforce4ai-1.2.dist-info/METADATA,sha256=I5gmglzuRXSKwRc0lWk2Vslnx_4PIffIwjJ-SOTeYpU,7183
|
3
|
-
spiderforce4ai-1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-1.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|