spiderforce4ai 1.2__tar.gz → 1.4__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.2
3
+ Version: 1.4
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "1.2"
7
+ version = "1.4"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service"
9
9
  readme = "README.md"
10
10
  authors = [{name = "Piotr Tamulewicz", email = "pt@petertam.pro"}]
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
3
3
 
4
4
  setup(
5
5
  name="spiderforce4ai",
6
- version="1.2",
6
+ version="1.4",
7
7
  author="Piotr Tamulewicz",
8
8
  author_email="pt@petertam.pro",
9
9
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service",
@@ -350,17 +350,23 @@ class SpiderForce4AI:
350
350
 
351
351
  def _save_report_sync(self, results: List[CrawlResult], config: CrawlConfig) -> None:
352
352
  """Save crawl report synchronously."""
353
+ # Separate successful and failed results
354
+ successful_results = [r for r in results if r.status == "success"]
355
+ failed_results = [r for r in results if r.status == "failed"]
356
+
357
+ # Create report with only final state
353
358
  report = {
354
359
  "timestamp": datetime.now().isoformat(),
355
360
  "config": config.to_dict(),
356
361
  "results": {
357
- "successful": [asdict(r) for r in results if r.status == "success"],
358
- "failed": [asdict(r) for r in results if r.status == "failed"]
362
+ "successful": [asdict(r) for r in successful_results],
363
+ "failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
359
364
  },
360
365
  "summary": {
361
366
  "total": len(results),
362
- "successful": len([r for r in results if r.status == "success"]),
363
- "failed": len([r for r in results if r.status == "failed"])
367
+ "successful": len(successful_results),
368
+ "failed": len(failed_results),
369
+ "retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
364
370
  }
365
371
  }
366
372
 
@@ -372,17 +378,22 @@ class SpiderForce4AI:
372
378
  if not config.report_file:
373
379
  return
374
380
 
381
+ # Separate successful and failed results
382
+ successful_results = [r for r in self.crawl_results if r.status == "success"]
383
+ failed_results = [r for r in self.crawl_results if r.status == "failed"]
384
+
375
385
  report = {
376
386
  "timestamp": datetime.now().isoformat(),
377
387
  "config": config.to_dict(),
378
388
  "results": {
379
- "successful": [asdict(r) for r in self.crawl_results if r.status == "success"],
380
- "failed": [asdict(r) for r in self.crawl_results if r.status == "failed"]
389
+ "successful": [asdict(r) for r in successful_results],
390
+ "failed": [asdict(r) for r in failed_results] # Only truly failed URLs after retries
381
391
  },
382
392
  "summary": {
383
393
  "total": len(self.crawl_results),
384
- "successful": len([r for r in self.crawl_results if r.status == "success"]),
385
- "failed": len([r for r in self.crawl_results if r.status == "failed"])
394
+ "successful": len(successful_results),
395
+ "failed": len(failed_results),
396
+ "retry_info": getattr(self, '_retry_stats', {}) # Include retry statistics if available
386
397
  }
387
398
  }
388
399
 
@@ -545,10 +556,7 @@ class SpiderForce4AI:
545
556
  else:
546
557
  results = initial_results
547
558
 
548
- # Save final report
549
- await self._save_report(config)
550
-
551
- # Calculate final statistics
559
+ # Calculate final statistics before saving report
552
560
  final_successful = len([r for r in results if r.status == "success"])
553
561
  final_failed = len([r for r in results if r.status == "failed"])
554
562
 
@@ -564,7 +572,15 @@ class SpiderForce4AI:
564
572
  retry_successful = initial_failed - final_failed
565
573
  console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
566
574
 
567
- if config.report_file:
575
+ # Save final report after all retries are complete
576
+ if config.save_reports:
577
+ self._retry_stats = {
578
+ "initial_failures": initial_failed,
579
+ "failure_ratio": failure_ratio,
580
+ "retry_successful": retry_successful if initial_failed > 0 else 0,
581
+ "retry_failed": final_failed
582
+ }
583
+ await self._save_report(config)
568
584
  console.print(f"📊 Report saved to: {config.report_file}")
569
585
 
570
586
  return results
@@ -645,11 +661,6 @@ class SpiderForce4AI:
645
661
  status = "✓" if result.status == "success" else "✗"
646
662
  progress.description = f"Last: {status} {result.url}"
647
663
 
648
- # Save final report
649
- if config.report_file:
650
- self._save_report_sync(results, config)
651
- print(f"\nReport saved to: {config.report_file}")
652
-
653
664
  # Calculate initial failure statistics
654
665
  failed_results = [r for r in results if r.status == "failed"]
655
666
  initial_failed = len(failed_results)
@@ -704,6 +715,17 @@ class SpiderForce4AI:
704
715
  retry_successful = initial_failed - final_failed
705
716
  console.print(f"Retry success rate: {retry_successful}/{initial_failed} ({(retry_successful/initial_failed)*100:.1f}%)")
706
717
 
718
+ # Save final report after all retries are complete
719
+ if config.save_reports:
720
+ self._retry_stats = {
721
+ "initial_failures": initial_failed,
722
+ "failure_ratio": failure_ratio,
723
+ "retry_successful": retry_successful if initial_failed > 0 else 0,
724
+ "retry_failed": final_failed
725
+ }
726
+ self._save_report_sync(results, config)
727
+ console.print(f"📊 Report saved to: {config.report_file}")
728
+
707
729
  return results
708
730
 
709
731
  async def __aenter__(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.2
3
+ Version: 1.4
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes