spiderforce4ai 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -460,28 +460,6 @@ class SpiderForce4AI:
460
460
  if config.output_dir:
461
461
  await _save_markdown_async(url, markdown, config)
462
462
 
463
- # Handle post-extraction if configured
464
- if config.post_extraction_agent and result.status == "success":
465
- try:
466
- post_config = PostExtractionConfig(
467
- model=config.post_extraction_agent["model"],
468
- messages=config.post_extraction_agent["messages"],
469
- api_key=config.post_extraction_agent["api_key"],
470
- max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
471
- temperature=config.post_extraction_agent.get("temperature", 0.7),
472
- base_url=config.post_extraction_agent.get("base_url"),
473
- combine_output=bool(config.post_extraction_agent_save_to_file),
474
- output_file=config.post_extraction_agent_save_to_file,
475
- custom_transform_function=config.post_agent_transformer_function
476
- )
477
-
478
- agent = PostExtractionAgent(post_config)
479
- extraction_result = await agent.process_content(url, markdown)
480
- if extraction_result:
481
- result.extraction_result = extraction_result
482
- except Exception as e:
483
- console.print(f"[red]Error in post-extraction processing for {url}: {str(e)}[/red]")
484
-
485
463
  await _send_webhook_async(result, config)
486
464
 
487
465
  self.crawl_results.append(result)
@@ -635,10 +613,40 @@ class SpiderForce4AI:
635
613
  except Exception as e:
636
614
  console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
637
615
 
616
+ # Process LLM requests sequentially after all crawling is complete
617
+ llm_successful = 0
618
+ if config.post_extraction_agent:
619
+ console.print("\n[cyan]Starting post-extraction processing...[/cyan]")
620
+ successful_results = [r for r in results if r.status == "success"]
621
+ llm_task = progress.add_task("[cyan]Post-extraction processing...", total=len(successful_results))
622
+
623
+ post_config = PostExtractionConfig(
624
+ model=config.post_extraction_agent["model"],
625
+ messages=config.post_extraction_agent["messages"],
626
+ api_key=config.post_extraction_agent["api_key"],
627
+ max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
628
+ temperature=config.post_extraction_agent.get("temperature", 0.7),
629
+ base_url=config.post_extraction_agent.get("base_url"),
630
+ combine_output=bool(config.post_extraction_agent_save_to_file),
631
+ output_file=config.post_extraction_agent_save_to_file,
632
+ custom_transform_function=config.post_agent_transformer_function
633
+ )
634
+ agent = PostExtractionAgent(post_config)
635
+
636
+ for result in successful_results:
637
+ try:
638
+ result.extraction_result = await agent.process_content(result.url, result.markdown)
639
+ if result.extraction_result:
640
+ llm_successful += 1
641
+ progress.update(llm_task, advance=1)
642
+ except Exception as e:
643
+ console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
644
+ await asyncio.sleep(1) # Add delay after error
645
+ await asyncio.sleep(0.5) # Rate limiting between requests
646
+
638
647
  # Calculate final statistics
639
648
  final_successful = len([r for r in results if r.status == "success"])
640
649
  final_failed = len([r for r in results if r.status == "failed"])
641
- llm_successful = len([r for r in results if r.extraction_result is not None])
642
650
 
643
651
  # Update retry stats
644
652
  self._retry_stats = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.1
3
+ Version: 2.4.2
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=6hqYztIqL_jRuKmQOGnap2-hP8Lq1YXarUQXTFwIVxY,40841
2
+ spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
3
+ spiderforce4ai-2.4.2.dist-info/METADATA,sha256=hyIp437hoWVVkbN88P6yNcKwvkvf2NpP6fyOsWxhM_I,9012
4
+ spiderforce4ai-2.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.2.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.2.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=IjoJSE-7PX8zxBF0Pl1ELQUraLU3agAtY_J6NvQSPf4,40533
2
- spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
3
- spiderforce4ai-2.4.1.dist-info/METADATA,sha256=xVm-JdLz6Kx73Bi0DA1QG6D9Ya_OLqWd_80PNWHXLsA,9012
4
- spiderforce4ai-2.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.1.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.1.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.1.dist-info/RECORD,,