spiderforce4ai 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +31 -23
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.2.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.1.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -460,28 +460,6 @@ class SpiderForce4AI:
|
|
460
460
|
if config.output_dir:
|
461
461
|
await _save_markdown_async(url, markdown, config)
|
462
462
|
|
463
|
-
# Handle post-extraction if configured
|
464
|
-
if config.post_extraction_agent and result.status == "success":
|
465
|
-
try:
|
466
|
-
post_config = PostExtractionConfig(
|
467
|
-
model=config.post_extraction_agent["model"],
|
468
|
-
messages=config.post_extraction_agent["messages"],
|
469
|
-
api_key=config.post_extraction_agent["api_key"],
|
470
|
-
max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
|
471
|
-
temperature=config.post_extraction_agent.get("temperature", 0.7),
|
472
|
-
base_url=config.post_extraction_agent.get("base_url"),
|
473
|
-
combine_output=bool(config.post_extraction_agent_save_to_file),
|
474
|
-
output_file=config.post_extraction_agent_save_to_file,
|
475
|
-
custom_transform_function=config.post_agent_transformer_function
|
476
|
-
)
|
477
|
-
|
478
|
-
agent = PostExtractionAgent(post_config)
|
479
|
-
extraction_result = await agent.process_content(url, markdown)
|
480
|
-
if extraction_result:
|
481
|
-
result.extraction_result = extraction_result
|
482
|
-
except Exception as e:
|
483
|
-
console.print(f"[red]Error in post-extraction processing for {url}: {str(e)}[/red]")
|
484
|
-
|
485
463
|
await _send_webhook_async(result, config)
|
486
464
|
|
487
465
|
self.crawl_results.append(result)
|
@@ -635,10 +613,40 @@ class SpiderForce4AI:
|
|
635
613
|
except Exception as e:
|
636
614
|
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
637
615
|
|
616
|
+
# Process LLM requests sequentially after all crawling is complete
|
617
|
+
llm_successful = 0
|
618
|
+
if config.post_extraction_agent:
|
619
|
+
console.print("\n[cyan]Starting post-extraction processing...[/cyan]")
|
620
|
+
successful_results = [r for r in results if r.status == "success"]
|
621
|
+
llm_task = progress.add_task("[cyan]Post-extraction processing...", total=len(successful_results))
|
622
|
+
|
623
|
+
post_config = PostExtractionConfig(
|
624
|
+
model=config.post_extraction_agent["model"],
|
625
|
+
messages=config.post_extraction_agent["messages"],
|
626
|
+
api_key=config.post_extraction_agent["api_key"],
|
627
|
+
max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
|
628
|
+
temperature=config.post_extraction_agent.get("temperature", 0.7),
|
629
|
+
base_url=config.post_extraction_agent.get("base_url"),
|
630
|
+
combine_output=bool(config.post_extraction_agent_save_to_file),
|
631
|
+
output_file=config.post_extraction_agent_save_to_file,
|
632
|
+
custom_transform_function=config.post_agent_transformer_function
|
633
|
+
)
|
634
|
+
agent = PostExtractionAgent(post_config)
|
635
|
+
|
636
|
+
for result in successful_results:
|
637
|
+
try:
|
638
|
+
result.extraction_result = await agent.process_content(result.url, result.markdown)
|
639
|
+
if result.extraction_result:
|
640
|
+
llm_successful += 1
|
641
|
+
progress.update(llm_task, advance=1)
|
642
|
+
except Exception as e:
|
643
|
+
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
644
|
+
await asyncio.sleep(1) # Add delay after error
|
645
|
+
await asyncio.sleep(0.5) # Rate limiting between requests
|
646
|
+
|
638
647
|
# Calculate final statistics
|
639
648
|
final_successful = len([r for r in results if r.status == "success"])
|
640
649
|
final_failed = len([r for r in results if r.status == "failed"])
|
641
|
-
llm_successful = len([r for r in results if r.extraction_result is not None])
|
642
650
|
|
643
651
|
# Update retry stats
|
644
652
|
self._retry_stats = {
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=6hqYztIqL_jRuKmQOGnap2-hP8Lq1YXarUQXTFwIVxY,40841
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
|
3
|
+
spiderforce4ai-2.4.2.dist-info/METADATA,sha256=hyIp437hoWVVkbN88P6yNcKwvkvf2NpP6fyOsWxhM_I,9012
|
4
|
+
spiderforce4ai-2.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.2.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=IjoJSE-7PX8zxBF0Pl1ELQUraLU3agAtY_J6NvQSPf4,40533
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
|
3
|
-
spiderforce4ai-2.4.1.dist-info/METADATA,sha256=xVm-JdLz6Kx73Bi0DA1QG6D9Ya_OLqWd_80PNWHXLsA,9012
|
4
|
-
spiderforce4ai-2.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.1.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.1.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|