spiderforce4ai-2.4.1-py3-none-any.whl → spiderforce4ai-2.4.2-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/__init__.py +31 -23
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.2.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.1.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.1.dist-info → spiderforce4ai-2.4.2.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -460,28 +460,6 @@ class SpiderForce4AI:
|
|
460
460
|
if config.output_dir:
|
461
461
|
await _save_markdown_async(url, markdown, config)
|
462
462
|
|
463
|
-
# Handle post-extraction if configured
|
464
|
-
if config.post_extraction_agent and result.status == "success":
|
465
|
-
try:
|
466
|
-
post_config = PostExtractionConfig(
|
467
|
-
model=config.post_extraction_agent["model"],
|
468
|
-
messages=config.post_extraction_agent["messages"],
|
469
|
-
api_key=config.post_extraction_agent["api_key"],
|
470
|
-
max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
|
471
|
-
temperature=config.post_extraction_agent.get("temperature", 0.7),
|
472
|
-
base_url=config.post_extraction_agent.get("base_url"),
|
473
|
-
combine_output=bool(config.post_extraction_agent_save_to_file),
|
474
|
-
output_file=config.post_extraction_agent_save_to_file,
|
475
|
-
custom_transform_function=config.post_agent_transformer_function
|
476
|
-
)
|
477
|
-
|
478
|
-
agent = PostExtractionAgent(post_config)
|
479
|
-
extraction_result = await agent.process_content(url, markdown)
|
480
|
-
if extraction_result:
|
481
|
-
result.extraction_result = extraction_result
|
482
|
-
except Exception as e:
|
483
|
-
console.print(f"[red]Error in post-extraction processing for {url}: {str(e)}[/red]")
|
484
|
-
|
485
463
|
await _send_webhook_async(result, config)
|
486
464
|
|
487
465
|
self.crawl_results.append(result)
|
@@ -635,10 +613,40 @@ class SpiderForce4AI:
|
|
635
613
|
except Exception as e:
|
636
614
|
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
637
615
|
|
616
|
+
# Process LLM requests sequentially after all crawling is complete
|
617
|
+
llm_successful = 0
|
618
|
+
if config.post_extraction_agent:
|
619
|
+
console.print("\n[cyan]Starting post-extraction processing...[/cyan]")
|
620
|
+
successful_results = [r for r in results if r.status == "success"]
|
621
|
+
llm_task = progress.add_task("[cyan]Post-extraction processing...", total=len(successful_results))
|
622
|
+
|
623
|
+
post_config = PostExtractionConfig(
|
624
|
+
model=config.post_extraction_agent["model"],
|
625
|
+
messages=config.post_extraction_agent["messages"],
|
626
|
+
api_key=config.post_extraction_agent["api_key"],
|
627
|
+
max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
|
628
|
+
temperature=config.post_extraction_agent.get("temperature", 0.7),
|
629
|
+
base_url=config.post_extraction_agent.get("base_url"),
|
630
|
+
combine_output=bool(config.post_extraction_agent_save_to_file),
|
631
|
+
output_file=config.post_extraction_agent_save_to_file,
|
632
|
+
custom_transform_function=config.post_agent_transformer_function
|
633
|
+
)
|
634
|
+
agent = PostExtractionAgent(post_config)
|
635
|
+
|
636
|
+
for result in successful_results:
|
637
|
+
try:
|
638
|
+
result.extraction_result = await agent.process_content(result.url, result.markdown)
|
639
|
+
if result.extraction_result:
|
640
|
+
llm_successful += 1
|
641
|
+
progress.update(llm_task, advance=1)
|
642
|
+
except Exception as e:
|
643
|
+
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
644
|
+
await asyncio.sleep(1) # Add delay after error
|
645
|
+
await asyncio.sleep(0.5) # Rate limiting between requests
|
646
|
+
|
638
647
|
# Calculate final statistics
|
639
648
|
final_successful = len([r for r in results if r.status == "success"])
|
640
649
|
final_failed = len([r for r in results if r.status == "failed"])
|
641
|
-
llm_successful = len([r for r in results if r.extraction_result is not None])
|
642
650
|
|
643
651
|
# Update retry stats
|
644
652
|
self._retry_stats = {
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=6hqYztIqL_jRuKmQOGnap2-hP8Lq1YXarUQXTFwIVxY,40841
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
|
3
|
+
spiderforce4ai-2.4.2.dist-info/METADATA,sha256=hyIp437hoWVVkbN88P6yNcKwvkvf2NpP6fyOsWxhM_I,9012
|
4
|
+
spiderforce4ai-2.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.2.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.2.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=IjoJSE-7PX8zxBF0Pl1ELQUraLU3agAtY_J6NvQSPf4,40533
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
|
3
|
-
spiderforce4ai-2.4.1.dist-info/METADATA,sha256=xVm-JdLz6Kx73Bi0DA1QG6D9Ya_OLqWd_80PNWHXLsA,9012
|
4
|
-
spiderforce4ai-2.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.1.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.1.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|