spaik-sdk 0.6.2__py3-none-any.whl
- spaik_sdk/__init__.py +21 -0
- spaik_sdk/agent/__init__.py +0 -0
- spaik_sdk/agent/base_agent.py +249 -0
- spaik_sdk/attachments/__init__.py +22 -0
- spaik_sdk/attachments/builder.py +61 -0
- spaik_sdk/attachments/file_storage_provider.py +27 -0
- spaik_sdk/attachments/mime_types.py +118 -0
- spaik_sdk/attachments/models.py +63 -0
- spaik_sdk/attachments/provider_support.py +53 -0
- spaik_sdk/attachments/storage/__init__.py +0 -0
- spaik_sdk/attachments/storage/base_file_storage.py +32 -0
- spaik_sdk/attachments/storage/impl/__init__.py +0 -0
- spaik_sdk/attachments/storage/impl/local_file_storage.py +101 -0
- spaik_sdk/audio/__init__.py +12 -0
- spaik_sdk/audio/options.py +53 -0
- spaik_sdk/audio/providers/__init__.py +1 -0
- spaik_sdk/audio/providers/google_tts.py +77 -0
- spaik_sdk/audio/providers/openai_stt.py +71 -0
- spaik_sdk/audio/providers/openai_tts.py +111 -0
- spaik_sdk/audio/stt.py +61 -0
- spaik_sdk/audio/tts.py +124 -0
- spaik_sdk/config/credentials_provider.py +10 -0
- spaik_sdk/config/env.py +59 -0
- spaik_sdk/config/env_credentials_provider.py +7 -0
- spaik_sdk/config/get_credentials_provider.py +14 -0
- spaik_sdk/image_gen/__init__.py +9 -0
- spaik_sdk/image_gen/image_generator.py +83 -0
- spaik_sdk/image_gen/options.py +24 -0
- spaik_sdk/image_gen/providers/__init__.py +0 -0
- spaik_sdk/image_gen/providers/google.py +75 -0
- spaik_sdk/image_gen/providers/openai.py +60 -0
- spaik_sdk/llm/__init__.py +0 -0
- spaik_sdk/llm/cancellation_handle.py +10 -0
- spaik_sdk/llm/consumption/__init__.py +0 -0
- spaik_sdk/llm/consumption/consumption_estimate.py +26 -0
- spaik_sdk/llm/consumption/consumption_estimate_builder.py +113 -0
- spaik_sdk/llm/consumption/consumption_extractor.py +59 -0
- spaik_sdk/llm/consumption/token_usage.py +31 -0
- spaik_sdk/llm/converters.py +146 -0
- spaik_sdk/llm/cost/__init__.py +1 -0
- spaik_sdk/llm/cost/builtin_cost_provider.py +83 -0
- spaik_sdk/llm/cost/cost_estimate.py +8 -0
- spaik_sdk/llm/cost/cost_provider.py +28 -0
- spaik_sdk/llm/extract_error_message.py +37 -0
- spaik_sdk/llm/langchain_loop_manager.py +270 -0
- spaik_sdk/llm/langchain_service.py +196 -0
- spaik_sdk/llm/message_handler.py +188 -0
- spaik_sdk/llm/streaming/__init__.py +1 -0
- spaik_sdk/llm/streaming/block_manager.py +152 -0
- spaik_sdk/llm/streaming/models.py +42 -0
- spaik_sdk/llm/streaming/streaming_content_handler.py +157 -0
- spaik_sdk/llm/streaming/streaming_event_handler.py +215 -0
- spaik_sdk/llm/streaming/streaming_state_manager.py +58 -0
- spaik_sdk/models/__init__.py +0 -0
- spaik_sdk/models/factories/__init__.py +0 -0
- spaik_sdk/models/factories/anthropic_factory.py +33 -0
- spaik_sdk/models/factories/base_model_factory.py +71 -0
- spaik_sdk/models/factories/google_factory.py +30 -0
- spaik_sdk/models/factories/ollama_factory.py +41 -0
- spaik_sdk/models/factories/openai_factory.py +50 -0
- spaik_sdk/models/llm_config.py +46 -0
- spaik_sdk/models/llm_families.py +7 -0
- spaik_sdk/models/llm_model.py +17 -0
- spaik_sdk/models/llm_wrapper.py +25 -0
- spaik_sdk/models/model_registry.py +156 -0
- spaik_sdk/models/providers/__init__.py +0 -0
- spaik_sdk/models/providers/anthropic_provider.py +29 -0
- spaik_sdk/models/providers/azure_provider.py +31 -0
- spaik_sdk/models/providers/base_provider.py +62 -0
- spaik_sdk/models/providers/google_provider.py +26 -0
- spaik_sdk/models/providers/ollama_provider.py +26 -0
- spaik_sdk/models/providers/openai_provider.py +26 -0
- spaik_sdk/models/providers/provider_type.py +90 -0
- spaik_sdk/orchestration/__init__.py +24 -0
- spaik_sdk/orchestration/base_orchestrator.py +238 -0
- spaik_sdk/orchestration/checkpoint.py +80 -0
- spaik_sdk/orchestration/models.py +103 -0
- spaik_sdk/prompt/__init__.py +0 -0
- spaik_sdk/prompt/get_prompt_loader.py +13 -0
- spaik_sdk/prompt/local_prompt_loader.py +21 -0
- spaik_sdk/prompt/prompt_loader.py +48 -0
- spaik_sdk/prompt/prompt_loader_mode.py +14 -0
- spaik_sdk/py.typed +1 -0
- spaik_sdk/recording/__init__.py +1 -0
- spaik_sdk/recording/base_playback.py +90 -0
- spaik_sdk/recording/base_recorder.py +50 -0
- spaik_sdk/recording/conditional_recorder.py +38 -0
- spaik_sdk/recording/impl/__init__.py +1 -0
- spaik_sdk/recording/impl/local_playback.py +76 -0
- spaik_sdk/recording/impl/local_recorder.py +85 -0
- spaik_sdk/recording/langchain_serializer.py +88 -0
- spaik_sdk/server/__init__.py +1 -0
- spaik_sdk/server/api/routers/__init__.py +0 -0
- spaik_sdk/server/api/routers/api_builder.py +149 -0
- spaik_sdk/server/api/routers/audio_router_factory.py +201 -0
- spaik_sdk/server/api/routers/file_router_factory.py +111 -0
- spaik_sdk/server/api/routers/thread_router_factory.py +284 -0
- spaik_sdk/server/api/streaming/__init__.py +0 -0
- spaik_sdk/server/api/streaming/format_sse_event.py +41 -0
- spaik_sdk/server/api/streaming/negotiate_streaming_response.py +8 -0
- spaik_sdk/server/api/streaming/streaming_negotiator.py +10 -0
- spaik_sdk/server/authorization/__init__.py +0 -0
- spaik_sdk/server/authorization/base_authorizer.py +64 -0
- spaik_sdk/server/authorization/base_user.py +13 -0
- spaik_sdk/server/authorization/dummy_authorizer.py +17 -0
- spaik_sdk/server/job_processor/__init__.py +0 -0
- spaik_sdk/server/job_processor/base_job_processor.py +8 -0
- spaik_sdk/server/job_processor/thread_job_processor.py +32 -0
- spaik_sdk/server/pubsub/__init__.py +1 -0
- spaik_sdk/server/pubsub/cancellation_publisher.py +7 -0
- spaik_sdk/server/pubsub/cancellation_subscriber.py +38 -0
- spaik_sdk/server/pubsub/event_publisher.py +13 -0
- spaik_sdk/server/pubsub/impl/__init__.py +1 -0
- spaik_sdk/server/pubsub/impl/local_cancellation_pubsub.py +48 -0
- spaik_sdk/server/pubsub/impl/signalr_publisher.py +36 -0
- spaik_sdk/server/queue/__init__.py +1 -0
- spaik_sdk/server/queue/agent_job_queue.py +27 -0
- spaik_sdk/server/queue/impl/__init__.py +1 -0
- spaik_sdk/server/queue/impl/azure_queue.py +24 -0
- spaik_sdk/server/response/__init__.py +0 -0
- spaik_sdk/server/response/agent_response_generator.py +39 -0
- spaik_sdk/server/response/response_generator.py +13 -0
- spaik_sdk/server/response/simple_agent_response_generator.py +14 -0
- spaik_sdk/server/services/__init__.py +0 -0
- spaik_sdk/server/services/thread_converters.py +113 -0
- spaik_sdk/server/services/thread_models.py +90 -0
- spaik_sdk/server/services/thread_service.py +91 -0
- spaik_sdk/server/storage/__init__.py +1 -0
- spaik_sdk/server/storage/base_thread_repository.py +51 -0
- spaik_sdk/server/storage/impl/__init__.py +0 -0
- spaik_sdk/server/storage/impl/in_memory_thread_repository.py +100 -0
- spaik_sdk/server/storage/impl/local_file_thread_repository.py +217 -0
- spaik_sdk/server/storage/thread_filter.py +166 -0
- spaik_sdk/server/storage/thread_metadata.py +53 -0
- spaik_sdk/thread/__init__.py +0 -0
- spaik_sdk/thread/adapters/__init__.py +0 -0
- spaik_sdk/thread/adapters/cli/__init__.py +0 -0
- spaik_sdk/thread/adapters/cli/block_display.py +92 -0
- spaik_sdk/thread/adapters/cli/display_manager.py +84 -0
- spaik_sdk/thread/adapters/cli/live_cli.py +235 -0
- spaik_sdk/thread/adapters/event_adapter.py +28 -0
- spaik_sdk/thread/adapters/streaming_block_adapter.py +57 -0
- spaik_sdk/thread/adapters/sync_adapter.py +76 -0
- spaik_sdk/thread/models.py +224 -0
- spaik_sdk/thread/thread_container.py +468 -0
- spaik_sdk/tools/__init__.py +0 -0
- spaik_sdk/tools/impl/__init__.py +0 -0
- spaik_sdk/tools/impl/mcp_tool_provider.py +93 -0
- spaik_sdk/tools/impl/search_tool_provider.py +18 -0
- spaik_sdk/tools/tool_provider.py +131 -0
- spaik_sdk/tracing/__init__.py +13 -0
- spaik_sdk/tracing/agent_trace.py +72 -0
- spaik_sdk/tracing/get_trace_sink.py +15 -0
- spaik_sdk/tracing/local_trace_sink.py +23 -0
- spaik_sdk/tracing/trace_sink.py +19 -0
- spaik_sdk/tracing/trace_sink_mode.py +14 -0
- spaik_sdk/utils/__init__.py +0 -0
- spaik_sdk/utils/init_logger.py +24 -0
- spaik_sdk-0.6.2.dist-info/METADATA +379 -0
- spaik_sdk-0.6.2.dist-info/RECORD +161 -0
- spaik_sdk-0.6.2.dist-info/WHEEL +4 -0
spaik_sdk/llm/cost/builtin_cost_provider.py
@@ -0,0 +1,83 @@
+from spaik_sdk.llm.consumption.token_usage import TokenUsage
+from spaik_sdk.llm.cost.cost_provider import CostProvider
+from spaik_sdk.models.llm_model import LLMModel
+
+
+class BuiltinCostProvider(CostProvider):
+    def get_token_pricing(self, model: LLMModel) -> TokenUsage:
+        """Get token pricing in USD cents per million tokens."""
+        name = model.name
+
+        # Anthropic Claude models
+        if name.startswith("claude-3-7-sonnet"):
+            # Claude 3.7 Sonnet: $3.00 input, $15.00 output per 1M tokens
+            return TokenUsage(
+                input_tokens=300,  # $3.00 in cents per 1M tokens
+                output_tokens=1500,  # $15.00 in cents per 1M tokens
+                reasoning_tokens=0,
+                cache_creation_tokens=375,  # 25% markup on input for cache creation
+                cache_read_tokens=30,  # 10% of input cost for cache reads
+            )
+        elif name.startswith("claude-sonnet-4") or name.startswith("claude-4-sonnet"):
+            # Claude 4 Sonnet: $3.00 input, $15.00 output per 1M tokens
+            return TokenUsage(input_tokens=300, output_tokens=1500, reasoning_tokens=0, cache_creation_tokens=375, cache_read_tokens=30)
+        elif name.startswith("claude-opus-4") or name.startswith("claude-4-opus"):
+            # Claude 4 Opus: $15.00 input, $75.00 output per 1M tokens
+            return TokenUsage(input_tokens=1500, output_tokens=7500, reasoning_tokens=0, cache_creation_tokens=1875, cache_read_tokens=150)
+
+        # OpenAI models
+        elif name.startswith("gpt-4.1"):
+            # GPT-4.1: $2.00 input, $8.00 output per 1M tokens
+            return TokenUsage(input_tokens=200, output_tokens=800, reasoning_tokens=0, cache_creation_tokens=250, cache_read_tokens=20)
+        elif name.startswith("gpt-4o"):
+            # GPT-4o: $2.50 input, $10.00 output per 1M tokens
+            return TokenUsage(input_tokens=250, output_tokens=1000, reasoning_tokens=0, cache_creation_tokens=312, cache_read_tokens=25)
+        elif name.startswith("o4-mini"):
+            # O4-mini: $0.40 input, $1.60 output per 1M tokens (based on GPT-4.1-mini pricing)
+            return TokenUsage(
+                input_tokens=40,
+                output_tokens=160,
+                reasoning_tokens=440,  # 110% markup for reasoning tokens
+                cache_creation_tokens=50,
+                cache_read_tokens=4,
+            )
+        elif name.startswith("gpt-5"):
+            if "nano" in name:
+                # GPT-5 Nano: $0.05 input, $0.40 output per 1M tokens
+                return TokenUsage(
+                    input_tokens=5,
+                    output_tokens=40,
+                    reasoning_tokens=44,  # 10% markup for reasoning
+                    cache_creation_tokens=6,  # 25% markup on input for cache creation
+                    cache_read_tokens=0,  # 90% discount: $0.005 per 1M tokens
+                )
+            elif "mini" in name:
+                # GPT-5 Mini: $0.25 input, $2.00 output per 1M tokens
+                return TokenUsage(
+                    input_tokens=25,
+                    output_tokens=200,
+                    reasoning_tokens=220,  # 10% markup for reasoning
+                    cache_creation_tokens=31,  # 25% markup on input for cache creation
+                    cache_read_tokens=2,  # 90% discount: $0.025 per 1M tokens
+                )
+            else:
+                # GPT-5: $1.25 input, $10.00 output per 1M tokens
+                return TokenUsage(
+                    input_tokens=125,
+                    output_tokens=1000,
+                    reasoning_tokens=1100,  # 10% markup for reasoning
+                    cache_creation_tokens=156,  # 25% markup on input for cache creation
+                    cache_read_tokens=12,  # 90% discount: $0.125 per 1M tokens
+                )
+
+        # Google Gemini models
+        elif name.startswith("gemini-2.5-flash"):
+            # Gemini 2.5 Flash: $0.15 input, $0.60 output per 1M tokens
+            return TokenUsage(input_tokens=15, output_tokens=60, reasoning_tokens=0, cache_creation_tokens=19, cache_read_tokens=1)
+        elif name.startswith("gemini-2.5-pro"):
+            # Gemini 2.5 Pro: $1.25 input, $10.00 output per 1M tokens
+            return TokenUsage(input_tokens=125, output_tokens=1000, reasoning_tokens=0, cache_creation_tokens=156, cache_read_tokens=12)
+
+        # Default fallback for unknown models
+        else:
+            return TokenUsage(input_tokens=0, output_tokens=0, reasoning_tokens=0, cache_creation_tokens=0, cache_read_tokens=0)
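Unknown model names fall through to the zero-cost default at the bottom of the table, so cost tracking silently reports $0 for anything not listed. A caller can cover additional models by subclassing the provider; a minimal sketch (the model name and prices below are invented for illustration, and the `TokenUsage` fields simply mirror the constructor calls shown above):

```python
from spaik_sdk.llm.consumption.token_usage import TokenUsage
from spaik_sdk.llm.cost.builtin_cost_provider import BuiltinCostProvider
from spaik_sdk.models.llm_model import LLMModel


class ExtendedCostProvider(BuiltinCostProvider):
    def get_token_pricing(self, model: LLMModel) -> TokenUsage:
        # Prices are USD cents per 1M tokens, same convention as the builtin table.
        if model.name.startswith("my-internal-model"):  # hypothetical model name
            return TokenUsage(
                input_tokens=100,   # $1.00 per 1M input tokens
                output_tokens=400,  # $4.00 per 1M output tokens
                reasoning_tokens=0,
                cache_creation_tokens=125,
                cache_read_tokens=10,
            )
        # Everything else keeps the builtin pricing (including the zero fallback).
        return super().get_token_pricing(model)
```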
spaik_sdk/llm/cost/cost_provider.py
@@ -0,0 +1,28 @@
+from abc import ABC, abstractmethod
+
+from spaik_sdk.llm.consumption.token_usage import TokenUsage
+from spaik_sdk.llm.cost.cost_estimate import CostEstimate
+from spaik_sdk.models.llm_model import LLMModel
+
+
+class CostProvider(ABC):
+    def get_cost_estimate(self, model: LLMModel, token_usage: TokenUsage) -> CostEstimate:
+        token_pricing: TokenUsage = self.get_token_pricing(model)
+
+        total = 0
+
+        total += token_usage.input_tokens * token_pricing.input_tokens
+        total += token_usage.output_tokens * token_pricing.output_tokens
+        total += token_usage.reasoning_tokens * token_pricing.reasoning_tokens
+        total += token_usage.cache_creation_tokens * token_pricing.cache_creation_tokens
+        total += token_usage.cache_read_tokens * token_pricing.cache_read_tokens
+
+        return CostEstimate(
+            cost=total / 100000000.0,
+            currency="USD",
+            is_estimate=False,
+        )
+
+    @abstractmethod
+    def get_token_pricing(self, model: LLMModel) -> TokenUsage:
+        pass
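The `100000000.0` divisor packs two unit conversions into one constant: pricing is expressed in USD cents per 1M tokens, so the weighted sum has to be divided by 1,000,000 (tokens) and then by 100 (cents to dollars). A worked example using the builtin gpt-4o entry (250/1000 cents per 1M tokens) and an invented usage of 10,000 input plus 2,000 output tokens:

```python
# cents-per-1M-tokens pricing times raw token counts, then one divide to dollars
total = 10_000 * 250 + 2_000 * 1_000  # 2_500_000 + 2_000_000 = 4_500_000
cost_usd = total / 100_000_000.0      # 0.045 -> $0.045 for the request
```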
spaik_sdk/llm/extract_error_message.py
@@ -0,0 +1,37 @@
+import json
+
+
+def extract_error_message(exception: Exception) -> str:
+    """Extract a meaningful error message from various exception types."""
+    error_str = str(exception)
+
+    # Try to parse as JSON if it looks like a structured error
+    if "Error code:" in error_str and "{" in error_str:
+        try:
+            # Extract JSON part from the error string
+            json_start = error_str.find("{")
+            json_part = error_str[json_start:]
+            error_data = json.loads(json_part)
+
+            # Handle Azure OpenAI content filter errors
+            if "error" in error_data:
+                error_info = error_data["error"]
+                if error_info.get("code") == "content_filter":
+                    return f"Content filtered: {error_info.get('message', 'Content policy violation')}"
+                else:
+                    return error_info.get("message", error_str)
+        except (json.JSONDecodeError, KeyError):
+            pass
+
+    # Handle other common error patterns
+    if "content management policy" in error_str.lower():
+        return "Content was filtered due to content management policy"
+    elif "rate limit" in error_str.lower():
+        return "Rate limit exceeded"
+    elif "authentication" in error_str.lower():
+        return "Authentication failed"
+    elif "quota" in error_str.lower():
+        return "Quota exceeded"
+
+    # Return the original error message if no specific pattern matches
+    return error_str
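For reference, this is how the helper behaves on the two paths it handles: the structured "Error code: ... {json}" shape and the plain keyword matches. The error strings below are fabricated to fit those shapes; real provider messages may differ:

```python
from spaik_sdk.llm.extract_error_message import extract_error_message

azure_error = Exception(
    'Error code: 400 - {"error": {"code": "content_filter", '
    '"message": "The response was filtered"}}'
)
print(extract_error_message(azure_error))
# Content filtered: The response was filtered

print(extract_error_message(Exception("Rate limit reached for requests")))
# Rate limit exceeded
```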
spaik_sdk/llm/langchain_loop_manager.py
@@ -0,0 +1,270 @@
+"""
+LangChain Loop Manager - Event Loop Isolation for Models with Event Loop Issues
+
+This module exists to work around a fundamental incompatibility between certain
+model providers (Google/Gemini, Ollama) and the way asyncio.run() manages event loops.
+
+THE PROBLEM:
+============
+Some model providers (Google's gRPC-based clients, Ollama's async HTTP client, etc.)
+create internal connections and async state that get bound to the specific
+event loop instance they're created in. When that event loop closes, these
+internal connections become unusable and raise "Event loop is closed" errors.
+
+This manifests in two scenarios:
+
+1. STANDALONE SCRIPTS with multiple asyncio.run() calls:
+```python
+asyncio.run(main())  # Creates event loop, Google client binds to it
+# Event loop closes here
+asyncio.run(main())  # Creates NEW event loop, but Google client still references old one
+# → RuntimeError: Event loop is closed
+```
+
+2. WEB SERVERS (FastAPI, etc.) with persistent event loops:
+```python
+# Web server starts one event loop and keeps it running
+# All requests use the SAME loop, so Google client works fine
+```
+
+THE WORKAROUND:
+===============
+We detect the execution context and apply different strategies:
+
+1. **Standalone Context** (detected by stack frame inspection):
+   - Use a persistent background event loop in a separate thread
+   - All affected model operations run in this persistent loop
+   - The background loop never closes, so the clients stay happy
+
+2. **Web Server Context** (detected by uvicorn/fastapi in call stack):
+   - Use normal execution (no loop manager)
+   - The web server's persistent loop handles everything naturally
+
+WHY WEB SERVERS CAN'T USE THE LOOP MANAGER:
+===========================================
+Web servers MUST NOT use the external event loop approach because:
+
+1. **Streaming breaks**: When operations run in a separate thread's event loop,
+   you lose the ability to stream results back to the web server's event loop
+   in real-time. The thread boundary kills the streaming semantics.
+
+2. **Request context isolation**: Web frameworks expect all operations for a
+   request to happen in the same event loop to maintain proper async context,
+   request isolation, and cancellation semantics.
+
+3. **Performance overhead**: Cross-thread async communication adds significant
+   latency and complexity that's unnecessary when the web server already
+   provides a persistent event loop.
+
+The key insight: Web servers naturally solve these client issues by having
+persistent event loops, so they don't need (and can't use) the workaround.
+
+DETECTION STRATEGY:
+===================
+We use multiple heuristics to detect execution context:
+- Thread names (uvicorn, fastapi, etc.)
+- Call stack inspection (looking for web framework files)
+- Event loop state (persistent vs transient)
+
+This is admittedly hacky, but it's the only way to transparently handle
+both contexts without requiring users to explicitly configure the behavior.
+
+AFFECTED MODELS:
+================
+- All Google/Gemini models (provider_type == ProviderType.GOOGLE)
+- All Ollama models (provider_type == ProviderType.OLLAMA)
+- Other providers (Anthropic, OpenAI) are unaffected
+
+EXAMPLES:
+=========
+```python
+# This would fail with Gemini without the loop manager:
+asyncio.run(agent.get_response("hello"))
+asyncio.run(agent.get_response("world"))  # ← RuntimeError
+
+# This works fine with web servers (no loop manager needed):
+@app.post("/chat")
+async def chat():
+    return await agent.get_response("hello")  # Same persistent loop
+```
+
+ALTERNATIVES CONSIDERED:
+========================
+1. Process isolation - Too heavy, breaks streaming
+2. Raw Google API - Bypasses LangChain ecosystem
+3. Client recreation - Google's state is deeper than we can reach
+4. Thread-per-call - Breaks async/await semantics
+5. User configuration - Poor DX, easy to get wrong
+
+WHY THIS IS NECESSARY:
+======================
+Some client designs assume a long-lived event loop (like in web servers).
+The asyncio.run() pattern creates short-lived loops that violate this assumption.
+Other providers (Anthropic, OpenAI) handle this gracefully by recreating connections
+or using stateless clients.
+
+This is a known limitation that's unlikely to be fixed in these clients
+since it would require significant architectural changes on their end.
+"""
+
+import asyncio
+import threading
+import time
+from typing import Optional
+
+from spaik_sdk.models.llm_config import LLMConfig
+from spaik_sdk.models.providers.provider_type import ProviderType
+from spaik_sdk.utils.init_logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class LangChainLoopManager:
+    """Manages a persistent event loop for langchain operations"""
+
+    def __init__(self):
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._loop_thread: Optional[threading.Thread] = None
+        self._lock = threading.Lock()
+
+    def get_loop(self) -> asyncio.AbstractEventLoop:
+        """Get or create the persistent event loop for langchain operations"""
+        with self._lock:
+            if self._loop is None or self._loop.is_closed():
+                self._loop = None
+
+                def run_loop():
+                    self._loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(self._loop)
+                    self._loop.run_forever()
+
+                self._loop_thread = threading.Thread(target=run_loop, daemon=True)
+                self._loop_thread.start()
+
+                # Wait for loop to be created
+                while self._loop is None:
+                    time.sleep(0.01)
+
+            return self._loop
+
+    async def run_in_loop(self, coro):
+        """Run a coroutine in the langchain loop and return the result"""
+        loop = self.get_loop()
+
+        try:
+            current_loop = asyncio.get_running_loop()
+            if current_loop != loop:
+                # We're in a different loop context, run in langchain loop
+                future = asyncio.run_coroutine_threadsafe(coro, loop)
+                return future.result()
+            else:
+                # We're already in the langchain loop, run directly
+                return await coro
+        except RuntimeError:
+            # No running loop, run in langchain loop
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            return future.result()
+
+    async def stream_in_loop(self, async_generator):
+        """Stream results from an async generator running in the langchain loop"""
+        loop = self.get_loop()
+
+        try:
+            current_loop = asyncio.get_running_loop()
+            if current_loop != loop:
+                # We're in a different loop context, collect all results first
+                future = asyncio.run_coroutine_threadsafe(self._collect_from_async_generator(async_generator), loop)
+                results = future.result()
+                for result in results:
+                    yield result
+            else:
+                # We're already in the langchain loop, stream directly
+                async for result in async_generator:
+                    yield result
+        except RuntimeError:
+            # No running loop, collect all results first
+            future = asyncio.run_coroutine_threadsafe(self._collect_from_async_generator(async_generator), loop)
+            results = future.result()
+            for result in results:
+                yield result
+
+    async def _collect_from_async_generator(self, async_generator):
+        """Collect all items from an async generator"""
+        results = []
+        async for item in async_generator:
+            results.append(item)
+        return results
+
+
+def _is_in_web_server_context() -> bool:
+    """Detect if we're running in a web server/FastAPI context vs standalone asyncio.run()."""
+    try:
+        # Check if we're in an event loop
+        loop = asyncio.get_running_loop()
+
+        # FastAPI/web servers typically run event loops indefinitely
+        # Check for common web server indicators in the call stack
+        import inspect
+        import threading
+
+        # Get current thread name - web servers often have descriptive thread names
+        thread_name = threading.current_thread().name
+        if any(name in thread_name.lower() for name in ["uvicorn", "fastapi", "starlette", "asgi", "wsgi"]):
+            return True
+
+        # Check the call stack for web framework indicators
+        frame = inspect.currentframe()
+        try:
+            while frame:
+                frame_info = inspect.getframeinfo(frame)
+                filename = frame_info.filename.lower()
+
+                # Look for web framework files in the call stack
+                if any(
+                    indicator in filename
+                    for indicator in ["uvicorn", "fastapi", "starlette", "asgi", "wsgi", "tornado", "aiohttp", "sanic", "quart"]
+                ):
+                    return True
+
+                frame = frame.f_back
+        finally:
+            del frame
+
+        # Check if the event loop has been running for a while (web servers)
+        # vs just started (asyncio.run())
+        if hasattr(loop, "_ready") and hasattr(loop._ready, "__len__") and len(loop._ready) > 0:  # type: ignore[arg-type]
+            # This is a heuristic - web servers tend to have more pending tasks
+            return True
+
+        return False
+
+    except RuntimeError:
+        # No running event loop - definitely not in a web server
+        return False
+    except Exception as e:
+        logger.debug(f"Error detecting web server context: {e}")
+        return False
+
+
+def _needs_loop_manager(llm_config: LLMConfig) -> bool:
+    """Check if this model provider might have event loop issues with multiple asyncio.run() calls."""
+    return llm_config.provider_type in (ProviderType.GOOGLE, ProviderType.OLLAMA)
+
+
+def should_use_loop_manager(llm_config: LLMConfig) -> bool:
+    """Determine if we should use the loop manager for models with event loop issues."""
+    if not _needs_loop_manager(llm_config):
+        return False
+
+    # Only use loop manager if NOT in a web server context
+    # Web servers have persistent event loops, so the issue doesn't occur
+    return not _is_in_web_server_context()
+
+
+# Global instance
+_loop_manager = LangChainLoopManager()
+
+
+def get_langchain_loop_manager() -> LangChainLoopManager:
+    """Get the global langchain loop manager instance"""
+    return _loop_manager
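The module docstring describes the standalone-script failure mode at length; for completeness, here is a minimal sketch of how a script can route an affected coroutine through the persistent background loop. The `fake_model_call` coroutine is a stand-in for whatever actually drives the Google/Ollama client (it is not an SDK function), and in real code the detour is gated by `should_use_loop_manager(llm_config)` rather than applied unconditionally:

```python
import asyncio

from spaik_sdk.llm.langchain_loop_manager import get_langchain_loop_manager


async def fake_model_call(prompt: str) -> str:
    # Placeholder for the coroutine that would drive the Gemini/Ollama client.
    await asyncio.sleep(0)
    return f"echo: {prompt}"


async def main(prompt: str) -> str:
    # Run the coroutine inside the persistent background loop so the client's
    # internal connections never see their event loop close.
    return await get_langchain_loop_manager().run_in_loop(fake_model_call(prompt))


# The scenario from the docstring: two back-to-back asyncio.run() calls.
# Both executions land in the same background loop, so the second call
# does not hit "RuntimeError: Event loop is closed".
print(asyncio.run(main("hello")))
print(asyncio.run(main("world")))
```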
spaik_sdk/llm/langchain_service.py
@@ -0,0 +1,196 @@
+import json
+import logging
+import time
+import uuid
+from typing import Dict, List, Optional, Type, TypeVar, cast
+
+from langchain_core.runnables import RunnableConfig
+from langchain_core.tools import BaseTool
+
+# Using create_react_agent because create_agent from langchain.agents
+# uses invoke() internally and does NOT emit on_chat_model_stream events,
+# which breaks token-level streaming. See: https://github.com/langchain-ai/langchain/issues/34017
+from langgraph.prebuilt import create_react_agent
+from pydantic import BaseModel
+
+from spaik_sdk.attachments.file_storage_provider import get_file_storage
+from spaik_sdk.attachments.models import Attachment
+from spaik_sdk.config.env import env_config
+from spaik_sdk.llm.cancellation_handle import CancellationHandle
+from spaik_sdk.llm.extract_error_message import extract_error_message
+from spaik_sdk.llm.langchain_loop_manager import get_langchain_loop_manager, should_use_loop_manager
+from spaik_sdk.llm.message_handler import MessageHandler
+from spaik_sdk.models.llm_config import LLMConfig
+from spaik_sdk.recording.base_playback import BasePlayback
+from spaik_sdk.recording.base_recorder import BaseRecorder
+from spaik_sdk.thread.models import MessageBlock, MessageBlockType, ThreadMessage
+from spaik_sdk.thread.thread_container import ThreadContainer
+from spaik_sdk.utils.init_logger import init_logger
+
+DEBUG = env_config.is_debug_mode("langchain")
+logger = init_logger(__name__)
+
+# Suppress noisy HTTP request logs from anthropic and httpx
+logging.getLogger("anthropic._base_client").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+
+if DEBUG:
+    from langchain_core.globals import set_debug
+
+    set_debug(True)
+
+config = RunnableConfig(recursion_limit=100)
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class LangChainService:
+    def __init__(
+        self,
+        llm_config: LLMConfig,
+        thread_container: ThreadContainer,
+        assistant_name: str,
+        assistant_id: str,
+        recorder: Optional[BaseRecorder] = None,
+        playback: Optional[BasePlayback] = None,
+        cancellation_handle: Optional[CancellationHandle] = None,
+    ):
+        self.llm_config = llm_config
+
+        self.thread_container = thread_container
+        self.message_handler = MessageHandler(self.thread_container, assistant_name, assistant_id, recorder)
+        self.is_used = False
+        self.recorder = recorder
+        self.playback = playback
+        self.cancellation_handle = cancellation_handle
+
+    def create_executor(self, tools: list[BaseTool]):
+        return create_react_agent(self._get_model(), tools)
+
+    def _get_model(self):
+        return self.llm_config.get_model_wrapper().get_langchain_model()
+
+    def get_structured_response(self, input: str, output_schema: Type[T]) -> T:
+        # Handle playback mode
+        if self.playback is not None:
+            ret = output_schema.model_validate(next(self.playback))
+            self._on_request_completed()
+            return ret
+
+        self.thread_container.add_message(
+            ThreadMessage(
+                id=str(uuid.uuid4()),
+                ai=False,
+                author_id="structured_response",
+                author_name="structured_response",
+                timestamp=int(time.time() * 1000),
+                blocks=[MessageBlock(id=str(uuid.uuid4()), streaming=False, type=MessageBlockType.PLAIN, content=input)],
+            )
+        )
+        structured_model = self._get_model().with_structured_output(output_schema)
+        ret = cast(T, structured_model.invoke(input))
+
+        # Record structured response if recorder is present
+        if self.recorder is not None:
+            self.recorder.record_structured(ret.model_dump())
+
+        as_json_block = "```json\n" + json.dumps(ret.model_dump()) + "\n```"
+        self.thread_container.add_message(
+            ThreadMessage(
+                id=str(uuid.uuid4()),
+                ai=True,
+                author_id=self.message_handler.assistant_id,
+                author_name=self.message_handler.assistant_name,
+                timestamp=int(time.time() * 1000),
+                blocks=[MessageBlock(id=str(uuid.uuid4()), streaming=False, type=MessageBlockType.PLAIN, content=as_json_block)],
+            )
+        )
+        self._on_request_completed()
+        return ret
+
+    async def execute_stream_tokens(
+        self,
+        user_input: Optional[str] = None,
+        tools: List[BaseTool] = [],
+        attachments: Optional[List[Attachment]] = None,
+    ):
+        """Execute agent and yield individual tokens as they arrive.
+
+        Gemini models have weird hiccups regarding event loops and require a hack.
+
+        See documentation of LangChainLoopManager for more details.
+        """
+        if self.is_used:
+            raise ValueError("LangChainService is single-use; create a new instance for each request")
+        self.is_used = True
+
+        try:
+            if should_use_loop_manager(self.llm_config):
+                logger.debug("Using loop manager for Google model in standalone context")
+                async for token_data in get_langchain_loop_manager().stream_in_loop(
+                    self._execute_stream_tokens_direct(user_input, tools, attachments)
+                ):
+                    yield token_data
+            else:
+                async for token_data in self._execute_stream_tokens_direct(user_input, tools, attachments):
+                    yield token_data
+
+        except Exception as e:
+            yield {"type": "error", "error": self._handle_error(e)}
+        finally:
+            self._on_request_completed()
+
+    async def _execute_stream_tokens_direct(
+        self,
+        user_input: Optional[str] = None,
+        tools: List[BaseTool] = [],
+        attachments: Optional[List[Attachment]] = None,
+    ):
+        """Direct execution of stream tokens (core logic)"""
+        if self.playback is not None:
+            # Playback mode - yield recorded tokens
+            async for token_data in self.message_handler.process_agent_token_stream(self.playback):
+                # Check for cancellation even in playback mode
+                if self.cancellation_handle and await self.cancellation_handle.is_cancelled():
+                    self.message_handler.handle_cancellation()
+                    return
+                yield token_data
+            return
+
+        agent = self.create_executor(tools)
+        if user_input is not None:
+            self.message_handler.add_user_message(user_input, "user", "user", attachments)
+
+        # Get messages - use multimodal converter if file_storage is available
+        file_storage = get_file_storage()
+        if file_storage is not None:
+            provider_family = self.llm_config.model.family
+            messages = await self.thread_container.get_langchain_messages_multimodal(file_storage, provider_family)
+        else:
+            messages = self.thread_container.get_langchain_messages()
+
+        # Use astream_events to get individual token events
+        agent_stream = agent.astream_events({"messages": messages}, version="v2", config=config)
+
+        # Let MessageHandler handle the token stream processing
+        async for token_data in self.message_handler.process_agent_token_stream(agent_stream):
+            if self.cancellation_handle and await self.cancellation_handle.is_cancelled():
+                logger.info("Cancellation detected, stopping stream")
+                self.message_handler.handle_cancellation()
+                return
+            yield token_data
+
+    def _handle_error(self, error: Exception) -> Dict[str, str]:
+        """Handle and format errors consistently."""
+        error_message = extract_error_message(error)
+        logger.error(f"Error executing agent: {error_message}")
+
+        # Add error to thread container
+        self.message_handler.add_error(error_message, "system")
+
+        return {"error": error_message}
+
+    def _on_request_completed(self):
+        if self.recorder is not None:
+            self.recorder.request_completed()
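A minimal consumption sketch for the service above. The diff does not show how `LLMConfig` or `ThreadContainer` are constructed, so those arguments are left as deliberate placeholders; the Pydantic schema and prompt are invented:

```python
from pydantic import BaseModel

from spaik_sdk.llm.langchain_service import LangChainService


class Sentiment(BaseModel):
    label: str
    confidence: float


service = LangChainService(
    llm_config=...,        # an LLMConfig for the target model (construction not shown in this diff)
    thread_container=...,  # the conversation state the service appends to
    assistant_name="assistant",
    assistant_id="assistant",
)

# Blocking, single-shot structured call: the prompt and the JSON-rendered result
# are both recorded in the thread container as messages.
result = service.get_structured_response(
    "Classify the sentiment of: 'the onboarding flow was painless'",
    Sentiment,
)
print(result.label, result.confidence)
```

The streaming counterpart is `execute_stream_tokens`, which is explicitly single-use per instance, so a fresh `LangChainService` is needed for each streamed request.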