spaik_sdk-0.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. spaik_sdk/__init__.py +21 -0
  2. spaik_sdk/agent/__init__.py +0 -0
  3. spaik_sdk/agent/base_agent.py +249 -0
  4. spaik_sdk/attachments/__init__.py +22 -0
  5. spaik_sdk/attachments/builder.py +61 -0
  6. spaik_sdk/attachments/file_storage_provider.py +27 -0
  7. spaik_sdk/attachments/mime_types.py +118 -0
  8. spaik_sdk/attachments/models.py +63 -0
  9. spaik_sdk/attachments/provider_support.py +53 -0
  10. spaik_sdk/attachments/storage/__init__.py +0 -0
  11. spaik_sdk/attachments/storage/base_file_storage.py +32 -0
  12. spaik_sdk/attachments/storage/impl/__init__.py +0 -0
  13. spaik_sdk/attachments/storage/impl/local_file_storage.py +101 -0
  14. spaik_sdk/audio/__init__.py +12 -0
  15. spaik_sdk/audio/options.py +53 -0
  16. spaik_sdk/audio/providers/__init__.py +1 -0
  17. spaik_sdk/audio/providers/google_tts.py +77 -0
  18. spaik_sdk/audio/providers/openai_stt.py +71 -0
  19. spaik_sdk/audio/providers/openai_tts.py +111 -0
  20. spaik_sdk/audio/stt.py +61 -0
  21. spaik_sdk/audio/tts.py +124 -0
  22. spaik_sdk/config/credentials_provider.py +10 -0
  23. spaik_sdk/config/env.py +59 -0
  24. spaik_sdk/config/env_credentials_provider.py +7 -0
  25. spaik_sdk/config/get_credentials_provider.py +14 -0
  26. spaik_sdk/image_gen/__init__.py +9 -0
  27. spaik_sdk/image_gen/image_generator.py +83 -0
  28. spaik_sdk/image_gen/options.py +24 -0
  29. spaik_sdk/image_gen/providers/__init__.py +0 -0
  30. spaik_sdk/image_gen/providers/google.py +75 -0
  31. spaik_sdk/image_gen/providers/openai.py +60 -0
  32. spaik_sdk/llm/__init__.py +0 -0
  33. spaik_sdk/llm/cancellation_handle.py +10 -0
  34. spaik_sdk/llm/consumption/__init__.py +0 -0
  35. spaik_sdk/llm/consumption/consumption_estimate.py +26 -0
  36. spaik_sdk/llm/consumption/consumption_estimate_builder.py +113 -0
  37. spaik_sdk/llm/consumption/consumption_extractor.py +59 -0
  38. spaik_sdk/llm/consumption/token_usage.py +31 -0
  39. spaik_sdk/llm/converters.py +146 -0
  40. spaik_sdk/llm/cost/__init__.py +1 -0
  41. spaik_sdk/llm/cost/builtin_cost_provider.py +83 -0
  42. spaik_sdk/llm/cost/cost_estimate.py +8 -0
  43. spaik_sdk/llm/cost/cost_provider.py +28 -0
  44. spaik_sdk/llm/extract_error_message.py +37 -0
  45. spaik_sdk/llm/langchain_loop_manager.py +270 -0
  46. spaik_sdk/llm/langchain_service.py +196 -0
  47. spaik_sdk/llm/message_handler.py +188 -0
  48. spaik_sdk/llm/streaming/__init__.py +1 -0
  49. spaik_sdk/llm/streaming/block_manager.py +152 -0
  50. spaik_sdk/llm/streaming/models.py +42 -0
  51. spaik_sdk/llm/streaming/streaming_content_handler.py +157 -0
  52. spaik_sdk/llm/streaming/streaming_event_handler.py +215 -0
  53. spaik_sdk/llm/streaming/streaming_state_manager.py +58 -0
  54. spaik_sdk/models/__init__.py +0 -0
  55. spaik_sdk/models/factories/__init__.py +0 -0
  56. spaik_sdk/models/factories/anthropic_factory.py +33 -0
  57. spaik_sdk/models/factories/base_model_factory.py +71 -0
  58. spaik_sdk/models/factories/google_factory.py +30 -0
  59. spaik_sdk/models/factories/ollama_factory.py +41 -0
  60. spaik_sdk/models/factories/openai_factory.py +50 -0
  61. spaik_sdk/models/llm_config.py +46 -0
  62. spaik_sdk/models/llm_families.py +7 -0
  63. spaik_sdk/models/llm_model.py +17 -0
  64. spaik_sdk/models/llm_wrapper.py +25 -0
  65. spaik_sdk/models/model_registry.py +156 -0
  66. spaik_sdk/models/providers/__init__.py +0 -0
  67. spaik_sdk/models/providers/anthropic_provider.py +29 -0
  68. spaik_sdk/models/providers/azure_provider.py +31 -0
  69. spaik_sdk/models/providers/base_provider.py +62 -0
  70. spaik_sdk/models/providers/google_provider.py +26 -0
  71. spaik_sdk/models/providers/ollama_provider.py +26 -0
  72. spaik_sdk/models/providers/openai_provider.py +26 -0
  73. spaik_sdk/models/providers/provider_type.py +90 -0
  74. spaik_sdk/orchestration/__init__.py +24 -0
  75. spaik_sdk/orchestration/base_orchestrator.py +238 -0
  76. spaik_sdk/orchestration/checkpoint.py +80 -0
  77. spaik_sdk/orchestration/models.py +103 -0
  78. spaik_sdk/prompt/__init__.py +0 -0
  79. spaik_sdk/prompt/get_prompt_loader.py +13 -0
  80. spaik_sdk/prompt/local_prompt_loader.py +21 -0
  81. spaik_sdk/prompt/prompt_loader.py +48 -0
  82. spaik_sdk/prompt/prompt_loader_mode.py +14 -0
  83. spaik_sdk/py.typed +1 -0
  84. spaik_sdk/recording/__init__.py +1 -0
  85. spaik_sdk/recording/base_playback.py +90 -0
  86. spaik_sdk/recording/base_recorder.py +50 -0
  87. spaik_sdk/recording/conditional_recorder.py +38 -0
  88. spaik_sdk/recording/impl/__init__.py +1 -0
  89. spaik_sdk/recording/impl/local_playback.py +76 -0
  90. spaik_sdk/recording/impl/local_recorder.py +85 -0
  91. spaik_sdk/recording/langchain_serializer.py +88 -0
  92. spaik_sdk/server/__init__.py +1 -0
  93. spaik_sdk/server/api/routers/__init__.py +0 -0
  94. spaik_sdk/server/api/routers/api_builder.py +149 -0
  95. spaik_sdk/server/api/routers/audio_router_factory.py +201 -0
  96. spaik_sdk/server/api/routers/file_router_factory.py +111 -0
  97. spaik_sdk/server/api/routers/thread_router_factory.py +284 -0
  98. spaik_sdk/server/api/streaming/__init__.py +0 -0
  99. spaik_sdk/server/api/streaming/format_sse_event.py +41 -0
  100. spaik_sdk/server/api/streaming/negotiate_streaming_response.py +8 -0
  101. spaik_sdk/server/api/streaming/streaming_negotiator.py +10 -0
  102. spaik_sdk/server/authorization/__init__.py +0 -0
  103. spaik_sdk/server/authorization/base_authorizer.py +64 -0
  104. spaik_sdk/server/authorization/base_user.py +13 -0
  105. spaik_sdk/server/authorization/dummy_authorizer.py +17 -0
  106. spaik_sdk/server/job_processor/__init__.py +0 -0
  107. spaik_sdk/server/job_processor/base_job_processor.py +8 -0
  108. spaik_sdk/server/job_processor/thread_job_processor.py +32 -0
  109. spaik_sdk/server/pubsub/__init__.py +1 -0
  110. spaik_sdk/server/pubsub/cancellation_publisher.py +7 -0
  111. spaik_sdk/server/pubsub/cancellation_subscriber.py +38 -0
  112. spaik_sdk/server/pubsub/event_publisher.py +13 -0
  113. spaik_sdk/server/pubsub/impl/__init__.py +1 -0
  114. spaik_sdk/server/pubsub/impl/local_cancellation_pubsub.py +48 -0
  115. spaik_sdk/server/pubsub/impl/signalr_publisher.py +36 -0
  116. spaik_sdk/server/queue/__init__.py +1 -0
  117. spaik_sdk/server/queue/agent_job_queue.py +27 -0
  118. spaik_sdk/server/queue/impl/__init__.py +1 -0
  119. spaik_sdk/server/queue/impl/azure_queue.py +24 -0
  120. spaik_sdk/server/response/__init__.py +0 -0
  121. spaik_sdk/server/response/agent_response_generator.py +39 -0
  122. spaik_sdk/server/response/response_generator.py +13 -0
  123. spaik_sdk/server/response/simple_agent_response_generator.py +14 -0
  124. spaik_sdk/server/services/__init__.py +0 -0
  125. spaik_sdk/server/services/thread_converters.py +113 -0
  126. spaik_sdk/server/services/thread_models.py +90 -0
  127. spaik_sdk/server/services/thread_service.py +91 -0
  128. spaik_sdk/server/storage/__init__.py +1 -0
  129. spaik_sdk/server/storage/base_thread_repository.py +51 -0
  130. spaik_sdk/server/storage/impl/__init__.py +0 -0
  131. spaik_sdk/server/storage/impl/in_memory_thread_repository.py +100 -0
  132. spaik_sdk/server/storage/impl/local_file_thread_repository.py +217 -0
  133. spaik_sdk/server/storage/thread_filter.py +166 -0
  134. spaik_sdk/server/storage/thread_metadata.py +53 -0
  135. spaik_sdk/thread/__init__.py +0 -0
  136. spaik_sdk/thread/adapters/__init__.py +0 -0
  137. spaik_sdk/thread/adapters/cli/__init__.py +0 -0
  138. spaik_sdk/thread/adapters/cli/block_display.py +92 -0
  139. spaik_sdk/thread/adapters/cli/display_manager.py +84 -0
  140. spaik_sdk/thread/adapters/cli/live_cli.py +235 -0
  141. spaik_sdk/thread/adapters/event_adapter.py +28 -0
  142. spaik_sdk/thread/adapters/streaming_block_adapter.py +57 -0
  143. spaik_sdk/thread/adapters/sync_adapter.py +76 -0
  144. spaik_sdk/thread/models.py +224 -0
  145. spaik_sdk/thread/thread_container.py +468 -0
  146. spaik_sdk/tools/__init__.py +0 -0
  147. spaik_sdk/tools/impl/__init__.py +0 -0
  148. spaik_sdk/tools/impl/mcp_tool_provider.py +93 -0
  149. spaik_sdk/tools/impl/search_tool_provider.py +18 -0
  150. spaik_sdk/tools/tool_provider.py +131 -0
  151. spaik_sdk/tracing/__init__.py +13 -0
  152. spaik_sdk/tracing/agent_trace.py +72 -0
  153. spaik_sdk/tracing/get_trace_sink.py +15 -0
  154. spaik_sdk/tracing/local_trace_sink.py +23 -0
  155. spaik_sdk/tracing/trace_sink.py +19 -0
  156. spaik_sdk/tracing/trace_sink_mode.py +14 -0
  157. spaik_sdk/utils/__init__.py +0 -0
  158. spaik_sdk/utils/init_logger.py +24 -0
  159. spaik_sdk-0.6.2.dist-info/METADATA +379 -0
  160. spaik_sdk-0.6.2.dist-info/RECORD +161 -0
  161. spaik_sdk-0.6.2.dist-info/WHEEL +4 -0
spaik_sdk/llm/cost/builtin_cost_provider.py
@@ -0,0 +1,83 @@
+ from spaik_sdk.llm.consumption.token_usage import TokenUsage
+ from spaik_sdk.llm.cost.cost_provider import CostProvider
+ from spaik_sdk.models.llm_model import LLMModel
+
+
+ class BuiltinCostProvider(CostProvider):
+     def get_token_pricing(self, model: LLMModel) -> TokenUsage:
+         """Get token pricing in USD cents per million tokens."""
+         name = model.name
+
+         # Anthropic Claude models
+         if name.startswith("claude-3-7-sonnet"):
+             # Claude 3.7 Sonnet: $3.00 input, $15.00 output per 1M tokens
+             return TokenUsage(
+                 input_tokens=300,  # $3.00 in cents per 1M tokens
+                 output_tokens=1500,  # $15.00 in cents per 1M tokens
+                 reasoning_tokens=0,
+                 cache_creation_tokens=375,  # 25% markup on input for cache creation
+                 cache_read_tokens=30,  # 10% of input cost for cache reads
+             )
+         elif name.startswith("claude-sonnet-4") or name.startswith("claude-4-sonnet"):
+             # Claude 4 Sonnet: $3.00 input, $15.00 output per 1M tokens
+             return TokenUsage(input_tokens=300, output_tokens=1500, reasoning_tokens=0, cache_creation_tokens=375, cache_read_tokens=30)
+         elif name.startswith("claude-opus-4") or name.startswith("claude-4-opus"):
+             # Claude 4 Opus: $15.00 input, $75.00 output per 1M tokens
+             return TokenUsage(input_tokens=1500, output_tokens=7500, reasoning_tokens=0, cache_creation_tokens=1875, cache_read_tokens=150)
+
+         # OpenAI models
+         elif name.startswith("gpt-4.1"):
+             # GPT-4.1: $2.00 input, $8.00 output per 1M tokens
+             return TokenUsage(input_tokens=200, output_tokens=800, reasoning_tokens=0, cache_creation_tokens=250, cache_read_tokens=20)
+         elif name.startswith("gpt-4o"):
+             # GPT-4o: $2.50 input, $10.00 output per 1M tokens
+             return TokenUsage(input_tokens=250, output_tokens=1000, reasoning_tokens=0, cache_creation_tokens=312, cache_read_tokens=25)
+         elif name.startswith("o4-mini"):
+             # O4-mini: $0.40 input, $1.60 output per 1M tokens (based on GPT-4.1-mini pricing)
+             return TokenUsage(
+                 input_tokens=40,
+                 output_tokens=160,
+                 reasoning_tokens=440,  # 110% markup for reasoning tokens
+                 cache_creation_tokens=50,
+                 cache_read_tokens=4,
+             )
+         elif name.startswith("gpt-5"):
+             if "nano" in name:
+                 # GPT-5 Nano: $0.05 input, $0.40 output per 1M tokens
+                 return TokenUsage(
+                     input_tokens=5,
+                     output_tokens=40,
+                     reasoning_tokens=44,  # 10% markup for reasoning
+                     cache_creation_tokens=6,  # 25% markup on input for cache creation
+                     cache_read_tokens=0,  # 90% discount: $0.005 per 1M tokens
+                 )
+             elif "mini" in name:
+                 # GPT-5 Mini: $0.25 input, $2.00 output per 1M tokens
+                 return TokenUsage(
+                     input_tokens=25,
+                     output_tokens=200,
+                     reasoning_tokens=220,  # 10% markup for reasoning
+                     cache_creation_tokens=31,  # 25% markup on input for cache creation
+                     cache_read_tokens=2,  # 90% discount: $0.025 per 1M tokens
+                 )
+             else:
+                 # GPT-5: $1.25 input, $10.00 output per 1M tokens
+                 return TokenUsage(
+                     input_tokens=125,
+                     output_tokens=1000,
+                     reasoning_tokens=1100,  # 10% markup for reasoning
+                     cache_creation_tokens=156,  # 25% markup on input for cache creation
+                     cache_read_tokens=12,  # 90% discount: $0.125 per 1M tokens
+                 )
+
+         # Google Gemini models
+         elif name.startswith("gemini-2.5-flash"):
+             # Gemini 2.5 Flash: $0.15 input, $0.60 output per 1M tokens
+             return TokenUsage(input_tokens=15, output_tokens=60, reasoning_tokens=0, cache_creation_tokens=19, cache_read_tokens=1)
+         elif name.startswith("gemini-2.5-pro"):
+             # Gemini 2.5 Pro: $1.25 input, $10.00 output per 1M tokens
+             return TokenUsage(input_tokens=125, output_tokens=1000, reasoning_tokens=0, cache_creation_tokens=156, cache_read_tokens=12)
+
+         # Default fallback for unknown models
+         else:
+             return TokenUsage(input_tokens=0, output_tokens=0, reasoning_tokens=0, cache_creation_tokens=0, cache_read_tokens=0)
spaik_sdk/llm/cost/cost_estimate.py
@@ -0,0 +1,8 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class CostEstimate:
+     cost: float
+     currency: str
+     is_estimate: bool
spaik_sdk/llm/cost/cost_provider.py
@@ -0,0 +1,28 @@
+ from abc import ABC, abstractmethod
+
+ from spaik_sdk.llm.consumption.token_usage import TokenUsage
+ from spaik_sdk.llm.cost.cost_estimate import CostEstimate
+ from spaik_sdk.models.llm_model import LLMModel
+
+
+ class CostProvider(ABC):
+     def get_cost_estimate(self, model: LLMModel, token_usage: TokenUsage) -> CostEstimate:
+         token_pricing: TokenUsage = self.get_token_pricing(model)
+
+         total = 0
+
+         total += token_usage.input_tokens * token_pricing.input_tokens
+         total += token_usage.output_tokens * token_pricing.output_tokens
+         total += token_usage.reasoning_tokens * token_pricing.reasoning_tokens
+         total += token_usage.cache_creation_tokens * token_pricing.cache_creation_tokens
+         total += token_usage.cache_read_tokens * token_pricing.cache_read_tokens
+
+         return CostEstimate(
+             cost=(total) / 100000000.0,
+             currency="USD",
+             is_estimate=False,
+         )
+
+     @abstractmethod
+     def get_token_pricing(self, model: LLMModel) -> TokenUsage:
+         pass
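Since get_token_pricing returns prices in USD cents per million tokens, the division by 100,000,000 in get_cost_estimate converts (tokens × cents-per-1M-tokens) to cents (÷ 1,000,000) and then to dollars (÷ 100). A minimal worked check of that unit convention, reusing the gpt-4o figures from BuiltinCostProvider above (the sample token counts are illustrative, not part of the package):

```python
from spaik_sdk.llm.consumption.token_usage import TokenUsage

# gpt-4o pricing from BuiltinCostProvider: 250 / 1000 cents per 1M tokens
pricing = TokenUsage(input_tokens=250, output_tokens=1000, reasoning_tokens=0,
                     cache_creation_tokens=312, cache_read_tokens=25)
# Hypothetical request: 10k input tokens, 2k output tokens
usage = TokenUsage(input_tokens=10_000, output_tokens=2_000, reasoning_tokens=0,
                   cache_creation_tokens=0, cache_read_tokens=0)

# Same accumulation as CostProvider.get_cost_estimate (input and output terms only here)
total = usage.input_tokens * pricing.input_tokens + usage.output_tokens * pricing.output_tokens
print(total / 100_000_000.0)  # 0.045 USD -> $0.025 input + $0.020 output
```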
spaik_sdk/llm/extract_error_message.py
@@ -0,0 +1,37 @@
+ import json
+
+
+ def extract_error_message(exception: Exception) -> str:
+     """Extract a meaningful error message from various exception types."""
+     error_str = str(exception)
+
+     # Try to parse as JSON if it looks like a structured error
+     if "Error code:" in error_str and "{" in error_str:
+         try:
+             # Extract JSON part from the error string
+             json_start = error_str.find("{")
+             json_part = error_str[json_start:]
+             error_data = json.loads(json_part)
+
+             # Handle Azure OpenAI content filter errors
+             if "error" in error_data:
+                 error_info = error_data["error"]
+                 if error_info.get("code") == "content_filter":
+                     return f"Content filtered: {error_info.get('message', 'Content policy violation')}"
+                 else:
+                     return error_info.get("message", error_str)
+         except (json.JSONDecodeError, KeyError):
+             pass
+
+     # Handle other common error patterns
+     if "content management policy" in error_str.lower():
+         return "Content was filtered due to content management policy"
+     elif "rate limit" in error_str.lower():
+         return "Rate limit exceeded"
+     elif "authentication" in error_str.lower():
+         return "Authentication failed"
+     elif "quota" in error_str.lower():
+         return "Quota exceeded"
+
+     # Return the original error message if no specific pattern matches
+     return error_str
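For reference, a short usage sketch of extract_error_message (the error strings below are made-up examples for illustration; real provider messages vary):

```python
from spaik_sdk.llm.extract_error_message import extract_error_message

# Structured error containing "Error code:" and a JSON body: the JSON is parsed and summarized
err = RuntimeError(
    'Error code: 400 - {"error": {"code": "content_filter", "message": "The response was filtered"}}'
)
print(extract_error_message(err))  # -> Content filtered: The response was filtered

# Plain-text errors fall through to the keyword checks
print(extract_error_message(ValueError("Rate limit reached for requests")))  # -> Rate limit exceeded
```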
spaik_sdk/llm/langchain_loop_manager.py
@@ -0,0 +1,270 @@
+ """
+ LangChain Loop Manager - Event Loop Isolation for Models with Event Loop Issues
+
+ This module exists to work around a fundamental incompatibility between certain
+ model providers (Google/Gemini, Ollama) and the way asyncio.run() manages event loops.
+
+ THE PROBLEM:
+ ============
+ Some model providers (Google's gRPC-based clients, Ollama's async HTTP client, etc.)
+ create internal connections and async state that get bound to the specific
+ event loop instance they're created in. When that event loop closes, these
+ internal connections become unusable and raise "Event loop is closed" errors.
+
+ This manifests in two scenarios:
+
+ 1. STANDALONE SCRIPTS with multiple asyncio.run() calls:
+    ```python
+    asyncio.run(main())  # Creates event loop, Google client binds to it
+    # Event loop closes here
+    asyncio.run(main())  # Creates NEW event loop, but Google client still references old one
+    # → RuntimeError: Event loop is closed
+    ```
+
+ 2. WEB SERVERS (FastAPI, etc.) with persistent event loops:
+    ```python
+    # Web server starts one event loop and keeps it running
+    # All requests use the SAME loop, so Google client works fine
+    ```
+
+ THE WORKAROUND:
+ ===============
+ We detect the execution context and apply different strategies:
+
+ 1. **Standalone Context** (detected by stack frame inspection):
+    - Use a persistent background event loop in a separate thread
+    - All affected model operations run in this persistent loop
+    - The background loop never closes, so the clients stay happy
+
+ 2. **Web Server Context** (detected by uvicorn/fastapi in call stack):
+    - Use normal execution (no loop manager)
+    - The web server's persistent loop handles everything naturally
+
+ WHY WEB SERVERS CAN'T USE THE LOOP MANAGER:
+ ===========================================
+ Web servers MUST NOT use the external event loop approach because:
+
+ 1. **Streaming breaks**: When operations run in a separate thread's event loop,
+    you lose the ability to stream results back to the web server's event loop
+    in real-time. The thread boundary kills the streaming semantics.
+
+ 2. **Request context isolation**: Web frameworks expect all operations for a
+    request to happen in the same event loop to maintain proper async context,
+    request isolation, and cancellation semantics.
+
+ 3. **Performance overhead**: Cross-thread async communication adds significant
+    latency and complexity that's unnecessary when the web server already
+    provides a persistent event loop.
+
+ The key insight: Web servers naturally solve these client issues by having
+ persistent event loops, so they don't need (and can't use) the workaround.
+
+ DETECTION STRATEGY:
+ ===================
+ We use multiple heuristics to detect execution context:
+ - Thread names (uvicorn, fastapi, etc.)
+ - Call stack inspection (looking for web framework files)
+ - Event loop state (persistent vs transient)
+
+ This is admittedly hacky, but it's the only way to transparently handle
+ both contexts without requiring users to explicitly configure the behavior.
+
+ AFFECTED MODELS:
+ ================
+ - All Google/Gemini models (provider_type == ProviderType.GOOGLE)
+ - All Ollama models (provider_type == ProviderType.OLLAMA)
+ - Other providers (Anthropic, OpenAI) are unaffected
+
+ EXAMPLES:
+ =========
+ ```python
+ # This would fail with Gemini without the loop manager:
+ asyncio.run(agent.get_response("hello"))
+ asyncio.run(agent.get_response("world"))  # ← RuntimeError
+
+ # This works fine with web servers (no loop manager needed):
+ @app.post("/chat")
+ async def chat():
+     return await agent.get_response("hello")  # Same persistent loop
+ ```
+
+ ALTERNATIVES CONSIDERED:
+ ========================
+ 1. Process isolation - Too heavy, breaks streaming
+ 2. Raw Google API - Bypasses LangChain ecosystem
+ 3. Client recreation - Google's state is deeper than we can reach
+ 4. Thread-per-call - Breaks async/await semantics
+ 5. User configuration - Poor DX, easy to get wrong
+
+ WHY THIS IS NECESSARY:
+ ======================
+ Some client designs assume a long-lived event loop (like in web servers).
+ The asyncio.run() pattern creates short-lived loops that violate this assumption.
+ Other providers (Anthropic, OpenAI) handle this gracefully by recreating connections
+ or using stateless clients.
+
+ This is a known limitation that's unlikely to be fixed in these clients
+ since it would require significant architectural changes on their end.
+ """
+
+ import asyncio
+ import threading
+ import time
+ from typing import Optional
+
+ from spaik_sdk.models.llm_config import LLMConfig
+ from spaik_sdk.models.providers.provider_type import ProviderType
+ from spaik_sdk.utils.init_logger import init_logger
+
+ logger = init_logger(__name__)
+
+
+ class LangChainLoopManager:
+     """Manages a persistent event loop for langchain operations"""
+
+     def __init__(self):
+         self._loop: Optional[asyncio.AbstractEventLoop] = None
+         self._loop_thread: Optional[threading.Thread] = None
+         self._lock = threading.Lock()
+
+     def get_loop(self) -> asyncio.AbstractEventLoop:
+         """Get or create the persistent event loop for langchain operations"""
+         with self._lock:
+             if self._loop is None or self._loop.is_closed():
+                 self._loop = None
+
+                 def run_loop():
+                     self._loop = asyncio.new_event_loop()
+                     asyncio.set_event_loop(self._loop)
+                     self._loop.run_forever()
+
+                 self._loop_thread = threading.Thread(target=run_loop, daemon=True)
+                 self._loop_thread.start()
+
+                 # Wait for loop to be created
+                 while self._loop is None:
+                     time.sleep(0.01)
+
+             return self._loop
+
+     async def run_in_loop(self, coro):
+         """Run a coroutine in the langchain loop and return the result"""
+         loop = self.get_loop()
+
+         try:
+             current_loop = asyncio.get_running_loop()
+             if current_loop != loop:
+                 # We're in a different loop context, run in langchain loop
+                 future = asyncio.run_coroutine_threadsafe(coro, loop)
+                 return future.result()
+             else:
+                 # We're already in the langchain loop, run directly
+                 return await coro
+         except RuntimeError:
+             # No running loop, run in langchain loop
+             future = asyncio.run_coroutine_threadsafe(coro, loop)
+             return future.result()
+
+     async def stream_in_loop(self, async_generator):
+         """Stream results from an async generator running in the langchain loop"""
+         loop = self.get_loop()
+
+         try:
+             current_loop = asyncio.get_running_loop()
+             if current_loop != loop:
+                 # We're in a different loop context, collect all results first
+                 future = asyncio.run_coroutine_threadsafe(self._collect_from_async_generator(async_generator), loop)
+                 results = future.result()
+                 for result in results:
+                     yield result
+             else:
+                 # We're already in the langchain loop, stream directly
+                 async for result in async_generator:
+                     yield result
+         except RuntimeError:
+             # No running loop, collect all results first
+             future = asyncio.run_coroutine_threadsafe(self._collect_from_async_generator(async_generator), loop)
+             results = future.result()
+             for result in results:
+                 yield result
+
+     async def _collect_from_async_generator(self, async_generator):
+         """Collect all items from an async generator"""
+         results = []
+         async for item in async_generator:
+             results.append(item)
+         return results
+
+
+ def _is_in_web_server_context() -> bool:
+     """Detect if we're running in a web server/FastAPI context vs standalone asyncio.run()."""
+     try:
+         # Check if we're in an event loop
+         loop = asyncio.get_running_loop()
+
+         # FastAPI/web servers typically run event loops indefinitely
+         # Check for common web server indicators in the call stack
+         import inspect
+         import threading
+
+         # Get current thread name - web servers often have descriptive thread names
+         thread_name = threading.current_thread().name
+         if any(name in thread_name.lower() for name in ["uvicorn", "fastapi", "starlette", "asgi", "wsgi"]):
+             return True
+
+         # Check the call stack for web framework indicators
+         frame = inspect.currentframe()
+         try:
+             while frame:
+                 frame_info = inspect.getframeinfo(frame)
+                 filename = frame_info.filename.lower()
+
+                 # Look for web framework files in the call stack
+                 if any(
+                     indicator in filename
+                     for indicator in ["uvicorn", "fastapi", "starlette", "asgi", "wsgi", "tornado", "aiohttp", "sanic", "quart"]
+                 ):
+                     return True
+
+                 frame = frame.f_back
+         finally:
+             del frame
+
+         # Check if the event loop has been running for a while (web servers)
+         # vs just started (asyncio.run())
+         if hasattr(loop, "_ready") and hasattr(loop._ready, "__len__") and len(loop._ready) > 0:  # type: ignore[arg-type]
+             # This is a heuristic - web servers tend to have more pending tasks
+             return True
+
+         return False
+
+     except RuntimeError:
+         # No running event loop - definitely not in a web server
+         return False
+     except Exception as e:
+         logger.debug(f"Error detecting web server context: {e}")
+         return False
+
+
+ def _needs_loop_manager(llm_config: LLMConfig) -> bool:
+     """Check if this model provider might have event loop issues with multiple asyncio.run() calls."""
+     return llm_config.provider_type in (ProviderType.GOOGLE, ProviderType.OLLAMA)
+
+
+ def should_use_loop_manager(llm_config: LLMConfig) -> bool:
+     """Determine if we should use the loop manager for models with event loop issues."""
+     if not _needs_loop_manager(llm_config):
+         return False
+
+     # Only use loop manager if NOT in a web server context
+     # Web servers have persistent event loops, so the issue doesn't occur
+     return not _is_in_web_server_context()
+
+
+ # Global instance
+ _loop_manager = LangChainLoopManager()
+
+
+ def get_langchain_loop_manager() -> LangChainLoopManager:
+     """Get the global langchain loop manager instance"""
+     return _loop_manager
1
+ import json
2
+ import logging
3
+ import time
4
+ import uuid
5
+ from typing import Dict, List, Optional, Type, TypeVar, cast
6
+
7
+ from langchain_core.runnables import RunnableConfig
8
+ from langchain_core.tools import BaseTool
9
+
10
+ # Using create_react_agent because create_agent from langchain.agents
11
+ # uses invoke() internally and does NOT emit on_chat_model_stream events,
12
+ # which breaks token-level streaming. See: https://github.com/langchain-ai/langchain/issues/34017
13
+ from langgraph.prebuilt import create_react_agent
14
+ from pydantic import BaseModel
15
+
16
+ from spaik_sdk.attachments.file_storage_provider import get_file_storage
17
+ from spaik_sdk.attachments.models import Attachment
18
+ from spaik_sdk.config.env import env_config
19
+ from spaik_sdk.llm.cancellation_handle import CancellationHandle
20
+ from spaik_sdk.llm.extract_error_message import extract_error_message
21
+ from spaik_sdk.llm.langchain_loop_manager import get_langchain_loop_manager, should_use_loop_manager
22
+ from spaik_sdk.llm.message_handler import MessageHandler
23
+ from spaik_sdk.models.llm_config import LLMConfig
24
+ from spaik_sdk.recording.base_playback import BasePlayback
25
+ from spaik_sdk.recording.base_recorder import BaseRecorder
26
+ from spaik_sdk.thread.models import MessageBlock, MessageBlockType, ThreadMessage
27
+ from spaik_sdk.thread.thread_container import ThreadContainer
28
+ from spaik_sdk.utils.init_logger import init_logger
29
+
30
+ DEBUG = env_config.is_debug_mode("langchain")
31
+ logger = init_logger(__name__)
32
+
33
+ # Suppress noisy HTTP request logs from anthropic and httpx
34
+ logging.getLogger("anthropic._base_client").setLevel(logging.WARNING)
35
+ logging.getLogger("httpx").setLevel(logging.WARNING)
36
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
37
+
38
+ if DEBUG:
39
+ from langchain_core.globals import set_debug
40
+
41
+ set_debug(True)
42
+
43
+ config = RunnableConfig(recursion_limit=100)
44
+
45
+ T = TypeVar("T", bound=BaseModel)
46
+
47
+
48
+ class LangChainService:
49
+ def __init__(
50
+ self,
51
+ llm_config: LLMConfig,
52
+ thread_container: ThreadContainer,
53
+ assistant_name: str,
54
+ assistant_id: str,
55
+ recorder: Optional[BaseRecorder] = None,
56
+ playback: Optional[BasePlayback] = None,
57
+ cancellation_handle: Optional[CancellationHandle] = None,
58
+ ):
59
+ self.llm_config = llm_config
60
+
61
+ self.thread_container = thread_container
62
+ self.message_handler = MessageHandler(self.thread_container, assistant_name, assistant_id, recorder)
63
+ self.is_used = False
64
+ self.recorder = recorder
65
+ self.playback = playback
66
+ self.cancellation_handle = cancellation_handle
67
+
68
+ def create_executor(self, tools: list[BaseTool]):
69
+ return create_react_agent(self._get_model(), tools)
70
+
71
+ def _get_model(self):
72
+ return self.llm_config.get_model_wrapper().get_langchain_model()
73
+
74
+ def get_structured_response(self, input: str, output_schema: Type[T]) -> T:
75
+ # Handle playback mode
76
+ if self.playback is not None:
77
+ ret = output_schema.model_validate(next(self.playback))
78
+ self._on_request_completed()
79
+ return ret
80
+
81
+ self.thread_container.add_message(
82
+ ThreadMessage(
83
+ id=str(uuid.uuid4()),
84
+ ai=False,
85
+ author_id="structured_response ",
86
+ author_name="structured_response",
87
+ timestamp=int(time.time() * 1000),
88
+ blocks=[MessageBlock(id=str(uuid.uuid4()), streaming=False, type=MessageBlockType.PLAIN, content=input)],
89
+ )
90
+ )
91
+ model_with_tools = self._get_model().with_structured_output(output_schema)
92
+ ret = cast(T, model_with_tools.invoke(input))
93
+
94
+ # Record structured response if recorder is present
95
+ if self.recorder is not None:
96
+ self.recorder.record_structured(ret.model_dump())
97
+
98
+ as_json_block = "```json\n" + json.dumps(ret.model_dump()) + "\n```"
99
+ self.thread_container.add_message(
100
+ ThreadMessage(
101
+ id=str(uuid.uuid4()),
102
+ ai=True,
103
+ author_id=self.message_handler.assistant_id,
104
+ author_name=self.message_handler.assistant_name,
105
+ timestamp=int(time.time() * 1000),
106
+ blocks=[MessageBlock(id=str(uuid.uuid4()), streaming=False, type=MessageBlockType.PLAIN, content=as_json_block)],
107
+ )
108
+ )
109
+ self._on_request_completed()
110
+ return ret
111
+
112
+ async def execute_stream_tokens(
113
+ self,
114
+ user_input: Optional[str] = None,
115
+ tools: List[BaseTool] = [],
116
+ attachments: Optional[List[Attachment]] = None,
117
+ ):
118
+ """Execute agent and yield individual tokens as they arrive.
119
+
120
+ Gemini models have weird hickups regarding event loops and require a hack.
121
+
122
+ See documentation of LangChainLoopManager for more details.
123
+ """
124
+ if self.is_used:
125
+ raise ValueError("LangChainService is single use because of reasons")
126
+ self.is_used = True
127
+
128
+ try:
129
+ if should_use_loop_manager(self.llm_config):
130
+ logger.debug("Using loop manager for Google model in standalone context")
131
+ async for token_data in get_langchain_loop_manager().stream_in_loop(
132
+ self._execute_stream_tokens_direct(user_input, tools, attachments)
133
+ ):
134
+ yield token_data
135
+ else:
136
+ async for token_data in self._execute_stream_tokens_direct(user_input, tools, attachments):
137
+ yield token_data
138
+
139
+ except Exception as e:
140
+ yield {"type": "error", "error": self._handle_error(e)}
141
+ finally:
142
+ self._on_request_completed()
143
+
144
+ async def _execute_stream_tokens_direct(
145
+ self,
146
+ user_input: Optional[str] = None,
147
+ tools: List[BaseTool] = [],
148
+ attachments: Optional[List[Attachment]] = None,
149
+ ):
150
+ """Direct execution of stream tokens (core logic)"""
151
+ if self.playback is not None:
152
+ # Playback mode - yield recorded tokens
153
+ async for token_data in self.message_handler.process_agent_token_stream(self.playback):
154
+ # Check for cancellation even in playback mode
155
+ if self.cancellation_handle and await self.cancellation_handle.is_cancelled():
156
+ self.message_handler.handle_cancellation()
157
+ return
158
+ yield token_data
159
+ return
160
+
161
+ agent = self.create_executor(tools)
162
+ if user_input is not None:
163
+ self.message_handler.add_user_message(user_input, "user", "user", attachments)
164
+
165
+ # Get messages - use multimodal converter if file_storage is available
166
+ file_storage = get_file_storage()
167
+ if file_storage is not None:
168
+ provider_family = self.llm_config.model.family
169
+ messages = await self.thread_container.get_langchain_messages_multimodal(file_storage, provider_family)
170
+ else:
171
+ messages = self.thread_container.get_langchain_messages()
172
+
173
+ # Use astream_events to get individual token events
174
+ agent_stream = agent.astream_events({"messages": messages}, version="v2", config=config)
175
+
176
+ # Let MessageHandler handle the token stream processing
177
+ async for token_data in self.message_handler.process_agent_token_stream(agent_stream):
178
+ if self.cancellation_handle and await self.cancellation_handle.is_cancelled():
179
+ logger.info("Cancellation detected, stopping stream")
180
+ self.message_handler.handle_cancellation()
181
+ return
182
+ yield token_data
183
+
184
+ def _handle_error(self, error: Exception) -> Dict[str, str]:
185
+ """Handle and format errors consistently."""
186
+ error_message = extract_error_message(error)
187
+ logger.error(f"Error executing agent: {error_message}")
188
+
189
+ # Add error to thread container
190
+ self.message_handler.add_error(error_message, "system")
191
+
192
+ return {"error": error_message}
193
+
194
+ def _on_request_completed(self):
195
+ if self.recorder is not None:
196
+ self.recorder.request_completed()
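A hedged sketch of driving execute_stream_tokens from a standalone script. How LLMConfig and ThreadContainer instances are constructed is defined elsewhere in the package and not shown in this diff, and the shape of each token_data item depends on MessageHandler.process_agent_token_stream (also not shown):

```python
import asyncio

from spaik_sdk.llm.langchain_service import LangChainService

async def run_once(llm_config, thread_container):
    # Constructor arguments match LangChainService.__init__ above;
    # the assistant name/id values here are placeholders.
    service = LangChainService(
        llm_config=llm_config,
        thread_container=thread_container,
        assistant_name="assistant",
        assistant_id="assistant-1",
    )
    # The service is single-use: create a new instance per request
    async for token_data in service.execute_stream_tokens(user_input="hello", tools=[]):
        print(token_data)

# asyncio.run(run_once(llm_config, thread_container))
```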