webagents-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webagents/__init__.py +18 -0
- webagents/__main__.py +55 -0
- webagents/agents/__init__.py +13 -0
- webagents/agents/core/__init__.py +19 -0
- webagents/agents/core/base_agent.py +1834 -0
- webagents/agents/core/handoffs.py +293 -0
- webagents/agents/handoffs/__init__.py +0 -0
- webagents/agents/interfaces/__init__.py +0 -0
- webagents/agents/lifecycle/__init__.py +0 -0
- webagents/agents/skills/__init__.py +109 -0
- webagents/agents/skills/base.py +136 -0
- webagents/agents/skills/core/__init__.py +8 -0
- webagents/agents/skills/core/guardrails/__init__.py +0 -0
- webagents/agents/skills/core/llm/__init__.py +0 -0
- webagents/agents/skills/core/llm/anthropic/__init__.py +1 -0
- webagents/agents/skills/core/llm/litellm/__init__.py +10 -0
- webagents/agents/skills/core/llm/litellm/skill.py +538 -0
- webagents/agents/skills/core/llm/openai/__init__.py +1 -0
- webagents/agents/skills/core/llm/xai/__init__.py +1 -0
- webagents/agents/skills/core/mcp/README.md +375 -0
- webagents/agents/skills/core/mcp/__init__.py +15 -0
- webagents/agents/skills/core/mcp/skill.py +731 -0
- webagents/agents/skills/core/memory/__init__.py +11 -0
- webagents/agents/skills/core/memory/long_term_memory/__init__.py +10 -0
- webagents/agents/skills/core/memory/long_term_memory/memory_skill.py +639 -0
- webagents/agents/skills/core/memory/short_term_memory/__init__.py +9 -0
- webagents/agents/skills/core/memory/short_term_memory/skill.py +341 -0
- webagents/agents/skills/core/memory/vector_memory/skill.py +447 -0
- webagents/agents/skills/core/planning/__init__.py +9 -0
- webagents/agents/skills/core/planning/planner.py +343 -0
- webagents/agents/skills/ecosystem/__init__.py +0 -0
- webagents/agents/skills/ecosystem/crewai/__init__.py +1 -0
- webagents/agents/skills/ecosystem/database/__init__.py +1 -0
- webagents/agents/skills/ecosystem/filesystem/__init__.py +0 -0
- webagents/agents/skills/ecosystem/google/__init__.py +0 -0
- webagents/agents/skills/ecosystem/google/calendar/__init__.py +6 -0
- webagents/agents/skills/ecosystem/google/calendar/skill.py +306 -0
- webagents/agents/skills/ecosystem/n8n/__init__.py +0 -0
- webagents/agents/skills/ecosystem/openai_agents/__init__.py +0 -0
- webagents/agents/skills/ecosystem/web/__init__.py +0 -0
- webagents/agents/skills/ecosystem/zapier/__init__.py +0 -0
- webagents/agents/skills/robutler/__init__.py +11 -0
- webagents/agents/skills/robutler/auth/README.md +63 -0
- webagents/agents/skills/robutler/auth/__init__.py +17 -0
- webagents/agents/skills/robutler/auth/skill.py +354 -0
- webagents/agents/skills/robutler/crm/__init__.py +18 -0
- webagents/agents/skills/robutler/crm/skill.py +368 -0
- webagents/agents/skills/robutler/discovery/README.md +281 -0
- webagents/agents/skills/robutler/discovery/__init__.py +16 -0
- webagents/agents/skills/robutler/discovery/skill.py +230 -0
- webagents/agents/skills/robutler/kv/__init__.py +6 -0
- webagents/agents/skills/robutler/kv/skill.py +80 -0
- webagents/agents/skills/robutler/message_history/__init__.py +9 -0
- webagents/agents/skills/robutler/message_history/skill.py +270 -0
- webagents/agents/skills/robutler/messages/__init__.py +0 -0
- webagents/agents/skills/robutler/nli/__init__.py +13 -0
- webagents/agents/skills/robutler/nli/skill.py +687 -0
- webagents/agents/skills/robutler/notifications/__init__.py +5 -0
- webagents/agents/skills/robutler/notifications/skill.py +141 -0
- webagents/agents/skills/robutler/payments/__init__.py +41 -0
- webagents/agents/skills/robutler/payments/exceptions.py +255 -0
- webagents/agents/skills/robutler/payments/skill.py +610 -0
- webagents/agents/skills/robutler/storage/__init__.py +10 -0
- webagents/agents/skills/robutler/storage/files/__init__.py +9 -0
- webagents/agents/skills/robutler/storage/files/skill.py +445 -0
- webagents/agents/skills/robutler/storage/json/__init__.py +9 -0
- webagents/agents/skills/robutler/storage/json/skill.py +336 -0
- webagents/agents/skills/robutler/storage/kv/skill.py +88 -0
- webagents/agents/skills/robutler/storage.py +389 -0
- webagents/agents/tools/__init__.py +0 -0
- webagents/agents/tools/decorators.py +426 -0
- webagents/agents/tracing/__init__.py +0 -0
- webagents/agents/workflows/__init__.py +0 -0
- webagents/scripts/__init__.py +0 -0
- webagents/server/__init__.py +28 -0
- webagents/server/context/__init__.py +0 -0
- webagents/server/context/context_vars.py +121 -0
- webagents/server/core/__init__.py +0 -0
- webagents/server/core/app.py +843 -0
- webagents/server/core/middleware.py +69 -0
- webagents/server/core/models.py +98 -0
- webagents/server/core/monitoring.py +59 -0
- webagents/server/endpoints/__init__.py +0 -0
- webagents/server/interfaces/__init__.py +0 -0
- webagents/server/middleware.py +330 -0
- webagents/server/models.py +92 -0
- webagents/server/monitoring.py +659 -0
- webagents/utils/__init__.py +0 -0
- webagents/utils/logging.py +359 -0
- webagents-0.1.0.dist-info/METADATA +230 -0
- webagents-0.1.0.dist-info/RECORD +94 -0
- webagents-0.1.0.dist-info/WHEEL +4 -0
- webagents-0.1.0.dist-info/entry_points.txt +2 -0
- webagents-0.1.0.dist-info/licenses/LICENSE +20 -0
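For orientation before the hunks below: everything in this release lives under a single top-level webagents package, so the module paths in the manifest above can be imported directly once the wheel is installed. A minimal, hedged import sketch (editorial, not part of the package; class names are taken from the code shown in the hunks that follow):

# Hedged orientation sketch, not shipped in the wheel. Install the artifact first:
#   pip install webagents-0.1.0-py3-none-any.whl
from webagents.agents.core.base_agent import BaseAgent                   # agent core (+1834 lines in this release)
from webagents.agents.skills.core.llm.litellm.skill import LiteLLMSkill  # cross-provider LLM skill (+538 lines, shown below)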
webagents/agents/skills/core/llm/litellm/skill.py
@@ -0,0 +1,538 @@
+"""
+LiteLLM Skill - WebAgents V2.0
+
+Cross-provider LLM routing using LiteLLM for unified access to:
+- OpenAI (GPT-4, GPT-3.5, etc.)
+- Anthropic (Claude-3.5, Claude-3, etc.)
+- XAI/Grok (grok-beta, etc.)
+- Google (Gemini, etc.)
+- And many more providers
+
+Features:
+- Automatic provider routing based on model names
+- Streaming and non-streaming support
+- Tool calling with OpenAI compatibility
+- Automatic fallbacks and error handling
+- Cost tracking and usage monitoring
+- Model parameter optimization
+"""
+
+import os
+import json
+import time
+import asyncio
+from typing import Dict, Any, List, Optional, AsyncGenerator, Union, TYPE_CHECKING
+from dataclasses import dataclass
+
+try:
+    import litellm
+    from litellm import acompletion
+    LITELLM_AVAILABLE = True
+except Exception:
+    LITELLM_AVAILABLE = False
+    litellm = None
+
+if TYPE_CHECKING:
+    from webagents.agents.core.base_agent import BaseAgent
+
+from webagents.agents.skills.base import Skill
+from webagents.agents.tools.decorators import tool, hook
+from webagents.utils.logging import get_logger, log_skill_event, log_tool_execution, timer
+
+
+@dataclass
+class ModelConfig:
+    """Configuration for a specific model"""
+    name: str
+    provider: str
+    max_tokens: int = 4096
+    supports_tools: bool = True
+    supports_streaming: bool = True
+
+
+class LiteLLMSkill(Skill):
+    """
+    Cross-provider LLM skill using LiteLLM for unified access
+
+    Supports multiple providers with automatic routing, fallbacks,
+    streaming, tool calling, and comprehensive error handling.
+    """
+
+    # Default model configurations
+    DEFAULT_MODELS = {
+        # OpenAI
+        "gpt-4o": ModelConfig("gpt-4o", "openai", 4096, True, True),
+        "gpt-4o-mini": ModelConfig("gpt-4o-mini", "openai", 16384, True, True),
+        "gpt-4.1": ModelConfig("gpt-4.1", "openai", 4096, True, True),
+        "text-embedding-3-small": ModelConfig("text-embedding-3-small", "openai", 8192, False, False),
+
+        # Anthropic
+        "claude-3-5-sonnet": ModelConfig("claude-3-5-sonnet", "anthropic", 8192, True, True),
+        "claude-3-5-haiku": ModelConfig("claude-3-5-haiku", "anthropic", 4096, True, True),
+        "claude-3-opus": ModelConfig("claude-3-opus", "anthropic", 4096, True, True),
+        "claude-4-opus": ModelConfig("claude-4-opus", "anthropic", 8192, True, True),
+
+        # XAI/Grok
+        "xai/grok-4": ModelConfig("xai/grok-4", "xai", 8192, True, True),
+        "grok-4": ModelConfig("grok-4", "xai", 8192, True, True),
+    }
+
+    def __init__(self, config: Dict[str, Any] = None):
+        super().__init__(config, scope="all")
+
+        # Store full configuration
+        self.config = config or {}
+
+        # Configuration
+        self.model = config.get('model', 'gpt-4o-mini') if config else 'gpt-4o-mini'
+        self.temperature = config.get('temperature', 0.7) if config else 0.7
+        self.max_tokens = config.get('max_tokens') if config else None
+        self.fallback_models = config.get('fallback_models', []) if config else []
+
+        # API configuration
+        self.api_keys = self._load_api_keys(config)
+        self.model_configs = {**self.DEFAULT_MODELS}
+        if config and 'custom_models' in config:
+            self.model_configs.update(config['custom_models'])
+
+        # Runtime state
+        self.current_model = self.model
+        self.error_counts = {}
+
+        # Validate LiteLLM availability
+        if not LITELLM_AVAILABLE:
+            raise ImportError("LiteLLM not available. Install with: pip install litellm")
+
+    def _load_api_keys(self, config: Dict[str, Any] = None) -> Dict[str, str]:
+        """Load API keys from config and environment - CONFIG HAS PRIORITY"""
+        keys = {}
+
+        # Load from environment variables first
+        env_keys = {
+            'openai': 'OPENAI_API_KEY',
+            'anthropic': 'ANTHROPIC_API_KEY',
+            'xai': 'XAI_API_KEY',
+            'google': 'GOOGLE_API_KEY',
+        }
+
+        for provider, env_var in env_keys.items():
+            if env_var in os.environ:
+                keys[provider] = os.environ[env_var]
+
+        # Override with config keys (config has priority)
+        if config and 'api_keys' in config:
+            keys.update(config['api_keys'])
+
+        return keys
+
+    async def initialize(self, agent: 'BaseAgent') -> None:
+        """Initialize LiteLLM skill"""
+        from webagents.utils.logging import get_logger, log_skill_event
+
+        self.agent = agent
+        self.logger = get_logger('skill.llm.litellm', agent.name)
+
+        # Configure LiteLLM
+        if litellm:
+            # Note: API keys are now passed directly to completion calls rather than set globally
+
+            # Configure base URL if provided (for proxy usage)
+            if self.config and 'base_url' in self.config:
+                litellm.api_base = self.config['base_url']
+                os.environ['OPENAI_API_BASE'] = self.config['base_url']
+                self.logger.info(f"LiteLLM configured with base URL: {self.config['base_url']}")
+
+            # Configure LiteLLM settings
+            litellm.set_verbose = False  # We handle logging ourselves
+            litellm.drop_params = True  # Drop unsupported parameters
+
+        log_skill_event(agent.name, 'litellm', 'initialized', {
+            'model': self.model,
+            'temperature': self.temperature,
+            'available_providers': list(self.api_keys.keys()),
+            'fallback_models': self.fallback_models,
+            'total_models': len(self.model_configs)
+        })
+
+
+
+    # Core LLM functionality
+
+    async def chat_completion(self, messages: List[Dict[str, Any]],
+                              model: Optional[str] = None,
+                              tools: Optional[List[Dict[str, Any]]] = None,
+                              stream: bool = False,
+                              **kwargs: Any) -> Dict[str, Any]:
+        """
+        Create a chat completion using LiteLLM
+
+        Args:
+            messages: OpenAI-format messages
+            model: Override model (defaults to skill's current model)
+            tools: OpenAI-format tool definitions
+            stream: Whether to stream (handled by chat_completion_stream)
+            **kwargs: Additional LLM parameters
+        """
+
+        if stream:
+            raise ValueError("Use chat_completion_stream() for streaming responses")
+
+        target_model = model or self.current_model
+
+        with timer(f"chat_completion_{target_model}", self.agent.name):
+            try:
+                response = await self._execute_completion(
+                    messages=messages,
+                    model=target_model,
+                    tools=tools,
+                    stream=False,
+                    **kwargs
+                )
+                # Log token usage to context.usage if available
+                try:
+                    usage_obj = None
+                    if hasattr(response, 'usage'):
+                        usage_obj = getattr(response, 'usage')
+                    elif isinstance(response, dict):
+                        usage_obj = response.get('usage')
+                    if usage_obj:
+                        prompt_tokens = int(getattr(usage_obj, 'prompt_tokens', None) or usage_obj.get('prompt_tokens') or 0)
+                        completion_tokens = int(getattr(usage_obj, 'completion_tokens', None) or usage_obj.get('completion_tokens') or 0)
+                        total_tokens = int(getattr(usage_obj, 'total_tokens', None) or usage_obj.get('total_tokens') or (prompt_tokens + completion_tokens))
+                        self._append_usage_record(model=target_model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, streaming=False)
+                except Exception:
+                    # Never fail the call on logging issues
+                    pass
+
+                return response
+
+            except Exception as e:
+                self.logger.error(f"Chat completion failed for {target_model}: {e}")
+
+                # Try fallback models
+                if self.fallback_models:
+                    for fallback_model in self.fallback_models:
+                        try:
+                            self.logger.info(f"Trying fallback model: {fallback_model}")
+                            response = await self._execute_completion(
+                                messages=messages,
+                                model=fallback_model,
+                                tools=tools,
+                                stream=False,
+                                **kwargs
+                            )
+
+                            return response
+
+                        except Exception as fallback_error:
+                            self.logger.warning(f"Fallback {fallback_model} also failed: {fallback_error}")
+                            continue
+
+                # All models failed
+                self._track_error(target_model)
+                raise e
+
+    async def chat_completion_stream(self, messages: List[Dict[str, Any]],
+                                     model: Optional[str] = None,
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        Create a streaming chat completion using LiteLLM
+        """
+
+        target_model = model or self.current_model
+
+        try:
+            async for chunk in self._execute_completion_stream(
+                messages=messages,
+                model=target_model,
+                tools=tools,
+                **kwargs
+            ):
+                yield chunk
+
+            # Usage logging handled via final usage chunk during streaming
+
+        except Exception as e:
+            self.logger.error(f"Streaming completion failed for {target_model}: {e}")
+
+            # Try fallback models
+            if self.fallback_models:
+                for fallback_model in self.fallback_models:
+                    try:
+                        self.logger.info(f"Trying fallback streaming with: {fallback_model}")
+                        async for chunk in self._execute_completion_stream(
+                            messages=messages,
+                            model=fallback_model,
+                            tools=tools,
+                            **kwargs
+                        ):
+                            yield chunk
+
+                        self._track_usage(fallback_model)
+                        return
+
+                    except Exception as fallback_error:
+                        self.logger.warning(f"Fallback streaming {fallback_model} failed: {fallback_error}")
+                        continue
+
+            # All models failed
+            self._track_error(target_model)
+            raise e
+
+    # Private helper methods
+
+    def _get_api_key_for_model(self, model: str) -> Optional[str]:
+        """Get the appropriate API key based on the model provider"""
+        # Determine provider from model name
+        if model.startswith('azure/'):
+            return self.api_keys.get('azure')
+        elif model.startswith('openai/') or model in ['gpt-4', 'gpt-3.5-turbo', 'gpt-4o', 'gpt-4o-mini', 'gpt-4.1', 'text-embedding-3-small']:
+            return self.api_keys.get('openai')
+        elif model.startswith('anthropic/') or model.startswith('claude'):
+            return self.api_keys.get('anthropic')
+        elif model.startswith('xai/') or model.startswith('grok') or model == 'grok-4':
+            return self.api_keys.get('xai')
+        elif model.startswith('google/') or model.startswith('gemini'):
+            return self.api_keys.get('google')
+        else:
+            # Try to find a matching provider from model configs
+            model_config = self.model_configs.get(model)
+            if model_config:
+                return self.api_keys.get(model_config.provider)
+            # Fallback to default
+            return self.api_keys.get('openai')
+
+    async def _execute_completion(self, messages: List[Dict[str, Any]],
+                                  model: str,
+                                  tools: Optional[List[Dict[str, Any]]] = None,
+                                  stream: bool = False,
+                                  **kwargs) -> Dict[str, Any]:
+        """Execute a single completion request"""
+
+        # Prepare parameters
+        params = {
+            "model": model,
+            "messages": messages,
+            "temperature": kwargs.get('temperature', self.temperature),
+            "stream": stream,
+            # Ensure usage is available when streaming is requested later
+            "stream_options": {"include_usage": True} if stream else None,
+        }
+
+        # Add base URL if configured (for proxy support)
+        if hasattr(self, 'config') and self.config and 'base_url' in self.config:
+            params["api_base"] = self.config['base_url']
+
+        # Add max_tokens if specified
+        if self.max_tokens or 'max_tokens' in kwargs:
+            params["max_tokens"] = kwargs.get('max_tokens', self.max_tokens)
+
+        # Add tools if provided - most modern models support tools
+        # Only skip tools for models explicitly marked as non-supporting
+        model_config = self.model_configs.get(model)
+        skip_tools = model_config and not model_config.supports_tools
+
+        if tools is not None and tools and not skip_tools:
+            params["tools"] = tools
+
+        # Add other parameters
+        for param in ['top_p', 'frequency_penalty', 'presence_penalty', 'stop']:
+            if param in kwargs:
+                params[param] = kwargs[param]
+
+        # Add API key based on model provider
+        api_key = self._get_api_key_for_model(model)
+        if api_key:
+            params["api_key"] = api_key
+
+        self.logger.debug(f"Executing completion with model {model}")
+        self.logger.debug(f"Parameters: {params}")
+
+        # Validate parameters before calling LiteLLM
+        if not messages or not isinstance(messages, list):
+            raise ValueError(f"Messages must be a non-empty list, got: {type(messages)}")
+
+        for i, msg in enumerate(messages):
+            if not isinstance(msg, dict):
+                raise ValueError(f"Message {i} must be a dict, got: {type(msg)}")
+            if 'role' not in msg:
+                raise ValueError(f"Message {i} missing required 'role' field")
+
+        try:
+            # Execute completion
+            response = await acompletion(**params)
+
+            # Convert LiteLLM response to our format
+            return self._normalize_response(response, model)
+        except Exception as e:
+            self.logger.error(f"LiteLLM completion failed with params: {params}")
+            self.logger.error(f"Error details: {type(e).__name__}: {str(e)}")
+            raise
+
+    async def _execute_completion_stream(self, messages: List[Dict[str, Any]],
+                                         model: str,
+                                         tools: Optional[List[Dict[str, Any]]] = None,
+                                         **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
+        """Execute a streaming completion request"""
+
+        # Prepare parameters (same as non-streaming)
+        params = {
+            "model": model,
+            "messages": messages,
+            "temperature": kwargs.get('temperature', self.temperature),
+            "stream": True,
+            # Include a final usage chunk before [DONE] per LiteLLM docs
+            "stream_options": {"include_usage": True},
+        }
+
+        # Add base URL if configured (for proxy support)
+        if self.config and 'base_url' in self.config:
+            params["api_base"] = self.config['base_url']
+
+        if self.max_tokens or 'max_tokens' in kwargs:
+            params["max_tokens"] = kwargs.get('max_tokens', self.max_tokens)
+
+        # Always pass tools if provided - most modern models support tools
+        # Only skip tools for models explicitly marked as non-supporting
+        model_config = self.model_configs.get(model)
+        skip_tools = model_config and not model_config.supports_tools
+
+        if tools is not None and tools and not skip_tools:
+            params["tools"] = tools
+
+        for param in ['top_p', 'frequency_penalty', 'presence_penalty', 'stop']:
+            if param in kwargs:
+                params[param] = kwargs[param]
+
+        # Add API key based on model provider
+        api_key = self._get_api_key_for_model(model)
+        if api_key:
+            params["api_key"] = api_key
+
+        self.logger.debug(f"Executing streaming completion with model {model}")
+
+        # Execute streaming completion
+        stream = await acompletion(**params)
+
+        async for chunk in stream:
+            # Normalize and yield chunk
+            normalized_chunk = self._normalize_streaming_chunk(chunk, model)
+
+            # If LiteLLM sent a final usage chunk, log tokens to context.usage
+            try:
+                usage = normalized_chunk.get('usage') if isinstance(normalized_chunk, dict) else None
+                is_final_usage_chunk = (
+                    usage
+                    and isinstance(usage, dict)
+                    and (not normalized_chunk.get('choices'))
+                )
+                if is_final_usage_chunk:
+                    prompt_tokens = int(usage.get('prompt_tokens') or 0)
+                    completion_tokens = int(usage.get('completion_tokens') or 0)
+                    total_tokens = int(usage.get('total_tokens') or (prompt_tokens + completion_tokens))
+                    self._append_usage_record(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, streaming=True)
+            except Exception:
+                # Never break streaming on usage logging
+                pass
+            yield normalized_chunk
+
+    def _normalize_response(self, response: Any, model: str) -> Dict[str, Any]:
+        """Normalize LiteLLM response to OpenAI format"""
+
+        # LiteLLM already returns OpenAI-compatible format
+        # Just ensure model name is correct
+        if hasattr(response, 'model'):
+            response.model = model
+        elif isinstance(response, dict) and 'model' in response:
+            response['model'] = model
+
+        return response
+
+    def _normalize_streaming_chunk(self, chunk: Any, model: str) -> Dict[str, Any]:
+        """Normalize LiteLLM streaming chunk to OpenAI format"""
+
+        # Convert chunk to dictionary if it's not already
+        if hasattr(chunk, 'model_dump'):
+            # Pydantic v2
+            chunk_dict = chunk.model_dump()
+        elif hasattr(chunk, 'dict'):
+            # Pydantic v1
+            chunk_dict = chunk.dict()
+        elif hasattr(chunk, '__dict__'):
+            # Generic object with attributes
+            chunk_dict = vars(chunk)
+        elif isinstance(chunk, dict):
+            # Already a dictionary
+            chunk_dict = chunk.copy()
+        else:
+            # Try to convert to dict
+            try:
+                chunk_dict = dict(chunk)
+            except:
+                # Fallback - return as-is and hope for the best
+                return chunk
+
+        # Ensure model name is correct
+        chunk_dict['model'] = model
+
+        return chunk_dict
+
+    def _append_usage_record(
+        self,
+        model: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
+        streaming: bool,
+    ) -> None:
+        """Append a normalized usage record to context.usage"""
+        try:
+            from webagents.server.context.context_vars import get_context
+            context = get_context()
+            if not context or not hasattr(context, 'usage'):
+                return
+            context.usage.append({
+                'type': 'llm',
+                'skill': 'litellm',
+                'model': model,
+                'prompt_tokens': int(prompt_tokens or 0),
+                'completion_tokens': int(completion_tokens or 0),
+                'total_tokens': int(total_tokens or 0),
+                'streaming': bool(streaming),
+                'timestamp': time.time(),
+            })
+        except Exception:
+            # Do not raise from logging
+            return
+
+    def _track_error(self, model: str):
+        """Track model error statistics"""
+        if model not in self.error_counts:
+            self.error_counts[model] = 0
+        self.error_counts[model] += 1
+
+        self.logger.warning(f"Model error tracked: {model} ({self.error_counts[model]} total errors)")
+
+    # Compatibility methods for BaseAgent integration
+
+    def get_dependencies(self) -> List[str]:
+        """Get skill dependencies"""
+        return []  # LiteLLM skill is self-contained
+
+    async def query_litellm(self, prompt: str, model: Optional[str] = None, **kwargs: Any) -> str:
+        """Simple query interface for compatibility"""
+
+        messages = [{"role": "user", "content": prompt}]
+        response = await self.chat_completion(messages, model=model, **kwargs)
+
+        if isinstance(response, dict) and 'choices' in response:
+            return response['choices'][0]['message']['content']
+
+        return str(response)
+
+    async def generate_embedding(self, text: str, model: Optional[str] = None) -> List[float]:
+        """Generate embeddings (placeholder for V2.1)"""
+        # This would use LiteLLM's embedding support in V2.1
+        self.logger.info("Embedding generation requested - will be implemented in V2.1")
+        return [0.0] * 1536  # Placeholder embedding

webagents/agents/skills/core/llm/openai/__init__.py
@@ -0,0 +1 @@
+# TODO: Implement dedicated OpenAI skill, currently using litellm

webagents/agents/skills/core/llm/xai/__init__.py
@@ -0,0 +1 @@
+# TODO: Implement dedicated XAI skill, currently using litellm