webagents 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webagents/__init__.py +9 -0
- webagents/agents/core/base_agent.py +865 -69
- webagents/agents/core/handoffs.py +14 -6
- webagents/agents/skills/base.py +33 -2
- webagents/agents/skills/core/llm/litellm/skill.py +906 -27
- webagents/agents/skills/core/memory/vector_memory/skill.py +8 -16
- webagents/agents/skills/ecosystem/openai/__init__.py +6 -0
- webagents/agents/skills/ecosystem/openai/skill.py +867 -0
- webagents/agents/skills/ecosystem/replicate/README.md +440 -0
- webagents/agents/skills/ecosystem/replicate/__init__.py +10 -0
- webagents/agents/skills/ecosystem/replicate/skill.py +517 -0
- webagents/agents/skills/examples/__init__.py +6 -0
- webagents/agents/skills/examples/music_player.py +329 -0
- webagents/agents/skills/robutler/handoff/__init__.py +6 -0
- webagents/agents/skills/robutler/handoff/skill.py +191 -0
- webagents/agents/skills/robutler/nli/skill.py +180 -24
- webagents/agents/skills/robutler/payments/exceptions.py +27 -7
- webagents/agents/skills/robutler/payments/skill.py +64 -14
- webagents/agents/skills/robutler/storage/files/skill.py +2 -2
- webagents/agents/tools/decorators.py +243 -47
- webagents/agents/widgets/__init__.py +6 -0
- webagents/agents/widgets/renderer.py +150 -0
- webagents/server/core/app.py +130 -15
- webagents/server/core/models.py +1 -1
- webagents/utils/logging.py +13 -1
- {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/METADATA +8 -25
- {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/RECORD +30 -20
- webagents/agents/skills/ecosystem/openai_agents/__init__.py +0 -0
- {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/WHEEL +0 -0
- {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/entry_points.txt +0 -0
- {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -21,16 +21,42 @@ import os
 import json
 import time
 import asyncio
+import base64
+import uuid
+import hashlib
+import tempfile
 from typing import Dict, Any, List, Optional, AsyncGenerator, Union, TYPE_CHECKING
+import re
 from dataclasses import dataclass

+try:
+    import httpx
+    HTTPX_AVAILABLE = True
+except ImportError:
+    HTTPX_AVAILABLE = False
+    httpx = None
+
+try:
+    from webagents.agents.skills.robutler.payments import pricing
+    PRICING_AVAILABLE = True
+except ImportError:
+    # Fallback: create a no-op decorator if pricing is not available
+    def pricing(**kwargs):
+        def decorator(func):
+            return func
+        return decorator
+    PRICING_AVAILABLE = False
+
+
 try:
     import litellm
-    from litellm import acompletion
+    from litellm import acompletion, token_counter, register_model
     LITELLM_AVAILABLE = True
 except Exception:
     LITELLM_AVAILABLE = False
     litellm = None
+    token_counter = None
+    register_model = None

 if TYPE_CHECKING:
     from webagents.agents.core.base_agent import BaseAgent
@@ -72,6 +98,18 @@ class LiteLLMSkill(Skill):
         "claude-3-opus": ModelConfig("claude-3-opus", "anthropic", 4096, True, True),
         "claude-4-opus": ModelConfig("claude-4-opus", "anthropic", 8192, True, True),

+        # Google Vertex AI (Gemini)
+        "vertex_ai/gemini-2.5-pro": ModelConfig("vertex_ai/gemini-2.5-pro", "google", 8192, True, True),
+        "vertex_ai/gemini-2.5-flash": ModelConfig("vertex_ai/gemini-2.5-flash", "google", 8192, True, True),
+        "vertex_ai/gemini-2.5-flash-image": ModelConfig("vertex_ai/gemini-2.5-flash-image", "google", 8192, True, True),
+        "gemini-2.5-pro": ModelConfig("gemini-2.5-pro", "google", 8192, True, True),
+        "gemini-2.5-flash": ModelConfig("gemini-2.5-flash", "google", 8192, True, True),
+        "gemini-2.5-flash-image": ModelConfig("gemini-2.5-flash-image", "google", 8192, True, True),
+        "gemini-pro": ModelConfig("gemini-pro", "google", 8192, True, True),
+        "gemini-flash": ModelConfig("gemini-flash", "google", 8192, True, True),
+        "gemini-image-preview": ModelConfig("gemini-image-preview", "google", 8192, True, True),
+        "gemini-flash-image": ModelConfig("gemini-flash-image", "google", 8192, True, True),
+
         # XAI/Grok
         "xai/grok-4": ModelConfig("xai/grok-4", "xai", 8192, True, True),
         "grok-4": ModelConfig("grok-4", "xai", 8192, True, True),
@@ -88,6 +126,8 @@ class LiteLLMSkill(Skill):
         self.temperature = config.get('temperature', 0.7) if config else 0.7
         self.max_tokens = config.get('max_tokens') if config else None
         self.fallback_models = config.get('fallback_models', []) if config else []
+        self.custom_llm_provider = config.get('custom_llm_provider') if config else None
+        self.disable_streaming = bool(config.get('disable_streaming')) if config else False

         # API configuration
         self.api_keys = self._load_api_keys(config)
@@ -102,6 +142,77 @@ class LiteLLMSkill(Skill):
         # Validate LiteLLM availability
         if not LITELLM_AVAILABLE:
             raise ImportError("LiteLLM not available. Install with: pip install litellm")
+
+        # Register Gemini 2.5 experimental models pricing
+        # Official pricing from https://ai.google.dev/gemini-api/docs/models
+        # IMPORTANT: Register both with and without vertex_ai/ prefix for compatibility
+        if LITELLM_AVAILABLE and register_model:
+            try:
+                gemini_models = {
+                    # Gemini Flash - base model (alias for gemini-2.5-flash-thinking)
+                    "gemini-flash": {
+                        "max_tokens": 65535,
+                        "max_input_tokens": 1048576,
+                        "max_output_tokens": 65535,
+                        "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                        "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                        "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                        "litellm_provider": "vertex_ai",
+                        "mode": "chat",
+                        "supports_function_calling": True,
+                        "supports_vision": True
+                    },
+                    # Gemini 2.5 Flash Thinking - standard reasoning model
+                    "gemini-2.5-flash-thinking": {
+                        "max_tokens": 65535,
+                        "max_input_tokens": 1048576,
+                        "max_output_tokens": 65535,
+                        "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                        "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                        "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                        "litellm_provider": "vertex_ai",
+                        "mode": "chat",
+                        "supports_function_calling": True,
+                        "supports_vision": True
+                    },
+                    # Gemini 2.5 Flash Image Preview - experimental image model (more expensive output)
+                    "gemini-2.5-flash-image": {
+                        "max_tokens": 65535,
+                        "max_input_tokens": 1048576,
+                        "max_output_tokens": 65535,
+                        "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                        "output_cost_per_token": 0.00003,  # $30 per 1M tokens (image model premium)
+                        "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                        "litellm_provider": "vertex_ai",
+                        "mode": "chat",
+                        "supports_function_calling": True,
+                        "supports_vision": True
+                    },
+                    # Alias for gemini-flash-image (same pricing as standard flash)
+                    "gemini-flash-image": {
+                        "max_tokens": 65535,
+                        "max_input_tokens": 1048576,
+                        "max_output_tokens": 65535,
+                        "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                        "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                        "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                        "litellm_provider": "vertex_ai",
+                        "mode": "chat",
+                        "supports_function_calling": True,
+                        "supports_vision": True
+                    }
+                }
+
+                # Register models with and without vertex_ai/ prefix
+                models_to_register = {}
+                for model_name, config in gemini_models.items():
+                    models_to_register[model_name] = config
+                    models_to_register[f"vertex_ai/{model_name}"] = config.copy()
+
+                register_model(models_to_register)
+            except Exception:
+                # Silent fail - not critical
+                pass

     def _load_api_keys(self, config: Dict[str, Any] = None) -> Dict[str, str]:
         """Load API keys from config and environment - CONFIG HAS PRIORITY"""
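The block above feeds LiteLLM's model-cost table, which is what its cost helpers consult at billing time. A minimal standalone sketch of the same mechanism, using the rates registered above (`register_model` and `cost_per_token` are LiteLLM's public helpers; exact return details may vary by version):

```python
# Hedged sketch: after register_model(), LiteLLM's cost helpers can price the model.
import litellm
from litellm import register_model, cost_per_token

register_model({
    "vertex_ai/gemini-flash": {
        "input_cost_per_token": 0.0000003,   # $0.30 per 1M tokens
        "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
        "litellm_provider": "vertex_ai",
        "mode": "chat",
    }
})

prompt_usd, completion_usd = cost_per_token(
    model="vertex_ai/gemini-flash",
    prompt_tokens=1_000_000,
    completion_tokens=1_000_000,
)
print(prompt_usd, completion_usd)  # roughly 0.30 and 2.50 given the rates above
```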
@@ -126,8 +237,9 @@ class LiteLLMSkill(Skill):
         return keys

     async def initialize(self, agent: 'BaseAgent') -> None:
-        """Initialize LiteLLM skill"""
+        """Initialize LiteLLM skill and register as handoff"""
         from webagents.utils.logging import get_logger, log_skill_event
+        from webagents.agents.skills.base import Handoff

         self.agent = agent
         self.logger = get_logger('skill.llm.litellm', agent.name)
@@ -146,6 +258,27 @@ class LiteLLMSkill(Skill):
         litellm.set_verbose = False  # We handle logging ourselves
         litellm.drop_params = True  # Drop unsupported parameters

+        # Register as handoff (completion handler)
+        # Priority=10 (high priority - likely to be the default for local LLMs)
+        # NOTE: We register the STREAMING function so it works in both modes:
+        # - Streaming: Returns generator directly
+        # - Non-streaming: Agent consumes generator and reconstructs response
+        agent.register_handoff(
+            Handoff(
+                target=f"litellm_{self.model.replace('/', '_')}",
+                description=f"LiteLLM completion handler using {self.model}",
+                scope="all",
+                metadata={
+                    'function': self.chat_completion_stream,
+                    'priority': 10,
+                    'is_generator': True  # chat_completion_stream is async generator
+                }
+            ),
+            source="litellm"
+        )
+
+        self.logger.info(f"📨 Registered LiteLLM as handoff with model: {self.model}")
+
         log_skill_event(agent.name, 'litellm', 'initialized', {
             'model': self.model,
             'temperature': self.temperature,
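Because the handoff registers `chat_completion_stream` (an async generator) for both modes, the non-streaming path has to drain the generator and stitch the chunks back into a single response. The real logic lives in `base_agent.py` (also heavily changed in this release); the sketch below is a hypothetical illustration of that reconstruction step, not the actual BaseAgent code:

```python
from typing import Any, AsyncGenerator, Dict

async def reconstruct_response(stream: AsyncGenerator[Dict[str, Any], None]) -> Dict[str, Any]:
    """Drain an OpenAI-style chunk stream into one chat-completion dict (illustrative)."""
    content_parts = []
    usage = None
    async for chunk in stream:
        for choice in chunk.get("choices", []):
            delta = choice.get("delta") or {}
            if delta.get("content"):
                content_parts.append(delta["content"])
        if chunk.get("usage"):  # final usage chunk, if the provider sends one
            usage = chunk["usage"]
    return {
        "choices": [{"index": 0,
                     "message": {"role": "assistant", "content": "".join(content_parts)},
                     "finish_reason": "stop"}],
        "usage": usage,
    }
```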
@@ -239,7 +372,14 @@ class LiteLLMSkill(Skill):
         """
         Create a streaming chat completion using LiteLLM
         """
-
+        # If streaming is disabled for this skill, fallback to non-streaming and yield once
+        if self.disable_streaming:
+            non_stream_response = await self.chat_completion(messages, model=model, tools=tools, stream=False, **kwargs)
+            # Normalize into a single streaming-style chunk
+            normalized = self._normalize_response(non_stream_response, model or self.current_model)
+            yield normalized
+            return
+
         target_model = model or self.current_model

         try:
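`disable_streaming` is useful for models or proxies that misbehave under streaming: callers can keep requesting a stream while the skill quietly does one non-streaming call. A hedged configuration example (only the config keys come from this diff; the construction style is assumed):

```python
# Assumed construction style; config keys match those read in __init__ above.
from webagents.agents.skills.core.llm.litellm.skill import LiteLLMSkill

skill = LiteLLMSkill({
    "model": "vertex_ai/gemini-2.5-flash",
    "disable_streaming": True,         # chat_completion_stream yields a single normalized chunk
    "custom_llm_provider": "openai",   # force OpenAI-compatible routing (e.g. through a proxy)
    "base_url": "https://llm-proxy.example.com/v1",  # hypothetical proxy endpoint
})
```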
@@ -293,7 +433,7 @@ class LiteLLMSkill(Skill):
             return self.api_keys.get('anthropic')
         elif model.startswith('xai/') or model.startswith('grok') or model == 'grok-4':
             return self.api_keys.get('xai')
-        elif model.startswith('google/') or model.startswith('gemini'):
+        elif model.startswith('google/') or model.startswith('gemini') or model.startswith('vertex_ai/'):
             return self.api_keys.get('google')
         else:
             # Try to find a matching provider from model configs
@@ -303,6 +443,271 @@ class LiteLLMSkill(Skill):
             # Fallback to default
             return self.api_keys.get('openai')

+
+    async def _upload_image_to_content_api(self, image_base64_url: str, model: str) -> str:
+        """
+        Upload base64 image data to content API and return a public URL.
+        Similar to openlicense skill approach.
+        """
+        if not HTTPX_AVAILABLE:
+            self.logger.warning("httpx not available, cannot upload image to content API")
+            return image_base64_url
+
+        try:
+            # Extract base64 data from data URL
+            if not image_base64_url.startswith('data:image/'):
+                self.logger.warning(f"Invalid image URL format: {image_base64_url[:50]}...")
+                return image_base64_url  # Return as-is if not a data URL
+
+            # Parse the data URL: data:image/png;base64,<base64_data>
+            header, base64_data = image_base64_url.split(',', 1)
+            image_format = 'png'  # Default to PNG
+
+            # Extract format from header if available
+            if 'image/' in header:
+                try:
+                    format_part = header.split('image/')[1].split(';')[0]
+                    if format_part in ['png', 'jpeg', 'jpg', 'webp']:
+                        image_format = format_part
+                except:
+                    pass  # Use default PNG
+
+            # Decode base64 data
+            image_data = base64.b64decode(base64_data)
+            self.logger.debug(f"Decoded image data: {len(image_data)} bytes, format: {image_format}")
+
+            # Generate a short filename
+            short_id = hashlib.md5(str(uuid.uuid4()).encode()).hexdigest()[:8]
+            filename = f"gemini_{short_id}.{image_format}"
+
+            # Get portal URL from environment (same as openlicense skill)
+            portal_url = os.getenv("ROBUTLER_INTERNAL_API_URL", "https://robutler.ai")
+            upload_url = f"{portal_url}/api/content"
+
+            # Prepare metadata for upload
+            description = f"AI-generated image from {model}"
+            tags = ['ai-generated', 'gemini', 'litellm']
+
+            # Create form data for upload
+            files = {
+                'file': (filename, image_data, f'image/{image_format}')
+            }
+
+            data = {
+                'description': description,
+                'tags': ','.join(tags),
+                'userId': 'gemini-agent',  # Store under agent account like openlicense
+                'visibility': 'public'
+            }
+
+            # Get API key from context (similar to openlicense approach)
+            try:
+                from webagents.server.context.context_vars import get_context
+                context = get_context()
+                api_key = None
+
+                if context:
+                    # Try multiple possible key names
+                    api_key = (context.get("api_key") or
+                               context.get("robutler_api_key") or
+                               context.get("agent_api_key") or
+                               getattr(context, 'api_key', None))
+
+                    # Also try to get from identity info or token info
+                    if not api_key:
+                        identity_info = context.get("identity_info")
+                        if identity_info and isinstance(identity_info, dict):
+                            api_key = identity_info.get("api_key")
+
+                    if not api_key:
+                        token_info = context.get("token_info")
+                        if token_info and isinstance(token_info, dict):
+                            api_key = token_info.get("api_key")
+
+                # Fallback to skill config
+                if not api_key and hasattr(self, 'config') and self.config:
+                    api_key = self.config.get('robutler_api_key')
+
+                # Try environment variables as last resort
+                if not api_key:
+                    api_key = os.getenv('ROBUTLER_API_KEY') or os.getenv('API_KEY')
+
+                if not api_key:
+                    self.logger.warning("No API key found for content upload, trying without authentication")
+                    # Don't return early - try the upload anyway, it might work without auth in dev mode
+
+                headers = {}
+                if api_key:
+                    headers['Authorization'] = f'Bearer {api_key}'
+
+                # Upload the image
+                async with httpx.AsyncClient(timeout=30.0) as client:
+                    response = await client.post(upload_url, files=files, data=data, headers=headers)
+
+                    if response.status_code == 200:
+                        result = response.json()
+                        public_url = result.get('publicUrl')
+
+                        if public_url:
+                            # Rewrite URL to chat server if needed (like openlicense)
+                            chat_base = (os.getenv('ROBUTLER_CHAT_URL') or 'http://localhost:3001').rstrip('/')
+                            if public_url.startswith('/api/content/public'):
+                                public_url = f"{chat_base}{public_url}"
+
+                            self.logger.info(f"Successfully uploaded image: {filename} -> {public_url}")
+                            return public_url
+                        else:
+                            self.logger.error(f"Upload successful but no publicUrl in response: {result}")
+                            return image_base64_url
+                    else:
+                        self.logger.error(f"Failed to upload image: {response.status_code} - {response.text}")
+                        return image_base64_url
+
+            except Exception as e:
+                self.logger.error(f"Error during image upload: {e}")
+                return image_base64_url
+
+        except Exception as e:
+            self.logger.error(f"Failed to process image for upload: {e}")
+            return image_base64_url
+
+    def _truncate_data_urls_in_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Truncate data URLs in params for safe logging"""
+        import copy
+        safe_params = copy.deepcopy(params)
+
+        messages = safe_params.get('messages', [])
+        for msg in messages:
+            content = msg.get('content')
+            if isinstance(content, list):
+                for part in content:
+                    if isinstance(part, dict) and part.get('type') == 'image_url':
+                        url = part.get('image_url', {}).get('url', '')
+                        if url.startswith('data:') and len(url) > 100:
+                            # Truncate data URL
+                            prefix = url.split(',', 1)[0] if ',' in url else url[:50]
+                            part['image_url']['url'] = f"{prefix},...[TRUNCATED {len(url)} bytes]"
+            elif isinstance(content, str) and content.startswith('data:') and len(content) > 100:
+                prefix = content.split(',', 1)[0] if ',' in content else content[:50]
+                msg['content'] = f"{prefix},...[TRUNCATED {len(content)} bytes]"
+
+        return safe_params
+
596
|
+
def _optimize_vertex_ai_params(self, params: Dict[str, Any], model: str) -> Dict[str, Any]:
|
597
|
+
"""Optimize parameters for Vertex AI models"""
|
598
|
+
optimized_params = params.copy()
|
599
|
+
|
600
|
+
# Check if this is a Vertex AI model
|
601
|
+
is_vertex_model = (
|
602
|
+
model.startswith('vertex_ai/') or
|
603
|
+
model.startswith('gemini-') or
|
604
|
+
'vertex' in model.lower()
|
605
|
+
)
|
606
|
+
|
607
|
+
if is_vertex_model:
|
608
|
+
is_image_model = "image" in model.lower()
|
609
|
+
has_tools = "tools" in optimized_params and optimized_params.get("tools")
|
610
|
+
|
611
|
+
# Always include usage for streaming requests (tools + images supported per latest guidance)
|
612
|
+
if optimized_params.get('stream'):
|
613
|
+
optimized_params["stream_options"] = {"include_usage": True}
|
614
|
+
|
615
|
+
# If image model and tools are provided in OpenAI format, convert to Vertex function_declarations
|
616
|
+
if is_image_model and has_tools:
|
617
|
+
try:
|
618
|
+
tools_value = optimized_params.get("tools")
|
619
|
+
# If tools is already an object with function_declarations, keep as-is
|
620
|
+
if isinstance(tools_value, dict) and "function_declarations" in tools_value:
|
621
|
+
pass
|
622
|
+
else:
|
623
|
+
# Expect OpenAI-format list -> convert
|
624
|
+
if isinstance(tools_value, list):
|
625
|
+
fdecls = []
|
626
|
+
for t in tools_value:
|
627
|
+
if isinstance(t, dict) and t.get("type") == "function" and "function" in t:
|
628
|
+
fdecls.append(t["function"])
|
629
|
+
if fdecls:
|
630
|
+
optimized_params["tools"] = {"function_declarations": fdecls}
|
631
|
+
self.logger.debug(f"Converted tools to function_declarations for {model}")
|
632
|
+
except Exception as e:
|
633
|
+
self.logger.debug(f"Tool conversion skipped due to error: {e}")
|
634
|
+
|
635
|
+
# Set optimal temperature for Vertex AI if not specified
|
636
|
+
if 'temperature' not in params or params['temperature'] is None:
|
637
|
+
optimized_params['temperature'] = 0.7
|
638
|
+
|
639
|
+
# Ensure reasonable token limits for Vertex AI
|
640
|
+
if not optimized_params.get('max_tokens') and not self.max_tokens:
|
641
|
+
optimized_params['max_tokens'] = 8192
|
642
|
+
|
643
|
+
return optimized_params
|
644
|
+
|
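The tool conversion above is a pure dict reshape from the OpenAI tools schema to Vertex-style `function_declarations`. Mirroring the loop in `_optimize_vertex_ai_params`, the before/after shapes look like this (the tool name and schema are made up for illustration):

```python
# OpenAI-format tool list, as found in params["tools"]:
openai_tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "description": "Look up current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

# Equivalent Vertex AI shape produced for image models:
vertex_tools = {
    "function_declarations": [
        t["function"] for t in openai_tools
        if t.get("type") == "function" and "function" in t
    ]
}
```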
+    def _convert_markdown_images_to_multimodal(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Convert markdown image links to multimodal format for vision models.
+        This allows LLMs to see images visually while preserving URLs as text.
+
+        Pattern: ![alt](url) -> multimodal content with both text and image
+        """
+        markdown_image_pattern = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
+        processed_messages = []
+
+        for message in messages:
+            msg_copy = dict(message)
+            content = msg_copy.get('content')
+
+            # Only process user/assistant messages with string content containing markdown images
+            if msg_copy.get('role') not in ('user', 'assistant') or not isinstance(content, str):
+                processed_messages.append(msg_copy)
+                continue
+
+            # Check if there are markdown images
+            markdown_images = markdown_image_pattern.findall(content)
+            if not markdown_images:
+                processed_messages.append(msg_copy)
+                continue
+
+            # Convert to multimodal format
+            content_parts = []
+            last_end = 0
+
+            for match in markdown_image_pattern.finditer(content):
+                # Add text before the image (including the markdown link for URL extraction)
+                text_chunk = content[last_end:match.end()].strip()
+                if text_chunk:
+                    content_parts.append({
+                        "type": "text",
+                        "text": text_chunk
+                    })
+
+                # Add the image part
+                alt_text, image_url = match.groups()
+                content_parts.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url
+                    }
+                })
+
+                last_end = match.end()
+
+            # Add any remaining text after the last image
+            text_after = content[last_end:].strip()
+            if text_after:
+                content_parts.append({
+                    "type": "text",
+                    "text": text_after
+                })
+
+            # Update message with multimodal content
+            if content_parts:
+                msg_copy['content'] = content_parts
+                self.logger.info(f"🖼️ Converted {len(markdown_images)} markdown image(s) to multimodal format")
+
+            processed_messages.append(msg_copy)
+
+        return processed_messages
+
     async def _execute_completion(self, messages: List[Dict[str, Any]],
                                   model: str,
                                   tools: Optional[List[Dict[str, Any]]] = None,
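As a concrete illustration of `_convert_markdown_images_to_multimodal`: a user message embedding one markdown image becomes a two-part multimodal payload. Note that the text part deliberately keeps the markdown link (the slice runs through `match.end()`), so the URL also survives as plain text:

```python
before = {
    "role": "user",
    "content": "What is in this picture? ![photo](https://example.com/cat.png)",
}

# After conversion (values derived by tracing the method above):
after = {
    "role": "user",
    "content": [
        {"type": "text",
         "text": "What is in this picture? ![photo](https://example.com/cat.png)"},
        {"type": "image_url",
         "image_url": {"url": "https://example.com/cat.png"}},
    ],
}
```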
@@ -310,6 +715,16 @@ class LiteLLMSkill(Skill):
                                  **kwargs) -> Dict[str, Any]:
         """Execute a single completion request"""

+        # Convert markdown images to multimodal format for vision models
+        messages = self._convert_markdown_images_to_multimodal(messages)
+
+        # For Vertex AI image models, use direct HTTP to preserve custom fields
+        is_vertex_image_model = (
+            'image' in model.lower() and
+            (model.startswith('vertex_ai/') or model.startswith('gemini-'))
+        )
+
+
         # Prepare parameters
         params = {
             "model": model,
@@ -317,11 +732,20 @@ class LiteLLMSkill(Skill):
             "temperature": kwargs.get('temperature', self.temperature),
             "stream": stream,
             # Ensure usage is available when streaming is requested later
+            # Note: stream_options will be set by _optimize_vertex_ai_params for supported models
             "stream_options": {"include_usage": True} if stream else None,
         }
+
+        # Force a specific provider routing when using an OpenAI-compatible proxy
+        # For image models, always use 'openai' to prevent response filtering and disable caching
+        if self.custom_llm_provider or is_vertex_image_model:
+            params["custom_llm_provider"] = self.custom_llm_provider or 'openai'
+            if is_vertex_image_model:
+                params["caching"] = False  # Disable caching for image models
+                self.logger.debug(f"Using custom_llm_provider='openai' and disabled caching for image model {model}")

         # Add base URL if configured (for proxy support)
-        if
+        if self.config and 'base_url' in self.config:
             params["api_base"] = self.config['base_url']

         # Add max_tokens if specified
@@ -346,8 +770,10 @@ class LiteLLMSkill(Skill):
         if api_key:
             params["api_key"] = api_key

+        # Optimize parameters for Vertex AI models
+        params = self._optimize_vertex_ai_params(params, model)
+
         self.logger.debug(f"Executing completion with model {model}")
-        self.logger.debug(f"Parameters: {params}")

         # Validate parameters before calling LiteLLM
         if not messages or not isinstance(messages, list):
@@ -366,7 +792,29 @@ class LiteLLMSkill(Skill):
             # Convert LiteLLM response to our format
             return self._normalize_response(response, model)
         except Exception as e:
-
+            # Log params summary without huge data URLs
+            message_summary = []
+            for msg in params.get('messages', []):
+                role = msg.get('role', '?')
+                content = msg.get('content', '')
+                if isinstance(content, list):
+                    parts = []
+                    for part in content:
+                        if part.get('type') == 'image_url':
+                            url = part.get('image_url', {}).get('url', '')
+                            if url.startswith('data:'):
+                                parts.append('[data:image]')
+                            else:
+                                parts.append(f'[image:{url[:30]}...]')
+                        elif part.get('type') == 'text':
+                            parts.append(f'"{part.get("text", "")[:50]}..."')
+                    message_summary.append(f"{role}: [{', '.join(parts)}]")
+                else:
+                    message_summary.append(f"{role}: {str(content)[:100]}...")
+
+            self.logger.error(f"LiteLLM completion failed for model={params.get('model')}")
+            self.logger.error(f"Messages: {'; '.join(message_summary)}")
+            self.logger.error(f"Tools: {len(params.get('tools', []))} tool(s)")
             self.logger.error(f"Error details: {type(e).__name__}: {str(e)}")
             raise

@@ -376,6 +824,15 @@ class LiteLLMSkill(Skill):
                               **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
         """Execute a streaming completion request"""

+        # Convert markdown images to multimodal format for vision models
+        messages = self._convert_markdown_images_to_multimodal(messages)
+
+        # For Vertex AI image models, use custom_llm_provider='openai' to prevent response filtering
+        is_vertex_image_model = (
+            'image' in model.lower() and
+            (model.startswith('vertex_ai/') or model.startswith('gemini-'))
+        )
+
         # Prepare parameters (same as non-streaming)
         params = {
             "model": model,
@@ -383,8 +840,17 @@ class LiteLLMSkill(Skill):
             "temperature": kwargs.get('temperature', self.temperature),
             "stream": True,
             # Include a final usage chunk before [DONE] per LiteLLM docs
+            # Note: stream_options will be optimized by _optimize_vertex_ai_params for model compatibility
             "stream_options": {"include_usage": True},
         }
+
+        # Force a specific provider routing when using an OpenAI-compatible proxy
+        # For image models, always use 'openai' to prevent response filtering and disable caching
+        if self.custom_llm_provider or is_vertex_image_model:
+            params["custom_llm_provider"] = self.custom_llm_provider or 'openai'
+            if is_vertex_image_model:
+                params["caching"] = False  # Disable caching for image models
+                self.logger.debug(f"Using custom_llm_provider='openai' and disabled caching for streaming image model {model}")

         # Add base URL if configured (for proxy support)
         if self.config and 'base_url' in self.config:
@@ -410,47 +876,173 @@ class LiteLLMSkill(Skill):
         if api_key:
             params["api_key"] = api_key

+        # Optimize parameters for Vertex AI models
+        params = self._optimize_vertex_ai_params(params, model)
+
         self.logger.debug(f"Executing streaming completion with model {model}")

+        # Add special handling for Vertex AI models
+        is_vertex_model = (
+            model.startswith('vertex_ai/') or
+            model.startswith('gemini-') or
+            'vertex' in model.lower()
+        )
+
         # Execute streaming completion
         stream = await acompletion(**params)

+        chunk_count = 0
+        content_chunks = 0
+
         async for chunk in stream:
+            chunk_count += 1
+
             # Normalize and yield chunk
             normalized_chunk = self._normalize_streaming_chunk(chunk, model)
+            # After normalization, upload any data:image content inside the same loop/task
+            try:
+                normalized_chunk = await self._upload_and_rewrite_chunk_images(normalized_chunk, model)
+            except Exception:
+                pass
+
+            # Debug Vertex AI streaming
+            if is_vertex_model and chunk_count <= 3:  # Log first few chunks for debugging
+                if isinstance(normalized_chunk, dict):
+                    choices = normalized_chunk.get('choices', [])
+                    if choices and len(choices) > 0:
+                        delta = choices[0].get('delta', {})
+                        if 'content' in delta and delta['content']:
+                            content_chunks += 1
+                            self.logger.debug(f"Vertex AI streaming chunk {chunk_count}: got content ({len(delta['content'])} chars)")

             # If LiteLLM sent a final usage chunk, log tokens to context.usage
             try:
                 usage = normalized_chunk.get('usage') if isinstance(normalized_chunk, dict) else None
-                is_final_usage_chunk = (
-                    usage
-                    and isinstance(usage, dict)
-                    and (not normalized_chunk.get('choices'))
-                )
-                if is_final_usage_chunk:
+                if usage and isinstance(usage, dict):
                     prompt_tokens = int(usage.get('prompt_tokens') or 0)
                     completion_tokens = int(usage.get('completion_tokens') or 0)
                     total_tokens = int(usage.get('total_tokens') or (prompt_tokens + completion_tokens))
+
+                    # DEBUG: Log all usage fields to see what Gemini sends
+                    self.logger.info(f"🔍 USAGE CHUNK RECEIVED: {usage}")
+
                     self._append_usage_record(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, streaming=True)
-            except Exception:
+            except Exception as e:
                 # Never break streaming on usage logging
+                self.logger.warning(f"💰 LiteLLM streaming: Failed to log usage: {e}")
                 pass
             yield normalized_chunk
+
+        # Log streaming completion stats for Vertex AI models
+        if is_vertex_model:
+            self.logger.debug(f"Vertex AI streaming completed: {chunk_count} total chunks, {content_chunks} with content")

     def _normalize_response(self, response: Any, model: str) -> Dict[str, Any]:
-        """Normalize LiteLLM response to OpenAI format"""
+        """Normalize LiteLLM response to OpenAI format and handle images"""
+
+        self.logger.info(f"🔍 _normalize_response called for model: {model}")
+
+        # Log the raw response object type and attributes
+        self.logger.debug(f"🔍 Raw response type: {type(response)}")
+        if hasattr(response, '__dict__'):
+            all_attrs = list(vars(response).keys())
+            self.logger.debug(f"🔍 Raw response attributes: {all_attrs[:20]}")
+
+        # Convert response to dict if needed
+        if hasattr(response, 'model_dump'):
+            response_dict = response.model_dump()
+        elif hasattr(response, 'dict'):
+            response_dict = response.dict()
+        elif isinstance(response, dict):
+            response_dict = response.copy()
+        else:
+            response_dict = dict(response) if response else {}
+
+        self.logger.debug(f"🔍 Response dict keys: {response_dict.keys()}")
+
+        # Check for response data in dict format
+        if 'choices' in response_dict and response_dict['choices']:
+            self.logger.debug(f"🔍 First choice keys: {response_dict['choices'][0].keys()}")
+            if 'message' in response_dict['choices'][0]:
+                msg = response_dict['choices'][0]['message']
+                self.logger.debug(f"🔍 Message keys: {msg.keys()}")
+                self.logger.debug(f"🔍 Message content length: {len(str(msg.get('content', '')))}")
+
+        # Ensure model name is correct and includes provider prefix for cost lookup
+        # Add vertex_ai/ prefix if it's a Gemini model without a provider prefix
+        if model and not model.startswith(('vertex_ai/', 'openai/', 'anthropic/', 'xai/')):
+            if 'gemini' in model.lower() or 'flash' in model.lower():
+                response_dict['model'] = f"vertex_ai/{model}"
+            else:
+                response_dict['model'] = model
+        else:
+            response_dict['model'] = model

-        #
-
-
-
-
-
+        # Handle custom image field from Gemini models
+        if 'choices' in response_dict and response_dict['choices']:
+            for choice in response_dict['choices']:
+                if 'message' in choice and choice['message']:
+                    message = choice['message']
+
+                    # Check for custom image field
+                    if 'image' in message and message['image']:
+                        image_data = message['image']
+
+                        self.logger.info(f"Found image field in non-streaming response for {model}")
+
+                        # Convert image to markdown format for display
+                        if 'url' in image_data and image_data['url']:
+                            image_url = image_data['url']
+
+                            # Upload all base64 images to content API for better performance and reliability
+                            if image_url.startswith('data:image/'):
+                                self.logger.info(f"Base64 image detected ({len(image_url)} chars), uploading to content API")
+                                try:
+                                    # Handle async upload in sync context
+                                    import asyncio
+                                    import concurrent.futures
+
+                                    def run_upload():
+                                        return asyncio.run(self._upload_image_to_content_api(image_url, model))
+
+                                    try:
+                                        loop = asyncio.get_event_loop()
+                                        if loop.is_running():
+                                            # If loop is already running, run in a separate thread
+                                            with concurrent.futures.ThreadPoolExecutor() as executor:
+                                                future = executor.submit(run_upload)
+                                                uploaded_url = future.result(timeout=30)
+                                        else:
+                                            uploaded_url = loop.run_until_complete(self._upload_image_to_content_api(image_url, model))
+                                    except RuntimeError:
+                                        # Fallback: run in new event loop
+                                        uploaded_url = run_upload()
+
+                                    if uploaded_url != image_url:  # Only update if upload was successful
+                                        image_url = uploaded_url
+                                        self.logger.info(f"Successfully uploaded image to: {uploaded_url}")
+                                except Exception as e:
+                                    self.logger.error(f"Failed to upload image: {e}, using original URL")
+
+                            # Create markdown image syntax
+                            image_markdown = f"![Generated Image]({image_url})"
+
+                            # Append to content or replace if no content
+                            current_content = message.get('content') or ''
+                            if current_content:
+                                message['content'] = f"{current_content}\n\n{image_markdown}"
+                            else:
+                                message['content'] = image_markdown
+
+                            self.logger.info(f"Converted image field to markdown for model {model}")
+
+                            # Remove the custom image field since we've converted it
+                            del message['image']

-        return
+        return response_dict

     def _normalize_streaming_chunk(self, chunk: Any, model: str) -> Dict[str, Any]:
-        """Normalize LiteLLM streaming chunk to OpenAI format"""
+        """Normalize LiteLLM streaming chunk to OpenAI format and handle images"""

         # Convert chunk to dictionary if it's not already
         if hasattr(chunk, 'model_dump'):
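The loosened usage check (any dict-valued `usage`, rather than requiring an empty `choices` list) matters because providers disagree about the final chunk's shape. Under `stream_options={"include_usage": True}` an OpenAI-style tail chunk typically looks like the sketch below, while Gemini via LiteLLM may attach `usage` to a chunk that still carries choices; the field values here are invented:

```python
# Typical final usage chunk per the OpenAI streaming convention (illustrative values).
final_chunk = {
    "id": "chatcmpl-abc123",
    "object": "chat.completion.chunk",
    "model": "vertex_ai/gemini-2.5-flash",
    "choices": [],  # empty for OpenAI; may be non-empty for other providers
    "usage": {
        "prompt_tokens": 420,
        "completion_tokens": 96,
        "total_tokens": 516,
    },
}
```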
@@ -473,10 +1065,84 @@ class LiteLLMSkill(Skill):
             # Fallback - return as-is and hope for the best
             return chunk

-        # Ensure model name is correct
-
+        # Ensure model name is correct and includes provider prefix for cost lookup
+        # Add vertex_ai/ prefix if it's a Gemini model without a provider prefix
+        if model and not model.startswith(('vertex_ai/', 'openai/', 'anthropic/', 'xai/')):
+            if 'gemini' in model.lower() or 'flash' in model.lower():
+                chunk_dict['model'] = f"vertex_ai/{model}"
+            else:
+                chunk_dict['model'] = model
+        else:
+            chunk_dict['model'] = model
+
+        # Handle custom image field from Gemini models in streaming
+        if 'choices' in chunk_dict and chunk_dict['choices']:
+            for choice in chunk_dict['choices']:
+                # Check both delta and message for image data
+                for message_key in ['delta', 'message']:
+                    if message_key in choice and choice[message_key]:
+                        message = choice[message_key]
+
+                        # Check for custom image field
+                        if 'image' in message and message['image']:
+                            self.logger.info(f"🔍 STREAMING: Found 'image' field in {message_key}")
+                            image_data = message['image']
+
+                            # Convert image to markdown format for display (do not upload here)
+                            if 'url' in image_data and image_data['url']:
+                                image_url = image_data['url']
+                                self.logger.info(f"🔍 STREAMING: Image URL: {image_url[:100]}...")
+
+                                # Create markdown image syntax
+                                image_markdown = f"![Generated Image]({image_url})\n\n"
+
+                                # For streaming, replace any existing content with the image markdown
+                                message['content'] = image_markdown
+
+                                self.logger.info(f"Converted streaming image field to markdown for model {model}")
+
+                                # Remove the custom image field since we've converted it
+                                del message['image']

         return chunk_dict
+
+    async def _upload_and_rewrite_chunk_images(self, chunk: Dict[str, Any], model: str) -> Dict[str, Any]:
+        """
+        Detect data:image URLs in chunk content, upload to content API, rewrite URLs,
+        and log pricing. Runs inside the streaming loop so context is available.
+        """
+        try:
+            if not chunk or 'choices' not in chunk:
+                return chunk
+            data_url_pattern = re.compile(r"data:image/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+")
+            image_logged = False
+            for choice in chunk['choices']:
+                for key in ['delta', 'message']:
+                    if key in choice and choice[key] and isinstance(choice[key], dict):
+                        msg = choice[key]
+                        content = msg.get('content')
+                        if isinstance(content, str):
+                            matches = list(data_url_pattern.finditer(content))
+                            if not matches:
+                                continue
+                            new_content = content
+                            for m in matches:
+                                data_url = m.group(0)
+                                try:
+                                    public_url = await self._upload_image_to_content_api(data_url, model)
+                                    if public_url and public_url != data_url:
+                                        new_content = new_content.replace(data_url, public_url)
+                                        # Always log one pricing record per uploaded image URL
+                                        self._log_image_upload_pricing()
+                                        image_logged = True
+                                except Exception as e:
+                                    self.logger.error(f"Image upload failed during streaming rewrite: {e}")
+                            if new_content != content:
+                                msg['content'] = new_content
+            return chunk
+        except Exception as e:
+            self.logger.error(f"Error rewriting streaming image URLs: {e}")
+            return chunk

     def _append_usage_record(
         self,
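Because the regex in `_upload_and_rewrite_chunk_images` matches the full contiguous data URL, each rewrite is a literal string replacement. A self-contained sketch of the detect-and-rewrite half, with the upload call replaced by a stub (the URL and base64 payload are fabricated):

```python
import re

data_url_pattern = re.compile(r"data:image/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+")

content = "Here you go: ![img](data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==)"

def fake_upload(data_url: str) -> str:
    # Stand-in for _upload_image_to_content_api(); returns a hosted URL.
    return "https://chat.example.com/api/content/public/abc123.png"

new_content = content
for m in list(data_url_pattern.finditer(content)):
    new_content = new_content.replace(m.group(0), fake_upload(m.group(0)))

print(new_content)  # ... ![img](https://chat.example.com/api/content/public/abc123.png)
```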
@@ -492,7 +1158,8 @@ class LiteLLMSkill(Skill):
             context = get_context()
             if not context or not hasattr(context, 'usage'):
                 return
-
+
+            record = {
                 'type': 'llm',
                 'skill': 'litellm',
                 'model': model,
@@ -501,7 +1168,8 @@ class LiteLLMSkill(Skill):
                 'total_tokens': int(total_tokens or 0),
                 'streaming': bool(streaming),
                 'timestamp': time.time(),
-            }
+            }
+            context.usage.append(record)
         except Exception:
             # Do not raise from logging
             return
@@ -531,6 +1199,217 @@ class LiteLLMSkill(Skill):

         return str(response)

+    async def _upload_image_to_content_api(self, image_base64_url: str, model: str) -> str:
+        """
+        Upload base64 image data to content API and return a public URL.
+        Uses the same approach as the openlicense skill.
+        Charges 5 cents per image upload.
+        """
+        if not HTTPX_AVAILABLE:
+            self.logger.warning("httpx not available, cannot upload image to content API")
+            return image_base64_url
+
+        try:
+            # Extract base64 data from data URL
+            if not image_base64_url.startswith('data:image/'):
+                self.logger.warning(f"Invalid image URL format: {image_base64_url[:50]}...")
+                return image_base64_url  # Return as-is if not a data URL
+
+            # Parse the data URL: data:image/png;base64,<data>
+            try:
+                header, data = image_base64_url.split(',', 1)
+                image_format = header.split('/')[1].split(';')[0]  # Extract format (png, jpeg, etc.)
+                image_data = base64.b64decode(data)
+            except Exception as e:
+                self.logger.error(f"Failed to parse base64 image data: {e}")
+                return image_base64_url
+
+            # Get API key - try multiple sources with comprehensive fallbacks
+            agent_api_key = None
+
+            # Method 1: Try context first (same approach as openlicense skill)
+            try:
+                from webagents.server.context.context_vars import get_context
+                context = get_context()
+
+                if context:
+                    # Try multiple possible keys (same as openlicense)
+                    agent_api_key = (context.get("api_key") or
+                                     context.get("robutler_api_key") or
+                                     context.get("agent_api_key") or
+                                     getattr(context, 'api_key', None))
+
+                    if agent_api_key:
+                        self.logger.info(f"Found agent API key from context: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                    else:
+                        self.logger.debug("No agent API key found in context")
+                else:
+                    self.logger.debug("No context available")
+            except Exception as e:
+                self.logger.debug(f"Error accessing context: {e}")
+
+            # Method 2: Try skill's own config (from dynamic factory)
+            if not agent_api_key and hasattr(self, 'config') and self.config:
+                self.logger.debug("Trying to get API key from skill config...")
+                try:
+                    # For r-banana, the API key should be in api_keys dict
+                    if isinstance(self.config, dict):
+                        # Try robutler_api_key first
+                        agent_api_key = self.config.get('robutler_api_key')
+
+                        # Try api_keys dict (from dynamic factory)
+                        if not agent_api_key and 'api_keys' in self.config:
+                            api_keys = self.config['api_keys']
+                            if isinstance(api_keys, dict):
+                                # Try different provider keys
+                                agent_api_key = (api_keys.get('azure') or
+                                                 api_keys.get('openai') or
+                                                 api_keys.get('anthropic') or
+                                                 api_keys.get('google'))
+
+                    if agent_api_key:
+                        self.logger.info(f"Found agent API key from skill config: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                    else:
+                        self.logger.debug(f"No API key found in skill config. Config keys: {list(self.config.keys())}")
+                        if 'api_keys' in self.config:
+                            self.logger.debug(f"api_keys dict keys: {list(self.config['api_keys'].keys()) if isinstance(self.config['api_keys'], dict) else 'not a dict'}")
+                except (KeyError, TypeError, AttributeError) as e:
+                    self.logger.debug(f"Could not access skill config: {e}")
+
+            # Method 3: Try environment variables as last resort
+            if not agent_api_key:
+                self.logger.debug("Trying environment variables as fallback...")
+                agent_api_key = (os.getenv('ROBUTLER_API_KEY') or
+                                 os.getenv('API_KEY') or
+                                 os.getenv('AGENT_API_KEY'))
+                if agent_api_key:
+                    self.logger.info(f"Found agent API key from environment: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                else:
+                    self.logger.debug("No API key found in environment variables")
+
+            if not agent_api_key:
+                self.logger.error("No agent API key found anywhere - content upload will fail")
+                return image_base64_url  # Fallback to original URL
+
+            # Get portal URL (same as openlicense)
+            portal_url = os.getenv('ROBUTLER_INTERNAL_API_URL', 'http://localhost:3000')
+            upload_url = f"{portal_url}/api/content"
+
+            # Get agent ID for access scope
+            agent_id = None
+            if context:
+                agent_id = context.get("agent_id") or context.get("current_agent_id")
+                if agent_id:
+                    self.logger.debug(f"Found agent_id in context for access scope: {agent_id}")
+
+            # Prepare file data
+            filename = f"ai_image_{uuid.uuid4().hex[:8]}.{image_format}"
+            files = {
+                'file': (filename, image_data, f'image/{image_format}')
+            }
+
+            # Prepare metadata
+            description = f"AI-generated image from {model}"
+            tags = ['ai-generated', 'litellm-skill', model.replace('/', '-')]
+
+            # Prepare agent access
+            grant_agent_access = []
+            if agent_id:
+                grant_agent_access.append(agent_id)
+                self.logger.debug(f"Granting agent access to: {agent_id}")
+
+            data = {
+                'visibility': 'public',
+                'description': description,
+                'tags': json.dumps(tags),
+                'grantAgentAccess': json.dumps(grant_agent_access) if grant_agent_access else None
+            }
+
+            # Make authenticated request to upload API (same as openlicense)
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                # Prepare headers with proper API key authentication
+                headers = {'User-Agent': 'LiteLLM-Skill/1.0'}
+
+                if agent_api_key:
+                    # Use Bearer token format as expected by the content API
+                    headers['Authorization'] = f'Bearer {agent_api_key}'
+                    self.logger.debug("Added Authorization header with Bearer token")
+                else:
+                    self.logger.warning("No API key available for authentication - upload may fail")
+
+                response = await client.post(
+                    upload_url,
+                    files=files,
+                    data=data,
+                    headers=headers
+                )
+
+                if response.status_code in [200, 201]:
+                    result = response.json()
+                    content_url = result.get('url')
+                    if content_url:
+                        # Replace portal URL with chat URL for public access (same as openlicense skill)
+                        chat_base = (os.getenv('ROBUTLER_CHAT_URL') or 'http://localhost:3001').rstrip('/')
+                        portal_base = (os.getenv('ROBUTLER_INTERNAL_API_URL') or 'http://localhost:3000').rstrip('/')
+
+                        if content_url.startswith(portal_base):
+                            # Replace portal base with chat base for public URL
+                            public_url = content_url.replace(portal_base, chat_base, 1)
+                            self.logger.info(f"Successfully uploaded image to content API: {public_url}")
+
+                            return public_url
+                        else:
+                            self.logger.info(f"Successfully uploaded image to content API: {content_url}")
+                            return content_url
+                    else:
+                        self.logger.error(f"Upload successful but no URL returned: {result}")
+                        return image_base64_url
+                else:
+                    self.logger.error(f"Failed to upload image to content API: {response.status_code} - {response.text}")
+                    return image_base64_url
+
+        except Exception as e:
+            self.logger.error(f"Error uploading image to content API: {e}")
+            return image_base64_url
+
+    def _log_image_upload_pricing(self) -> None:
+        """
+        FIXME: this is a temporary hack to log image upload pricing to context for PaymentSkill to process.
+        This should be removed in V0.3.0
+        Log image upload pricing to context for PaymentSkill to process.
+        Charges 5 cents per image upload.
+        """
+        try:
+            from webagents.server.context.context_vars import get_context
+            context = get_context()
+
+            if context:
+                # Initialize usage list if not present
+                if not hasattr(context, 'usage') or context.usage is None:
+                    context.usage = []
+
+                # Add tool usage record in the format PaymentSkill expects
+                usage_record = {
+                    'type': 'tool',
+                    'tool_name': 'image_upload',
+                    'pricing': {
+                        'credits': 0.05,
+                        'reason': 'AI image upload to content API',
+                        'metadata': {
+                            'service': 'image_upload',
+                            'model': 'content_api'
+                        }
+                    }
+                }
+
+                context.usage.append(usage_record)
+                self.logger.info(f"💰 Logged image upload pricing: 0.05 credits - AI image upload to content API")
+            else:
+                self.logger.warning("No context available to log image upload pricing")
+
+        except Exception as e:
+            self.logger.error(f"Error logging image upload pricing: {e}")
+
     async def generate_embedding(self, text: str, model: Optional[str] = None) -> List[float]:
         """Generate embeddings (placeholder for V2.1)"""
         # This would use LiteLLM's embedding support in V2.1
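Two notes on this last hunk: the second `_upload_image_to_content_api` definition shadows the one added earlier in the same file (Python keeps the later definition), and the usage record written by `_log_image_upload_pricing` is meant to be drained by PaymentSkill (updated in this release as well). A hypothetical sketch of that consumption, mainly to show the record shape round-trip; the real billing logic is in `payments/skill.py`:

```python
def total_tool_credits(usage_records: list) -> float:
    """Sum credits from 'tool'-type pricing records (illustrative, not PaymentSkill)."""
    total = 0.0
    for record in usage_records:
        if record.get("type") == "tool":
            total += float(record.get("pricing", {}).get("credits", 0.0))
    return total

usage = [{
    "type": "tool",
    "tool_name": "image_upload",
    "pricing": {
        "credits": 0.05,  # 5 cents per image upload, as documented above
        "reason": "AI image upload to content API",
        "metadata": {"service": "image_upload", "model": "content_api"},
    },
}]

assert abs(total_tool_credits(usage) - 0.05) < 1e-9
```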