webagents 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. webagents/__init__.py +9 -0
  2. webagents/agents/core/base_agent.py +865 -69
  3. webagents/agents/core/handoffs.py +14 -6
  4. webagents/agents/skills/base.py +33 -2
  5. webagents/agents/skills/core/llm/litellm/skill.py +906 -27
  6. webagents/agents/skills/core/memory/vector_memory/skill.py +8 -16
  7. webagents/agents/skills/ecosystem/openai/__init__.py +6 -0
  8. webagents/agents/skills/ecosystem/openai/skill.py +867 -0
  9. webagents/agents/skills/ecosystem/replicate/README.md +440 -0
  10. webagents/agents/skills/ecosystem/replicate/__init__.py +10 -0
  11. webagents/agents/skills/ecosystem/replicate/skill.py +517 -0
  12. webagents/agents/skills/examples/__init__.py +6 -0
  13. webagents/agents/skills/examples/music_player.py +329 -0
  14. webagents/agents/skills/robutler/handoff/__init__.py +6 -0
  15. webagents/agents/skills/robutler/handoff/skill.py +191 -0
  16. webagents/agents/skills/robutler/nli/skill.py +180 -24
  17. webagents/agents/skills/robutler/payments/exceptions.py +27 -7
  18. webagents/agents/skills/robutler/payments/skill.py +64 -14
  19. webagents/agents/skills/robutler/storage/files/skill.py +2 -2
  20. webagents/agents/tools/decorators.py +243 -47
  21. webagents/agents/widgets/__init__.py +6 -0
  22. webagents/agents/widgets/renderer.py +150 -0
  23. webagents/server/core/app.py +130 -15
  24. webagents/server/core/models.py +1 -1
  25. webagents/utils/logging.py +13 -1
  26. {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/METADATA +8 -25
  27. {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/RECORD +30 -20
  28. webagents/agents/skills/ecosystem/openai_agents/__init__.py +0 -0
  29. {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/WHEEL +0 -0
  30. {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/entry_points.txt +0 -0
  31. {webagents-0.2.2.dist-info → webagents-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -21,16 +21,42 @@ import os
  import json
  import time
  import asyncio
+ import base64
+ import uuid
+ import hashlib
+ import tempfile
  from typing import Dict, Any, List, Optional, AsyncGenerator, Union, TYPE_CHECKING
+ import re
  from dataclasses import dataclass

+ try:
+     import httpx
+     HTTPX_AVAILABLE = True
+ except ImportError:
+     HTTPX_AVAILABLE = False
+     httpx = None
+
+ try:
+     from webagents.agents.skills.robutler.payments import pricing
+     PRICING_AVAILABLE = True
+ except ImportError:
+     # Fallback: create a no-op decorator if pricing is not available
+     def pricing(**kwargs):
+         def decorator(func):
+             return func
+         return decorator
+     PRICING_AVAILABLE = False
+
+
  try:
      import litellm
-     from litellm import acompletion
+     from litellm import acompletion, token_counter, register_model
      LITELLM_AVAILABLE = True
  except Exception:
      LITELLM_AVAILABLE = False
      litellm = None
+     token_counter = None
+     register_model = None

  if TYPE_CHECKING:
      from webagents.agents.core.base_agent import BaseAgent
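The new import block guards every optional dependency so the module still imports when extras are missing. A minimal standalone sketch of how the no-op `pricing` fallback keeps decorated code working; the decorated function and its kwargs are hypothetical, only the decorator-factory shape comes from the diff:

```python
# Sketch: mirrors the diff's fallback so @pricing-decorated functions
# stay importable when the payments package is absent.
try:
    from webagents.agents.skills.robutler.payments import pricing
except ImportError:
    def pricing(**kwargs):
        def decorator(func):
            return func  # pass the function through unchanged
        return decorator

@pricing(credits=0.05)  # hypothetical kwargs; the real ones live in the payments skill
def upload_image(data: bytes) -> str:
    return "https://example.invalid/content/abc123"  # placeholder
```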
@@ -72,6 +98,18 @@ class LiteLLMSkill(Skill):
      "claude-3-opus": ModelConfig("claude-3-opus", "anthropic", 4096, True, True),
      "claude-4-opus": ModelConfig("claude-4-opus", "anthropic", 8192, True, True),

+     # Google Vertex AI (Gemini)
+     "vertex_ai/gemini-2.5-pro": ModelConfig("vertex_ai/gemini-2.5-pro", "google", 8192, True, True),
+     "vertex_ai/gemini-2.5-flash": ModelConfig("vertex_ai/gemini-2.5-flash", "google", 8192, True, True),
+     "vertex_ai/gemini-2.5-flash-image": ModelConfig("vertex_ai/gemini-2.5-flash-image", "google", 8192, True, True),
+     "gemini-2.5-pro": ModelConfig("gemini-2.5-pro", "google", 8192, True, True),
+     "gemini-2.5-flash": ModelConfig("gemini-2.5-flash", "google", 8192, True, True),
+     "gemini-2.5-flash-image": ModelConfig("gemini-2.5-flash-image", "google", 8192, True, True),
+     "gemini-pro": ModelConfig("gemini-pro", "google", 8192, True, True),
+     "gemini-flash": ModelConfig("gemini-flash", "google", 8192, True, True),
+     "gemini-image-preview": ModelConfig("gemini-image-preview", "google", 8192, True, True),
+     "gemini-flash-image": ModelConfig("gemini-flash-image", "google", 8192, True, True),
+
      # XAI/Grok
      "xai/grok-4": ModelConfig("xai/grok-4", "xai", 8192, True, True),
      "grok-4": ModelConfig("grok-4", "xai", 8192, True, True),
@@ -88,6 +126,8 @@ class LiteLLMSkill(Skill):
          self.temperature = config.get('temperature', 0.7) if config else 0.7
          self.max_tokens = config.get('max_tokens') if config else None
          self.fallback_models = config.get('fallback_models', []) if config else []
+         self.custom_llm_provider = config.get('custom_llm_provider') if config else None
+         self.disable_streaming = bool(config.get('disable_streaming')) if config else False

          # API configuration
          self.api_keys = self._load_api_keys(config)
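Two new config options: `custom_llm_provider` forces LiteLLM's provider routing (useful behind an OpenAI-compatible proxy) and `disable_streaming` turns streamed calls into single-chunk responses. A construction sketch; the config keys shown here come from the diff, while the values and the `base_url` endpoint are illustrative:

```python
from webagents.agents.skills.core.llm.litellm.skill import LiteLLMSkill

skill = LiteLLMSkill(config={
    "model": "vertex_ai/gemini-2.5-flash",
    "temperature": 0.7,
    "fallback_models": ["gpt-4o-mini"],      # illustrative value
    "custom_llm_provider": "openai",         # route via an OpenAI-compatible proxy
    "disable_streaming": False,
    "base_url": "https://proxy.example/v1",  # illustrative proxy endpoint
})
```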
@@ -102,6 +142,77 @@ class LiteLLMSkill(Skill):
          # Validate LiteLLM availability
          if not LITELLM_AVAILABLE:
              raise ImportError("LiteLLM not available. Install with: pip install litellm")
+
+         # Register Gemini 2.5 experimental models pricing
+         # Official pricing from https://ai.google.dev/gemini-api/docs/models
+         # IMPORTANT: Register both with and without vertex_ai/ prefix for compatibility
+         if LITELLM_AVAILABLE and register_model:
+             try:
+                 gemini_models = {
+                     # Gemini Flash - base model (alias for gemini-2.5-flash-thinking)
+                     "gemini-flash": {
+                         "max_tokens": 65535,
+                         "max_input_tokens": 1048576,
+                         "max_output_tokens": 65535,
+                         "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                         "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                         "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                         "litellm_provider": "vertex_ai",
+                         "mode": "chat",
+                         "supports_function_calling": True,
+                         "supports_vision": True
+                     },
+                     # Gemini 2.5 Flash Thinking - standard reasoning model
+                     "gemini-2.5-flash-thinking": {
+                         "max_tokens": 65535,
+                         "max_input_tokens": 1048576,
+                         "max_output_tokens": 65535,
+                         "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                         "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                         "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                         "litellm_provider": "vertex_ai",
+                         "mode": "chat",
+                         "supports_function_calling": True,
+                         "supports_vision": True
+                     },
+                     # Gemini 2.5 Flash Image Preview - experimental image model (more expensive output)
+                     "gemini-2.5-flash-image": {
+                         "max_tokens": 65535,
+                         "max_input_tokens": 1048576,
+                         "max_output_tokens": 65535,
+                         "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                         "output_cost_per_token": 0.00003,  # $30 per 1M tokens (image model premium)
+                         "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                         "litellm_provider": "vertex_ai",
+                         "mode": "chat",
+                         "supports_function_calling": True,
+                         "supports_vision": True
+                     },
+                     # Alias for gemini-flash-image (same pricing as standard flash)
+                     "gemini-flash-image": {
+                         "max_tokens": 65535,
+                         "max_input_tokens": 1048576,
+                         "max_output_tokens": 65535,
+                         "input_cost_per_token": 0.0000003,  # $0.30 per 1M tokens
+                         "output_cost_per_token": 0.0000025,  # $2.50 per 1M tokens
+                         "cache_read_input_token_cost": 0.000000075,  # $0.075 per 1M cached tokens
+                         "litellm_provider": "vertex_ai",
+                         "mode": "chat",
+                         "supports_function_calling": True,
+                         "supports_vision": True
+                     }
+                 }
+
+                 # Register models with and without vertex_ai/ prefix
+                 models_to_register = {}
+                 for model_name, config in gemini_models.items():
+                     models_to_register[model_name] = config
+                     models_to_register[f"vertex_ai/{model_name}"] = config.copy()
+
+                 register_model(models_to_register)
+             except Exception:
+                 # Silent fail - not critical
+                 pass

      def _load_api_keys(self, config: Dict[str, Any] = None) -> Dict[str, str]:
          """Load API keys from config and environment - CONFIG HAS PRIORITY"""
@@ -126,8 +237,9 @@ class LiteLLMSkill(Skill):
          return keys

      async def initialize(self, agent: 'BaseAgent') -> None:
-         """Initialize LiteLLM skill"""
+         """Initialize LiteLLM skill and register as handoff"""
          from webagents.utils.logging import get_logger, log_skill_event
+         from webagents.agents.skills.base import Handoff

          self.agent = agent
          self.logger = get_logger('skill.llm.litellm', agent.name)
@@ -146,6 +258,27 @@ class LiteLLMSkill(Skill):
          litellm.set_verbose = False  # We handle logging ourselves
          litellm.drop_params = True  # Drop unsupported parameters

+         # Register as handoff (completion handler)
+         # Priority=10 (high priority - likely to be the default for local LLMs)
+         # NOTE: We register the STREAMING function so it works in both modes:
+         #   - Streaming: Returns generator directly
+         #   - Non-streaming: Agent consumes generator and reconstructs response
+         agent.register_handoff(
+             Handoff(
+                 target=f"litellm_{self.model.replace('/', '_')}",
+                 description=f"LiteLLM completion handler using {self.model}",
+                 scope="all",
+                 metadata={
+                     'function': self.chat_completion_stream,
+                     'priority': 10,
+                     'is_generator': True  # chat_completion_stream is an async generator
+                 }
+             ),
+             source="litellm"
+         )
+
+         self.logger.info(f"📨 Registered LiteLLM as handoff with model: {self.model}")
+
          log_skill_event(agent.name, 'litellm', 'initialized', {
              'model': self.model,
              'temperature': self.temperature,
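Since the streaming generator is what gets registered, a non-streaming caller must drain the generator and reassemble a single response, as the NOTE above says. A hedged sketch of that reconstruction; `collect_response` is a hypothetical helper, not BaseAgent's actual code, and it assumes OpenAI-style chunks with `delta` parts and a final `usage` chunk:

```python
async def collect_response(handoff_fn, messages):
    """Hypothetical helper: drain a streaming handoff and rebuild one response."""
    text_parts, usage = [], None
    async for chunk in handoff_fn(messages):
        for choice in chunk.get("choices", []):
            delta = choice.get("delta") or {}
            if delta.get("content"):
                text_parts.append(delta["content"])
        usage = chunk.get("usage") or usage  # the final chunk carries usage
    return {"choices": [{"message": {"role": "assistant",
                                     "content": "".join(text_parts)}}],
            "usage": usage}
```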
@@ -239,7 +372,14 @@ class LiteLLMSkill(Skill):
          """
          Create a streaming chat completion using LiteLLM
          """
-
+         # If streaming is disabled for this skill, fall back to non-streaming and yield once
+         if self.disable_streaming:
+             non_stream_response = await self.chat_completion(messages, model=model, tools=tools, stream=False, **kwargs)
+             # Normalize into a single streaming-style chunk
+             normalized = self._normalize_response(non_stream_response, model or self.current_model)
+             yield normalized
+             return
+
          target_model = model or self.current_model

          try:
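With `disable_streaming` set, the generator contract is preserved but exactly one chunk is yielded, and that chunk carries a full `message` (from `_normalize_response`) rather than a `delta`. Illustrative consumption, assuming an already-initialized skill instance:

```python
import asyncio

async def main(skill):
    # Yields many delta chunks normally; exactly one full chunk when
    # disable_streaming=True.
    async for chunk in skill.chat_completion_stream([{"role": "user", "content": "hi"}]):
        print(chunk.get("model"), bool(chunk.get("usage")))

# asyncio.run(main(skill))  # requires a configured LiteLLMSkill
```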
@@ -293,7 +433,7 @@ class LiteLLMSkill(Skill):
              return self.api_keys.get('anthropic')
          elif model.startswith('xai/') or model.startswith('grok') or model == 'grok-4':
              return self.api_keys.get('xai')
-         elif model.startswith('google/') or model.startswith('gemini'):
+         elif model.startswith('google/') or model.startswith('gemini') or model.startswith('vertex_ai/'):
              return self.api_keys.get('google')
          else:
              # Try to find a matching provider from model configs
              # Fallback to default
              return self.api_keys.get('openai')
@@ -303,6 +443,271 @@ class LiteLLMSkill(Skill):
              # Fallback to default
              return self.api_keys.get('openai')

+
+     async def _upload_image_to_content_api(self, image_base64_url: str, model: str) -> str:
+         """
+         Upload base64 image data to the content API and return a public URL.
+         Similar to the openlicense skill approach.
+         """
+         if not HTTPX_AVAILABLE:
+             self.logger.warning("httpx not available, cannot upload image to content API")
+             return image_base64_url
+
+         try:
+             # Extract the base64 data from the data URL
+             if not image_base64_url.startswith('data:image/'):
+                 self.logger.warning(f"Invalid image URL format: {image_base64_url[:50]}...")
+                 return image_base64_url  # Return as-is if not a data URL
+
+             # Parse the data URL: data:image/png;base64,<base64_data>
+             header, base64_data = image_base64_url.split(',', 1)
+             image_format = 'png'  # Default to PNG
+
+             # Extract the format from the header if available
+             if 'image/' in header:
+                 try:
+                     format_part = header.split('image/')[1].split(';')[0]
+                     if format_part in ['png', 'jpeg', 'jpg', 'webp']:
+                         image_format = format_part
+                 except Exception:
+                     pass  # Use the default PNG
+
+             # Decode the base64 data
+             image_data = base64.b64decode(base64_data)
+             self.logger.debug(f"Decoded image data: {len(image_data)} bytes, format: {image_format}")
+
+             # Generate a short filename
+             short_id = hashlib.md5(str(uuid.uuid4()).encode()).hexdigest()[:8]
+             filename = f"gemini_{short_id}.{image_format}"
+
+             # Get the portal URL from the environment (same as the openlicense skill)
+             portal_url = os.getenv("ROBUTLER_INTERNAL_API_URL", "https://robutler.ai")
+             upload_url = f"{portal_url}/api/content"
+
+             # Prepare metadata for the upload
+             description = f"AI-generated image from {model}"
+             tags = ['ai-generated', 'gemini', 'litellm']
+
+             # Create form data for the upload
+             files = {
+                 'file': (filename, image_data, f'image/{image_format}')
+             }
+
+             data = {
+                 'description': description,
+                 'tags': ','.join(tags),
+                 'userId': 'gemini-agent',  # Store under the agent account like openlicense
+                 'visibility': 'public'
+             }
+
+             # Get the API key from context (similar to the openlicense approach)
+             try:
+                 from webagents.server.context.context_vars import get_context
+                 context = get_context()
+                 api_key = None
+
+                 if context:
+                     # Try multiple possible key names
+                     api_key = (context.get("api_key") or
+                                context.get("robutler_api_key") or
+                                context.get("agent_api_key") or
+                                getattr(context, 'api_key', None))
+
+                     # Also try to get it from identity info or token info
+                     if not api_key:
+                         identity_info = context.get("identity_info")
+                         if identity_info and isinstance(identity_info, dict):
+                             api_key = identity_info.get("api_key")
+
+                     if not api_key:
+                         token_info = context.get("token_info")
+                         if token_info and isinstance(token_info, dict):
+                             api_key = token_info.get("api_key")
+
+                 # Fall back to the skill config
+                 if not api_key and hasattr(self, 'config') and self.config:
+                     api_key = self.config.get('robutler_api_key')
+
+                 # Try environment variables as a last resort
+                 if not api_key:
+                     api_key = os.getenv('ROBUTLER_API_KEY') or os.getenv('API_KEY')
+
+                 if not api_key:
+                     self.logger.warning("No API key found for content upload, trying without authentication")
+                     # Don't return early - try the upload anyway, it might work without auth in dev mode
+
+                 headers = {}
+                 if api_key:
+                     headers['Authorization'] = f'Bearer {api_key}'
+
+                 # Upload the image
+                 async with httpx.AsyncClient(timeout=30.0) as client:
+                     response = await client.post(upload_url, files=files, data=data, headers=headers)
+
+                     if response.status_code == 200:
+                         result = response.json()
+                         public_url = result.get('publicUrl')
+
+                         if public_url:
+                             # Rewrite the URL to the chat server if needed (like openlicense)
+                             chat_base = (os.getenv('ROBUTLER_CHAT_URL') or 'http://localhost:3001').rstrip('/')
+                             if public_url.startswith('/api/content/public'):
+                                 public_url = f"{chat_base}{public_url}"
+
+                             self.logger.info(f"Successfully uploaded image: {filename} -> {public_url}")
+                             return public_url
+                         else:
+                             self.logger.error(f"Upload successful but no publicUrl in response: {result}")
+                             return image_base64_url
+                     else:
+                         self.logger.error(f"Failed to upload image: {response.status_code} - {response.text}")
+                         return image_base64_url
+
+             except Exception as e:
+                 self.logger.error(f"Error during image upload: {e}")
+                 return image_base64_url
+
+         except Exception as e:
+             self.logger.error(f"Failed to process image for upload: {e}")
+             return image_base64_url
+
+     def _truncate_data_urls_in_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+         """Truncate data URLs in params for safe logging"""
+         import copy
+         safe_params = copy.deepcopy(params)
+
+         messages = safe_params.get('messages', [])
+         for msg in messages:
+             content = msg.get('content')
+             if isinstance(content, list):
+                 for part in content:
+                     if isinstance(part, dict) and part.get('type') == 'image_url':
+                         url = part.get('image_url', {}).get('url', '')
+                         if url.startswith('data:') and len(url) > 100:
+                             # Truncate the data URL
+                             prefix = url.split(',', 1)[0] if ',' in url else url[:50]
+                             part['image_url']['url'] = f"{prefix},...[TRUNCATED {len(url)} bytes]"
+             elif isinstance(content, str) and content.startswith('data:') and len(content) > 100:
+                 prefix = content.split(',', 1)[0] if ',' in content else content[:50]
+                 msg['content'] = f"{prefix},...[TRUNCATED {len(content)} bytes]"
+
+         return safe_params
+
+     def _optimize_vertex_ai_params(self, params: Dict[str, Any], model: str) -> Dict[str, Any]:
+         """Optimize parameters for Vertex AI models"""
+         optimized_params = params.copy()
+
+         # Check if this is a Vertex AI model
+         is_vertex_model = (
+             model.startswith('vertex_ai/') or
+             model.startswith('gemini-') or
+             'vertex' in model.lower()
+         )
+
+         if is_vertex_model:
+             is_image_model = "image" in model.lower()
+             has_tools = "tools" in optimized_params and optimized_params.get("tools")
+
+             # Always include usage for streaming requests (tools + images supported per latest guidance)
+             if optimized_params.get('stream'):
+                 optimized_params["stream_options"] = {"include_usage": True}
+
+             # If an image model gets tools in OpenAI format, convert them to Vertex function_declarations
+             if is_image_model and has_tools:
+                 try:
+                     tools_value = optimized_params.get("tools")
+                     # If tools is already an object with function_declarations, keep it as-is
+                     if isinstance(tools_value, dict) and "function_declarations" in tools_value:
+                         pass
+                     else:
+                         # Expect an OpenAI-format list -> convert
+                         if isinstance(tools_value, list):
+                             fdecls = []
+                             for t in tools_value:
+                                 if isinstance(t, dict) and t.get("type") == "function" and "function" in t:
+                                     fdecls.append(t["function"])
+                             if fdecls:
+                                 optimized_params["tools"] = {"function_declarations": fdecls}
+                                 self.logger.debug(f"Converted tools to function_declarations for {model}")
+                 except Exception as e:
+                     self.logger.debug(f"Tool conversion skipped due to error: {e}")
+
+             # Set an optimal temperature for Vertex AI if not specified
+             if 'temperature' not in params or params['temperature'] is None:
+                 optimized_params['temperature'] = 0.7
+
+             # Ensure reasonable token limits for Vertex AI
+             if not optimized_params.get('max_tokens') and not self.max_tokens:
+                 optimized_params['max_tokens'] = 8192
+
+         return optimized_params
+
+     def _convert_markdown_images_to_multimodal(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Convert markdown image links to multimodal format for vision models.
+         This lets LLMs see images visually while preserving the URLs as text.
+
+         Pattern: ![alt](url) -> multimodal content with both text and image
+         """
+         markdown_image_pattern = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
+         processed_messages = []
+
+         for message in messages:
+             msg_copy = dict(message)
+             content = msg_copy.get('content')
+
+             # Only process user/assistant messages whose string content contains markdown images
+             if msg_copy.get('role') not in ('user', 'assistant') or not isinstance(content, str):
+                 processed_messages.append(msg_copy)
+                 continue
+
+             # Check whether there are markdown images
+             markdown_images = markdown_image_pattern.findall(content)
+             if not markdown_images:
+                 processed_messages.append(msg_copy)
+                 continue
+
+             # Convert to multimodal format
+             content_parts = []
+             last_end = 0
+
+             for match in markdown_image_pattern.finditer(content):
+                 # Add the text before the image (including the markdown link for URL extraction)
+                 text_chunk = content[last_end:match.end()].strip()
+                 if text_chunk:
+                     content_parts.append({
+                         "type": "text",
+                         "text": text_chunk
+                     })
+
+                 # Add the image part
+                 alt_text, image_url = match.groups()
+                 content_parts.append({
+                     "type": "image_url",
+                     "image_url": {
+                         "url": image_url
+                     }
+                 })
+
+                 last_end = match.end()
+
+             # Add any remaining text after the last image
+             text_after = content[last_end:].strip()
+             if text_after:
+                 content_parts.append({
+                     "type": "text",
+                     "text": text_after
+                 })
+
+             # Update the message with multimodal content
+             if content_parts:
+                 msg_copy['content'] = content_parts
+                 self.logger.info(f"🖼️ Converted {len(markdown_images)} markdown image(s) to multimodal format")
+
+             processed_messages.append(msg_copy)
+
+         return processed_messages
+
      async def _execute_completion(self, messages: List[Dict[str, Any]],
                                    model: str,
                                    tools: Optional[List[Dict[str, Any]]] = None,
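For a message like `"Look: ![cat](url) cute?"`, `_convert_markdown_images_to_multimodal` produces OpenAI-style content parts in which the text part deliberately keeps the markdown link (so the URL survives as text) and an `image_url` part carries the same URL. Expected before/after shapes (values illustrative):

```python
before = {"role": "user", "content": "Look: ![cat](https://example.com/cat.png) cute?"}

after = {
    "role": "user",
    "content": [
        # Text up to and including the markdown link is preserved verbatim
        {"type": "text", "text": "Look: ![cat](https://example.com/cat.png)"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        {"type": "text", "text": "cute?"},
    ],
}
```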
@@ -310,6 +715,16 @@ class LiteLLMSkill(Skill):
                                    **kwargs) -> Dict[str, Any]:
          """Execute a single completion request"""

+         # Convert markdown images to multimodal format for vision models
+         messages = self._convert_markdown_images_to_multimodal(messages)
+
+         # For Vertex AI image models, use direct HTTP to preserve custom fields
+         is_vertex_image_model = (
+             'image' in model.lower() and
+             (model.startswith('vertex_ai/') or model.startswith('gemini-'))
+         )
+
          # Prepare parameters
          params = {
              "model": model,
@@ -317,11 +732,20 @@ class LiteLLMSkill(Skill):
              "temperature": kwargs.get('temperature', self.temperature),
              "stream": stream,
              # Ensure usage is available when streaming is requested later
+             # Note: stream_options will be set by _optimize_vertex_ai_params for supported models
              "stream_options": {"include_usage": True} if stream else None,
          }
+
+         # Force a specific provider routing when using an OpenAI-compatible proxy
+         # For image models, always use 'openai' to prevent response filtering and disable caching
+         if self.custom_llm_provider or is_vertex_image_model:
+             params["custom_llm_provider"] = self.custom_llm_provider or 'openai'
+             if is_vertex_image_model:
+                 params["caching"] = False  # Disable caching for image models
+                 self.logger.debug(f"Using custom_llm_provider='openai' and disabled caching for image model {model}")

          # Add base URL if configured (for proxy support)
-         if hasattr(self, 'config') and self.config and 'base_url' in self.config:
+         if self.config and 'base_url' in self.config:
              params["api_base"] = self.config['base_url']

          # Add max_tokens if specified
@@ -346,8 +770,10 @@ class LiteLLMSkill(Skill):
          if api_key:
              params["api_key"] = api_key

+         # Optimize parameters for Vertex AI models
+         params = self._optimize_vertex_ai_params(params, model)
+
          self.logger.debug(f"Executing completion with model {model}")
-         self.logger.debug(f"Parameters: {params}")

          # Validate parameters before calling LiteLLM
          if not messages or not isinstance(messages, list):
@@ -366,7 +792,29 @@ class LiteLLMSkill(Skill):
              # Convert LiteLLM response to our format
              return self._normalize_response(response, model)
          except Exception as e:
-             self.logger.error(f"LiteLLM completion failed with params: {params}")
+             # Log a params summary without huge data URLs
+             message_summary = []
+             for msg in params.get('messages', []):
+                 role = msg.get('role', '?')
+                 content = msg.get('content', '')
+                 if isinstance(content, list):
+                     parts = []
+                     for part in content:
+                         if part.get('type') == 'image_url':
+                             url = part.get('image_url', {}).get('url', '')
+                             if url.startswith('data:'):
+                                 parts.append('[data:image]')
+                             else:
+                                 parts.append(f'[image:{url[:30]}...]')
+                         elif part.get('type') == 'text':
+                             parts.append(f'"{part.get("text", "")[:50]}..."')
+                     message_summary.append(f"{role}: [{', '.join(parts)}]")
+                 else:
+                     message_summary.append(f"{role}: {str(content)[:100]}...")
+
+             self.logger.error(f"LiteLLM completion failed for model={params.get('model')}")
+             self.logger.error(f"Messages: {'; '.join(message_summary)}")
+             self.logger.error(f"Tools: {len(params.get('tools', []))} tool(s)")
              self.logger.error(f"Error details: {type(e).__name__}: {str(e)}")
              raise
@@ -376,6 +824,15 @@ class LiteLLMSkill(Skill):
                                           **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
          """Execute a streaming completion request"""

+         # Convert markdown images to multimodal format for vision models
+         messages = self._convert_markdown_images_to_multimodal(messages)
+
+         # For Vertex AI image models, use custom_llm_provider='openai' to prevent response filtering
+         is_vertex_image_model = (
+             'image' in model.lower() and
+             (model.startswith('vertex_ai/') or model.startswith('gemini-'))
+         )
+
          # Prepare parameters (same as non-streaming)
          params = {
              "model": model,
@@ -383,8 +840,17 @@ class LiteLLMSkill(Skill):
              "temperature": kwargs.get('temperature', self.temperature),
              "stream": True,
              # Include a final usage chunk before [DONE] per LiteLLM docs
+             # Note: stream_options will be optimized by _optimize_vertex_ai_params for model compatibility
              "stream_options": {"include_usage": True},
          }
+
+         # Force a specific provider routing when using an OpenAI-compatible proxy
+         # For image models, always use 'openai' to prevent response filtering and disable caching
+         if self.custom_llm_provider or is_vertex_image_model:
+             params["custom_llm_provider"] = self.custom_llm_provider or 'openai'
+             if is_vertex_image_model:
+                 params["caching"] = False  # Disable caching for image models
+                 self.logger.debug(f"Using custom_llm_provider='openai' and disabled caching for streaming image model {model}")

          # Add base URL if configured (for proxy support)
          if self.config and 'base_url' in self.config:
@@ -410,47 +876,173 @@ class LiteLLMSkill(Skill):
          if api_key:
              params["api_key"] = api_key

+         # Optimize parameters for Vertex AI models
+         params = self._optimize_vertex_ai_params(params, model)
+
          self.logger.debug(f"Executing streaming completion with model {model}")

+         # Add special handling for Vertex AI models
+         is_vertex_model = (
+             model.startswith('vertex_ai/') or
+             model.startswith('gemini-') or
+             'vertex' in model.lower()
+         )
+
          # Execute streaming completion
          stream = await acompletion(**params)

+         chunk_count = 0
+         content_chunks = 0
+
          async for chunk in stream:
+             chunk_count += 1
+
              # Normalize and yield chunk
              normalized_chunk = self._normalize_streaming_chunk(chunk, model)
+             # After normalization, upload any data:image content inside the same loop/task
+             try:
+                 normalized_chunk = await self._upload_and_rewrite_chunk_images(normalized_chunk, model)
+             except Exception:
+                 pass
+
+             # Debug Vertex AI streaming
+             if is_vertex_model and chunk_count <= 3:  # Log the first few chunks for debugging
+                 if isinstance(normalized_chunk, dict):
+                     choices = normalized_chunk.get('choices', [])
+                     if choices and len(choices) > 0:
+                         delta = choices[0].get('delta', {})
+                         if 'content' in delta and delta['content']:
+                             content_chunks += 1
+                             self.logger.debug(f"Vertex AI streaming chunk {chunk_count}: got content ({len(delta['content'])} chars)")

              # If LiteLLM sent a final usage chunk, log tokens to context.usage
              try:
                  usage = normalized_chunk.get('usage') if isinstance(normalized_chunk, dict) else None
-                 is_final_usage_chunk = (
-                     usage
-                     and isinstance(usage, dict)
-                     and (not normalized_chunk.get('choices'))
-                 )
-                 if is_final_usage_chunk:
+                 if usage and isinstance(usage, dict):
                      prompt_tokens = int(usage.get('prompt_tokens') or 0)
                      completion_tokens = int(usage.get('completion_tokens') or 0)
                      total_tokens = int(usage.get('total_tokens') or (prompt_tokens + completion_tokens))
+
+                     # DEBUG: Log all usage fields to see what Gemini sends
+                     self.logger.info(f"🔍 USAGE CHUNK RECEIVED: {usage}")
+
                      self._append_usage_record(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, streaming=True)
-             except Exception:
+             except Exception as e:
                  # Never break streaming on usage logging
+                 self.logger.warning(f"💰 LiteLLM streaming: Failed to log usage: {e}")
                  pass
              yield normalized_chunk
+
+         # Log streaming completion stats for Vertex AI models
+         if is_vertex_model:
+             self.logger.debug(f"Vertex AI streaming completed: {chunk_count} total chunks, {content_chunks} with content")

      def _normalize_response(self, response: Any, model: str) -> Dict[str, Any]:
-         """Normalize LiteLLM response to OpenAI format"""
+         """Normalize LiteLLM response to OpenAI format and handle images"""
+
+         self.logger.info(f"🔍 _normalize_response called for model: {model}")
+
+         # Log the raw response object type and attributes
+         self.logger.debug(f"🔍 Raw response type: {type(response)}")
+         if hasattr(response, '__dict__'):
+             all_attrs = list(vars(response).keys())
+             self.logger.debug(f"🔍 Raw response attributes: {all_attrs[:20]}")
+
+         # Convert response to dict if needed
+         if hasattr(response, 'model_dump'):
+             response_dict = response.model_dump()
+         elif hasattr(response, 'dict'):
+             response_dict = response.dict()
+         elif isinstance(response, dict):
+             response_dict = response.copy()
+         else:
+             response_dict = dict(response) if response else {}
+
+         self.logger.debug(f"🔍 Response dict keys: {response_dict.keys()}")
+
+         # Check for response data in dict format
+         if 'choices' in response_dict and response_dict['choices']:
+             self.logger.debug(f"🔍 First choice keys: {response_dict['choices'][0].keys()}")
+             if 'message' in response_dict['choices'][0]:
+                 msg = response_dict['choices'][0]['message']
+                 self.logger.debug(f"🔍 Message keys: {msg.keys()}")
+                 self.logger.debug(f"🔍 Message content length: {len(str(msg.get('content', '')))}")
+
+         # Ensure model name is correct and includes provider prefix for cost lookup
+         # Add vertex_ai/ prefix if it's a Gemini model without a provider prefix
+         if model and not model.startswith(('vertex_ai/', 'openai/', 'anthropic/', 'xai/')):
+             if 'gemini' in model.lower() or 'flash' in model.lower():
+                 response_dict['model'] = f"vertex_ai/{model}"
+             else:
+                 response_dict['model'] = model
+         else:
+             response_dict['model'] = model

-         # LiteLLM already returns OpenAI-compatible format
-         # Just ensure model name is correct
-         if hasattr(response, 'model'):
-             response.model = model
-         elif isinstance(response, dict) and 'model' in response:
-             response['model'] = model
+         # Handle custom image field from Gemini models
+         if 'choices' in response_dict and response_dict['choices']:
+             for choice in response_dict['choices']:
+                 if 'message' in choice and choice['message']:
+                     message = choice['message']
+
+                     # Check for custom image field
+                     if 'image' in message and message['image']:
+                         image_data = message['image']
+
+                         self.logger.info(f"Found image field in non-streaming response for {model}")
+
+                         # Convert image to markdown format for display
+                         if 'url' in image_data and image_data['url']:
+                             image_url = image_data['url']
+
+                             # Upload all base64 images to the content API for better performance and reliability
+                             if image_url.startswith('data:image/'):
+                                 self.logger.info(f"Base64 image detected ({len(image_url)} chars), uploading to content API")
+                                 try:
+                                     # Handle async upload in a sync context
+                                     import asyncio
+                                     import concurrent.futures
+
+                                     def run_upload():
+                                         return asyncio.run(self._upload_image_to_content_api(image_url, model))
+
+                                     try:
+                                         loop = asyncio.get_event_loop()
+                                         if loop.is_running():
+                                             # If the loop is already running, run in a separate thread
+                                             with concurrent.futures.ThreadPoolExecutor() as executor:
+                                                 future = executor.submit(run_upload)
+                                                 uploaded_url = future.result(timeout=30)
+                                         else:
+                                             uploaded_url = loop.run_until_complete(self._upload_image_to_content_api(image_url, model))
+                                     except RuntimeError:
+                                         # Fallback: run in a new event loop
+                                         uploaded_url = run_upload()
+
+                                     if uploaded_url != image_url:  # Only update if the upload succeeded
+                                         image_url = uploaded_url
+                                         self.logger.info(f"Successfully uploaded image to: {uploaded_url}")
+                                 except Exception as e:
+                                     self.logger.error(f"Failed to upload image: {e}, using original URL")
+
+                             # Create markdown image syntax
+                             image_markdown = f"![Generated Image]({image_url})"
+
+                             # Append to content, or replace if there is no content
+                             current_content = message.get('content') or ''
+                             if current_content:
+                                 message['content'] = f"{current_content}\n\n{image_markdown}"
+                             else:
+                                 message['content'] = image_markdown
+
+                             self.logger.info(f"Converted image field to markdown for model {model}")
+
+                         # Remove the custom image field since we've converted it
+                         del message['image']

-         return response
+         return response_dict

      def _normalize_streaming_chunk(self, chunk: Any, model: str) -> Dict[str, Any]:
-         """Normalize LiteLLM streaming chunk to OpenAI format"""
+         """Normalize LiteLLM streaming chunk to OpenAI format and handle images"""

          # Convert chunk to dictionary if it's not already
          if hasattr(chunk, 'model_dump'):
@@ -473,10 +1065,84 @@ class LiteLLMSkill(Skill):
              # Fallback - return as-is and hope for the best
              return chunk

-         # Ensure model name is correct
-         chunk_dict['model'] = model
+         # Ensure model name is correct and includes provider prefix for cost lookup
+         # Add vertex_ai/ prefix if it's a Gemini model without a provider prefix
+         if model and not model.startswith(('vertex_ai/', 'openai/', 'anthropic/', 'xai/')):
+             if 'gemini' in model.lower() or 'flash' in model.lower():
+                 chunk_dict['model'] = f"vertex_ai/{model}"
+             else:
+                 chunk_dict['model'] = model
+         else:
+             chunk_dict['model'] = model
+
+         # Handle custom image field from Gemini models in streaming
+         if 'choices' in chunk_dict and chunk_dict['choices']:
+             for choice in chunk_dict['choices']:
+                 # Check both delta and message for image data
+                 for message_key in ['delta', 'message']:
+                     if message_key in choice and choice[message_key]:
+                         message = choice[message_key]
+
+                         # Check for custom image field
+                         if 'image' in message and message['image']:
+                             self.logger.info(f"🔍 STREAMING: Found 'image' field in {message_key}")
+                             image_data = message['image']
+
+                             # Convert image to markdown format for display (do not upload here)
+                             if 'url' in image_data and image_data['url']:
+                                 image_url = image_data['url']
+                                 self.logger.info(f"🔍 STREAMING: Image URL: {image_url[:100]}...")
+
+                                 # Create markdown image syntax
+                                 image_markdown = f"\n\n![Generated Image]({image_url})"
+
+                                 # For streaming, replace any existing content with the image markdown
+                                 message['content'] = image_markdown
+
+                                 self.logger.info(f"Converted streaming image field to markdown for model {model}")
+
+                             # Remove the custom image field since we've converted it
+                             del message['image']

          return chunk_dict
+
+     async def _upload_and_rewrite_chunk_images(self, chunk: Dict[str, Any], model: str) -> Dict[str, Any]:
+         """
+         Detect data:image URLs in chunk content, upload them to the content API, rewrite the URLs,
+         and log pricing. Runs inside the streaming loop so context is available.
+         """
+         try:
+             if not chunk or 'choices' not in chunk:
+                 return chunk
+             data_url_pattern = re.compile(r"data:image/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+")
+             image_logged = False
+             for choice in chunk['choices']:
+                 for key in ['delta', 'message']:
+                     if key in choice and choice[key] and isinstance(choice[key], dict):
+                         msg = choice[key]
+                         content = msg.get('content')
+                         if isinstance(content, str):
+                             matches = list(data_url_pattern.finditer(content))
+                             if not matches:
+                                 continue
+                             new_content = content
+                             for m in matches:
+                                 data_url = m.group(0)
+                                 try:
+                                     public_url = await self._upload_image_to_content_api(data_url, model)
+                                     if public_url and public_url != data_url:
+                                         new_content = new_content.replace(data_url, public_url)
+                                         # Always log one pricing record per uploaded image URL
+                                         self._log_image_upload_pricing()
+                                         image_logged = True
+                                 except Exception as e:
+                                     self.logger.error(f"Image upload failed during streaming rewrite: {e}")
+                             if new_content != content:
+                                 msg['content'] = new_content
+             return chunk
+         except Exception as e:
+             self.logger.error(f"Error rewriting streaming image URLs: {e}")
+             return chunk

      def _append_usage_record(
          self,
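Taken together with the streaming loop above, each chunk is normalized first and then passed through `_upload_and_rewrite_chunk_images`, which finds inline base64 data URLs, uploads them, and substitutes public URLs in place. A standalone check of the regex's behavior (content and replacement URL are illustrative):

```python
import re

data_url_pattern = re.compile(r"data:image/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+")
content = "done\n\n![Generated Image](data:image/png;base64,iVBORw0KGgo=)"

m = data_url_pattern.search(content)
# The character class excludes ')', so the match stops before the closing paren
assert m is not None and m.group(0).startswith("data:image/png;base64,")

rewritten = content.replace(m.group(0), "https://chat.example/api/content/public/abc123")
print(rewritten)  # markdown now points at the illustrative public URL
```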
@@ -492,7 +1158,8 @@ class LiteLLMSkill(Skill):
              context = get_context()
              if not context or not hasattr(context, 'usage'):
                  return
-             context.usage.append({
+
+             record = {
                  'type': 'llm',
                  'skill': 'litellm',
                  'model': model,
@@ -501,7 +1168,8 @@ class LiteLLMSkill(Skill):
                  'total_tokens': int(total_tokens or 0),
                  'streaming': bool(streaming),
                  'timestamp': time.time(),
-             })
+             }
+             context.usage.append(record)
          except Exception:
              # Do not raise from logging
              return
@@ -531,6 +1199,217 @@ class LiteLLMSkill(Skill):

          return str(response)

+     async def _upload_image_to_content_api(self, image_base64_url: str, model: str) -> str:
+         """
+         Upload base64 image data to the content API and return a public URL.
+         Uses the same approach as the openlicense skill.
+         Charges 5 cents per image upload.
+         """
+         if not HTTPX_AVAILABLE:
+             self.logger.warning("httpx not available, cannot upload image to content API")
+             return image_base64_url
+
+         try:
+             # Extract the base64 data from the data URL
+             if not image_base64_url.startswith('data:image/'):
+                 self.logger.warning(f"Invalid image URL format: {image_base64_url[:50]}...")
+                 return image_base64_url  # Return as-is if not a data URL
+
+             # Parse the data URL: data:image/png;base64,<data>
+             try:
+                 header, data = image_base64_url.split(',', 1)
+                 image_format = header.split('/')[1].split(';')[0]  # Extract the format (png, jpeg, etc.)
+                 image_data = base64.b64decode(data)
+             except Exception as e:
+                 self.logger.error(f"Failed to parse base64 image data: {e}")
+                 return image_base64_url
+
+             # Get the API key - try multiple sources with comprehensive fallbacks
+             agent_api_key = None
+             context = None
+
+             # Method 1: Try context first (same approach as the openlicense skill)
+             try:
+                 from webagents.server.context.context_vars import get_context
+                 context = get_context()
+
+                 if context:
+                     # Try multiple possible keys (same as openlicense)
+                     agent_api_key = (context.get("api_key") or
+                                      context.get("robutler_api_key") or
+                                      context.get("agent_api_key") or
+                                      getattr(context, 'api_key', None))
+
+                     if agent_api_key:
+                         self.logger.info(f"Found agent API key from context: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                     else:
+                         self.logger.debug("No agent API key found in context")
+                 else:
+                     self.logger.debug("No context available")
+             except Exception as e:
+                 self.logger.debug(f"Error accessing context: {e}")
+
+             # Method 2: Try the skill's own config (from the dynamic factory)
+             if not agent_api_key and hasattr(self, 'config') and self.config:
+                 self.logger.debug("Trying to get API key from skill config...")
+                 try:
+                     # For r-banana, the API key should be in the api_keys dict
+                     if isinstance(self.config, dict):
+                         # Try robutler_api_key first
+                         agent_api_key = self.config.get('robutler_api_key')
+
+                         # Try the api_keys dict (from the dynamic factory)
+                         if not agent_api_key and 'api_keys' in self.config:
+                             api_keys = self.config['api_keys']
+                             if isinstance(api_keys, dict):
+                                 # Try different provider keys
+                                 agent_api_key = (api_keys.get('azure') or
+                                                  api_keys.get('openai') or
+                                                  api_keys.get('anthropic') or
+                                                  api_keys.get('google'))
+
+                     if agent_api_key:
+                         self.logger.info(f"Found agent API key from skill config: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                     else:
+                         self.logger.debug(f"No API key found in skill config. Config keys: {list(self.config.keys())}")
+                         if 'api_keys' in self.config:
+                             self.logger.debug(f"api_keys dict keys: {list(self.config['api_keys'].keys()) if isinstance(self.config['api_keys'], dict) else 'not a dict'}")
+                 except (KeyError, TypeError, AttributeError) as e:
+                     self.logger.debug(f"Could not access skill config: {e}")
+
+             # Method 3: Try environment variables as a last resort
+             if not agent_api_key:
+                 self.logger.debug("Trying environment variables as fallback...")
+                 agent_api_key = (os.getenv('ROBUTLER_API_KEY') or
+                                  os.getenv('API_KEY') or
+                                  os.getenv('AGENT_API_KEY'))
+                 if agent_api_key:
+                     self.logger.info(f"Found agent API key from environment: {agent_api_key[:10]}...{agent_api_key[-4:] if len(agent_api_key) > 14 else ''}")
+                 else:
+                     self.logger.debug("No API key found in environment variables")
+
+             if not agent_api_key:
+                 self.logger.error("No agent API key found anywhere - content upload will fail")
+                 return image_base64_url  # Fall back to the original URL
+
+             # Get the portal URL (same as openlicense)
+             portal_url = os.getenv('ROBUTLER_INTERNAL_API_URL', 'http://localhost:3000')
+             upload_url = f"{portal_url}/api/content"
+
+             # Get the agent ID for the access scope
+             agent_id = None
+             if context:
+                 agent_id = context.get("agent_id") or context.get("current_agent_id")
+                 if agent_id:
+                     self.logger.debug(f"Found agent_id in context for access scope: {agent_id}")
+
+             # Prepare the file data
+             filename = f"ai_image_{uuid.uuid4().hex[:8]}.{image_format}"
+             files = {
+                 'file': (filename, image_data, f'image/{image_format}')
+             }
+
+             # Prepare metadata
+             description = f"AI-generated image from {model}"
+             tags = ['ai-generated', 'litellm-skill', model.replace('/', '-')]
+
+             # Prepare agent access
+             grant_agent_access = []
+             if agent_id:
+                 grant_agent_access.append(agent_id)
+                 self.logger.debug(f"Granting agent access to: {agent_id}")
+
+             data = {
+                 'visibility': 'public',
+                 'description': description,
+                 'tags': json.dumps(tags),
+                 'grantAgentAccess': json.dumps(grant_agent_access) if grant_agent_access else None
+             }
+
+             # Make an authenticated request to the upload API (same as openlicense)
+             async with httpx.AsyncClient(timeout=60.0) as client:
+                 # Prepare headers with proper API key authentication
+                 headers = {'User-Agent': 'LiteLLM-Skill/1.0'}
+
+                 if agent_api_key:
+                     # Use the Bearer token format as expected by the content API
+                     headers['Authorization'] = f'Bearer {agent_api_key}'
+                     self.logger.debug("Added Authorization header with Bearer token")
+                 else:
+                     self.logger.warning("No API key available for authentication - upload may fail")
+
+                 response = await client.post(
+                     upload_url,
+                     files=files,
+                     data=data,
+                     headers=headers
+                 )
+
+                 if response.status_code in [200, 201]:
+                     result = response.json()
+                     content_url = result.get('url')
+                     if content_url:
+                         # Replace the portal URL with the chat URL for public access (same as the openlicense skill)
+                         chat_base = (os.getenv('ROBUTLER_CHAT_URL') or 'http://localhost:3001').rstrip('/')
+                         portal_base = (os.getenv('ROBUTLER_INTERNAL_API_URL') or 'http://localhost:3000').rstrip('/')
+
+                         if content_url.startswith(portal_base):
+                             # Replace the portal base with the chat base for the public URL
+                             public_url = content_url.replace(portal_base, chat_base, 1)
+                             self.logger.info(f"Successfully uploaded image to content API: {public_url}")
+                             return public_url
+                         else:
+                             self.logger.info(f"Successfully uploaded image to content API: {content_url}")
+                             return content_url
+                     else:
+                         self.logger.error(f"Upload successful but no URL returned: {result}")
+                         return image_base64_url
+                 else:
+                     self.logger.error(f"Failed to upload image to content API: {response.status_code} - {response.text}")
+                     return image_base64_url
+
+         except Exception as e:
+             self.logger.error(f"Error uploading image to content API: {e}")
+             return image_base64_url
+
+     def _log_image_upload_pricing(self) -> None:
+         """
+         FIXME: temporary hack; remove in V0.3.0.
+         Logs image upload pricing to context for PaymentSkill to process.
+         Charges 5 cents (0.05 credits) per image upload.
+         """
+         try:
+             from webagents.server.context.context_vars import get_context
+             context = get_context()
+
+             if context:
+                 # Initialize the usage list if not present
+                 if not hasattr(context, 'usage') or context.usage is None:
+                     context.usage = []
+
+                 # Add a tool usage record in the format PaymentSkill expects
+                 usage_record = {
+                     'type': 'tool',
+                     'tool_name': 'image_upload',
+                     'pricing': {
+                         'credits': 0.05,
+                         'reason': 'AI image upload to content API',
+                         'metadata': {
+                             'service': 'image_upload',
+                             'model': 'content_api'
+                         }
+                     }
+                 }
+
+                 context.usage.append(usage_record)
+                 self.logger.info("💰 Logged image upload pricing: 0.05 credits - AI image upload to content API")
+             else:
+                 self.logger.warning("No context available to log image upload pricing")
+
+         except Exception as e:
+             self.logger.error(f"Error logging image upload pricing: {e}")
+
      async def generate_embedding(self, text: str, model: Optional[str] = None) -> List[float]:
          """Generate embeddings (placeholder for V2.1)"""
          # This would use LiteLLM's embedding support in V2.1
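Note that this is the second `_upload_image_to_content_api` defined in the same class body, so at class-creation time it replaces the earlier variant added further up in this diff. The record emitted by `_log_image_upload_pricing` is a `type: 'tool'` usage entry; a hedged sketch of how such records could be summed (the consumer logic is illustrative, not PaymentSkill's actual code):

```python
def total_tool_credits(usage: list) -> float:
    """Illustrative consumer: sum credits from tool-type usage records."""
    total = 0.0
    for record in usage:
        if record.get("type") == "tool":
            total += float(record.get("pricing", {}).get("credits", 0.0))
    return total

usage = [{"type": "tool", "tool_name": "image_upload",
          "pricing": {"credits": 0.05, "reason": "AI image upload to content API"}}]
assert total_tool_credits(usage) == 0.05
```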