superqode 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superqode/__init__.py +33 -0
- superqode/acp/__init__.py +23 -0
- superqode/acp/client.py +913 -0
- superqode/acp/permission_screen.py +457 -0
- superqode/acp/types.py +480 -0
- superqode/acp_discovery.py +856 -0
- superqode/agent/__init__.py +22 -0
- superqode/agent/edit_strategies.py +334 -0
- superqode/agent/loop.py +892 -0
- superqode/agent/qe_report_templates.py +39 -0
- superqode/agent/system_prompts.py +353 -0
- superqode/agent_output.py +721 -0
- superqode/agent_stream.py +953 -0
- superqode/agents/__init__.py +59 -0
- superqode/agents/acp_registry.py +305 -0
- superqode/agents/client.py +249 -0
- superqode/agents/data/augmentcode.com.toml +51 -0
- superqode/agents/data/cagent.dev.toml +51 -0
- superqode/agents/data/claude.com.toml +60 -0
- superqode/agents/data/codeassistant.dev.toml +51 -0
- superqode/agents/data/codex.openai.com.toml +57 -0
- superqode/agents/data/fastagent.ai.toml +66 -0
- superqode/agents/data/geminicli.com.toml +77 -0
- superqode/agents/data/goose.block.xyz.toml +54 -0
- superqode/agents/data/junie.jetbrains.com.toml +56 -0
- superqode/agents/data/kimi.moonshot.cn.toml +57 -0
- superqode/agents/data/llmlingagent.dev.toml +51 -0
- superqode/agents/data/molt.bot.toml +49 -0
- superqode/agents/data/opencode.ai.toml +60 -0
- superqode/agents/data/stakpak.dev.toml +51 -0
- superqode/agents/data/vtcode.dev.toml +51 -0
- superqode/agents/discovery.py +266 -0
- superqode/agents/messaging.py +160 -0
- superqode/agents/persona.py +166 -0
- superqode/agents/registry.py +421 -0
- superqode/agents/schema.py +72 -0
- superqode/agents/unified.py +367 -0
- superqode/app/__init__.py +111 -0
- superqode/app/constants.py +314 -0
- superqode/app/css.py +366 -0
- superqode/app/models.py +118 -0
- superqode/app/suggester.py +125 -0
- superqode/app/widgets.py +1591 -0
- superqode/app_enhanced.py +399 -0
- superqode/app_main.py +17187 -0
- superqode/approval.py +312 -0
- superqode/atomic.py +296 -0
- superqode/commands/__init__.py +1 -0
- superqode/commands/acp.py +965 -0
- superqode/commands/agents.py +180 -0
- superqode/commands/auth.py +278 -0
- superqode/commands/config.py +374 -0
- superqode/commands/init.py +826 -0
- superqode/commands/providers.py +819 -0
- superqode/commands/qe.py +1145 -0
- superqode/commands/roles.py +380 -0
- superqode/commands/serve.py +172 -0
- superqode/commands/suggestions.py +127 -0
- superqode/commands/superqe.py +460 -0
- superqode/config/__init__.py +51 -0
- superqode/config/loader.py +812 -0
- superqode/config/schema.py +498 -0
- superqode/core/__init__.py +111 -0
- superqode/core/roles.py +281 -0
- superqode/danger.py +386 -0
- superqode/data/superqode-template.yaml +1522 -0
- superqode/design_system.py +1080 -0
- superqode/dialogs/__init__.py +6 -0
- superqode/dialogs/base.py +39 -0
- superqode/dialogs/model.py +130 -0
- superqode/dialogs/provider.py +870 -0
- superqode/diff_view.py +919 -0
- superqode/enterprise.py +21 -0
- superqode/evaluation/__init__.py +25 -0
- superqode/evaluation/adapters.py +93 -0
- superqode/evaluation/behaviors.py +89 -0
- superqode/evaluation/engine.py +209 -0
- superqode/evaluation/scenarios.py +96 -0
- superqode/execution/__init__.py +36 -0
- superqode/execution/linter.py +538 -0
- superqode/execution/modes.py +347 -0
- superqode/execution/resolver.py +283 -0
- superqode/execution/runner.py +642 -0
- superqode/file_explorer.py +811 -0
- superqode/file_viewer.py +471 -0
- superqode/flash.py +183 -0
- superqode/guidance/__init__.py +58 -0
- superqode/guidance/config.py +203 -0
- superqode/guidance/prompts.py +71 -0
- superqode/harness/__init__.py +54 -0
- superqode/harness/accelerator.py +291 -0
- superqode/harness/config.py +319 -0
- superqode/harness/validator.py +147 -0
- superqode/history.py +279 -0
- superqode/integrations/superopt_runner.py +124 -0
- superqode/logging/__init__.py +49 -0
- superqode/logging/adapters.py +219 -0
- superqode/logging/formatter.py +923 -0
- superqode/logging/integration.py +341 -0
- superqode/logging/sinks.py +170 -0
- superqode/logging/unified_log.py +417 -0
- superqode/lsp/__init__.py +26 -0
- superqode/lsp/client.py +544 -0
- superqode/main.py +1069 -0
- superqode/mcp/__init__.py +89 -0
- superqode/mcp/auth_storage.py +380 -0
- superqode/mcp/client.py +1236 -0
- superqode/mcp/config.py +319 -0
- superqode/mcp/integration.py +337 -0
- superqode/mcp/oauth.py +436 -0
- superqode/mcp/oauth_callback.py +385 -0
- superqode/mcp/types.py +290 -0
- superqode/memory/__init__.py +31 -0
- superqode/memory/feedback.py +342 -0
- superqode/memory/store.py +522 -0
- superqode/notifications.py +369 -0
- superqode/optimization/__init__.py +5 -0
- superqode/optimization/config.py +33 -0
- superqode/permissions/__init__.py +25 -0
- superqode/permissions/rules.py +488 -0
- superqode/plan.py +323 -0
- superqode/providers/__init__.py +33 -0
- superqode/providers/gateway/__init__.py +165 -0
- superqode/providers/gateway/base.py +228 -0
- superqode/providers/gateway/litellm_gateway.py +1170 -0
- superqode/providers/gateway/openresponses_gateway.py +436 -0
- superqode/providers/health.py +297 -0
- superqode/providers/huggingface/__init__.py +74 -0
- superqode/providers/huggingface/downloader.py +472 -0
- superqode/providers/huggingface/endpoints.py +442 -0
- superqode/providers/huggingface/hub.py +531 -0
- superqode/providers/huggingface/inference.py +394 -0
- superqode/providers/huggingface/transformers_runner.py +516 -0
- superqode/providers/local/__init__.py +100 -0
- superqode/providers/local/base.py +438 -0
- superqode/providers/local/discovery.py +418 -0
- superqode/providers/local/lmstudio.py +256 -0
- superqode/providers/local/mlx.py +457 -0
- superqode/providers/local/ollama.py +486 -0
- superqode/providers/local/sglang.py +268 -0
- superqode/providers/local/tgi.py +260 -0
- superqode/providers/local/tool_support.py +477 -0
- superqode/providers/local/vllm.py +258 -0
- superqode/providers/manager.py +1338 -0
- superqode/providers/models.py +1016 -0
- superqode/providers/models_dev.py +578 -0
- superqode/providers/openresponses/__init__.py +87 -0
- superqode/providers/openresponses/converters/__init__.py +17 -0
- superqode/providers/openresponses/converters/messages.py +343 -0
- superqode/providers/openresponses/converters/tools.py +268 -0
- superqode/providers/openresponses/schema/__init__.py +56 -0
- superqode/providers/openresponses/schema/models.py +585 -0
- superqode/providers/openresponses/streaming/__init__.py +5 -0
- superqode/providers/openresponses/streaming/parser.py +338 -0
- superqode/providers/openresponses/tools/__init__.py +21 -0
- superqode/providers/openresponses/tools/apply_patch.py +352 -0
- superqode/providers/openresponses/tools/code_interpreter.py +290 -0
- superqode/providers/openresponses/tools/file_search.py +333 -0
- superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
- superqode/providers/registry.py +716 -0
- superqode/providers/usage.py +332 -0
- superqode/pure_mode.py +384 -0
- superqode/qr/__init__.py +23 -0
- superqode/qr/dashboard.py +781 -0
- superqode/qr/generator.py +1018 -0
- superqode/qr/templates.py +135 -0
- superqode/safety/__init__.py +41 -0
- superqode/safety/sandbox.py +413 -0
- superqode/safety/warnings.py +256 -0
- superqode/server/__init__.py +33 -0
- superqode/server/lsp_server.py +775 -0
- superqode/server/web.py +250 -0
- superqode/session/__init__.py +25 -0
- superqode/session/persistence.py +580 -0
- superqode/session/sharing.py +477 -0
- superqode/session.py +475 -0
- superqode/sidebar.py +2991 -0
- superqode/stream_view.py +648 -0
- superqode/styles/__init__.py +3 -0
- superqode/superqe/__init__.py +184 -0
- superqode/superqe/acp_runner.py +1064 -0
- superqode/superqe/constitution/__init__.py +62 -0
- superqode/superqe/constitution/evaluator.py +308 -0
- superqode/superqe/constitution/loader.py +432 -0
- superqode/superqe/constitution/schema.py +250 -0
- superqode/superqe/events.py +591 -0
- superqode/superqe/frameworks/__init__.py +65 -0
- superqode/superqe/frameworks/base.py +234 -0
- superqode/superqe/frameworks/e2e.py +263 -0
- superqode/superqe/frameworks/executor.py +237 -0
- superqode/superqe/frameworks/javascript.py +409 -0
- superqode/superqe/frameworks/python.py +373 -0
- superqode/superqe/frameworks/registry.py +92 -0
- superqode/superqe/mcp_tools/__init__.py +47 -0
- superqode/superqe/mcp_tools/core_tools.py +418 -0
- superqode/superqe/mcp_tools/registry.py +230 -0
- superqode/superqe/mcp_tools/testing_tools.py +167 -0
- superqode/superqe/noise.py +89 -0
- superqode/superqe/orchestrator.py +778 -0
- superqode/superqe/roles.py +609 -0
- superqode/superqe/session.py +713 -0
- superqode/superqe/skills/__init__.py +57 -0
- superqode/superqe/skills/base.py +106 -0
- superqode/superqe/skills/core_skills.py +899 -0
- superqode/superqe/skills/registry.py +90 -0
- superqode/superqe/verifier.py +101 -0
- superqode/superqe_cli.py +76 -0
- superqode/tool_call.py +358 -0
- superqode/tools/__init__.py +93 -0
- superqode/tools/agent_tools.py +496 -0
- superqode/tools/base.py +324 -0
- superqode/tools/batch_tool.py +133 -0
- superqode/tools/diagnostics.py +311 -0
- superqode/tools/edit_tools.py +653 -0
- superqode/tools/enhanced_base.py +515 -0
- superqode/tools/file_tools.py +269 -0
- superqode/tools/file_tracking.py +45 -0
- superqode/tools/lsp_tools.py +610 -0
- superqode/tools/network_tools.py +350 -0
- superqode/tools/permissions.py +400 -0
- superqode/tools/question_tool.py +324 -0
- superqode/tools/search_tools.py +598 -0
- superqode/tools/shell_tools.py +259 -0
- superqode/tools/todo_tools.py +121 -0
- superqode/tools/validation.py +80 -0
- superqode/tools/web_tools.py +639 -0
- superqode/tui.py +1152 -0
- superqode/tui_integration.py +875 -0
- superqode/tui_widgets/__init__.py +27 -0
- superqode/tui_widgets/widgets/__init__.py +18 -0
- superqode/tui_widgets/widgets/progress.py +185 -0
- superqode/tui_widgets/widgets/tool_display.py +188 -0
- superqode/undo_manager.py +574 -0
- superqode/utils/__init__.py +5 -0
- superqode/utils/error_handling.py +323 -0
- superqode/utils/fuzzy.py +257 -0
- superqode/widgets/__init__.py +477 -0
- superqode/widgets/agent_collab.py +390 -0
- superqode/widgets/agent_store.py +936 -0
- superqode/widgets/agent_switcher.py +395 -0
- superqode/widgets/animation_manager.py +284 -0
- superqode/widgets/code_context.py +356 -0
- superqode/widgets/command_palette.py +412 -0
- superqode/widgets/connection_status.py +537 -0
- superqode/widgets/conversation_history.py +470 -0
- superqode/widgets/diff_indicator.py +155 -0
- superqode/widgets/enhanced_status_bar.py +385 -0
- superqode/widgets/enhanced_toast.py +476 -0
- superqode/widgets/file_browser.py +809 -0
- superqode/widgets/file_reference.py +585 -0
- superqode/widgets/issue_timeline.py +340 -0
- superqode/widgets/leader_key.py +264 -0
- superqode/widgets/mode_switcher.py +445 -0
- superqode/widgets/model_picker.py +234 -0
- superqode/widgets/permission_preview.py +1205 -0
- superqode/widgets/prompt.py +358 -0
- superqode/widgets/provider_connect.py +725 -0
- superqode/widgets/pty_shell.py +587 -0
- superqode/widgets/qe_dashboard.py +321 -0
- superqode/widgets/resizable_sidebar.py +377 -0
- superqode/widgets/response_changes.py +218 -0
- superqode/widgets/response_display.py +528 -0
- superqode/widgets/rich_tool_display.py +613 -0
- superqode/widgets/sidebar_panels.py +1180 -0
- superqode/widgets/slash_complete.py +356 -0
- superqode/widgets/split_view.py +612 -0
- superqode/widgets/status_bar.py +273 -0
- superqode/widgets/superqode_display.py +786 -0
- superqode/widgets/thinking_display.py +815 -0
- superqode/widgets/throbber.py +87 -0
- superqode/widgets/toast.py +206 -0
- superqode/widgets/unified_output.py +1073 -0
- superqode/workspace/__init__.py +75 -0
- superqode/workspace/artifacts.py +472 -0
- superqode/workspace/coordinator.py +353 -0
- superqode/workspace/diff_tracker.py +429 -0
- superqode/workspace/git_guard.py +373 -0
- superqode/workspace/git_snapshot.py +526 -0
- superqode/workspace/manager.py +750 -0
- superqode/workspace/snapshot.py +357 -0
- superqode/workspace/watcher.py +535 -0
- superqode/workspace/worktree.py +440 -0
- superqode-0.1.5.dist-info/METADATA +204 -0
- superqode-0.1.5.dist-info/RECORD +288 -0
- superqode-0.1.5.dist-info/WHEEL +5 -0
- superqode-0.1.5.dist-info/entry_points.txt +3 -0
- superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
- superqode-0.1.5.dist-info/top_level.txt +1 -0
superqode/providers/manager.py
@@ -0,0 +1,1338 @@
"""Provider manager for discovering and managing LLM providers and models."""

import os
import pathlib
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from enum import Enum

# litellm is imported lazily to avoid import errors when CWD doesn't exist
# (litellm tries to resolve current directory during import via pydantic plugins)


def _safe_import_litellm():
    """Safely import litellm, handling cases where CWD doesn't exist."""
    try:
        import litellm

        return litellm
    except (FileNotFoundError, OSError) as e:
        # Handle case where current directory doesn't exist during import
        # This can happen if CWD was deleted or is invalid
        try:
            # Try to change to a safe directory if current one doesn't exist
            cwd = os.getcwd()
            if not pathlib.Path(cwd).exists():
                # Use home directory as fallback
                os.chdir(os.path.expanduser("~"))
            # Try importing again
            import litellm

            return litellm
        except Exception:
            # If we still can't import, raise a more helpful error
            raise ImportError(
                f"Failed to import litellm. This may be due to an invalid current working directory. "
                f"Please ensure you're in a valid directory. Original error: {str(e)}"
            )


@dataclass
class ModelInfo:
    """Information about an LLM model."""

    id: str
    name: str
    provider_id: str
    description: Optional[str] = None
    context_size: Optional[int] = None
    available: bool = True


@dataclass
class ProviderInfo:
    """Information about an LLM provider."""

    id: str
    name: str
    description: str
    requires_api_key: bool = True
    configured: bool = False
    models: List[ModelInfo] = None

    def __post_init__(self):
        if self.models is None:
            self.models = []


class ProviderManager:
    """Lightweight LLM provider manager using LiteLLM."""

    # Provider priority for sorting (lower number = higher priority)
    PROVIDER_PRIORITY = {
        "ollama": 1,
        "vllm": 2,
        "sglang": 2,
        "openai": 3,
        "anthropic": 4,
        "google": 5,
        "xai": 6,
        "groq": 7,
        "openrouter": 8,
        "qwen": 9,
        "deepseek": 10,
        "together": 11,
        "deepinfra": 12,
        "github-copilot": 13,
        "perplexity": 14,
        "mistral": 15,
        "cerebras": 16,
        "zhipu": 17,
        "moonshot": 18,
        "minimax": 19,
        "baidu": 20,
        "tencent": 21,
        "doubao": 22,
        "01-ai": 23,
        "azure-openai": 24,
        "vertex-ai": 25,
        "openai-compatible": 26,
    }

    def __init__(self):
        """Initialize the provider manager."""
        self._configured_providers: Dict[str, Dict[str, Any]] = {}

        # Set up LiteLLM API keys from environment
        self._setup_litellm_keys()

    def _setup_litellm_keys(self):
        """Set up LiteLLM API keys from environment variables."""
        # OpenAI
        if os.getenv("OPENAI_API_KEY"):
            os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

        # Anthropic
        if os.getenv("ANTHROPIC_API_KEY"):
            os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY")

        # Google - supports both GOOGLE_API_KEY and GEMINI_API_KEY
        google_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
        if google_key:
            os.environ["GOOGLE_API_KEY"] = google_key
            # Also set GEMINI_API_KEY if it's not already set (for compatibility)
            if not os.getenv("GEMINI_API_KEY"):
                os.environ["GEMINI_API_KEY"] = google_key

        # xAI
        if os.getenv("XAI_API_KEY"):
            os.environ["XAI_API_KEY"] = os.getenv("XAI_API_KEY")

        # Other providers
        if os.getenv("GROQ_API_KEY"):
            os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

        if os.getenv("OPENROUTER_API_KEY"):
            os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")

        if os.getenv("DASHSCOPE_API_KEY"):
            os.environ["DASHSCOPE_API_KEY"] = os.getenv("DASHSCOPE_API_KEY")

        if os.getenv("DEEPSEEK_API_KEY"):
            os.environ["DEEPSEEK_API_KEY"] = os.getenv("DEEPSEEK_API_KEY")

        if os.getenv("GITHUB_TOKEN"):
            os.environ["GITHUB_TOKEN"] = os.getenv("GITHUB_TOKEN")

    def _is_provider_configured(self, provider_id: str) -> bool:
        """Check if a provider has API keys configured."""
        if provider_id in ("ollama", "mlx", "vllm", "sglang"):
            # Local providers don't need API keys
            return True

        key_mapping = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "google": ["GOOGLE_API_KEY", "GEMINI_API_KEY"],  # Google supports both
            "xai": "XAI_API_KEY",
            "groq": "GROQ_API_KEY",
            "openrouter": "OPENROUTER_API_KEY",
            "qwen": "DASHSCOPE_API_KEY",
            "deepseek": "DEEPSEEK_API_KEY",
            "github-copilot": "GITHUB_TOKEN",
            "together": "TOGETHER_API_KEY",
            "deepinfra": "DEEPINFRA_API_KEY",
            "perplexity": "PERPLEXITY_API_KEY",
            "mistral": "MISTRAL_API_KEY",
            "cerebras": "CEREBRAS_API_KEY",
            "zhipu": "ZHIPU_API_KEY",
            "moonshot": "MOONSHOT_API_KEY",
            "minimax": "MINIMAX_API_KEY",
            "baidu": "BAIDU_API_KEY",
            "tencent": "TENCENT_API_KEY",
            "doubao": "DOUBAO_API_KEY",
            "01-ai": "ZEROONE_API_KEY",
            "azure-openai": "AZURE_OPENAI_API_KEY",
            "vertex-ai": "GOOGLE_APPLICATION_CREDENTIALS",
            "openai-compatible": "OPENAI_COMPATIBLE_API_KEY",
        }

        env_vars = key_mapping.get(provider_id)
        if not env_vars:
            return False

        # Handle both single string and list of env vars (for Google)
        if isinstance(env_vars, list):
            # Check if any of the environment variables exist and have valid values
            for env_var in env_vars:
                api_key = os.getenv(env_var)
                if api_key and api_key.strip():
                    return True
            return False
        else:
            # Single environment variable
            api_key = os.getenv(env_vars)
            return bool(api_key and api_key.strip())

    def _check_api_key(self, key_name: str) -> bool:
        """Check if an API key is available."""
        return bool(os.getenv(key_name))

    def _get_ollama_models(self) -> List[ModelInfo]:
        """Get available models from Ollama daemon."""
        try:
            import requests

            # Try to connect to Ollama API
            ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")
            response = requests.get(f"{ollama_host}/api/tags", timeout=5)

            if response.status_code == 200:
                data = response.json()
                models = []

                for model_data in data.get("models", []):
                    name = model_data.get("name", "")
                    size = model_data.get("size", 0)
                    modified = model_data.get("modified_at", "")

                    # Estimate context size based on model size (rough heuristic)
                    if "3.1" in name or "llama3.1" in name:
                        if "405b" in name:
                            context_size = 131072
                        elif "70b" in name:
                            context_size = 131072
                        else:
                            context_size = 131072
                    elif "3.2" in name or "llama3.2" in name:
                        context_size = 32768
                    elif "codellama" in name or "code" in name:
                        context_size = 16384
                    elif "mistral" in name:
                        context_size = 32768
                    elif "mixtral" in name:
                        context_size = 32768
                    elif "phi3" in name:
                        context_size = 128000
                    elif "gemma" in name:
                        context_size = 8192
                    elif "qwen" in name:
                        context_size = 32768
                    else:
                        context_size = 4096  # Default

                    size_str = self._format_size(size)
                    models.append(
                        ModelInfo(
                            id=name,
                            name=f"{name} ({size_str})",
                            provider_id="ollama",
                            context_size=context_size,
                        )
                    )

                return models if models else self._get_default_ollama_models()
            else:
                # Fallback to default models if Ollama is not running
                return self._get_default_ollama_models()

        except Exception as e:
            # Fallback to default models if there's any error
            return self._get_default_ollama_models()

    def _get_default_ollama_models(self) -> List[ModelInfo]:
        """Get default Ollama models when API is not available."""
        return [
            ModelInfo("llama3.2:3b", "Llama 3.2 3B (default)", "ollama", context_size=32768),
            ModelInfo("llama3.1:8b", "Llama 3.1 8B (default)", "ollama", context_size=131072),
            ModelInfo("codellama:7b", "Code Llama 7B (default)", "ollama", context_size=16384),
        ]

    def _get_mlx_models(self) -> List[ModelInfo]:
        """Get available MLX models from server and cache."""
        models = []

        # Try to get models from running MLX server
        try:
            import asyncio
            from ..providers.local.mlx import get_mlx_client

            async def get_mlx_server_models():
                client = await get_mlx_client()
                if client:
                    server_models = await client.list_models()
                    return [
                        ModelInfo(
                            id=model.id,
                            name=model.name,
                            provider_id="mlx",
                            description=f"{model.family} - {model.parameter_count} params",
                            context_size=model.context_window or 4096,
                        )
                        for model in server_models
                    ]
                return []

            # Run in sync context
            server_models = asyncio.run(get_mlx_server_models())
            models.extend(server_models)
        except Exception:
            # If MLX client fails, continue with cached models
            pass

        # Add cached models if no server models found
        if not models:
            try:
                from ..providers.local.mlx import MLXClient

                cache_models = MLXClient.discover_huggingface_models()
                for model_info in cache_models[:5]:  # Limit to 5 cached models
                    model_id = model_info["id"]
                    size_mb = model_info["size_bytes"] / (1024 * 1024)
                    models.append(
                        ModelInfo(
                            id=model_id,
                            name=f"{model_id.split('/')[-1]} (cached)",
                            provider_id="mlx",
                            description=f"{size_mb:.1f} MB",
                            context_size=4096,
                        )
                    )
            except Exception:
                pass

        # Fallback to registry models if nothing found
        if not models:
            from ..providers.registry import PROVIDERS

            mlx_provider = PROVIDERS.get("mlx")
            if mlx_provider and mlx_provider.example_models:
                for model_id in mlx_provider.example_models[:3]:
                    models.append(
                        ModelInfo(
                            id=model_id,
                            name=model_id.split("/")[-1],
                            provider_id="mlx",
                            description="Example MLX model",
                            context_size=4096,
                        )
                    )

        return models

    def _get_vllm_models(self) -> List[ModelInfo]:
        """Get available vLLM models from server."""
        models = []

        # Try to get models from running vLLM server
        try:
            import asyncio
            from ..providers.local.vllm import get_vllm_client

            async def get_vllm_server_models():
                client = await get_vllm_client()
                if client:
                    server_models = await client.list_models()
                    return [
                        ModelInfo(
                            id=model.id,
                            name=model.name,
                            provider_id="vllm",
                            description=f"{model.family} - {model.parameter_count} params"
                            if model.parameter_count
                            else model.family,
                            context_size=model.context_window or 4096,
                        )
                        for model in server_models
                    ]
                return []

            # Run in sync context
            server_models = asyncio.run(get_vllm_server_models())
            models.extend(server_models)
        except Exception:
            # If vLLM client fails, continue with default models
            pass

        # Fallback to registry models if nothing found
        if not models:
            from ..providers.registry import PROVIDERS

            vllm_provider = PROVIDERS.get("vllm")
            if vllm_provider and vllm_provider.example_models:
                for model_id in vllm_provider.example_models[:3]:
                    models.append(
                        ModelInfo(
                            id=model_id,
                            name=model_id.split("/")[-1],
                            provider_id="vllm",
                            description="Example vLLM model (server not running)",
                            context_size=131072,
                        )
                    )

        return models

    def _get_sglang_models(self) -> List[ModelInfo]:
        """Get available SGLang models from server."""
        models = []

        # Try to get models from running SGLang server
        try:
            import asyncio
            from ..providers.local.sglang import get_sglang_client

            async def get_sglang_server_models():
                client = await get_sglang_client()
                if client:
                    server_models = await client.list_models()
                    return [
                        ModelInfo(
                            id=model.id,
                            name=model.name,
                            provider_id="sglang",
                            description=f"{model.family} - {model.parameter_count} params"
                            if model.parameter_count
                            else model.family,
                            context_size=model.context_window or 4096,
                        )
                        for model in server_models
                    ]
                return []

            # Run in sync context
            server_models = asyncio.run(get_sglang_server_models())
            models.extend(server_models)
        except Exception:
            # If SGLang client fails, continue with default models
            pass

        # Fallback to registry models if nothing found
        if not models:
            from ..providers.registry import PROVIDERS

            sglang_provider = PROVIDERS.get("sglang")
            if sglang_provider and sglang_provider.example_models:
                for model_id in sglang_provider.example_models[:3]:
                    models.append(
                        ModelInfo(
                            id=model_id,
                            name=model_id.split("/")[-1],
                            provider_id="sglang",
                            description="Example SGLang model (server not running)",
                            context_size=131072,
                        )
                    )

        return models

    def _format_size(self, size_bytes: int) -> str:
        """Format size in human readable format."""
        if size_bytes >= 1024**3:  # GB
            return f"{size_bytes / 1024**3:.1f}GB"
        elif size_bytes >= 1024**2:  # MB
            return f"{size_bytes / 1024**2:.1f}MB"
        else:  # KB
            return f"{size_bytes / 1024:.1f}KB"
    def list_providers(self) -> List[ProviderInfo]:
        """List available LLM providers with latest models."""
        providers = []

        # Local & Self-hosted Models
        ollama_models = self._get_ollama_models()
        providers.append(
            ProviderInfo(
                id="ollama",
                name="Ollama",
                description="Local models via Ollama daemon (privacy-focused, no API key required)",
                requires_api_key=False,
                configured=self._is_provider_configured("ollama"),
                models=ollama_models,
            )
        )

        # MLX (Apple Silicon) Models
        mlx_models = self._get_mlx_models()
        providers.append(
            ProviderInfo(
                id="mlx",
                name="MLX (Apple Silicon)",
                description="Local MLX models optimized for Apple Silicon (requires mlx_lm.server)",
                requires_api_key=False,
                configured=self._is_provider_configured("mlx"),
                models=mlx_models,
            )
        )

        # vLLM (Experimental) Models
        vllm_models = self._get_vllm_models()
        providers.append(
            ProviderInfo(
                id="vllm",
                name="vLLM (Experimental)",
                description="High-throughput local inference with PagedAttention [EXPERIMENTAL]",
                requires_api_key=False,
                configured=self._is_provider_configured("vllm"),
                models=vllm_models,
            )
        )

        # SGLang (Experimental) Models
        sglang_models = self._get_sglang_models()
        providers.append(
            ProviderInfo(
                id="sglang",
                name="SGLang (Experimental)",
                description="Fast structured generation with RadixAttention [EXPERIMENTAL]",
                requires_api_key=False,
                configured=self._is_provider_configured("sglang"),
                models=sglang_models,
            )
        )

        # US Labs - Premium Cloud Models
        providers.append(
            ProviderInfo(
                id="openai",
                name="OpenAI",
                description="Latest GPT-5.2, GPT-5.1, o1 models from models.dev",
                requires_api_key=True,
                configured=self._is_provider_configured("openai"),
                models=[
                    ModelInfo("gpt-5.2", "GPT-5.2 (Latest)", "openai", context_size=256000),
                    ModelInfo("gpt-5.2-pro", "GPT-5.2 Pro", "openai", context_size=256000),
                    ModelInfo("gpt-5.2-codex", "GPT-5.2 Codex", "openai", context_size=256000),
                    ModelInfo("gpt-5.1", "GPT-5.1", "openai", context_size=200000),
                    ModelInfo("gpt-5.1-codex", "GPT-5.1 Codex", "openai", context_size=200000),
                    ModelInfo("gpt-5.1-codex-mini", "GPT-5.1 Codex Mini", "openai", context_size=200000),
                    ModelInfo("gpt-4o-2024-11-20", "GPT-4o (Nov 2024)", "openai", context_size=128000),
                    ModelInfo("gpt-4o", "GPT-4o", "openai", context_size=128000),
                    ModelInfo("gpt-4o-mini", "GPT-4o Mini", "openai", context_size=128000),
                    ModelInfo("o1", "o1 (Reasoning)", "openai", context_size=200000),
                    ModelInfo("o1-mini", "o1 Mini", "openai", context_size=128000),
                    ModelInfo("gpt-4-turbo", "GPT-4 Turbo", "openai", context_size=128000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="anthropic",
                name="Anthropic",
                description="Latest Claude 4.5 models from models.dev",
                requires_api_key=True,
                configured=self._is_provider_configured("anthropic"),
                models=[
                    ModelInfo("claude-opus-4-5-20251101", "Claude Opus 4.5 (Latest)", "anthropic", context_size=200000),
                    ModelInfo("claude-sonnet-4-5-20250929", "Claude Sonnet 4.5", "anthropic", context_size=200000),
                    ModelInfo("claude-haiku-4-5-20251001", "Claude Haiku 4.5", "anthropic", context_size=200000),
                    ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "anthropic", context_size=200000),
                    ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "anthropic", context_size=200000),
                    ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "anthropic", context_size=200000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="google",
                name="Google",
                description="Latest Gemini 3.x models from models.dev",
                requires_api_key=True,
                configured=self._is_provider_configured("google"),
                models=[
                    ModelInfo("gemini-3-pro-preview", "Gemini 3 Pro Preview (Latest)", "google", context_size=2000000),
                    ModelInfo("gemini-3-flash-preview", "Gemini 3 Flash Preview (Latest)", "google", context_size=1000000),
                    ModelInfo("gemini-2.5-pro", "Gemini 2.5 Pro", "google", context_size=2000000),
                    ModelInfo("gemini-2.5-flash", "Gemini 2.5 Flash", "google", context_size=1000000),
                    ModelInfo("gemini-2.0-flash", "Gemini 2.0 Flash", "google", context_size=1000000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="xai",
                name="xAI",
                description="Latest Grok models",
                requires_api_key=True,
                configured=self._is_provider_configured("xai"),
                models=[
                    ModelInfo("grok-3", "Grok-3 (Latest)", "xai", context_size=262144),
                    ModelInfo("grok-3-mini", "Grok-3 Mini", "xai", context_size=131072),
                    ModelInfo("grok-2", "Grok-2", "xai", context_size=131072),
                    ModelInfo("grok-beta", "Grok Beta", "xai", context_size=131072),
                ],
            )
        )

        # Other Labs & Providers
        providers.append(
            ProviderInfo(
                id="groq",
                name="Groq",
                description="Ultra-fast inference for open-source models",
                requires_api_key=True,
                configured=self._is_provider_configured("groq"),
                models=[
                    ModelInfo("llama-3.1-8b-instant", "Llama 3.1 8B Instant", "groq", context_size=131072),
                    ModelInfo("llama-3.1-70b-versatile", "Llama 3.1 70B Versatile", "groq", context_size=131072),
                    ModelInfo("llama-3.1-405b-instruct", "Llama 3.1 405B", "groq", context_size=131072),
                    ModelInfo("llama3-8b-8192", "Llama 3 8B", "groq", context_size=8192),
                    ModelInfo("llama3-70b-8192", "Llama 3 70B", "groq", context_size=8192),
                    ModelInfo("mixtral-8x7b-32768", "Mixtral 8x7B", "groq", context_size=32768),
                    ModelInfo("gemma2-9b-it", "Gemma 2 9B", "groq", context_size=8192),
                    ModelInfo("llama2-70b-4096", "Llama 2 70B", "groq", context_size=4096),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="openrouter",
                name="OpenRouter",
                description="Unified API for 100+ LLMs (Claude, GPT-4, Llama, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("openrouter"),
                models=[
                    ModelInfo("anthropic/claude-3.5-sonnet", "Claude 3.5 Sonnet", "openrouter", context_size=200000),
                    ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter", context_size=128000),
                    ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter", context_size=128000),
                    ModelInfo("openai/o1-preview", "o1 Preview", "openrouter", context_size=128000),
                    ModelInfo("openai/o1-mini", "o1 Mini", "openrouter", context_size=128000),
                    ModelInfo("meta-llama/llama-3.1-405b-instruct", "Llama 3.1 405B", "openrouter", context_size=131072),
                    ModelInfo("meta-llama/llama-3.1-70b-instruct", "Llama 3.1 70B", "openrouter", context_size=131072),
                    ModelInfo("google/gemini-pro-1.5", "Gemini Pro 1.5", "openrouter", context_size=2097152),
                    ModelInfo("mistralai/mistral-7b-instruct", "Mistral 7B", "openrouter", context_size=32768),
                    ModelInfo("anthropic/claude-3-haiku", "Claude 3 Haiku", "openrouter", context_size=200000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="github-copilot",
                name="GitHub Copilot",
                description="GitHub Copilot models (Claude, GPT-4, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("github-copilot"),
                models=[
                    ModelInfo("gpt-4", "GPT-4 (Copilot)", "github-copilot", context_size=8192),
                    ModelInfo("claude-3.5-sonnet", "Claude 3.5 Sonnet (Copilot)", "github-copilot", context_size=200000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="together",
                name="Together AI",
                description="High-performance open-source models",
                requires_api_key=True,
                configured=self._is_provider_configured("together"),
                models=[
                    ModelInfo("meta-llama/Llama-3.1-405B-Instruct-Turbo", "Llama 3.1 405B Turbo", "together", context_size=131072),
                    ModelInfo("meta-llama/Llama-3.1-70B-Instruct-Turbo", "Llama 3.1 70B Turbo", "together", context_size=131072),
                    ModelInfo("meta-llama/Llama-3.1-8B-Instruct-Turbo", "Llama 3.1 8B Turbo", "together", context_size=131072),
                    ModelInfo("meta-llama/Llama-3-70B-Instruct-Turbo", "Llama 3 70B Turbo", "together", context_size=8192),
                    ModelInfo("meta-llama/Llama-3-8B-Instruct-Turbo", "Llama 3 8B Turbo", "together", context_size=8192),
                    ModelInfo("mistralai/Mistral-7B-Instruct-v0.1", "Mistral 7B", "together", context_size=32768),
                    ModelInfo("mistralai/Mixtral-8x7B-Instruct-v0.1", "Mixtral 8x7B", "together", context_size=32768),
                    ModelInfo("mistralai/Mistral-7B-Instruct-v0.2", "Mistral 7B v0.2", "together", context_size=32768),
                    ModelInfo("Qwen/Qwen2-72B-Instruct", "Qwen2 72B", "together", context_size=32768),
                    ModelInfo("codellama/CodeLlama-34b-Instruct-hf", "Code Llama 34B", "together", context_size=16384),
                    ModelInfo("codellama/CodeLlama-13b-Instruct-hf", "Code Llama 13B", "together", context_size=16384),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="deepinfra",
                name="DeepInfra",
                description="Fast inference for open source models",
                requires_api_key=True,
                configured=self._is_provider_configured("deepinfra"),
                models=[
                    ModelInfo("meta-llama/Llama-2-70b-chat-hf", "Llama 2 70B", "deepinfra", context_size=4096),
                    ModelInfo("meta-llama/Llama-2-13b-chat-hf", "Llama 2 13B", "deepinfra", context_size=4096),
                    ModelInfo("codellama/CodeLlama-34b-Instruct-hf", "Code Llama 34B", "deepinfra", context_size=16384),
                    ModelInfo("jondurbin/airoboros-l2-70b-gpt4-1.4.1", "Airoboros 70B", "deepinfra", context_size=4096),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="perplexity",
                name="Perplexity",
                description="Perplexity models (Sonar, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("perplexity"),
                models=[
                    ModelInfo("sonar-pro", "Sonar Pro", "perplexity", context_size=200000),
                    ModelInfo("sonar", "Sonar", "perplexity", context_size=127072),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="mistral",
                name="Mistral AI",
                description="Mistral models (Mistral Large, Medium, Small, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("mistral"),
                models=[
                    ModelInfo("mistral-large-latest", "Mistral Large", "mistral", context_size=128000),
                    ModelInfo("mistral-medium", "Mistral Medium", "mistral", context_size=32768),
                    ModelInfo("mistral-small", "Mistral Small", "mistral", context_size=32768),
                    ModelInfo("codestral-latest", "Codestral", "mistral", context_size=32768),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="cerebras",
                name="Cerebras",
                description="Cerebras models (Llama, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("cerebras"),
                models=[
                    ModelInfo("llama3.1-8b", "Llama 3.1 8B", "cerebras", context_size=8192),
                    ModelInfo("llama3.1-70b", "Llama 3.1 70B", "cerebras", context_size=8192),
                ],
            )
        )

        # Meta AI (Llama models)
        providers.append(
            ProviderInfo(
                id="meta",
                name="Meta AI",
                description="Latest Llama 4 models from models.dev",
                requires_api_key=True,
                configured=self._is_provider_configured("meta"),
                models=[
                    ModelInfo("meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "Llama 4 Maverick 17B (Latest)", "meta", context_size=262144),
                    ModelInfo("llama-3.3-70b-versatile", "Llama 3.3 70B Versatile", "meta", context_size=131072),
                    ModelInfo("llama-3.1-405b-instruct", "Llama 3.1 405B", "meta", context_size=131072),
                    ModelInfo("llama-3.1-70b-instruct", "Llama 3.1 70B", "meta", context_size=131072),
                ],
            )
        )

        # Chinese AI Providers
        providers.append(
            ProviderInfo(
                id="qwen",
                name="Alibaba Qwen",
                description="Latest Qwen3 models from models.dev - Alibaba Cloud",
                requires_api_key=True,
                configured=self._is_provider_configured("qwen"),
                models=[
                    ModelInfo("qwen3-max", "Qwen3 Max (Latest)", "qwen", context_size=262144),
                    ModelInfo("qwen3-coder-480b-a35b-instruct", "Qwen3 Coder 480B", "qwen", context_size=131072),
                    ModelInfo("qwen-flash", "Qwen Flash", "qwen", context_size=32768),
                    ModelInfo("qwen2.5-72b-instruct", "Qwen2.5 72B", "qwen", context_size=32768),
                    ModelInfo("qwen2.5-coder-32b-instruct", "Qwen2.5 Coder 32B", "qwen", context_size=32768),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="deepseek",
                name="DeepSeek",
                description="Latest DeepSeek V3.2, R1 models from models.dev",
                requires_api_key=True,
                configured=self._is_provider_configured("deepseek"),
                models=[
                    ModelInfo("deepseek-ai/DeepSeek-V3.2", "DeepSeek V3.2 (Latest)", "deepseek", context_size=128000),
                    ModelInfo("deepseek-ai/DeepSeek-R1", "DeepSeek R1 (Reasoning)", "deepseek", context_size=64000),
                    ModelInfo("deepseek-chat", "DeepSeek Chat (V3)", "deepseek", context_size=64000),
                    ModelInfo("deepseek-coder", "DeepSeek Coder", "deepseek", context_size=64000),
                    ModelInfo("deepseek-reasoner", "DeepSeek Reasoner", "deepseek", context_size=64000),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="zhipu",
                name="Zhipu AI",
                description="GLM models (GLM-4, ChatGLM, etc.) - Tsinghua University",
                requires_api_key=True,
                configured=self._is_provider_configured("zhipu"),
                models=[
                    ModelInfo("glm-4", "GLM-4", "zhipu", context_size=128000),
                    ModelInfo("glm-3-turbo", "GLM-3 Turbo", "zhipu", context_size=128000),
                    ModelInfo("chatglm_turbo", "ChatGLM Turbo", "zhipu", context_size=32768),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="moonshot",
                name="Moonshot AI",
                description="Kimi models (Kimi-2, Kimi-K2, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("moonshot"),
                models=[
                    ModelInfo("moonshot-v1-8k", "Moonshot v1 8K", "moonshot", context_size=8192),
                    ModelInfo("moonshot-v1-32k", "Moonshot v1 32K", "moonshot", context_size=32768),
                    ModelInfo("moonshot-v1-128k", "Moonshot v1 128K", "moonshot", context_size=131072),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="minimax",
                name="MiniMax",
                description="MiniMax models (abab-6, abab-6.5, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("minimax"),
                models=[
                    ModelInfo("abab-6-5-chat", "abab-6.5 Chat", "minimax", context_size=24576),
                    ModelInfo("abab-6-chat", "abab-6 Chat", "minimax", context_size=8192),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="baidu",
                name="Baidu",
                description="Ernie models (ERNIE-4.0, ERNIE-3.5, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("baidu"),
                models=[
                    ModelInfo("ernie-4.0-8k", "ERNIE-4.0 8K", "baidu", context_size=8192),
                    ModelInfo("ernie-3.5-8k", "ERNIE-3.5 8K", "baidu", context_size=8192),
                    ModelInfo("ernie-speed-8k", "ERNIE Speed 8K", "baidu", context_size=8192),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="tencent",
                name="Tencent",
                description="Hunyuan models (Hunyuan-Lite, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("tencent"),
                models=[
                    ModelInfo("hunyuan-lite", "Hunyuan Lite", "tencent", context_size=32768),
                    ModelInfo("hunyuan-standard", "Hunyuan Standard", "tencent", context_size=32768),
                    ModelInfo("hunyuan-pro", "Hunyuan Pro", "tencent", context_size=32768),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="doubao",
                name="ByteDance Doubao",
                description="Doubao models (Doubao-Pro, Doubao-Lite, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("doubao"),
                models=[
                    ModelInfo("doubao-lite-4k", "Doubao Lite 4K", "doubao", context_size=4096),
                    ModelInfo("doubao-lite-32k", "Doubao Lite 32K", "doubao", context_size=32768),
                    ModelInfo("doubao-pro-4k", "Doubao Pro 4K", "doubao", context_size=4096),
                    ModelInfo("doubao-pro-32k", "Doubao Pro 32K", "doubao", context_size=32768),
                ],
            )
        )

        providers.append(
            ProviderInfo(
                id="01-ai",
                name="01.AI",
                description="Yi models (Yi-1.5, Yi-34B, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("01-ai"),
                models=[
                    ModelInfo("yi-large", "Yi Large", "01-ai", context_size=32768),
                    ModelInfo("yi-medium", "Yi Medium", "01-ai", context_size=16384),
                    ModelInfo("yi-spark", "Yi Spark", "01-ai", context_size=16384),
                ],
            )
        )

        # Azure OpenAI
        providers.append(
            ProviderInfo(
                id="azure-openai",
                name="Azure OpenAI",
                description="Azure-hosted OpenAI models (GPT-4, GPT-3.5, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("azure-openai"),
                models=[
                    ModelInfo("gpt-4", "GPT-4 (Azure)", "azure-openai", context_size=8192),
                    ModelInfo("gpt-4-32k", "GPT-4 32K (Azure)", "azure-openai", context_size=32768),
                    ModelInfo("gpt-35-turbo", "GPT-3.5 Turbo (Azure)", "azure-openai", context_size=4096),
                    ModelInfo("gpt-35-turbo-16k", "GPT-3.5 Turbo 16K (Azure)", "azure-openai", context_size=16384),
                ],
            )
        )

        # Google Vertex AI
        providers.append(
            ProviderInfo(
                id="vertex-ai",
                name="Google Vertex AI",
                description="Google Vertex AI models (Gemini, PaLM, etc.)",
                requires_api_key=True,
                configured=self._is_provider_configured("vertex-ai"),
                models=[
                    ModelInfo("gemini-pro", "Gemini Pro (Vertex)", "vertex-ai", context_size=32768),
                    ModelInfo("gemini-pro-vision", "Gemini Pro Vision (Vertex)", "vertex-ai", context_size=16384),
                    ModelInfo("palm-2-chat-bison", "PaLM 2 Chat Bison", "vertex-ai", context_size=8192),
                    ModelInfo("palm-2-codechat-bison", "PaLM 2 CodeChat Bison", "vertex-ai", context_size=8192),
                ],
            )
        )

        # OpenAI Compatible
        providers.append(
            ProviderInfo(
                id="openai-compatible",
                name="OpenAI Compatible",
                description="Any OpenAI-compatible API endpoint",
                requires_api_key=True,
                configured=self._is_provider_configured("openai-compatible"),
                models=[
                    ModelInfo("gpt-4", "GPT-4 Compatible", "openai-compatible", context_size=8192),
                    ModelInfo("gpt-3.5-turbo", "GPT-3.5 Turbo Compatible", "openai-compatible", context_size=4096),
                    ModelInfo("claude-3-sonnet", "Claude 3 Sonnet Compatible", "openai-compatible", context_size=200000),
                ],
            )
        )

        # Sort by priority
        providers.sort(key=lambda p: self.PROVIDER_PRIORITY.get(p.id, 99))

        return providers
    def get_models(self, provider_id: str, refresh: bool = False) -> List[ModelInfo]:
        """Get available models for a provider."""
        # Return basic model list for now
        provider = next((p for p in self.list_providers() if p.id == provider_id), None)
        return provider.models if provider else []

    def test_connection(
        self, provider_id: str, model_id: Optional[str] = None
    ) -> tuple[bool, Optional[str]]:
        """Test connection to a provider and optionally a specific model."""
        try:
            # For Ollama, we don't need to test connection
            if provider_id == "ollama":
                if model_id:
                    # Test if the specific Ollama model exists
                    try:
                        models = self.get_models(provider_id)
                        if any(m.id == model_id for m in models):
                            return True, None
                        else:
                            return (
                                False,
                                f"Model '{model_id}' not found. Available models: {', '.join([m.id for m in models[:5]])}",
                            )
                    except Exception as e:
                        return False, f"Failed to check Ollama models: {str(e)}"
                return True, None

            # Test specific model if provided
            if model_id:
                try:
                    # Try a minimal chat completion to test the model
                    messages = [{"role": "user", "content": "Hi"}]
                    response = self.chat_completion(provider_id, model_id, messages, max_tokens=5)
                    if response and response.strip():
                        return True, None
                    else:
                        return False, f"Model '{model_id}' returned empty response"
                except Exception as e:
                    error_msg = str(e).lower()
                    if "model not found" in error_msg or "invalid model" in error_msg:
                        return False, f"Model '{model_id}' not found or not available"
                    elif "authentication" in error_msg or "api key" in error_msg:
                        return False, f"Authentication failed for provider '{provider_id}'"
                    elif "rate limit" in error_msg:
                        return False, f"Rate limit exceeded for provider '{provider_id}'"
                    else:
                        return False, f"Model '{model_id}' failed: {str(e)}"

            # For other providers without a specific model, check the API key first
            if not self._is_provider_configured(provider_id):
                key_mapping = {
                    "openai": "OPENAI_API_KEY",
                    "anthropic": "ANTHROPIC_API_KEY",
                    "google": "GOOGLE_API_KEY or GEMINI_API_KEY",  # Google supports both
                    "xai": "XAI_API_KEY",
                    "groq": "GROQ_API_KEY",
                    "openrouter": "OPENROUTER_API_KEY",
                    "qwen": "DASHSCOPE_API_KEY",
                    "deepseek": "DEEPSEEK_API_KEY",
                    "github-copilot": "GITHUB_TOKEN",
                    "together": "TOGETHER_API_KEY",
                    "deepinfra": "DEEPINFRA_API_KEY",
                    "perplexity": "PERPLEXITY_API_KEY",
                    "mistral": "MISTRAL_API_KEY",
                    "cerebras": "CEREBRAS_API_KEY",
                    "zhipu": "ZHIPU_API_KEY",
                    "moonshot": "MOONSHOT_API_KEY",
                    "minimax": "MINIMAX_API_KEY",
                    "baidu": "BAIDU_API_KEY",
                    "tencent": "TENCENT_API_KEY",
                    "doubao": "DOUBAO_API_KEY",
                    "01-ai": "ZEROONE_API_KEY",
                    "azure-openai": "AZURE_OPENAI_API_KEY",
                    "vertex-ai": "GOOGLE_APPLICATION_CREDENTIALS",
                    "openai-compatible": "OPENAI_COMPATIBLE_API_KEY",
                }
                env_var = key_mapping.get(provider_id)
                if env_var:
                    return False, f"API key not set. Please set {env_var} environment variable."
                else:
                    return False, f"Provider '{provider_id}' requires API key configuration."

            # Try to get models - this will also validate the API key works
            models = self.get_models(provider_id)
            if models:
                # If we have models, try a quick test with the first model to validate API key
                try:
                    test_model = models[0].id
                    messages = [{"role": "user", "content": "Hi"}]
                    response = self.chat_completion(provider_id, test_model, messages, max_tokens=5)
                    if response and response.strip():
                        return True, None
                    else:
                        return False, "API key validation failed - received empty response"
                except Exception as e:
                    error_msg = str(e).lower()
                    if (
                        "authentication" in error_msg
                        or "api key" in error_msg
                        or "api_key" in error_msg
                    ):
                        key_mapping = {
                            "openai": "OPENAI_API_KEY",
                            "anthropic": "ANTHROPIC_API_KEY",
                            "google": "GOOGLE_API_KEY or GEMINI_API_KEY",  # Google supports both
                            "xai": "XAI_API_KEY",
                            "groq": "GROQ_API_KEY",
                            "openrouter": "OPENROUTER_API_KEY",
                            "qwen": "DASHSCOPE_API_KEY",
                            "deepseek": "DEEPSEEK_API_KEY",
                        }
                        env_var = key_mapping.get(provider_id, "API_KEY")
                        return (
                            False,
                            f"API key validation failed: {str(e)}. Please check your {env_var} environment variable.",
                        )
                    else:
                        return False, f"Connection test failed: {str(e)}"
            else:
                return False, "No models available"
        except Exception as e:
            return False, str(e)

    def chat_completion(
        self, provider_id: str, model_id: str, messages: List[Dict[str, str]], **kwargs
    ) -> str:
        """Make a chat completion request."""
        try:
            # Construct the full model name for LiteLLM
            if provider_id == "ollama":
                full_model = f"ollama/{model_id}"
            elif provider_id == "openai":
                full_model = f"openai/{model_id}"
            elif provider_id == "anthropic":
                full_model = f"anthropic/{model_id}"
            elif provider_id == "google":
                full_model = f"gemini/{model_id}"
            elif provider_id == "xai":
                full_model = f"xai/{model_id}"
            elif provider_id == "groq":
                full_model = f"groq/{model_id}"
            elif provider_id == "openrouter":
                full_model = f"openrouter/{model_id}"
            elif provider_id == "qwen":
                full_model = f"qwen/{model_id}"
            elif provider_id == "deepseek":
                full_model = f"deepseek/{model_id}"
            else:
                full_model = f"{provider_id}/{model_id}"

            # Lazy import litellm to avoid import errors when CWD doesn't exist
            litellm = _safe_import_litellm()

            # Make the request
            response = litellm.completion(model=full_model, messages=messages, **kwargs)

            return response.choices[0].message.content

        except Exception as e:
            raise Exception(f"Failed to get response from {provider_id}: {str(e)}")