superqode-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. superqode/__init__.py +33 -0
  2. superqode/acp/__init__.py +23 -0
  3. superqode/acp/client.py +913 -0
  4. superqode/acp/permission_screen.py +457 -0
  5. superqode/acp/types.py +480 -0
  6. superqode/acp_discovery.py +856 -0
  7. superqode/agent/__init__.py +22 -0
  8. superqode/agent/edit_strategies.py +334 -0
  9. superqode/agent/loop.py +892 -0
  10. superqode/agent/qe_report_templates.py +39 -0
  11. superqode/agent/system_prompts.py +353 -0
  12. superqode/agent_output.py +721 -0
  13. superqode/agent_stream.py +953 -0
  14. superqode/agents/__init__.py +59 -0
  15. superqode/agents/acp_registry.py +305 -0
  16. superqode/agents/client.py +249 -0
  17. superqode/agents/data/augmentcode.com.toml +51 -0
  18. superqode/agents/data/cagent.dev.toml +51 -0
  19. superqode/agents/data/claude.com.toml +60 -0
  20. superqode/agents/data/codeassistant.dev.toml +51 -0
  21. superqode/agents/data/codex.openai.com.toml +57 -0
  22. superqode/agents/data/fastagent.ai.toml +66 -0
  23. superqode/agents/data/geminicli.com.toml +77 -0
  24. superqode/agents/data/goose.block.xyz.toml +54 -0
  25. superqode/agents/data/junie.jetbrains.com.toml +56 -0
  26. superqode/agents/data/kimi.moonshot.cn.toml +57 -0
  27. superqode/agents/data/llmlingagent.dev.toml +51 -0
  28. superqode/agents/data/molt.bot.toml +49 -0
  29. superqode/agents/data/opencode.ai.toml +60 -0
  30. superqode/agents/data/stakpak.dev.toml +51 -0
  31. superqode/agents/data/vtcode.dev.toml +51 -0
  32. superqode/agents/discovery.py +266 -0
  33. superqode/agents/messaging.py +160 -0
  34. superqode/agents/persona.py +166 -0
  35. superqode/agents/registry.py +421 -0
  36. superqode/agents/schema.py +72 -0
  37. superqode/agents/unified.py +367 -0
  38. superqode/app/__init__.py +111 -0
  39. superqode/app/constants.py +314 -0
  40. superqode/app/css.py +366 -0
  41. superqode/app/models.py +118 -0
  42. superqode/app/suggester.py +125 -0
  43. superqode/app/widgets.py +1591 -0
  44. superqode/app_enhanced.py +399 -0
  45. superqode/app_main.py +17187 -0
  46. superqode/approval.py +312 -0
  47. superqode/atomic.py +296 -0
  48. superqode/commands/__init__.py +1 -0
  49. superqode/commands/acp.py +965 -0
  50. superqode/commands/agents.py +180 -0
  51. superqode/commands/auth.py +278 -0
  52. superqode/commands/config.py +374 -0
  53. superqode/commands/init.py +826 -0
  54. superqode/commands/providers.py +819 -0
  55. superqode/commands/qe.py +1145 -0
  56. superqode/commands/roles.py +380 -0
  57. superqode/commands/serve.py +172 -0
  58. superqode/commands/suggestions.py +127 -0
  59. superqode/commands/superqe.py +460 -0
  60. superqode/config/__init__.py +51 -0
  61. superqode/config/loader.py +812 -0
  62. superqode/config/schema.py +498 -0
  63. superqode/core/__init__.py +111 -0
  64. superqode/core/roles.py +281 -0
  65. superqode/danger.py +386 -0
  66. superqode/data/superqode-template.yaml +1522 -0
  67. superqode/design_system.py +1080 -0
  68. superqode/dialogs/__init__.py +6 -0
  69. superqode/dialogs/base.py +39 -0
  70. superqode/dialogs/model.py +130 -0
  71. superqode/dialogs/provider.py +870 -0
  72. superqode/diff_view.py +919 -0
  73. superqode/enterprise.py +21 -0
  74. superqode/evaluation/__init__.py +25 -0
  75. superqode/evaluation/adapters.py +93 -0
  76. superqode/evaluation/behaviors.py +89 -0
  77. superqode/evaluation/engine.py +209 -0
  78. superqode/evaluation/scenarios.py +96 -0
  79. superqode/execution/__init__.py +36 -0
  80. superqode/execution/linter.py +538 -0
  81. superqode/execution/modes.py +347 -0
  82. superqode/execution/resolver.py +283 -0
  83. superqode/execution/runner.py +642 -0
  84. superqode/file_explorer.py +811 -0
  85. superqode/file_viewer.py +471 -0
  86. superqode/flash.py +183 -0
  87. superqode/guidance/__init__.py +58 -0
  88. superqode/guidance/config.py +203 -0
  89. superqode/guidance/prompts.py +71 -0
  90. superqode/harness/__init__.py +54 -0
  91. superqode/harness/accelerator.py +291 -0
  92. superqode/harness/config.py +319 -0
  93. superqode/harness/validator.py +147 -0
  94. superqode/history.py +279 -0
  95. superqode/integrations/superopt_runner.py +124 -0
  96. superqode/logging/__init__.py +49 -0
  97. superqode/logging/adapters.py +219 -0
  98. superqode/logging/formatter.py +923 -0
  99. superqode/logging/integration.py +341 -0
  100. superqode/logging/sinks.py +170 -0
  101. superqode/logging/unified_log.py +417 -0
  102. superqode/lsp/__init__.py +26 -0
  103. superqode/lsp/client.py +544 -0
  104. superqode/main.py +1069 -0
  105. superqode/mcp/__init__.py +89 -0
  106. superqode/mcp/auth_storage.py +380 -0
  107. superqode/mcp/client.py +1236 -0
  108. superqode/mcp/config.py +319 -0
  109. superqode/mcp/integration.py +337 -0
  110. superqode/mcp/oauth.py +436 -0
  111. superqode/mcp/oauth_callback.py +385 -0
  112. superqode/mcp/types.py +290 -0
  113. superqode/memory/__init__.py +31 -0
  114. superqode/memory/feedback.py +342 -0
  115. superqode/memory/store.py +522 -0
  116. superqode/notifications.py +369 -0
  117. superqode/optimization/__init__.py +5 -0
  118. superqode/optimization/config.py +33 -0
  119. superqode/permissions/__init__.py +25 -0
  120. superqode/permissions/rules.py +488 -0
  121. superqode/plan.py +323 -0
  122. superqode/providers/__init__.py +33 -0
  123. superqode/providers/gateway/__init__.py +165 -0
  124. superqode/providers/gateway/base.py +228 -0
  125. superqode/providers/gateway/litellm_gateway.py +1170 -0
  126. superqode/providers/gateway/openresponses_gateway.py +436 -0
  127. superqode/providers/health.py +297 -0
  128. superqode/providers/huggingface/__init__.py +74 -0
  129. superqode/providers/huggingface/downloader.py +472 -0
  130. superqode/providers/huggingface/endpoints.py +442 -0
  131. superqode/providers/huggingface/hub.py +531 -0
  132. superqode/providers/huggingface/inference.py +394 -0
  133. superqode/providers/huggingface/transformers_runner.py +516 -0
  134. superqode/providers/local/__init__.py +100 -0
  135. superqode/providers/local/base.py +438 -0
  136. superqode/providers/local/discovery.py +418 -0
  137. superqode/providers/local/lmstudio.py +256 -0
  138. superqode/providers/local/mlx.py +457 -0
  139. superqode/providers/local/ollama.py +486 -0
  140. superqode/providers/local/sglang.py +268 -0
  141. superqode/providers/local/tgi.py +260 -0
  142. superqode/providers/local/tool_support.py +477 -0
  143. superqode/providers/local/vllm.py +258 -0
  144. superqode/providers/manager.py +1338 -0
  145. superqode/providers/models.py +1016 -0
  146. superqode/providers/models_dev.py +578 -0
  147. superqode/providers/openresponses/__init__.py +87 -0
  148. superqode/providers/openresponses/converters/__init__.py +17 -0
  149. superqode/providers/openresponses/converters/messages.py +343 -0
  150. superqode/providers/openresponses/converters/tools.py +268 -0
  151. superqode/providers/openresponses/schema/__init__.py +56 -0
  152. superqode/providers/openresponses/schema/models.py +585 -0
  153. superqode/providers/openresponses/streaming/__init__.py +5 -0
  154. superqode/providers/openresponses/streaming/parser.py +338 -0
  155. superqode/providers/openresponses/tools/__init__.py +21 -0
  156. superqode/providers/openresponses/tools/apply_patch.py +352 -0
  157. superqode/providers/openresponses/tools/code_interpreter.py +290 -0
  158. superqode/providers/openresponses/tools/file_search.py +333 -0
  159. superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
  160. superqode/providers/registry.py +716 -0
  161. superqode/providers/usage.py +332 -0
  162. superqode/pure_mode.py +384 -0
  163. superqode/qr/__init__.py +23 -0
  164. superqode/qr/dashboard.py +781 -0
  165. superqode/qr/generator.py +1018 -0
  166. superqode/qr/templates.py +135 -0
  167. superqode/safety/__init__.py +41 -0
  168. superqode/safety/sandbox.py +413 -0
  169. superqode/safety/warnings.py +256 -0
  170. superqode/server/__init__.py +33 -0
  171. superqode/server/lsp_server.py +775 -0
  172. superqode/server/web.py +250 -0
  173. superqode/session/__init__.py +25 -0
  174. superqode/session/persistence.py +580 -0
  175. superqode/session/sharing.py +477 -0
  176. superqode/session.py +475 -0
  177. superqode/sidebar.py +2991 -0
  178. superqode/stream_view.py +648 -0
  179. superqode/styles/__init__.py +3 -0
  180. superqode/superqe/__init__.py +184 -0
  181. superqode/superqe/acp_runner.py +1064 -0
  182. superqode/superqe/constitution/__init__.py +62 -0
  183. superqode/superqe/constitution/evaluator.py +308 -0
  184. superqode/superqe/constitution/loader.py +432 -0
  185. superqode/superqe/constitution/schema.py +250 -0
  186. superqode/superqe/events.py +591 -0
  187. superqode/superqe/frameworks/__init__.py +65 -0
  188. superqode/superqe/frameworks/base.py +234 -0
  189. superqode/superqe/frameworks/e2e.py +263 -0
  190. superqode/superqe/frameworks/executor.py +237 -0
  191. superqode/superqe/frameworks/javascript.py +409 -0
  192. superqode/superqe/frameworks/python.py +373 -0
  193. superqode/superqe/frameworks/registry.py +92 -0
  194. superqode/superqe/mcp_tools/__init__.py +47 -0
  195. superqode/superqe/mcp_tools/core_tools.py +418 -0
  196. superqode/superqe/mcp_tools/registry.py +230 -0
  197. superqode/superqe/mcp_tools/testing_tools.py +167 -0
  198. superqode/superqe/noise.py +89 -0
  199. superqode/superqe/orchestrator.py +778 -0
  200. superqode/superqe/roles.py +609 -0
  201. superqode/superqe/session.py +713 -0
  202. superqode/superqe/skills/__init__.py +57 -0
  203. superqode/superqe/skills/base.py +106 -0
  204. superqode/superqe/skills/core_skills.py +899 -0
  205. superqode/superqe/skills/registry.py +90 -0
  206. superqode/superqe/verifier.py +101 -0
  207. superqode/superqe_cli.py +76 -0
  208. superqode/tool_call.py +358 -0
  209. superqode/tools/__init__.py +93 -0
  210. superqode/tools/agent_tools.py +496 -0
  211. superqode/tools/base.py +324 -0
  212. superqode/tools/batch_tool.py +133 -0
  213. superqode/tools/diagnostics.py +311 -0
  214. superqode/tools/edit_tools.py +653 -0
  215. superqode/tools/enhanced_base.py +515 -0
  216. superqode/tools/file_tools.py +269 -0
  217. superqode/tools/file_tracking.py +45 -0
  218. superqode/tools/lsp_tools.py +610 -0
  219. superqode/tools/network_tools.py +350 -0
  220. superqode/tools/permissions.py +400 -0
  221. superqode/tools/question_tool.py +324 -0
  222. superqode/tools/search_tools.py +598 -0
  223. superqode/tools/shell_tools.py +259 -0
  224. superqode/tools/todo_tools.py +121 -0
  225. superqode/tools/validation.py +80 -0
  226. superqode/tools/web_tools.py +639 -0
  227. superqode/tui.py +1152 -0
  228. superqode/tui_integration.py +875 -0
  229. superqode/tui_widgets/__init__.py +27 -0
  230. superqode/tui_widgets/widgets/__init__.py +18 -0
  231. superqode/tui_widgets/widgets/progress.py +185 -0
  232. superqode/tui_widgets/widgets/tool_display.py +188 -0
  233. superqode/undo_manager.py +574 -0
  234. superqode/utils/__init__.py +5 -0
  235. superqode/utils/error_handling.py +323 -0
  236. superqode/utils/fuzzy.py +257 -0
  237. superqode/widgets/__init__.py +477 -0
  238. superqode/widgets/agent_collab.py +390 -0
  239. superqode/widgets/agent_store.py +936 -0
  240. superqode/widgets/agent_switcher.py +395 -0
  241. superqode/widgets/animation_manager.py +284 -0
  242. superqode/widgets/code_context.py +356 -0
  243. superqode/widgets/command_palette.py +412 -0
  244. superqode/widgets/connection_status.py +537 -0
  245. superqode/widgets/conversation_history.py +470 -0
  246. superqode/widgets/diff_indicator.py +155 -0
  247. superqode/widgets/enhanced_status_bar.py +385 -0
  248. superqode/widgets/enhanced_toast.py +476 -0
  249. superqode/widgets/file_browser.py +809 -0
  250. superqode/widgets/file_reference.py +585 -0
  251. superqode/widgets/issue_timeline.py +340 -0
  252. superqode/widgets/leader_key.py +264 -0
  253. superqode/widgets/mode_switcher.py +445 -0
  254. superqode/widgets/model_picker.py +234 -0
  255. superqode/widgets/permission_preview.py +1205 -0
  256. superqode/widgets/prompt.py +358 -0
  257. superqode/widgets/provider_connect.py +725 -0
  258. superqode/widgets/pty_shell.py +587 -0
  259. superqode/widgets/qe_dashboard.py +321 -0
  260. superqode/widgets/resizable_sidebar.py +377 -0
  261. superqode/widgets/response_changes.py +218 -0
  262. superqode/widgets/response_display.py +528 -0
  263. superqode/widgets/rich_tool_display.py +613 -0
  264. superqode/widgets/sidebar_panels.py +1180 -0
  265. superqode/widgets/slash_complete.py +356 -0
  266. superqode/widgets/split_view.py +612 -0
  267. superqode/widgets/status_bar.py +273 -0
  268. superqode/widgets/superqode_display.py +786 -0
  269. superqode/widgets/thinking_display.py +815 -0
  270. superqode/widgets/throbber.py +87 -0
  271. superqode/widgets/toast.py +206 -0
  272. superqode/widgets/unified_output.py +1073 -0
  273. superqode/workspace/__init__.py +75 -0
  274. superqode/workspace/artifacts.py +472 -0
  275. superqode/workspace/coordinator.py +353 -0
  276. superqode/workspace/diff_tracker.py +429 -0
  277. superqode/workspace/git_guard.py +373 -0
  278. superqode/workspace/git_snapshot.py +526 -0
  279. superqode/workspace/manager.py +750 -0
  280. superqode/workspace/snapshot.py +357 -0
  281. superqode/workspace/watcher.py +535 -0
  282. superqode/workspace/worktree.py +440 -0
  283. superqode-0.1.5.dist-info/METADATA +204 -0
  284. superqode-0.1.5.dist-info/RECORD +288 -0
  285. superqode-0.1.5.dist-info/WHEEL +5 -0
  286. superqode-0.1.5.dist-info/entry_points.txt +3 -0
  287. superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
  288. superqode-0.1.5.dist-info/top_level.txt +1 -0
superqode/providers/manager.py (new file)
@@ -0,0 +1,1338 @@
+ """Provider manager for discovering and managing LLM providers and models."""
+
+ import os
+ import pathlib
+ from dataclasses import dataclass
+ from typing import List, Optional, Dict, Any
+ from enum import Enum
+
+ # litellm is imported lazily to avoid import errors when CWD doesn't exist
+ # (litellm tries to resolve current directory during import via pydantic plugins)
+
+
+ def _safe_import_litellm():
+     """Safely import litellm, handling cases where CWD doesn't exist."""
+     try:
+         import litellm
+
+         return litellm
+     except (FileNotFoundError, OSError) as e:
+         # Handle case where current directory doesn't exist during import
+         # This can happen if CWD was deleted or is invalid
+         try:
+             # Try to change to a safe directory if current one doesn't exist
+             cwd = os.getcwd()
+             if not pathlib.Path(cwd).exists():
+                 # Use home directory as fallback
+                 os.chdir(os.path.expanduser("~"))
+             # Try importing again
+             import litellm
+
+             return litellm
+         except Exception:
+             # If we still can't import, raise a more helpful error
+             raise ImportError(
+                 f"Failed to import litellm. This may be due to an invalid current working directory. "
+                 f"Please ensure you're in a valid directory. Original error: {str(e)}"
+             )
+
+
+ @dataclass
+ class ModelInfo:
+     """Information about an LLM model."""
+
+     id: str
+     name: str
+     provider_id: str
+     description: Optional[str] = None
+     context_size: Optional[int] = None
+     available: bool = True
+
+
+ @dataclass
+ class ProviderInfo:
+     """Information about an LLM provider."""
+
+     id: str
+     name: str
+     description: str
+     requires_api_key: bool = True
+     configured: bool = False
+     models: Optional[List[ModelInfo]] = None
+
+     def __post_init__(self):
+         if self.models is None:
+             self.models = []
+
+
+ class ProviderManager:
+     """Lightweight LLM provider manager using LiteLLM."""
+
+     # Provider priority for sorting (lower number = higher priority)
+     PROVIDER_PRIORITY = {
+         "ollama": 1,
+         "vllm": 2,
+         "sglang": 2,
+         "openai": 3,
+         "anthropic": 4,
+         "google": 5,
+         "xai": 6,
+         "groq": 7,
+         "openrouter": 8,
+         "qwen": 9,
+         "deepseek": 10,
+         "together": 11,
+         "deepinfra": 12,
+         "github-copilot": 13,
+         "perplexity": 14,
+         "mistral": 15,
+         "cerebras": 16,
+         "zhipu": 17,
+         "moonshot": 18,
+         "minimax": 19,
+         "baidu": 20,
+         "tencent": 21,
+         "doubao": 22,
+         "01-ai": 23,
+         "azure-openai": 24,
+         "vertex-ai": 25,
+         "openai-compatible": 26,
+     }
+
+     def __init__(self):
+         """Initialize the provider manager."""
+         self._configured_providers: Dict[str, Dict[str, Any]] = {}
+
+         # Set up LiteLLM API keys from environment
+         self._setup_litellm_keys()
+
+     def _setup_litellm_keys(self):
+         """Set up LiteLLM API keys from environment variables."""
+         # OpenAI
+         if os.getenv("OPENAI_API_KEY"):
+             os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+
+         # Anthropic
+         if os.getenv("ANTHROPIC_API_KEY"):
+             os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY")
+
+         # Google - supports both GOOGLE_API_KEY and GEMINI_API_KEY
+         google_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
+         if google_key:
+             os.environ["GOOGLE_API_KEY"] = google_key
+             # Also set GEMINI_API_KEY if it's not already set (for compatibility)
+             if not os.getenv("GEMINI_API_KEY"):
+                 os.environ["GEMINI_API_KEY"] = google_key
+
+         # xAI
+         if os.getenv("XAI_API_KEY"):
+             os.environ["XAI_API_KEY"] = os.getenv("XAI_API_KEY")
+
+         # Other providers
+         if os.getenv("GROQ_API_KEY"):
+             os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
+
+         if os.getenv("OPENROUTER_API_KEY"):
+             os.environ["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
+
+         if os.getenv("DASHSCOPE_API_KEY"):
+             os.environ["DASHSCOPE_API_KEY"] = os.getenv("DASHSCOPE_API_KEY")
+
+         if os.getenv("DEEPSEEK_API_KEY"):
+             os.environ["DEEPSEEK_API_KEY"] = os.getenv("DEEPSEEK_API_KEY")
+
+         if os.getenv("GITHUB_TOKEN"):
+             os.environ["GITHUB_TOKEN"] = os.getenv("GITHUB_TOKEN")
+
+     def _is_provider_configured(self, provider_id: str) -> bool:
+         """Check if a provider has API keys configured."""
+         if provider_id in ("ollama", "mlx", "vllm", "sglang"):
+             # Local providers don't need API keys
+             return True
+
+         key_mapping = {
+             "openai": "OPENAI_API_KEY",
+             "anthropic": "ANTHROPIC_API_KEY",
+             "google": ["GOOGLE_API_KEY", "GEMINI_API_KEY"],  # Google supports both
+             "xai": "XAI_API_KEY",
+             "groq": "GROQ_API_KEY",
+             "openrouter": "OPENROUTER_API_KEY",
+             "qwen": "DASHSCOPE_API_KEY",
+             "deepseek": "DEEPSEEK_API_KEY",
+             "github-copilot": "GITHUB_TOKEN",
+             "together": "TOGETHER_API_KEY",
+             "deepinfra": "DEEPINFRA_API_KEY",
+             "perplexity": "PERPLEXITY_API_KEY",
+             "mistral": "MISTRAL_API_KEY",
+             "cerebras": "CEREBRAS_API_KEY",
+             "zhipu": "ZHIPU_API_KEY",
+             "moonshot": "MOONSHOT_API_KEY",
+             "minimax": "MINIMAX_API_KEY",
+             "baidu": "BAIDU_API_KEY",
+             "tencent": "TENCENT_API_KEY",
+             "doubao": "DOUBAO_API_KEY",
+             "01-ai": "ZEROONE_API_KEY",
+             "azure-openai": "AZURE_OPENAI_API_KEY",
+             "vertex-ai": "GOOGLE_APPLICATION_CREDENTIALS",
+             "openai-compatible": "OPENAI_COMPATIBLE_API_KEY",
+         }
+
+         env_vars = key_mapping.get(provider_id)
+         if not env_vars:
+             return False
+
+         # Handle both single string and list of env vars (for Google)
+         if isinstance(env_vars, list):
+             # Check if any of the environment variables exist and have valid values
+             for env_var in env_vars:
+                 api_key = os.getenv(env_var)
+                 if api_key and api_key.strip():
+                     return True
+             return False
+         else:
+             # Single environment variable
+             api_key = os.getenv(env_vars)
+             return bool(api_key and api_key.strip())
+
+     def _check_api_key(self, key_name: str) -> bool:
+         """Check if an API key is available."""
+         return bool(os.getenv(key_name))
+
+     def _get_ollama_models(self) -> List[ModelInfo]:
+         """Get available models from Ollama daemon."""
+         try:
+             import requests
+
+             # Try to connect to Ollama API
+             ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")
+             response = requests.get(f"{ollama_host}/api/tags", timeout=5)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 models = []
+
+                 for model_data in data.get("models", []):
+                     name = model_data.get("name", "")
+                     size = model_data.get("size", 0)
+                     modified = model_data.get("modified_at", "")
+
+                     # Estimate context size based on model size (rough heuristic)
+                     if "3.1" in name or "llama3.1" in name:
+                         if "405b" in name:
+                             context_size = 131072
+                         elif "70b" in name:
+                             context_size = 131072
+                         else:
+                             context_size = 131072
+                     elif "3.2" in name or "llama3.2" in name:
+                         context_size = 32768
+                     elif "codellama" in name or "code" in name:
+                         context_size = 16384
+                     elif "mistral" in name:
+                         context_size = 32768
+                     elif "mixtral" in name:
+                         context_size = 32768
+                     elif "phi3" in name:
+                         context_size = 128000
+                     elif "gemma" in name:
+                         context_size = 8192
+                     elif "qwen" in name:
+                         context_size = 32768
+                     else:
+                         context_size = 4096  # Default
+
+                     size_str = self._format_size(size)
+                     models.append(
+                         ModelInfo(
+                             id=name,
+                             name=f"{name} ({size_str})",
+                             provider_id="ollama",
+                             context_size=context_size,
+                         )
+                     )
+
+                 return models if models else self._get_default_ollama_models()
+             else:
+                 # Fallback to default models if Ollama is not running
+                 return self._get_default_ollama_models()
+
+         except Exception as e:
+             # Fallback to default models if there's any error
+             return self._get_default_ollama_models()
+
+     def _get_default_ollama_models(self) -> List[ModelInfo]:
+         """Get default Ollama models when API is not available."""
+         return [
+             ModelInfo("llama3.2:3b", "Llama 3.2 3B (default)", "ollama", context_size=32768),
+             ModelInfo("llama3.1:8b", "Llama 3.1 8B (default)", "ollama", context_size=131072),
+             ModelInfo("codellama:7b", "Code Llama 7B (default)", "ollama", context_size=16384),
+         ]
+
+     def _get_mlx_models(self) -> List[ModelInfo]:
+         """Get available MLX models from server and cache."""
+         models = []
+
+         # Try to get models from running MLX server
+         try:
+             import asyncio
+             from ..providers.local.mlx import get_mlx_client
+
+             async def get_mlx_server_models():
+                 client = await get_mlx_client()
+                 if client:
+                     server_models = await client.list_models()
+                     return [
+                         ModelInfo(
+                             id=model.id,
+                             name=model.name,
+                             provider_id="mlx",
+                             description=f"{model.family} - {model.parameter_count} params",
+                             context_size=model.context_window or 4096,
+                         )
+                         for model in server_models
+                     ]
+                 return []
+
+             # Run in sync context
+             server_models = asyncio.run(get_mlx_server_models())
+             models.extend(server_models)
+         except Exception:
+             # If MLX client fails, continue with cached models
+             pass
+
+         # Add cached models if no server models found
+         if not models:
+             try:
+                 from ..providers.local.mlx import MLXClient
+
+                 cache_models = MLXClient.discover_huggingface_models()
+                 for model_info in cache_models[:5]:  # Limit to 5 cached models
+                     model_id = model_info["id"]
+                     size_mb = model_info["size_bytes"] / (1024 * 1024)
+                     models.append(
+                         ModelInfo(
+                             id=model_id,
+                             name=f"{model_id.split('/')[-1]} (cached)",
+                             provider_id="mlx",
+                             description=f"{size_mb:.1f} MB (cached)",
+                             context_size=4096,
+                         )
+                     )
+             except Exception:
+                 pass
+
+         # Fallback to registry models if nothing found
+         if not models:
+             from ..providers.registry import PROVIDERS
+
+             mlx_provider = PROVIDERS.get("mlx")
+             if mlx_provider and mlx_provider.example_models:
+                 for model_id in mlx_provider.example_models[:3]:
+                     models.append(
+                         ModelInfo(
+                             id=model_id,
+                             name=model_id.split("/")[-1],
+                             provider_id="mlx",
+                             description="Example MLX model",
+                             context_size=4096,
+                         )
+                     )
+
+         return models
+
+     def _get_vllm_models(self) -> List[ModelInfo]:
+         """Get available vLLM models from server."""
+         models = []
+
+         # Try to get models from running vLLM server
+         try:
+             import asyncio
+             from ..providers.local.vllm import get_vllm_client
+
+             async def get_vllm_server_models():
+                 client = await get_vllm_client()
+                 if client:
+                     server_models = await client.list_models()
+                     return [
+                         ModelInfo(
+                             id=model.id,
+                             name=model.name,
+                             provider_id="vllm",
+                             description=f"{model.family} - {model.parameter_count} params"
+                             if model.parameter_count
+                             else model.family,
+                             context_size=model.context_window or 4096,
+                         )
+                         for model in server_models
+                     ]
+                 return []
+
+             # Run in sync context
+             server_models = asyncio.run(get_vllm_server_models())
+             models.extend(server_models)
+         except Exception:
+             # If vLLM client fails, continue with default models
+             pass
+
+         # Fallback to registry models if nothing found
+         if not models:
+             from ..providers.registry import PROVIDERS
+
+             vllm_provider = PROVIDERS.get("vllm")
+             if vllm_provider and vllm_provider.example_models:
+                 for model_id in vllm_provider.example_models[:3]:
+                     models.append(
+                         ModelInfo(
+                             id=model_id,
+                             name=model_id.split("/")[-1],
+                             provider_id="vllm",
+                             description="Example vLLM model (server not running)",
+                             context_size=131072,
+                         )
+                     )
+
+         return models
+
+     def _get_sglang_models(self) -> List[ModelInfo]:
+         """Get available SGLang models from server."""
+         models = []
+
+         # Try to get models from running SGLang server
+         try:
+             import asyncio
+             from ..providers.local.sglang import get_sglang_client
+
+             async def get_sglang_server_models():
+                 client = await get_sglang_client()
+                 if client:
+                     server_models = await client.list_models()
+                     return [
+                         ModelInfo(
+                             id=model.id,
+                             name=model.name,
+                             provider_id="sglang",
+                             description=f"{model.family} - {model.parameter_count} params"
+                             if model.parameter_count
+                             else model.family,
+                             context_size=model.context_window or 4096,
+                         )
+                         for model in server_models
+                     ]
+                 return []
+
+             # Run in sync context
+             server_models = asyncio.run(get_sglang_server_models())
+             models.extend(server_models)
+         except Exception:
+             # If SGLang client fails, continue with default models
+             pass
+
+         # Fallback to registry models if nothing found
+         if not models:
+             from ..providers.registry import PROVIDERS
+
+             sglang_provider = PROVIDERS.get("sglang")
+             if sglang_provider and sglang_provider.example_models:
+                 for model_id in sglang_provider.example_models[:3]:
+                     models.append(
+                         ModelInfo(
+                             id=model_id,
+                             name=model_id.split("/")[-1],
+                             provider_id="sglang",
+                             description="Example SGLang model (server not running)",
+                             context_size=131072,
+                         )
+                     )
+
+         return models
+
+     def _format_size(self, size_bytes: int) -> str:
+         """Format size in human readable format."""
+         if size_bytes >= 1024**3:  # GB
+             return f"{size_bytes / 1024**3:.1f}GB"
+         elif size_bytes >= 1024**2:  # MB
+             return f"{size_bytes / 1024**2:.1f}MB"
+         else:  # KB
+             return f"{size_bytes / 1024:.1f}KB"
+
+     def list_providers(self) -> List[ProviderInfo]:
+         """List available LLM providers with latest models."""
+         providers = []
+
+         # Local & Self-hosted Models
+         ollama_models = self._get_ollama_models()
+         providers.append(
+             ProviderInfo(
+                 id="ollama",
+                 name="Ollama",
+                 description="Local models via Ollama daemon (privacy-focused, no API key required)",
+                 requires_api_key=False,
+                 configured=self._is_provider_configured("ollama"),
+                 models=ollama_models,
+             )
+         )
+
+         # MLX (Apple Silicon) Models
+         mlx_models = self._get_mlx_models()
+         providers.append(
+             ProviderInfo(
+                 id="mlx",
+                 name="MLX (Apple Silicon)",
+                 description="Local MLX models optimized for Apple Silicon (requires mlx_lm.server)",
+                 requires_api_key=False,
+                 configured=self._is_provider_configured("mlx"),
+                 models=mlx_models,
+             )
+         )
+
+         # vLLM (Experimental) Models
+         vllm_models = self._get_vllm_models()
+         providers.append(
+             ProviderInfo(
+                 id="vllm",
+                 name="vLLM (Experimental)",
+                 description="High-throughput local inference with PagedAttention [EXPERIMENTAL]",
+                 requires_api_key=False,
+                 configured=self._is_provider_configured("vllm"),
+                 models=vllm_models,
+             )
+         )
+
+         # SGLang (Experimental) Models
+         sglang_models = self._get_sglang_models()
+         providers.append(
+             ProviderInfo(
+                 id="sglang",
+                 name="SGLang (Experimental)",
+                 description="Fast structured generation with RadixAttention [EXPERIMENTAL]",
+                 requires_api_key=False,
+                 configured=self._is_provider_configured("sglang"),
+                 models=sglang_models,
+             )
+         )
+
+         # US Labs - Premium Cloud Models
+         providers.append(
+             ProviderInfo(
+                 id="openai",
+                 name="OpenAI",
+                 description="Latest GPT-5.2, GPT-5.1, o1 models from models.dev",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("openai"),
+                 models=[
+                     ModelInfo("gpt-5.2", "GPT-5.2 (Latest)", "openai", context_size=256000),
+                     ModelInfo("gpt-5.2-pro", "GPT-5.2 Pro", "openai", context_size=256000),
+                     ModelInfo("gpt-5.2-codex", "GPT-5.2 Codex", "openai", context_size=256000),
+                     ModelInfo("gpt-5.1", "GPT-5.1", "openai", context_size=200000),
+                     ModelInfo("gpt-5.1-codex", "GPT-5.1 Codex", "openai", context_size=200000),
+                     ModelInfo(
+                         "gpt-5.1-codex-mini", "GPT-5.1 Codex Mini", "openai", context_size=200000
+                     ),
+                     ModelInfo(
+                         "gpt-4o-2024-11-20", "GPT-4o (Nov 2024)", "openai", context_size=128000
+                     ),
+                     ModelInfo("gpt-4o", "GPT-4o", "openai", context_size=128000),
+                     ModelInfo("gpt-4o-mini", "GPT-4o Mini", "openai", context_size=128000),
+                     ModelInfo("o1", "o1 (Reasoning)", "openai", context_size=200000),
+                     ModelInfo("o1-mini", "o1 Mini", "openai", context_size=128000),
+                     ModelInfo("gpt-4-turbo", "GPT-4 Turbo", "openai", context_size=128000),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="anthropic",
+                 name="Anthropic",
+                 description="Latest Claude 4.5 models from models.dev",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("anthropic"),
+                 models=[
+                     ModelInfo(
+                         "claude-opus-4-5-20251101",
+                         "Claude Opus 4.5 (Latest)",
+                         "anthropic",
+                         context_size=200000,
+                     ),
+                     ModelInfo(
+                         "claude-sonnet-4-5-20250929",
+                         "Claude Sonnet 4.5",
+                         "anthropic",
+                         context_size=200000,
+                     ),
+                     ModelInfo(
+                         "claude-haiku-4-5-20251001",
+                         "Claude Haiku 4.5",
+                         "anthropic",
+                         context_size=200000,
+                     ),
+                     ModelInfo(
+                         "claude-sonnet-4-20250514",
+                         "Claude Sonnet 4",
+                         "anthropic",
+                         context_size=200000,
+                     ),
+                     ModelInfo(
+                         "claude-opus-4-20250514", "Claude Opus 4", "anthropic", context_size=200000
+                     ),
+                     ModelInfo(
+                         "claude-haiku-4-20250514",
+                         "Claude Haiku 4",
+                         "anthropic",
+                         context_size=200000,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="google",
+                 name="Google",
+                 description="Latest Gemini 3.x models from models.dev",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("google"),
+                 models=[
+                     ModelInfo(
+                         "gemini-3-pro-preview",
+                         "Gemini 3 Pro Preview (Latest)",
+                         "google",
+                         context_size=2000000,
+                     ),
+                     ModelInfo(
+                         "gemini-3-flash-preview",
+                         "Gemini 3 Flash Preview (Latest)",
+                         "google",
+                         context_size=1000000,
+                     ),
+                     ModelInfo("gemini-2.5-pro", "Gemini 2.5 Pro", "google", context_size=2000000),
+                     ModelInfo(
+                         "gemini-2.5-flash", "Gemini 2.5 Flash", "google", context_size=1000000
+                     ),
+                     ModelInfo(
+                         "gemini-2.0-flash", "Gemini 2.0 Flash", "google", context_size=1000000
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="xai",
+                 name="xAI",
+                 description="Latest Grok models",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("xai"),
+                 models=[
+                     ModelInfo("grok-3", "Grok-3 (Latest)", "xai", context_size=262144),
+                     ModelInfo("grok-3-mini", "Grok-3 Mini", "xai", context_size=131072),
+                     ModelInfo("grok-2", "Grok-2", "xai", context_size=131072),
+                     ModelInfo("grok-beta", "Grok Beta", "xai", context_size=131072),
+                 ],
+             )
+         )
+
+         # Other Labs & Providers
+         providers.append(
+             ProviderInfo(
+                 id="groq",
+                 name="Groq",
+                 description="Ultra-fast inference for open-source models",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("groq"),
+                 models=[
+                     ModelInfo(
+                         "llama-3.1-8b-instant", "Llama 3.1 8B Instant", "groq", context_size=131072
+                     ),
+                     ModelInfo(
+                         "llama-3.1-70b-versatile",
+                         "Llama 3.1 70B Versatile",
+                         "groq",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "llama-3.1-405b-instruct", "Llama 3.1 405B", "groq", context_size=131072
+                     ),
+                     ModelInfo("llama3-8b-8192", "Llama 3 8B", "groq", context_size=8192),
+                     ModelInfo("llama3-70b-8192", "Llama 3 70B", "groq", context_size=8192),
+                     ModelInfo("mixtral-8x7b-32768", "Mixtral 8x7B", "groq", context_size=32768),
+                     ModelInfo("gemma2-9b-it", "Gemma 2 9B", "groq", context_size=8192),
+                     ModelInfo("llama2-70b-4096", "Llama 2 70B", "groq", context_size=4096),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="openrouter",
+                 name="OpenRouter",
+                 description="Unified API for 100+ LLMs (Claude, GPT-4, Llama, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("openrouter"),
+                 models=[
+                     ModelInfo(
+                         "anthropic/claude-3.5-sonnet",
+                         "Claude 3.5 Sonnet",
+                         "openrouter",
+                         context_size=200000,
+                     ),
+                     ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter", context_size=128000),
+                     ModelInfo(
+                         "openai/gpt-4o-mini", "GPT-4o Mini", "openrouter", context_size=128000
+                     ),
+                     ModelInfo("openai/o1-preview", "o1 Preview", "openrouter", context_size=128000),
+                     ModelInfo("openai/o1-mini", "o1 Mini", "openrouter", context_size=128000),
+                     ModelInfo(
+                         "meta-llama/llama-3.1-405b-instruct",
+                         "Llama 3.1 405B",
+                         "openrouter",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "meta-llama/llama-3.1-70b-instruct",
+                         "Llama 3.1 70B",
+                         "openrouter",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "google/gemini-pro-1.5",
+                         "Gemini Pro 1.5",
+                         "openrouter",
+                         context_size=2097152,
+                     ),
+                     ModelInfo(
+                         "mistralai/mistral-7b-instruct",
+                         "Mistral 7B",
+                         "openrouter",
+                         context_size=32768,
+                     ),
+                     ModelInfo(
+                         "anthropic/claude-3-haiku",
+                         "Claude 3 Haiku",
+                         "openrouter",
+                         context_size=200000,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="github-copilot",
+                 name="GitHub Copilot",
+                 description="GitHub Copilot models (Claude, GPT-4, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("github-copilot"),
+                 models=[
+                     ModelInfo("gpt-4", "GPT-4 (Copilot)", "github-copilot", context_size=8192),
+                     ModelInfo(
+                         "claude-3.5-sonnet",
+                         "Claude 3.5 Sonnet (Copilot)",
+                         "github-copilot",
+                         context_size=200000,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="together",
+                 name="Together AI",
+                 description="High-performance open-source models",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("together"),
+                 models=[
+                     ModelInfo(
+                         "meta-llama/Llama-3.1-405B-Instruct-Turbo",
+                         "Llama 3.1 405B Turbo",
+                         "together",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "meta-llama/Llama-3.1-70B-Instruct-Turbo",
+                         "Llama 3.1 70B Turbo",
+                         "together",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "meta-llama/Llama-3.1-8B-Instruct-Turbo",
+                         "Llama 3.1 8B Turbo",
+                         "together",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "meta-llama/Llama-3-70B-Instruct-Turbo",
+                         "Llama 3 70B Turbo",
+                         "together",
+                         context_size=8192,
+                     ),
+                     ModelInfo(
+                         "meta-llama/Llama-3-8B-Instruct-Turbo",
+                         "Llama 3 8B Turbo",
+                         "together",
+                         context_size=8192,
+                     ),
+                     ModelInfo(
+                         "mistralai/Mistral-7B-Instruct-v0.1",
+                         "Mistral 7B",
+                         "together",
+                         context_size=32768,
+                     ),
+                     ModelInfo(
+                         "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                         "Mixtral 8x7B",
+                         "together",
+                         context_size=32768,
+                     ),
+                     ModelInfo(
+                         "mistralai/Mistral-7B-Instruct-v0.2",
+                         "Mistral 7B v0.2",
+                         "together",
+                         context_size=32768,
+                     ),
+                     ModelInfo(
+                         "Qwen/Qwen2-72B-Instruct", "Qwen2 72B", "together", context_size=32768
+                     ),
+                     ModelInfo(
+                         "codellama/CodeLlama-34b-Instruct-hf",
+                         "Code Llama 34B",
+                         "together",
+                         context_size=16384,
+                     ),
+                     ModelInfo(
+                         "codellama/CodeLlama-13b-Instruct-hf",
+                         "Code Llama 13B",
+                         "together",
+                         context_size=16384,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="deepinfra",
+                 name="DeepInfra",
+                 description="Fast inference for open source models",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("deepinfra"),
+                 models=[
+                     ModelInfo(
+                         "meta-llama/Llama-2-70b-chat-hf",
+                         "Llama 2 70B",
+                         "deepinfra",
+                         context_size=4096,
+                     ),
+                     ModelInfo(
+                         "meta-llama/Llama-2-13b-chat-hf",
+                         "Llama 2 13B",
+                         "deepinfra",
+                         context_size=4096,
+                     ),
+                     ModelInfo(
+                         "codellama/CodeLlama-34b-Instruct-hf",
+                         "Code Llama 34B",
+                         "deepinfra",
+                         context_size=16384,
+                     ),
+                     ModelInfo(
+                         "jondurbin/airoboros-l2-70b-gpt4-1.4.1",
+                         "Airoboros 70B",
+                         "deepinfra",
+                         context_size=4096,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="perplexity",
+                 name="Perplexity",
+                 description="Perplexity models (Sonar, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("perplexity"),
+                 models=[
+                     ModelInfo("sonar-pro", "Sonar Pro", "perplexity", context_size=200000),
+                     ModelInfo("sonar", "Sonar", "perplexity", context_size=127072),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="mistral",
+                 name="Mistral AI",
+                 description="Mistral models (Mistral Large, Medium, Small, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("mistral"),
+                 models=[
+                     ModelInfo(
+                         "mistral-large-latest", "Mistral Large", "mistral", context_size=128000
+                     ),
+                     ModelInfo("mistral-medium", "Mistral Medium", "mistral", context_size=32768),
+                     ModelInfo("mistral-small", "Mistral Small", "mistral", context_size=32768),
+                     ModelInfo("codestral-latest", "Codestral", "mistral", context_size=32768),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="cerebras",
+                 name="Cerebras",
+                 description="Cerebras models (Llama, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("cerebras"),
+                 models=[
+                     ModelInfo("llama3.1-8b", "Llama 3.1 8B", "cerebras", context_size=8192),
+                     ModelInfo("llama3.1-70b", "Llama 3.1 70B", "cerebras", context_size=8192),
+                 ],
+             )
+         )
+
+         # Meta AI (Llama models)
+         providers.append(
+             ProviderInfo(
+                 id="meta",
+                 name="Meta AI",
+                 description="Latest Llama 4 models from models.dev",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("meta"),
+                 models=[
+                     ModelInfo(
+                         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+                         "Llama 4 Maverick 17B (Latest)",
+                         "meta",
+                         context_size=262144,
+                     ),
+                     ModelInfo(
+                         "llama-3.3-70b-versatile",
+                         "Llama 3.3 70B Versatile",
+                         "meta",
+                         context_size=131072,
+                     ),
+                     ModelInfo(
+                         "llama-3.1-405b-instruct", "Llama 3.1 405B", "meta", context_size=131072
+                     ),
+                     ModelInfo(
+                         "llama-3.1-70b-instruct", "Llama 3.1 70B", "meta", context_size=131072
+                     ),
+                 ],
+             )
+         )
+
+         # Chinese AI Providers
+         providers.append(
+             ProviderInfo(
+                 id="qwen",
+                 name="Alibaba Qwen",
+                 description="Latest Qwen3 models from models.dev - Alibaba Cloud",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("qwen"),
+                 models=[
+                     ModelInfo("qwen3-max", "Qwen3 Max (Latest)", "qwen", context_size=262144),
+                     ModelInfo(
+                         "qwen3-coder-480b-a35b-instruct",
+                         "Qwen3 Coder 480B",
+                         "qwen",
+                         context_size=131072,
+                     ),
+                     ModelInfo("qwen-flash", "Qwen Flash", "qwen", context_size=32768),
+                     ModelInfo("qwen2.5-72b-instruct", "Qwen2.5 72B", "qwen", context_size=32768),
+                     ModelInfo(
+                         "qwen2.5-coder-32b-instruct",
+                         "Qwen2.5 Coder 32B",
+                         "qwen",
+                         context_size=32768,
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="deepseek",
+                 name="DeepSeek",
+                 description="Latest DeepSeek V3.2, R1 models from models.dev",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("deepseek"),
+                 models=[
+                     ModelInfo(
+                         "deepseek-ai/DeepSeek-V3.2",
+                         "DeepSeek V3.2 (Latest)",
+                         "deepseek",
+                         context_size=128000,
+                     ),
+                     ModelInfo(
+                         "deepseek-ai/DeepSeek-R1",
+                         "DeepSeek R1 (Reasoning)",
+                         "deepseek",
+                         context_size=64000,
+                     ),
+                     ModelInfo(
+                         "deepseek-chat", "DeepSeek Chat (V3)", "deepseek", context_size=64000
+                     ),
+                     ModelInfo("deepseek-coder", "DeepSeek Coder", "deepseek", context_size=64000),
+                     ModelInfo(
+                         "deepseek-reasoner", "DeepSeek Reasoner", "deepseek", context_size=64000
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="zhipu",
+                 name="Zhipu AI",
+                 description="GLM models (GLM-4, ChatGLM, etc.) - Tsinghua University",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("zhipu"),
+                 models=[
+                     ModelInfo("glm-4", "GLM-4", "zhipu", context_size=128000),
+                     ModelInfo("glm-3-turbo", "GLM-3 Turbo", "zhipu", context_size=128000),
+                     ModelInfo("chatglm_turbo", "ChatGLM Turbo", "zhipu", context_size=32768),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="moonshot",
+                 name="Moonshot AI",
+                 description="Kimi models (Kimi-2, Kimi-K2, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("moonshot"),
+                 models=[
+                     ModelInfo("moonshot-v1-8k", "Moonshot v1 8K", "moonshot", context_size=8192),
+                     ModelInfo("moonshot-v1-32k", "Moonshot v1 32K", "moonshot", context_size=32768),
+                     ModelInfo(
+                         "moonshot-v1-128k", "Moonshot v1 128K", "moonshot", context_size=131072
+                     ),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="minimax",
+                 name="MiniMax",
+                 description="MiniMax models (abab-6, abab-6.5, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("minimax"),
+                 models=[
+                     ModelInfo("abab-6-5-chat", "abab-6.5 Chat", "minimax", context_size=24576),
+                     ModelInfo("abab-6-chat", "abab-6 Chat", "minimax", context_size=8192),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="baidu",
+                 name="Baidu",
+                 description="Ernie models (ERNIE-4.0, ERNIE-3.5, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("baidu"),
+                 models=[
+                     ModelInfo("ernie-4.0-8k", "ERNIE-4.0 8K", "baidu", context_size=8192),
+                     ModelInfo("ernie-3.5-8k", "ERNIE-3.5 8K", "baidu", context_size=8192),
+                     ModelInfo("ernie-speed-8k", "ERNIE Speed 8K", "baidu", context_size=8192),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="tencent",
+                 name="Tencent",
+                 description="Hunyuan models (Hunyuan-Lite, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("tencent"),
+                 models=[
+                     ModelInfo("hunyuan-lite", "Hunyuan Lite", "tencent", context_size=32768),
+                     ModelInfo(
+                         "hunyuan-standard", "Hunyuan Standard", "tencent", context_size=32768
+                     ),
+                     ModelInfo("hunyuan-pro", "Hunyuan Pro", "tencent", context_size=32768),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="doubao",
+                 name="ByteDance Doubao",
+                 description="Doubao models (Doubao-Pro, Doubao-Lite, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("doubao"),
+                 models=[
+                     ModelInfo("doubao-lite-4k", "Doubao Lite 4K", "doubao", context_size=4096),
+                     ModelInfo("doubao-lite-32k", "Doubao Lite 32K", "doubao", context_size=32768),
+                     ModelInfo("doubao-pro-4k", "Doubao Pro 4K", "doubao", context_size=4096),
+                     ModelInfo("doubao-pro-32k", "Doubao Pro 32K", "doubao", context_size=32768),
+                 ],
+             )
+         )
+
+         providers.append(
+             ProviderInfo(
+                 id="01-ai",
+                 name="01.AI",
+                 description="Yi models (Yi-1.5, Yi-34B, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("01-ai"),
+                 models=[
+                     ModelInfo("yi-large", "Yi Large", "01-ai", context_size=32768),
+                     ModelInfo("yi-medium", "Yi Medium", "01-ai", context_size=16384),
+                     ModelInfo("yi-spark", "Yi Spark", "01-ai", context_size=16384),
+                 ],
+             )
+         )
+
+         # Azure OpenAI
+         providers.append(
+             ProviderInfo(
+                 id="azure-openai",
+                 name="Azure OpenAI",
+                 description="Azure-hosted OpenAI models (GPT-4, GPT-3.5, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("azure-openai"),
+                 models=[
+                     ModelInfo("gpt-4", "GPT-4 (Azure)", "azure-openai", context_size=8192),
+                     ModelInfo("gpt-4-32k", "GPT-4 32K (Azure)", "azure-openai", context_size=32768),
+                     ModelInfo(
+                         "gpt-35-turbo", "GPT-3.5 Turbo (Azure)", "azure-openai", context_size=4096
+                     ),
+                     ModelInfo(
+                         "gpt-35-turbo-16k",
+                         "GPT-3.5 Turbo 16K (Azure)",
+                         "azure-openai",
+                         context_size=16384,
+                     ),
+                 ],
+             )
+         )
+
+         # Google Vertex AI
+         providers.append(
+             ProviderInfo(
+                 id="vertex-ai",
+                 name="Google Vertex AI",
+                 description="Google Vertex AI models (Gemini, PaLM, etc.)",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("vertex-ai"),
+                 models=[
+                     ModelInfo("gemini-pro", "Gemini Pro (Vertex)", "vertex-ai", context_size=32768),
+                     ModelInfo(
+                         "gemini-pro-vision",
+                         "Gemini Pro Vision (Vertex)",
+                         "vertex-ai",
+                         context_size=16384,
+                     ),
+                     ModelInfo(
+                         "palm-2-chat-bison", "PaLM 2 Chat Bison", "vertex-ai", context_size=8192
+                     ),
+                     ModelInfo(
+                         "palm-2-codechat-bison",
+                         "PaLM 2 CodeChat Bison",
+                         "vertex-ai",
+                         context_size=8192,
+                     ),
+                 ],
+             )
+         )
+
+         # OpenAI Compatible
+         providers.append(
+             ProviderInfo(
+                 id="openai-compatible",
+                 name="OpenAI Compatible",
+                 description="Any OpenAI-compatible API endpoint",
+                 requires_api_key=True,
+                 configured=self._is_provider_configured("openai-compatible"),
+                 models=[
+                     ModelInfo("gpt-4", "GPT-4 Compatible", "openai-compatible", context_size=8192),
+                     ModelInfo(
+                         "gpt-3.5-turbo",
+                         "GPT-3.5 Turbo Compatible",
+                         "openai-compatible",
+                         context_size=4096,
+                     ),
+                     ModelInfo(
+                         "claude-3-sonnet",
+                         "Claude 3 Sonnet Compatible",
+                         "openai-compatible",
+                         context_size=200000,
+                     ),
+                 ],
+             )
+         )
+
+         # Sort by priority
+         providers.sort(key=lambda p: self.PROVIDER_PRIORITY.get(p.id, 99))
+
+         return providers
+
+     def get_models(self, provider_id: str, refresh: bool = False) -> List[ModelInfo]:
+         """Get available models for a provider."""
+         # Return basic model list for now
+         provider = next((p for p in self.list_providers() if p.id == provider_id), None)
+         return provider.models if provider else []
+
+     def test_connection(
+         self, provider_id: str, model_id: Optional[str] = None
+     ) -> tuple[bool, Optional[str]]:
+         """Test connection to a provider and optionally a specific model."""
+         try:
+             # For Ollama, we don't need to test connection
+             if provider_id == "ollama":
+                 if model_id:
+                     # Test if the specific Ollama model exists
+                     try:
+                         models = self.get_models(provider_id)
+                         if any(m.id == model_id for m in models):
+                             return True, None
+                         else:
+                             return (
+                                 False,
+                                 f"Model '{model_id}' not found. Available models: {', '.join([m.id for m in models[:5]])}",
+                             )
+                     except Exception as e:
+                         return False, f"Failed to check Ollama models: {str(e)}"
+                 return True, None
+
+             # Test specific model if provided
+             if model_id:
+                 try:
+                     # Try a minimal chat completion to test the model
+                     messages = [{"role": "user", "content": "Hi"}]
+                     response = self.chat_completion(provider_id, model_id, messages, max_tokens=5)
+                     if response and response.strip():
+                         return True, None
+                     else:
+                         return False, f"Model '{model_id}' returned empty response"
+                 except Exception as e:
+                     error_msg = str(e).lower()
+                     if "model not found" in error_msg or "invalid model" in error_msg:
+                         return False, f"Model '{model_id}' not found or not available"
+                     elif "authentication" in error_msg or "api key" in error_msg:
+                         return False, f"Authentication failed for provider '{provider_id}'"
+                     elif "rate limit" in error_msg:
+                         return False, f"Rate limit exceeded for provider '{provider_id}'"
+                     else:
+                         return False, f"Model '{model_id}' failed: {str(e)}"
+
+             # For other providers without specific model, check API key first
+             if not self._is_provider_configured(provider_id):
+                 key_mapping = {
+                     "openai": "OPENAI_API_KEY",
+                     "anthropic": "ANTHROPIC_API_KEY",
+                     "google": "GOOGLE_API_KEY or GEMINI_API_KEY",  # Google supports both
+                     "xai": "XAI_API_KEY",
+                     "groq": "GROQ_API_KEY",
+                     "openrouter": "OPENROUTER_API_KEY",
+                     "qwen": "DASHSCOPE_API_KEY",
+                     "deepseek": "DEEPSEEK_API_KEY",
+                     "github-copilot": "GITHUB_TOKEN",
+                     "together": "TOGETHER_API_KEY",
+                     "deepinfra": "DEEPINFRA_API_KEY",
+                     "perplexity": "PERPLEXITY_API_KEY",
+                     "mistral": "MISTRAL_API_KEY",
+                     "cerebras": "CEREBRAS_API_KEY",
+                     "zhipu": "ZHIPU_API_KEY",
+                     "moonshot": "MOONSHOT_API_KEY",
+                     "minimax": "MINIMAX_API_KEY",
+                     "baidu": "BAIDU_API_KEY",
+                     "tencent": "TENCENT_API_KEY",
+                     "doubao": "DOUBAO_API_KEY",
+                     "01-ai": "ZEROONE_API_KEY",
+                     "azure-openai": "AZURE_OPENAI_API_KEY",
+                     "vertex-ai": "GOOGLE_APPLICATION_CREDENTIALS",
+                     "openai-compatible": "OPENAI_COMPATIBLE_API_KEY",
+                 }
+                 env_var = key_mapping.get(provider_id)
+                 if env_var:
+                     return False, f"API key not set. Please set {env_var} environment variable."
+                 else:
+                     return False, f"Provider '{provider_id}' requires API key configuration."
+
+             # Try to get models - this will also validate the API key works
+             models = self.get_models(provider_id)
+             if models:
+                 # If we have models, try a quick test with the first model to validate API key
+                 try:
+                     test_model = models[0].id
+                     messages = [{"role": "user", "content": "Hi"}]
+                     response = self.chat_completion(provider_id, test_model, messages, max_tokens=5)
+                     if response and response.strip():
+                         return True, None
+                     else:
+                         return False, "API key validation failed - received empty response"
+                 except Exception as e:
+                     error_msg = str(e).lower()
+                     if (
+                         "authentication" in error_msg
+                         or "api key" in error_msg
+                         or "api_key" in error_msg
+                     ):
+                         key_mapping = {
+                             "openai": "OPENAI_API_KEY",
+                             "anthropic": "ANTHROPIC_API_KEY",
+                             "google": "GOOGLE_API_KEY or GEMINI_API_KEY",  # Google supports both
+                             "xai": "XAI_API_KEY",
+                             "groq": "GROQ_API_KEY",
+                             "openrouter": "OPENROUTER_API_KEY",
+                             "qwen": "DASHSCOPE_API_KEY",
+                             "deepseek": "DEEPSEEK_API_KEY",
+                         }
+                         env_var = key_mapping.get(provider_id, "API_KEY")
+                         return (
+                             False,
+                             f"API key validation failed: {str(e)}. Please check your {env_var} environment variable.",
+                         )
+                     else:
+                         return False, f"Connection test failed: {str(e)}"
+             else:
+                 return False, "No models available"
+         except Exception as e:
+             return False, str(e)
+
+     def chat_completion(
+         self, provider_id: str, model_id: str, messages: List[Dict[str, str]], **kwargs
+     ) -> str:
+         """Make a chat completion request."""
+         try:
+             # Construct the full model name for LiteLLM
+             if provider_id == "ollama":
+                 full_model = f"ollama/{model_id}"
+             elif provider_id == "openai":
+                 full_model = f"openai/{model_id}"
+             elif provider_id == "anthropic":
+                 full_model = f"anthropic/{model_id}"
+             elif provider_id == "google":
+                 full_model = f"gemini/{model_id}"
+             elif provider_id == "xai":
+                 full_model = f"xai/{model_id}"
+             elif provider_id == "groq":
+                 full_model = f"groq/{model_id}"
+             elif provider_id == "openrouter":
+                 full_model = f"openrouter/{model_id}"
+             elif provider_id == "qwen":
+                 full_model = f"qwen/{model_id}"
+             elif provider_id == "deepseek":
+                 full_model = f"deepseek/{model_id}"
+             else:
+                 full_model = f"{provider_id}/{model_id}"
+
+             # Lazy import litellm to avoid import errors when CWD doesn't exist
+             litellm = _safe_import_litellm()
+
+             # Make the request
+             response = litellm.completion(model=full_model, messages=messages, **kwargs)
+
+             return response.choices[0].message.content
+
+         except Exception as e:
+             raise Exception(f"Failed to get response from {provider_id}: {str(e)}") from e
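
For orientation, the file above is the package's lightweight LiteLLM wrapper. Below is a minimal usage sketch, not taken from the package itself; it assumes superqode 0.1.5 is installed, that the class is importable as superqode.providers.manager.ProviderManager, and that at least one provider is reachable (for example a local Ollama daemon, or OPENAI_API_KEY exported).

    from superqode.providers.manager import ProviderManager

    manager = ProviderManager()

    # Discover providers, sorted by PROVIDER_PRIORITY (local providers first)
    for provider in manager.list_providers():
        status = "configured" if provider.configured else "not configured"
        print(f"{provider.name}: {len(provider.models)} models ({status})")

    # Verify a provider/model pair before using it; "llama3.2:3b" is one of
    # the default Ollama models listed in _get_default_ollama_models()
    ok, error = manager.test_connection("ollama", "llama3.2:3b")
    if ok:
        reply = manager.chat_completion(
            "ollama",
            "llama3.2:3b",
            [{"role": "user", "content": "Hello"}],
            max_tokens=32,
        )
        print(reply)
    else:
        print(f"Connection failed: {error}")

Note that test_connection returns an (ok, error) tuple rather than raising, so callers can surface the error message directly, while chat_completion wraps any provider failure in an Exception.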