superqode 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. superqode/__init__.py +33 -0
  2. superqode/acp/__init__.py +23 -0
  3. superqode/acp/client.py +913 -0
  4. superqode/acp/permission_screen.py +457 -0
  5. superqode/acp/types.py +480 -0
  6. superqode/acp_discovery.py +856 -0
  7. superqode/agent/__init__.py +22 -0
  8. superqode/agent/edit_strategies.py +334 -0
  9. superqode/agent/loop.py +892 -0
  10. superqode/agent/qe_report_templates.py +39 -0
  11. superqode/agent/system_prompts.py +353 -0
  12. superqode/agent_output.py +721 -0
  13. superqode/agent_stream.py +953 -0
  14. superqode/agents/__init__.py +59 -0
  15. superqode/agents/acp_registry.py +305 -0
  16. superqode/agents/client.py +249 -0
  17. superqode/agents/data/augmentcode.com.toml +51 -0
  18. superqode/agents/data/cagent.dev.toml +51 -0
  19. superqode/agents/data/claude.com.toml +60 -0
  20. superqode/agents/data/codeassistant.dev.toml +51 -0
  21. superqode/agents/data/codex.openai.com.toml +57 -0
  22. superqode/agents/data/fastagent.ai.toml +66 -0
  23. superqode/agents/data/geminicli.com.toml +77 -0
  24. superqode/agents/data/goose.block.xyz.toml +54 -0
  25. superqode/agents/data/junie.jetbrains.com.toml +56 -0
  26. superqode/agents/data/kimi.moonshot.cn.toml +57 -0
  27. superqode/agents/data/llmlingagent.dev.toml +51 -0
  28. superqode/agents/data/molt.bot.toml +49 -0
  29. superqode/agents/data/opencode.ai.toml +60 -0
  30. superqode/agents/data/stakpak.dev.toml +51 -0
  31. superqode/agents/data/vtcode.dev.toml +51 -0
  32. superqode/agents/discovery.py +266 -0
  33. superqode/agents/messaging.py +160 -0
  34. superqode/agents/persona.py +166 -0
  35. superqode/agents/registry.py +421 -0
  36. superqode/agents/schema.py +72 -0
  37. superqode/agents/unified.py +367 -0
  38. superqode/app/__init__.py +111 -0
  39. superqode/app/constants.py +314 -0
  40. superqode/app/css.py +366 -0
  41. superqode/app/models.py +118 -0
  42. superqode/app/suggester.py +125 -0
  43. superqode/app/widgets.py +1591 -0
  44. superqode/app_enhanced.py +399 -0
  45. superqode/app_main.py +17187 -0
  46. superqode/approval.py +312 -0
  47. superqode/atomic.py +296 -0
  48. superqode/commands/__init__.py +1 -0
  49. superqode/commands/acp.py +965 -0
  50. superqode/commands/agents.py +180 -0
  51. superqode/commands/auth.py +278 -0
  52. superqode/commands/config.py +374 -0
  53. superqode/commands/init.py +826 -0
  54. superqode/commands/providers.py +819 -0
  55. superqode/commands/qe.py +1145 -0
  56. superqode/commands/roles.py +380 -0
  57. superqode/commands/serve.py +172 -0
  58. superqode/commands/suggestions.py +127 -0
  59. superqode/commands/superqe.py +460 -0
  60. superqode/config/__init__.py +51 -0
  61. superqode/config/loader.py +812 -0
  62. superqode/config/schema.py +498 -0
  63. superqode/core/__init__.py +111 -0
  64. superqode/core/roles.py +281 -0
  65. superqode/danger.py +386 -0
  66. superqode/data/superqode-template.yaml +1522 -0
  67. superqode/design_system.py +1080 -0
  68. superqode/dialogs/__init__.py +6 -0
  69. superqode/dialogs/base.py +39 -0
  70. superqode/dialogs/model.py +130 -0
  71. superqode/dialogs/provider.py +870 -0
  72. superqode/diff_view.py +919 -0
  73. superqode/enterprise.py +21 -0
  74. superqode/evaluation/__init__.py +25 -0
  75. superqode/evaluation/adapters.py +93 -0
  76. superqode/evaluation/behaviors.py +89 -0
  77. superqode/evaluation/engine.py +209 -0
  78. superqode/evaluation/scenarios.py +96 -0
  79. superqode/execution/__init__.py +36 -0
  80. superqode/execution/linter.py +538 -0
  81. superqode/execution/modes.py +347 -0
  82. superqode/execution/resolver.py +283 -0
  83. superqode/execution/runner.py +642 -0
  84. superqode/file_explorer.py +811 -0
  85. superqode/file_viewer.py +471 -0
  86. superqode/flash.py +183 -0
  87. superqode/guidance/__init__.py +58 -0
  88. superqode/guidance/config.py +203 -0
  89. superqode/guidance/prompts.py +71 -0
  90. superqode/harness/__init__.py +54 -0
  91. superqode/harness/accelerator.py +291 -0
  92. superqode/harness/config.py +319 -0
  93. superqode/harness/validator.py +147 -0
  94. superqode/history.py +279 -0
  95. superqode/integrations/superopt_runner.py +124 -0
  96. superqode/logging/__init__.py +49 -0
  97. superqode/logging/adapters.py +219 -0
  98. superqode/logging/formatter.py +923 -0
  99. superqode/logging/integration.py +341 -0
  100. superqode/logging/sinks.py +170 -0
  101. superqode/logging/unified_log.py +417 -0
  102. superqode/lsp/__init__.py +26 -0
  103. superqode/lsp/client.py +544 -0
  104. superqode/main.py +1069 -0
  105. superqode/mcp/__init__.py +89 -0
  106. superqode/mcp/auth_storage.py +380 -0
  107. superqode/mcp/client.py +1236 -0
  108. superqode/mcp/config.py +319 -0
  109. superqode/mcp/integration.py +337 -0
  110. superqode/mcp/oauth.py +436 -0
  111. superqode/mcp/oauth_callback.py +385 -0
  112. superqode/mcp/types.py +290 -0
  113. superqode/memory/__init__.py +31 -0
  114. superqode/memory/feedback.py +342 -0
  115. superqode/memory/store.py +522 -0
  116. superqode/notifications.py +369 -0
  117. superqode/optimization/__init__.py +5 -0
  118. superqode/optimization/config.py +33 -0
  119. superqode/permissions/__init__.py +25 -0
  120. superqode/permissions/rules.py +488 -0
  121. superqode/plan.py +323 -0
  122. superqode/providers/__init__.py +33 -0
  123. superqode/providers/gateway/__init__.py +165 -0
  124. superqode/providers/gateway/base.py +228 -0
  125. superqode/providers/gateway/litellm_gateway.py +1170 -0
  126. superqode/providers/gateway/openresponses_gateway.py +436 -0
  127. superqode/providers/health.py +297 -0
  128. superqode/providers/huggingface/__init__.py +74 -0
  129. superqode/providers/huggingface/downloader.py +472 -0
  130. superqode/providers/huggingface/endpoints.py +442 -0
  131. superqode/providers/huggingface/hub.py +531 -0
  132. superqode/providers/huggingface/inference.py +394 -0
  133. superqode/providers/huggingface/transformers_runner.py +516 -0
  134. superqode/providers/local/__init__.py +100 -0
  135. superqode/providers/local/base.py +438 -0
  136. superqode/providers/local/discovery.py +418 -0
  137. superqode/providers/local/lmstudio.py +256 -0
  138. superqode/providers/local/mlx.py +457 -0
  139. superqode/providers/local/ollama.py +486 -0
  140. superqode/providers/local/sglang.py +268 -0
  141. superqode/providers/local/tgi.py +260 -0
  142. superqode/providers/local/tool_support.py +477 -0
  143. superqode/providers/local/vllm.py +258 -0
  144. superqode/providers/manager.py +1338 -0
  145. superqode/providers/models.py +1016 -0
  146. superqode/providers/models_dev.py +578 -0
  147. superqode/providers/openresponses/__init__.py +87 -0
  148. superqode/providers/openresponses/converters/__init__.py +17 -0
  149. superqode/providers/openresponses/converters/messages.py +343 -0
  150. superqode/providers/openresponses/converters/tools.py +268 -0
  151. superqode/providers/openresponses/schema/__init__.py +56 -0
  152. superqode/providers/openresponses/schema/models.py +585 -0
  153. superqode/providers/openresponses/streaming/__init__.py +5 -0
  154. superqode/providers/openresponses/streaming/parser.py +338 -0
  155. superqode/providers/openresponses/tools/__init__.py +21 -0
  156. superqode/providers/openresponses/tools/apply_patch.py +352 -0
  157. superqode/providers/openresponses/tools/code_interpreter.py +290 -0
  158. superqode/providers/openresponses/tools/file_search.py +333 -0
  159. superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
  160. superqode/providers/registry.py +716 -0
  161. superqode/providers/usage.py +332 -0
  162. superqode/pure_mode.py +384 -0
  163. superqode/qr/__init__.py +23 -0
  164. superqode/qr/dashboard.py +781 -0
  165. superqode/qr/generator.py +1018 -0
  166. superqode/qr/templates.py +135 -0
  167. superqode/safety/__init__.py +41 -0
  168. superqode/safety/sandbox.py +413 -0
  169. superqode/safety/warnings.py +256 -0
  170. superqode/server/__init__.py +33 -0
  171. superqode/server/lsp_server.py +775 -0
  172. superqode/server/web.py +250 -0
  173. superqode/session/__init__.py +25 -0
  174. superqode/session/persistence.py +580 -0
  175. superqode/session/sharing.py +477 -0
  176. superqode/session.py +475 -0
  177. superqode/sidebar.py +2991 -0
  178. superqode/stream_view.py +648 -0
  179. superqode/styles/__init__.py +3 -0
  180. superqode/superqe/__init__.py +184 -0
  181. superqode/superqe/acp_runner.py +1064 -0
  182. superqode/superqe/constitution/__init__.py +62 -0
  183. superqode/superqe/constitution/evaluator.py +308 -0
  184. superqode/superqe/constitution/loader.py +432 -0
  185. superqode/superqe/constitution/schema.py +250 -0
  186. superqode/superqe/events.py +591 -0
  187. superqode/superqe/frameworks/__init__.py +65 -0
  188. superqode/superqe/frameworks/base.py +234 -0
  189. superqode/superqe/frameworks/e2e.py +263 -0
  190. superqode/superqe/frameworks/executor.py +237 -0
  191. superqode/superqe/frameworks/javascript.py +409 -0
  192. superqode/superqe/frameworks/python.py +373 -0
  193. superqode/superqe/frameworks/registry.py +92 -0
  194. superqode/superqe/mcp_tools/__init__.py +47 -0
  195. superqode/superqe/mcp_tools/core_tools.py +418 -0
  196. superqode/superqe/mcp_tools/registry.py +230 -0
  197. superqode/superqe/mcp_tools/testing_tools.py +167 -0
  198. superqode/superqe/noise.py +89 -0
  199. superqode/superqe/orchestrator.py +778 -0
  200. superqode/superqe/roles.py +609 -0
  201. superqode/superqe/session.py +713 -0
  202. superqode/superqe/skills/__init__.py +57 -0
  203. superqode/superqe/skills/base.py +106 -0
  204. superqode/superqe/skills/core_skills.py +899 -0
  205. superqode/superqe/skills/registry.py +90 -0
  206. superqode/superqe/verifier.py +101 -0
  207. superqode/superqe_cli.py +76 -0
  208. superqode/tool_call.py +358 -0
  209. superqode/tools/__init__.py +93 -0
  210. superqode/tools/agent_tools.py +496 -0
  211. superqode/tools/base.py +324 -0
  212. superqode/tools/batch_tool.py +133 -0
  213. superqode/tools/diagnostics.py +311 -0
  214. superqode/tools/edit_tools.py +653 -0
  215. superqode/tools/enhanced_base.py +515 -0
  216. superqode/tools/file_tools.py +269 -0
  217. superqode/tools/file_tracking.py +45 -0
  218. superqode/tools/lsp_tools.py +610 -0
  219. superqode/tools/network_tools.py +350 -0
  220. superqode/tools/permissions.py +400 -0
  221. superqode/tools/question_tool.py +324 -0
  222. superqode/tools/search_tools.py +598 -0
  223. superqode/tools/shell_tools.py +259 -0
  224. superqode/tools/todo_tools.py +121 -0
  225. superqode/tools/validation.py +80 -0
  226. superqode/tools/web_tools.py +639 -0
  227. superqode/tui.py +1152 -0
  228. superqode/tui_integration.py +875 -0
  229. superqode/tui_widgets/__init__.py +27 -0
  230. superqode/tui_widgets/widgets/__init__.py +18 -0
  231. superqode/tui_widgets/widgets/progress.py +185 -0
  232. superqode/tui_widgets/widgets/tool_display.py +188 -0
  233. superqode/undo_manager.py +574 -0
  234. superqode/utils/__init__.py +5 -0
  235. superqode/utils/error_handling.py +323 -0
  236. superqode/utils/fuzzy.py +257 -0
  237. superqode/widgets/__init__.py +477 -0
  238. superqode/widgets/agent_collab.py +390 -0
  239. superqode/widgets/agent_store.py +936 -0
  240. superqode/widgets/agent_switcher.py +395 -0
  241. superqode/widgets/animation_manager.py +284 -0
  242. superqode/widgets/code_context.py +356 -0
  243. superqode/widgets/command_palette.py +412 -0
  244. superqode/widgets/connection_status.py +537 -0
  245. superqode/widgets/conversation_history.py +470 -0
  246. superqode/widgets/diff_indicator.py +155 -0
  247. superqode/widgets/enhanced_status_bar.py +385 -0
  248. superqode/widgets/enhanced_toast.py +476 -0
  249. superqode/widgets/file_browser.py +809 -0
  250. superqode/widgets/file_reference.py +585 -0
  251. superqode/widgets/issue_timeline.py +340 -0
  252. superqode/widgets/leader_key.py +264 -0
  253. superqode/widgets/mode_switcher.py +445 -0
  254. superqode/widgets/model_picker.py +234 -0
  255. superqode/widgets/permission_preview.py +1205 -0
  256. superqode/widgets/prompt.py +358 -0
  257. superqode/widgets/provider_connect.py +725 -0
  258. superqode/widgets/pty_shell.py +587 -0
  259. superqode/widgets/qe_dashboard.py +321 -0
  260. superqode/widgets/resizable_sidebar.py +377 -0
  261. superqode/widgets/response_changes.py +218 -0
  262. superqode/widgets/response_display.py +528 -0
  263. superqode/widgets/rich_tool_display.py +613 -0
  264. superqode/widgets/sidebar_panels.py +1180 -0
  265. superqode/widgets/slash_complete.py +356 -0
  266. superqode/widgets/split_view.py +612 -0
  267. superqode/widgets/status_bar.py +273 -0
  268. superqode/widgets/superqode_display.py +786 -0
  269. superqode/widgets/thinking_display.py +815 -0
  270. superqode/widgets/throbber.py +87 -0
  271. superqode/widgets/toast.py +206 -0
  272. superqode/widgets/unified_output.py +1073 -0
  273. superqode/workspace/__init__.py +75 -0
  274. superqode/workspace/artifacts.py +472 -0
  275. superqode/workspace/coordinator.py +353 -0
  276. superqode/workspace/diff_tracker.py +429 -0
  277. superqode/workspace/git_guard.py +373 -0
  278. superqode/workspace/git_snapshot.py +526 -0
  279. superqode/workspace/manager.py +750 -0
  280. superqode/workspace/snapshot.py +357 -0
  281. superqode/workspace/watcher.py +535 -0
  282. superqode/workspace/worktree.py +440 -0
  283. superqode-0.1.5.dist-info/METADATA +204 -0
  284. superqode-0.1.5.dist-info/RECORD +288 -0
  285. superqode-0.1.5.dist-info/WHEEL +5 -0
  286. superqode-0.1.5.dist-info/entry_points.txt +3 -0
  287. superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
  288. superqode-0.1.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,268 @@
1
+ """SGLang client for structured generation.
2
+
3
+ SGLang is a fast serving framework for large language models and vision
4
+ language models with a focus on structured generation.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import os
10
+ import time
11
+ from datetime import datetime
12
+ from typing import Any, Dict, List, Optional
13
+ from urllib.error import URLError
14
+ from urllib.request import Request, urlopen
15
+
16
+ from superqode.providers.local.base import (
17
+ LocalProviderClient,
18
+ LocalProviderType,
19
+ LocalModel,
20
+ ProviderStatus,
21
+ ToolTestResult,
22
+ detect_model_family,
23
+ likely_supports_tools,
24
+ )
25
+
26
+
27
class SGLangClient(LocalProviderClient):
    """SGLang server client.

    SGLang provides:
    - RadixAttention for fast KV cache reuse
    - Compressed FSM for structured outputs
    - OpenAI-compatible API endpoints

    API Endpoints:
    - GET /health - Health check
    - GET /v1/models - List models
    - POST /v1/chat/completions - Chat completion
    - POST /generate - Native generation endpoint

    Environment:
        SGLANG_HOST: Override default host (default: http://localhost:30000)
    """

    provider_type = LocalProviderType.SGLANG
    default_port = 30000

    def __init__(self, host: Optional[str] = None):
        """Initialize SGLang client.

        Args:
            host: SGLang host URL. Falls back to SGLANG_HOST env var; if that
                is unset too, ``None`` is handed to the base class
                (presumably it derives a default from ``default_port`` --
                confirm against ``LocalProviderClient``).
        """
        if host is None:
            host = os.environ.get("SGLANG_HOST")
        super().__init__(host)

    def _request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Make a blocking JSON request to the SGLang API.

        Args:
            method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
            endpoint: Path appended verbatim to ``self.host``.
            data: Optional payload, serialized as a UTF-8 JSON body.
            timeout: Socket timeout in seconds passed to ``urlopen``.

        Returns:
            The decoded JSON response, or ``{}`` when the body is empty.

        Raises:
            Whatever ``urlopen`` or ``json.loads`` raise; nothing is caught
            here, callers decide how to handle failures.
        """
        url = f"{self.host}{endpoint}"
        headers = {"Content-Type": "application/json"}

        body = None
        if data is not None:
            body = json.dumps(data).encode("utf-8")

        request = Request(url, data=body, headers=headers, method=method)

        with urlopen(request, timeout=timeout) as response:
            content = response.read().decode("utf-8")

        # An empty body is treated as an empty JSON object rather than a
        # decoding error.
        if not content:
            return {}
        return json.loads(content)

    async def _async_request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Run :meth:`_request` in the default executor and await its result.

        Takes the same arguments and returns the same value as
        :meth:`_request`; exceptions raised there propagate to the awaiter.
        """
        # This is always awaited from a coroutine, so a loop is running;
        # get_running_loop() is the preferred accessor in that context.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: self._request(method, endpoint, data, timeout)
        )

    async def is_available(self) -> bool:
        """Check if SGLang is running.

        Probes ``/health`` first and ``/v1/models`` as a fallback, each with
        a 5 second timeout.

        Returns:
            True as soon as one probe succeeds, False if both fail.
        """
        for endpoint in ("/health", "/v1/models"):
            try:
                await self._async_request("GET", endpoint, timeout=5.0)
            except Exception:
                # Best-effort probe: any failure just means "try the next one".
                continue
            return True
        return False

    async def get_status(self) -> ProviderStatus:
        """Get detailed SGLang status.

        Returns:
            A ``ProviderStatus`` with ``available=True`` when both the health
            check and the model listing succeed, otherwise one with
            ``available=False`` and ``error`` set to the exception text.
        """
        start_time = time.time()

        try:
            # Check health
            await self._async_request("GET", "/health", timeout=5.0)

            # Get models; latency covers the health check plus this call.
            models_response = await self._async_request("GET", "/v1/models", timeout=5.0)
            latency = (time.time() - start_time) * 1000

            models = models_response.get("data", [])

            # Server info is optional: a failure here must not mark the
            # provider unavailable, so the version is simply left blank.
            version = ""
            try:
                info = await self._async_request("GET", "/get_server_info", timeout=5.0)
                version = info.get("version", "")
            except Exception:
                pass

            return ProviderStatus(
                available=True,
                provider_type=self.provider_type,
                host=self.host,
                version=version,
                models_count=len(models),
                # Every listed model is counted as running.
                running_models=len(models),
                # NOTE(review): hard-coded, not probed from the server.
                gpu_available=True,
                latency_ms=latency,
                last_checked=datetime.now(),
            )

        except Exception as e:
            return ProviderStatus(
                available=False,
                provider_type=self.provider_type,
                host=self.host,
                error=str(e),
                last_checked=datetime.now(),
            )

    async def list_models(self) -> List[LocalModel]:
        """List available models.

        Returns:
            One ``LocalModel`` per entry in the ``data`` array of
            ``GET /v1/models``, or an empty list if the request or the
            parsing fails for any reason.
        """
        try:
            response = await self._async_request("GET", "/v1/models")
            models = response.get("data", [])

            result = []
            for model_data in models:
                model_id = model_data.get("id", "")
                result.append(
                    LocalModel(
                        id=model_id,
                        # Display name is the last path segment of the id.
                        name=model_id.split("/")[-1],
                        family=detect_model_family(model_id),
                        supports_tools=likely_supports_tools(model_id),
                        running=True,
                    )
                )

            return result

        except Exception:
            return []

    async def list_running(self) -> List[LocalModel]:
        """List running models (same as list_models for SGLang)."""
        models = await self.list_models()
        for m in models:
            m.running = True
        return models

    async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
        """Get model information.

        Args:
            model_id: Full model id, or its trailing segment after a ``/``.

        Returns:
            The first listed model whose id equals ``model_id`` or ends with
            ``"/" + model_id``; None when nothing matches.
        """
        models = await self.list_models()
        for m in models:
            if m.id == model_id or m.id.endswith(f"/{model_id}"):
                return m
        return None

    async def test_tool_calling(self, model_id: str) -> ToolTestResult:
        """Test tool calling capability.

        Sends a single chat completion that offers a ``get_weather`` tool and
        checks whether the reply contains tool calls.

        Args:
            model_id: Model to test; sent as the ``model`` field.

        Returns:
            A ``ToolTestResult``. ``supports_tools`` is True only when the
            first choice's message carries ``tool_calls``. Models rejected by
            ``likely_supports_tools`` are reported as unsupported without any
            request being made; request failures are reported via ``error``.
        """
        start_time = time.time()

        if not likely_supports_tools(model_id):
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                notes="Model family not known to support tools",
            )

        test_tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ]

        try:
            response = await self._async_request(
                "POST",
                "/v1/chat/completions",
                data={
                    "model": model_id,
                    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
                    "tools": test_tools,
                },
                timeout=60.0,
            )

            latency = (time.time() - start_time) * 1000

            choices = response.get("choices", [])
            if choices:
                message = choices[0].get("message", {})
                tool_calls = message.get("tool_calls", [])

                if tool_calls:
                    return ToolTestResult(
                        model_id=model_id,
                        supports_tools=True,
                        # More than one call in a single reply counts as
                        # parallel tool use.
                        parallel_tools=len(tool_calls) > 1,
                        tool_choice=["auto"],
                        latency_ms=latency,
                        notes="Tool calling verified",
                    )

            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                latency_ms=latency,
                notes="Model did not use tools in test",
            )

        except Exception as e:
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                error=str(e),
            )

    def get_litellm_model_name(self, model_id: str) -> str:
        """Get LiteLLM-compatible model name."""
        # SGLang uses OpenAI-compatible format, so the id is passed through.
        return model_id
254
+
255
+
256
async def get_sglang_client(host: Optional[str] = None) -> Optional[SGLangClient]:
    """Build an SGLang client and hand it back only if the server responds.

    Args:
        host: Optional host override, forwarded to ``SGLangClient``.

    Returns:
        The new ``SGLangClient`` when its ``is_available()`` check passes,
        otherwise None.
    """
    candidate = SGLangClient(host)
    reachable = await candidate.is_available()
    return candidate if reachable else None
@@ -0,0 +1,260 @@
1
+ """HuggingFace Text Generation Inference (TGI) client.
2
+
3
+ TGI is HuggingFace's production-grade inference server for LLMs.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import os
9
+ import time
10
+ from datetime import datetime
11
+ from typing import Any, Dict, List, Optional
12
+ from urllib.error import URLError
13
+ from urllib.request import Request, urlopen
14
+
15
+ from superqode.providers.local.base import (
16
+ LocalProviderClient,
17
+ LocalProviderType,
18
+ LocalModel,
19
+ ProviderStatus,
20
+ ToolTestResult,
21
+ detect_model_family,
22
+ likely_supports_tools,
23
+ )
24
+
25
+
26
class TGIClient(LocalProviderClient):
    """HuggingFace Text Generation Inference client.

    TGI provides:
    - Flash Attention and Paged Attention
    - Continuous batching
    - Tensor parallelism for multi-GPU
    - Token streaming
    - Tool/function calling support

    API Endpoints:
    - GET /info - Model info
    - GET /health - Health check
    - POST /generate - Text generation
    - POST /v1/chat/completions - OpenAI-compatible chat

    Environment:
        TGI_HOST: Override default host (default: http://localhost:8080)
    """

    provider_type = LocalProviderType.TGI
    default_port = 8080

    def __init__(self, host: Optional[str] = None):
        """Initialize TGI client.

        Args:
            host: TGI host URL. Falls back to TGI_HOST env var; if that is
                unset too, ``None`` is handed to the base class (presumably
                it derives a default from ``default_port`` -- confirm against
                ``LocalProviderClient``).
        """
        if host is None:
            host = os.environ.get("TGI_HOST")
        super().__init__(host)

    def _request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Make a blocking JSON request to the TGI API.

        Args:
            method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
            endpoint: Path appended verbatim to ``self.host``.
            data: Optional payload, serialized as a UTF-8 JSON body.
            timeout: Socket timeout in seconds passed to ``urlopen``.

        Returns:
            The decoded JSON response, or ``{}`` when the body is empty.

        Raises:
            Whatever ``urlopen`` or ``json.loads`` raise; nothing is caught
            here, callers decide how to handle failures.
        """
        url = f"{self.host}{endpoint}"
        headers = {"Content-Type": "application/json"}

        body = None
        if data is not None:
            body = json.dumps(data).encode("utf-8")

        request = Request(url, data=body, headers=headers, method=method)

        with urlopen(request, timeout=timeout) as response:
            content = response.read().decode("utf-8")

        # A successful response may carry no body at all (a bare 200 from a
        # health probe, for instance). Feeding "" to json.loads would raise
        # and make a healthy server look like a failed request.
        if not content:
            return {}
        return json.loads(content)

    async def _async_request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Run :meth:`_request` in the default executor and await its result.

        Takes the same arguments and returns the same value as
        :meth:`_request`; exceptions raised there propagate to the awaiter.
        """
        # This is always awaited from a coroutine, so a loop is running;
        # get_running_loop() is the preferred accessor in that context.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: self._request(method, endpoint, data, timeout)
        )

    async def is_available(self) -> bool:
        """Check if TGI is running.

        Probes ``/health`` first and ``/info`` as a fallback, each with a
        5 second timeout.

        Returns:
            True as soon as one probe succeeds, False if both fail.
        """
        for endpoint in ("/health", "/info"):
            try:
                await self._async_request("GET", endpoint, timeout=5.0)
            except Exception:
                # Best-effort probe: any failure just means "try the next one".
                continue
            return True
        return False

    async def get_status(self) -> ProviderStatus:
        """Get detailed TGI status.

        Returns:
            A ``ProviderStatus`` with ``available=True`` when ``GET /info``
            succeeds, otherwise one with ``available=False`` and ``error``
            set to the exception text.
        """
        start_time = time.time()

        try:
            # Get model info
            info = await self._async_request("GET", "/info", timeout=5.0)
            latency = (time.time() - start_time) * 1000

            version = info.get("version", "")

            return ProviderStatus(
                available=True,
                provider_type=self.provider_type,
                host=self.host,
                version=version,
                models_count=1,  # TGI serves one model
                running_models=1,
                # NOTE(review): hard-coded, not probed from the server.
                gpu_available=True,
                latency_ms=latency,
                last_checked=datetime.now(),
            )

        except Exception as e:
            return ProviderStatus(
                available=False,
                provider_type=self.provider_type,
                host=self.host,
                error=str(e),
                last_checked=datetime.now(),
            )

    async def list_models(self) -> List[LocalModel]:
        """List available models (TGI serves one model).

        Returns:
            A single-element list built from ``GET /info``, or an empty list
            when the response has no ``model_id`` or the request fails.
        """
        try:
            info = await self._async_request("GET", "/info")
            model_id = info.get("model_id", "")

            if not model_id:
                return []

            # Extract context length if available; fall back to fixed
            # defaults when the server does not report the limits.
            max_input = info.get("max_input_length", 4096)
            max_total = info.get("max_total_tokens", 8192)

            return [
                LocalModel(
                    id=model_id,
                    # Display name is the last path segment of the id.
                    name=model_id.split("/")[-1],
                    context_window=max_total,
                    family=detect_model_family(model_id),
                    supports_tools=likely_supports_tools(model_id),
                    running=True,
                    details={
                        "max_input_length": max_input,
                        "max_total_tokens": max_total,
                        "max_batch_total_tokens": info.get("max_batch_total_tokens"),
                    },
                )
            ]

        except Exception:
            return []

    async def list_running(self) -> List[LocalModel]:
        """List running models."""
        return await self.list_models()

    async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
        """Get model information.

        Args:
            model_id: Accepted for interface compatibility but not used for
                matching; the first entry of :meth:`list_models` is returned
                whatever id is requested.

        Returns:
            The served model, or None when the listing is empty.
        """
        models = await self.list_models()
        if models:
            return models[0]
        return None

    async def test_tool_calling(self, model_id: str) -> ToolTestResult:
        """Test tool calling capability.

        Sends a single chat completion that offers a ``get_weather`` tool and
        checks whether the reply contains tool calls.

        Args:
            model_id: Used for the ``likely_supports_tools`` pre-check and
                echoed in the result; the request itself always sends
                ``"model": "tgi"``.

        Returns:
            A ``ToolTestResult``. ``supports_tools`` is True only when the
            first choice's message carries ``tool_calls``. Models rejected by
            ``likely_supports_tools`` are reported as unsupported without any
            request being made; request failures are reported via ``error``.
        """
        start_time = time.time()

        if not likely_supports_tools(model_id):
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                notes="Model family not known to support tools",
            )

        test_tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ]

        try:
            response = await self._async_request(
                "POST",
                "/v1/chat/completions",
                data={
                    "model": "tgi",
                    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
                    "tools": test_tools,
                },
                timeout=60.0,
            )

            latency = (time.time() - start_time) * 1000

            choices = response.get("choices", [])
            if choices:
                message = choices[0].get("message", {})
                tool_calls = message.get("tool_calls", [])

                if tool_calls:
                    return ToolTestResult(
                        model_id=model_id,
                        supports_tools=True,
                        # More than one call in a single reply counts as
                        # parallel tool use.
                        parallel_tools=len(tool_calls) > 1,
                        tool_choice=["auto"],
                        latency_ms=latency,
                        notes="Tool calling verified",
                    )

            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                latency_ms=latency,
                notes="Model did not use tools in test",
            )

        except Exception as e:
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                error=str(e),
            )

    def get_litellm_model_name(self, model_id: str) -> str:
        """Get LiteLLM-compatible model name.

        Returns:
            ``model_id`` prefixed with ``huggingface/`` unless it already
            starts with that prefix.
        """
        if model_id.startswith("huggingface/"):
            return model_id
        return f"huggingface/{model_id}"
246
+
247
+
248
async def get_tgi_client(host: Optional[str] = None) -> Optional[TGIClient]:
    """Build a TGI client and hand it back only if the server responds.

    Args:
        host: Optional host override, forwarded to ``TGIClient``.

    Returns:
        The new ``TGIClient`` when its ``is_available()`` check passes,
        otherwise None.
    """
    candidate = TGIClient(host)
    reachable = await candidate.is_available()
    return candidate if reachable else None