superqode 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superqode/__init__.py +33 -0
- superqode/acp/__init__.py +23 -0
- superqode/acp/client.py +913 -0
- superqode/acp/permission_screen.py +457 -0
- superqode/acp/types.py +480 -0
- superqode/acp_discovery.py +856 -0
- superqode/agent/__init__.py +22 -0
- superqode/agent/edit_strategies.py +334 -0
- superqode/agent/loop.py +892 -0
- superqode/agent/qe_report_templates.py +39 -0
- superqode/agent/system_prompts.py +353 -0
- superqode/agent_output.py +721 -0
- superqode/agent_stream.py +953 -0
- superqode/agents/__init__.py +59 -0
- superqode/agents/acp_registry.py +305 -0
- superqode/agents/client.py +249 -0
- superqode/agents/data/augmentcode.com.toml +51 -0
- superqode/agents/data/cagent.dev.toml +51 -0
- superqode/agents/data/claude.com.toml +60 -0
- superqode/agents/data/codeassistant.dev.toml +51 -0
- superqode/agents/data/codex.openai.com.toml +57 -0
- superqode/agents/data/fastagent.ai.toml +66 -0
- superqode/agents/data/geminicli.com.toml +77 -0
- superqode/agents/data/goose.block.xyz.toml +54 -0
- superqode/agents/data/junie.jetbrains.com.toml +56 -0
- superqode/agents/data/kimi.moonshot.cn.toml +57 -0
- superqode/agents/data/llmlingagent.dev.toml +51 -0
- superqode/agents/data/molt.bot.toml +49 -0
- superqode/agents/data/opencode.ai.toml +60 -0
- superqode/agents/data/stakpak.dev.toml +51 -0
- superqode/agents/data/vtcode.dev.toml +51 -0
- superqode/agents/discovery.py +266 -0
- superqode/agents/messaging.py +160 -0
- superqode/agents/persona.py +166 -0
- superqode/agents/registry.py +421 -0
- superqode/agents/schema.py +72 -0
- superqode/agents/unified.py +367 -0
- superqode/app/__init__.py +111 -0
- superqode/app/constants.py +314 -0
- superqode/app/css.py +366 -0
- superqode/app/models.py +118 -0
- superqode/app/suggester.py +125 -0
- superqode/app/widgets.py +1591 -0
- superqode/app_enhanced.py +399 -0
- superqode/app_main.py +17187 -0
- superqode/approval.py +312 -0
- superqode/atomic.py +296 -0
- superqode/commands/__init__.py +1 -0
- superqode/commands/acp.py +965 -0
- superqode/commands/agents.py +180 -0
- superqode/commands/auth.py +278 -0
- superqode/commands/config.py +374 -0
- superqode/commands/init.py +826 -0
- superqode/commands/providers.py +819 -0
- superqode/commands/qe.py +1145 -0
- superqode/commands/roles.py +380 -0
- superqode/commands/serve.py +172 -0
- superqode/commands/suggestions.py +127 -0
- superqode/commands/superqe.py +460 -0
- superqode/config/__init__.py +51 -0
- superqode/config/loader.py +812 -0
- superqode/config/schema.py +498 -0
- superqode/core/__init__.py +111 -0
- superqode/core/roles.py +281 -0
- superqode/danger.py +386 -0
- superqode/data/superqode-template.yaml +1522 -0
- superqode/design_system.py +1080 -0
- superqode/dialogs/__init__.py +6 -0
- superqode/dialogs/base.py +39 -0
- superqode/dialogs/model.py +130 -0
- superqode/dialogs/provider.py +870 -0
- superqode/diff_view.py +919 -0
- superqode/enterprise.py +21 -0
- superqode/evaluation/__init__.py +25 -0
- superqode/evaluation/adapters.py +93 -0
- superqode/evaluation/behaviors.py +89 -0
- superqode/evaluation/engine.py +209 -0
- superqode/evaluation/scenarios.py +96 -0
- superqode/execution/__init__.py +36 -0
- superqode/execution/linter.py +538 -0
- superqode/execution/modes.py +347 -0
- superqode/execution/resolver.py +283 -0
- superqode/execution/runner.py +642 -0
- superqode/file_explorer.py +811 -0
- superqode/file_viewer.py +471 -0
- superqode/flash.py +183 -0
- superqode/guidance/__init__.py +58 -0
- superqode/guidance/config.py +203 -0
- superqode/guidance/prompts.py +71 -0
- superqode/harness/__init__.py +54 -0
- superqode/harness/accelerator.py +291 -0
- superqode/harness/config.py +319 -0
- superqode/harness/validator.py +147 -0
- superqode/history.py +279 -0
- superqode/integrations/superopt_runner.py +124 -0
- superqode/logging/__init__.py +49 -0
- superqode/logging/adapters.py +219 -0
- superqode/logging/formatter.py +923 -0
- superqode/logging/integration.py +341 -0
- superqode/logging/sinks.py +170 -0
- superqode/logging/unified_log.py +417 -0
- superqode/lsp/__init__.py +26 -0
- superqode/lsp/client.py +544 -0
- superqode/main.py +1069 -0
- superqode/mcp/__init__.py +89 -0
- superqode/mcp/auth_storage.py +380 -0
- superqode/mcp/client.py +1236 -0
- superqode/mcp/config.py +319 -0
- superqode/mcp/integration.py +337 -0
- superqode/mcp/oauth.py +436 -0
- superqode/mcp/oauth_callback.py +385 -0
- superqode/mcp/types.py +290 -0
- superqode/memory/__init__.py +31 -0
- superqode/memory/feedback.py +342 -0
- superqode/memory/store.py +522 -0
- superqode/notifications.py +369 -0
- superqode/optimization/__init__.py +5 -0
- superqode/optimization/config.py +33 -0
- superqode/permissions/__init__.py +25 -0
- superqode/permissions/rules.py +488 -0
- superqode/plan.py +323 -0
- superqode/providers/__init__.py +33 -0
- superqode/providers/gateway/__init__.py +165 -0
- superqode/providers/gateway/base.py +228 -0
- superqode/providers/gateway/litellm_gateway.py +1170 -0
- superqode/providers/gateway/openresponses_gateway.py +436 -0
- superqode/providers/health.py +297 -0
- superqode/providers/huggingface/__init__.py +74 -0
- superqode/providers/huggingface/downloader.py +472 -0
- superqode/providers/huggingface/endpoints.py +442 -0
- superqode/providers/huggingface/hub.py +531 -0
- superqode/providers/huggingface/inference.py +394 -0
- superqode/providers/huggingface/transformers_runner.py +516 -0
- superqode/providers/local/__init__.py +100 -0
- superqode/providers/local/base.py +438 -0
- superqode/providers/local/discovery.py +418 -0
- superqode/providers/local/lmstudio.py +256 -0
- superqode/providers/local/mlx.py +457 -0
- superqode/providers/local/ollama.py +486 -0
- superqode/providers/local/sglang.py +268 -0
- superqode/providers/local/tgi.py +260 -0
- superqode/providers/local/tool_support.py +477 -0
- superqode/providers/local/vllm.py +258 -0
- superqode/providers/manager.py +1338 -0
- superqode/providers/models.py +1016 -0
- superqode/providers/models_dev.py +578 -0
- superqode/providers/openresponses/__init__.py +87 -0
- superqode/providers/openresponses/converters/__init__.py +17 -0
- superqode/providers/openresponses/converters/messages.py +343 -0
- superqode/providers/openresponses/converters/tools.py +268 -0
- superqode/providers/openresponses/schema/__init__.py +56 -0
- superqode/providers/openresponses/schema/models.py +585 -0
- superqode/providers/openresponses/streaming/__init__.py +5 -0
- superqode/providers/openresponses/streaming/parser.py +338 -0
- superqode/providers/openresponses/tools/__init__.py +21 -0
- superqode/providers/openresponses/tools/apply_patch.py +352 -0
- superqode/providers/openresponses/tools/code_interpreter.py +290 -0
- superqode/providers/openresponses/tools/file_search.py +333 -0
- superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
- superqode/providers/registry.py +716 -0
- superqode/providers/usage.py +332 -0
- superqode/pure_mode.py +384 -0
- superqode/qr/__init__.py +23 -0
- superqode/qr/dashboard.py +781 -0
- superqode/qr/generator.py +1018 -0
- superqode/qr/templates.py +135 -0
- superqode/safety/__init__.py +41 -0
- superqode/safety/sandbox.py +413 -0
- superqode/safety/warnings.py +256 -0
- superqode/server/__init__.py +33 -0
- superqode/server/lsp_server.py +775 -0
- superqode/server/web.py +250 -0
- superqode/session/__init__.py +25 -0
- superqode/session/persistence.py +580 -0
- superqode/session/sharing.py +477 -0
- superqode/session.py +475 -0
- superqode/sidebar.py +2991 -0
- superqode/stream_view.py +648 -0
- superqode/styles/__init__.py +3 -0
- superqode/superqe/__init__.py +184 -0
- superqode/superqe/acp_runner.py +1064 -0
- superqode/superqe/constitution/__init__.py +62 -0
- superqode/superqe/constitution/evaluator.py +308 -0
- superqode/superqe/constitution/loader.py +432 -0
- superqode/superqe/constitution/schema.py +250 -0
- superqode/superqe/events.py +591 -0
- superqode/superqe/frameworks/__init__.py +65 -0
- superqode/superqe/frameworks/base.py +234 -0
- superqode/superqe/frameworks/e2e.py +263 -0
- superqode/superqe/frameworks/executor.py +237 -0
- superqode/superqe/frameworks/javascript.py +409 -0
- superqode/superqe/frameworks/python.py +373 -0
- superqode/superqe/frameworks/registry.py +92 -0
- superqode/superqe/mcp_tools/__init__.py +47 -0
- superqode/superqe/mcp_tools/core_tools.py +418 -0
- superqode/superqe/mcp_tools/registry.py +230 -0
- superqode/superqe/mcp_tools/testing_tools.py +167 -0
- superqode/superqe/noise.py +89 -0
- superqode/superqe/orchestrator.py +778 -0
- superqode/superqe/roles.py +609 -0
- superqode/superqe/session.py +713 -0
- superqode/superqe/skills/__init__.py +57 -0
- superqode/superqe/skills/base.py +106 -0
- superqode/superqe/skills/core_skills.py +899 -0
- superqode/superqe/skills/registry.py +90 -0
- superqode/superqe/verifier.py +101 -0
- superqode/superqe_cli.py +76 -0
- superqode/tool_call.py +358 -0
- superqode/tools/__init__.py +93 -0
- superqode/tools/agent_tools.py +496 -0
- superqode/tools/base.py +324 -0
- superqode/tools/batch_tool.py +133 -0
- superqode/tools/diagnostics.py +311 -0
- superqode/tools/edit_tools.py +653 -0
- superqode/tools/enhanced_base.py +515 -0
- superqode/tools/file_tools.py +269 -0
- superqode/tools/file_tracking.py +45 -0
- superqode/tools/lsp_tools.py +610 -0
- superqode/tools/network_tools.py +350 -0
- superqode/tools/permissions.py +400 -0
- superqode/tools/question_tool.py +324 -0
- superqode/tools/search_tools.py +598 -0
- superqode/tools/shell_tools.py +259 -0
- superqode/tools/todo_tools.py +121 -0
- superqode/tools/validation.py +80 -0
- superqode/tools/web_tools.py +639 -0
- superqode/tui.py +1152 -0
- superqode/tui_integration.py +875 -0
- superqode/tui_widgets/__init__.py +27 -0
- superqode/tui_widgets/widgets/__init__.py +18 -0
- superqode/tui_widgets/widgets/progress.py +185 -0
- superqode/tui_widgets/widgets/tool_display.py +188 -0
- superqode/undo_manager.py +574 -0
- superqode/utils/__init__.py +5 -0
- superqode/utils/error_handling.py +323 -0
- superqode/utils/fuzzy.py +257 -0
- superqode/widgets/__init__.py +477 -0
- superqode/widgets/agent_collab.py +390 -0
- superqode/widgets/agent_store.py +936 -0
- superqode/widgets/agent_switcher.py +395 -0
- superqode/widgets/animation_manager.py +284 -0
- superqode/widgets/code_context.py +356 -0
- superqode/widgets/command_palette.py +412 -0
- superqode/widgets/connection_status.py +537 -0
- superqode/widgets/conversation_history.py +470 -0
- superqode/widgets/diff_indicator.py +155 -0
- superqode/widgets/enhanced_status_bar.py +385 -0
- superqode/widgets/enhanced_toast.py +476 -0
- superqode/widgets/file_browser.py +809 -0
- superqode/widgets/file_reference.py +585 -0
- superqode/widgets/issue_timeline.py +340 -0
- superqode/widgets/leader_key.py +264 -0
- superqode/widgets/mode_switcher.py +445 -0
- superqode/widgets/model_picker.py +234 -0
- superqode/widgets/permission_preview.py +1205 -0
- superqode/widgets/prompt.py +358 -0
- superqode/widgets/provider_connect.py +725 -0
- superqode/widgets/pty_shell.py +587 -0
- superqode/widgets/qe_dashboard.py +321 -0
- superqode/widgets/resizable_sidebar.py +377 -0
- superqode/widgets/response_changes.py +218 -0
- superqode/widgets/response_display.py +528 -0
- superqode/widgets/rich_tool_display.py +613 -0
- superqode/widgets/sidebar_panels.py +1180 -0
- superqode/widgets/slash_complete.py +356 -0
- superqode/widgets/split_view.py +612 -0
- superqode/widgets/status_bar.py +273 -0
- superqode/widgets/superqode_display.py +786 -0
- superqode/widgets/thinking_display.py +815 -0
- superqode/widgets/throbber.py +87 -0
- superqode/widgets/toast.py +206 -0
- superqode/widgets/unified_output.py +1073 -0
- superqode/workspace/__init__.py +75 -0
- superqode/workspace/artifacts.py +472 -0
- superqode/workspace/coordinator.py +353 -0
- superqode/workspace/diff_tracker.py +429 -0
- superqode/workspace/git_guard.py +373 -0
- superqode/workspace/git_snapshot.py +526 -0
- superqode/workspace/manager.py +750 -0
- superqode/workspace/snapshot.py +357 -0
- superqode/workspace/watcher.py +535 -0
- superqode/workspace/worktree.py +440 -0
- superqode-0.1.5.dist-info/METADATA +204 -0
- superqode-0.1.5.dist-info/RECORD +288 -0
- superqode-0.1.5.dist-info/WHEEL +5 -0
- superqode-0.1.5.dist-info/entry_points.txt +3 -0
- superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
- superqode-0.1.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""SGLang client for structured generation.
|
|
2
|
+
|
|
3
|
+
SGLang is a fast serving framework for large language models and vision
|
|
4
|
+
language models with a focus on structured generation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from typing import Any, Dict, List, Optional
|
|
13
|
+
from urllib.error import URLError
|
|
14
|
+
from urllib.request import Request, urlopen
|
|
15
|
+
|
|
16
|
+
from superqode.providers.local.base import (
|
|
17
|
+
LocalProviderClient,
|
|
18
|
+
LocalProviderType,
|
|
19
|
+
LocalModel,
|
|
20
|
+
ProviderStatus,
|
|
21
|
+
ToolTestResult,
|
|
22
|
+
detect_model_family,
|
|
23
|
+
likely_supports_tools,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SGLangClient(LocalProviderClient):
    """SGLang server client.

    SGLang provides:
    - RadixAttention for fast KV cache reuse
    - Compressed FSM for structured outputs
    - OpenAI-compatible API endpoints

    API Endpoints:
    - GET /health - Health check
    - GET /v1/models - List models
    - POST /v1/chat/completions - Chat completion
    - POST /generate - Native generation endpoint

    Environment:
        SGLANG_HOST: Override default host (default: http://localhost:30000)
    """

    provider_type = LocalProviderType.SGLANG
    default_port = 30000

    def __init__(self, host: Optional[str] = None):
        """Initialize SGLang client.

        Args:
            host: SGLang host URL. Falls back to SGLANG_HOST env var.
        """
        if host is None:
            host = os.environ.get("SGLANG_HOST")
        super().__init__(host)

    def _request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Make a blocking HTTP request to the SGLang API.

        Args:
            method: HTTP method name ("GET", "POST", ...).
            endpoint: Path appended to the configured host (e.g. "/v1/models").
            data: Optional JSON-serializable request body.
            timeout: Socket timeout in seconds.

        Returns:
            Parsed JSON response body, or an empty dict for an empty body.

        Raises:
            urllib.error.URLError/HTTPError: On network or HTTP failures.
        """
        url = f"{self.host}{endpoint}"
        headers = {"Content-Type": "application/json"}

        body = None
        if data is not None:
            body = json.dumps(data).encode("utf-8")

        request = Request(url, data=body, headers=headers, method=method)

        with urlopen(request, timeout=timeout) as response:
            content = response.read().decode("utf-8")
            if content:
                return json.loads(content)
            return {}

    async def _async_request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Async wrapper for _request, executed in the default thread pool."""
        # asyncio.get_event_loop() is deprecated inside coroutines since
        # Python 3.10; get_running_loop() is the supported replacement.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: self._request(method, endpoint, data, timeout)
        )

    async def is_available(self) -> bool:
        """Check if SGLang is running.

        Returns:
            True if either /health or /v1/models responds, False otherwise.
        """
        try:
            await self._async_request("GET", "/health", timeout=5.0)
            return True
        except Exception:
            # Try /v1/models as fallback
            try:
                await self._async_request("GET", "/v1/models", timeout=5.0)
                return True
            except Exception:
                return False

    async def get_status(self) -> ProviderStatus:
        """Get detailed SGLang status.

        Returns:
            ProviderStatus with availability, model counts, and latency;
            on failure, an unavailable status carrying the error string.
        """
        start_time = time.time()

        try:
            # Check health
            await self._async_request("GET", "/health", timeout=5.0)

            # Get models
            models_response = await self._async_request("GET", "/v1/models", timeout=5.0)
            latency = (time.time() - start_time) * 1000

            models = models_response.get("data", [])

            # Try to get server info (optional endpoint; ignore failures)
            version = ""
            try:
                info = await self._async_request("GET", "/get_server_info", timeout=5.0)
                version = info.get("version", "")
            except Exception:
                pass

            return ProviderStatus(
                available=True,
                provider_type=self.provider_type,
                host=self.host,
                version=version,
                models_count=len(models),
                running_models=len(models),
                # NOTE(review): assumed True because SGLang typically runs on
                # GPU; the API is not queried for this — confirm if it matters.
                gpu_available=True,
                latency_ms=latency,
                last_checked=datetime.now(),
            )

        except Exception as e:
            return ProviderStatus(
                available=False,
                provider_type=self.provider_type,
                host=self.host,
                error=str(e),
                last_checked=datetime.now(),
            )

    async def list_models(self) -> List[LocalModel]:
        """List available models.

        Returns:
            LocalModel entries from /v1/models; empty list on any failure.
        """
        try:
            response = await self._async_request("GET", "/v1/models")
            models = response.get("data", [])

            result = []
            for model_data in models:
                model_id = model_data.get("id", "")
                result.append(
                    LocalModel(
                        id=model_id,
                        # Drop the org prefix (e.g. "org/model" -> "model")
                        name=model_id.split("/")[-1],
                        family=detect_model_family(model_id),
                        supports_tools=likely_supports_tools(model_id),
                        running=True,
                    )
                )

            return result

        except Exception:
            return []

    async def list_running(self) -> List[LocalModel]:
        """List running models (same as list_models for SGLang)."""
        models = await self.list_models()
        for m in models:
            m.running = True
        return models

    async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
        """Get model information.

        Args:
            model_id: Exact id, or a suffix matched against "org/<model_id>".

        Returns:
            The matching LocalModel, or None if not found.
        """
        models = await self.list_models()
        for m in models:
            if m.id == model_id or m.id.endswith(f"/{model_id}"):
                return m
        return None

    async def test_tool_calling(self, model_id: str) -> ToolTestResult:
        """Test tool calling capability with a one-shot weather prompt.

        Args:
            model_id: Model to probe.

        Returns:
            ToolTestResult describing whether the model emitted tool calls.
        """
        start_time = time.time()

        # Skip the network round-trip for families not known to support tools.
        if not likely_supports_tools(model_id):
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                notes="Model family not known to support tools",
            )

        test_tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ]

        try:
            response = await self._async_request(
                "POST",
                "/v1/chat/completions",
                data={
                    "model": model_id,
                    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
                    "tools": test_tools,
                },
                timeout=60.0,
            )

            latency = (time.time() - start_time) * 1000

            choices = response.get("choices", [])
            if choices:
                message = choices[0].get("message", {})
                tool_calls = message.get("tool_calls", [])

                if tool_calls:
                    return ToolTestResult(
                        model_id=model_id,
                        supports_tools=True,
                        parallel_tools=len(tool_calls) > 1,
                        tool_choice=["auto"],
                        latency_ms=latency,
                        notes="Tool calling verified",
                    )

            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                latency_ms=latency,
                notes="Model did not use tools in test",
            )

        except Exception as e:
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                error=str(e),
            )

    def get_litellm_model_name(self, model_id: str) -> str:
        """Get LiteLLM-compatible model name."""
        # SGLang uses OpenAI-compatible format
        return model_id
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
async def get_sglang_client(host: Optional[str] = None) -> Optional[SGLangClient]:
    """Probe for a running SGLang server and return a client for it.

    Args:
        host: Optional host override.

    Returns:
        SGLangClient if SGLang is running, None otherwise.
    """
    candidate = SGLangClient(host)
    return candidate if await candidate.is_available() else None
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""HuggingFace Text Generation Inference (TGI) client.
|
|
2
|
+
|
|
3
|
+
TGI is HuggingFace's production-grade inference server for LLMs.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
from urllib.error import URLError
|
|
13
|
+
from urllib.request import Request, urlopen
|
|
14
|
+
|
|
15
|
+
from superqode.providers.local.base import (
|
|
16
|
+
LocalProviderClient,
|
|
17
|
+
LocalProviderType,
|
|
18
|
+
LocalModel,
|
|
19
|
+
ProviderStatus,
|
|
20
|
+
ToolTestResult,
|
|
21
|
+
detect_model_family,
|
|
22
|
+
likely_supports_tools,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TGIClient(LocalProviderClient):
    """HuggingFace Text Generation Inference client.

    TGI provides:
    - Flash Attention and Paged Attention
    - Continuous batching
    - Tensor parallelism for multi-GPU
    - Token streaming
    - Tool/function calling support

    API Endpoints:
    - GET /info - Model info
    - GET /health - Health check
    - POST /generate - Text generation
    - POST /v1/chat/completions - OpenAI-compatible chat

    Environment:
        TGI_HOST: Override default host (default: http://localhost:8080)
    """

    provider_type = LocalProviderType.TGI
    default_port = 8080

    def __init__(self, host: Optional[str] = None):
        """Initialize TGI client.

        Args:
            host: TGI host URL. Falls back to TGI_HOST env var.
        """
        if host is None:
            host = os.environ.get("TGI_HOST")
        super().__init__(host)

    def _request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Make a blocking HTTP request to the TGI API.

        Args:
            method: HTTP method name ("GET", "POST", ...).
            endpoint: Path appended to the configured host (e.g. "/info").
            data: Optional JSON-serializable request body.
            timeout: Socket timeout in seconds.

        Returns:
            Parsed JSON response body.

        Raises:
            urllib.error.URLError/HTTPError: On network or HTTP failures.
        """
        url = f"{self.host}{endpoint}"
        headers = {"Content-Type": "application/json"}

        body = None
        if data is not None:
            body = json.dumps(data).encode("utf-8")

        request = Request(url, data=body, headers=headers, method=method)

        with urlopen(request, timeout=timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    async def _async_request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Async wrapper for _request, executed in the default thread pool."""
        # asyncio.get_event_loop() is deprecated inside coroutines since
        # Python 3.10; get_running_loop() is the supported replacement.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: self._request(method, endpoint, data, timeout)
        )

    async def is_available(self) -> bool:
        """Check if TGI is running.

        Returns:
            True if either /health or /info responds, False otherwise.
        """
        try:
            await self._async_request("GET", "/health", timeout=5.0)
            return True
        except Exception:
            try:
                await self._async_request("GET", "/info", timeout=5.0)
                return True
            except Exception:
                return False

    async def get_status(self) -> ProviderStatus:
        """Get detailed TGI status.

        Returns:
            ProviderStatus with availability, model counts, and latency;
            on failure, an unavailable status carrying the error string.
        """
        start_time = time.time()

        try:
            # Get model info
            info = await self._async_request("GET", "/info", timeout=5.0)
            latency = (time.time() - start_time) * 1000

            model_id = info.get("model_id", "")
            version = info.get("version", "")

            return ProviderStatus(
                available=True,
                provider_type=self.provider_type,
                host=self.host,
                version=version,
                models_count=1,  # TGI serves one model
                running_models=1,
                # NOTE(review): assumed True because TGI typically runs on
                # GPU; the API is not queried for this — confirm if it matters.
                gpu_available=True,
                latency_ms=latency,
                last_checked=datetime.now(),
            )

        except Exception as e:
            return ProviderStatus(
                available=False,
                provider_type=self.provider_type,
                host=self.host,
                error=str(e),
                last_checked=datetime.now(),
            )

    async def list_models(self) -> List[LocalModel]:
        """List available models (TGI serves one model).

        Returns:
            A single-element list describing the served model, or an empty
            list when /info fails or reports no model_id.
        """
        try:
            info = await self._async_request("GET", "/info")
            model_id = info.get("model_id", "")

            if not model_id:
                return []

            # Extract context length if available
            max_input = info.get("max_input_length", 4096)
            max_total = info.get("max_total_tokens", 8192)

            return [
                LocalModel(
                    id=model_id,
                    # Drop the org prefix (e.g. "org/model" -> "model")
                    name=model_id.split("/")[-1],
                    context_window=max_total,
                    family=detect_model_family(model_id),
                    supports_tools=likely_supports_tools(model_id),
                    running=True,
                    details={
                        "max_input_length": max_input,
                        "max_total_tokens": max_total,
                        "max_batch_total_tokens": info.get("max_batch_total_tokens"),
                    },
                )
            ]

        except Exception:
            return []

    async def list_running(self) -> List[LocalModel]:
        """List running models."""
        return await self.list_models()

    async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
        """Get model information.

        Args:
            model_id: Accepted for interface parity; TGI serves one model,
                so the served model is returned regardless.

        Returns:
            The served LocalModel, or None if the server is unreachable.
        """
        models = await self.list_models()
        if models:
            return models[0]
        return None

    async def test_tool_calling(self, model_id: str) -> ToolTestResult:
        """Test tool calling capability with a one-shot weather prompt.

        Args:
            model_id: Model to probe (reported in the result; the request
                itself uses TGI's fixed "tgi" model name).

        Returns:
            ToolTestResult describing whether the model emitted tool calls.
        """
        start_time = time.time()

        # Skip the network round-trip for families not known to support tools.
        if not likely_supports_tools(model_id):
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                notes="Model family not known to support tools",
            )

        test_tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ]

        try:
            response = await self._async_request(
                "POST",
                "/v1/chat/completions",
                data={
                    "model": "tgi",
                    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
                    "tools": test_tools,
                },
                timeout=60.0,
            )

            latency = (time.time() - start_time) * 1000

            choices = response.get("choices", [])
            if choices:
                message = choices[0].get("message", {})
                tool_calls = message.get("tool_calls", [])

                if tool_calls:
                    return ToolTestResult(
                        model_id=model_id,
                        supports_tools=True,
                        parallel_tools=len(tool_calls) > 1,
                        tool_choice=["auto"],
                        latency_ms=latency,
                        notes="Tool calling verified",
                    )

            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                latency_ms=latency,
                notes="Model did not use tools in test",
            )

        except Exception as e:
            return ToolTestResult(
                model_id=model_id,
                supports_tools=False,
                error=str(e),
            )

    def get_litellm_model_name(self, model_id: str) -> str:
        """Get LiteLLM-compatible model name (prefixed with "huggingface/")."""
        if model_id.startswith("huggingface/"):
            return model_id
        return f"huggingface/{model_id}"
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
async def get_tgi_client(host: Optional[str] = None) -> Optional[TGIClient]:
    """Probe for a running TGI server and return a client for it.

    Args:
        host: Optional host override.

    Returns:
        TGIClient if TGI is running, None otherwise.
    """
    candidate = TGIClient(host)
    return candidate if await candidate.is_available() else None
|