superqode 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superqode/__init__.py +33 -0
- superqode/acp/__init__.py +23 -0
- superqode/acp/client.py +913 -0
- superqode/acp/permission_screen.py +457 -0
- superqode/acp/types.py +480 -0
- superqode/acp_discovery.py +856 -0
- superqode/agent/__init__.py +22 -0
- superqode/agent/edit_strategies.py +334 -0
- superqode/agent/loop.py +892 -0
- superqode/agent/qe_report_templates.py +39 -0
- superqode/agent/system_prompts.py +353 -0
- superqode/agent_output.py +721 -0
- superqode/agent_stream.py +953 -0
- superqode/agents/__init__.py +59 -0
- superqode/agents/acp_registry.py +305 -0
- superqode/agents/client.py +249 -0
- superqode/agents/data/augmentcode.com.toml +51 -0
- superqode/agents/data/cagent.dev.toml +51 -0
- superqode/agents/data/claude.com.toml +60 -0
- superqode/agents/data/codeassistant.dev.toml +51 -0
- superqode/agents/data/codex.openai.com.toml +57 -0
- superqode/agents/data/fastagent.ai.toml +66 -0
- superqode/agents/data/geminicli.com.toml +77 -0
- superqode/agents/data/goose.block.xyz.toml +54 -0
- superqode/agents/data/junie.jetbrains.com.toml +56 -0
- superqode/agents/data/kimi.moonshot.cn.toml +57 -0
- superqode/agents/data/llmlingagent.dev.toml +51 -0
- superqode/agents/data/molt.bot.toml +49 -0
- superqode/agents/data/opencode.ai.toml +60 -0
- superqode/agents/data/stakpak.dev.toml +51 -0
- superqode/agents/data/vtcode.dev.toml +51 -0
- superqode/agents/discovery.py +266 -0
- superqode/agents/messaging.py +160 -0
- superqode/agents/persona.py +166 -0
- superqode/agents/registry.py +421 -0
- superqode/agents/schema.py +72 -0
- superqode/agents/unified.py +367 -0
- superqode/app/__init__.py +111 -0
- superqode/app/constants.py +314 -0
- superqode/app/css.py +366 -0
- superqode/app/models.py +118 -0
- superqode/app/suggester.py +125 -0
- superqode/app/widgets.py +1591 -0
- superqode/app_enhanced.py +399 -0
- superqode/app_main.py +17187 -0
- superqode/approval.py +312 -0
- superqode/atomic.py +296 -0
- superqode/commands/__init__.py +1 -0
- superqode/commands/acp.py +965 -0
- superqode/commands/agents.py +180 -0
- superqode/commands/auth.py +278 -0
- superqode/commands/config.py +374 -0
- superqode/commands/init.py +826 -0
- superqode/commands/providers.py +819 -0
- superqode/commands/qe.py +1145 -0
- superqode/commands/roles.py +380 -0
- superqode/commands/serve.py +172 -0
- superqode/commands/suggestions.py +127 -0
- superqode/commands/superqe.py +460 -0
- superqode/config/__init__.py +51 -0
- superqode/config/loader.py +812 -0
- superqode/config/schema.py +498 -0
- superqode/core/__init__.py +111 -0
- superqode/core/roles.py +281 -0
- superqode/danger.py +386 -0
- superqode/data/superqode-template.yaml +1522 -0
- superqode/design_system.py +1080 -0
- superqode/dialogs/__init__.py +6 -0
- superqode/dialogs/base.py +39 -0
- superqode/dialogs/model.py +130 -0
- superqode/dialogs/provider.py +870 -0
- superqode/diff_view.py +919 -0
- superqode/enterprise.py +21 -0
- superqode/evaluation/__init__.py +25 -0
- superqode/evaluation/adapters.py +93 -0
- superqode/evaluation/behaviors.py +89 -0
- superqode/evaluation/engine.py +209 -0
- superqode/evaluation/scenarios.py +96 -0
- superqode/execution/__init__.py +36 -0
- superqode/execution/linter.py +538 -0
- superqode/execution/modes.py +347 -0
- superqode/execution/resolver.py +283 -0
- superqode/execution/runner.py +642 -0
- superqode/file_explorer.py +811 -0
- superqode/file_viewer.py +471 -0
- superqode/flash.py +183 -0
- superqode/guidance/__init__.py +58 -0
- superqode/guidance/config.py +203 -0
- superqode/guidance/prompts.py +71 -0
- superqode/harness/__init__.py +54 -0
- superqode/harness/accelerator.py +291 -0
- superqode/harness/config.py +319 -0
- superqode/harness/validator.py +147 -0
- superqode/history.py +279 -0
- superqode/integrations/superopt_runner.py +124 -0
- superqode/logging/__init__.py +49 -0
- superqode/logging/adapters.py +219 -0
- superqode/logging/formatter.py +923 -0
- superqode/logging/integration.py +341 -0
- superqode/logging/sinks.py +170 -0
- superqode/logging/unified_log.py +417 -0
- superqode/lsp/__init__.py +26 -0
- superqode/lsp/client.py +544 -0
- superqode/main.py +1069 -0
- superqode/mcp/__init__.py +89 -0
- superqode/mcp/auth_storage.py +380 -0
- superqode/mcp/client.py +1236 -0
- superqode/mcp/config.py +319 -0
- superqode/mcp/integration.py +337 -0
- superqode/mcp/oauth.py +436 -0
- superqode/mcp/oauth_callback.py +385 -0
- superqode/mcp/types.py +290 -0
- superqode/memory/__init__.py +31 -0
- superqode/memory/feedback.py +342 -0
- superqode/memory/store.py +522 -0
- superqode/notifications.py +369 -0
- superqode/optimization/__init__.py +5 -0
- superqode/optimization/config.py +33 -0
- superqode/permissions/__init__.py +25 -0
- superqode/permissions/rules.py +488 -0
- superqode/plan.py +323 -0
- superqode/providers/__init__.py +33 -0
- superqode/providers/gateway/__init__.py +165 -0
- superqode/providers/gateway/base.py +228 -0
- superqode/providers/gateway/litellm_gateway.py +1170 -0
- superqode/providers/gateway/openresponses_gateway.py +436 -0
- superqode/providers/health.py +297 -0
- superqode/providers/huggingface/__init__.py +74 -0
- superqode/providers/huggingface/downloader.py +472 -0
- superqode/providers/huggingface/endpoints.py +442 -0
- superqode/providers/huggingface/hub.py +531 -0
- superqode/providers/huggingface/inference.py +394 -0
- superqode/providers/huggingface/transformers_runner.py +516 -0
- superqode/providers/local/__init__.py +100 -0
- superqode/providers/local/base.py +438 -0
- superqode/providers/local/discovery.py +418 -0
- superqode/providers/local/lmstudio.py +256 -0
- superqode/providers/local/mlx.py +457 -0
- superqode/providers/local/ollama.py +486 -0
- superqode/providers/local/sglang.py +268 -0
- superqode/providers/local/tgi.py +260 -0
- superqode/providers/local/tool_support.py +477 -0
- superqode/providers/local/vllm.py +258 -0
- superqode/providers/manager.py +1338 -0
- superqode/providers/models.py +1016 -0
- superqode/providers/models_dev.py +578 -0
- superqode/providers/openresponses/__init__.py +87 -0
- superqode/providers/openresponses/converters/__init__.py +17 -0
- superqode/providers/openresponses/converters/messages.py +343 -0
- superqode/providers/openresponses/converters/tools.py +268 -0
- superqode/providers/openresponses/schema/__init__.py +56 -0
- superqode/providers/openresponses/schema/models.py +585 -0
- superqode/providers/openresponses/streaming/__init__.py +5 -0
- superqode/providers/openresponses/streaming/parser.py +338 -0
- superqode/providers/openresponses/tools/__init__.py +21 -0
- superqode/providers/openresponses/tools/apply_patch.py +352 -0
- superqode/providers/openresponses/tools/code_interpreter.py +290 -0
- superqode/providers/openresponses/tools/file_search.py +333 -0
- superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
- superqode/providers/registry.py +716 -0
- superqode/providers/usage.py +332 -0
- superqode/pure_mode.py +384 -0
- superqode/qr/__init__.py +23 -0
- superqode/qr/dashboard.py +781 -0
- superqode/qr/generator.py +1018 -0
- superqode/qr/templates.py +135 -0
- superqode/safety/__init__.py +41 -0
- superqode/safety/sandbox.py +413 -0
- superqode/safety/warnings.py +256 -0
- superqode/server/__init__.py +33 -0
- superqode/server/lsp_server.py +775 -0
- superqode/server/web.py +250 -0
- superqode/session/__init__.py +25 -0
- superqode/session/persistence.py +580 -0
- superqode/session/sharing.py +477 -0
- superqode/session.py +475 -0
- superqode/sidebar.py +2991 -0
- superqode/stream_view.py +648 -0
- superqode/styles/__init__.py +3 -0
- superqode/superqe/__init__.py +184 -0
- superqode/superqe/acp_runner.py +1064 -0
- superqode/superqe/constitution/__init__.py +62 -0
- superqode/superqe/constitution/evaluator.py +308 -0
- superqode/superqe/constitution/loader.py +432 -0
- superqode/superqe/constitution/schema.py +250 -0
- superqode/superqe/events.py +591 -0
- superqode/superqe/frameworks/__init__.py +65 -0
- superqode/superqe/frameworks/base.py +234 -0
- superqode/superqe/frameworks/e2e.py +263 -0
- superqode/superqe/frameworks/executor.py +237 -0
- superqode/superqe/frameworks/javascript.py +409 -0
- superqode/superqe/frameworks/python.py +373 -0
- superqode/superqe/frameworks/registry.py +92 -0
- superqode/superqe/mcp_tools/__init__.py +47 -0
- superqode/superqe/mcp_tools/core_tools.py +418 -0
- superqode/superqe/mcp_tools/registry.py +230 -0
- superqode/superqe/mcp_tools/testing_tools.py +167 -0
- superqode/superqe/noise.py +89 -0
- superqode/superqe/orchestrator.py +778 -0
- superqode/superqe/roles.py +609 -0
- superqode/superqe/session.py +713 -0
- superqode/superqe/skills/__init__.py +57 -0
- superqode/superqe/skills/base.py +106 -0
- superqode/superqe/skills/core_skills.py +899 -0
- superqode/superqe/skills/registry.py +90 -0
- superqode/superqe/verifier.py +101 -0
- superqode/superqe_cli.py +76 -0
- superqode/tool_call.py +358 -0
- superqode/tools/__init__.py +93 -0
- superqode/tools/agent_tools.py +496 -0
- superqode/tools/base.py +324 -0
- superqode/tools/batch_tool.py +133 -0
- superqode/tools/diagnostics.py +311 -0
- superqode/tools/edit_tools.py +653 -0
- superqode/tools/enhanced_base.py +515 -0
- superqode/tools/file_tools.py +269 -0
- superqode/tools/file_tracking.py +45 -0
- superqode/tools/lsp_tools.py +610 -0
- superqode/tools/network_tools.py +350 -0
- superqode/tools/permissions.py +400 -0
- superqode/tools/question_tool.py +324 -0
- superqode/tools/search_tools.py +598 -0
- superqode/tools/shell_tools.py +259 -0
- superqode/tools/todo_tools.py +121 -0
- superqode/tools/validation.py +80 -0
- superqode/tools/web_tools.py +639 -0
- superqode/tui.py +1152 -0
- superqode/tui_integration.py +875 -0
- superqode/tui_widgets/__init__.py +27 -0
- superqode/tui_widgets/widgets/__init__.py +18 -0
- superqode/tui_widgets/widgets/progress.py +185 -0
- superqode/tui_widgets/widgets/tool_display.py +188 -0
- superqode/undo_manager.py +574 -0
- superqode/utils/__init__.py +5 -0
- superqode/utils/error_handling.py +323 -0
- superqode/utils/fuzzy.py +257 -0
- superqode/widgets/__init__.py +477 -0
- superqode/widgets/agent_collab.py +390 -0
- superqode/widgets/agent_store.py +936 -0
- superqode/widgets/agent_switcher.py +395 -0
- superqode/widgets/animation_manager.py +284 -0
- superqode/widgets/code_context.py +356 -0
- superqode/widgets/command_palette.py +412 -0
- superqode/widgets/connection_status.py +537 -0
- superqode/widgets/conversation_history.py +470 -0
- superqode/widgets/diff_indicator.py +155 -0
- superqode/widgets/enhanced_status_bar.py +385 -0
- superqode/widgets/enhanced_toast.py +476 -0
- superqode/widgets/file_browser.py +809 -0
- superqode/widgets/file_reference.py +585 -0
- superqode/widgets/issue_timeline.py +340 -0
- superqode/widgets/leader_key.py +264 -0
- superqode/widgets/mode_switcher.py +445 -0
- superqode/widgets/model_picker.py +234 -0
- superqode/widgets/permission_preview.py +1205 -0
- superqode/widgets/prompt.py +358 -0
- superqode/widgets/provider_connect.py +725 -0
- superqode/widgets/pty_shell.py +587 -0
- superqode/widgets/qe_dashboard.py +321 -0
- superqode/widgets/resizable_sidebar.py +377 -0
- superqode/widgets/response_changes.py +218 -0
- superqode/widgets/response_display.py +528 -0
- superqode/widgets/rich_tool_display.py +613 -0
- superqode/widgets/sidebar_panels.py +1180 -0
- superqode/widgets/slash_complete.py +356 -0
- superqode/widgets/split_view.py +612 -0
- superqode/widgets/status_bar.py +273 -0
- superqode/widgets/superqode_display.py +786 -0
- superqode/widgets/thinking_display.py +815 -0
- superqode/widgets/throbber.py +87 -0
- superqode/widgets/toast.py +206 -0
- superqode/widgets/unified_output.py +1073 -0
- superqode/workspace/__init__.py +75 -0
- superqode/workspace/artifacts.py +472 -0
- superqode/workspace/coordinator.py +353 -0
- superqode/workspace/diff_tracker.py +429 -0
- superqode/workspace/git_guard.py +373 -0
- superqode/workspace/git_snapshot.py +526 -0
- superqode/workspace/manager.py +750 -0
- superqode/workspace/snapshot.py +357 -0
- superqode/workspace/watcher.py +535 -0
- superqode/workspace/worktree.py +440 -0
- superqode-0.1.5.dist-info/METADATA +204 -0
- superqode-0.1.5.dist-info/RECORD +288 -0
- superqode-0.1.5.dist-info/WHEEL +5 -0
- superqode-0.1.5.dist-info/entry_points.txt +3 -0
- superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
- superqode-0.1.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""Base classes and data structures for local LLM providers.
|
|
2
|
+
|
|
3
|
+
This module provides the foundation for interacting with self-hosted LLM servers
|
|
4
|
+
like Ollama, vLLM, SGLang, LM Studio, MLX, TGI, and llama.cpp.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LocalProviderType(Enum):
    """Enumerates the kinds of self-hosted LLM backends known to this module.

    Each member's value is the lowercase string identifier of the backend.
    """

    # Backends with a dedicated identifier.
    OLLAMA = "ollama"
    LMSTUDIO = "lmstudio"
    VLLM = "vllm"
    SGLANG = "sglang"
    MLX = "mlx"
    TGI = "tgi"
    LLAMACPP = "llamacpp"

    # Generic identifier; LocalProviderClient uses it as its default
    # ``provider_type``.
    OPENAI_COMPAT = "openai_compatible"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Quantization(Enum):
    """Well-known model quantization / precision labels.

    Member values are the labels as they commonly appear (upper-case) in
    model identifiers; ``UNKNOWN`` is the only lower-case value.
    Definition order is significant for code that iterates the enum and
    returns the first match.
    """

    # --- floating-point precisions ---
    F32 = "F32"  # full precision
    F16 = "F16"  # half precision
    BF16 = "BF16"  # brain float 16

    # --- fixed-width and K-quant formats ---
    Q8_0 = "Q8_0"  # 8-bit
    Q6_K = "Q6_K"  # 6-bit K-quant
    Q5_K_M = "Q5_K_M"  # 5-bit K-quant, medium
    Q5_K_S = "Q5_K_S"  # 5-bit K-quant, small
    Q4_K_M = "Q4_K_M"  # 4-bit K-quant, medium
    Q4_K_S = "Q4_K_S"  # 4-bit K-quant, small
    Q4_0 = "Q4_0"  # 4-bit
    Q3_K_M = "Q3_K_M"  # 3-bit K-quant, medium
    Q3_K_S = "Q3_K_S"  # 3-bit K-quant, small
    Q2_K = "Q2_K"  # 2-bit K-quant

    # --- importance quantization ---
    IQ4_XS = "IQ4_XS"  # 4-bit
    IQ3_XS = "IQ3_XS"  # 3-bit
    IQ2_XS = "IQ2_XS"  # 2-bit

    # --- other schemes / catch-alls ---
    GPTQ = "GPTQ"  # GPTQ quantization
    AWQ = "AWQ"  # AWQ quantization
    GGUF = "GGUF"  # generic GGUF, exact quant not known
    UNKNOWN = "unknown"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class LocalModel:
    """Description of one model exposed by a local provider.

    Attributes:
        id: Unique model identifier (e.g., "llama3.2:latest").
        name: Human-readable name (e.g., "Llama 3.2").
        size_bytes: Size of the model file, in bytes.
        quantization: Quantization format label (Q4_K_M, Q8_0, F16, ...).
        context_window: Maximum context length, in tokens.
        supports_tools: True when the model supports function/tool calling.
        supports_vision: True when the model accepts image inputs.
        family: Model family (llama, qwen, mistral, phi, ...).
        running: True when the model is currently loaded in memory.
        gpu_layers: Number of layers offloaded to the GPU.
        vram_usage: VRAM used while loaded, in bytes.
        parameter_count: Parameter count label (e.g., "8B", "70B").
        modified_at: Timestamp of the last modification, if known.
        digest: Digest/hash of the model file.
        details: Extra provider-specific details.
    """

    id: str
    name: str
    size_bytes: int = 0
    quantization: str = "unknown"
    context_window: int = 4096
    supports_tools: bool = False
    supports_vision: bool = False
    family: str = "unknown"
    running: bool = False
    gpu_layers: int = 0
    vram_usage: int = 0
    parameter_count: str = ""
    modified_at: Optional[datetime] = None
    digest: str = ""
    details: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _format_bytes(byte_count: int) -> str:
        """Render a byte count as "<x.y>GB", "<n>MB", or "unknown" for zero."""
        if byte_count == 0:
            return "unknown"
        gigabytes = byte_count / (1024**3)
        if gigabytes < 1:
            # Below one gigabyte: report whole megabytes instead.
            return f"{byte_count / (1024**2):.0f}MB"
        return f"{gigabytes:.1f}GB"

    @property
    def size_display(self) -> str:
        """Model file size formatted for display."""
        return self._format_bytes(self.size_bytes)

    @property
    def vram_display(self) -> str:
        """VRAM usage formatted for display."""
        return self._format_bytes(self.vram_usage)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class ProviderStatus:
    """Snapshot of a local provider's health.

    Attributes:
        available: True when the provider is reachable.
        provider_type: Which kind of provider this status describes.
        host: Host URL of the provider.
        version: Version string reported for the provider.
        models_count: How many models are available.
        running_models: How many models are currently loaded.
        gpu_available: True when GPU acceleration is available.
        error: Error message when the provider is not available.
        latency_ms: Response latency, in milliseconds.
        last_checked: When this status was last checked, if recorded.
    """

    # Required fields.
    available: bool
    provider_type: LocalProviderType
    host: str

    # Optional details; defaults represent "nothing known".
    version: str = ""
    models_count: int = 0
    running_models: int = 0
    gpu_available: bool = False
    error: str = ""
    latency_ms: float = 0.0
    last_checked: Optional[datetime] = None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
class ToolTestResult:
    """Outcome of probing a model for tool-calling capability.

    Attributes:
        model_id: Identifier of the model that was tested.
        supports_tools: True when tool calling works.
        parallel_tools: True when parallel tool calls work.
        tool_choice: Supported ``tool_choice`` modes.
        error: Error message when the test failed.
        latency_ms: How long the test took to execute.
        notes: Free-form notes about the capability.
    """

    # Required fields.
    model_id: str
    supports_tools: bool

    # Optional details.
    parallel_tools: bool = False
    # Entries such as "auto", "required", "none".
    tool_choice: List[str] = field(default_factory=list)
    error: str = ""
    latency_ms: float = 0.0
    notes: str = ""
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@dataclass
class GenerationConfig:
    """Sampling and runtime options for a text-generation request.

    Attributes:
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature (0.0-2.0).
        top_p: Nucleus sampling threshold.
        top_k: Top-k sampling cutoff.
        stop: Sequences that stop generation.
        num_ctx: Context window size (Ollama-specific).
        num_gpu: Number of GPU layers to use.
        repeat_penalty: Penalty applied to repetition.
        seed: Random seed for reproducible output.
    """

    # Sampling controls.
    max_tokens: int = 2048
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 40
    stop: List[str] = field(default_factory=list)

    # Runtime controls.
    num_ctx: int = 0  # 0 means "use the model default"
    num_gpu: int = -1  # -1 means auto, 0 means CPU only
    repeat_penalty: float = 1.1
    seed: Optional[int] = None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class LocalProviderClient(ABC):
    """Common interface implemented by every local LLM provider client.

    Concrete clients (Ollama, vLLM, LM Studio, ...) subclass this and
    provide the abstract coroutines. The remaining methods have
    conservative defaults that subclasses may override.
    """

    # Class-level defaults, intended to be overridden by subclasses.
    provider_type: LocalProviderType = LocalProviderType.OPENAI_COMPAT
    default_port: int = 8080

    def __init__(self, host: Optional[str] = None):
        """Create a client.

        Args:
            host: Provider host URL. When omitted (or empty), ``host``
                falls back to localhost on ``default_port``.
        """
        self._host = host

    @property
    def host(self) -> str:
        """Provider host URL, without trailing slashes."""
        configured = self._host
        if not configured:
            # Nothing configured: assume a server on the local machine.
            return f"http://localhost:{self.default_port}"
        return configured.rstrip("/")

    @abstractmethod
    async def is_available(self) -> bool:
        """Report whether the provider is running and reachable.

        Returns:
            True if the provider is available, False otherwise.
        """

    @abstractmethod
    async def get_status(self) -> ProviderStatus:
        """Collect detailed provider status.

        Returns:
            A ProviderStatus carrying availability and capability info.
        """

    @abstractmethod
    async def list_models(self) -> List[LocalModel]:
        """Enumerate every model available on this provider.

        Returns:
            A list of LocalModel objects.
        """

    @abstractmethod
    async def list_running(self) -> List[LocalModel]:
        """Enumerate the models currently loaded in memory.

        Returns:
            A list of LocalModel objects that are running.
        """

    @abstractmethod
    async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
        """Look up detailed information for a single model.

        Args:
            model_id: The model identifier.

        Returns:
            A LocalModel with detailed info, or None if not found.
        """

    async def test_tool_calling(self, model_id: str) -> ToolTestResult:
        """Probe whether a model supports tool/function calling.

        This base implementation performs no probe and reports no tool
        support; providers that can test tools should override it.

        Args:
            model_id: The model identifier to test.

        Returns:
            A ToolTestResult with capability information.
        """
        not_implemented = ToolTestResult(
            model_id=model_id,
            supports_tools=False,
            notes="Tool testing not implemented for this provider",
        )
        return not_implemented

    async def pull_model(self, model_id: str) -> bool:
        """Pull/download a model.

        Not every provider supports pulling; this default does nothing
        and returns False.

        Args:
            model_id: The model to pull.

        Returns:
            True if the pull succeeded, False otherwise.
        """
        return False

    async def delete_model(self, model_id: str) -> bool:
        """Delete a model.

        Not every provider supports deletion; this default does nothing
        and returns False.

        Args:
            model_id: The model to delete.

        Returns:
            True if the deletion succeeded, False otherwise.
        """
        return False

    def get_litellm_model_name(self, model_id: str) -> str:
        """Translate a local model id into a LiteLLM-compatible name.

        Args:
            model_id: Local model identifier.

        Returns:
            The model name formatted for LiteLLM.
        """
        # The base class passes the id through unchanged; subclasses are
        # expected to add whatever prefix their provider needs.
        return model_id
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# Substring patterns used to recognise a model's family from its id.
# Keys are family names; each value lists lowercase fragments to look for.
# detect_model_family() walks this mapping in insertion order, so the order
# of the entries matters when an id matches more than one family.
MODEL_FAMILIES = {
    "llama": ["llama", "codellama", "tinyllama"],
    "qwen": ["qwen"],
    "mistral": ["mistral", "mixtral"],
    "phi": ["phi"],
    "gemma": ["gemma"],
    "deepseek": ["deepseek"],
    "starcoder": ["starcoder", "starcode"],
    "codestral": ["codestral"],
    "yi": ["yi-"],
    "vicuna": ["vicuna"],
    "wizard": ["wizard"],
    "openchat": ["openchat"],
    "neural": ["neural"],
    "dolphin": ["dolphin"],
    "orca": ["orca"],
    "nous": ["nous"],
    "hermes": ["hermes"],
    "zephyr": ["zephyr"],
    "solar": ["solar"],
    "command": ["command-r"],
}

# Families whose models are known to support tool calling. Membership is
# necessary but not sufficient: likely_supports_tools() applies additional
# version checks for some of these families.
TOOL_CAPABLE_FAMILIES = {
    "llama",  # Llama 3.1+
    "qwen",  # Qwen 2.5+
    "mistral",  # Mistral/Mixtral
    "deepseek",  # DeepSeek
    "command",  # Command-R
    "hermes",  # Hermes (fine-tuned for tools)
}
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def detect_model_family(model_id: str) -> str:
    """Detect the model family from a model ID.

    Families and their patterns are tried in the order they appear in
    ``MODEL_FAMILIES``; the first family with a valid match wins. Matching
    is a case-insensitive substring test.

    A pattern occurrence does not count when it lies entirely inside a
    longer pattern that belongs to a *different* family. Without this rule
    the short pattern "phi" matches inside "dolphin", which made every
    Dolphin model report as "phi" and left the "dolphin" family
    unreachable. An id such as "dolphin-phi" still resolves to "phi"
    because "phi" also occurs on its own.

    Args:
        model_id: Model identifier (e.g., "llama3.2:8b-instruct-q4_K_M").

    Returns:
        Family name, or "unknown" when no pattern matches.
    """
    model_lower = model_id.lower()

    # Flatten the table once, preserving family order then pattern order.
    known = [
        (family, pattern)
        for family, patterns in MODEL_FAMILIES.items()
        for pattern in patterns
    ]

    for family, pattern in known:
        if pattern not in model_lower:
            continue

        # Blank out longer patterns of other families that embed this one,
        # then require the pattern to still be present somewhere else.
        # "\0" is used as filler because it cannot be part of any pattern.
        remaining = model_lower
        for other_family, other_pattern in known:
            if (
                other_family != family
                and len(other_pattern) > len(pattern)
                and pattern in other_pattern
            ):
                remaining = remaining.replace(other_pattern, "\0")

        if pattern in remaining:
            return family

    return "unknown"
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def detect_quantization(model_id: str) -> str:
    """Detect the quantization format from a model ID.

    The upper-cased id is searched for each ``Quantization`` value in enum
    definition order and the first valid match is returned. An occurrence
    is ignored when it lies entirely inside a longer ``Quantization`` value
    that is also present: "F16" is a substring of "BF16" and is defined
    first, so without this rule every bf16 model was reported as "F16".

    If no enum value matches, the "FP16" / "FP32" spellings are mapped to
    "F16" / "F32".

    Args:
        model_id: Model identifier.

    Returns:
        Quantization string, or "unknown" when nothing is recognised.
    """
    model_upper = model_id.upper()
    values = [quant.value for quant in Quantization]

    # Check for known quantization labels. Note that the lower-case
    # "unknown" value can never occur in an upper-cased string.
    for value in values:
        if value not in model_upper:
            continue

        # Mask longer labels that contain this one (e.g. BF16 vs F16) and
        # accept the match only if the label also appears outside them.
        remaining = model_upper
        for longer in values:
            if len(longer) > len(value) and value in longer:
                remaining = remaining.replace(longer, "\0")

        if value in remaining:
            return value

    # Alternative spellings that are not enum values themselves. Plain
    # F16/F32/BF16 need no fallback: the loop above already returns them.
    if "FP16" in model_upper:
        return "F16"
    if "FP32" in model_upper:
        return "F32"

    return "unknown"
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _model_version_at_least(
    model_lower: str,
    name: str,
    minimum: tuple,
    minor_separators: str = ".",
) -> bool:
    """Return True if ``name`` is followed by a version >= ``minimum``.

    Recognises forms such as "llama3.1", "llama-3.2" or "qwen2-5": the
    name, an optional "-" or "_", a major number, and optionally one of
    ``minor_separators`` plus a minor number. A missing minor counts as 0.

    Numbers that are really a parameter size (digits/dots directly followed
    by "b", as in "llama-7b" or "qwen2-72b") are not read as versions.

    Args:
        model_lower: Lower-cased model identifier.
        name: Lower-case family token that precedes the version.
        minimum: ``(major, minor)`` minimum version, inclusive.
        minor_separators: Characters accepted between major and minor.
    """
    import re  # local import: keeps this block self-contained

    version_re = re.compile(
        re.escape(name)
        + r"[-_]?(\d+)(?:["
        + re.escape(minor_separators)
        + r"](\d+))?(?![\d.]*b)"
    )
    for match in version_re.finditer(model_lower):
        found = (int(match.group(1)), int(match.group(2) or 0))
        if found >= minimum:
            return True
    return False


def likely_supports_tools(model_id: str) -> bool:
    """Estimate whether a model likely supports tool calling.

    The estimate is a heuristic based on the detected model family and,
    for some families, the version found in the id:

    * llama: version 3.1 or newer (e.g. "llama3.1", "llama-3.3", "llama4").
    * qwen: version 2.5 or newer (e.g. "qwen2.5", "qwen2-5", "qwen3").
    * mistral, command, hermes, deepseek: always considered capable.
    * any other family: not considered capable.

    Earlier revisions hard-coded the 3.1/3.2/3.3 and 2.5 spellings, which
    rejected newer releases despite the documented "3.1+" / "2.5+" intent.

    Args:
        model_id: Model identifier.

    Returns:
        True if the model likely supports tools.
    """
    family = detect_model_family(model_id)
    if family not in TOOL_CAPABLE_FAMILIES:
        return False

    model_lower = model_id.lower()

    # Version-gated families.
    if family == "llama":
        return _model_version_at_least(model_lower, "llama", (3, 1))
    if family == "qwen":
        # Qwen ids also appear with a dash as the minor separator ("qwen2-5").
        return _model_version_at_least(
            model_lower, "qwen", (2, 5), minor_separators=".-"
        )

    # Remaining tool-capable families need no version check.
    return family in ("mistral", "command", "hermes", "deepseek")
|