superqode 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superqode/__init__.py +33 -0
- superqode/acp/__init__.py +23 -0
- superqode/acp/client.py +913 -0
- superqode/acp/permission_screen.py +457 -0
- superqode/acp/types.py +480 -0
- superqode/acp_discovery.py +856 -0
- superqode/agent/__init__.py +22 -0
- superqode/agent/edit_strategies.py +334 -0
- superqode/agent/loop.py +892 -0
- superqode/agent/qe_report_templates.py +39 -0
- superqode/agent/system_prompts.py +353 -0
- superqode/agent_output.py +721 -0
- superqode/agent_stream.py +953 -0
- superqode/agents/__init__.py +59 -0
- superqode/agents/acp_registry.py +305 -0
- superqode/agents/client.py +249 -0
- superqode/agents/data/augmentcode.com.toml +51 -0
- superqode/agents/data/cagent.dev.toml +51 -0
- superqode/agents/data/claude.com.toml +60 -0
- superqode/agents/data/codeassistant.dev.toml +51 -0
- superqode/agents/data/codex.openai.com.toml +57 -0
- superqode/agents/data/fastagent.ai.toml +66 -0
- superqode/agents/data/geminicli.com.toml +77 -0
- superqode/agents/data/goose.block.xyz.toml +54 -0
- superqode/agents/data/junie.jetbrains.com.toml +56 -0
- superqode/agents/data/kimi.moonshot.cn.toml +57 -0
- superqode/agents/data/llmlingagent.dev.toml +51 -0
- superqode/agents/data/molt.bot.toml +49 -0
- superqode/agents/data/opencode.ai.toml +60 -0
- superqode/agents/data/stakpak.dev.toml +51 -0
- superqode/agents/data/vtcode.dev.toml +51 -0
- superqode/agents/discovery.py +266 -0
- superqode/agents/messaging.py +160 -0
- superqode/agents/persona.py +166 -0
- superqode/agents/registry.py +421 -0
- superqode/agents/schema.py +72 -0
- superqode/agents/unified.py +367 -0
- superqode/app/__init__.py +111 -0
- superqode/app/constants.py +314 -0
- superqode/app/css.py +366 -0
- superqode/app/models.py +118 -0
- superqode/app/suggester.py +125 -0
- superqode/app/widgets.py +1591 -0
- superqode/app_enhanced.py +399 -0
- superqode/app_main.py +17187 -0
- superqode/approval.py +312 -0
- superqode/atomic.py +296 -0
- superqode/commands/__init__.py +1 -0
- superqode/commands/acp.py +965 -0
- superqode/commands/agents.py +180 -0
- superqode/commands/auth.py +278 -0
- superqode/commands/config.py +374 -0
- superqode/commands/init.py +826 -0
- superqode/commands/providers.py +819 -0
- superqode/commands/qe.py +1145 -0
- superqode/commands/roles.py +380 -0
- superqode/commands/serve.py +172 -0
- superqode/commands/suggestions.py +127 -0
- superqode/commands/superqe.py +460 -0
- superqode/config/__init__.py +51 -0
- superqode/config/loader.py +812 -0
- superqode/config/schema.py +498 -0
- superqode/core/__init__.py +111 -0
- superqode/core/roles.py +281 -0
- superqode/danger.py +386 -0
- superqode/data/superqode-template.yaml +1522 -0
- superqode/design_system.py +1080 -0
- superqode/dialogs/__init__.py +6 -0
- superqode/dialogs/base.py +39 -0
- superqode/dialogs/model.py +130 -0
- superqode/dialogs/provider.py +870 -0
- superqode/diff_view.py +919 -0
- superqode/enterprise.py +21 -0
- superqode/evaluation/__init__.py +25 -0
- superqode/evaluation/adapters.py +93 -0
- superqode/evaluation/behaviors.py +89 -0
- superqode/evaluation/engine.py +209 -0
- superqode/evaluation/scenarios.py +96 -0
- superqode/execution/__init__.py +36 -0
- superqode/execution/linter.py +538 -0
- superqode/execution/modes.py +347 -0
- superqode/execution/resolver.py +283 -0
- superqode/execution/runner.py +642 -0
- superqode/file_explorer.py +811 -0
- superqode/file_viewer.py +471 -0
- superqode/flash.py +183 -0
- superqode/guidance/__init__.py +58 -0
- superqode/guidance/config.py +203 -0
- superqode/guidance/prompts.py +71 -0
- superqode/harness/__init__.py +54 -0
- superqode/harness/accelerator.py +291 -0
- superqode/harness/config.py +319 -0
- superqode/harness/validator.py +147 -0
- superqode/history.py +279 -0
- superqode/integrations/superopt_runner.py +124 -0
- superqode/logging/__init__.py +49 -0
- superqode/logging/adapters.py +219 -0
- superqode/logging/formatter.py +923 -0
- superqode/logging/integration.py +341 -0
- superqode/logging/sinks.py +170 -0
- superqode/logging/unified_log.py +417 -0
- superqode/lsp/__init__.py +26 -0
- superqode/lsp/client.py +544 -0
- superqode/main.py +1069 -0
- superqode/mcp/__init__.py +89 -0
- superqode/mcp/auth_storage.py +380 -0
- superqode/mcp/client.py +1236 -0
- superqode/mcp/config.py +319 -0
- superqode/mcp/integration.py +337 -0
- superqode/mcp/oauth.py +436 -0
- superqode/mcp/oauth_callback.py +385 -0
- superqode/mcp/types.py +290 -0
- superqode/memory/__init__.py +31 -0
- superqode/memory/feedback.py +342 -0
- superqode/memory/store.py +522 -0
- superqode/notifications.py +369 -0
- superqode/optimization/__init__.py +5 -0
- superqode/optimization/config.py +33 -0
- superqode/permissions/__init__.py +25 -0
- superqode/permissions/rules.py +488 -0
- superqode/plan.py +323 -0
- superqode/providers/__init__.py +33 -0
- superqode/providers/gateway/__init__.py +165 -0
- superqode/providers/gateway/base.py +228 -0
- superqode/providers/gateway/litellm_gateway.py +1170 -0
- superqode/providers/gateway/openresponses_gateway.py +436 -0
- superqode/providers/health.py +297 -0
- superqode/providers/huggingface/__init__.py +74 -0
- superqode/providers/huggingface/downloader.py +472 -0
- superqode/providers/huggingface/endpoints.py +442 -0
- superqode/providers/huggingface/hub.py +531 -0
- superqode/providers/huggingface/inference.py +394 -0
- superqode/providers/huggingface/transformers_runner.py +516 -0
- superqode/providers/local/__init__.py +100 -0
- superqode/providers/local/base.py +438 -0
- superqode/providers/local/discovery.py +418 -0
- superqode/providers/local/lmstudio.py +256 -0
- superqode/providers/local/mlx.py +457 -0
- superqode/providers/local/ollama.py +486 -0
- superqode/providers/local/sglang.py +268 -0
- superqode/providers/local/tgi.py +260 -0
- superqode/providers/local/tool_support.py +477 -0
- superqode/providers/local/vllm.py +258 -0
- superqode/providers/manager.py +1338 -0
- superqode/providers/models.py +1016 -0
- superqode/providers/models_dev.py +578 -0
- superqode/providers/openresponses/__init__.py +87 -0
- superqode/providers/openresponses/converters/__init__.py +17 -0
- superqode/providers/openresponses/converters/messages.py +343 -0
- superqode/providers/openresponses/converters/tools.py +268 -0
- superqode/providers/openresponses/schema/__init__.py +56 -0
- superqode/providers/openresponses/schema/models.py +585 -0
- superqode/providers/openresponses/streaming/__init__.py +5 -0
- superqode/providers/openresponses/streaming/parser.py +338 -0
- superqode/providers/openresponses/tools/__init__.py +21 -0
- superqode/providers/openresponses/tools/apply_patch.py +352 -0
- superqode/providers/openresponses/tools/code_interpreter.py +290 -0
- superqode/providers/openresponses/tools/file_search.py +333 -0
- superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
- superqode/providers/registry.py +716 -0
- superqode/providers/usage.py +332 -0
- superqode/pure_mode.py +384 -0
- superqode/qr/__init__.py +23 -0
- superqode/qr/dashboard.py +781 -0
- superqode/qr/generator.py +1018 -0
- superqode/qr/templates.py +135 -0
- superqode/safety/__init__.py +41 -0
- superqode/safety/sandbox.py +413 -0
- superqode/safety/warnings.py +256 -0
- superqode/server/__init__.py +33 -0
- superqode/server/lsp_server.py +775 -0
- superqode/server/web.py +250 -0
- superqode/session/__init__.py +25 -0
- superqode/session/persistence.py +580 -0
- superqode/session/sharing.py +477 -0
- superqode/session.py +475 -0
- superqode/sidebar.py +2991 -0
- superqode/stream_view.py +648 -0
- superqode/styles/__init__.py +3 -0
- superqode/superqe/__init__.py +184 -0
- superqode/superqe/acp_runner.py +1064 -0
- superqode/superqe/constitution/__init__.py +62 -0
- superqode/superqe/constitution/evaluator.py +308 -0
- superqode/superqe/constitution/loader.py +432 -0
- superqode/superqe/constitution/schema.py +250 -0
- superqode/superqe/events.py +591 -0
- superqode/superqe/frameworks/__init__.py +65 -0
- superqode/superqe/frameworks/base.py +234 -0
- superqode/superqe/frameworks/e2e.py +263 -0
- superqode/superqe/frameworks/executor.py +237 -0
- superqode/superqe/frameworks/javascript.py +409 -0
- superqode/superqe/frameworks/python.py +373 -0
- superqode/superqe/frameworks/registry.py +92 -0
- superqode/superqe/mcp_tools/__init__.py +47 -0
- superqode/superqe/mcp_tools/core_tools.py +418 -0
- superqode/superqe/mcp_tools/registry.py +230 -0
- superqode/superqe/mcp_tools/testing_tools.py +167 -0
- superqode/superqe/noise.py +89 -0
- superqode/superqe/orchestrator.py +778 -0
- superqode/superqe/roles.py +609 -0
- superqode/superqe/session.py +713 -0
- superqode/superqe/skills/__init__.py +57 -0
- superqode/superqe/skills/base.py +106 -0
- superqode/superqe/skills/core_skills.py +899 -0
- superqode/superqe/skills/registry.py +90 -0
- superqode/superqe/verifier.py +101 -0
- superqode/superqe_cli.py +76 -0
- superqode/tool_call.py +358 -0
- superqode/tools/__init__.py +93 -0
- superqode/tools/agent_tools.py +496 -0
- superqode/tools/base.py +324 -0
- superqode/tools/batch_tool.py +133 -0
- superqode/tools/diagnostics.py +311 -0
- superqode/tools/edit_tools.py +653 -0
- superqode/tools/enhanced_base.py +515 -0
- superqode/tools/file_tools.py +269 -0
- superqode/tools/file_tracking.py +45 -0
- superqode/tools/lsp_tools.py +610 -0
- superqode/tools/network_tools.py +350 -0
- superqode/tools/permissions.py +400 -0
- superqode/tools/question_tool.py +324 -0
- superqode/tools/search_tools.py +598 -0
- superqode/tools/shell_tools.py +259 -0
- superqode/tools/todo_tools.py +121 -0
- superqode/tools/validation.py +80 -0
- superqode/tools/web_tools.py +639 -0
- superqode/tui.py +1152 -0
- superqode/tui_integration.py +875 -0
- superqode/tui_widgets/__init__.py +27 -0
- superqode/tui_widgets/widgets/__init__.py +18 -0
- superqode/tui_widgets/widgets/progress.py +185 -0
- superqode/tui_widgets/widgets/tool_display.py +188 -0
- superqode/undo_manager.py +574 -0
- superqode/utils/__init__.py +5 -0
- superqode/utils/error_handling.py +323 -0
- superqode/utils/fuzzy.py +257 -0
- superqode/widgets/__init__.py +477 -0
- superqode/widgets/agent_collab.py +390 -0
- superqode/widgets/agent_store.py +936 -0
- superqode/widgets/agent_switcher.py +395 -0
- superqode/widgets/animation_manager.py +284 -0
- superqode/widgets/code_context.py +356 -0
- superqode/widgets/command_palette.py +412 -0
- superqode/widgets/connection_status.py +537 -0
- superqode/widgets/conversation_history.py +470 -0
- superqode/widgets/diff_indicator.py +155 -0
- superqode/widgets/enhanced_status_bar.py +385 -0
- superqode/widgets/enhanced_toast.py +476 -0
- superqode/widgets/file_browser.py +809 -0
- superqode/widgets/file_reference.py +585 -0
- superqode/widgets/issue_timeline.py +340 -0
- superqode/widgets/leader_key.py +264 -0
- superqode/widgets/mode_switcher.py +445 -0
- superqode/widgets/model_picker.py +234 -0
- superqode/widgets/permission_preview.py +1205 -0
- superqode/widgets/prompt.py +358 -0
- superqode/widgets/provider_connect.py +725 -0
- superqode/widgets/pty_shell.py +587 -0
- superqode/widgets/qe_dashboard.py +321 -0
- superqode/widgets/resizable_sidebar.py +377 -0
- superqode/widgets/response_changes.py +218 -0
- superqode/widgets/response_display.py +528 -0
- superqode/widgets/rich_tool_display.py +613 -0
- superqode/widgets/sidebar_panels.py +1180 -0
- superqode/widgets/slash_complete.py +356 -0
- superqode/widgets/split_view.py +612 -0
- superqode/widgets/status_bar.py +273 -0
- superqode/widgets/superqode_display.py +786 -0
- superqode/widgets/thinking_display.py +815 -0
- superqode/widgets/throbber.py +87 -0
- superqode/widgets/toast.py +206 -0
- superqode/widgets/unified_output.py +1073 -0
- superqode/workspace/__init__.py +75 -0
- superqode/workspace/artifacts.py +472 -0
- superqode/workspace/coordinator.py +353 -0
- superqode/workspace/diff_tracker.py +429 -0
- superqode/workspace/git_guard.py +373 -0
- superqode/workspace/git_snapshot.py +526 -0
- superqode/workspace/manager.py +750 -0
- superqode/workspace/snapshot.py +357 -0
- superqode/workspace/watcher.py +535 -0
- superqode/workspace/worktree.py +440 -0
- superqode-0.1.5.dist-info/METADATA +204 -0
- superqode-0.1.5.dist-info/RECORD +288 -0
- superqode-0.1.5.dist-info/WHEEL +5 -0
- superqode-0.1.5.dist-info/entry_points.txt +3 -0
- superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
- superqode-0.1.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""MLX-LM client for Apple Silicon inference.
|
|
2
|
+
|
|
3
|
+
MLX-LM is a framework for running large language models locally
|
|
4
|
+
on Apple Silicon Macs using the MLX framework.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import glob
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import pathlib
|
|
12
|
+
import time
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
from urllib.error import URLError
|
|
16
|
+
from urllib.request import Request, urlopen
|
|
17
|
+
|
|
18
|
+
from superqode.providers.local.base import (
|
|
19
|
+
LocalProviderClient,
|
|
20
|
+
LocalProviderType,
|
|
21
|
+
LocalModel,
|
|
22
|
+
ProviderStatus,
|
|
23
|
+
ToolTestResult,
|
|
24
|
+
detect_model_family,
|
|
25
|
+
detect_quantization,
|
|
26
|
+
likely_supports_tools,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MLXClient(LocalProviderClient):
|
|
31
|
+
"""MLX-LM server client for Apple Silicon.
|
|
32
|
+
|
|
33
|
+
MLX-LM provides:
|
|
34
|
+
- Native Apple Silicon acceleration
|
|
35
|
+
- Efficient memory management on unified memory
|
|
36
|
+
- OpenAI-compatible API via mlx_lm.server
|
|
37
|
+
|
|
38
|
+
Start server with:
|
|
39
|
+
mlx_lm.server --model <model-path>
|
|
40
|
+
|
|
41
|
+
API Endpoints (OpenAI-compatible):
|
|
42
|
+
- GET /v1/models - List models
|
|
43
|
+
- POST /v1/chat/completions - Chat completion
|
|
44
|
+
- POST /v1/completions - Text completion
|
|
45
|
+
|
|
46
|
+
Environment:
|
|
47
|
+
MLX_HOST: Override default host (default: http://localhost:8080)
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
provider_type = LocalProviderType.MLX
|
|
51
|
+
default_port = 8080
|
|
52
|
+
|
|
53
|
+
@staticmethod
def is_model_supported(model_id: str) -> bool:
    """Report whether MLX is expected to be able to run ``model_id``.

    The decision is a case-insensitive substring match against a short
    deny-list. Everything that is not on the deny-list is reported as
    supported so that users can still try models this code has never seen.

    Args:
        model_id: HuggingFace model ID

    Returns:
        False if the ID contains a known non-working pattern, True otherwise
    """
    # Known non-working models (MoE architecture issues)
    blocked_fragments = (
        "gpt-oss",  # SuperagenticAI gpt-oss models have MoE issues
        "mixtral",  # Mixtral MoE models not supported
    )
    # Families known to work. Listed for reference: a hit here returns True,
    # which is also the default outcome below.
    known_good_families = ("qwen", "llama", "mistral", "phi", "gemma", "openhermes")

    lowered = model_id.lower()

    # The deny-list is checked first so "mixtral" wins over "mistral"-style hits.
    if any(fragment in lowered for fragment in blocked_fragments):
        return False

    if any(family in lowered for family in known_good_families):
        return True

    # Models from mlx-community are generally well-tested
    if "mlx-community" in model_id:
        return True

    # Unknown models default to supported (let the user try).
    return True
|
|
95
|
+
|
|
96
|
+
@staticmethod
def discover_huggingface_models() -> List[Dict[str, Any]]:
    """Discover MLX models from the HuggingFace cache.

    Scans the common cache locations for directories whose name contains
    ``mlx`` and maps each hit back to the cached repository it belongs to
    (``.../models--org--model-name``). Hits that are not inside a
    ``models--`` repository directory are ignored, and each repository is
    reported once even when several nested directories match.

    Returns:
        List of model info dicts with keys: ``id`` (``org/model-name``),
        ``path`` (the repository directory in the cache), ``size_bytes``
        (total size of that directory) and ``modified`` (``datetime`` of the
        directory's mtime).
    """
    models: List[Dict[str, Any]] = []
    seen_ids = set()
    repo_prefix = "models--"

    # Common HuggingFace cache locations
    cache_dirs = [
        os.path.expanduser("~/.cache/huggingface/hub"),
        os.path.expanduser("~/.cache/transformers"),
    ]

    for cache_dir in cache_dirs:
        if not os.path.isdir(cache_dir):
            continue

        # Look for MLX model directories (often contain 'mlx' in path)
        mlx_pattern = os.path.join(cache_dir, "**", "*mlx*")
        for match in glob.glob(mlx_pattern, recursive=True):
            if not os.path.isdir(match):
                continue

            # Path format: .../models--org--model-name/snapshots/hash/
            parts = match.split(os.sep)
            repo_index = next(
                (i for i, part in enumerate(parts) if part.startswith(repo_prefix)),
                None,
            )
            if repo_index is None:
                # Not inside a cached repository: there is no model ID to
                # derive, so skip instead of reusing a previous one.
                continue

            # Convert HF cache format to model ID
            model_id = parts[repo_index][len(repo_prefix):].replace("--", "/")
            if not model_id or model_id in seen_ids:
                continue

            repo_dir = os.sep.join(parts[: repo_index + 1])
            try:
                stat = os.stat(repo_dir)
                size = MLXClient._get_directory_size(repo_dir)
            except OSError:
                # Entry vanished or is unreadable; discovery is best-effort.
                continue

            seen_ids.add(model_id)
            models.append(
                {
                    "id": model_id,
                    "path": repo_dir,
                    "size_bytes": size,
                    "modified": datetime.fromtimestamp(stat.st_mtime),
                }
            )

    return models
|
|
144
|
+
|
|
145
|
+
@staticmethod
def _get_directory_size(path: str) -> int:
    """Return the combined size in bytes of every file beneath ``path``.

    Files whose size cannot be read (``OSError``) are skipped, so the result
    is a best-effort total. A path that cannot be walked yields 0.
    """
    byte_count = 0
    for root, _subdirs, names in os.walk(path):
        for name in names:
            try:
                byte_count += os.path.getsize(os.path.join(root, name))
            except OSError:
                # Unreadable or vanished file: leave it out of the total.
                pass
    return byte_count
|
|
157
|
+
|
|
158
|
+
@staticmethod
def get_available_models() -> List[LocalModel]:
    """Get all available MLX models (both from server and cache).

    Models reported by a running MLX server come first. Models found in the
    HuggingFace cache are appended afterwards when they pass
    ``is_model_supported`` and their ID is not already in the list.

    This is a synchronous helper. ``asyncio.run`` cannot be used on a thread
    that already has a running event loop, so in that situation the server
    query runs on a short-lived worker thread instead; the calling thread
    blocks until it finishes. Any failure while querying the server is
    ignored and only cached models are returned.

    Returns:
        List of ``LocalModel`` entries, server models first.
    """
    import concurrent.futures

    async def get_server_models() -> List[LocalModel]:
        try:
            client = MLXClient()
            if await client.is_available():
                return await client.list_models()
        except Exception:
            pass
        return []

    def run_blocking() -> List[LocalModel]:
        # Creates and runs the coroutine on whichever thread calls this.
        return asyncio.run(get_server_models())

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    models: List[LocalModel] = []
    try:
        if loop_is_running:
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                models.extend(pool.submit(run_blocking).result())
        else:
            models.extend(run_blocking())
    except Exception:
        # Server discovery is best-effort; fall back to the cache only.
        pass

    # Add models from HuggingFace cache (only supported ones)
    known_ids = {m.id for m in models}
    for model_info in MLXClient.discover_huggingface_models():
        model_id = model_info["id"]

        # Skip unsupported models and ones already listed.
        if not MLXClient.is_model_supported(model_id) or model_id in known_ids:
            continue
        known_ids.add(model_id)

        # Add note about supported formats
        lowered = model_id.lower()
        format_note = ""
        if "mlx" in lowered:
            format_note = " (MLX format)"
        elif "4bit" in lowered or "8bit" in lowered:
            format_note = " (quantized)"

        models.append(
            LocalModel(
                id=model_id,
                name=f"{model_id.split('/')[-1]} (cached){format_note}",
                size_bytes=model_info["size_bytes"],
                quantization=detect_quantization(model_id),
                context_window=4096,  # Default, would need model config
                supports_tools=likely_supports_tools(model_id),
                supports_vision=False,  # MLX vision support varies
                family=detect_model_family(model_id),
                running=False,  # Not running unless server says so
                modified_at=model_info["modified"],
                details={
                    "cached_path": model_info["path"],
                    "source": "huggingface_cache",
                    "supported_formats": ["MLX", "safetensors"],
                    "notes": "Works with mlx_lm.convert and mlx_lm.server",
                },
            )
        )

    return models
|
|
222
|
+
|
|
223
|
+
def __init__(self, host: Optional[str] = None):
    """Create a client for an MLX-LM server.

    Args:
        host: MLX server host URL. When None, the ``MLX_HOST`` environment
            variable is used; if that is unset too, None is handed to the
            base class.
    """
    resolved_host = host if host is not None else os.environ.get("MLX_HOST")
    super().__init__(resolved_host)
|
|
232
|
+
|
|
233
|
+
def _request(
    self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
) -> Any:
    """Send a blocking HTTP request to the MLX API and decode the JSON reply.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
        endpoint: Path appended verbatim to ``self.host``.
        data: Optional payload, sent as a UTF-8 encoded JSON body.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The parsed JSON response.

    Raises:
        URLError: If the request fails or the response body is not valid
            JSON. HTTP error statuses are raised by ``urlopen`` itself as
            ``HTTPError``, which is a ``URLError`` subclass.
    """
    url = f"{self.host}{endpoint}"
    headers = {"Content-Type": "application/json"}
    body = json.dumps(data).encode("utf-8") if data is not None else None

    request = Request(url, data=body, headers=headers, method=method)

    with urlopen(request, timeout=timeout) as response:
        response_data = response.read().decode("utf-8")

    try:
        return json.loads(response_data)
    except json.JSONDecodeError as e:
        # urlopen has already raised for error statuses, so reaching this
        # point means a successful response with a non-JSON body. Include
        # the start of the body to help debugging.
        raise URLError(
            f"Invalid JSON response from MLX server: {response_data[:200]}"
        ) from e
|
|
259
|
+
|
|
260
|
+
async def _async_request(
    self,
    method: str,
    endpoint: str,
    data: Optional[Dict] = None,
    timeout: float = 120.0,  # Increased timeout for large MLX models
) -> Any:
    """Run the blocking ``_request`` on the default executor and await it.

    Args:
        method: HTTP method forwarded to ``_request``.
        endpoint: Endpoint path forwarded to ``_request``.
        data: Optional JSON payload forwarded to ``_request``.
        timeout: Timeout in seconds forwarded to ``_request``.

    Returns:
        Whatever ``_request`` returns.
    """
    # A coroutine always executes inside a running loop, so ask for that loop
    # directly rather than going through the event-loop policy.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, lambda: self._request(method, endpoint, data, timeout)
    )
|
|
272
|
+
|
|
273
|
+
async def is_available(self) -> bool:
    """Return True when ``GET /v1/models`` succeeds within five seconds.

    Any exception raised by the request is treated as "not available".
    """
    try:
        await self._async_request("GET", "/v1/models", timeout=5.0)
    except Exception:
        return False
    else:
        return True
|
|
280
|
+
|
|
281
|
+
async def get_status(self) -> ProviderStatus:
    """Probe ``GET /v1/models`` and summarise the outcome.

    Returns:
        A ``ProviderStatus`` marked available, with the model count and the
        measured round-trip latency in milliseconds, when the probe
        succeeds. On any exception, a ``ProviderStatus`` marked unavailable
        that carries the exception text in ``error``.
    """
    started_at = time.time()

    try:
        payload = await self._async_request("GET", "/v1/models", timeout=5.0)
        elapsed_ms = (time.time() - started_at) * 1000
        served = payload.get("data", [])

        status = ProviderStatus(
            available=True,
            provider_type=self.provider_type,
            host=self.host,
            models_count=len(served),
            # Every model the server lists is counted as running.
            running_models=len(served),
            gpu_available=True,  # Apple Silicon GPU
            latency_ms=elapsed_ms,
            last_checked=datetime.now(),
        )
    except Exception as exc:
        status = ProviderStatus(
            available=False,
            provider_type=self.provider_type,
            host=self.host,
            error=str(exc),
            last_checked=datetime.now(),
        )

    return status
|
|
310
|
+
|
|
311
|
+
async def list_models(self) -> List[LocalModel]:
    """List the models reported by the MLX server's ``/v1/models`` endpoint.

    Each entry is enriched with whatever ``_get_model_info_safe`` returns
    for it and is marked as running.

    Returns:
        One ``LocalModel`` per server entry, or an empty list if anything
        fails along the way.
    """
    discovered: List[LocalModel] = []
    try:
        payload = await self._async_request("GET", "/v1/models")

        for entry in payload.get("data", []):
            model_id = entry.get("id", "")
            # MLX server might return a model path; keep only the last segment.
            display_name = model_id.rsplit("/", 1)[-1]

            # Extra details, when the server offers them.
            info = await self._get_model_info_safe(model_id)

            discovered.append(
                LocalModel(
                    id=model_id,
                    name=display_name,
                    size_bytes=info.get("size_bytes", 0),
                    quantization=info.get("quantization", detect_quantization(model_id)),
                    context_window=info.get("context_window", 4096),
                    supports_tools=likely_supports_tools(model_id),
                    supports_vision=info.get("supports_vision", False),
                    family=detect_model_family(model_id),
                    running=True,
                    parameter_count=info.get("parameter_count", ""),
                    modified_at=info.get("modified_at"),
                    details=info,
                )
            )
    except Exception:
        return []

    return discovered
|
|
347
|
+
|
|
348
|
+
async def _get_model_info_safe(self, model_id: str) -> Dict[str, Any]:
    """Fetch per-model details without failing if the endpoint is missing.

    Args:
        model_id: Model ID appended to ``/v1/models/``.

    Returns:
        The ``model_info`` mapping from the server response (empty if the
        key is absent). If the request raises, a basic fallback with
        ``context_window``, ``supports_vision`` and ``quantization``.
    """
    try:
        payload = await self._async_request("GET", f"/v1/models/{model_id}")
        info = payload.get("model_info", {})
    except Exception:
        # Fallback to basic info
        info = {
            "context_window": 4096,
            "supports_vision": False,
            "quantization": detect_quantization(model_id),
        }
    return info
|
|
361
|
+
|
|
362
|
+
async def list_running(self) -> List[LocalModel]:
    """Return the models from ``list_models`` with ``running`` set to True."""
    running_models = await self.list_models()
    for model in running_models:
        model.running = True
    return running_models
|
|
368
|
+
|
|
369
|
+
async def get_model_info(self, model_id: str) -> Optional[LocalModel]:
    """Look up one model among those returned by ``list_models``.

    Args:
        model_id: Full model ID, or just the part after the final ``/``.

    Returns:
        The first model whose ID equals ``model_id`` or ends with
        ``/<model_id>``; None when nothing matches.
    """
    path_suffix = f"/{model_id}"
    candidates = await self.list_models()
    return next(
        (
            candidate
            for candidate in candidates
            if candidate.id == model_id or candidate.id.endswith(path_suffix)
        ),
        None,
    )
|
|
376
|
+
|
|
377
|
+
async def test_tool_calling(self, model_id: str) -> ToolTestResult:
    """Report tool-calling capability for ``model_id``.

    No request is made: the result is always "not supported", with a note
    that MLX-LM tool support depends on server configuration.
    """
    # MLX-LM tool support varies by model and server version
    outcome = ToolTestResult(
        model_id=model_id,
        supports_tools=False,
        notes="MLX-LM tool support requires server configuration",
    )
    return outcome
|
|
385
|
+
|
|
386
|
+
def get_litellm_model_name(self, model_id: str) -> str:
    """Return the model name to hand to LiteLLM.

    MLX uses the OpenAI-compatible format, so the ID is passed through
    unchanged.
    """
    return model_id
|
|
390
|
+
|
|
391
|
+
@staticmethod
def get_server_command(model_path: str, host: str = "localhost", port: int = 8080) -> List[str]:
    """Build the argument list that starts ``mlx_lm.server``.

    Args:
        model_path: Path or HuggingFace model ID
        host: Server host (default: localhost)
        port: Server port (default: 8080)

    Returns:
        Command list for subprocess
    """
    command = ["mlx_lm.server"]
    command += ["--model", model_path]
    command += ["--host", host]
    command += ["--port", str(port)]
    return command
|
|
412
|
+
|
|
413
|
+
@staticmethod
def suggest_models() -> List[str]:
    """Return a curated list of popular model IDs that work well with MLX.

    Only includes models known to work with MLX (no MoE architecture issues).
    """
    # Standard transformers
    standard = [
        "mlx-community/Mistral-7B-Instruct-v0.1",
        "mlx-community/Llama-2-7b-chat-hf",
    ]
    # Quantized Llama
    quantized_llama = [
        "mlx-community/Llama-3.2-1B-Instruct-4bit",
        "mlx-community/Llama-3.2-3B-Instruct-4bit",
    ]
    # Microsoft Phi, Qwen coder, fine-tuned Mistral, small Qwen
    others = [
        "mlx-community/Phi-2",
        "mlx-community/Qwen2.5-Coder-7B-Instruct",
        "mlx-community/OpenHermes-2.5-Mistral-7B",
        "superagenticai/qwen3-0.6b-mlx-q4",
    ]
    return standard + quantized_llama + others
|
|
429
|
+
|
|
430
|
+
@staticmethod
async def check_mlx_lm_installed() -> bool:
    """Check if mlx_lm is installed.

    Runs ``mlx_lm.server --help`` with its output discarded.

    Returns:
        True if the command could be started and exited with status 0.
        False if it exited non-zero or could not be started at all, for
        example because it is missing from ``PATH`` or is not executable.
    """
    try:
        process = await asyncio.create_subprocess_exec(
            "mlx_lm.server",
            "--help",
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
    except OSError:
        # Covers FileNotFoundError as well as PermissionError and other
        # launch failures; all of them mean the tool is not usable.
        return False

    return await process.wait() == 0
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
async def get_mlx_client(host: Optional[str] = None) -> Optional[MLXClient]:
    """Create an MLX client and return it only if its server responds.

    Args:
        host: Optional host override.

    Returns:
        MLXClient if MLX server is running, None otherwise.
    """
    candidate = MLXClient(host)
    reachable = await candidate.is_available()
    return candidate if reachable else None
|