velune-cli 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velune/__init__.py +5 -0
- velune/__main__.py +6 -0
- velune/cli/__init__.py +5 -0
- velune/cli/app.py +208 -0
- velune/cli/autocomplete.py +80 -0
- velune/cli/banner.py +60 -0
- velune/cli/commands/__init__.py +32 -0
- velune/cli/commands/ask.py +175 -0
- velune/cli/commands/base.py +16 -0
- velune/cli/commands/chat.py +228 -0
- velune/cli/commands/config.py +224 -0
- velune/cli/commands/daemon.py +88 -0
- velune/cli/commands/doctor.py +721 -0
- velune/cli/commands/init.py +170 -0
- velune/cli/commands/mcp.py +82 -0
- velune/cli/commands/memory.py +293 -0
- velune/cli/commands/models.py +683 -0
- velune/cli/commands/preflight.py +95 -0
- velune/cli/commands/run.py +270 -0
- velune/cli/commands/setup.py +184 -0
- velune/cli/commands/workspace.py +249 -0
- velune/cli/context.py +36 -0
- velune/cli/councilmodel_ui.py +199 -0
- velune/cli/display/council_view.py +254 -0
- velune/cli/display/memory_view.py +126 -0
- velune/cli/display/panels.py +35 -0
- velune/cli/display/progress.py +25 -0
- velune/cli/display/themes.py +25 -0
- velune/cli/main.py +15 -0
- velune/cli/model_selector.py +51 -0
- velune/cli/modes.py +86 -0
- velune/cli/pull_ui.py +123 -0
- velune/cli/registry.py +80 -0
- velune/cli/rendering/__init__.py +5 -0
- velune/cli/rendering/error_panel.py +79 -0
- velune/cli/rendering/markdown.py +63 -0
- velune/cli/repl.py +1855 -0
- velune/cli/session_manager.py +71 -0
- velune/cli/slash_commands.py +37 -0
- velune/cli/theme.py +8 -0
- velune/cognition/__init__.py +23 -0
- velune/cognition/agents/__init__.py +7 -0
- velune/cognition/agents/coder.py +209 -0
- velune/cognition/agents/planner.py +156 -0
- velune/cognition/agents/reviewer.py +195 -0
- velune/cognition/arbitrator.py +220 -0
- velune/cognition/architecture.py +415 -0
- velune/cognition/budget.py +65 -0
- velune/cognition/council/__init__.py +47 -0
- velune/cognition/council/base.py +217 -0
- velune/cognition/council/challenger.py +74 -0
- velune/cognition/council/coder.py +79 -0
- velune/cognition/council/critic_agent.py +43 -0
- velune/cognition/council/critic_configs.py +111 -0
- velune/cognition/council/critics.py +41 -0
- velune/cognition/council/debate.py +46 -0
- velune/cognition/council/factory.py +140 -0
- velune/cognition/council/messages.py +56 -0
- velune/cognition/council/planner.py +124 -0
- velune/cognition/council/reviewer.py +74 -0
- velune/cognition/council/synthesizer.py +67 -0
- velune/cognition/council/tiers.py +188 -0
- velune/cognition/council_orchestrator.py +282 -0
- velune/cognition/firewall.py +354 -0
- velune/cognition/module.py +46 -0
- velune/cognition/orchestrator.py +1205 -0
- velune/cognition/personality.py +238 -0
- velune/cognition/state.py +104 -0
- velune/cognition/style_resolver.py +64 -0
- velune/cognition/verification.py +205 -0
- velune/context/__init__.py +28 -0
- velune/context/assembler.py +240 -0
- velune/context/budget.py +97 -0
- velune/context/extractive.py +95 -0
- velune/context/prompt_adaptation.py +480 -0
- velune/context/sections.py +99 -0
- velune/context/token_counter.py +134 -0
- velune/context/utilization.py +33 -0
- velune/context/window.py +63 -0
- velune/core/__init__.py +89 -0
- velune/core/background.py +5 -0
- velune/core/config/__init__.py +37 -0
- velune/core/errors/__init__.py +90 -0
- velune/core/errors/catalog.py +188 -0
- velune/core/errors/execution.py +31 -0
- velune/core/errors/memory.py +25 -0
- velune/core/errors/orchestration.py +31 -0
- velune/core/errors/provider.py +37 -0
- velune/core/event_loop.py +35 -0
- velune/core/logging.py +83 -0
- velune/core/paths.py +165 -0
- velune/core/runtime.py +113 -0
- velune/core/startup_profiler.py +56 -0
- velune/core/task_registry.py +117 -0
- velune/core/trace.py +83 -0
- velune/core/types/__init__.py +48 -0
- velune/core/types/agent.py +53 -0
- velune/core/types/context.py +42 -0
- velune/core/types/inference.py +38 -0
- velune/core/types/memory.py +42 -0
- velune/core/types/model.py +70 -0
- velune/core/types/provider.py +62 -0
- velune/core/types/repository.py +38 -0
- velune/core/types/task.py +61 -0
- velune/core/types/workspace.py +28 -0
- velune/daemon/client.py +13 -0
- velune/daemon/server.py +127 -0
- velune/daemon/transport.py +179 -0
- velune/events.py +204 -0
- velune/execution/__init__.py +22 -0
- velune/execution/benchmarker.py +315 -0
- velune/execution/cancellation.py +53 -0
- velune/execution/checkpointer.py +130 -0
- velune/execution/command_spec.py +165 -0
- velune/execution/diff_preview.py +197 -0
- velune/execution/executor.py +181 -0
- velune/execution/module.py +18 -0
- velune/execution/multi_diff.py +67 -0
- velune/execution/path_guard.py +74 -0
- velune/execution/planner.py +91 -0
- velune/execution/rollback.py +89 -0
- velune/execution/sandbox.py +268 -0
- velune/execution/validator.py +115 -0
- velune/hardware/__init__.py +1 -0
- velune/hardware/detector.py +192 -0
- velune/kernel/__init__.py +55 -0
- velune/kernel/bootstrap.py +125 -0
- velune/kernel/config.py +426 -0
- velune/kernel/entrypoint.py +78 -0
- velune/kernel/health.py +54 -0
- velune/kernel/lifecycle.py +143 -0
- velune/kernel/module.py +17 -0
- velune/kernel/modules.py +23 -0
- velune/kernel/registry.py +96 -0
- velune/kernel/schemas.py +28 -0
- velune/main.py +9 -0
- velune/mcp/__init__.py +9 -0
- velune/mcp/client.py +115 -0
- velune/mcp/config.py +19 -0
- velune/mcp/server.py +624 -0
- velune/memory/__init__.py +32 -0
- velune/memory/compaction.py +506 -0
- velune/memory/embedding_pipeline.py +241 -0
- velune/memory/lifecycle.py +680 -0
- velune/memory/module.py +218 -0
- velune/memory/prioritizer.py +67 -0
- velune/memory/storage/episodic_schema.sql +53 -0
- velune/memory/storage/lancedb_store.py +282 -0
- velune/memory/storage/sqlite_manager.py +369 -0
- velune/memory/storage/sqlite_pool.py +149 -0
- velune/memory/tiers/episodic.py +588 -0
- velune/memory/tiers/graph.py +378 -0
- velune/memory/tiers/lineage.py +416 -0
- velune/memory/tiers/semantic.py +475 -0
- velune/memory/tiers/working.py +168 -0
- velune/memory/vitality.py +132 -0
- velune/models/__init__.py +15 -0
- velune/models/family.py +76 -0
- velune/models/module.py +20 -0
- velune/models/probes.py +192 -0
- velune/models/profile_cache.py +84 -0
- velune/models/profiler.py +108 -0
- velune/models/registry.py +251 -0
- velune/models/scorer.py +233 -0
- velune/models/specializations.py +205 -0
- velune/orchestration/__init__.py +19 -0
- velune/orchestration/engine.py +239 -0
- velune/orchestration/module.py +15 -0
- velune/orchestration/role_assignments.py +82 -0
- velune/orchestration/schemas.py +98 -0
- velune/plugins/__init__.py +20 -0
- velune/plugins/hooks.py +50 -0
- velune/plugins/loader.py +161 -0
- velune/plugins/registry.py +56 -0
- velune/plugins/schemas.py +21 -0
- velune/providers/__init__.py +23 -0
- velune/providers/adapters/anthropic.py +257 -0
- velune/providers/adapters/fireworks.py +115 -0
- velune/providers/adapters/google.py +234 -0
- velune/providers/adapters/groq.py +151 -0
- velune/providers/adapters/huggingface.py +210 -0
- velune/providers/adapters/llamacpp.py +208 -0
- velune/providers/adapters/lmstudio.py +175 -0
- velune/providers/adapters/ollama.py +233 -0
- velune/providers/adapters/openai.py +213 -0
- velune/providers/adapters/openrouter.py +81 -0
- velune/providers/adapters/together.py +134 -0
- velune/providers/adapters/xai.py +60 -0
- velune/providers/base.py +86 -0
- velune/providers/benchmarker.py +138 -0
- velune/providers/discovery/__init__.py +33 -0
- velune/providers/discovery/anthropic.py +79 -0
- velune/providers/discovery/benchmarks.py +44 -0
- velune/providers/discovery/classifier.py +69 -0
- velune/providers/discovery/fireworks.py +95 -0
- velune/providers/discovery/gguf.py +88 -0
- velune/providers/discovery/google.py +95 -0
- velune/providers/discovery/gpu.py +117 -0
- velune/providers/discovery/groq.py +21 -0
- velune/providers/discovery/huggingface.py +67 -0
- velune/providers/discovery/lmstudio.py +80 -0
- velune/providers/discovery/ollama.py +162 -0
- velune/providers/discovery/openai.py +96 -0
- velune/providers/discovery/openrouter.py +113 -0
- velune/providers/discovery/scanner.py +115 -0
- velune/providers/discovery/together.py +114 -0
- velune/providers/discovery/xai.py +57 -0
- velune/providers/health.py +67 -0
- velune/providers/health_monitor.py +169 -0
- velune/providers/keystore.py +142 -0
- velune/providers/local_paths.py +49 -0
- velune/providers/local_resolver.py +229 -0
- velune/providers/module.py +51 -0
- velune/providers/ollama_manager.py +193 -0
- velune/providers/registry.py +220 -0
- velune/providers/router.py +255 -0
- velune/providers/task_classifier.py +288 -0
- velune/py.typed +0 -0
- velune/repository/__init__.py +33 -0
- velune/repository/analyzer.py +127 -0
- velune/repository/ast_parser.py +822 -0
- velune/repository/blast_radius.py +298 -0
- velune/repository/boundary_classifier.py +295 -0
- velune/repository/cognition.py +316 -0
- velune/repository/grapher.py +179 -0
- velune/repository/import_graph.py +263 -0
- velune/repository/incremental_indexer.py +275 -0
- velune/repository/index_state.py +96 -0
- velune/repository/indexer.py +243 -0
- velune/repository/module.py +17 -0
- velune/repository/parser.py +474 -0
- velune/repository/project_type.py +300 -0
- velune/repository/rename_journal.py +287 -0
- velune/repository/scanner.py +193 -0
- velune/repository/schemas.py +102 -0
- velune/repository/symbol_registry.py +365 -0
- velune/repository/tracker.py +252 -0
- velune/retrieval/__init__.py +27 -0
- velune/retrieval/cache.py +110 -0
- velune/retrieval/fast_path.py +391 -0
- velune/retrieval/graph.py +124 -0
- velune/retrieval/hybrid.py +271 -0
- velune/retrieval/keyword.py +131 -0
- velune/retrieval/module.py +26 -0
- velune/retrieval/pipeline.py +303 -0
- velune/retrieval/reranker.py +102 -0
- velune/retrieval/schemas.py +59 -0
- velune/retrieval/slow_path.py +364 -0
- velune/retrieval/vector.py +203 -0
- velune/telemetry/__init__.py +59 -0
- velune/telemetry/cognition.py +267 -0
- velune/telemetry/cost_estimator.py +92 -0
- velune/telemetry/debug.py +304 -0
- velune/telemetry/doctor.py +244 -0
- velune/telemetry/logging.py +286 -0
- velune/telemetry/spans.py +277 -0
- velune/telemetry/token_tracker.py +140 -0
- velune/telemetry/usage_tracker.py +340 -0
- velune/tools/__init__.py +41 -0
- velune/tools/base/registry.py +87 -0
- velune/tools/base/tool.py +63 -0
- velune/tools/code/navigate.py +116 -0
- velune/tools/code/search.py +123 -0
- velune/tools/filesystem/read.py +75 -0
- velune/tools/filesystem/search.py +136 -0
- velune/tools/filesystem/write.py +163 -0
- velune/tools/git/history.py +177 -0
- velune/tools/git/operations.py +122 -0
- velune/tools/git/state.py +121 -0
- velune/tools/module.py +81 -0
- velune/tools/terminal/execute.py +72 -0
- velune/tools/terminal/history.py +47 -0
- velune/tools/web/fetch.py +55 -0
- velune/tools/web/validator.py +122 -0
- velune_cli-0.9.0.dist-info/METADATA +518 -0
- velune_cli-0.9.0.dist-info/RECORD +279 -0
- velune_cli-0.9.0.dist-info/WHEEL +4 -0
- velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
- velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Model capability registry with empirical probe evaluation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from velune.core.types.model import CapabilityLevel, ModelCapabilityProfile, ModelDescriptor
|
|
10
|
+
from velune.models.profile_cache import ModelProfileCache
|
|
11
|
+
from velune.providers.discovery.scanner import ModelDiscoveryScanner
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("velune.models.registry")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ModelCapabilityRegistry:
    """Catalog of discovered models, enriched with empirical probe results.

    Descriptors are stored under two keys: the qualified
    ``"<provider_id>/<model_id>"`` key and, when still free, the bare
    ``model_id`` so callers can look a model up without naming a provider.
    """

    def __init__(self, scanner: ModelDiscoveryScanner | None = None) -> None:
        """Create an empty registry.

        Args:
            scanner: Discovery scanner used by :meth:`refresh`. A default
                ``ModelDiscoveryScanner`` is created when omitted.
        """
        self.scanner = scanner or ModelDiscoveryScanner()
        self._models: dict[str, ModelDescriptor] = {}
        # The event loop only keeps weak references to tasks, so
        # fire-and-forget tasks must be strongly referenced until they finish
        # or they may be garbage-collected mid-execution.
        self._background_tasks: set[asyncio.Task] = set()

    async def refresh(self) -> None:
        """Scan all providers and rebuild the catalog.

        For every discovered model the cached probe profile is applied when
        one exists. Otherwise, if a provider for the model can be resolved,
        a fast ping is run and a full probe is scheduled in the background
        for models that respond. Any failure is logged and swallowed so a
        broken provider never crashes the caller.
        """
        try:
            discovered = await self.scanner.scan_all()
            self._models.clear()

            profile_cache = ModelProfileCache(Path(".velune") / "model_profiles.json")
            # Imported lazily, as in the rest of this module.
            from velune.models.probes import FastProbe

            fast_probe = FastProbe()

            probing_tasks = []
            models_to_probe = []

            for model in discovered:
                cached = profile_cache.get(model.model_id, model.provider_id)
                if cached:
                    # Apply cached probe results to capability profile
                    self._apply_probe_results(model, cached["probes"])
                    continue

                provider = self._resolve_provider(model.provider_id)
                if provider:
                    models_to_probe.append(model)
                    probing_tasks.append(fast_probe.ping(provider, model.model_id))

            # Execute fast probes concurrently
            if probing_tasks:
                results = await asyncio.gather(*probing_tasks, return_exceptions=True)
                for model, responsive in zip(models_to_probe, results, strict=False):
                    # A ping that raised counts as "not responding".
                    if isinstance(responsive, Exception):
                        responsive = False

                    if responsive:
                        model.metadata["validated"] = True
                        self._schedule_full_probe(model, profile_cache)
                    else:
                        model.metadata["validated"] = False
                        logger.info("Model %s is not responding, skipping probe", model.model_id)

            # Store models in mapping
            for model in discovered:
                key = f"{model.provider_id}/{model.model_id}"
                self._models[key] = model
                if model.model_id not in self._models:
                    self._models[model.model_id] = model

            logger.info(
                "Indexed %d models (%d validated)",
                len(discovered),
                # Models that were never pinged carry no "validated" flag and
                # are counted as validated by this default.
                sum(1 for m in discovered if m.metadata.get("validated", True)),
            )
        except Exception as e:
            logger.error("Failed to discover models during catalog refresh: %s", e)

    def _resolve_provider(self, provider_id: str):
        """Return the provider registered under ``provider_id``, or ``None``.

        Lookup is best-effort: a missing container, registry or provider
        yields ``None`` (the model is then simply not probed), but the
        reason is logged instead of being silently discarded.
        """
        try:
            from velune.kernel.registry import get_container

            provider_reg = get_container().get("runtime.provider_registry")
            return provider_reg.get(provider_id)
        except Exception as exc:
            logger.debug("Provider lookup failed for %s: %s", provider_id, exc)
            return None

    def _schedule_full_probe(self, model: ModelDescriptor, profile_cache: ModelProfileCache) -> None:
        """Schedule the full (slow) probe suite for a responsive model.

        When a daemon is running the probe is delegated to it over IPC;
        otherwise it is submitted to the runtime task registry. Scheduling is
        best-effort: failures are logged at debug level and never propagate.
        """
        try:
            from velune.daemon.client import DaemonClient

            if DaemonClient.is_running():
                # Fire-and-forget IPC dispatch. The task is tracked so it
                # cannot be garbage-collected before it completes.
                task = asyncio.create_task(
                    DaemonClient.send_command(
                        "probe_model",
                        model_id=model.model_id,
                        provider_id=model.provider_id,
                    )
                )
                self._background_tasks.add(task)
                task.add_done_callback(self._on_background_task_done)
                logger.info(
                    "Delegated full probing of model %s to the active Velune daemon process.",
                    model.model_id,
                )
            else:
                from velune.kernel.registry import get_container

                task_reg = get_container().get("runtime.task_registry")
                task_reg.submit(
                    name=f"full_probe_{model.model_id}",
                    coro=self._probe_model_background(model, profile_cache),
                    timeout_seconds=120.0,
                )
        except Exception as exc:
            logger.debug("Could not schedule full probe for %s: %s", model.model_id, exc)

    def _on_background_task_done(self, task: asyncio.Task) -> None:
        """Drop a finished fire-and-forget task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        # Retrieving the exception prevents asyncio's
        # "Task exception was never retrieved" warning.
        exc = task.exception()
        if exc is not None:
            logger.debug("Background daemon dispatch failed: %s", exc)

    def register(self, descriptor: ModelDescriptor) -> None:
        """Explicitly register a custom model descriptor.

        The qualified key is always (re)written; the bare ``model_id`` key is
        only claimed when no other descriptor holds it yet.
        """
        key = f"{descriptor.provider_id}/{descriptor.model_id}"
        self._models[key] = descriptor
        if descriptor.model_id not in self._models:
            self._models[descriptor.model_id] = descriptor

    def get(self, model_id: str, provider_id: str | None = None) -> ModelDescriptor | None:
        """Look up a model descriptor by ID and optional provider.

        Args:
            model_id: Model identifier.
            provider_id: When given, only the qualified
                ``"<provider_id>/<model_id>"`` key is consulted.

        Returns:
            The matching descriptor, or ``None`` when nothing matches.
        """
        if provider_id:
            return self._models.get(f"{provider_id}/{model_id}")

        # Try direct match
        if model_id in self._models:
            return self._models[model_id]

        # Fall back to scanning stored descriptors by their model_id field.
        for model in self._models.values():
            if model.model_id == model_id:
                return model
        return None

    def list_all(self) -> list[ModelDescriptor]:
        """List every indexed descriptor exactly once, in insertion order."""
        # Each descriptor may be stored under two keys, so de-duplicate on
        # the (provider_id, model_id) pair.
        seen = set()
        unique = []
        for model in self._models.values():
            ref = (model.provider_id, model.model_id)
            if ref not in seen:
                seen.add(ref)
                unique.append(model)
        return unique

    def get_by_provider(self, provider_id: str) -> list[ModelDescriptor]:
        """List all models registered under a specific provider."""
        return [model for model in self.list_all() if model.provider_id == provider_id]

    @staticmethod
    def _probe_score(probes: dict, name: str) -> float:
        """Extract the score of probe ``name`` from ``probes``.

        An entry may be an object exposing ``.score`` or a mapping with a
        ``"score"`` key. A missing or ``None`` entry (or score) yields 0.0.
        """
        entry = probes.get(name)
        if entry is None:
            return 0.0
        score = entry.score if hasattr(entry, "score") else entry.get("score", 0.0)
        return 0.0 if score is None else score

    @staticmethod
    def _score_to_level(score: float) -> CapabilityLevel:
        """Map a probe score to a ``CapabilityLevel`` (see thresholds below)."""
        if score > 0.85:
            return CapabilityLevel.EXPERT
        if score > 0.70:
            return CapabilityLevel.ADVANCED
        if score > 0.50:
            return CapabilityLevel.INTERMEDIATE
        if score > 0.0:
            return CapabilityLevel.BASIC
        return CapabilityLevel.NONE

    def _apply_probe_results(self, model: ModelDescriptor, probes: dict) -> None:
        """Map float probe scores to CapabilityLevel and update the descriptor.

        Score mapping:
        - score > 0.85 → EXPERT
        - score > 0.70 → ADVANCED
        - score > 0.50 → INTERMEDIATE
        - score > 0.0  → BASIC
        - otherwise    → NONE

        Args:
            model: Descriptor to update in place; a fresh
                ``ModelCapabilityProfile`` is created when it has none.
            probes: Probe results keyed by ``"coding"``, ``"reasoning"`` and
                ``"instruction"``.
        """
        if not model.capabilities:
            model.capabilities = ModelCapabilityProfile()

        coding_score = self._probe_score(probes, "coding")
        reasoning_score = self._probe_score(probes, "reasoning")
        instruction_score = self._probe_score(probes, "instruction")

        model.capabilities.coding = self._score_to_level(coding_score)
        model.capabilities.reasoning = self._score_to_level(reasoning_score)
        model.capabilities.instruction_following = self._score_to_level(instruction_score)

        # Infer other capabilities from primary scores
        if model.capabilities.reasoning >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.planning = CapabilityLevel.INTERMEDIATE
        if model.capabilities.instruction_following >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.tool_use = CapabilityLevel.INTERMEDIATE
        if model.capabilities.coding >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.code_analysis = CapabilityLevel.INTERMEDIATE

        logger.debug(
            "Applied probe results to %s: coding=%s (%.2f), reasoning=%s (%.2f), instruction=%s (%.2f)",
            model.model_id,
            model.capabilities.coding.name,
            coding_score,
            model.capabilities.reasoning.name,
            reasoning_score,
            model.capabilities.instruction_following.name,
            instruction_score,
        )

    async def _probe_model_background(
        self, model: ModelDescriptor, cache: ModelProfileCache
    ) -> None:
        """Run the full probe suite, cache the results and update ``model``.

        Intended to run as a background task: every failure is logged at
        debug level and swallowed.
        """
        try:
            from velune.kernel.registry import get_container

            container = get_container()
            if not container.has("runtime.provider_registry"):
                logger.debug(
                    "No provider registry registered yet, skipping background probe for %s.",
                    model.model_id,
                )
                return

            provider_registry = container.get("runtime.provider_registry")
            provider = provider_registry.get(model.provider_id)
            if not provider:
                logger.debug("No active provider found for %s, skipping probe.", model.model_id)
                return

            from velune.models.probes import ModelProber

            prober = ModelProber(provider, model.model_id)
            results = await prober.run_all_probes()
            cache.set(model.model_id, model.provider_id, results)
            self._apply_probe_results(model, results)
            logger.info(
                "Successfully probed %s: coding=%.2f reasoning=%.2f",
                model.model_id,
                results["coding"].score,
                results["reasoning"].score,
            )
        except Exception as e:
            logger.debug("Background probe failed for %s: %s", model.model_id, e)
|
velune/models/scorer.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Multi-factor scorer for model routing and selection with family-specific and quantization-aware adjustments."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
8
|
+
from velune.models.profiler import ModelProfile
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("velune.models.scorer")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ModelScorer:
    """Calculates multidimensional matching scores for model selection.

    The final score is a weighted sum of capability, context fit, speed and
    reliability, minus a weighted cost penalty, clamped to ``[0.0, 1.0]``.
    """

    # Family name -> substrings that identify it in a lower-cased model id.
    # Order matters because the first match wins: "codellama" must be tested
    # before "llama", otherwise every CodeLlama id is classified as plain
    # "llama" and its family adjustments are never applied.
    _FAMILY_PATTERNS = (
        ("qwen", ("qwen",)),
        ("deepseek", ("deepseek",)),
        ("codellama", ("codellama",)),
        ("llama", ("llama", "meta-llama")),
        ("mistral", ("mistral", "mixtral")),
        ("phi", ("phi",)),
        ("gemma", ("gemma",)),
        ("starcoder", ("starcoder",)),
    )

    # (family, task_category) -> additive capability adjustment.
    _FAMILY_ADJUSTMENTS = {
        ("qwen", "coding"): +0.1,
        ("qwen", "reasoning"): +0.05,
        ("deepseek", "coding"): +0.15,
        ("deepseek", "reasoning"): +0.1,
        ("codellama", "coding"): +0.2,
        ("codellama", "reasoning"): -0.1,  # Not a reasoning model
        ("phi", "coding"): +0.05,
        ("phi", "reasoning"): +0.15,  # Phi is surprisingly capable at reasoning for its size
        ("mistral", "summarization"): +0.1,
        ("gemma", "instruction_following"): +0.05,
        ("starcoder", "coding"): +0.2,
        ("starcoder", "reasoning"): -0.15,
    }

    # Upper-cased quantization label -> additive capability adjustment.
    _QUANTIZATION_PENALTIES = {
        "Q2": -0.25,
        "Q3": -0.15,
        "Q4_0": -0.08,
        "Q4_K_M": -0.05,
        "Q5": -0.02,
        "Q5_K_M": -0.02,
        "Q8_0": 0.0,
        "FP16": +0.05,  # Slight quality boost for full precision
    }
    # Longest labels first so the most specific prefix wins in the fallback.
    _QUANTIZATION_PREFIXES = tuple(sorted(_QUANTIZATION_PENALTIES, key=len, reverse=True))

    # Static speed tiers, used both for models and for latency requirements.
    _SPEED_TIER_SCORES = {"fast": 1.0, "medium": 0.6, "slow": 0.3}

    # Cost (per 1k tokens) at which the cost penalty saturates at 1.0.
    _MAX_EXPECTED_COST_PER_1K = 0.15

    def __init__(
        self,
        w_capability: float = 0.4,
        w_context: float = 0.2,
        w_speed: float = 0.2,
        w_reliability: float = 0.1,
        w_cost: float = 0.1,
    ) -> None:
        """Initialize routing weights."""
        self.w_capability = w_capability
        self.w_context = w_context
        self.w_speed = w_speed
        self.w_reliability = w_reliability
        self.w_cost = w_cost

    def _detect_model_family(self, model_id: str) -> str:
        """Detect model family from ID for family-specific scoring adjustments.

        Returns the first family whose pattern occurs in the lower-cased id,
        or ``"unknown"`` when none matches.
        """
        lower = model_id.lower()
        for family, patterns in self._FAMILY_PATTERNS:
            if any(p in lower for p in patterns):
                return family
        return "unknown"

    def _get_family_capability_adjustments(self, family: str, task_category: str) -> float:
        """Return the capability adjustment for a model family on a task.

        Positive = boost, negative = penalty, 0.0 when the pair is unknown.
        """
        return self._FAMILY_ADJUSTMENTS.get((family, task_category), 0.0)

    def _get_quantization_penalty(self, model: ModelDescriptor) -> float:
        """Return the capability adjustment for the model's quantization.

        The label is matched case-insensitively. An exact table entry wins;
        otherwise a suffixed label falls back to the longest table entry it
        extends (``"Q2_K"`` → ``"Q2"``, ``"Q3_K_M"`` → ``"Q3"``) so that real
        suffixed labels are not silently scored as unquantized. Unknown or
        missing labels yield 0.0.
        """
        quant = (model.quantization or "").upper()
        if not quant:
            return 0.0

        exact = self._QUANTIZATION_PENALTIES.get(quant)
        if exact is not None:
            return exact

        for label in self._QUANTIZATION_PREFIXES:
            # Require a "_" boundary so "Q4_0" never matches e.g. "Q4_01".
            if quant.startswith(label + "_"):
                return self._QUANTIZATION_PENALTIES[label]
        return 0.0

    def score(
        self,
        model: ModelDescriptor,
        task_category: str,
        required_tokens: int = 0,
        latency_requirement: str = "medium",
        profile: ModelProfile | None = None,
        local_preferred: bool = False,
    ) -> float:
        """Calculate the aggregated suitability score (0.0 - 1.0) for a model.

        agg_score = w_cap * cap_match + w_ctx * ctx_fit + w_speed * speed
                    + w_rel * reliability - w_cost * cost

        Args:
            model: Descriptor of the candidate model.
            task_category: Capability name the task exercises (e.g. "coding").
            required_tokens: Tokens the task needs; ``<= 0`` disables the
                context-fit check.
            latency_requirement: "fast", "medium" or "slow".
            profile: Optional empirical measurements for the model.
            local_preferred: Whether locally running models should be favoured.
        """
        # 1. Base capability, adjusted for the model family.
        cap_score = self._calculate_capability_score(model, task_category)
        family = self._detect_model_family(model.model_id)
        family_adj = self._get_family_capability_adjustments(family, task_category)
        cap_score = max(0.0, min(1.0, cap_score + family_adj))

        # Quantization only affects reasoning-heavy tasks.
        if task_category in ("reasoning", "planning"):
            quant_penalty = self._get_quantization_penalty(model)
            cap_score = max(0.0, min(1.0, cap_score + quant_penalty))

        # 2-5. Remaining components, each in [0.0, 1.0].
        ctx_score = self._calculate_context_score(model.context_length, required_tokens)
        speed_score = self._calculate_speed_score(model, latency_requirement, profile)
        reliability_score = self._calculate_reliability_score(model, profile, local_preferred)
        cost_penalty = self._calculate_cost_penalty(model)

        # Aggregate weighted components
        total_score = (
            self.w_capability * cap_score
            + self.w_context * ctx_score
            + self.w_speed * speed_score
            + self.w_reliability * reliability_score
            - self.w_cost * cost_penalty
        )

        return max(0.0, min(1.0, total_score))

    def _calculate_capability_score(self, model: ModelDescriptor, task_category: str) -> float:
        """Evaluate how well the model's capability level matches the task.

        Returns 0.25 when the model carries no capability profile at all.
        """
        capabilities = getattr(model, "capabilities", None)
        if not capabilities:
            return 0.25  # Basic fallback

        level = CapabilityLevel.NONE
        # The profile may be an attribute-style object or a plain dictionary.
        if isinstance(capabilities, dict):
            level_val = capabilities.get(task_category, CapabilityLevel.NONE)
            if isinstance(level_val, int):
                try:
                    level = CapabilityLevel(level_val)
                except ValueError:
                    # Not a valid level value: treat as "no capability"
                    # rather than aborting the whole scoring pass.
                    level = CapabilityLevel.NONE
        elif hasattr(capabilities, task_category):
            level = getattr(capabilities, task_category)

        level_score_map = {
            CapabilityLevel.NONE: 0.0,
            CapabilityLevel.BASIC: 0.2,
            CapabilityLevel.INTERMEDIATE: 0.5,
            CapabilityLevel.ADVANCED: 0.8,
            CapabilityLevel.EXPERT: 1.0,
        }

        return level_score_map.get(level, 0.2)

    def _calculate_context_score(self, context_length: int, required_tokens: int) -> float:
        """Evaluate how well the context window fits the required tokens.

        Returns 1.0 when nothing is required, a value in ``[0.8, 1.0]`` when
        the window is large enough (highest for an exact fit) and at most 0.5
        when the window is too small. A missing or non-positive window
        scores 0.0.
        """
        if required_tokens <= 0:
            return 1.0

        if not context_length or context_length < 0:
            return 0.0

        if context_length >= required_tokens:
            # Having extra headroom is good, but value decays as ratio grows
            ratio = context_length / required_tokens
            return min(1.0, 0.8 + 0.2 / ratio)

        # Severe penalty for context overflow
        return max(0.0, (context_length / required_tokens) * 0.5)

    def _calculate_speed_score(
        self, model: ModelDescriptor, latency_requirement: str, profile: ModelProfile | None
    ) -> float:
        """Calculate the speed score.

        Uses empirical metrics (TPS/TTFT) when a profile with a positive TPS
        is available, otherwise falls back to comparing static speed tiers.
        """
        if profile and profile.tps > 0:
            # 80 tokens/second is treated as the maximum optimal score.
            empirical_tps_score = min(1.0, profile.tps / 80.0)

            # TTFT above 1.5 seconds starts decaying the score (capped at 0.5).
            ttft_penalty = (
                max(0.0, min(0.5, (profile.ttft_ms - 1500.0) / 3000.0))
                if profile.ttft_ms > 0
                else 0.0
            )
            return max(0.1, empirical_tps_score - ttft_penalty)

        # Fallback to static speed tiers; unknown tiers count as "medium".
        model_speed = self._SPEED_TIER_SCORES.get(model.speed_tier, 0.6)
        req_speed = self._SPEED_TIER_SCORES.get(latency_requirement, 0.6)

        if model_speed >= req_speed:
            return 1.0
        return model_speed / req_speed

    def _calculate_reliability_score(
        self, model: ModelDescriptor, profile: ModelProfile | None, local_preferred: bool
    ) -> float:
        """Determine the reliability/preference score.

        Starts from a 0.9 baseline, adjusts for locality and subtracts a
        penalty proportional to the profile's JSON invalidity.
        """
        score = 0.9  # Baseline reliability

        if model.is_local:
            # Local models always get a small boost, larger when preferred.
            score += 0.1 if local_preferred else 0.05
        else:
            # Remote models are only penalised when local is preferred.
            score -= 0.2 if local_preferred else 0.0

        # Empirical JSON formatting validity penalty
        if profile and profile.json_validity < 1.0:
            score -= (1.0 - profile.json_validity) * 0.5

        return max(0.0, min(1.0, score))

    def _calculate_cost_penalty(self, model: ModelDescriptor) -> float:
        """Calculate the score penalty (0.0 - 1.0) based on token cost."""
        cost = model.cost_per_1k_tokens
        if cost is None or cost <= 0.0:
            return 0.0  # Zero cost for local offline models

        # Normalise against the maximum expected cost per 1k tokens.
        return min(1.0, cost / self._MAX_EXPECTED_COST_PER_1K)
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Model specialization mapper for the Reasoning Council with role-specific context optimizations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from enum import StrEnum
|
|
7
|
+
|
|
8
|
+
from velune.core.types.model import ModelDescriptor
|
|
9
|
+
from velune.models.profiler import ModelProfiler
|
|
10
|
+
from velune.models.registry import ModelCapabilityRegistry
|
|
11
|
+
from velune.models.scorer import ModelScorer
|
|
12
|
+
|
|
13
|
+
# Module-level logger. The name is spelled out as a literal rather than derived
# from ``__name__``, so it stays the same regardless of how the module is imported.
logger = logging.getLogger("velune.models.specializations")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CouncilRole(StrEnum):
    """Roles in the Velune Reasoning Council.

    Members are ``StrEnum`` values, so each one is also a ``str`` equal to its
    lowercase value (for example ``CouncilRole.PLANNER == "planner"``).
    Iterating the enum yields the roles in the order declared below.
    """

    PLANNER = "planner"
    CODER = "coder"
    REVIEWER = "reviewer"
    CHALLENGER = "challenger"
    SYNTHESIZER = "synthesizer"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Default context budget, in tokens, for each council role. These values are
# used when the caller of ``ModelSpecializationMapper.map_roles`` does not pass
# an explicit ``required_tokens``.
ROLE_CONTEXT_REQUIREMENTS: dict[CouncilRole, int] = {
    CouncilRole.PLANNER: 16384,  # 16k: needs full repo context
    CouncilRole.CODER: 32768,  # 32k: needs code + context + plan
    CouncilRole.REVIEWER: 32768,  # 32k: needs to see full code
    CouncilRole.CHALLENGER: 16384,  # 16k: needs code summary
    CouncilRole.SYNTHESIZER: 65536,  # 64k: needs all outputs
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ModelSpecializationMapper:
    """Assign registered models to council roles using scoring and per-role context budgets.

    For every ``CouncilRole`` the mapper scores each model in the registry with
    the configured scorer and keeps the highest-scoring one. Local models that
    do not fit into the currently free VRAM (when that figure is available) are
    excluded from scoring. Entries in ``overrides`` replace the computed
    assignment for their role.
    """

    # Scoring inputs per role: (scorer task category, latency requirement).
    # The insertion order is the order in which roles are mapped.
    _ROLE_SELECTION: dict[CouncilRole, tuple[str, str]] = {
        CouncilRole.PLANNER: ("planning", "medium"),
        # Coder: coding-focused scoring.
        CouncilRole.CODER: ("coding", "medium"),
        # Reviewer: tolerates slower, highly capable reasoning models.
        CouncilRole.REVIEWER: ("reasoning", "slow"),
        # Challenger: needs strong reasoning.
        CouncilRole.CHALLENGER: ("reasoning", "medium"),
        # Synthesizer: prefers faster summarization models with large context.
        CouncilRole.SYNTHESIZER: ("summarization", "fast"),
    }

    # Extra tokens added on top of a caller-supplied ``required_tokens`` budget.
    # Roles that are not listed get no extra headroom.
    _ROLE_TOKEN_HEADROOM: dict[CouncilRole, int] = {
        CouncilRole.REVIEWER: 2048,
        CouncilRole.SYNTHESIZER: 4096,
    }

    def __init__(
        self,
        registry: ModelCapabilityRegistry,
        scorer: ModelScorer | None = None,
        profiler: ModelProfiler | None = None,
    ) -> None:
        """Create a mapper.

        Args:
            registry: Source of the candidate model descriptors.
            scorer: Scorer used to rank candidates; a default ``ModelScorer``
                is created when omitted.
            profiler: Provider of empirical model profiles; a default
                ``ModelProfiler`` is created when omitted.
        """
        self.registry = registry
        self.scorer = scorer or ModelScorer()
        self.profiler = profiler or ModelProfiler()
        # Role -> model id. Applied last in ``map_roles`` and wins over scoring.
        self.overrides: dict[CouncilRole, str] = {}

    def map_roles(
        self,
        task_category: str = "coding",
        required_tokens: int | None = None,
        local_preferred: bool = False,
    ) -> dict[CouncilRole, ModelDescriptor]:
        """Assign the best available model to each ``CouncilRole``.

        Each role is scored against its own fixed category and latency
        requirement:

        - Planner: "planning" category, medium latency (16k default context).
        - Coder: "coding" category, medium latency (32k default context).
        - Reviewer: "reasoning" category, slow latency (32k default context).
        - Challenger: "reasoning" category, medium latency (16k default context).
        - Synthesizer: "summarization" category, fast latency (64k default context).

        Args:
            task_category: Accepted for interface compatibility; the mapping
                logic in this method does not read it.
            required_tokens: Explicit context budget. When given, it replaces
                the per-role defaults; the reviewer gets 2048 and the
                synthesizer 4096 tokens on top of it.
            local_preferred: Forwarded to the scorer for every role.

        Returns:
            A mapping from role to model descriptor. It is empty when the
            registry holds no models; otherwise every role has an entry.
        """
        models = self.registry.list_all()
        if not models:
            logger.warning(
                "No models found in the capability registry. Council mappings will be empty."
            )
            return {}

        assignments: dict[CouncilRole, ModelDescriptor] = {}
        for role, (category, latency) in self._ROLE_SELECTION.items():
            chosen = self._select_best_model(
                models=models,
                role_category=category,
                required_tokens=self._tokens_for_role(role, required_tokens),
                latency_requirement=latency,
                local_preferred=local_preferred,
            )
            if chosen is not None:
                assignments[role] = chosen

        # Roles for which no candidate was selected fall back to the first
        # registered model. NOTE: this fallback does not re-apply the VRAM check.
        default_model = models[0]
        for role in CouncilRole:
            if role not in assignments:
                logger.info(
                    "Falling back role %s to default model %s",
                    role.value,
                    default_model.model_id,
                )
                assignments[role] = default_model

        # Explicit overrides win over both scored and fallback assignments.
        for role, overridden_model_id in self.overrides.items():
            descriptor = self.registry.get(overridden_model_id)
            if descriptor:
                assignments[role] = descriptor
            else:
                # Previously dropped silently; surface it so a mistyped
                # override does not go unnoticed.
                logger.warning(
                    "Ignoring override for role %s: model %s is not in the registry",
                    role,
                    overridden_model_id,
                )

        return assignments

    def _tokens_for_role(self, role: CouncilRole, required_tokens: int | None) -> int:
        """Return the context budget to request for ``role``.

        Uses the role's default from ``ROLE_CONTEXT_REQUIREMENTS`` when no
        explicit budget is given; otherwise the explicit budget plus the
        role's headroom, if it has any.
        """
        if required_tokens is None:
            return ROLE_CONTEXT_REQUIREMENTS[role]
        return required_tokens + self._ROLE_TOKEN_HEADROOM.get(role, 0)

    @staticmethod
    def _free_vram_gb() -> float | None:
        """Return the free VRAM reported by the runtime container, or ``None``.

        The lookup is deliberately best-effort: any failure (module missing,
        key not registered, unexpected value shape) disables VRAM filtering
        instead of aborting role mapping.
        """
        try:
            from velune.kernel.registry import get_container

            gpu_info = get_container().get("runtime.gpu_info")
            return gpu_info.get("vram_free_gb")
        except Exception:
            logger.debug("GPU info unavailable; VRAM filtering disabled", exc_info=True)
            return None

    def _select_best_model(
        self,
        models: list[ModelDescriptor],
        role_category: str,
        required_tokens: int,
        latency_requirement: str,
        local_preferred: bool,
    ) -> ModelDescriptor | None:
        """Score all candidates and return the highest-scoring one.

        Args:
            models: Candidate descriptors to rank.
            role_category: Task category passed to the scorer.
            required_tokens: Context budget passed to the scorer.
            latency_requirement: Latency tier passed to the scorer.
            local_preferred: Locality preference passed to the scorer.

        Returns:
            The best candidate, or ``None`` when every candidate was skipped
            or none scored above -1.0. On equal scores the earlier candidate
            in ``models`` is kept.
        """
        available_vram_gb = self._free_vram_gb()

        best_model: ModelDescriptor | None = None
        best_score = -1.0

        for model in models:
            # VRAM check for local models; only possible when free VRAM is known.
            if model.is_local and available_vram_gb is not None:
                required_vram = model.vram_required_gb
                if required_vram and required_vram > available_vram_gb:
                    logger.info(
                        "Skipping %s: requires %.1fGB VRAM, only %.1fGB available",
                        model.model_id,
                        required_vram,
                        available_vram_gb,
                    )
                    continue  # Skip models that won't fit in VRAM

            profile = self.profiler.get_profile(model.provider_id, model.model_id)
            score = self.scorer.score(
                model=model,
                task_category=role_category,
                required_tokens=required_tokens,
                latency_requirement=latency_requirement,
                profile=profile,
                local_preferred=local_preferred,
            )

            if score > best_score:
                best_score = score
                best_model = model

        return best_model
|