velune-cli 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velune/__init__.py +5 -0
- velune/__main__.py +6 -0
- velune/cli/__init__.py +5 -0
- velune/cli/app.py +208 -0
- velune/cli/autocomplete.py +80 -0
- velune/cli/banner.py +60 -0
- velune/cli/commands/__init__.py +32 -0
- velune/cli/commands/ask.py +175 -0
- velune/cli/commands/base.py +16 -0
- velune/cli/commands/chat.py +228 -0
- velune/cli/commands/config.py +224 -0
- velune/cli/commands/daemon.py +88 -0
- velune/cli/commands/doctor.py +721 -0
- velune/cli/commands/init.py +170 -0
- velune/cli/commands/mcp.py +82 -0
- velune/cli/commands/memory.py +293 -0
- velune/cli/commands/models.py +683 -0
- velune/cli/commands/preflight.py +95 -0
- velune/cli/commands/run.py +270 -0
- velune/cli/commands/setup.py +184 -0
- velune/cli/commands/workspace.py +249 -0
- velune/cli/context.py +36 -0
- velune/cli/councilmodel_ui.py +199 -0
- velune/cli/display/council_view.py +254 -0
- velune/cli/display/memory_view.py +126 -0
- velune/cli/display/panels.py +35 -0
- velune/cli/display/progress.py +25 -0
- velune/cli/display/themes.py +25 -0
- velune/cli/main.py +15 -0
- velune/cli/model_selector.py +51 -0
- velune/cli/modes.py +86 -0
- velune/cli/pull_ui.py +123 -0
- velune/cli/registry.py +80 -0
- velune/cli/rendering/__init__.py +5 -0
- velune/cli/rendering/error_panel.py +79 -0
- velune/cli/rendering/markdown.py +63 -0
- velune/cli/repl.py +1855 -0
- velune/cli/session_manager.py +71 -0
- velune/cli/slash_commands.py +37 -0
- velune/cli/theme.py +8 -0
- velune/cognition/__init__.py +23 -0
- velune/cognition/agents/__init__.py +7 -0
- velune/cognition/agents/coder.py +209 -0
- velune/cognition/agents/planner.py +156 -0
- velune/cognition/agents/reviewer.py +195 -0
- velune/cognition/arbitrator.py +220 -0
- velune/cognition/architecture.py +415 -0
- velune/cognition/budget.py +65 -0
- velune/cognition/council/__init__.py +47 -0
- velune/cognition/council/base.py +217 -0
- velune/cognition/council/challenger.py +74 -0
- velune/cognition/council/coder.py +79 -0
- velune/cognition/council/critic_agent.py +43 -0
- velune/cognition/council/critic_configs.py +111 -0
- velune/cognition/council/critics.py +41 -0
- velune/cognition/council/debate.py +46 -0
- velune/cognition/council/factory.py +140 -0
- velune/cognition/council/messages.py +56 -0
- velune/cognition/council/planner.py +124 -0
- velune/cognition/council/reviewer.py +74 -0
- velune/cognition/council/synthesizer.py +67 -0
- velune/cognition/council/tiers.py +188 -0
- velune/cognition/council_orchestrator.py +282 -0
- velune/cognition/firewall.py +354 -0
- velune/cognition/module.py +46 -0
- velune/cognition/orchestrator.py +1205 -0
- velune/cognition/personality.py +238 -0
- velune/cognition/state.py +104 -0
- velune/cognition/style_resolver.py +64 -0
- velune/cognition/verification.py +205 -0
- velune/context/__init__.py +28 -0
- velune/context/assembler.py +240 -0
- velune/context/budget.py +97 -0
- velune/context/extractive.py +95 -0
- velune/context/prompt_adaptation.py +480 -0
- velune/context/sections.py +99 -0
- velune/context/token_counter.py +134 -0
- velune/context/utilization.py +33 -0
- velune/context/window.py +63 -0
- velune/core/__init__.py +89 -0
- velune/core/background.py +5 -0
- velune/core/config/__init__.py +37 -0
- velune/core/errors/__init__.py +90 -0
- velune/core/errors/catalog.py +188 -0
- velune/core/errors/execution.py +31 -0
- velune/core/errors/memory.py +25 -0
- velune/core/errors/orchestration.py +31 -0
- velune/core/errors/provider.py +37 -0
- velune/core/event_loop.py +35 -0
- velune/core/logging.py +83 -0
- velune/core/paths.py +165 -0
- velune/core/runtime.py +113 -0
- velune/core/startup_profiler.py +56 -0
- velune/core/task_registry.py +117 -0
- velune/core/trace.py +83 -0
- velune/core/types/__init__.py +48 -0
- velune/core/types/agent.py +53 -0
- velune/core/types/context.py +42 -0
- velune/core/types/inference.py +38 -0
- velune/core/types/memory.py +42 -0
- velune/core/types/model.py +70 -0
- velune/core/types/provider.py +62 -0
- velune/core/types/repository.py +38 -0
- velune/core/types/task.py +61 -0
- velune/core/types/workspace.py +28 -0
- velune/daemon/client.py +13 -0
- velune/daemon/server.py +127 -0
- velune/daemon/transport.py +179 -0
- velune/events.py +204 -0
- velune/execution/__init__.py +22 -0
- velune/execution/benchmarker.py +315 -0
- velune/execution/cancellation.py +53 -0
- velune/execution/checkpointer.py +130 -0
- velune/execution/command_spec.py +165 -0
- velune/execution/diff_preview.py +197 -0
- velune/execution/executor.py +181 -0
- velune/execution/module.py +18 -0
- velune/execution/multi_diff.py +67 -0
- velune/execution/path_guard.py +74 -0
- velune/execution/planner.py +91 -0
- velune/execution/rollback.py +89 -0
- velune/execution/sandbox.py +268 -0
- velune/execution/validator.py +115 -0
- velune/hardware/__init__.py +1 -0
- velune/hardware/detector.py +192 -0
- velune/kernel/__init__.py +55 -0
- velune/kernel/bootstrap.py +125 -0
- velune/kernel/config.py +426 -0
- velune/kernel/entrypoint.py +78 -0
- velune/kernel/health.py +54 -0
- velune/kernel/lifecycle.py +143 -0
- velune/kernel/module.py +17 -0
- velune/kernel/modules.py +23 -0
- velune/kernel/registry.py +96 -0
- velune/kernel/schemas.py +28 -0
- velune/main.py +9 -0
- velune/mcp/__init__.py +9 -0
- velune/mcp/client.py +115 -0
- velune/mcp/config.py +19 -0
- velune/mcp/server.py +624 -0
- velune/memory/__init__.py +32 -0
- velune/memory/compaction.py +506 -0
- velune/memory/embedding_pipeline.py +241 -0
- velune/memory/lifecycle.py +680 -0
- velune/memory/module.py +218 -0
- velune/memory/prioritizer.py +67 -0
- velune/memory/storage/episodic_schema.sql +53 -0
- velune/memory/storage/lancedb_store.py +282 -0
- velune/memory/storage/sqlite_manager.py +369 -0
- velune/memory/storage/sqlite_pool.py +149 -0
- velune/memory/tiers/episodic.py +588 -0
- velune/memory/tiers/graph.py +378 -0
- velune/memory/tiers/lineage.py +416 -0
- velune/memory/tiers/semantic.py +475 -0
- velune/memory/tiers/working.py +168 -0
- velune/memory/vitality.py +132 -0
- velune/models/__init__.py +15 -0
- velune/models/family.py +76 -0
- velune/models/module.py +20 -0
- velune/models/probes.py +192 -0
- velune/models/profile_cache.py +84 -0
- velune/models/profiler.py +108 -0
- velune/models/registry.py +251 -0
- velune/models/scorer.py +233 -0
- velune/models/specializations.py +205 -0
- velune/orchestration/__init__.py +19 -0
- velune/orchestration/engine.py +239 -0
- velune/orchestration/module.py +15 -0
- velune/orchestration/role_assignments.py +82 -0
- velune/orchestration/schemas.py +98 -0
- velune/plugins/__init__.py +20 -0
- velune/plugins/hooks.py +50 -0
- velune/plugins/loader.py +161 -0
- velune/plugins/registry.py +56 -0
- velune/plugins/schemas.py +21 -0
- velune/providers/__init__.py +23 -0
- velune/providers/adapters/anthropic.py +257 -0
- velune/providers/adapters/fireworks.py +115 -0
- velune/providers/adapters/google.py +234 -0
- velune/providers/adapters/groq.py +151 -0
- velune/providers/adapters/huggingface.py +210 -0
- velune/providers/adapters/llamacpp.py +208 -0
- velune/providers/adapters/lmstudio.py +175 -0
- velune/providers/adapters/ollama.py +233 -0
- velune/providers/adapters/openai.py +213 -0
- velune/providers/adapters/openrouter.py +81 -0
- velune/providers/adapters/together.py +134 -0
- velune/providers/adapters/xai.py +60 -0
- velune/providers/base.py +86 -0
- velune/providers/benchmarker.py +138 -0
- velune/providers/discovery/__init__.py +33 -0
- velune/providers/discovery/anthropic.py +79 -0
- velune/providers/discovery/benchmarks.py +44 -0
- velune/providers/discovery/classifier.py +69 -0
- velune/providers/discovery/fireworks.py +95 -0
- velune/providers/discovery/gguf.py +88 -0
- velune/providers/discovery/google.py +95 -0
- velune/providers/discovery/gpu.py +117 -0
- velune/providers/discovery/groq.py +21 -0
- velune/providers/discovery/huggingface.py +67 -0
- velune/providers/discovery/lmstudio.py +80 -0
- velune/providers/discovery/ollama.py +162 -0
- velune/providers/discovery/openai.py +96 -0
- velune/providers/discovery/openrouter.py +113 -0
- velune/providers/discovery/scanner.py +115 -0
- velune/providers/discovery/together.py +114 -0
- velune/providers/discovery/xai.py +57 -0
- velune/providers/health.py +67 -0
- velune/providers/health_monitor.py +169 -0
- velune/providers/keystore.py +142 -0
- velune/providers/local_paths.py +49 -0
- velune/providers/local_resolver.py +229 -0
- velune/providers/module.py +51 -0
- velune/providers/ollama_manager.py +193 -0
- velune/providers/registry.py +220 -0
- velune/providers/router.py +255 -0
- velune/providers/task_classifier.py +288 -0
- velune/py.typed +0 -0
- velune/repository/__init__.py +33 -0
- velune/repository/analyzer.py +127 -0
- velune/repository/ast_parser.py +822 -0
- velune/repository/blast_radius.py +298 -0
- velune/repository/boundary_classifier.py +295 -0
- velune/repository/cognition.py +316 -0
- velune/repository/grapher.py +179 -0
- velune/repository/import_graph.py +263 -0
- velune/repository/incremental_indexer.py +275 -0
- velune/repository/index_state.py +96 -0
- velune/repository/indexer.py +243 -0
- velune/repository/module.py +17 -0
- velune/repository/parser.py +474 -0
- velune/repository/project_type.py +300 -0
- velune/repository/rename_journal.py +287 -0
- velune/repository/scanner.py +193 -0
- velune/repository/schemas.py +102 -0
- velune/repository/symbol_registry.py +365 -0
- velune/repository/tracker.py +252 -0
- velune/retrieval/__init__.py +27 -0
- velune/retrieval/cache.py +110 -0
- velune/retrieval/fast_path.py +391 -0
- velune/retrieval/graph.py +124 -0
- velune/retrieval/hybrid.py +271 -0
- velune/retrieval/keyword.py +131 -0
- velune/retrieval/module.py +26 -0
- velune/retrieval/pipeline.py +303 -0
- velune/retrieval/reranker.py +102 -0
- velune/retrieval/schemas.py +59 -0
- velune/retrieval/slow_path.py +364 -0
- velune/retrieval/vector.py +203 -0
- velune/telemetry/__init__.py +59 -0
- velune/telemetry/cognition.py +267 -0
- velune/telemetry/cost_estimator.py +92 -0
- velune/telemetry/debug.py +304 -0
- velune/telemetry/doctor.py +244 -0
- velune/telemetry/logging.py +286 -0
- velune/telemetry/spans.py +277 -0
- velune/telemetry/token_tracker.py +140 -0
- velune/telemetry/usage_tracker.py +340 -0
- velune/tools/__init__.py +41 -0
- velune/tools/base/registry.py +87 -0
- velune/tools/base/tool.py +63 -0
- velune/tools/code/navigate.py +116 -0
- velune/tools/code/search.py +123 -0
- velune/tools/filesystem/read.py +75 -0
- velune/tools/filesystem/search.py +136 -0
- velune/tools/filesystem/write.py +163 -0
- velune/tools/git/history.py +177 -0
- velune/tools/git/operations.py +122 -0
- velune/tools/git/state.py +121 -0
- velune/tools/module.py +81 -0
- velune/tools/terminal/execute.py +72 -0
- velune/tools/terminal/history.py +47 -0
- velune/tools/web/fetch.py +55 -0
- velune/tools/web/validator.py +122 -0
- velune_cli-0.9.0.dist-info/METADATA +518 -0
- velune_cli-0.9.0.dist-info/RECORD +279 -0
- velune_cli-0.9.0.dist-info/WHEEL +4 -0
- velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
- velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
"""Models command - velune models scan/list/assign."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
|
|
11
|
+
from velune.cli.context import CLIContext
|
|
12
|
+
|
|
13
|
+
# Module-level Rich console used by the commands in this file for human-readable output.
console = Console()
|
|
14
|
+
|
|
15
|
+
# Typer sub-application; the functions below register themselves on it as subcommands.
models_cmd = typer.Typer(help="Model management commands")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@models_cmd.command("scan")
def models_scan(
    ctx: typer.Context,
    provider: str = typer.Option(None, "--provider", "-p", help="Specific provider to scan"),
    probe: bool = typer.Option(
        False, "--probe", help="Run empirical capability probes synchronously and cache results"
    ),
) -> None:
    """Scan for available models.

    Runs ``_models_scan_async`` through ``velune.core.event_loop.submit`` and
    prints the returned records either as a JSON array (when the CLI context is
    in JSON mode) or as a Rich table followed by a one-line summary.

    Args:
        ctx: Typer context; ``ctx.obj`` is expected to be a ``CLIContext``.
        provider: Optional provider ID; when falsy every provider is scanned.
        probe: When true, the async scan also runs empirical capability probes.

    Raises:
        typer.Exit: With code 1 when ``ctx.obj`` is not a ``CLIContext``.
    """
    cli_context = ctx.obj if isinstance(ctx.obj, CLIContext) else None
    if cli_context is None:
        # ctx.obj may be some other object that still carries a json_mode flag,
        # so the error format is chosen from whatever is there.
        if ctx.obj and getattr(ctx.obj, "json_mode", False):
            import json

            print(json.dumps({"error": "Model discovery service is unavailable"}))
        else:
            console.print("[red]Model discovery service is unavailable.[/red]")
        raise typer.Exit(code=1)

    from velune.core.event_loop import submit

    records = submit(_models_scan_async(cli_context, provider, probe))

    # Derive the display fields once; both output formats consume the same tuples.
    summaries = [(record, *_scan_record_summary(record)) for record in records]

    if cli_context.json_mode:
        import json

        payload = [
            {
                "provider_id": record.provider_id,
                "model_id": record.model_id,
                "specialization": specialization,
                "speed_tier": record.speed_tier,
                "context_length": record.context_length,
                "embedding_supported": has_embedding,
                # A missing "validated" entry means the record was never probed.
                "status": "cached" if validated is None else ("online" if validated else "offline"),
            }
            for record, specialization, has_embedding, validated in summaries
        ]
        print(json.dumps(payload))
        return

    table = Table(title="Discovered Models")
    for header, style in (
        ("Provider", "cyan"),
        ("Model", "green"),
        ("Specialization", "magenta"),
        ("Speed", "blue"),
        ("Context", "yellow"),
        ("Embedding", "white"),
        ("Status", "bold"),
    ):
        table.add_column(header, style=style)

    for record, specialization, has_embedding, validated in summaries:
        if validated is None:
            status = "[dim]cached[/dim]"
        elif validated:
            status = "[green]●[/green]"
        else:
            status = "[red]✗ offline[/red]"

        table.add_row(
            record.provider_id,
            record.model_id,
            specialization,
            record.speed_tier,
            str(record.context_length),
            "yes" if has_embedding else "no",
            status,
        )

    console.print(table)

    total = len(records)
    providers = {r.provider_id for r in records}
    console.print(f"[dim]Discovered {total} model(s) across {len(providers)} provider(s).[/dim]")


def _scan_record_summary(record: Any) -> tuple[str, Any, Any]:
    """Return ``(specialization, has_embedding, validated)`` for one scanned record.

    ``specialization`` is the name of the highest-rated capability among coding,
    reasoning, planning, summarization and tool_use, or ``"general"`` when none
    of them is rated above ``CapabilityLevel.NONE``. ``has_embedding`` is the
    result of comparing the embedding capability against ``NONE``.
    ``validated`` is ``record.metadata.get("validated")`` (``None`` when absent).
    """
    from velune.core.types.model import CapabilityLevel

    capabilities = record.capabilities
    ranked = {
        "coding": capabilities.coding,
        "reasoning": capabilities.reasoning,
        "planning": capabilities.planning,
        "summarization": capabilities.summarization,
        "tool_use": capabilities.tool_use,
    }
    # max() returns the first of several equally rated entries, so ties resolve
    # in the declaration order above.
    best_name, best_level = max(ranked.items(), key=lambda item: item[1])
    specialization = best_name if best_level > CapabilityLevel.NONE else "general"
    has_embedding = capabilities.embedding > CapabilityLevel.NONE
    return specialization, has_embedding, record.metadata.get("validated")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
async def _models_scan_async(cli_context: CLIContext, provider_id: str | None, probe: bool) -> Any:
    """Discover models and, when requested, probe them empirically.

    Args:
        cli_context: CLI context whose container supplies the lifecycle,
            model-discovery and provider-registry services.
        provider_id: When truthy only this provider is scanned; otherwise all.
        probe: When true the lifecycle is started before scanning and shut down
            afterwards. Every record with a registered provider is pinged, and
            responsive models are run through ``ModelProber.run_all_probes``,
            with results written to the profile cache and the model registry.

    Returns:
        The records returned by the discovery service. When probing, records
        that were pinged get ``metadata["validated"]`` set to True or False.
    """
    container = cli_context.container
    lifecycle = container.get("runtime.lifecycle")
    discovery = container.get("runtime.model_discovery")
    provider_registry = container.get("runtime.provider_registry")

    if probe:
        await lifecycle.startup()

    try:
        if provider_id:
            records = await discovery.scan_provider(provider_id=provider_id)
        else:
            records = await discovery.scan_all()

        if not probe:
            return records

        import asyncio
        from pathlib import Path

        from velune.models.probes import FastProbe, ModelProber
        from velune.models.profile_cache import ModelProfileCache

        profile_cache = ModelProfileCache(Path(".velune") / "model_profiles.json")
        fast_probe = FastProbe()
        verbose = not cli_context.json_mode

        if verbose:
            console.print("[bold cyan]⠋[/bold cyan] Probing discovered models synchronously...")

        # Resolve each provider once and keep it next to its record, so later
        # stages do not have to look it up again.
        probeable = []
        for record in records:
            provider = provider_registry.get(record.provider_id)
            if provider:
                probeable.append((record, provider))

        if not probeable:
            return records

        responsiveness = await asyncio.gather(
            *(fast_probe.ping(provider, record.model_id) for record, provider in probeable),
            return_exceptions=True,
        )

        probed_records = []
        probe_runs = []
        for (record, provider), is_responsive in zip(probeable, responsiveness, strict=False):
            # Only a literal True counts as responsive; an exception object
            # returned by gather() or any other value marks the model offline.
            if is_responsive is True:
                probed_records.append(record)
                probe_runs.append(ModelProber(provider, record.model_id).run_all_probes())
                record.metadata["validated"] = True
            else:
                record.metadata["validated"] = False

        if not probe_runs:
            return records

        if verbose:
            console.print(
                f"[bold magenta]⚡ Running empirical capability probes for {len(probe_runs)} active model(s)...[/bold magenta]"
            )
        results = await asyncio.gather(*probe_runs, return_exceptions=True)

        # The registry does not change between results, so look it up once.
        registry = container.get("runtime.model_registry")

        for record, result in zip(probed_records, results, strict=False):
            # gather(return_exceptions=True) can also surface BaseException
            # subclasses (e.g. asyncio.CancelledError); treat any of them
            # as a failed probe so they are never cached as valid results.
            if isinstance(result, BaseException):
                if verbose:
                    console.print(f"[red]✗[/red] Probe failed for {record.model_id}: {result}")
                continue

            profile_cache.set(record.model_id, record.provider_id, result)
            if registry:
                registry._apply_probe_results(record, result)
                registry.register(record)

        if verbose:
            console.print("[bold green]✓[/bold green] Empirical benchmarks completed and cached.")

        return records
    finally:
        if probe:
            await lifecycle.shutdown()
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@models_cmd.command("list")
def models_list(ctx: typer.Context) -> None:
    """List registered models.

    In JSON mode prints an array of ``model_id`` / ``display_name`` /
    ``provider_id`` / ``capabilities`` objects and returns. Otherwise prints a
    Rich table and, when the container exposes GPU info reporting a GPU with a
    known free-VRAM figure, the available VRAM plus a warning for models whose
    ``vram_required_gb`` exceeds it.

    Args:
        ctx: Typer context; ``ctx.obj`` is expected to be a ``CLIContext``.
            Without one, the table shows a single placeholder row.
    """
    cli_context = ctx.obj if isinstance(ctx.obj, CLIContext) else None
    registry = cli_context.container.get("runtime.model_registry") if cli_context else None

    capability_names = (
        "coding",
        "reasoning",
        "planning",
        "summarization",
        "tool_use",
        "long_context",
    )

    # Collect every record with its (name, level) pairs rated above NONE once;
    # both output formats render from this list.
    records = []
    rated = []
    if registry is not None:
        from velune.core.types.model import CapabilityLevel

        records = registry.list_all()
        for record in records:
            levels = []
            for cap_name in capability_names:
                level = getattr(record.capabilities, cap_name, None)
                if level and level > CapabilityLevel.NONE:
                    levels.append((cap_name, level))
            rated.append((record, levels))

    if cli_context and cli_context.json_mode:
        import json

        print(
            json.dumps(
                [
                    {
                        "model_id": record.model_id,
                        "display_name": record.display_name,
                        "provider_id": record.provider_id,
                        "capabilities": [cap_name for cap_name, _ in levels],
                    }
                    for record, levels in rated
                ]
            )
        )
        return

    table = Table(title="Registered Models")
    table.add_column("ID", style="cyan")
    table.add_column("Name", style="green")
    table.add_column("Provider", style="magenta")
    table.add_column("Capabilities", style="blue")

    if registry is None:
        table.add_row("<uninitialized>", "Velune", "system", "bootstrap only")
    else:
        for record, levels in rated:
            described = [f"{cap_name} ({level.name})" for cap_name, level in levels]
            table.add_row(
                record.model_id,
                record.display_name,
                record.provider_id,
                ", ".join(described) or "none",
            )

    console.print(table)

    # Get GPU info and show VRAM details
    gpu_info = None
    if cli_context:
        try:
            gpu_info = cli_context.container.get("runtime.gpu_info")
        except Exception:
            # Best-effort: the VRAM footer is optional, so a failing lookup
            # just means it is not shown.
            pass

    if gpu_info and gpu_info.get("has_gpu"):
        free_gb = gpu_info.get("vram_free_gb")
        if free_gb is not None:
            console.print(f"[dim]Available VRAM: {free_gb:.1f}GB[/dim]")

            # Only compare against a known VRAM figure; comparing with None
            # would raise TypeError.
            over_budget = [
                m for m in records if m.vram_required_gb and m.vram_required_gb > free_gb
            ]
            if over_budget:
                console.print(f"[yellow]⚠ {len(over_budget)} models exceed available VRAM[/yellow]")
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@models_cmd.command("assign")
def models_assign(
    ctx: typer.Context,
    role: str = typer.Argument(
        ..., help="Agent role (planner, coder, reviewer, challenger, synthesizer)"
    ),
    model_id: str = typer.Argument(..., help="Model ID to assign"),
) -> None:
    """Assign a model to an agent role.

    Stores ``model_id`` in the council orchestrator mapper's ``overrides``
    under the parsed ``CouncilRole``. A model the registry does not know only
    triggers a warning (console mode only); the assignment is still made.

    Args:
        ctx: Typer context; ``ctx.obj`` is expected to be a ``CLIContext``.
        role: Role name, lower-cased before being parsed as a ``CouncilRole``.
        model_id: Model ID to record for that role.

    Raises:
        typer.Exit: With code 1 when the council orchestrator is unavailable or
            ``role`` is not a valid ``CouncilRole`` value.
    """
    cli_context = ctx.obj if isinstance(ctx.obj, CLIContext) else None
    json_mode = bool(cli_context and cli_context.json_mode)

    def abort(json_error: str, rich_message: str) -> None:
        # Emit the error in the active output format, then exit with status 1.
        if json_mode:
            import json

            print(json.dumps({"error": json_error}))
        else:
            console.print(rich_message)
        raise typer.Exit(code=1)

    orchestrator = None
    if cli_context:
        orchestrator = cli_context.container.get("runtime.council_orchestrator")

    if orchestrator is None:
        abort(
            "Council orchestrator is unavailable",
            "[red]Council orchestrator is unavailable.[/red]",
        )

    mapper = orchestrator.mapper
    try:
        from velune.models.specializations import CouncilRole

        council_role = CouncilRole(role.lower())
    except ValueError:
        abort(
            f"Invalid role '{role}'",
            f"[red]Invalid role '{role}'. Must be one of: planner, coder, reviewer, challenger, synthesizer[/red]",
        )

    # Check if model exists
    registry = cli_context.container.get("runtime.model_registry") if cli_context else None
    if registry:
        known = registry.get(model_id)
        if not known and not json_mode:
            console.print(
                f"[yellow]Warning: Model '{model_id}' is not currently registered/discovered.[/yellow]"
            )

    mapper.overrides[council_role] = model_id

    if json_mode:
        import json

        print(json.dumps({"success": True, "role": council_role.value, "model_id": model_id}))
        return

    console.print(
        f"[green]Successfully assigned role '{council_role.value}' to model '{model_id}' for the current runtime context.[/green]"
    )
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@models_cmd.command("benchmark")
def models_benchmark(
    ctx: typer.Context,
    model_id: str = typer.Argument(
        None, help="Specific model ID to benchmark. If omitted, benchmarks all registered models."
    ),
) -> None:
    """Run capability probes on a specific model or all registered models.

    Resolves the model and provider registries from the CLI container, picks
    the models to probe and hands them to ``_models_benchmark_async`` through
    ``velune.core.event_loop.submit``.

    Args:
        ctx: Typer context; ``ctx.obj`` is expected to be a ``CLIContext``.
        model_id: Registered model ID to benchmark; falsy means every model
            returned by the registry's ``list_all()``.

    Raises:
        typer.Exit: With code 1 when the CLI context or either registry is
            unavailable, or when ``model_id`` is not registered.
    """

    def abort(as_json: bool, json_error: str, rich_message: str) -> None:
        # Emit the error in the requested output format, then exit with status 1.
        if as_json:
            import json

            print(json.dumps({"error": json_error}))
        else:
            console.print(rich_message)
        raise typer.Exit(code=1)

    cli_context = ctx.obj if isinstance(ctx.obj, CLIContext) else None
    if not cli_context:
        # ctx.obj may be some other object that still carries a json_mode flag.
        abort(
            bool(ctx.obj and getattr(ctx.obj, "json_mode", False)),
            "CLI context is unavailable",
            "[red]CLI context is unavailable.[/red]",
        )

    container = cli_context.container
    registry = container.get("runtime.model_registry")
    provider_registry = container.get("runtime.provider_registry")

    if registry is None or provider_registry is None:
        abort(
            bool(cli_context.json_mode),
            "Model registry or provider registry is unavailable",
            "[red]Model registry or provider registry is unavailable.[/red]",
        )

    # Get the models to benchmark
    if model_id:
        selected = registry.get(model_id)
        if not selected:
            abort(
                bool(cli_context.json_mode),
                f"Model '{model_id}' is not registered",
                f"[red]Model '{model_id}' is not registered.[/red]",
            )
        models_to_probe = [selected]
    else:
        models_to_probe = registry.list_all()

    if not models_to_probe:
        # Nothing to benchmark is not an error: report it and return normally.
        if cli_context.json_mode:
            import json

            print(json.dumps([]))
        else:
            console.print("[yellow]No models registered for benchmarking.[/yellow]")
        return

    from velune.core.event_loop import submit

    submit(_models_benchmark_async(cli_context, registry, provider_registry, models_to_probe))
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
async def _models_benchmark_async(
    cli_context: CLIContext,
    registry: Any,
    provider_registry: Any,
    models_to_probe: list[Any],
) -> None:
    """Probe each model, persist the results and report them.

    For every model with an available provider this runs
    ``ModelProber.run_all_probes``, stores the result in the profile cache and
    applies it to the registry. In console mode progress is shown, a results
    table is printed and ``_auto_assign_models`` is called. In JSON mode models
    without a provider are skipped silently and one JSON array is printed.

    Args:
        cli_context: CLI context; only ``json_mode`` is read here.
        registry: Model registry receiving the probe results.
        provider_registry: Registry used to resolve ``model.provider_id``.
        models_to_probe: Model records to benchmark.
    """
    import json
    from pathlib import Path

    from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn

    from velune.models.probes import ModelProber
    from velune.models.profile_cache import ModelProfileCache

    profile_cache = ModelProfileCache(Path(".velune") / "model_profiles.json")

    async def probe_model(model: Any, provider: Any) -> tuple[Any, Any, Any]:
        # Run all probes for one model, persist them, and return the three
        # per-category results the reports are built from.
        results = await ModelProber(provider, model.model_id).run_all_probes()

        # Save to cache and registry
        profile_cache.set(model.model_id, model.provider_id, results)
        registry._apply_probe_results(model, results)

        return results["coding"], results["reasoning"], results["instruction"]

    if not cli_context.json_mode:
        # Store benchmark results for auto-assignment
        benchmark_results = []

        with Progress(
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        ) as progress:
            task_id = progress.add_task("[cyan]Benchmarking models...", total=len(models_to_probe))

            for model in models_to_probe:
                provider = provider_registry.get(model.provider_id)
                if not provider:
                    console.print(
                        f"[yellow]⊘[/yellow] {model.model_id}: Provider '{model.provider_id}' unavailable"
                    )
                    progress.advance(task_id)
                    continue

                progress.update(task_id, description=f"[cyan]Testing {model.model_id}...")

                coding, reasoning, instruction = await probe_model(model, provider)
                avg_latency, speed_score = _latency_speed_score(
                    [coding.latency_ms, reasoning.latency_ms, instruction.latency_ms]
                )

                benchmark_results.append(
                    {
                        "model": model,
                        "coding": coding,
                        "reasoning": reasoning,
                        "instruction": instruction,
                        "speed_score": speed_score,
                        "avg_latency_ms": avg_latency,
                    }
                )

                progress.advance(task_id)

        # Display results table
        _display_benchmark_results(cli_context, benchmark_results)

        # Auto-assign models based on scores
        _auto_assign_models(cli_context, registry, benchmark_results)
        return

    # JSON mode: just collect and output raw results
    json_results = []

    for model in models_to_probe:
        provider = provider_registry.get(model.provider_id)
        if not provider:
            continue

        coding, reasoning, instruction = await probe_model(model, provider)

        json_results.append(
            {
                "model_id": model.model_id,
                "provider_id": model.provider_id,
                "results": {
                    name: {
                        "score": outcome.score,
                        "latency_ms": outcome.latency_ms,
                        "passed": outcome.passed,
                    }
                    for name, outcome in (
                        ("coding", coding),
                        ("reasoning", reasoning),
                        ("instruction", instruction),
                    )
                },
            }
        )

    print(json.dumps(json_results))


def _latency_speed_score(
    latencies_ms: list[float], worst_ms: float = 3000.0
) -> tuple[float, float]:
    """Return ``(average_latency_ms, speed_score)`` for a set of probe latencies.

    Only latencies greater than zero are averaged. The score falls linearly
    from 1.0 at 0 ms to 0.0 at ``worst_ms`` and is never negative. With no
    positive latency at all the score is 0.0 rather than a perfect 1.0, so a
    model whose probes all failed is not ranked as the fastest.
    """
    measured = [latency for latency in latencies_ms if latency > 0]
    if not measured:
        return 0.0, 0.0
    average = sum(measured) / len(measured)
    return average, max(0.0, 1.0 - (average / worst_ms))
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _display_benchmark_results(cli_context: Any, benchmark_results: list[dict]) -> None:
    """Display benchmark results in a Rich table.

    Args:
        cli_context: CLI context object. Not read by this function; kept so
            the signature stays unchanged for existing callers.
        benchmark_results: One dict per probed model. Each dict is read for
            the keys ``"model"`` (exposing ``model_id`` and ``provider_id``),
            ``"coding"``, ``"reasoning"`` and ``"instruction"`` (probe results
            exposing ``score``, ``latency_ms`` and ``passed``), and
            ``"speed_score"`` (a float).
    """

    # The formatters do not depend on the row being rendered, so they are
    # defined once here instead of being rebuilt on every loop iteration.
    def format_score(probe_result) -> str:
        # A probe reporting a negative latency is rendered as a failure.
        if probe_result.latency_ms < 0:
            return "[red]Failed[/red]"
        color = "green" if probe_result.passed else "yellow"
        level_name = _score_to_level_name(probe_result.score)
        return f"[{color}]{probe_result.score:.2f}[/{color}]\n{level_name}"

    def format_speed(score_val: float) -> str:
        color = "green" if score_val > 0.7 else "yellow" if score_val > 0.4 else "red"
        level_name = _score_to_level_name(score_val)
        return f"[{color}]{score_val:.2f}[/{color}]\n{level_name}"

    table = Table(title="Benchmark Results")
    table.add_column("Model", style="cyan", width=30)
    table.add_column("Provider", style="magenta", width=15)
    table.add_column("Coding", style="green", width=14)
    table.add_column("Reasoning", style="blue", width=14)
    table.add_column("Instruction", style="yellow", width=14)
    table.add_column("Speed", style="white", width=14)

    for result in benchmark_results:
        model = result["model"]
        table.add_row(
            model.model_id,
            model.provider_id,
            format_score(result["coding"]),
            format_score(result["reasoning"]),
            format_score(result["instruction"]),
            format_speed(result["speed_score"]),
        )

    console.print(table)
    # Blank line to separate the table from whatever is printed next.
    console.print()
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def _score_to_level_name(score: float) -> str:
|
|
607
|
+
"""Convert numerical score to capability level name."""
|
|
608
|
+
if score >= 0.85:
|
|
609
|
+
return "EXPERT"
|
|
610
|
+
elif score >= 0.70:
|
|
611
|
+
return "ADVANCED"
|
|
612
|
+
elif score >= 0.50:
|
|
613
|
+
return "INTERMEDIATE"
|
|
614
|
+
elif score >= 0.25:
|
|
615
|
+
return "BASIC"
|
|
616
|
+
else:
|
|
617
|
+
return "NONE"
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _auto_assign_models(cli_context: Any, registry: Any, benchmark_results: list[dict]) -> None:
    """Suggest model assignments based on benchmark results.

    Picks the entry with the highest coding score, the highest reasoning
    score and the highest speed score, prints the three suggestions, and asks
    the user whether to persist them via ``_save_model_assignments``.

    Args:
        cli_context: CLI context object. Not read by this function; kept so
            the signature stays unchanged for existing callers.
        registry: Model registry. Not read by this function; kept for the
            same reason.
        benchmark_results: One dict per probed model, read for the keys
            ``"model"``, ``"coding"``, ``"reasoning"`` and ``"speed_score"``.
            Nothing is printed when the list is empty.
    """
    if not benchmark_results:
        return

    # Find best models for each capability
    best_coding = max(benchmark_results, key=lambda r: r["coding"].score)
    best_reasoning = max(benchmark_results, key=lambda r: r["reasoning"].score)
    best_speed = max(benchmark_results, key=lambda r: r["speed_score"])

    console.print("[bold]Suggested Model Assignments:[/bold]\n")
    console.print(
        f"  [cyan]Coding:[/cyan]     {best_coding['model'].model_id} "
        f"({best_coding['coding'].score:.2f} - {_score_to_level_name(best_coding['coding'].score)})"
    )
    console.print(
        f"  [blue]Reasoning:[/blue]  {best_reasoning['model'].model_id} "
        f"({best_reasoning['reasoning'].score:.2f} - {_score_to_level_name(best_reasoning['reasoning'].score)})"
    )
    console.print(
        f"  [green]Fast Model:[/green] {best_speed['model'].model_id} "
        f"({best_speed['speed_score']:.2f} - {_score_to_level_name(best_speed['speed_score'])})\n"
    )

    # Prompt for confirmation
    try:
        # The builtin input() prints its prompt verbatim, so the markup tags
        # would be shown literally. console.input renders the markup; the
        # backslash keeps "[y/N]" from being parsed as a style tag.
        # NOTE(review): assumes the module-level `console` is a Rich Console.
        response = (
            console.input(r"[bold]Apply these assignments?[/bold] \[y/N] ").strip().lower()
        )
        if response in ("y", "yes"):
            # Save assignments to config
            _save_model_assignments(
                best_coding["model"].model_id,
                best_reasoning["model"].model_id,
                best_speed["model"].model_id,
            )
            console.print("[green]✓[/green] Model assignments saved.")
        else:
            console.print("[dim]Assignments not applied.[/dim]")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl+C is treated as declining the suggestion.
        console.print("[dim]Assignments not applied.[/dim]")
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _save_model_assignments(coding_model: str, reasoning_model: str, fast_model: str) -> None:
    """Save model assignments to the project configuration.

    Writes the three model ids under the ``"model_assignments"`` key of
    ``.velune/config.json`` (relative to the current working directory),
    creating the directory if needed and keeping any other keys already
    stored in the file.

    Args:
        coding_model: Model id stored as ``"coding_model"``.
        reasoning_model: Model id stored as ``"reasoning_model"``.
        fast_model: Model id stored as ``"fast_model"``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    import json
    from pathlib import Path

    config_file = Path(".velune") / "config.json"
    config_file.parent.mkdir(parents=True, exist_ok=True)

    config: dict = {}
    if config_file.exists():
        try:
            loaded = json.loads(config_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both malformed JSON and undecodable bytes.
            loaded = None
        if isinstance(loaded, dict):
            config = loaded
        else:
            # Stay best-effort (start from an empty config), but tell the user
            # that the previous content is being replaced instead of hiding it.
            console.print(
                f"[yellow]Could not read {config_file} as a JSON object; "
                "it will be overwritten.[/yellow]"
            )

    config["model_assignments"] = {
        "coding_model": coding_model,
        "reasoning_model": reasoning_model,
        "fast_model": fast_model,
    }

    config_file.write_text(json.dumps(config, indent=2), encoding="utf-8")
    console.print(f"[dim]Saved to {config_file}[/dim]")
|