velune-cli 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. velune/__init__.py +5 -0
  2. velune/__main__.py +6 -0
  3. velune/cli/__init__.py +5 -0
  4. velune/cli/app.py +208 -0
  5. velune/cli/autocomplete.py +80 -0
  6. velune/cli/banner.py +60 -0
  7. velune/cli/commands/__init__.py +32 -0
  8. velune/cli/commands/ask.py +175 -0
  9. velune/cli/commands/base.py +16 -0
  10. velune/cli/commands/chat.py +228 -0
  11. velune/cli/commands/config.py +224 -0
  12. velune/cli/commands/daemon.py +88 -0
  13. velune/cli/commands/doctor.py +721 -0
  14. velune/cli/commands/init.py +170 -0
  15. velune/cli/commands/mcp.py +82 -0
  16. velune/cli/commands/memory.py +293 -0
  17. velune/cli/commands/models.py +683 -0
  18. velune/cli/commands/preflight.py +95 -0
  19. velune/cli/commands/run.py +270 -0
  20. velune/cli/commands/setup.py +184 -0
  21. velune/cli/commands/workspace.py +249 -0
  22. velune/cli/context.py +36 -0
  23. velune/cli/councilmodel_ui.py +199 -0
  24. velune/cli/display/council_view.py +254 -0
  25. velune/cli/display/memory_view.py +126 -0
  26. velune/cli/display/panels.py +35 -0
  27. velune/cli/display/progress.py +25 -0
  28. velune/cli/display/themes.py +25 -0
  29. velune/cli/main.py +15 -0
  30. velune/cli/model_selector.py +51 -0
  31. velune/cli/modes.py +86 -0
  32. velune/cli/pull_ui.py +123 -0
  33. velune/cli/registry.py +80 -0
  34. velune/cli/rendering/__init__.py +5 -0
  35. velune/cli/rendering/error_panel.py +79 -0
  36. velune/cli/rendering/markdown.py +63 -0
  37. velune/cli/repl.py +1855 -0
  38. velune/cli/session_manager.py +71 -0
  39. velune/cli/slash_commands.py +37 -0
  40. velune/cli/theme.py +8 -0
  41. velune/cognition/__init__.py +23 -0
  42. velune/cognition/agents/__init__.py +7 -0
  43. velune/cognition/agents/coder.py +209 -0
  44. velune/cognition/agents/planner.py +156 -0
  45. velune/cognition/agents/reviewer.py +195 -0
  46. velune/cognition/arbitrator.py +220 -0
  47. velune/cognition/architecture.py +415 -0
  48. velune/cognition/budget.py +65 -0
  49. velune/cognition/council/__init__.py +47 -0
  50. velune/cognition/council/base.py +217 -0
  51. velune/cognition/council/challenger.py +74 -0
  52. velune/cognition/council/coder.py +79 -0
  53. velune/cognition/council/critic_agent.py +43 -0
  54. velune/cognition/council/critic_configs.py +111 -0
  55. velune/cognition/council/critics.py +41 -0
  56. velune/cognition/council/debate.py +46 -0
  57. velune/cognition/council/factory.py +140 -0
  58. velune/cognition/council/messages.py +56 -0
  59. velune/cognition/council/planner.py +124 -0
  60. velune/cognition/council/reviewer.py +74 -0
  61. velune/cognition/council/synthesizer.py +67 -0
  62. velune/cognition/council/tiers.py +188 -0
  63. velune/cognition/council_orchestrator.py +282 -0
  64. velune/cognition/firewall.py +354 -0
  65. velune/cognition/module.py +46 -0
  66. velune/cognition/orchestrator.py +1205 -0
  67. velune/cognition/personality.py +238 -0
  68. velune/cognition/state.py +104 -0
  69. velune/cognition/style_resolver.py +64 -0
  70. velune/cognition/verification.py +205 -0
  71. velune/context/__init__.py +28 -0
  72. velune/context/assembler.py +240 -0
  73. velune/context/budget.py +97 -0
  74. velune/context/extractive.py +95 -0
  75. velune/context/prompt_adaptation.py +480 -0
  76. velune/context/sections.py +99 -0
  77. velune/context/token_counter.py +134 -0
  78. velune/context/utilization.py +33 -0
  79. velune/context/window.py +63 -0
  80. velune/core/__init__.py +89 -0
  81. velune/core/background.py +5 -0
  82. velune/core/config/__init__.py +37 -0
  83. velune/core/errors/__init__.py +90 -0
  84. velune/core/errors/catalog.py +188 -0
  85. velune/core/errors/execution.py +31 -0
  86. velune/core/errors/memory.py +25 -0
  87. velune/core/errors/orchestration.py +31 -0
  88. velune/core/errors/provider.py +37 -0
  89. velune/core/event_loop.py +35 -0
  90. velune/core/logging.py +83 -0
  91. velune/core/paths.py +165 -0
  92. velune/core/runtime.py +113 -0
  93. velune/core/startup_profiler.py +56 -0
  94. velune/core/task_registry.py +117 -0
  95. velune/core/trace.py +83 -0
  96. velune/core/types/__init__.py +48 -0
  97. velune/core/types/agent.py +53 -0
  98. velune/core/types/context.py +42 -0
  99. velune/core/types/inference.py +38 -0
  100. velune/core/types/memory.py +42 -0
  101. velune/core/types/model.py +70 -0
  102. velune/core/types/provider.py +62 -0
  103. velune/core/types/repository.py +38 -0
  104. velune/core/types/task.py +61 -0
  105. velune/core/types/workspace.py +28 -0
  106. velune/daemon/client.py +13 -0
  107. velune/daemon/server.py +127 -0
  108. velune/daemon/transport.py +179 -0
  109. velune/events.py +204 -0
  110. velune/execution/__init__.py +22 -0
  111. velune/execution/benchmarker.py +315 -0
  112. velune/execution/cancellation.py +53 -0
  113. velune/execution/checkpointer.py +130 -0
  114. velune/execution/command_spec.py +165 -0
  115. velune/execution/diff_preview.py +197 -0
  116. velune/execution/executor.py +181 -0
  117. velune/execution/module.py +18 -0
  118. velune/execution/multi_diff.py +67 -0
  119. velune/execution/path_guard.py +74 -0
  120. velune/execution/planner.py +91 -0
  121. velune/execution/rollback.py +89 -0
  122. velune/execution/sandbox.py +268 -0
  123. velune/execution/validator.py +115 -0
  124. velune/hardware/__init__.py +1 -0
  125. velune/hardware/detector.py +192 -0
  126. velune/kernel/__init__.py +55 -0
  127. velune/kernel/bootstrap.py +125 -0
  128. velune/kernel/config.py +426 -0
  129. velune/kernel/entrypoint.py +78 -0
  130. velune/kernel/health.py +54 -0
  131. velune/kernel/lifecycle.py +143 -0
  132. velune/kernel/module.py +17 -0
  133. velune/kernel/modules.py +23 -0
  134. velune/kernel/registry.py +96 -0
  135. velune/kernel/schemas.py +28 -0
  136. velune/main.py +9 -0
  137. velune/mcp/__init__.py +9 -0
  138. velune/mcp/client.py +115 -0
  139. velune/mcp/config.py +19 -0
  140. velune/mcp/server.py +624 -0
  141. velune/memory/__init__.py +32 -0
  142. velune/memory/compaction.py +506 -0
  143. velune/memory/embedding_pipeline.py +241 -0
  144. velune/memory/lifecycle.py +680 -0
  145. velune/memory/module.py +218 -0
  146. velune/memory/prioritizer.py +67 -0
  147. velune/memory/storage/episodic_schema.sql +53 -0
  148. velune/memory/storage/lancedb_store.py +282 -0
  149. velune/memory/storage/sqlite_manager.py +369 -0
  150. velune/memory/storage/sqlite_pool.py +149 -0
  151. velune/memory/tiers/episodic.py +588 -0
  152. velune/memory/tiers/graph.py +378 -0
  153. velune/memory/tiers/lineage.py +416 -0
  154. velune/memory/tiers/semantic.py +475 -0
  155. velune/memory/tiers/working.py +168 -0
  156. velune/memory/vitality.py +132 -0
  157. velune/models/__init__.py +15 -0
  158. velune/models/family.py +76 -0
  159. velune/models/module.py +20 -0
  160. velune/models/probes.py +192 -0
  161. velune/models/profile_cache.py +84 -0
  162. velune/models/profiler.py +108 -0
  163. velune/models/registry.py +251 -0
  164. velune/models/scorer.py +233 -0
  165. velune/models/specializations.py +205 -0
  166. velune/orchestration/__init__.py +19 -0
  167. velune/orchestration/engine.py +239 -0
  168. velune/orchestration/module.py +15 -0
  169. velune/orchestration/role_assignments.py +82 -0
  170. velune/orchestration/schemas.py +98 -0
  171. velune/plugins/__init__.py +20 -0
  172. velune/plugins/hooks.py +50 -0
  173. velune/plugins/loader.py +161 -0
  174. velune/plugins/registry.py +56 -0
  175. velune/plugins/schemas.py +21 -0
  176. velune/providers/__init__.py +23 -0
  177. velune/providers/adapters/anthropic.py +257 -0
  178. velune/providers/adapters/fireworks.py +115 -0
  179. velune/providers/adapters/google.py +234 -0
  180. velune/providers/adapters/groq.py +151 -0
  181. velune/providers/adapters/huggingface.py +210 -0
  182. velune/providers/adapters/llamacpp.py +208 -0
  183. velune/providers/adapters/lmstudio.py +175 -0
  184. velune/providers/adapters/ollama.py +233 -0
  185. velune/providers/adapters/openai.py +213 -0
  186. velune/providers/adapters/openrouter.py +81 -0
  187. velune/providers/adapters/together.py +134 -0
  188. velune/providers/adapters/xai.py +60 -0
  189. velune/providers/base.py +86 -0
  190. velune/providers/benchmarker.py +138 -0
  191. velune/providers/discovery/__init__.py +33 -0
  192. velune/providers/discovery/anthropic.py +79 -0
  193. velune/providers/discovery/benchmarks.py +44 -0
  194. velune/providers/discovery/classifier.py +69 -0
  195. velune/providers/discovery/fireworks.py +95 -0
  196. velune/providers/discovery/gguf.py +88 -0
  197. velune/providers/discovery/google.py +95 -0
  198. velune/providers/discovery/gpu.py +117 -0
  199. velune/providers/discovery/groq.py +21 -0
  200. velune/providers/discovery/huggingface.py +67 -0
  201. velune/providers/discovery/lmstudio.py +80 -0
  202. velune/providers/discovery/ollama.py +162 -0
  203. velune/providers/discovery/openai.py +96 -0
  204. velune/providers/discovery/openrouter.py +113 -0
  205. velune/providers/discovery/scanner.py +115 -0
  206. velune/providers/discovery/together.py +114 -0
  207. velune/providers/discovery/xai.py +57 -0
  208. velune/providers/health.py +67 -0
  209. velune/providers/health_monitor.py +169 -0
  210. velune/providers/keystore.py +142 -0
  211. velune/providers/local_paths.py +49 -0
  212. velune/providers/local_resolver.py +229 -0
  213. velune/providers/module.py +51 -0
  214. velune/providers/ollama_manager.py +193 -0
  215. velune/providers/registry.py +220 -0
  216. velune/providers/router.py +255 -0
  217. velune/providers/task_classifier.py +288 -0
  218. velune/py.typed +0 -0
  219. velune/repository/__init__.py +33 -0
  220. velune/repository/analyzer.py +127 -0
  221. velune/repository/ast_parser.py +822 -0
  222. velune/repository/blast_radius.py +298 -0
  223. velune/repository/boundary_classifier.py +295 -0
  224. velune/repository/cognition.py +316 -0
  225. velune/repository/grapher.py +179 -0
  226. velune/repository/import_graph.py +263 -0
  227. velune/repository/incremental_indexer.py +275 -0
  228. velune/repository/index_state.py +96 -0
  229. velune/repository/indexer.py +243 -0
  230. velune/repository/module.py +17 -0
  231. velune/repository/parser.py +474 -0
  232. velune/repository/project_type.py +300 -0
  233. velune/repository/rename_journal.py +287 -0
  234. velune/repository/scanner.py +193 -0
  235. velune/repository/schemas.py +102 -0
  236. velune/repository/symbol_registry.py +365 -0
  237. velune/repository/tracker.py +252 -0
  238. velune/retrieval/__init__.py +27 -0
  239. velune/retrieval/cache.py +110 -0
  240. velune/retrieval/fast_path.py +391 -0
  241. velune/retrieval/graph.py +124 -0
  242. velune/retrieval/hybrid.py +271 -0
  243. velune/retrieval/keyword.py +131 -0
  244. velune/retrieval/module.py +26 -0
  245. velune/retrieval/pipeline.py +303 -0
  246. velune/retrieval/reranker.py +102 -0
  247. velune/retrieval/schemas.py +59 -0
  248. velune/retrieval/slow_path.py +364 -0
  249. velune/retrieval/vector.py +203 -0
  250. velune/telemetry/__init__.py +59 -0
  251. velune/telemetry/cognition.py +267 -0
  252. velune/telemetry/cost_estimator.py +92 -0
  253. velune/telemetry/debug.py +304 -0
  254. velune/telemetry/doctor.py +244 -0
  255. velune/telemetry/logging.py +286 -0
  256. velune/telemetry/spans.py +277 -0
  257. velune/telemetry/token_tracker.py +140 -0
  258. velune/telemetry/usage_tracker.py +340 -0
  259. velune/tools/__init__.py +41 -0
  260. velune/tools/base/registry.py +87 -0
  261. velune/tools/base/tool.py +63 -0
  262. velune/tools/code/navigate.py +116 -0
  263. velune/tools/code/search.py +123 -0
  264. velune/tools/filesystem/read.py +75 -0
  265. velune/tools/filesystem/search.py +136 -0
  266. velune/tools/filesystem/write.py +163 -0
  267. velune/tools/git/history.py +177 -0
  268. velune/tools/git/operations.py +122 -0
  269. velune/tools/git/state.py +121 -0
  270. velune/tools/module.py +81 -0
  271. velune/tools/terminal/execute.py +72 -0
  272. velune/tools/terminal/history.py +47 -0
  273. velune/tools/web/fetch.py +55 -0
  274. velune/tools/web/validator.py +122 -0
  275. velune_cli-0.9.0.dist-info/METADATA +518 -0
  276. velune_cli-0.9.0.dist-info/RECORD +279 -0
  277. velune_cli-0.9.0.dist-info/WHEEL +4 -0
  278. velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
  279. velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,251 @@
1
+ """Model capability registry with empirical probe evaluation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from pathlib import Path
8
+
9
+ from velune.core.types.model import CapabilityLevel, ModelCapabilityProfile, ModelDescriptor
10
+ from velune.models.profile_cache import ModelProfileCache
11
+ from velune.providers.discovery.scanner import ModelDiscoveryScanner
12
+
13
+ logger = logging.getLogger("velune.models.registry")
14
+
15
+
16
class ModelCapabilityRegistry:
    """Unified database cataloging discovered models and capabilities with empirical routing.

    Descriptors are indexed under ``"<provider_id>/<model_id>"``. The first
    descriptor registered for a given ``model_id`` is additionally indexed under
    the bare ``model_id`` so it can be looked up without a provider.
    """

    def __init__(self, scanner: ModelDiscoveryScanner | None = None) -> None:
        """Create an empty registry.

        Args:
            scanner: Discovery scanner used by :meth:`refresh`. A default
                ``ModelDiscoveryScanner`` is created when omitted (or falsy).
        """
        self.scanner = scanner or ModelDiscoveryScanner()
        self._models: dict[str, ModelDescriptor] = {}
        # Strong references to fire-and-forget tasks. The event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._background_tasks: set[asyncio.Task] = set()

    async def refresh(self) -> None:
        """Scan all providers and refresh the local catalog cache with empirical profiles.

        Models with a cached profile get their capabilities from the cache.
        Uncached models whose provider can be resolved are pinged concurrently;
        responsive ones are flagged ``metadata["validated"] = True`` and a full
        probe is scheduled in the background, unresponsive ones are flagged
        ``False``. Any failure is logged and swallowed so a refresh never raises.
        """
        try:
            discovered = await self.scanner.scan_all()
            self._models.clear()

            profile_cache = ModelProfileCache(Path(".velune") / "model_profiles.json")
            from velune.models.probes import FastProbe

            fast_probe = FastProbe()

            probing_tasks = []
            models_to_probe = []

            for model in discovered:
                cached = profile_cache.get(model.model_id, model.provider_id)
                if cached:
                    # Apply cached probe results to capability profile
                    self._apply_probe_results(model, cached["probes"])
                    continue

                # Only models whose provider is currently resolvable can be probed
                provider = self._lookup_provider(model.provider_id)
                if provider:
                    models_to_probe.append(model)
                    probing_tasks.append(fast_probe.ping(provider, model.model_id))

            # Execute fast probes concurrently
            if probing_tasks:
                results = await asyncio.gather(*probing_tasks, return_exceptions=True)
                for model, responsive in zip(models_to_probe, results, strict=False):
                    # gather() may hand back BaseException instances such as
                    # CancelledError; those must not count as a truthy "responsive".
                    if isinstance(responsive, BaseException):
                        responsive = False

                    if responsive:
                        model.metadata["validated"] = True
                        self._schedule_full_probe(model, profile_cache)
                    else:
                        model.metadata["validated"] = False
                        logger.info("Model %s is not responding, skipping probe", model.model_id)

            # Store models in mapping
            for model in discovered:
                self.register(model)

            logger.info(
                "Indexed %d models (%d validated)",
                len(discovered),
                sum(1 for m in discovered if m.metadata.get("validated", True)),
            )
        except Exception as e:
            logger.error("Failed to discover models during catalog refresh: %s", e)

    def _lookup_provider(self, provider_id: str):
        """Return the provider registered under ``provider_id``, or ``None`` if it cannot be resolved."""
        try:
            from velune.kernel.registry import get_container

            provider_reg = get_container().get("runtime.provider_registry")
            return provider_reg.get(provider_id)
        except Exception as exc:
            # Best effort: an unavailable container/registry just means "do not probe".
            logger.debug("Provider lookup failed for %s: %s", provider_id, exc)
            return None

    def _schedule_full_probe(self, model: ModelDescriptor, profile_cache: ModelProfileCache) -> None:
        """Kick off a full capability probe for ``model`` without blocking the caller.

        Delegates to the running daemon when there is one, otherwise submits
        :meth:`_probe_model_background` to the runtime task registry. Failures
        are logged at debug level and never propagate.
        """
        try:
            from velune.daemon.client import DaemonClient

            if DaemonClient.is_running():
                # Fire-and-forget IPC dispatch to the daemon; keep a reference
                # until the task completes so it cannot be collected early.
                task = asyncio.create_task(
                    DaemonClient.send_command(
                        "probe_model",
                        model_id=model.model_id,
                        provider_id=model.provider_id,
                    )
                )
                self._background_tasks.add(task)
                task.add_done_callback(self._on_background_task_done)
                logger.info(
                    "Delegated full probing of model %s to the active Velune daemon process.",
                    model.model_id,
                )
            else:
                from velune.kernel.registry import get_container

                task_reg = get_container().get("runtime.task_registry")
                task_reg.submit(
                    name=f"full_probe_{model.model_id}",
                    coro=self._probe_model_background(model, profile_cache),
                    timeout_seconds=120.0,
                )
        except Exception as exc:
            logger.debug("Could not schedule full probe for %s: %s", model.model_id, exc)

    def _on_background_task_done(self, task: asyncio.Task) -> None:
        """Release a finished background task and surface its failure, if any, in the debug log."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.debug("Background daemon dispatch failed: %s", exc)

    def register(self, descriptor: ModelDescriptor) -> None:
        """Explicitly register a custom model descriptor.

        The ``provider/model`` key is always (re)assigned; the bare ``model_id``
        key is only set when it is not already taken.
        """
        key = f"{descriptor.provider_id}/{descriptor.model_id}"
        self._models[key] = descriptor
        if descriptor.model_id not in self._models:
            self._models[descriptor.model_id] = descriptor

    def get(self, model_id: str, provider_id: str | None = None) -> ModelDescriptor | None:
        """Look up a model descriptor by ID and optional provider prefix.

        Returns:
            The matching descriptor, or ``None`` when nothing matches. With a
            ``provider_id`` only the exact ``provider/model`` key is consulted.
        """
        if provider_id:
            return self._models.get(f"{provider_id}/{model_id}")

        # Try direct match
        if model_id in self._models:
            return self._models[model_id]

        # Fall back to scanning descriptors by their model_id attribute
        for model in self._models.values():
            if model.model_id == model_id:
                return model
        return None

    def list_all(self) -> list[ModelDescriptor]:
        """List all currently indexed model descriptors, one per (provider, model) pair."""
        # Each descriptor can sit under two keys, so de-duplicate while keeping order
        seen = set()
        unique = []
        for model in self._models.values():
            ref = (model.provider_id, model.model_id)
            if ref not in seen:
                seen.add(ref)
                unique.append(model)
        return unique

    def get_by_provider(self, provider_id: str) -> list[ModelDescriptor]:
        """List all models registered under a specific provider."""
        return [model for model in self.list_all() if model.provider_id == provider_id]

    def _apply_probe_results(self, model: ModelDescriptor, probes: dict) -> None:
        """Map float probe scores to CapabilityLevel and update the model descriptor in place.

        ``probes`` is read for the keys ``"coding"``, ``"reasoning"`` and
        ``"instruction"``; each entry may be an object with a ``score`` attribute
        or a mapping with a ``"score"`` key. Missing entries count as 0.0.

        Score mapping:
            - score > 0.85 → EXPERT
            - score > 0.70 → ADVANCED
            - score > 0.50 → INTERMEDIATE
            - score > 0.0  → BASIC
            - otherwise    → NONE
        """
        if not model.capabilities:
            model.capabilities = ModelCapabilityProfile()

        def score_to_level(score: float) -> CapabilityLevel:
            if score > 0.85:
                return CapabilityLevel.EXPERT
            elif score > 0.70:
                return CapabilityLevel.ADVANCED
            elif score > 0.50:
                return CapabilityLevel.INTERMEDIATE
            elif score > 0.0:
                return CapabilityLevel.BASIC
            return CapabilityLevel.NONE

        def probe_score(name: str) -> float:
            entry = probes.get(name)
            if entry is None:
                return 0.0
            value = entry.score if hasattr(entry, "score") else entry.get("score", 0.0)
            # A stored null score is treated like a missing one
            return 0.0 if value is None else value

        coding_score = probe_score("coding")
        reasoning_score = probe_score("reasoning")
        instruction_score = probe_score("instruction")

        model.capabilities.coding = score_to_level(coding_score)
        model.capabilities.reasoning = score_to_level(reasoning_score)
        model.capabilities.instruction_following = score_to_level(instruction_score)

        # Infer other capabilities from primary scores.
        # NOTE(review): these assignments overwrite whatever level was there
        # before, including a higher one — confirm that is intended.
        if model.capabilities.reasoning >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.planning = CapabilityLevel.INTERMEDIATE
        if model.capabilities.instruction_following >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.tool_use = CapabilityLevel.INTERMEDIATE
        if model.capabilities.coding >= CapabilityLevel.INTERMEDIATE:
            model.capabilities.code_analysis = CapabilityLevel.INTERMEDIATE

        logger.debug(
            "Applied probe results to %s: coding=%s (%.2f), reasoning=%s (%.2f), instruction=%s (%.2f)",
            model.model_id,
            model.capabilities.coding.name,
            coding_score,
            model.capabilities.reasoning.name,
            reasoning_score,
            model.capabilities.instruction_following.name,
            instruction_score,
        )

    async def _probe_model_background(
        self, model: ModelDescriptor, cache: ModelProfileCache
    ) -> None:
        """Run probes in background, update model in registry when done.

        Results are written to ``cache`` and applied to ``model``. The probe is
        skipped when no provider registry or provider is available; any
        exception is logged at debug level and swallowed.
        """
        try:
            from velune.kernel.registry import get_container

            container = get_container()
            if not container.has("runtime.provider_registry"):
                logger.debug(
                    "No provider registry registered yet, skipping background probe for %s.",
                    model.model_id,
                )
                return

            provider_registry = container.get("runtime.provider_registry")
            provider = provider_registry.get(model.provider_id)
            if not provider:
                logger.debug("No active provider found for %s, skipping probe.", model.model_id)
                return

            from velune.models.probes import ModelProber

            prober = ModelProber(provider, model.model_id)
            results = await prober.run_all_probes()
            cache.set(model.model_id, model.provider_id, results)
            self._apply_probe_results(model, results)
            logger.info(
                "Successfully probed %s: coding=%.2f reasoning=%.2f",
                model.model_id,
                results["coding"].score,
                results["reasoning"].score,
            )
        except Exception as e:
            logger.debug("Background probe failed for %s: %s", model.model_id, e)
@@ -0,0 +1,233 @@
1
+ """Multi-factor scorer for model routing and selection with family-specific and quantization-aware adjustments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from velune.core.types.model import CapabilityLevel, ModelDescriptor
8
+ from velune.models.profiler import ModelProfile
9
+
10
+ logger = logging.getLogger("velune.models.scorer")
11
+
12
+
13
class ModelScorer:
    """Calculates multidimensional matching scores for model selection."""

    def __init__(
        self,
        w_capability: float = 0.4,
        w_context: float = 0.2,
        w_speed: float = 0.2,
        w_reliability: float = 0.1,
        w_cost: float = 0.1,
    ) -> None:
        """Initialize routing weights.

        Each weight scales one component of :meth:`score`; the cost weight is
        applied to a penalty and therefore subtracts from the total.
        """
        self.w_capability = w_capability
        self.w_context = w_context
        self.w_speed = w_speed
        self.w_reliability = w_reliability
        self.w_cost = w_cost

    def _detect_model_family(self, model_id: str) -> str:
        """Detect model family from ID for family-specific scoring adjustments.

        Matching is by case-insensitive substring; the first family whose
        pattern occurs in ``model_id`` wins, ``"unknown"`` if none does.
        """
        lower = model_id.lower()
        # Order matters: "codellama" contains "llama", so the more specific
        # family has to be tested before the generic one or it can never match.
        families = (
            ("qwen", ("qwen",)),
            ("deepseek", ("deepseek",)),
            ("codellama", ("codellama",)),
            ("llama", ("llama", "meta-llama")),
            ("mistral", ("mistral", "mixtral")),
            ("phi", ("phi",)),
            ("gemma", ("gemma",)),
            ("starcoder", ("starcoder",)),
        )
        for family, patterns in families:
            if any(p in lower for p in patterns):
                return family
        return "unknown"

    def _get_family_capability_adjustments(self, family: str, task_category: str) -> float:
        """
        Return capability score adjustments for known model families.
        Positive = boost, Negative = penalty; 0.0 for unlisted combinations.
        Based on community benchmarks and known model strengths.
        """
        adjustments = {
            ("qwen", "coding"): +0.1,
            ("qwen", "reasoning"): +0.05,
            ("deepseek", "coding"): +0.15,
            ("deepseek", "reasoning"): +0.1,
            ("codellama", "coding"): +0.2,
            ("codellama", "reasoning"): -0.1,  # Not a reasoning model
            ("phi", "coding"): +0.05,
            ("phi", "reasoning"): +0.15,  # Phi is surprisingly capable at reasoning for its size
            ("mistral", "summarization"): +0.1,
            ("gemma", "instruction_following"): +0.05,
            ("starcoder", "coding"): +0.2,
            ("starcoder", "reasoning"): -0.15,
        }
        return adjustments.get((family, task_category), 0.0)

    def _get_quantization_penalty(self, model: ModelDescriptor) -> float:
        """
        Quantization reduces quality. Apply penalty for heavily quantized models
        on reasoning and complex tasks.

        The quantization label is matched case-insensitively: first exactly,
        then against the longest table key that is a ``_``-delimited prefix of
        the label (so ``Q2_K`` uses the ``Q2`` entry). Unknown or missing
        labels yield 0.0.
        """
        quant = (model.quantization or "").strip().upper()
        penalties = {
            "Q2": -0.25,
            "Q3": -0.15,
            "Q4_0": -0.08,
            "Q4_K_M": -0.05,
            "Q5": -0.02,
            "Q5_K_M": -0.02,
            "Q8_0": 0.0,
            "FP16": +0.05,  # Slight quality boost for full precision
        }
        if quant in penalties:
            return penalties[quant]

        # Variant suffixes (e.g. Q3_K_M, Q5_K_S) fall back to their family entry;
        # longest key first so the most specific entry wins.
        for key in sorted(penalties, key=len, reverse=True):
            if quant.startswith(key + "_"):
                return penalties[key]
        return 0.0

    def score(
        self,
        model: ModelDescriptor,
        task_category: str,
        required_tokens: int = 0,
        latency_requirement: str = "medium",
        profile: ModelProfile | None = None,
        local_preferred: bool = False,
    ) -> float:
        """
        Calculate aggregated suitability score (0.0 - 1.0) for a model based on task constraints.

        agg_score = w_cap * cap_match + w_ctx * ctx_fit + w_speed * speed + w_rel * reliability - w_cost * cost

        The capability component is adjusted by model family and, for the
        "reasoning" and "planning" categories, by the quantization penalty.
        The final value is clamped to [0.0, 1.0].
        """
        # 1. Base Capability Score (0.0 to 1.0)
        cap_score = self._calculate_capability_score(model, task_category)

        # Apply model family adjustments
        family = self._detect_model_family(model.model_id)
        family_adj = self._get_family_capability_adjustments(family, task_category)
        cap_score = max(0.0, min(1.0, cap_score + family_adj))

        # Apply quantization penalty for reasoning-heavy tasks
        if task_category in ("reasoning", "planning"):
            quant_penalty = self._get_quantization_penalty(model)
            cap_score = max(0.0, min(1.0, cap_score + quant_penalty))

        # 2. Context Fit Score (0.0 to 1.0)
        ctx_score = self._calculate_context_score(model.context_length, required_tokens)

        # 3. Speed / Performance Score (0.0 to 1.0)
        speed_score = self._calculate_speed_score(model, latency_requirement, profile)

        # 4. Reliability / Validity Score (0.0 to 1.0)
        reliability_score = self._calculate_reliability_score(model, profile, local_preferred)

        # 5. Cost Penalty (0.0 to 1.0)
        cost_penalty = self._calculate_cost_penalty(model)

        # Aggregate weighted components
        total_score = (
            self.w_capability * cap_score
            + self.w_context * ctx_score
            + self.w_speed * speed_score
            + self.w_reliability * reliability_score
            - self.w_cost * cost_penalty
        )

        return max(0.0, min(1.0, total_score))

    def _calculate_capability_score(self, model: ModelDescriptor, task_category: str) -> float:
        """Evaluate how well model capability level matches task category.

        Returns 0.25 when the model carries no capability information at all;
        otherwise the level stored under ``task_category`` is mapped to a score
        (a category the profile does not know counts as ``NONE``).
        """
        capabilities = getattr(model, "capabilities", None)
        if not capabilities:
            return 0.25  # Basic fallback

        level = CapabilityLevel.NONE
        # Extract capability level from an attribute-style profile or dictionary representation
        if isinstance(capabilities, dict):
            level_val = capabilities.get(task_category, CapabilityLevel.NONE)
            if isinstance(level_val, int):
                level = CapabilityLevel(level_val)
        else:
            if hasattr(capabilities, task_category):
                level = getattr(capabilities, task_category)

        # Score mapping
        level_score_map = {
            CapabilityLevel.NONE: 0.0,
            CapabilityLevel.BASIC: 0.2,
            CapabilityLevel.INTERMEDIATE: 0.5,
            CapabilityLevel.ADVANCED: 0.8,
            CapabilityLevel.EXPERT: 1.0,
        }

        return level_score_map.get(level, 0.2)

    def _calculate_context_score(self, context_length: int, required_tokens: int) -> float:
        """Evaluate how well context window size fits required token limits.

        Returns 1.0 when nothing is required, a value in (0.8, 1.0] when the
        window is large enough, and at most 0.5 (scaled by the covered
        fraction) when it is too small. A ``None`` window is treated as 0.
        """
        if required_tokens <= 0:
            return 1.0

        if context_length is None:
            # Unknown window size: score it like a window that cannot fit anything
            context_length = 0

        if context_length >= required_tokens:
            # Having extra headroom is good, but value decays as ratio grows
            ratio = context_length / required_tokens
            return min(1.0, 0.8 + 0.2 / ratio)
        else:
            # Severe penalty for context overflow
            return max(0.0, (context_length / required_tokens) * 0.5)

    def _calculate_speed_score(
        self, model: ModelDescriptor, latency_requirement: str, profile: ModelProfile | None
    ) -> float:
        """Calculate speed score using empirical metrics (TPS/TTFT) if available, falling back to static tiers.

        With a profile reporting a positive TPS, ``latency_requirement`` is not
        consulted and the result is never below 0.1.
        """
        # Dynamic scoring if profile metrics exist
        if profile and profile.tps > 0:
            # Estimate speed based on empirical tokens per second. (assume 80 TPS is maximum optimal score)
            empirical_tps_score = min(1.0, profile.tps / 80.0)

            # Penalize long TTFT (assume > 1.5 seconds starts decaying score)
            ttft_penalty = (
                max(0.0, min(0.5, (profile.ttft_ms - 1500.0) / 3000.0))
                if profile.ttft_ms > 0
                else 0.0
            )
            return max(0.1, empirical_tps_score - ttft_penalty)

        # Fallback to static speed tiers
        speed_map = {"fast": 1.0, "medium": 0.6, "slow": 0.3}
        model_speed = speed_map.get(model.speed_tier, 0.6)

        req_map = {"fast": 1.0, "medium": 0.6, "slow": 0.3}
        req_speed = req_map.get(latency_requirement, 0.6)

        if model_speed >= req_speed:
            return 1.0
        return model_speed / req_speed

    def _calculate_reliability_score(
        self, model: ModelDescriptor, profile: ModelProfile | None, local_preferred: bool
    ) -> float:
        """Determine reliability and preference score based on locality and validation history."""
        score = 0.9  # Baseline reliability

        if model.is_local:
            # Boost if local models are requested
            score += 0.1 if local_preferred else 0.05
        else:
            # Slight penalty if we strictly prefer local running
            score -= 0.2 if local_preferred else 0.0

        # Empirical JSON formatting validity penalty
        if profile and profile.json_validity < 1.0:
            score -= (1.0 - profile.json_validity) * 0.5

        return max(0.0, min(1.0, score))

    def _calculate_cost_penalty(self, model: ModelDescriptor) -> float:
        """Calculate score penalty based on token cost, in the range 0.0 to 1.0."""
        cost = model.cost_per_1k_tokens
        if cost is None or cost <= 0.0:
            return 0.0  # Zero cost for local offline models

        # Standardize cost penalty (assuming max expected cost is $0.15 per 1k tokens)
        return min(1.0, cost / 0.15)
@@ -0,0 +1,205 @@
1
+ """Model specialization mapper for the Reasoning Council with role-specific context optimizations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from enum import StrEnum
7
+
8
+ from velune.core.types.model import ModelDescriptor
9
+ from velune.models.profiler import ModelProfiler
10
+ from velune.models.registry import ModelCapabilityRegistry
11
+ from velune.models.scorer import ModelScorer
12
+
13
# Module-level logger shared by the role mapper below; the name is spelled out
# explicitly rather than derived from __name__.
logger: logging.Logger = logging.getLogger("velune.models.specializations")
14
+
15
+
16
+ class CouncilRole(StrEnum):
17
+ """Roles in the Velune Reasoning Council."""
18
+
19
+ PLANNER = "planner"
20
+ CODER = "coder"
21
+ REVIEWER = "reviewer"
22
+ CHALLENGER = "challenger"
23
+ SYNTHESIZER = "synthesizer"
24
+
25
+
26
# Default context-window target, in tokens, for each council role. These are
# used when the caller does not supply an explicit token requirement.
ROLE_CONTEXT_REQUIREMENTS: dict[CouncilRole, int] = {
    CouncilRole.PLANNER: 16 * 1024,  # full repository context
    CouncilRole.CODER: 32 * 1024,  # code + context + plan
    CouncilRole.REVIEWER: 32 * 1024,  # has to see the full code
    CouncilRole.CHALLENGER: 16 * 1024,  # works from a code summary
    CouncilRole.SYNTHESIZER: 64 * 1024,  # takes in every other role's output
}
33
+
34
+
35
class ModelSpecializationMapper:
    """Assign models from the capability registry to Reasoning Council roles.

    For every ``CouncilRole`` the mapper scores all registered models with a
    role-specific task category, latency requirement and context-size target,
    and keeps the highest-scoring one. Entries in ``overrides`` pin a role to
    a specific model ID and take precedence over the scored choice.
    """

    def __init__(
        self,
        registry: ModelCapabilityRegistry,
        scorer: ModelScorer | None = None,
        profiler: ModelProfiler | None = None,
    ) -> None:
        """Store the collaborators, creating defaults for any that are omitted.

        Args:
            registry: Source of candidate models (``list_all``) and of
                override lookups (``get``).
            scorer: Scores a model for a role; a default ``ModelScorer`` is
                created when falsy.
            profiler: Supplies per-model profiles to the scorer; a default
                ``ModelProfiler`` is created when falsy.
        """
        self.registry = registry
        self.scorer = scorer or ModelScorer()
        self.profiler = profiler or ModelProfiler()
        # Role -> model ID pins; applied last in map_roles().
        self.overrides: dict[CouncilRole, str] = {}

    def map_roles(
        self,
        task_category: str = "coding",
        required_tokens: int | None = None,
        local_preferred: bool = False,
    ) -> dict[CouncilRole, ModelDescriptor]:
        """
        Assigns the best available model for each CouncilRole based on their functional profiles and optimal context token sizes.

        - Planner: High planning and instruction-following scores (optimizes for 16k context window).
        - Coder: High coding and tool-use scores (optimizes for 32k context window).
        - Reviewer: High reasoning and instruction-following scores (optimizes for 32k context window).
        - Challenger: High reasoning and adversarial analysis capabilities (optimizes for 16k context window).
        - Synthesizer: High summarization and context window capability (optimizes for 64k context window).

        Args:
            task_category: Not consulted by the mapping; every role is scored
                with its own fixed category. Kept for interface compatibility.
            required_tokens: Explicit context requirement. When given it
                replaces the per-role defaults, with 2048 extra tokens for the
                reviewer and 4096 extra for the synthesizer.
            local_preferred: Forwarded to the scorer for every role.

        Returns:
            A mapping covering every ``CouncilRole``, or an empty dict when
            the registry has no models. Roles for which no model could be
            selected fall back to the first registered model; valid overrides
            replace whatever was selected.
        """
        models = self.registry.list_all()
        if not models:
            logger.warning(
                "No models found in the capability registry. Council mappings will be empty."
            )
            return {}

        # (role, scorer category, latency requirement, extra tokens added on
        # top of an explicit ``required_tokens``), in assignment order.
        role_specs = (
            (CouncilRole.PLANNER, "planning", "medium", 0),
            (CouncilRole.CODER, "coding", "medium", 0),
            # Reviewer prefers slower, highly capable reasoning models.
            (CouncilRole.REVIEWER, "reasoning", "slow", 2048),
            # Challenger needs strong reasoning.
            (CouncilRole.CHALLENGER, "reasoning", "medium", 0),
            # Synthesizer prefers faster summarization models with large context.
            (CouncilRole.SYNTHESIZER, "summarization", "fast", 4096),
        )

        assignments: dict[CouncilRole, ModelDescriptor] = {}
        for role, category, latency, headroom in role_specs:
            if required_tokens is not None:
                token_budget = required_tokens + headroom
            else:
                token_budget = ROLE_CONTEXT_REQUIREMENTS[role]

            selected = self._select_best_model(
                models=models,
                role_category=category,
                required_tokens=token_budget,
                latency_requirement=latency,
                local_preferred=local_preferred,
            )
            if selected:
                assignments[role] = selected

        # Ensure we have fallbacks for all roles if any fail to map. ``models``
        # is known to be non-empty here because of the early return above.
        default_model = models[0]
        for role in CouncilRole:
            if role not in assignments:
                logger.info(
                    "Falling back role %s to default model %s",
                    role.value,
                    default_model.model_id,
                )
                assignments[role] = default_model

        # Apply explicitly assigned overrides; surface the ones that cannot be
        # resolved instead of dropping them silently.
        for role, overridden_model_id in self.overrides.items():
            descriptor = self.registry.get(overridden_model_id)
            if descriptor:
                assignments[role] = descriptor
            else:
                logger.warning(
                    "Ignoring override for role %s: model %s is not in the capability registry",
                    role,
                    overridden_model_id,
                )

        return assignments

    @staticmethod
    def _probe_free_vram_gb():
        """Return the runtime's reported free VRAM, or None when it cannot be read.

        The value comes from the ``vram_free_gb`` entry of the container's
        ``runtime.gpu_info`` (presumably a number of gigabytes; confirm
        against the runtime). Any failure along the way is deliberately
        treated as "unknown" so model selection still proceeds.
        """
        try:
            from velune.kernel.registry import get_container

            gpu_info = get_container().get("runtime.gpu_info")
            return gpu_info.get("vram_free_gb")
        except Exception:
            # Best effort: a missing container, key or module disables the VRAM filter.
            return None

    def _select_best_model(
        self,
        models: list[ModelDescriptor],
        role_category: str,
        required_tokens: int,
        latency_requirement: str,
        local_preferred: bool,
    ) -> ModelDescriptor | None:
        """Score every candidate and return the highest-scoring one.

        Local models whose ``vram_required_gb`` exceeds the free VRAM reported
        by the runtime are skipped; the filter is inactive when free VRAM is
        unknown. On equal scores the earliest model in ``models`` wins.

        Args:
            models: Candidates to evaluate, in priority order for ties.
            role_category: Task category passed to the scorer.
            required_tokens: Context requirement passed to the scorer.
            latency_requirement: Latency tier passed to the scorer.
            local_preferred: Locality preference passed to the scorer.

        Returns:
            The best model, or ``None`` if no candidate scored above -1.0
            (for example when ``models`` is empty or all were skipped).
        """
        available_vram_gb = self._probe_free_vram_gb()

        best_model: ModelDescriptor | None = None
        best_score = -1.0

        for model in models:
            # VRAM check for local models
            if model.is_local and available_vram_gb is not None:
                required_vram = model.vram_required_gb
                if required_vram and required_vram > available_vram_gb:
                    logger.info(
                        "Skipping %s: requires %.1fGB VRAM, only %.1fGB available",
                        model.model_id,
                        required_vram,
                        available_vram_gb,
                    )
                    continue  # Skip models that won't fit in VRAM

            profile = self.profiler.get_profile(model.provider_id, model.model_id)
            score = self.scorer.score(
                model=model,
                task_category=role_category,
                required_tokens=required_tokens,
                latency_requirement=latency_requirement,
                profile=profile,
                local_preferred=local_preferred,
            )

            # Strict comparison keeps the first of several equally scored models.
            if score > best_score:
                best_score = score
                best_model = model

        return best_model