velune-cli 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279):
  1. velune/__init__.py +5 -0
  2. velune/__main__.py +6 -0
  3. velune/cli/__init__.py +5 -0
  4. velune/cli/app.py +208 -0
  5. velune/cli/autocomplete.py +80 -0
  6. velune/cli/banner.py +60 -0
  7. velune/cli/commands/__init__.py +32 -0
  8. velune/cli/commands/ask.py +175 -0
  9. velune/cli/commands/base.py +16 -0
  10. velune/cli/commands/chat.py +228 -0
  11. velune/cli/commands/config.py +224 -0
  12. velune/cli/commands/daemon.py +88 -0
  13. velune/cli/commands/doctor.py +721 -0
  14. velune/cli/commands/init.py +170 -0
  15. velune/cli/commands/mcp.py +82 -0
  16. velune/cli/commands/memory.py +293 -0
  17. velune/cli/commands/models.py +683 -0
  18. velune/cli/commands/preflight.py +95 -0
  19. velune/cli/commands/run.py +270 -0
  20. velune/cli/commands/setup.py +184 -0
  21. velune/cli/commands/workspace.py +249 -0
  22. velune/cli/context.py +36 -0
  23. velune/cli/councilmodel_ui.py +199 -0
  24. velune/cli/display/council_view.py +254 -0
  25. velune/cli/display/memory_view.py +126 -0
  26. velune/cli/display/panels.py +35 -0
  27. velune/cli/display/progress.py +25 -0
  28. velune/cli/display/themes.py +25 -0
  29. velune/cli/main.py +15 -0
  30. velune/cli/model_selector.py +51 -0
  31. velune/cli/modes.py +86 -0
  32. velune/cli/pull_ui.py +123 -0
  33. velune/cli/registry.py +80 -0
  34. velune/cli/rendering/__init__.py +5 -0
  35. velune/cli/rendering/error_panel.py +79 -0
  36. velune/cli/rendering/markdown.py +63 -0
  37. velune/cli/repl.py +1855 -0
  38. velune/cli/session_manager.py +71 -0
  39. velune/cli/slash_commands.py +37 -0
  40. velune/cli/theme.py +8 -0
  41. velune/cognition/__init__.py +23 -0
  42. velune/cognition/agents/__init__.py +7 -0
  43. velune/cognition/agents/coder.py +209 -0
  44. velune/cognition/agents/planner.py +156 -0
  45. velune/cognition/agents/reviewer.py +195 -0
  46. velune/cognition/arbitrator.py +220 -0
  47. velune/cognition/architecture.py +415 -0
  48. velune/cognition/budget.py +65 -0
  49. velune/cognition/council/__init__.py +47 -0
  50. velune/cognition/council/base.py +217 -0
  51. velune/cognition/council/challenger.py +74 -0
  52. velune/cognition/council/coder.py +79 -0
  53. velune/cognition/council/critic_agent.py +43 -0
  54. velune/cognition/council/critic_configs.py +111 -0
  55. velune/cognition/council/critics.py +41 -0
  56. velune/cognition/council/debate.py +46 -0
  57. velune/cognition/council/factory.py +140 -0
  58. velune/cognition/council/messages.py +56 -0
  59. velune/cognition/council/planner.py +124 -0
  60. velune/cognition/council/reviewer.py +74 -0
  61. velune/cognition/council/synthesizer.py +67 -0
  62. velune/cognition/council/tiers.py +188 -0
  63. velune/cognition/council_orchestrator.py +282 -0
  64. velune/cognition/firewall.py +354 -0
  65. velune/cognition/module.py +46 -0
  66. velune/cognition/orchestrator.py +1205 -0
  67. velune/cognition/personality.py +238 -0
  68. velune/cognition/state.py +104 -0
  69. velune/cognition/style_resolver.py +64 -0
  70. velune/cognition/verification.py +205 -0
  71. velune/context/__init__.py +28 -0
  72. velune/context/assembler.py +240 -0
  73. velune/context/budget.py +97 -0
  74. velune/context/extractive.py +95 -0
  75. velune/context/prompt_adaptation.py +480 -0
  76. velune/context/sections.py +99 -0
  77. velune/context/token_counter.py +134 -0
  78. velune/context/utilization.py +33 -0
  79. velune/context/window.py +63 -0
  80. velune/core/__init__.py +89 -0
  81. velune/core/background.py +5 -0
  82. velune/core/config/__init__.py +37 -0
  83. velune/core/errors/__init__.py +90 -0
  84. velune/core/errors/catalog.py +188 -0
  85. velune/core/errors/execution.py +31 -0
  86. velune/core/errors/memory.py +25 -0
  87. velune/core/errors/orchestration.py +31 -0
  88. velune/core/errors/provider.py +37 -0
  89. velune/core/event_loop.py +35 -0
  90. velune/core/logging.py +83 -0
  91. velune/core/paths.py +165 -0
  92. velune/core/runtime.py +113 -0
  93. velune/core/startup_profiler.py +56 -0
  94. velune/core/task_registry.py +117 -0
  95. velune/core/trace.py +83 -0
  96. velune/core/types/__init__.py +48 -0
  97. velune/core/types/agent.py +53 -0
  98. velune/core/types/context.py +42 -0
  99. velune/core/types/inference.py +38 -0
  100. velune/core/types/memory.py +42 -0
  101. velune/core/types/model.py +70 -0
  102. velune/core/types/provider.py +62 -0
  103. velune/core/types/repository.py +38 -0
  104. velune/core/types/task.py +61 -0
  105. velune/core/types/workspace.py +28 -0
  106. velune/daemon/client.py +13 -0
  107. velune/daemon/server.py +127 -0
  108. velune/daemon/transport.py +179 -0
  109. velune/events.py +204 -0
  110. velune/execution/__init__.py +22 -0
  111. velune/execution/benchmarker.py +315 -0
  112. velune/execution/cancellation.py +53 -0
  113. velune/execution/checkpointer.py +130 -0
  114. velune/execution/command_spec.py +165 -0
  115. velune/execution/diff_preview.py +197 -0
  116. velune/execution/executor.py +181 -0
  117. velune/execution/module.py +18 -0
  118. velune/execution/multi_diff.py +67 -0
  119. velune/execution/path_guard.py +74 -0
  120. velune/execution/planner.py +91 -0
  121. velune/execution/rollback.py +89 -0
  122. velune/execution/sandbox.py +268 -0
  123. velune/execution/validator.py +115 -0
  124. velune/hardware/__init__.py +1 -0
  125. velune/hardware/detector.py +192 -0
  126. velune/kernel/__init__.py +55 -0
  127. velune/kernel/bootstrap.py +125 -0
  128. velune/kernel/config.py +426 -0
  129. velune/kernel/entrypoint.py +78 -0
  130. velune/kernel/health.py +54 -0
  131. velune/kernel/lifecycle.py +143 -0
  132. velune/kernel/module.py +17 -0
  133. velune/kernel/modules.py +23 -0
  134. velune/kernel/registry.py +96 -0
  135. velune/kernel/schemas.py +28 -0
  136. velune/main.py +9 -0
  137. velune/mcp/__init__.py +9 -0
  138. velune/mcp/client.py +115 -0
  139. velune/mcp/config.py +19 -0
  140. velune/mcp/server.py +624 -0
  141. velune/memory/__init__.py +32 -0
  142. velune/memory/compaction.py +506 -0
  143. velune/memory/embedding_pipeline.py +241 -0
  144. velune/memory/lifecycle.py +680 -0
  145. velune/memory/module.py +218 -0
  146. velune/memory/prioritizer.py +67 -0
  147. velune/memory/storage/episodic_schema.sql +53 -0
  148. velune/memory/storage/lancedb_store.py +282 -0
  149. velune/memory/storage/sqlite_manager.py +369 -0
  150. velune/memory/storage/sqlite_pool.py +149 -0
  151. velune/memory/tiers/episodic.py +588 -0
  152. velune/memory/tiers/graph.py +378 -0
  153. velune/memory/tiers/lineage.py +416 -0
  154. velune/memory/tiers/semantic.py +475 -0
  155. velune/memory/tiers/working.py +168 -0
  156. velune/memory/vitality.py +132 -0
  157. velune/models/__init__.py +15 -0
  158. velune/models/family.py +76 -0
  159. velune/models/module.py +20 -0
  160. velune/models/probes.py +192 -0
  161. velune/models/profile_cache.py +84 -0
  162. velune/models/profiler.py +108 -0
  163. velune/models/registry.py +251 -0
  164. velune/models/scorer.py +233 -0
  165. velune/models/specializations.py +205 -0
  166. velune/orchestration/__init__.py +19 -0
  167. velune/orchestration/engine.py +239 -0
  168. velune/orchestration/module.py +15 -0
  169. velune/orchestration/role_assignments.py +82 -0
  170. velune/orchestration/schemas.py +98 -0
  171. velune/plugins/__init__.py +20 -0
  172. velune/plugins/hooks.py +50 -0
  173. velune/plugins/loader.py +161 -0
  174. velune/plugins/registry.py +56 -0
  175. velune/plugins/schemas.py +21 -0
  176. velune/providers/__init__.py +23 -0
  177. velune/providers/adapters/anthropic.py +257 -0
  178. velune/providers/adapters/fireworks.py +115 -0
  179. velune/providers/adapters/google.py +234 -0
  180. velune/providers/adapters/groq.py +151 -0
  181. velune/providers/adapters/huggingface.py +210 -0
  182. velune/providers/adapters/llamacpp.py +208 -0
  183. velune/providers/adapters/lmstudio.py +175 -0
  184. velune/providers/adapters/ollama.py +233 -0
  185. velune/providers/adapters/openai.py +213 -0
  186. velune/providers/adapters/openrouter.py +81 -0
  187. velune/providers/adapters/together.py +134 -0
  188. velune/providers/adapters/xai.py +60 -0
  189. velune/providers/base.py +86 -0
  190. velune/providers/benchmarker.py +138 -0
  191. velune/providers/discovery/__init__.py +33 -0
  192. velune/providers/discovery/anthropic.py +79 -0
  193. velune/providers/discovery/benchmarks.py +44 -0
  194. velune/providers/discovery/classifier.py +69 -0
  195. velune/providers/discovery/fireworks.py +95 -0
  196. velune/providers/discovery/gguf.py +88 -0
  197. velune/providers/discovery/google.py +95 -0
  198. velune/providers/discovery/gpu.py +117 -0
  199. velune/providers/discovery/groq.py +21 -0
  200. velune/providers/discovery/huggingface.py +67 -0
  201. velune/providers/discovery/lmstudio.py +80 -0
  202. velune/providers/discovery/ollama.py +162 -0
  203. velune/providers/discovery/openai.py +96 -0
  204. velune/providers/discovery/openrouter.py +113 -0
  205. velune/providers/discovery/scanner.py +115 -0
  206. velune/providers/discovery/together.py +114 -0
  207. velune/providers/discovery/xai.py +57 -0
  208. velune/providers/health.py +67 -0
  209. velune/providers/health_monitor.py +169 -0
  210. velune/providers/keystore.py +142 -0
  211. velune/providers/local_paths.py +49 -0
  212. velune/providers/local_resolver.py +229 -0
  213. velune/providers/module.py +51 -0
  214. velune/providers/ollama_manager.py +193 -0
  215. velune/providers/registry.py +220 -0
  216. velune/providers/router.py +255 -0
  217. velune/providers/task_classifier.py +288 -0
  218. velune/py.typed +0 -0
  219. velune/repository/__init__.py +33 -0
  220. velune/repository/analyzer.py +127 -0
  221. velune/repository/ast_parser.py +822 -0
  222. velune/repository/blast_radius.py +298 -0
  223. velune/repository/boundary_classifier.py +295 -0
  224. velune/repository/cognition.py +316 -0
  225. velune/repository/grapher.py +179 -0
  226. velune/repository/import_graph.py +263 -0
  227. velune/repository/incremental_indexer.py +275 -0
  228. velune/repository/index_state.py +96 -0
  229. velune/repository/indexer.py +243 -0
  230. velune/repository/module.py +17 -0
  231. velune/repository/parser.py +474 -0
  232. velune/repository/project_type.py +300 -0
  233. velune/repository/rename_journal.py +287 -0
  234. velune/repository/scanner.py +193 -0
  235. velune/repository/schemas.py +102 -0
  236. velune/repository/symbol_registry.py +365 -0
  237. velune/repository/tracker.py +252 -0
  238. velune/retrieval/__init__.py +27 -0
  239. velune/retrieval/cache.py +110 -0
  240. velune/retrieval/fast_path.py +391 -0
  241. velune/retrieval/graph.py +124 -0
  242. velune/retrieval/hybrid.py +271 -0
  243. velune/retrieval/keyword.py +131 -0
  244. velune/retrieval/module.py +26 -0
  245. velune/retrieval/pipeline.py +303 -0
  246. velune/retrieval/reranker.py +102 -0
  247. velune/retrieval/schemas.py +59 -0
  248. velune/retrieval/slow_path.py +364 -0
  249. velune/retrieval/vector.py +203 -0
  250. velune/telemetry/__init__.py +59 -0
  251. velune/telemetry/cognition.py +267 -0
  252. velune/telemetry/cost_estimator.py +92 -0
  253. velune/telemetry/debug.py +304 -0
  254. velune/telemetry/doctor.py +244 -0
  255. velune/telemetry/logging.py +286 -0
  256. velune/telemetry/spans.py +277 -0
  257. velune/telemetry/token_tracker.py +140 -0
  258. velune/telemetry/usage_tracker.py +340 -0
  259. velune/tools/__init__.py +41 -0
  260. velune/tools/base/registry.py +87 -0
  261. velune/tools/base/tool.py +63 -0
  262. velune/tools/code/navigate.py +116 -0
  263. velune/tools/code/search.py +123 -0
  264. velune/tools/filesystem/read.py +75 -0
  265. velune/tools/filesystem/search.py +136 -0
  266. velune/tools/filesystem/write.py +163 -0
  267. velune/tools/git/history.py +177 -0
  268. velune/tools/git/operations.py +122 -0
  269. velune/tools/git/state.py +121 -0
  270. velune/tools/module.py +81 -0
  271. velune/tools/terminal/execute.py +72 -0
  272. velune/tools/terminal/history.py +47 -0
  273. velune/tools/web/fetch.py +55 -0
  274. velune/tools/web/validator.py +122 -0
  275. velune_cli-0.9.0.dist-info/METADATA +518 -0
  276. velune_cli-0.9.0.dist-info/RECORD +279 -0
  277. velune_cli-0.9.0.dist-info/WHEEL +4 -0
  278. velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
  279. velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,208 @@
1
+ """Llama.cpp local GGUF model provider adapter implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from velune.core.errors.provider import InferenceError, ProviderConnectionError
12
+ from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
13
+ from velune.core.types.model import ModelDescriptor
14
+ from velune.core.types.provider import ProviderCapabilities, ProviderHealth
15
+ from velune.providers.base import ModelProvider
16
+
17
+
18
class LlamaCppProvider(ModelProvider):
    """Llama.cpp provider for running in-process GGUF models.

    Models are loaded lazily through ``llama_cpp.Llama`` and cached per model
    ID. Blocking llama.cpp calls are pushed onto worker threads with
    ``asyncio.to_thread``. Chat and embedding instances are cached
    separately because llama-cpp-python only allows ``create_embedding`` on
    a model constructed with ``embedding=True``.
    """

    # Context window used when a request does not need a larger one.
    _DEFAULT_CONTEXT_WINDOW = 4096
    # Upper bound for the context window; also advertised in the capabilities.
    _MAX_CONTEXT_WINDOW = 32768
    # Marker returned by the streaming helper once the generator is exhausted.
    _STREAM_END = object()

    def __init__(self, models_dir: str | None = None) -> None:
        """Create the provider.

        Args:
            models_dir: Directory holding GGUF files; defaults to ``~/models``.
        """
        self._models_dir = Path(models_dir) if models_dir else Path.home() / "models"
        # Chat/completion instances keyed by model ID.
        self._loaded_models: dict[str, Any] = {}
        # Context window each chat instance was loaded with.
        self._loaded_context: dict[str, int] = {}
        # Instances loaded with ``embedding=True``, keyed by model ID.
        self._embedding_models: dict[str, Any] = {}
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=False,
            supports_embeddings=True,
            max_context_window=self._MAX_CONTEXT_WINDOW,
        )

    @property
    def provider_id(self) -> str:
        """Return the registry identifier of this provider."""
        return "llamacpp"

    async def initialize(self) -> None:
        """Verify llama-cpp-python library is available.

        Raises:
            ProviderConnectionError: If ``llama_cpp`` cannot be imported.
        """
        try:
            import llama_cpp  # noqa: F401
        except ImportError as exc:
            raise ProviderConnectionError(
                "llama-cpp-python dependency is missing. Install with: pip install llama-cpp-python"
            ) from exc

    def _resolve_model_path(self, model_id: str) -> Path:
        """Resolve the GGUF model path from model ID.

        Lookup order: the persistent path cache, then ``LocalModelResolver``,
        then the resolver's interactive prompt. Any path found by the last two
        steps is saved to the persistent cache.

        Raises:
            FileNotFoundError: If no step yields a path.
        """
        from velune.providers.local_paths import get_model_path, save_model_path
        from velune.providers.local_resolver import LocalModelResolver

        # 1. Check persistent cache first
        cached = get_model_path(model_id)
        if cached:
            return cached

        resolver = LocalModelResolver()

        # 2. Ask LocalModelResolver (absolute, relative, stem scan)
        # 3. Interactive prompt — only in a real terminal
        for lookup in (resolver.resolve_model_path, resolver.prompt_for_path):
            located = lookup(model_id)
            if located:
                save_model_path(model_id, located)
                return located

        raise FileNotFoundError(f"GGUF model file not found for ID: {model_id}")

    def _context_window_for(self, request: InferenceRequest) -> int:
        """Return the context window (``n_ctx``) to load a model with.

        ``request.max_tokens`` is a generation budget, not a context size, so
        it only raises the window above the default when it is larger; the
        result is capped at ``_MAX_CONTEXT_WINDOW``.
        """
        wanted = max(self._DEFAULT_CONTEXT_WINDOW, request.max_tokens or 0)
        return min(self._MAX_CONTEXT_WINDOW, wanted)

    @staticmethod
    def _chat_messages(request: InferenceRequest) -> list[dict[str, Any]]:
        """Reduce request messages to the ``role``/``content`` pairs llama_cpp expects."""
        return [
            {"role": msg.get("role"), "content": msg.get("content")} for msg in request.messages
        ]

    def _get_model(
        self, model_id: str, context_window: int = 4096, *, embedding: bool = False
    ) -> Any:
        """Synchronously get or load the llama_cpp Llama instance.

        Args:
            model_id: Identifier resolved to a GGUF path on first load.
            context_window: Minimum ``n_ctx`` the returned chat model must have.
            embedding: Load (and cache separately) an embedding-mode instance.

        Returns:
            A cached or freshly constructed ``llama_cpp.Llama`` object.
        """
        if embedding:
            cache = self._embedding_models
            cached = cache.get(model_id)
            if cached is not None:
                return cached
        else:
            cache = self._loaded_models
            cached = cache.get(model_id)
            # An entry without a recorded window is treated as adequate.
            loaded_ctx = self._loaded_context.get(model_id, context_window)
            # Reuse only if the cached window is large enough; otherwise reload.
            if cached is not None and loaded_ctx >= context_window:
                return cached

        from llama_cpp import Llama

        model_path = self._resolve_model_path(model_id)

        # Load the model in-memory.
        # Using typical defaults, letting it use GPU if compiled with CUDA/metal.
        llm = Llama(
            model_path=str(model_path),
            n_ctx=context_window,
            n_gpu_layers=-1,  # Load as many layers as possible to GPU if available
            embedding=embedding,
            verbose=False,
        )
        cache[model_id] = llm
        if not embedding:
            self._loaded_context[model_id] = context_window
        return llm

    async def list_models(self) -> list[ModelDescriptor]:
        """List local GGUF models via filesystem discovery."""
        await self.initialize()
        from velune.providers.discovery.gguf import GGUFDiscovery

        return await GGUFDiscovery().discover()

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Non-blocking in-process inference using asyncio thread offloading.

        Raises:
            InferenceError: If loading the model or running the completion fails.
        """
        await self.initialize()
        start = time.perf_counter()

        try:
            llm = await asyncio.to_thread(
                self._get_model, request.model_id, self._context_window_for(request)
            )

            completion = await asyncio.to_thread(
                llm.create_chat_completion,
                messages=self._chat_messages(request),
                temperature=request.temperature,
                max_tokens=request.max_tokens,
                top_p=request.top_p,
                stream=False,
            )

            latency = (time.perf_counter() - start) * 1000.0
            choice = completion["choices"][0]

            return InferenceResponse(
                content=choice["message"]["content"] or "",
                model_id=request.model_id,
                finish_reason=choice.get("finish_reason") or "stop",
                tokens_used=completion.get("usage", {}).get("total_tokens", 0),
                latency_ms=latency,
            )
        except Exception as e:
            raise InferenceError(f"Local llama.cpp inference failed: {e}") from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Streaming chat completions in non-blocking fashion.

        The blocking llama.cpp generator is advanced one chunk at a time on a
        worker thread.

        Raises:
            InferenceError: If loading the model or generating a chunk fails.
        """
        await self.initialize()

        try:
            llm = await asyncio.to_thread(
                self._get_model, request.model_id, self._context_window_for(request)
            )
            messages = self._chat_messages(request)

            def open_stream() -> Any:
                return llm.create_chat_completion(
                    messages=messages,
                    temperature=request.temperature,
                    max_tokens=request.max_tokens,
                    top_p=request.top_p,
                    stream=True,
                )

            chunk_iter = await asyncio.to_thread(open_stream)
            end = self._STREAM_END

            def pull_next() -> Any:
                # A default avoids raising StopIteration across the thread boundary.
                return next(chunk_iter, end)

            while True:
                chunk = await asyncio.to_thread(pull_next)
                if chunk is end:
                    break

                choice = chunk["choices"][0]
                delta = choice.get("delta") or {}

                yield StreamChunk(
                    # Chunks without text still yield an empty string.
                    content=delta.get("content") or "",
                    finish_reason=choice.get("finish_reason"),
                )

        except Exception as e:
            raise InferenceError(f"Local llama.cpp streaming failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Generate batch embeddings in-process.

        Args:
            texts: Inputs to embed; an empty list returns ``[]`` without loading a model.
            model_id: Model to load in embedding mode.

        Raises:
            InferenceError: If loading the model or embedding any text fails.
        """
        await self.initialize()

        if not texts:
            return []

        try:
            # create_embedding needs a model constructed with embedding=True.
            llm = await asyncio.to_thread(self._get_model, model_id, embedding=True)
            embeddings = []
            for text in texts:
                res = await asyncio.to_thread(llm.create_embedding, input=text)
                embeddings.append(res["data"][0]["embedding"])
            return embeddings
        except Exception as e:
            raise InferenceError(f"Local llama.cpp embedding failed: {e}") from e

    async def health_check(self) -> ProviderHealth:
        """Pings provider availability.

        Returns ``HEALTHY`` when ``initialize`` succeeds, ``UNAVAILABLE`` otherwise.
        """
        try:
            await self.initialize()
        except Exception:
            return ProviderHealth.UNAVAILABLE
        return ProviderHealth.HEALTHY

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the static capability description built in ``__init__``."""
        return self._capabilities

    async def shutdown(self) -> None:
        """Release loaded model states."""
        self._loaded_models.clear()
        self._loaded_context.clear()
        self._embedding_models.clear()
@@ -0,0 +1,175 @@
1
+ """LM Studio provider adapter implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+
9
+ import httpx
10
+
11
+ from velune.core.errors.provider import InferenceError, ProviderConnectionError
12
+ from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
13
+ from velune.core.types.model import CapabilityLevel, ModelDescriptor
14
+ from velune.core.types.provider import ProviderCapabilities, ProviderHealth
15
+ from velune.providers.base import ModelProvider
16
+
17
+
18
class LMStudioProvider(ModelProvider):
    """LM Studio provider for local OpenAI-compatible endpoints.

    All traffic goes through one lazily created ``httpx.AsyncClient`` rooted
    at ``base_url``.
    """

    def __init__(self, base_url: str = "http://localhost:1234/v1") -> None:
        """Create the provider.

        Args:
            base_url: Root of the OpenAI-compatible API, including ``/v1``.
        """
        self._base_url = base_url
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=True,
            supports_embeddings=True,
            max_context_window=32768,
        )

    @property
    def provider_id(self) -> str:
        """Return the registry identifier of this provider."""
        return "lmstudio"

    async def initialize(self) -> None:
        """Create the async HTTP client if it does not exist yet."""
        if not self.client:
            self.client = httpx.AsyncClient(base_url=self._base_url, timeout=300.0)

    @staticmethod
    def _chat_payload(request: InferenceRequest, *, stream: bool) -> dict[str, object]:
        """Build the ``/chat/completions`` request body.

        Unset (``None``) sampling fields are left out instead of being sent as
        JSON ``null``, so the server applies its own defaults.
        """
        payload: dict[str, object] = {
            "model": request.model_id,
            "messages": request.messages,
        }
        optional = {
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
            "top_p": request.top_p,
        }
        payload.update({key: value for key, value in optional.items() if value is not None})
        if stream:
            payload["stream"] = True
        if request.stop_sequences:
            payload["stop"] = request.stop_sequences
        return payload

    async def list_models(self) -> list[ModelDescriptor]:
        """Fetch list of active models loaded in LM Studio.

        Every model gets the same fixed capability profile and a context
        length of 32768.

        Raises:
            ProviderConnectionError: If the ``/models`` request fails.
        """
        await self.initialize()
        assert self.client is not None
        try:
            response = await self.client.get("/models")
            response.raise_for_status()
            data = response.json()

            descriptors: list[ModelDescriptor] = []
            for item in data.get("data", []):
                m_id = item["id"]
                descriptors.append(
                    ModelDescriptor(
                        model_id=m_id,
                        display_name=m_id,
                        provider_id="lmstudio",
                        context_length=32768,
                        capabilities={
                            "coding": CapabilityLevel.INTERMEDIATE,
                            "reasoning": CapabilityLevel.INTERMEDIATE,
                            "planning": CapabilityLevel.BASIC,
                            "summarization": CapabilityLevel.INTERMEDIATE,
                            "embedding": CapabilityLevel.INTERMEDIATE,
                            "instruction_following": CapabilityLevel.INTERMEDIATE,
                            "multimodal": CapabilityLevel.NONE,
                            "tool_use": CapabilityLevel.INTERMEDIATE,
                            "long_context": CapabilityLevel.BASIC,
                        },
                        is_local=True,
                    )
                )
            return descriptors
        except httpx.HTTPError as e:
            raise ProviderConnectionError(f"LM Studio connection error: {e}") from e

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Standard chat inference.

        Raises:
            InferenceError: If the HTTP call fails or the response body does
                not have the expected ``choices[0].message`` structure.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        try:
            response = await self.client.post(
                "/chat/completions", json=self._chat_payload(request, stream=False)
            )
            response.raise_for_status()
            data = response.json()
            latency = (time.perf_counter() - start) * 1000.0

            choice = data["choices"][0]
            return InferenceResponse(
                # The server may return null content (e.g. tool-call replies).
                content=choice["message"]["content"] or "",
                model_id=request.model_id,
                finish_reason=choice["finish_reason"] or "stop",
                tokens_used=data.get("usage", {}).get("total_tokens", 0),
                latency_ms=latency,
            )
        except httpx.HTTPError as e:
            raise InferenceError(f"LM Studio completion failed: {e}") from e
        except (KeyError, IndexError, TypeError, ValueError) as e:
            raise InferenceError(
                f"LM Studio returned a malformed completion response: {e!r}"
            ) from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Streaming chat completions.

        Reads the server-sent-event stream line by line; events that cannot be
        parsed or carry no ``choices[0].delta`` object are skipped.

        Raises:
            InferenceError: If the HTTP request or stream fails.
        """
        await self.initialize()
        assert self.client is not None
        try:
            payload = self._chat_payload(request, stream=True)
            async with self.client.stream("POST", "/chat/completions", json=payload) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    # SSE permits "data:" with or without a following space.
                    if not line.startswith("data:"):
                        continue
                    data_str = line[len("data:"):].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        choice = json.loads(data_str)["choices"][0]
                        delta = choice["delta"]
                    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                        # Includes chunks with an empty "choices" list (e.g. usage-only).
                        continue
                    if not isinstance(delta, dict):
                        continue
                    yield StreamChunk(
                        # "content" may be present but null (role/tool-call deltas).
                        content=delta.get("content") or "",
                        finish_reason=choice.get("finish_reason"),
                    )
        except httpx.HTTPError as e:
            raise InferenceError(f"LM Studio stream failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Generate batch embeddings.

        Sends all texts in one ``/embeddings`` request and returns the vectors
        ordered by the ``index`` field of the response. An empty ``texts``
        list returns ``[]`` without contacting the server.

        Raises:
            InferenceError: If the HTTP call fails.
        """
        await self.initialize()
        assert self.client is not None
        if not texts:
            return []
        try:
            response = await self.client.post(
                "/embeddings", json={"model": model_id, "input": texts}
            )
            response.raise_for_status()
            rows = response.json()["data"]
            return [row["embedding"] for row in sorted(rows, key=lambda row: row["index"])]
        except httpx.HTTPError as e:
            raise InferenceError(f"LM Studio embedding failed: {e}") from e

    async def health_check(self) -> ProviderHealth:
        """Verifies connectivity.

        Returns ``HEALTHY`` on a 200 from ``/models``, ``DEGRADED`` on any
        other status, and ``UNAVAILABLE`` if the request raises.
        """
        try:
            await self.initialize()
            assert self.client is not None
            resp = await self.client.get("/models")
        except Exception:
            return ProviderHealth.UNAVAILABLE
        if resp.status_code == 200:
            return ProviderHealth.HEALTHY
        return ProviderHealth.DEGRADED

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the static capability description built in ``__init__``."""
        return self._capabilities

    async def shutdown(self) -> None:
        """Close the HTTP client, if one was created."""
        if self.client:
            await self.client.aclose()
            self.client = None
@@ -0,0 +1,233 @@
1
+ """Ollama provider adapter implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import time
8
+ from collections.abc import AsyncIterator
9
+
10
+ import httpx
11
+
12
+ from velune.core.errors.provider import InferenceError, ProviderConnectionError
13
+ from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
14
+ from velune.core.types.model import CapabilityLevel, ModelDescriptor
15
+ from velune.core.types.provider import ProviderCapabilities, ProviderHealth
16
+ from velune.providers.base import ModelProvider
17
+
18
+ logger = logging.getLogger("velune.providers.adapters.ollama")
19
+
20
+
21
class OllamaProvider(ModelProvider):
    """Ollama provider for local models.

    All traffic goes through one lazily created ``httpx.AsyncClient`` rooted
    at ``base_url``.
    """

    # Context length reported when the server does not reveal the real one.
    _FALLBACK_CONTEXT_LENGTH = 8192

    def __init__(self, base_url: str = "http://localhost:11434") -> None:
        """Create the provider.

        Args:
            base_url: Root URL of the Ollama HTTP API.
        """
        self._base_url = base_url
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=False,
            supports_embeddings=True,
            max_context_window=8192,
        )

    @property
    def provider_id(self) -> str:
        """Return the registry identifier of this provider."""
        return "ollama"

    async def initialize(self) -> None:
        """Initialize the async client."""
        if not self.client:
            self.client = httpx.AsyncClient(base_url=self._base_url, timeout=300.0)

    @staticmethod
    def _chat_payload(request: InferenceRequest, *, stream: bool) -> dict[str, object]:
        """Build the ``/api/chat`` request body.

        Unset (``None``) options are left out instead of being sent as JSON
        ``null``, so the server applies its own defaults.
        """
        candidates = {
            "temperature": request.temperature,
            "num_predict": request.max_tokens,
            "top_p": request.top_p,
        }
        options: dict[str, object] = {
            key: value for key, value in candidates.items() if value is not None
        }
        if request.stop_sequences:
            options["stop"] = request.stop_sequences
        return {
            "model": request.model_id,
            "messages": request.messages,
            "stream": stream,
            "options": options,
        }

    async def _get_model_context_length(self, model_name: str) -> int:
        """Query /api/show for the model's actual context window size.

        Ollama's ``/api/show`` returns a ``parameters`` string that may contain
        a line like ``num_ctx 131072``; that value wins when present. Otherwise
        the ``model_info`` dict is searched for a positive integer under a
        ``...context_length`` key (skipping ``original_context_length``), and
        then under any key containing ``context``.

        Falls back to 8192 if the endpoint is unreachable, returns a non-200
        status, or none of the fields are present.
        """
        assert self.client is not None
        fallback = self._FALLBACK_CONTEXT_LENGTH
        try:
            resp = await self.client.post("/api/show", json={"name": model_name})
            if resp.status_code != 200:
                return fallback
            data = resp.json()

            # ``parameters`` is a newline-delimited string of key-value pairs;
            # ``or ""`` keeps a null field from aborting the model_info fallback.
            params_str: str = data.get("parameters") or ""
            for line in params_str.splitlines():
                parts = line.split()
                if len(parts) >= 2 and parts[0].lower() == "num_ctx":
                    try:
                        return int(parts[1])
                    except ValueError:
                        pass

            # Fallback: check model_info dict returned by newer Ollama builds
            model_info: dict = data.get("model_info") or {}
            positive_ints = {
                key.lower(): value
                for key, value in model_info.items()
                if isinstance(value, int) and value > 0
            }
            # Prefer the architecture's own "<arch>.context_length" entry.
            for key, value in positive_ints.items():
                if key.endswith("context_length") and "original" not in key:
                    return value
            for key, value in positive_ints.items():
                if "context" in key:
                    return value
        except Exception as exc:
            logger.debug("Could not fetch context length for %s: %s", model_name, exc)
        return fallback

    async def list_models(self) -> list[ModelDescriptor]:
        """Fetch models from active Ollama endpoint with accurate context lengths.

        Queries ``/api/show`` for each model listed by ``/api/tags`` to
        populate the real context length instead of defaulting every model to
        8192 tokens.

        Raises:
            ProviderConnectionError: If the ``/api/tags`` request fails.
        """
        await self.initialize()
        assert self.client is not None
        try:
            response = await self.client.get("/api/tags")
            response.raise_for_status()
            data = response.json()

            descriptors: list[ModelDescriptor] = []
            for item in data.get("models", []):
                model_name = item["name"]
                ctx_len = await self._get_model_context_length(model_name)
                if ctx_len > 32768:
                    long_context = CapabilityLevel.INTERMEDIATE
                else:
                    long_context = CapabilityLevel.NONE
                descriptors.append(
                    ModelDescriptor(
                        model_id=model_name,
                        display_name=model_name,
                        provider_id="ollama",
                        context_length=ctx_len,
                        capabilities={
                            "coding": CapabilityLevel.INTERMEDIATE,
                            "reasoning": CapabilityLevel.INTERMEDIATE,
                            "planning": CapabilityLevel.BASIC,
                            "summarization": CapabilityLevel.INTERMEDIATE,
                            "embedding": CapabilityLevel.INTERMEDIATE,
                            "instruction_following": CapabilityLevel.INTERMEDIATE,
                            "multimodal": CapabilityLevel.NONE,
                            "tool_use": CapabilityLevel.NONE,
                            "long_context": long_context,
                        },
                        is_local=True,
                    )
                )
            return descriptors
        except httpx.HTTPError as e:
            raise ProviderConnectionError(f"Failed to fetch models from Ollama: {e}") from e

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run one non-streaming chat completion against ``/api/chat``.

        Logs a warning when the round trip takes longer than 30 seconds.

        Raises:
            InferenceError: If the HTTP call fails or the response body lacks
                the expected ``message.content`` field.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        try:
            response = await self.client.post(
                "/api/chat", json=self._chat_payload(request, stream=False)
            )
            response.raise_for_status()
            data = response.json()
            latency = (time.perf_counter() - start) * 1000.0

            if latency > 30000.0:
                logger.warning(
                    "Slow inference on %s (%.1fs). Consider a smaller model for your hardware.",
                    request.model_id,
                    latency / 1000.0,
                )

            return InferenceResponse(
                content=data["message"]["content"] or "",
                model_id=request.model_id,
                finish_reason=data.get("done_reason") or "stop",
                tokens_used=(data.get("eval_count") or 0) + (data.get("prompt_eval_count") or 0),
                latency_ms=latency,
            )
        except httpx.HTTPError as e:
            raise InferenceError(f"Ollama inference failed: {e}") from e
        except (KeyError, TypeError, ValueError) as e:
            raise InferenceError(f"Ollama returned a malformed chat response: {e!r}") from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Streaming chat completion.

        Reads the newline-delimited JSON stream from ``/api/chat``. Blank and
        unparsable lines are skipped; a line carrying an ``error`` field aborts
        the stream.

        Raises:
            InferenceError: If the HTTP request fails or the server reports an
                error inside the stream.
        """
        await self.initialize()
        assert self.client is not None
        try:
            payload = self._chat_payload(request, stream=True)
            async with self.client.stream("POST", "/api/chat", json=payload) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(data, dict):
                        continue
                    # Errors after the 200 header arrive as an in-stream object.
                    if data.get("error"):
                        raise InferenceError(f"Ollama streaming failed: {data['error']}")
                    message = data.get("message")
                    if isinstance(message, dict):
                        yield StreamChunk(
                            content=message.get("content") or "",
                            finish_reason=data.get("done_reason"),
                        )
        except httpx.HTTPError as e:
            raise InferenceError(f"Ollama streaming failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Batch embedding generation.

        Issues one ``/api/embeddings`` request per text and returns the
        vectors in input order.

        Raises:
            InferenceError: If any HTTP call fails.
        """
        await self.initialize()
        assert self.client is not None
        embeddings: list[list[float]] = []
        try:
            for text in texts:
                resp = await self.client.post(
                    "/api/embeddings", json={"model": model_id, "prompt": text}
                )
                resp.raise_for_status()
                embeddings.append(resp.json()["embedding"])
            return embeddings
        except httpx.HTTPError as e:
            raise InferenceError(f"Ollama embedding failed: {e}") from e

    async def health_check(self) -> ProviderHealth:
        """Pings Ollama core endpoint.

        Returns ``HEALTHY`` on a 200 from ``/``, ``DEGRADED`` on any other
        status, and ``UNAVAILABLE`` if client setup or the request raises.
        """
        try:
            # Inside the try so a client-construction failure reports UNAVAILABLE.
            await self.initialize()
            assert self.client is not None
            resp = await self.client.get("/")
        except Exception:
            return ProviderHealth.UNAVAILABLE
        if resp.status_code == 200:
            return ProviderHealth.HEALTHY
        return ProviderHealth.DEGRADED

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the static capability description built in ``__init__``."""
        return self._capabilities

    async def shutdown(self) -> None:
        """Close connection pools."""
        if self.client:
            await self.client.aclose()
            self.client = None