velune-cli 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279):
  1. velune/__init__.py +5 -0
  2. velune/__main__.py +6 -0
  3. velune/cli/__init__.py +5 -0
  4. velune/cli/app.py +208 -0
  5. velune/cli/autocomplete.py +80 -0
  6. velune/cli/banner.py +60 -0
  7. velune/cli/commands/__init__.py +32 -0
  8. velune/cli/commands/ask.py +175 -0
  9. velune/cli/commands/base.py +16 -0
  10. velune/cli/commands/chat.py +228 -0
  11. velune/cli/commands/config.py +224 -0
  12. velune/cli/commands/daemon.py +88 -0
  13. velune/cli/commands/doctor.py +721 -0
  14. velune/cli/commands/init.py +170 -0
  15. velune/cli/commands/mcp.py +82 -0
  16. velune/cli/commands/memory.py +293 -0
  17. velune/cli/commands/models.py +683 -0
  18. velune/cli/commands/preflight.py +95 -0
  19. velune/cli/commands/run.py +270 -0
  20. velune/cli/commands/setup.py +184 -0
  21. velune/cli/commands/workspace.py +249 -0
  22. velune/cli/context.py +36 -0
  23. velune/cli/councilmodel_ui.py +199 -0
  24. velune/cli/display/council_view.py +254 -0
  25. velune/cli/display/memory_view.py +126 -0
  26. velune/cli/display/panels.py +35 -0
  27. velune/cli/display/progress.py +25 -0
  28. velune/cli/display/themes.py +25 -0
  29. velune/cli/main.py +15 -0
  30. velune/cli/model_selector.py +51 -0
  31. velune/cli/modes.py +86 -0
  32. velune/cli/pull_ui.py +123 -0
  33. velune/cli/registry.py +80 -0
  34. velune/cli/rendering/__init__.py +5 -0
  35. velune/cli/rendering/error_panel.py +79 -0
  36. velune/cli/rendering/markdown.py +63 -0
  37. velune/cli/repl.py +1855 -0
  38. velune/cli/session_manager.py +71 -0
  39. velune/cli/slash_commands.py +37 -0
  40. velune/cli/theme.py +8 -0
  41. velune/cognition/__init__.py +23 -0
  42. velune/cognition/agents/__init__.py +7 -0
  43. velune/cognition/agents/coder.py +209 -0
  44. velune/cognition/agents/planner.py +156 -0
  45. velune/cognition/agents/reviewer.py +195 -0
  46. velune/cognition/arbitrator.py +220 -0
  47. velune/cognition/architecture.py +415 -0
  48. velune/cognition/budget.py +65 -0
  49. velune/cognition/council/__init__.py +47 -0
  50. velune/cognition/council/base.py +217 -0
  51. velune/cognition/council/challenger.py +74 -0
  52. velune/cognition/council/coder.py +79 -0
  53. velune/cognition/council/critic_agent.py +43 -0
  54. velune/cognition/council/critic_configs.py +111 -0
  55. velune/cognition/council/critics.py +41 -0
  56. velune/cognition/council/debate.py +46 -0
  57. velune/cognition/council/factory.py +140 -0
  58. velune/cognition/council/messages.py +56 -0
  59. velune/cognition/council/planner.py +124 -0
  60. velune/cognition/council/reviewer.py +74 -0
  61. velune/cognition/council/synthesizer.py +67 -0
  62. velune/cognition/council/tiers.py +188 -0
  63. velune/cognition/council_orchestrator.py +282 -0
  64. velune/cognition/firewall.py +354 -0
  65. velune/cognition/module.py +46 -0
  66. velune/cognition/orchestrator.py +1205 -0
  67. velune/cognition/personality.py +238 -0
  68. velune/cognition/state.py +104 -0
  69. velune/cognition/style_resolver.py +64 -0
  70. velune/cognition/verification.py +205 -0
  71. velune/context/__init__.py +28 -0
  72. velune/context/assembler.py +240 -0
  73. velune/context/budget.py +97 -0
  74. velune/context/extractive.py +95 -0
  75. velune/context/prompt_adaptation.py +480 -0
  76. velune/context/sections.py +99 -0
  77. velune/context/token_counter.py +134 -0
  78. velune/context/utilization.py +33 -0
  79. velune/context/window.py +63 -0
  80. velune/core/__init__.py +89 -0
  81. velune/core/background.py +5 -0
  82. velune/core/config/__init__.py +37 -0
  83. velune/core/errors/__init__.py +90 -0
  84. velune/core/errors/catalog.py +188 -0
  85. velune/core/errors/execution.py +31 -0
  86. velune/core/errors/memory.py +25 -0
  87. velune/core/errors/orchestration.py +31 -0
  88. velune/core/errors/provider.py +37 -0
  89. velune/core/event_loop.py +35 -0
  90. velune/core/logging.py +83 -0
  91. velune/core/paths.py +165 -0
  92. velune/core/runtime.py +113 -0
  93. velune/core/startup_profiler.py +56 -0
  94. velune/core/task_registry.py +117 -0
  95. velune/core/trace.py +83 -0
  96. velune/core/types/__init__.py +48 -0
  97. velune/core/types/agent.py +53 -0
  98. velune/core/types/context.py +42 -0
  99. velune/core/types/inference.py +38 -0
  100. velune/core/types/memory.py +42 -0
  101. velune/core/types/model.py +70 -0
  102. velune/core/types/provider.py +62 -0
  103. velune/core/types/repository.py +38 -0
  104. velune/core/types/task.py +61 -0
  105. velune/core/types/workspace.py +28 -0
  106. velune/daemon/client.py +13 -0
  107. velune/daemon/server.py +127 -0
  108. velune/daemon/transport.py +179 -0
  109. velune/events.py +204 -0
  110. velune/execution/__init__.py +22 -0
  111. velune/execution/benchmarker.py +315 -0
  112. velune/execution/cancellation.py +53 -0
  113. velune/execution/checkpointer.py +130 -0
  114. velune/execution/command_spec.py +165 -0
  115. velune/execution/diff_preview.py +197 -0
  116. velune/execution/executor.py +181 -0
  117. velune/execution/module.py +18 -0
  118. velune/execution/multi_diff.py +67 -0
  119. velune/execution/path_guard.py +74 -0
  120. velune/execution/planner.py +91 -0
  121. velune/execution/rollback.py +89 -0
  122. velune/execution/sandbox.py +268 -0
  123. velune/execution/validator.py +115 -0
  124. velune/hardware/__init__.py +1 -0
  125. velune/hardware/detector.py +192 -0
  126. velune/kernel/__init__.py +55 -0
  127. velune/kernel/bootstrap.py +125 -0
  128. velune/kernel/config.py +426 -0
  129. velune/kernel/entrypoint.py +78 -0
  130. velune/kernel/health.py +54 -0
  131. velune/kernel/lifecycle.py +143 -0
  132. velune/kernel/module.py +17 -0
  133. velune/kernel/modules.py +23 -0
  134. velune/kernel/registry.py +96 -0
  135. velune/kernel/schemas.py +28 -0
  136. velune/main.py +9 -0
  137. velune/mcp/__init__.py +9 -0
  138. velune/mcp/client.py +115 -0
  139. velune/mcp/config.py +19 -0
  140. velune/mcp/server.py +624 -0
  141. velune/memory/__init__.py +32 -0
  142. velune/memory/compaction.py +506 -0
  143. velune/memory/embedding_pipeline.py +241 -0
  144. velune/memory/lifecycle.py +680 -0
  145. velune/memory/module.py +218 -0
  146. velune/memory/prioritizer.py +67 -0
  147. velune/memory/storage/episodic_schema.sql +53 -0
  148. velune/memory/storage/lancedb_store.py +282 -0
  149. velune/memory/storage/sqlite_manager.py +369 -0
  150. velune/memory/storage/sqlite_pool.py +149 -0
  151. velune/memory/tiers/episodic.py +588 -0
  152. velune/memory/tiers/graph.py +378 -0
  153. velune/memory/tiers/lineage.py +416 -0
  154. velune/memory/tiers/semantic.py +475 -0
  155. velune/memory/tiers/working.py +168 -0
  156. velune/memory/vitality.py +132 -0
  157. velune/models/__init__.py +15 -0
  158. velune/models/family.py +76 -0
  159. velune/models/module.py +20 -0
  160. velune/models/probes.py +192 -0
  161. velune/models/profile_cache.py +84 -0
  162. velune/models/profiler.py +108 -0
  163. velune/models/registry.py +251 -0
  164. velune/models/scorer.py +233 -0
  165. velune/models/specializations.py +205 -0
  166. velune/orchestration/__init__.py +19 -0
  167. velune/orchestration/engine.py +239 -0
  168. velune/orchestration/module.py +15 -0
  169. velune/orchestration/role_assignments.py +82 -0
  170. velune/orchestration/schemas.py +98 -0
  171. velune/plugins/__init__.py +20 -0
  172. velune/plugins/hooks.py +50 -0
  173. velune/plugins/loader.py +161 -0
  174. velune/plugins/registry.py +56 -0
  175. velune/plugins/schemas.py +21 -0
  176. velune/providers/__init__.py +23 -0
  177. velune/providers/adapters/anthropic.py +257 -0
  178. velune/providers/adapters/fireworks.py +115 -0
  179. velune/providers/adapters/google.py +234 -0
  180. velune/providers/adapters/groq.py +151 -0
  181. velune/providers/adapters/huggingface.py +210 -0
  182. velune/providers/adapters/llamacpp.py +208 -0
  183. velune/providers/adapters/lmstudio.py +175 -0
  184. velune/providers/adapters/ollama.py +233 -0
  185. velune/providers/adapters/openai.py +213 -0
  186. velune/providers/adapters/openrouter.py +81 -0
  187. velune/providers/adapters/together.py +134 -0
  188. velune/providers/adapters/xai.py +60 -0
  189. velune/providers/base.py +86 -0
  190. velune/providers/benchmarker.py +138 -0
  191. velune/providers/discovery/__init__.py +33 -0
  192. velune/providers/discovery/anthropic.py +79 -0
  193. velune/providers/discovery/benchmarks.py +44 -0
  194. velune/providers/discovery/classifier.py +69 -0
  195. velune/providers/discovery/fireworks.py +95 -0
  196. velune/providers/discovery/gguf.py +88 -0
  197. velune/providers/discovery/google.py +95 -0
  198. velune/providers/discovery/gpu.py +117 -0
  199. velune/providers/discovery/groq.py +21 -0
  200. velune/providers/discovery/huggingface.py +67 -0
  201. velune/providers/discovery/lmstudio.py +80 -0
  202. velune/providers/discovery/ollama.py +162 -0
  203. velune/providers/discovery/openai.py +96 -0
  204. velune/providers/discovery/openrouter.py +113 -0
  205. velune/providers/discovery/scanner.py +115 -0
  206. velune/providers/discovery/together.py +114 -0
  207. velune/providers/discovery/xai.py +57 -0
  208. velune/providers/health.py +67 -0
  209. velune/providers/health_monitor.py +169 -0
  210. velune/providers/keystore.py +142 -0
  211. velune/providers/local_paths.py +49 -0
  212. velune/providers/local_resolver.py +229 -0
  213. velune/providers/module.py +51 -0
  214. velune/providers/ollama_manager.py +193 -0
  215. velune/providers/registry.py +220 -0
  216. velune/providers/router.py +255 -0
  217. velune/providers/task_classifier.py +288 -0
  218. velune/py.typed +0 -0
  219. velune/repository/__init__.py +33 -0
  220. velune/repository/analyzer.py +127 -0
  221. velune/repository/ast_parser.py +822 -0
  222. velune/repository/blast_radius.py +298 -0
  223. velune/repository/boundary_classifier.py +295 -0
  224. velune/repository/cognition.py +316 -0
  225. velune/repository/grapher.py +179 -0
  226. velune/repository/import_graph.py +263 -0
  227. velune/repository/incremental_indexer.py +275 -0
  228. velune/repository/index_state.py +96 -0
  229. velune/repository/indexer.py +243 -0
  230. velune/repository/module.py +17 -0
  231. velune/repository/parser.py +474 -0
  232. velune/repository/project_type.py +300 -0
  233. velune/repository/rename_journal.py +287 -0
  234. velune/repository/scanner.py +193 -0
  235. velune/repository/schemas.py +102 -0
  236. velune/repository/symbol_registry.py +365 -0
  237. velune/repository/tracker.py +252 -0
  238. velune/retrieval/__init__.py +27 -0
  239. velune/retrieval/cache.py +110 -0
  240. velune/retrieval/fast_path.py +391 -0
  241. velune/retrieval/graph.py +124 -0
  242. velune/retrieval/hybrid.py +271 -0
  243. velune/retrieval/keyword.py +131 -0
  244. velune/retrieval/module.py +26 -0
  245. velune/retrieval/pipeline.py +303 -0
  246. velune/retrieval/reranker.py +102 -0
  247. velune/retrieval/schemas.py +59 -0
  248. velune/retrieval/slow_path.py +364 -0
  249. velune/retrieval/vector.py +203 -0
  250. velune/telemetry/__init__.py +59 -0
  251. velune/telemetry/cognition.py +267 -0
  252. velune/telemetry/cost_estimator.py +92 -0
  253. velune/telemetry/debug.py +304 -0
  254. velune/telemetry/doctor.py +244 -0
  255. velune/telemetry/logging.py +286 -0
  256. velune/telemetry/spans.py +277 -0
  257. velune/telemetry/token_tracker.py +140 -0
  258. velune/telemetry/usage_tracker.py +340 -0
  259. velune/tools/__init__.py +41 -0
  260. velune/tools/base/registry.py +87 -0
  261. velune/tools/base/tool.py +63 -0
  262. velune/tools/code/navigate.py +116 -0
  263. velune/tools/code/search.py +123 -0
  264. velune/tools/filesystem/read.py +75 -0
  265. velune/tools/filesystem/search.py +136 -0
  266. velune/tools/filesystem/write.py +163 -0
  267. velune/tools/git/history.py +177 -0
  268. velune/tools/git/operations.py +122 -0
  269. velune/tools/git/state.py +121 -0
  270. velune/tools/module.py +81 -0
  271. velune/tools/terminal/execute.py +72 -0
  272. velune/tools/terminal/history.py +47 -0
  273. velune/tools/web/fetch.py +55 -0
  274. velune/tools/web/validator.py +122 -0
  275. velune_cli-0.9.0.dist-info/METADATA +518 -0
  276. velune_cli-0.9.0.dist-info/RECORD +279 -0
  277. velune_cli-0.9.0.dist-info/WHEEL +4 -0
  278. velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
  279. velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,234 @@
1
+ """Google Gemini provider adapter — Generative Language REST API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+
9
+ import httpx
10
+
11
+ from velune.core.errors.provider import InferenceError, ProviderAuthenticationError
12
+ from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
13
+ from velune.core.types.model import CapabilityLevel, ModelDescriptor
14
+ from velune.core.types.provider import ProviderCapabilities, ProviderHealth
15
+ from velune.providers.base import ModelProvider
16
+ from velune.providers.keystore import get_key
17
+
18
+ _BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
19
+
20
def _gemini_descriptor(
    model_id: str,
    display_name: str,
    *,
    context_length: int,
    speed_tier: str,
    cost_per_1k_tokens: float,
    tags: list[str],
    capabilities: dict,
) -> ModelDescriptor:
    """Build one Gemini descriptor; provider id and locality are the same for all."""
    return ModelDescriptor(
        model_id=model_id,
        display_name=display_name,
        provider_id="google",
        context_length=context_length,
        capabilities=capabilities,
        is_local=False,
        speed_tier=speed_tier,
        cost_per_1k_tokens=cost_per_1k_tokens,
        tags=tags,
    )


# Static catalogue of the Gemini models this adapter advertises.
_MODELS = [
    _gemini_descriptor(
        "gemini-2.0-flash",
        "Gemini 2.0 Flash",
        context_length=1048576,
        speed_tier="fast",
        cost_per_1k_tokens=0.000075,
        tags=["cloud", "google", "flash", "free"],
        capabilities=dict(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    _gemini_descriptor(
        "gemini-1.5-pro",
        "Gemini 1.5 Pro",
        context_length=2097152,
        speed_tier="medium",
        cost_per_1k_tokens=0.00125,
        tags=["cloud", "google", "pro"],
        capabilities=dict(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.EXPERT,
            planning=CapabilityLevel.EXPERT,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.EXPERT,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    _gemini_descriptor(
        "gemini-1.5-flash",
        "Gemini 1.5 Flash",
        context_length=1048576,
        speed_tier="fast",
        cost_per_1k_tokens=0.000075,
        tags=["cloud", "google", "flash", "free"],
        capabilities=dict(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    _gemini_descriptor(
        "gemini-2.0-flash-thinking-exp",
        "Gemini 2.0 Flash Thinking",
        context_length=32767,
        speed_tier="medium",
        cost_per_1k_tokens=0.0,
        tags=["cloud", "google", "thinking", "free"],
        capabilities=dict(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.EXPERT,
            planning=CapabilityLevel.EXPERT,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.INTERMEDIATE,
        ),
    ),
]

# Public name for the same list object (not a copy), for tests and tooling.
GEMINI_MODELS = _MODELS
101
+
102
+
103
+ def _build_contents(messages: list[dict]) -> tuple[list[dict], str]:
104
+ """Split messages into Gemini *contents* + system instruction text."""
105
+ system_parts: list[str] = []
106
+ contents: list[dict] = []
107
+ for msg in messages:
108
+ role = msg.get("role", "user")
109
+ text = msg.get("content", "")
110
+ if role == "system":
111
+ system_parts.append(text)
112
+ else:
113
+ gemini_role = "model" if role == "assistant" else "user"
114
+ contents.append({"role": gemini_role, "parts": [{"text": text}]})
115
+ return contents, "\n".join(system_parts)
116
+
117
+
118
class GoogleProvider(ModelProvider):
    """Google Gemini provider using the Generative Language REST API.

    The API key is sent in the ``x-goog-api-key`` request header instead of
    the ``key`` query parameter. A query parameter becomes part of the request
    URL, which httpx writes to its request log and embeds in the text of
    ``HTTPStatusError`` — and that text is copied into the ``InferenceError``
    messages raised below.
    """

    def __init__(self, api_key: str | None = None) -> None:
        """Remember the API key (argument first, keystore as fallback).

        The HTTP client is created lazily by :meth:`initialize`.
        """
        self._api_key = api_key or get_key("google")
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=True,
            supports_embeddings=False,
            max_context_window=2097152,
        )

    def _auth_headers(self) -> dict[str, str]:
        """Return the per-request authentication header."""
        # Callers run initialize() first, which rejects a missing key.
        return {"x-goog-api-key": self._api_key or ""}

    def _convert_messages(self, request: InferenceRequest) -> dict:
        """Build the full Gemini REST payload from an InferenceRequest.

        ``maxOutputTokens`` and ``stopSequences`` are only included when the
        request sets them to a truthy value; ``systemInstruction`` is only
        included when the messages contain system text.
        """
        contents, system_text = _build_contents(request.messages)
        generation_config: dict = {
            "temperature": request.temperature,
            "topP": request.top_p,
        }
        if request.max_tokens:
            generation_config["maxOutputTokens"] = request.max_tokens
        if request.stop_sequences:
            generation_config["stopSequences"] = request.stop_sequences

        payload: dict = {"contents": contents, "generationConfig": generation_config}
        if system_text:
            payload["systemInstruction"] = {"parts": [{"text": system_text}]}
        return payload

    @property
    def provider_id(self) -> str:
        return "google"

    async def initialize(self) -> None:
        """Create the HTTP client on first use.

        Raises:
            ProviderAuthenticationError: If no API key is available.
        """
        if not self._api_key:
            raise ProviderAuthenticationError(
                "Google API key not found — set GOOGLE_API_KEY or run: velune config set-key google"
            )
        if not self.client:
            self.client = httpx.AsyncClient(base_url=_BASE_URL, timeout=300.0)

    async def list_models(self) -> list[ModelDescriptor]:
        """Return a copy of the static Gemini model catalogue."""
        return list(_MODELS)

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run a non-streaming ``generateContent`` call.

        A response with no candidates (key missing or empty list) yields an
        empty ``content`` and the default ``"stop"`` finish reason rather
        than an ``IndexError``.

        Raises:
            ProviderAuthenticationError: If no API key is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        payload = self._convert_messages(request)

        try:
            url = f"/models/{request.model_id}:generateContent"
            resp = await self.client.post(url, json=payload, headers=self._auth_headers())
            resp.raise_for_status()
            data = resp.json()
            latency = (time.perf_counter() - start) * 1000.0

            # `or [{}]` also covers an explicitly empty candidates list.
            candidate = (data.get("candidates") or [{}])[0]
            parts = candidate.get("content", {}).get("parts", [])
            text = "".join(p.get("text", "") for p in parts)
            usage = data.get("usageMetadata", {})
            return InferenceResponse(
                content=text,
                model_id=request.model_id,
                finish_reason=(candidate.get("finishReason") or "STOP").lower(),
                tokens_used=usage.get("totalTokenCount", 0),
                latency_ms=latency,
            )
        except httpx.HTTPError as e:
            raise InferenceError(f"Google Gemini inference failed: {e}") from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Stream ``streamGenerateContent`` results as server-sent events.

        Lines that are not ``data: `` events, events whose payload is not
        valid JSON, and events with an empty candidates list are skipped.

        Raises:
            ProviderAuthenticationError: If no API key is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        payload = self._convert_messages(request)

        try:
            url = f"/models/{request.model_id}:streamGenerateContent"
            async with self.client.stream(
                "POST",
                url,
                json=payload,
                params={"alt": "sse"},
                headers=self._auth_headers(),
            ) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    # Only the decode is guarded, so an exception raised at
                    # the yield point is never mistaken for a malformed event.
                    try:
                        data = json.loads(line[6:])
                    except json.JSONDecodeError:
                        continue
                    candidates = data.get("candidates", [{}])
                    if not candidates:
                        continue
                    candidate = candidates[0]
                    parts = candidate.get("content", {}).get("parts", [])
                    finish = candidate.get("finishReason")
                    yield StreamChunk(
                        content="".join(p.get("text", "") for p in parts),
                        finish_reason=finish.lower() if finish else None,
                    )
        except httpx.HTTPError as e:
            raise InferenceError(f"Google Gemini stream failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Embeddings are not implemented by this adapter; always raises."""
        raise NotImplementedError("GoogleProvider does not support embeddings via this adapter.")

    async def health_check(self) -> ProviderHealth:
        """Probe ``GET /models``.

        Returns HEALTHY on HTTP 200, DEGRADED on any other status, and
        UNAVAILABLE if anything raises (missing key, network error, ...).
        """
        try:
            await self.initialize()
            assert self.client is not None
            resp = await self.client.get("/models", headers=self._auth_headers())
            return ProviderHealth.HEALTHY if resp.status_code == 200 else ProviderHealth.DEGRADED
        except Exception:
            # Deliberately broad: a health probe reports status, never raises.
            return ProviderHealth.UNAVAILABLE

    def get_capabilities(self) -> ProviderCapabilities:
        return self._capabilities

    async def shutdown(self) -> None:
        """Close the HTTP client, if one was created, and forget it."""
        if self.client:
            await self.client.aclose()
            self.client = None
@@ -0,0 +1,151 @@
1
+ """Groq provider adapter — OpenAI-compatible endpoint, free tier."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from velune.core.types.model import CapabilityLevel, ModelCapabilityProfile, ModelDescriptor
6
+ from velune.core.types.provider import ProviderHealth
7
+ from velune.providers.adapters.openai import OpenAIProvider
8
+ from velune.providers.keystore import get_key, has_key
9
+
10
def _groq_free_model(
    model_id: str,
    display_name: str,
    *,
    context_length: int,
    tags: list[str],
    capabilities: ModelCapabilityProfile,
) -> ModelDescriptor:
    """Build one Groq descriptor with the fields every catalogue entry shares."""
    return ModelDescriptor(
        model_id=model_id,
        provider_id="groq",
        display_name=display_name,
        context_length=context_length,
        is_local=False,
        free_tier=True,
        cost_per_1k_tokens=0.0,
        speed_tier="fast",
        capabilities=capabilities,
        tags=tags,
        metadata={"free_tier": True},
    )


# Static catalogue of the Groq-hosted models this adapter advertises.
GROQ_MODELS: list[ModelDescriptor] = [
    _groq_free_model(
        "llama-3.3-70b-versatile",
        "Llama 3.3 70B Versatile",
        context_length=131072,
        tags=["cloud", "groq", "free", "llama"],
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    _groq_free_model(
        "llama-3.1-8b-instant",
        "Llama 3.1 8B Instant",
        context_length=131072,
        tags=["cloud", "groq", "free", "llama", "instant"],
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.ADVANCED,
        ),
    ),
    _groq_free_model(
        "mixtral-8x7b-32768",
        "Mixtral 8x7B",
        context_length=32768,
        tags=["cloud", "groq", "free", "mixtral"],
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.INTERMEDIATE,
        ),
    ),
    _groq_free_model(
        "gemma2-9b-it",
        "Gemma 2 9B Instruct",
        context_length=8192,
        tags=["cloud", "groq", "free", "gemma"],
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.BASIC,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.BASIC,
            long_context=CapabilityLevel.BASIC,
        ),
    ),
    _groq_free_model(
        "llama-3.2-11b-vision-preview",
        "Llama 3.2 11B Vision",
        context_length=8192,
        tags=["cloud", "groq", "free", "llama", "vision"],
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.BASIC,
            summarization=CapabilityLevel.INTERMEDIATE,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.BASIC,
            long_context=CapabilityLevel.BASIC,
        ),
    ),
]
117
+
118
+
119
class GroqProvider(OpenAIProvider):
    """Groq Cloud provider — wire-compatible with the OpenAI chat API.

    Uses Groq's custom LPU hardware for extremely fast free-tier inference.
    All request handling is inherited from ``OpenAIProvider``; this subclass
    only supplies the Groq endpoint, key lookup and model catalogue.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.groq.com/openai/v1",
    ) -> None:
        """Configure the OpenAI-compatible base class for Groq.

        Args:
            api_key: Explicit API key; falls back to the keystore entry
                ``"groq"`` when omitted or empty.
            base_url: Endpoint root, overridable for proxies or testing.
        """
        resolved_key = api_key or get_key("groq")
        super().__init__(api_key=resolved_key, base_url=base_url)
        # Kept locally so health_check() and get_provider_info() describe
        # what this instance was actually configured with.
        self._groq_key_configured = bool(resolved_key)
        self._groq_base_url = base_url

    @property
    def provider_id(self) -> str:
        return "groq"

    async def list_models(self) -> list[ModelDescriptor]:
        """Return a copy of the static Groq catalogue.

        A copy is returned so callers that mutate the result cannot alter
        the shared module-level ``GROQ_MODELS`` list.
        """
        return list(GROQ_MODELS)

    async def health_check(self) -> ProviderHealth:
        """Report UNAVAILABLE when no key exists, else defer to the base class.

        A key counts as present if one was resolved at construction time
        (explicit argument or keystore) or if the keystore has one now.
        """
        if not self._groq_key_configured and not has_key("groq"):
            return ProviderHealth.UNAVAILABLE
        return await super().health_check()

    def get_provider_info(self) -> dict:
        """Return static display metadata plus the endpoint actually in use."""
        return {
            "provider_id": "groq",
            "display_name": "Groq",
            "is_free_tier": True,
            "base_url": self._groq_base_url,
            "note": "Free tier — extremely fast inference via custom LPU hardware",
        }
@@ -0,0 +1,210 @@
1
+ """Hugging Face provider adapter implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+
9
+ import httpx
10
+ from pydantic import SecretStr
11
+
12
+ from velune.core.errors.provider import (
13
+ InferenceError,
14
+ ProviderAuthenticationError,
15
+ )
16
+ from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
17
+ from velune.core.types.model import ModelDescriptor
18
+ from velune.core.types.provider import ProviderCapabilities, ProviderHealth
19
+ from velune.providers.base import ModelProvider
20
+ from velune.providers.keystore import get_key
21
+
22
+
23
class HuggingFaceProvider(ModelProvider):
    """Hugging Face provider for serverless Inference API."""

    def __init__(
        self,
        api_key: str | SecretStr | None = None,
        base_url: str = "https://api-inference.huggingface.co",
    ) -> None:
        """Remember the token and endpoint; the HTTP client is created lazily.

        Args:
            api_key: Token as a plain string or ``SecretStr``; falls back to
                the keystore entry ``"huggingface"`` when omitted or empty.
            base_url: Root URL of the inference endpoint.
        """
        self._api_key = api_key or get_key("huggingface")
        # Unwrap SecretStr-like values so the header carries the real token.
        if hasattr(self._api_key, "get_secret_value"):
            self._api_key = self._api_key.get_secret_value()
        self._base_url = base_url
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=False,
            supports_embeddings=True,
            max_context_window=32768,
        )

    @property
    def provider_id(self) -> str:
        return "huggingface"

    async def initialize(self) -> None:
        """Create the authenticated HTTP client on first use.

        Raises:
            ProviderAuthenticationError: If no API token is available.
        """
        if not self._api_key:
            raise ProviderAuthenticationError(
                "Hugging Face API token (HF_TOKEN) not found in environment or config"
            )
        if not self.client:
            headers = {"Authorization": f"Bearer {self._api_key}"}
            self.client = httpx.AsyncClient(base_url=self._base_url, headers=headers, timeout=300.0)

    async def list_models(self) -> list[ModelDescriptor]:
        """Return whatever ``HuggingFaceDiscovery().discover()`` reports."""
        # Imported here rather than at module top, as in the original.
        from velune.providers.discovery.huggingface import HuggingFaceDiscovery

        discovery = HuggingFaceDiscovery()
        return await discovery.discover()

    def _generation_payload(self, request: InferenceRequest, prompt: str) -> dict:
        """Build the request body shared by :meth:`infer` and :meth:`stream`."""
        return {
            "inputs": prompt,
            "parameters": {
                "temperature": request.temperature,
                "max_new_tokens": request.max_tokens or 1024,
                "top_p": request.top_p,
            },
            "options": {"wait_for_model": True},
        }

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run a non-streaming text-generation call.

        Raises:
            ProviderAuthenticationError: If no API token is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        try:
            # Map standard messages to conversational prompt
            prompt = self._format_messages_to_prompt(request.messages)
            payload = self._generation_payload(request, prompt)

            model_path = f"/models/{request.model_id}"
            response = await self.client.post(model_path, json=payload)
            response.raise_for_status()
            data = response.json()
            latency = (time.perf_counter() - start) * 1000.0

            # HF Serverless response formatting varies by model/pipeline type
            content = ""
            if isinstance(data, list) and len(data) > 0:
                content = data[0].get("generated_text", "")
                # Strip the prompt from generation if the model prepends it
                if content.startswith(prompt):
                    content = content[len(prompt) :]
            elif isinstance(data, dict):
                content = data.get("generated_text", "")

            return InferenceResponse(
                content=content.strip(),
                model_id=request.model_id,
                finish_reason="stop",
                tokens_used=0,  # HF serverless doesn't return exact token metrics consistently
                latency_ms=latency,
            )
        except httpx.HTTPError as e:
            raise InferenceError(f"Hugging Face Inference completion failed: {e}") from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Stream generated tokens from the Serverless Inference API.

        Lines that do not start with ``data:`` and events that cannot be
        turned into a chunk are skipped.

        Raises:
            ProviderAuthenticationError: If no API token is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        try:
            prompt = self._format_messages_to_prompt(request.messages)
            payload = self._generation_payload(request, prompt)
            payload["stream"] = True

            model_path = f"/models/{request.model_id}"
            async with self.client.stream("POST", model_path, json=payload) as response:
                response.raise_for_status()
                # Serverless stream format is line-delimited SSE chunks
                async for line in response.aiter_lines():
                    if not line.startswith("data:"):
                        continue
                    # Build the chunk inside the guard but yield outside it, so
                    # an exception raised at the yield point is not swallowed
                    # as if it were a malformed event.
                    try:
                        chunk_data = json.loads(line[5:])
                        token = chunk_data.get("token", {})
                        chunk = StreamChunk(
                            content=token.get("text", ""),
                            finish_reason="stop" if token.get("special", False) else None,
                        )
                    except Exception:
                        # Best-effort: a malformed event must not abort the stream.
                        continue
                    yield chunk
        except httpx.HTTPError as e:
            raise InferenceError(f"Hugging Face Inference streaming failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Embed *texts* with *model_id* and return one vector per row.

        Token-level (3-D) responses are mean-pooled per sequence; a flat
        1-D response is wrapped in a single-element list.

        Raises:
            ProviderAuthenticationError: If no API token is available.
            InferenceError: If the HTTP request fails or returns an error status.
            ValueError: If the response is not a non-empty list.
        """
        await self.initialize()
        assert self.client is not None
        try:
            model_path = f"/models/{model_id}"
            response = await self.client.post(
                model_path, json={"inputs": texts, "options": {"wait_for_model": True}}
            )
            response.raise_for_status()
            embeddings = response.json()

            # Embeddings could be 1D or 2D/3D depending on token poolings. Ensure we return 2D floats.
            if isinstance(embeddings, list) and len(embeddings) > 0:
                first = embeddings[0]
                if isinstance(first, list):
                    # Guard `first` being empty before peeking at its first item.
                    if first and isinstance(first[0], list):
                        # Simple average pooling for token embeddings
                        return [
                            [sum(col) / len(seq) for col in zip(*seq, strict=False)]
                            for seq in embeddings
                        ]
                    return embeddings
                # Single sequence 1D returned, wrap in list
                return [embeddings]
            raise ValueError("Invalid embedding response structure from HF Inference API")
        except httpx.HTTPError as e:
            raise InferenceError(f"Hugging Face embedding failed: {e}") from e

    def _format_messages_to_prompt(self, messages: list[dict]) -> str:
        """Stitch chat messages into one ``<|role|>``-tagged prompt string.

        Roles other than ``"system"`` and ``"user"`` are rendered as
        assistant turns; a missing role is treated as ``"user"``. The result
        always ends with an open ``<|assistant|>`` tag for the model to fill.
        """
        segments: list[str] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            tag = role if role in ("system", "user") else "assistant"
            segments.append(f"<|{tag}|>\n{content}</s>\n")
        segments.append("<|assistant|>\n")
        return "".join(segments)

    async def health_check(self) -> ProviderHealth:
        """Probe ``GET /models/gpt2``.

        Returns HEALTHY on HTTP 200, DEGRADED on any other status, and
        UNAVAILABLE if anything raises (missing token, network error, ...).
        """
        try:
            await self.initialize()
            assert self.client is not None
            # Fetch meta details for standard model to verify connection
            resp = await self.client.get("/models/gpt2")
            if resp.status_code == 200:
                return ProviderHealth.HEALTHY
            return ProviderHealth.DEGRADED
        except Exception:
            # Deliberately broad: a health probe reports status, never raises.
            return ProviderHealth.UNAVAILABLE

    def get_capabilities(self) -> ProviderCapabilities:
        return self._capabilities

    async def shutdown(self) -> None:
        """Close the HTTP client, if one was created, and forget it."""
        if self.client:
            await self.client.aclose()
            self.client = None