velune-cli 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velune/__init__.py +5 -0
- velune/__main__.py +6 -0
- velune/cli/__init__.py +5 -0
- velune/cli/app.py +208 -0
- velune/cli/autocomplete.py +80 -0
- velune/cli/banner.py +60 -0
- velune/cli/commands/__init__.py +32 -0
- velune/cli/commands/ask.py +175 -0
- velune/cli/commands/base.py +16 -0
- velune/cli/commands/chat.py +228 -0
- velune/cli/commands/config.py +224 -0
- velune/cli/commands/daemon.py +88 -0
- velune/cli/commands/doctor.py +721 -0
- velune/cli/commands/init.py +170 -0
- velune/cli/commands/mcp.py +82 -0
- velune/cli/commands/memory.py +293 -0
- velune/cli/commands/models.py +683 -0
- velune/cli/commands/preflight.py +95 -0
- velune/cli/commands/run.py +270 -0
- velune/cli/commands/setup.py +184 -0
- velune/cli/commands/workspace.py +249 -0
- velune/cli/context.py +36 -0
- velune/cli/councilmodel_ui.py +199 -0
- velune/cli/display/council_view.py +254 -0
- velune/cli/display/memory_view.py +126 -0
- velune/cli/display/panels.py +35 -0
- velune/cli/display/progress.py +25 -0
- velune/cli/display/themes.py +25 -0
- velune/cli/main.py +15 -0
- velune/cli/model_selector.py +51 -0
- velune/cli/modes.py +86 -0
- velune/cli/pull_ui.py +123 -0
- velune/cli/registry.py +80 -0
- velune/cli/rendering/__init__.py +5 -0
- velune/cli/rendering/error_panel.py +79 -0
- velune/cli/rendering/markdown.py +63 -0
- velune/cli/repl.py +1855 -0
- velune/cli/session_manager.py +71 -0
- velune/cli/slash_commands.py +37 -0
- velune/cli/theme.py +8 -0
- velune/cognition/__init__.py +23 -0
- velune/cognition/agents/__init__.py +7 -0
- velune/cognition/agents/coder.py +209 -0
- velune/cognition/agents/planner.py +156 -0
- velune/cognition/agents/reviewer.py +195 -0
- velune/cognition/arbitrator.py +220 -0
- velune/cognition/architecture.py +415 -0
- velune/cognition/budget.py +65 -0
- velune/cognition/council/__init__.py +47 -0
- velune/cognition/council/base.py +217 -0
- velune/cognition/council/challenger.py +74 -0
- velune/cognition/council/coder.py +79 -0
- velune/cognition/council/critic_agent.py +43 -0
- velune/cognition/council/critic_configs.py +111 -0
- velune/cognition/council/critics.py +41 -0
- velune/cognition/council/debate.py +46 -0
- velune/cognition/council/factory.py +140 -0
- velune/cognition/council/messages.py +56 -0
- velune/cognition/council/planner.py +124 -0
- velune/cognition/council/reviewer.py +74 -0
- velune/cognition/council/synthesizer.py +67 -0
- velune/cognition/council/tiers.py +188 -0
- velune/cognition/council_orchestrator.py +282 -0
- velune/cognition/firewall.py +354 -0
- velune/cognition/module.py +46 -0
- velune/cognition/orchestrator.py +1205 -0
- velune/cognition/personality.py +238 -0
- velune/cognition/state.py +104 -0
- velune/cognition/style_resolver.py +64 -0
- velune/cognition/verification.py +205 -0
- velune/context/__init__.py +28 -0
- velune/context/assembler.py +240 -0
- velune/context/budget.py +97 -0
- velune/context/extractive.py +95 -0
- velune/context/prompt_adaptation.py +480 -0
- velune/context/sections.py +99 -0
- velune/context/token_counter.py +134 -0
- velune/context/utilization.py +33 -0
- velune/context/window.py +63 -0
- velune/core/__init__.py +89 -0
- velune/core/background.py +5 -0
- velune/core/config/__init__.py +37 -0
- velune/core/errors/__init__.py +90 -0
- velune/core/errors/catalog.py +188 -0
- velune/core/errors/execution.py +31 -0
- velune/core/errors/memory.py +25 -0
- velune/core/errors/orchestration.py +31 -0
- velune/core/errors/provider.py +37 -0
- velune/core/event_loop.py +35 -0
- velune/core/logging.py +83 -0
- velune/core/paths.py +165 -0
- velune/core/runtime.py +113 -0
- velune/core/startup_profiler.py +56 -0
- velune/core/task_registry.py +117 -0
- velune/core/trace.py +83 -0
- velune/core/types/__init__.py +48 -0
- velune/core/types/agent.py +53 -0
- velune/core/types/context.py +42 -0
- velune/core/types/inference.py +38 -0
- velune/core/types/memory.py +42 -0
- velune/core/types/model.py +70 -0
- velune/core/types/provider.py +62 -0
- velune/core/types/repository.py +38 -0
- velune/core/types/task.py +61 -0
- velune/core/types/workspace.py +28 -0
- velune/daemon/client.py +13 -0
- velune/daemon/server.py +127 -0
- velune/daemon/transport.py +179 -0
- velune/events.py +204 -0
- velune/execution/__init__.py +22 -0
- velune/execution/benchmarker.py +315 -0
- velune/execution/cancellation.py +53 -0
- velune/execution/checkpointer.py +130 -0
- velune/execution/command_spec.py +165 -0
- velune/execution/diff_preview.py +197 -0
- velune/execution/executor.py +181 -0
- velune/execution/module.py +18 -0
- velune/execution/multi_diff.py +67 -0
- velune/execution/path_guard.py +74 -0
- velune/execution/planner.py +91 -0
- velune/execution/rollback.py +89 -0
- velune/execution/sandbox.py +268 -0
- velune/execution/validator.py +115 -0
- velune/hardware/__init__.py +1 -0
- velune/hardware/detector.py +192 -0
- velune/kernel/__init__.py +55 -0
- velune/kernel/bootstrap.py +125 -0
- velune/kernel/config.py +426 -0
- velune/kernel/entrypoint.py +78 -0
- velune/kernel/health.py +54 -0
- velune/kernel/lifecycle.py +143 -0
- velune/kernel/module.py +17 -0
- velune/kernel/modules.py +23 -0
- velune/kernel/registry.py +96 -0
- velune/kernel/schemas.py +28 -0
- velune/main.py +9 -0
- velune/mcp/__init__.py +9 -0
- velune/mcp/client.py +115 -0
- velune/mcp/config.py +19 -0
- velune/mcp/server.py +624 -0
- velune/memory/__init__.py +32 -0
- velune/memory/compaction.py +506 -0
- velune/memory/embedding_pipeline.py +241 -0
- velune/memory/lifecycle.py +680 -0
- velune/memory/module.py +218 -0
- velune/memory/prioritizer.py +67 -0
- velune/memory/storage/episodic_schema.sql +53 -0
- velune/memory/storage/lancedb_store.py +282 -0
- velune/memory/storage/sqlite_manager.py +369 -0
- velune/memory/storage/sqlite_pool.py +149 -0
- velune/memory/tiers/episodic.py +588 -0
- velune/memory/tiers/graph.py +378 -0
- velune/memory/tiers/lineage.py +416 -0
- velune/memory/tiers/semantic.py +475 -0
- velune/memory/tiers/working.py +168 -0
- velune/memory/vitality.py +132 -0
- velune/models/__init__.py +15 -0
- velune/models/family.py +76 -0
- velune/models/module.py +20 -0
- velune/models/probes.py +192 -0
- velune/models/profile_cache.py +84 -0
- velune/models/profiler.py +108 -0
- velune/models/registry.py +251 -0
- velune/models/scorer.py +233 -0
- velune/models/specializations.py +205 -0
- velune/orchestration/__init__.py +19 -0
- velune/orchestration/engine.py +239 -0
- velune/orchestration/module.py +15 -0
- velune/orchestration/role_assignments.py +82 -0
- velune/orchestration/schemas.py +98 -0
- velune/plugins/__init__.py +20 -0
- velune/plugins/hooks.py +50 -0
- velune/plugins/loader.py +161 -0
- velune/plugins/registry.py +56 -0
- velune/plugins/schemas.py +21 -0
- velune/providers/__init__.py +23 -0
- velune/providers/adapters/anthropic.py +257 -0
- velune/providers/adapters/fireworks.py +115 -0
- velune/providers/adapters/google.py +234 -0
- velune/providers/adapters/groq.py +151 -0
- velune/providers/adapters/huggingface.py +210 -0
- velune/providers/adapters/llamacpp.py +208 -0
- velune/providers/adapters/lmstudio.py +175 -0
- velune/providers/adapters/ollama.py +233 -0
- velune/providers/adapters/openai.py +213 -0
- velune/providers/adapters/openrouter.py +81 -0
- velune/providers/adapters/together.py +134 -0
- velune/providers/adapters/xai.py +60 -0
- velune/providers/base.py +86 -0
- velune/providers/benchmarker.py +138 -0
- velune/providers/discovery/__init__.py +33 -0
- velune/providers/discovery/anthropic.py +79 -0
- velune/providers/discovery/benchmarks.py +44 -0
- velune/providers/discovery/classifier.py +69 -0
- velune/providers/discovery/fireworks.py +95 -0
- velune/providers/discovery/gguf.py +88 -0
- velune/providers/discovery/google.py +95 -0
- velune/providers/discovery/gpu.py +117 -0
- velune/providers/discovery/groq.py +21 -0
- velune/providers/discovery/huggingface.py +67 -0
- velune/providers/discovery/lmstudio.py +80 -0
- velune/providers/discovery/ollama.py +162 -0
- velune/providers/discovery/openai.py +96 -0
- velune/providers/discovery/openrouter.py +113 -0
- velune/providers/discovery/scanner.py +115 -0
- velune/providers/discovery/together.py +114 -0
- velune/providers/discovery/xai.py +57 -0
- velune/providers/health.py +67 -0
- velune/providers/health_monitor.py +169 -0
- velune/providers/keystore.py +142 -0
- velune/providers/local_paths.py +49 -0
- velune/providers/local_resolver.py +229 -0
- velune/providers/module.py +51 -0
- velune/providers/ollama_manager.py +193 -0
- velune/providers/registry.py +220 -0
- velune/providers/router.py +255 -0
- velune/providers/task_classifier.py +288 -0
- velune/py.typed +0 -0
- velune/repository/__init__.py +33 -0
- velune/repository/analyzer.py +127 -0
- velune/repository/ast_parser.py +822 -0
- velune/repository/blast_radius.py +298 -0
- velune/repository/boundary_classifier.py +295 -0
- velune/repository/cognition.py +316 -0
- velune/repository/grapher.py +179 -0
- velune/repository/import_graph.py +263 -0
- velune/repository/incremental_indexer.py +275 -0
- velune/repository/index_state.py +96 -0
- velune/repository/indexer.py +243 -0
- velune/repository/module.py +17 -0
- velune/repository/parser.py +474 -0
- velune/repository/project_type.py +300 -0
- velune/repository/rename_journal.py +287 -0
- velune/repository/scanner.py +193 -0
- velune/repository/schemas.py +102 -0
- velune/repository/symbol_registry.py +365 -0
- velune/repository/tracker.py +252 -0
- velune/retrieval/__init__.py +27 -0
- velune/retrieval/cache.py +110 -0
- velune/retrieval/fast_path.py +391 -0
- velune/retrieval/graph.py +124 -0
- velune/retrieval/hybrid.py +271 -0
- velune/retrieval/keyword.py +131 -0
- velune/retrieval/module.py +26 -0
- velune/retrieval/pipeline.py +303 -0
- velune/retrieval/reranker.py +102 -0
- velune/retrieval/schemas.py +59 -0
- velune/retrieval/slow_path.py +364 -0
- velune/retrieval/vector.py +203 -0
- velune/telemetry/__init__.py +59 -0
- velune/telemetry/cognition.py +267 -0
- velune/telemetry/cost_estimator.py +92 -0
- velune/telemetry/debug.py +304 -0
- velune/telemetry/doctor.py +244 -0
- velune/telemetry/logging.py +286 -0
- velune/telemetry/spans.py +277 -0
- velune/telemetry/token_tracker.py +140 -0
- velune/telemetry/usage_tracker.py +340 -0
- velune/tools/__init__.py +41 -0
- velune/tools/base/registry.py +87 -0
- velune/tools/base/tool.py +63 -0
- velune/tools/code/navigate.py +116 -0
- velune/tools/code/search.py +123 -0
- velune/tools/filesystem/read.py +75 -0
- velune/tools/filesystem/search.py +136 -0
- velune/tools/filesystem/write.py +163 -0
- velune/tools/git/history.py +177 -0
- velune/tools/git/operations.py +122 -0
- velune/tools/git/state.py +121 -0
- velune/tools/module.py +81 -0
- velune/tools/terminal/execute.py +72 -0
- velune/tools/terminal/history.py +47 -0
- velune/tools/web/fetch.py +55 -0
- velune/tools/web/validator.py +122 -0
- velune_cli-0.9.0.dist-info/METADATA +518 -0
- velune_cli-0.9.0.dist-info/RECORD +279 -0
- velune_cli-0.9.0.dist-info/WHEEL +4 -0
- velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
- velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Google Gemini provider adapter — Generative Language REST API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import AsyncIterator
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from velune.core.errors.provider import InferenceError, ProviderAuthenticationError
|
|
12
|
+
from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
|
|
13
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
14
|
+
from velune.core.types.provider import ProviderCapabilities, ProviderHealth
|
|
15
|
+
from velune.providers.base import ModelProvider
|
|
16
|
+
from velune.providers.keystore import get_key
|
|
17
|
+
|
|
18
|
+
# Root of the Generative Language REST API; request paths are joined onto it.
_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"

# Static catalogue of the Gemini models this adapter advertises.
# Each entry lists identity first, then limits/pricing, then the capability map.
_MODELS = [
    ModelDescriptor(
        provider_id="google",
        model_id="gemini-2.0-flash",
        display_name="Gemini 2.0 Flash",
        context_length=1_048_576,
        is_local=False,
        speed_tier="fast",
        cost_per_1k_tokens=0.000075,
        tags=["cloud", "google", "flash", "free"],
        capabilities=dict(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    ModelDescriptor(
        provider_id="google",
        model_id="gemini-1.5-pro",
        display_name="Gemini 1.5 Pro",
        context_length=2_097_152,
        is_local=False,
        speed_tier="medium",
        cost_per_1k_tokens=0.00125,
        tags=["cloud", "google", "pro"],
        capabilities=dict(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.EXPERT,
            planning=CapabilityLevel.EXPERT,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.EXPERT,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    ModelDescriptor(
        provider_id="google",
        model_id="gemini-1.5-flash",
        display_name="Gemini 1.5 Flash",
        context_length=1_048_576,
        is_local=False,
        speed_tier="fast",
        cost_per_1k_tokens=0.000075,
        tags=["cloud", "google", "flash", "free"],
        capabilities=dict(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
    ),
    ModelDescriptor(
        provider_id="google",
        model_id="gemini-2.0-flash-thinking-exp",
        display_name="Gemini 2.0 Flash Thinking",
        context_length=32_767,
        is_local=False,
        speed_tier="medium",
        cost_per_1k_tokens=0.0,
        tags=["cloud", "google", "thinking", "free"],
        capabilities=dict(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.EXPERT,
            planning=CapabilityLevel.EXPERT,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.INTERMEDIATE,
        ),
    ),
]

# Public alias used by tests and tooling (same list object as ``_MODELS``).
GEMINI_MODELS = _MODELS
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _build_contents(messages: list[dict]) -> tuple[list[dict], str]:
|
|
104
|
+
"""Split messages into Gemini *contents* + system instruction text."""
|
|
105
|
+
system_parts: list[str] = []
|
|
106
|
+
contents: list[dict] = []
|
|
107
|
+
for msg in messages:
|
|
108
|
+
role = msg.get("role", "user")
|
|
109
|
+
text = msg.get("content", "")
|
|
110
|
+
if role == "system":
|
|
111
|
+
system_parts.append(text)
|
|
112
|
+
else:
|
|
113
|
+
gemini_role = "model" if role == "assistant" else "user"
|
|
114
|
+
contents.append({"role": gemini_role, "parts": [{"text": text}]})
|
|
115
|
+
return contents, "\n".join(system_parts)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class GoogleProvider(ModelProvider):
    """Google Gemini provider using the Generative Language REST API.

    Requests authenticate by sending the API key as the ``key`` query
    parameter. httpx error messages include the request URL, so every
    transport error surfaced from this class is passed through
    :meth:`_redact` before being wrapped in ``InferenceError``.
    """

    def __init__(self, api_key: str | None = None) -> None:
        """Store the API key (explicit argument first, keystore second).

        The HTTP client is created lazily by :meth:`initialize`.
        """
        self._api_key = api_key or get_key("google")
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=True,
            supports_embeddings=False,
            max_context_window=2097152,
        )

    def _redact(self, message: str) -> str:
        """Return *message* with any occurrence of the API key masked."""
        key = self._api_key
        if isinstance(key, str) and key:
            return message.replace(key, "***")
        return message

    def _convert_messages(self, request: InferenceRequest) -> dict:
        """Build the full Gemini REST payload from an InferenceRequest.

        ``maxOutputTokens`` and ``stopSequences`` are only included when the
        request sets them to a truthy value; ``systemInstruction`` is only
        included when the messages contain system text.
        """
        contents, system_text = _build_contents(request.messages)
        generation_config: dict = {
            "temperature": request.temperature,
            "topP": request.top_p,
        }
        if request.max_tokens:
            generation_config["maxOutputTokens"] = request.max_tokens
        if request.stop_sequences:
            generation_config["stopSequences"] = request.stop_sequences

        payload: dict = {
            "contents": contents,
            "generationConfig": generation_config,
        }
        if system_text:
            payload["systemInstruction"] = {"parts": [{"text": system_text}]}
        return payload

    @property
    def provider_id(self) -> str:
        """Identifier of this provider."""
        return "google"

    async def initialize(self) -> None:
        """Create the HTTP client on first use.

        Raises:
            ProviderAuthenticationError: If no API key is configured.
        """
        if not self._api_key:
            raise ProviderAuthenticationError(
                "Google API key not found — set GOOGLE_API_KEY or run: velune config set-key google"
            )
        if not self.client:
            self.client = httpx.AsyncClient(base_url=_BASE_URL, timeout=300.0)

    async def list_models(self) -> list[ModelDescriptor]:
        """Return a copy of the static model catalogue."""
        return list(_MODELS)

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run a single non-streaming ``generateContent`` call.

        Returns:
            The concatenated text of the first candidate, with the lower-cased
            finish reason (``"stop"`` when the API reports none), the total
            token count reported by the API (0 when absent) and the measured
            latency in milliseconds. When the response carries no candidates
            the content is empty.

        Raises:
            ProviderAuthenticationError: If no API key is configured.
            InferenceError: On any httpx transport/status error or when the
                response body is not valid JSON.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        payload = self._convert_messages(request)
        url = f"/models/{request.model_id}:generateContent"

        try:
            resp = await self.client.post(url, json=payload, params={"key": self._api_key})
            resp.raise_for_status()
            data = resp.json()
        except httpx.HTTPError as e:
            raise InferenceError(self._redact(f"Google Gemini inference failed: {e}")) from e
        except json.JSONDecodeError as e:
            raise InferenceError(f"Google Gemini inference failed: invalid JSON response: {e}") from e
        latency = (time.perf_counter() - start) * 1000.0

        # ``candidates`` can be missing or an empty list; fall back to an
        # empty candidate instead of raising IndexError.
        candidates = data.get("candidates") or [{}]
        candidate = candidates[0]
        text = "".join(p.get("text", "") for p in candidate.get("content", {}).get("parts", []))
        usage = data.get("usageMetadata", {})
        return InferenceResponse(
            content=text,
            model_id=request.model_id,
            finish_reason=(candidate.get("finishReason") or "STOP").lower(),
            tokens_used=usage.get("totalTokenCount", 0),
            latency_ms=latency,
        )

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Stream a ``streamGenerateContent`` call as server-sent events.

        Yields one ``StreamChunk`` per ``data: `` line that parses as JSON and
        has at least one candidate; other lines are skipped.

        Raises:
            ProviderAuthenticationError: If no API key is configured.
            InferenceError: On any httpx transport/status error.
        """
        await self.initialize()
        assert self.client is not None
        payload = self._convert_messages(request)
        url = f"/models/{request.model_id}:streamGenerateContent"
        params = {"key": self._api_key, "alt": "sse"}

        try:
            async with self.client.stream("POST", url, json=payload, params=params) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    # Only the parse is guarded, so exceptions raised at the
                    # ``yield`` by the consumer are never swallowed here.
                    try:
                        data = json.loads(line[6:])
                    except json.JSONDecodeError:
                        continue
                    candidates = data.get("candidates", [{}])
                    if not candidates:
                        continue
                    candidate = candidates[0]
                    parts = candidate.get("content", {}).get("parts", [])
                    text = "".join(p.get("text", "") for p in parts)
                    finish = candidate.get("finishReason")
                    yield StreamChunk(
                        content=text,
                        finish_reason=finish.lower() if finish else None,
                    )
        except httpx.HTTPError as e:
            raise InferenceError(self._redact(f"Google Gemini stream failed: {e}")) from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Embeddings are not implemented by this adapter.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("GoogleProvider does not support embeddings via this adapter.")

    async def health_check(self) -> ProviderHealth:
        """Probe the ``/models`` endpoint.

        Returns:
            ``HEALTHY`` on HTTP 200, ``DEGRADED`` on any other status, and
            ``UNAVAILABLE`` if initialization or the request raises.
        """
        try:
            await self.initialize()
            assert self.client is not None
            resp = await self.client.get("/models", params={"key": self._api_key})
            return ProviderHealth.HEALTHY if resp.status_code == 200 else ProviderHealth.DEGRADED
        except Exception:
            # Deliberately broad: a health probe reports status, never raises.
            return ProviderHealth.UNAVAILABLE

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the capability flags set in the constructor."""
        return self._capabilities

    async def shutdown(self) -> None:
        """Close the HTTP client, if one was created, and forget it."""
        if self.client:
            await self.client.aclose()
            self.client = None
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Groq provider adapter — OpenAI-compatible endpoint, free tier."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from velune.core.types.model import CapabilityLevel, ModelCapabilityProfile, ModelDescriptor
|
|
6
|
+
from velune.core.types.provider import ProviderHealth
|
|
7
|
+
from velune.providers.adapters.openai import OpenAIProvider
|
|
8
|
+
from velune.providers.keystore import get_key, has_key
|
|
9
|
+
|
|
10
|
+
def _free_groq_model(
    *,
    model_id: str,
    display_name: str,
    context_length: int,
    capabilities: ModelCapabilityProfile,
    tags: list[str],
) -> ModelDescriptor:
    """Build one catalogue entry, filling in the fields every Groq model shares."""
    return ModelDescriptor(
        model_id=model_id,
        provider_id="groq",
        display_name=display_name,
        context_length=context_length,
        is_local=False,
        free_tier=True,
        cost_per_1k_tokens=0.0,
        speed_tier="fast",
        capabilities=capabilities,
        tags=tags,
        # A fresh dict per entry so descriptors never share mutable state.
        metadata={"free_tier": True},
    )


# Static catalogue of the Groq-hosted models this adapter advertises.
GROQ_MODELS: list[ModelDescriptor] = [
    _free_groq_model(
        model_id="llama-3.3-70b-versatile",
        display_name="Llama 3.3 70B Versatile",
        context_length=131_072,
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.EXPERT,
        ),
        tags=["cloud", "groq", "free", "llama"],
    ),
    _free_groq_model(
        model_id="llama-3.1-8b-instant",
        display_name="Llama 3.1 8B Instant",
        context_length=131_072,
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.ADVANCED,
        ),
        tags=["cloud", "groq", "free", "llama", "instant"],
    ),
    _free_groq_model(
        model_id="mixtral-8x7b-32768",
        display_name="Mixtral 8x7B",
        context_length=32_768,
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.INTERMEDIATE,
        ),
        tags=["cloud", "groq", "free", "mixtral"],
    ),
    _free_groq_model(
        model_id="gemma2-9b-it",
        display_name="Gemma 2 9B Instruct",
        context_length=8_192,
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.BASIC,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.BASIC,
            long_context=CapabilityLevel.BASIC,
        ),
        tags=["cloud", "groq", "free", "gemma"],
    ),
    _free_groq_model(
        model_id="llama-3.2-11b-vision-preview",
        display_name="Llama 3.2 11B Vision",
        context_length=8_192,
        capabilities=ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.BASIC,
            summarization=CapabilityLevel.INTERMEDIATE,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.BASIC,
            long_context=CapabilityLevel.BASIC,
        ),
        tags=["cloud", "groq", "free", "llama", "vision"],
    ),
]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class GroqProvider(OpenAIProvider):
    """Groq Cloud provider — wire-compatible with the OpenAI chat API.

    Uses Groq's custom LPU hardware for extremely fast free-tier inference.
    """

    # Endpoint used when the caller does not override ``base_url``.
    _DEFAULT_BASE_URL = "https://api.groq.com/openai/v1"

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.groq.com/openai/v1",
    ) -> None:
        """Configure the OpenAI-compatible client for Groq.

        Args:
            api_key: Explicit API key; when omitted the keystore entry for
                ``"groq"`` is used.
            base_url: Endpoint root passed through to the parent provider.
        """
        super().__init__(api_key=api_key or get_key("groq"), base_url=base_url)
        # Kept under Groq-specific names so no parent attribute is shadowed.
        self._groq_explicit_key = bool(api_key)
        self._groq_base_url = base_url

    @property
    def provider_id(self) -> str:
        """Identifier of this provider."""
        return "groq"

    async def list_models(self) -> list[ModelDescriptor]:
        """Return a copy of the static Groq catalogue.

        A copy is returned so callers cannot mutate the shared module-level
        ``GROQ_MODELS`` list.
        """
        return list(GROQ_MODELS)

    async def health_check(self) -> ProviderHealth:
        """Report UNAVAILABLE without a key, otherwise defer to the parent check.

        A key counts as present when it was passed to the constructor or when
        the keystore has an entry for ``"groq"``.
        """
        has_explicit_key = getattr(self, "_groq_explicit_key", False)
        if not (has_explicit_key or has_key("groq")):
            return ProviderHealth.UNAVAILABLE
        return await super().health_check()

    def get_provider_info(self) -> dict:
        """Return static display metadata plus the base URL in use."""
        return {
            "provider_id": "groq",
            "display_name": "Groq",
            "is_free_tier": True,
            # Reports the configured endpoint; equals the default unless the
            # caller overrode ``base_url``.
            "base_url": getattr(self, "_groq_base_url", self._DEFAULT_BASE_URL),
            "note": "Free tier — extremely fast inference via custom LPU hardware",
        }
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Hugging Face provider adapter implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import AsyncIterator
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
from pydantic import SecretStr
|
|
11
|
+
|
|
12
|
+
from velune.core.errors.provider import (
|
|
13
|
+
InferenceError,
|
|
14
|
+
ProviderAuthenticationError,
|
|
15
|
+
)
|
|
16
|
+
from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
|
|
17
|
+
from velune.core.types.model import ModelDescriptor
|
|
18
|
+
from velune.core.types.provider import ProviderCapabilities, ProviderHealth
|
|
19
|
+
from velune.providers.base import ModelProvider
|
|
20
|
+
from velune.providers.keystore import get_key
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class HuggingFaceProvider(ModelProvider):
|
|
24
|
+
"""Hugging Face provider for serverless Inference API."""
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
api_key: str | SecretStr | None = None,
|
|
29
|
+
base_url: str = "https://api-inference.huggingface.co",
|
|
30
|
+
) -> None:
|
|
31
|
+
self._api_key = api_key or get_key("huggingface")
|
|
32
|
+
if hasattr(self._api_key, "get_secret_value"):
|
|
33
|
+
self._api_key = self._api_key.get_secret_value()
|
|
34
|
+
self._base_url = base_url
|
|
35
|
+
self.client: httpx.AsyncClient | None = None
|
|
36
|
+
self._capabilities = ProviderCapabilities(
|
|
37
|
+
supports_streaming=True,
|
|
38
|
+
supports_function_calling=False,
|
|
39
|
+
supports_embeddings=True,
|
|
40
|
+
max_context_window=32768,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def provider_id(self) -> str:
|
|
45
|
+
return "huggingface"
|
|
46
|
+
|
|
47
|
+
async def initialize(self) -> None:
|
|
48
|
+
"""Initialize client headers."""
|
|
49
|
+
if not self._api_key:
|
|
50
|
+
raise ProviderAuthenticationError(
|
|
51
|
+
"Hugging Face API token (HF_TOKEN) not found in environment or config"
|
|
52
|
+
)
|
|
53
|
+
if not self.client:
|
|
54
|
+
headers = {"Authorization": f"Bearer {self._api_key}"}
|
|
55
|
+
self.client = httpx.AsyncClient(base_url=self._base_url, headers=headers, timeout=300.0)
|
|
56
|
+
|
|
57
|
+
async def list_models(self) -> list[ModelDescriptor]:
|
|
58
|
+
"""Fetch list of local cached Hugging Face models."""
|
|
59
|
+
from velune.providers.discovery.huggingface import HuggingFaceDiscovery
|
|
60
|
+
|
|
61
|
+
discovery = HuggingFaceDiscovery()
|
|
62
|
+
return await discovery.discover()
|
|
63
|
+
|
|
64
|
+
async def infer(self, request: InferenceRequest) -> InferenceResponse:
|
|
65
|
+
"""Query Hugging Face serverless chat completion API."""
|
|
66
|
+
await self.initialize()
|
|
67
|
+
assert self.client is not None
|
|
68
|
+
start = time.perf_counter()
|
|
69
|
+
try:
|
|
70
|
+
# Map standard messages to conversational prompt
|
|
71
|
+
prompt = self._format_messages_to_prompt(request.messages)
|
|
72
|
+
|
|
73
|
+
payload = {
|
|
74
|
+
"inputs": prompt,
|
|
75
|
+
"parameters": {
|
|
76
|
+
"temperature": request.temperature,
|
|
77
|
+
"max_new_tokens": request.max_tokens or 1024,
|
|
78
|
+
"top_p": request.top_p,
|
|
79
|
+
},
|
|
80
|
+
"options": {"wait_for_model": True},
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
model_path = f"/models/{request.model_id}"
|
|
84
|
+
response = await self.client.post(model_path, json=payload)
|
|
85
|
+
response.raise_for_status()
|
|
86
|
+
data = response.json()
|
|
87
|
+
latency = (time.perf_counter() - start) * 1000.0
|
|
88
|
+
|
|
89
|
+
# HF Serverless response formatting varies by model/pipeline type
|
|
90
|
+
content = ""
|
|
91
|
+
if isinstance(data, list) and len(data) > 0:
|
|
92
|
+
content = data[0].get("generated_text", "")
|
|
93
|
+
# Strip the prompt from generation if the model prepends it
|
|
94
|
+
if content.startswith(prompt):
|
|
95
|
+
content = content[len(prompt) :]
|
|
96
|
+
elif isinstance(data, dict):
|
|
97
|
+
content = data.get("generated_text", "")
|
|
98
|
+
|
|
99
|
+
return InferenceResponse(
|
|
100
|
+
content=content.strip(),
|
|
101
|
+
model_id=request.model_id,
|
|
102
|
+
finish_reason="stop",
|
|
103
|
+
tokens_used=0, # HF serverless doesn't return exact token metrics consistently
|
|
104
|
+
latency_ms=latency,
|
|
105
|
+
)
|
|
106
|
+
except httpx.HTTPError as e:
|
|
107
|
+
raise InferenceError(f"Hugging Face Inference completion failed: {e}")
|
|
108
|
+
|
|
109
|
+
async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
    """Stream generated tokens from the Hugging Face serverless API.

    The request messages are flattened into one prompt and posted with
    ``"stream": True``. Every response line that starts with ``data:`` is
    parsed as JSON and turned into one ``StreamChunk``.

    Args:
        request: Supplies ``messages``, ``model_id``, ``temperature``,
            ``top_p`` and ``max_tokens`` (1024 new tokens are requested when
            ``max_tokens`` is falsy).

    Yields:
        One ``StreamChunk`` per parsable event. ``content`` is the token text
        (empty when the event has no ``token``), and ``finish_reason`` is
        ``"stop"`` when the token is flagged ``special``, otherwise ``None``.

    Raises:
        InferenceError: If the HTTP request or the streamed read fails.
    """
    await self.initialize()
    assert self.client is not None
    try:
        prompt = self._format_messages_to_prompt(request.messages)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": request.temperature,
                "max_new_tokens": request.max_tokens or 1024,
                "top_p": request.top_p,
            },
            "options": {"wait_for_model": True},
            "stream": True,
        }

        model_path = f"/models/{request.model_id}"
        async with self.client.stream("POST", model_path, json=payload) as response:
            response.raise_for_status()
            # Serverless stream format is line-delimited SSE chunks
            async for line in response.aiter_lines():
                if not line.startswith("data:"):
                    continue
                try:
                    event = json.loads(line[5:])
                    token = event.get("token", {})
                    chunk = StreamChunk(
                        content=token.get("text", ""),
                        finish_reason="stop" if token.get("special", False) else None,
                    )
                except (ValueError, AttributeError, TypeError):
                    # ValueError covers json.JSONDecodeError (non-JSON lines);
                    # AttributeError/TypeError cover events or tokens that
                    # are not JSON objects. Such lines are skipped.
                    continue
                # Yield outside the try so an exception raised at the yield
                # point is never mistaken for a malformed line and swallowed.
                yield chunk
    except httpx.HTTPError as e:
        # Chain the original error so the transport-level cause stays in the traceback.
        raise InferenceError(f"Hugging Face Inference streaming failed: {e}") from e
|
|
145
|
+
|
|
146
|
+
async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
    """Embed a batch of texts by posting them to ``/models/{model_id}``.

    The response may be a flat vector, a list of vectors, or a list of
    per-token vector lists; the result is always returned as a list of vectors.

    Args:
        texts: Strings to embed. An empty list returns ``[]`` without any
            network call.
        model_id: Model identifier appended to the ``/models/`` path.

    Returns:
        A list of float vectors. Per-token output is mean-pooled per sequence;
        a flat vector is wrapped in a one-element list.

    Raises:
        InferenceError: If the HTTP call fails or returns an error status.
        ValueError: If the response is not a non-empty list.
    """
    # Nothing to embed: avoid a request that could only end in an error.
    if not texts:
        return []

    await self.initialize()
    assert self.client is not None
    try:
        model_path = f"/models/{model_id}"
        response = await self.client.post(
            model_path, json={"inputs": texts, "options": {"wait_for_model": True}}
        )
        response.raise_for_status()
        embeddings = response.json()
    except httpx.HTTPError as e:
        # Chain the original error so the transport-level cause stays in the traceback.
        raise InferenceError(f"Hugging Face embedding failed: {e}") from e

    # Embeddings could be 1D or 2D/3D depending on token poolings. Ensure we return 2D floats.
    if not isinstance(embeddings, list) or not embeddings:
        raise ValueError("Invalid embedding response structure from HF Inference API")

    first = embeddings[0]
    if not isinstance(first, list):
        # Single sequence 1D returned, wrap in list
        return [embeddings]

    # Guard against an empty first row before peeking at its first element.
    if first and isinstance(first[0], list):
        # Simple average pooling for token embeddings: average each
        # dimension over the tokens of the sequence.
        return [
            [sum(col) / len(seq) for col in zip(*seq, strict=False)]
            for seq in embeddings
        ]
    return embeddings
|
|
175
|
+
|
|
176
|
+
def _format_messages_to_prompt(self, messages: list[dict]) -> str:
|
|
177
|
+
"""Utility to stitch general messages into standard chat-template prompt representation."""
|
|
178
|
+
prompt = ""
|
|
179
|
+
for msg in messages:
|
|
180
|
+
role = msg.get("role", "user")
|
|
181
|
+
content = msg.get("content", "")
|
|
182
|
+
if role == "system":
|
|
183
|
+
prompt += f"<|system|>\n{content}</s>\n"
|
|
184
|
+
elif role == "user":
|
|
185
|
+
prompt += f"<|user|>\n{content}</s>\n"
|
|
186
|
+
else:
|
|
187
|
+
prompt += f"<|assistant|>\n{content}</s>\n"
|
|
188
|
+
prompt += "<|assistant|>\n"
|
|
189
|
+
return prompt
|
|
190
|
+
|
|
191
|
+
async def health_check(self) -> ProviderHealth:
    """Probe the API with a GET on ``/models/gpt2`` and map the outcome to a health state.

    Returns:
        ``HEALTHY`` on HTTP 200, ``DEGRADED`` on any other status code, and
        ``UNAVAILABLE`` when initialization or the request raises.
    """
    try:
        await self.initialize()
        assert self.client is not None
        # A fixed model path serves as the connectivity probe.
        probe = await self.client.get("/models/gpt2")
        reachable = probe.status_code == 200
        return ProviderHealth.HEALTHY if reachable else ProviderHealth.DEGRADED
    except Exception:
        # Best-effort check: any failure is reported as a state, never raised.
        return ProviderHealth.UNAVAILABLE
|
|
203
|
+
|
|
204
|
+
def get_capabilities(self) -> ProviderCapabilities:
    """Return the capabilities object held in ``self._capabilities``."""
    return self._capabilities
|
|
206
|
+
|
|
207
|
+
async def shutdown(self) -> None:
    """Close the HTTP client if one is set, then clear the reference to it."""
    if not self.client:
        # No client to close.
        return
    await self.client.aclose()
    self.client = None
|