velune-cli 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velune/__init__.py +5 -0
- velune/__main__.py +6 -0
- velune/cli/__init__.py +5 -0
- velune/cli/app.py +208 -0
- velune/cli/autocomplete.py +80 -0
- velune/cli/banner.py +60 -0
- velune/cli/commands/__init__.py +32 -0
- velune/cli/commands/ask.py +175 -0
- velune/cli/commands/base.py +16 -0
- velune/cli/commands/chat.py +228 -0
- velune/cli/commands/config.py +224 -0
- velune/cli/commands/daemon.py +88 -0
- velune/cli/commands/doctor.py +721 -0
- velune/cli/commands/init.py +170 -0
- velune/cli/commands/mcp.py +82 -0
- velune/cli/commands/memory.py +293 -0
- velune/cli/commands/models.py +683 -0
- velune/cli/commands/preflight.py +95 -0
- velune/cli/commands/run.py +270 -0
- velune/cli/commands/setup.py +184 -0
- velune/cli/commands/workspace.py +249 -0
- velune/cli/context.py +36 -0
- velune/cli/councilmodel_ui.py +199 -0
- velune/cli/display/council_view.py +254 -0
- velune/cli/display/memory_view.py +126 -0
- velune/cli/display/panels.py +35 -0
- velune/cli/display/progress.py +25 -0
- velune/cli/display/themes.py +25 -0
- velune/cli/main.py +15 -0
- velune/cli/model_selector.py +51 -0
- velune/cli/modes.py +86 -0
- velune/cli/pull_ui.py +123 -0
- velune/cli/registry.py +80 -0
- velune/cli/rendering/__init__.py +5 -0
- velune/cli/rendering/error_panel.py +79 -0
- velune/cli/rendering/markdown.py +63 -0
- velune/cli/repl.py +1855 -0
- velune/cli/session_manager.py +71 -0
- velune/cli/slash_commands.py +37 -0
- velune/cli/theme.py +8 -0
- velune/cognition/__init__.py +23 -0
- velune/cognition/agents/__init__.py +7 -0
- velune/cognition/agents/coder.py +209 -0
- velune/cognition/agents/planner.py +156 -0
- velune/cognition/agents/reviewer.py +195 -0
- velune/cognition/arbitrator.py +220 -0
- velune/cognition/architecture.py +415 -0
- velune/cognition/budget.py +65 -0
- velune/cognition/council/__init__.py +47 -0
- velune/cognition/council/base.py +217 -0
- velune/cognition/council/challenger.py +74 -0
- velune/cognition/council/coder.py +79 -0
- velune/cognition/council/critic_agent.py +43 -0
- velune/cognition/council/critic_configs.py +111 -0
- velune/cognition/council/critics.py +41 -0
- velune/cognition/council/debate.py +46 -0
- velune/cognition/council/factory.py +140 -0
- velune/cognition/council/messages.py +56 -0
- velune/cognition/council/planner.py +124 -0
- velune/cognition/council/reviewer.py +74 -0
- velune/cognition/council/synthesizer.py +67 -0
- velune/cognition/council/tiers.py +188 -0
- velune/cognition/council_orchestrator.py +282 -0
- velune/cognition/firewall.py +354 -0
- velune/cognition/module.py +46 -0
- velune/cognition/orchestrator.py +1205 -0
- velune/cognition/personality.py +238 -0
- velune/cognition/state.py +104 -0
- velune/cognition/style_resolver.py +64 -0
- velune/cognition/verification.py +205 -0
- velune/context/__init__.py +28 -0
- velune/context/assembler.py +240 -0
- velune/context/budget.py +97 -0
- velune/context/extractive.py +95 -0
- velune/context/prompt_adaptation.py +480 -0
- velune/context/sections.py +99 -0
- velune/context/token_counter.py +134 -0
- velune/context/utilization.py +33 -0
- velune/context/window.py +63 -0
- velune/core/__init__.py +89 -0
- velune/core/background.py +5 -0
- velune/core/config/__init__.py +37 -0
- velune/core/errors/__init__.py +90 -0
- velune/core/errors/catalog.py +188 -0
- velune/core/errors/execution.py +31 -0
- velune/core/errors/memory.py +25 -0
- velune/core/errors/orchestration.py +31 -0
- velune/core/errors/provider.py +37 -0
- velune/core/event_loop.py +35 -0
- velune/core/logging.py +83 -0
- velune/core/paths.py +165 -0
- velune/core/runtime.py +113 -0
- velune/core/startup_profiler.py +56 -0
- velune/core/task_registry.py +117 -0
- velune/core/trace.py +83 -0
- velune/core/types/__init__.py +48 -0
- velune/core/types/agent.py +53 -0
- velune/core/types/context.py +42 -0
- velune/core/types/inference.py +38 -0
- velune/core/types/memory.py +42 -0
- velune/core/types/model.py +70 -0
- velune/core/types/provider.py +62 -0
- velune/core/types/repository.py +38 -0
- velune/core/types/task.py +61 -0
- velune/core/types/workspace.py +28 -0
- velune/daemon/client.py +13 -0
- velune/daemon/server.py +127 -0
- velune/daemon/transport.py +179 -0
- velune/events.py +204 -0
- velune/execution/__init__.py +22 -0
- velune/execution/benchmarker.py +315 -0
- velune/execution/cancellation.py +53 -0
- velune/execution/checkpointer.py +130 -0
- velune/execution/command_spec.py +165 -0
- velune/execution/diff_preview.py +197 -0
- velune/execution/executor.py +181 -0
- velune/execution/module.py +18 -0
- velune/execution/multi_diff.py +67 -0
- velune/execution/path_guard.py +74 -0
- velune/execution/planner.py +91 -0
- velune/execution/rollback.py +89 -0
- velune/execution/sandbox.py +268 -0
- velune/execution/validator.py +115 -0
- velune/hardware/__init__.py +1 -0
- velune/hardware/detector.py +192 -0
- velune/kernel/__init__.py +55 -0
- velune/kernel/bootstrap.py +125 -0
- velune/kernel/config.py +426 -0
- velune/kernel/entrypoint.py +78 -0
- velune/kernel/health.py +54 -0
- velune/kernel/lifecycle.py +143 -0
- velune/kernel/module.py +17 -0
- velune/kernel/modules.py +23 -0
- velune/kernel/registry.py +96 -0
- velune/kernel/schemas.py +28 -0
- velune/main.py +9 -0
- velune/mcp/__init__.py +9 -0
- velune/mcp/client.py +115 -0
- velune/mcp/config.py +19 -0
- velune/mcp/server.py +624 -0
- velune/memory/__init__.py +32 -0
- velune/memory/compaction.py +506 -0
- velune/memory/embedding_pipeline.py +241 -0
- velune/memory/lifecycle.py +680 -0
- velune/memory/module.py +218 -0
- velune/memory/prioritizer.py +67 -0
- velune/memory/storage/episodic_schema.sql +53 -0
- velune/memory/storage/lancedb_store.py +282 -0
- velune/memory/storage/sqlite_manager.py +369 -0
- velune/memory/storage/sqlite_pool.py +149 -0
- velune/memory/tiers/episodic.py +588 -0
- velune/memory/tiers/graph.py +378 -0
- velune/memory/tiers/lineage.py +416 -0
- velune/memory/tiers/semantic.py +475 -0
- velune/memory/tiers/working.py +168 -0
- velune/memory/vitality.py +132 -0
- velune/models/__init__.py +15 -0
- velune/models/family.py +76 -0
- velune/models/module.py +20 -0
- velune/models/probes.py +192 -0
- velune/models/profile_cache.py +84 -0
- velune/models/profiler.py +108 -0
- velune/models/registry.py +251 -0
- velune/models/scorer.py +233 -0
- velune/models/specializations.py +205 -0
- velune/orchestration/__init__.py +19 -0
- velune/orchestration/engine.py +239 -0
- velune/orchestration/module.py +15 -0
- velune/orchestration/role_assignments.py +82 -0
- velune/orchestration/schemas.py +98 -0
- velune/plugins/__init__.py +20 -0
- velune/plugins/hooks.py +50 -0
- velune/plugins/loader.py +161 -0
- velune/plugins/registry.py +56 -0
- velune/plugins/schemas.py +21 -0
- velune/providers/__init__.py +23 -0
- velune/providers/adapters/anthropic.py +257 -0
- velune/providers/adapters/fireworks.py +115 -0
- velune/providers/adapters/google.py +234 -0
- velune/providers/adapters/groq.py +151 -0
- velune/providers/adapters/huggingface.py +210 -0
- velune/providers/adapters/llamacpp.py +208 -0
- velune/providers/adapters/lmstudio.py +175 -0
- velune/providers/adapters/ollama.py +233 -0
- velune/providers/adapters/openai.py +213 -0
- velune/providers/adapters/openrouter.py +81 -0
- velune/providers/adapters/together.py +134 -0
- velune/providers/adapters/xai.py +60 -0
- velune/providers/base.py +86 -0
- velune/providers/benchmarker.py +138 -0
- velune/providers/discovery/__init__.py +33 -0
- velune/providers/discovery/anthropic.py +79 -0
- velune/providers/discovery/benchmarks.py +44 -0
- velune/providers/discovery/classifier.py +69 -0
- velune/providers/discovery/fireworks.py +95 -0
- velune/providers/discovery/gguf.py +88 -0
- velune/providers/discovery/google.py +95 -0
- velune/providers/discovery/gpu.py +117 -0
- velune/providers/discovery/groq.py +21 -0
- velune/providers/discovery/huggingface.py +67 -0
- velune/providers/discovery/lmstudio.py +80 -0
- velune/providers/discovery/ollama.py +162 -0
- velune/providers/discovery/openai.py +96 -0
- velune/providers/discovery/openrouter.py +113 -0
- velune/providers/discovery/scanner.py +115 -0
- velune/providers/discovery/together.py +114 -0
- velune/providers/discovery/xai.py +57 -0
- velune/providers/health.py +67 -0
- velune/providers/health_monitor.py +169 -0
- velune/providers/keystore.py +142 -0
- velune/providers/local_paths.py +49 -0
- velune/providers/local_resolver.py +229 -0
- velune/providers/module.py +51 -0
- velune/providers/ollama_manager.py +193 -0
- velune/providers/registry.py +220 -0
- velune/providers/router.py +255 -0
- velune/providers/task_classifier.py +288 -0
- velune/py.typed +0 -0
- velune/repository/__init__.py +33 -0
- velune/repository/analyzer.py +127 -0
- velune/repository/ast_parser.py +822 -0
- velune/repository/blast_radius.py +298 -0
- velune/repository/boundary_classifier.py +295 -0
- velune/repository/cognition.py +316 -0
- velune/repository/grapher.py +179 -0
- velune/repository/import_graph.py +263 -0
- velune/repository/incremental_indexer.py +275 -0
- velune/repository/index_state.py +96 -0
- velune/repository/indexer.py +243 -0
- velune/repository/module.py +17 -0
- velune/repository/parser.py +474 -0
- velune/repository/project_type.py +300 -0
- velune/repository/rename_journal.py +287 -0
- velune/repository/scanner.py +193 -0
- velune/repository/schemas.py +102 -0
- velune/repository/symbol_registry.py +365 -0
- velune/repository/tracker.py +252 -0
- velune/retrieval/__init__.py +27 -0
- velune/retrieval/cache.py +110 -0
- velune/retrieval/fast_path.py +391 -0
- velune/retrieval/graph.py +124 -0
- velune/retrieval/hybrid.py +271 -0
- velune/retrieval/keyword.py +131 -0
- velune/retrieval/module.py +26 -0
- velune/retrieval/pipeline.py +303 -0
- velune/retrieval/reranker.py +102 -0
- velune/retrieval/schemas.py +59 -0
- velune/retrieval/slow_path.py +364 -0
- velune/retrieval/vector.py +203 -0
- velune/telemetry/__init__.py +59 -0
- velune/telemetry/cognition.py +267 -0
- velune/telemetry/cost_estimator.py +92 -0
- velune/telemetry/debug.py +304 -0
- velune/telemetry/doctor.py +244 -0
- velune/telemetry/logging.py +286 -0
- velune/telemetry/spans.py +277 -0
- velune/telemetry/token_tracker.py +140 -0
- velune/telemetry/usage_tracker.py +340 -0
- velune/tools/__init__.py +41 -0
- velune/tools/base/registry.py +87 -0
- velune/tools/base/tool.py +63 -0
- velune/tools/code/navigate.py +116 -0
- velune/tools/code/search.py +123 -0
- velune/tools/filesystem/read.py +75 -0
- velune/tools/filesystem/search.py +136 -0
- velune/tools/filesystem/write.py +163 -0
- velune/tools/git/history.py +177 -0
- velune/tools/git/operations.py +122 -0
- velune/tools/git/state.py +121 -0
- velune/tools/module.py +81 -0
- velune/tools/terminal/execute.py +72 -0
- velune/tools/terminal/history.py +47 -0
- velune/tools/web/fetch.py +55 -0
- velune/tools/web/validator.py +122 -0
- velune_cli-0.9.0.dist-info/METADATA +518 -0
- velune_cli-0.9.0.dist-info/RECORD +279 -0
- velune_cli-0.9.0.dist-info/WHEEL +4 -0
- velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
- velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""OpenAI provider adapter implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import AsyncIterator
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
from pydantic import SecretStr
|
|
11
|
+
|
|
12
|
+
from velune.core.errors.provider import (
|
|
13
|
+
InferenceError,
|
|
14
|
+
ProviderAuthenticationError,
|
|
15
|
+
)
|
|
16
|
+
from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
|
|
17
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
18
|
+
from velune.core.types.provider import ProviderCapabilities, ProviderHealth
|
|
19
|
+
from velune.providers.base import ModelProvider
|
|
20
|
+
from velune.providers.keystore import get_key
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OpenAIProvider(ModelProvider):
    """OpenAI provider for GPT chat and embedding models.

    Wraps the OpenAI REST API (chat completions, embeddings, model listing)
    behind the ``ModelProvider`` contract using a lazily created
    ``httpx.AsyncClient``.
    """

    def __init__(
        self, api_key: str | SecretStr | None = None, base_url: str = "https://api.openai.com/v1"
    ) -> None:
        """Store credentials and endpoint; no network I/O happens here.

        Args:
            api_key: Explicit key (plain string or ``SecretStr``). When falsy,
                the key stored under ``"openai"`` in the keystore is used.
            base_url: Root URL of the OpenAI-compatible API.
        """
        self._api_key = api_key or get_key("openai")
        # Unwrap SecretStr-like objects so the header can be built from a plain string.
        if hasattr(self._api_key, "get_secret_value"):
            self._api_key = self._api_key.get_secret_value()
        self._base_url = base_url
        self.client: httpx.AsyncClient | None = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=True,
            supports_embeddings=True,
            max_context_window=128000,
        )

    @property
    def provider_id(self) -> str:
        """Return the provider slug, ``"openai"``."""
        return "openai"

    async def initialize(self) -> None:
        """Create the shared async HTTP client if it does not exist yet.

        Raises:
            ProviderAuthenticationError: If no API key is available.
        """
        if not self._api_key:
            raise ProviderAuthenticationError(
                "OpenAI API key not found in configuration or environment"
            )
        if not self.client:
            headers = {"Authorization": f"Bearer {self._api_key}"}
            self.client = httpx.AsyncClient(base_url=self._base_url, headers=headers, timeout=300.0)

    async def list_models(self) -> list[ModelDescriptor]:
        """Return the built-in (hard-coded) OpenAI model lineup.

        Raises:
            ProviderAuthenticationError: If no API key is available.
        """
        await self.initialize()
        return [
            ModelDescriptor(
                model_id="gpt-4o",
                display_name="GPT-4o",
                provider_id="openai",
                context_length=128000,
                capabilities={
                    "coding": CapabilityLevel.EXPERT,
                    "reasoning": CapabilityLevel.EXPERT,
                    "planning": CapabilityLevel.EXPERT,
                    "summarization": CapabilityLevel.EXPERT,
                    "instruction_following": CapabilityLevel.EXPERT,
                    "tool_use": CapabilityLevel.EXPERT,
                    "long_context": CapabilityLevel.EXPERT,
                },
                is_local=False,
            ),
            ModelDescriptor(
                model_id="gpt-4o-mini",
                display_name="GPT-4o Mini",
                provider_id="openai",
                context_length=128000,
                capabilities={
                    "coding": CapabilityLevel.ADVANCED,
                    "reasoning": CapabilityLevel.ADVANCED,
                    "planning": CapabilityLevel.ADVANCED,
                    "summarization": CapabilityLevel.ADVANCED,
                    "instruction_following": CapabilityLevel.EXPERT,
                    "tool_use": CapabilityLevel.EXPERT,
                    "long_context": CapabilityLevel.ADVANCED,
                },
                is_local=False,
            ),
            ModelDescriptor(
                model_id="gpt-3.5-turbo",
                display_name="GPT-3.5 Turbo",
                provider_id="openai",
                context_length=16385,
                capabilities={
                    "coding": CapabilityLevel.INTERMEDIATE,
                    "reasoning": CapabilityLevel.INTERMEDIATE,
                    "planning": CapabilityLevel.INTERMEDIATE,
                    "summarization": CapabilityLevel.ADVANCED,
                    "instruction_following": CapabilityLevel.ADVANCED,
                    "tool_use": CapabilityLevel.INTERMEDIATE,
                    "long_context": CapabilityLevel.BASIC,
                },
                is_local=False,
                tags=["fallback"],
            ),
        ]

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run a non-streaming chat completion.

        Args:
            request: Model id, messages and sampling parameters.

        Returns:
            The first choice's content plus token usage and wall-clock latency
            in milliseconds.

        Raises:
            ProviderAuthenticationError: If no API key is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        start = time.perf_counter()
        try:
            payload = {
                "model": request.model_id,
                "messages": request.messages,
                "temperature": request.temperature,
                "max_tokens": request.max_tokens,
                "top_p": request.top_p,
            }
            if request.stop_sequences:
                payload["stop"] = request.stop_sequences

            response = await self.client.post("/chat/completions", json=payload)
            response.raise_for_status()
            data = response.json()
            latency = (time.perf_counter() - start) * 1000.0

            usage = data.get("usage", {})
            return InferenceResponse(
                content=data["choices"][0]["message"]["content"],
                model_id=request.model_id,
                finish_reason=data["choices"][0]["finish_reason"] or "stop",
                tokens_used=usage.get("total_tokens", 0),
                prompt_tokens=usage.get("prompt_tokens", 0),
                completion_tokens=usage.get("completion_tokens", 0),
                latency_ms=latency,
            )
        except httpx.HTTPError as e:
            # Chain the cause so the original transport/status error stays in the traceback.
            raise InferenceError(f"OpenAI completion failed: {e}") from e

    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Stream a chat completion as server-sent events.

        Args:
            request: Model id, messages and sampling parameters.

        Yields:
            One ``StreamChunk`` per parsable ``data:`` event, until ``[DONE]``.
            Events that are not valid JSON or carry no usable choice are skipped.

        Raises:
            ProviderAuthenticationError: If no API key is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        payload = {
            "model": request.model_id,
            "messages": request.messages,
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
            "top_p": request.top_p,
            "stream": True,
        }
        if request.stop_sequences:
            payload["stop"] = request.stop_sequences

        try:
            async with self.client.stream("POST", "/chat/completions", json=payload) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    data_str = line[6:]
                    if data_str == "[DONE]":
                        break
                    try:
                        choice = json.loads(data_str)["choices"][0]
                        # ``content`` may be absent or an explicit null; normalise to "".
                        content = choice["delta"].get("content") or ""
                        finish_reason = choice.get("finish_reason")
                    except (
                        json.JSONDecodeError,
                        KeyError,
                        IndexError,  # events with an empty ``choices`` array
                        TypeError,
                        AttributeError,
                    ):
                        continue
                    # Yield outside the parse guard so it only covers parsing.
                    yield StreamChunk(content=content, finish_reason=finish_reason)
        except httpx.HTTPError as e:
            raise InferenceError(f"OpenAI stream failed: {e}") from e

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Generate embeddings for a batch of texts.

        Args:
            texts: Inputs to embed.
            model_id: Embedding model to use.

        Returns:
            One vector per input, in the same order as ``texts``.

        Raises:
            ProviderAuthenticationError: If no API key is available.
            InferenceError: If the HTTP request fails or returns an error status.
        """
        await self.initialize()
        assert self.client is not None
        try:
            response = await self.client.post(
                "/embeddings", json={"model": model_id, "input": texts}
            )
            response.raise_for_status()
            data = response.json()
            # Sort by the returned index so vectors line up with the order of ``texts``.
            sorted_data = sorted(data["data"], key=lambda x: x["index"])
            return [item["embedding"] for item in sorted_data]
        except httpx.HTTPError as e:
            raise InferenceError(f"OpenAI embedding failed: {e}") from e

    async def health_check(self) -> ProviderHealth:
        """Probe ``/models`` to verify credentials and connectivity.

        Returns:
            ``HEALTHY`` on HTTP 200, ``DEGRADED`` on any other status, and
            ``UNAVAILABLE`` if initialization or the request raises.
        """
        try:
            await self.initialize()
            assert self.client is not None
            resp = await self.client.get("/models")
            if resp.status_code == 200:
                return ProviderHealth.HEALTHY
            return ProviderHealth.DEGRADED
        except Exception:
            # Deliberate best-effort: a health probe reports state, it never raises.
            return ProviderHealth.UNAVAILABLE

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the static capability flags set in ``__init__``."""
        return self._capabilities

    async def shutdown(self) -> None:
        """Close the HTTP client, if any, so a later ``initialize()`` recreates it."""
        if self.client:
            await self.client.aclose()
            self.client = None
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""OpenRouter provider adapter — OpenAI-compatible with dynamic model listing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from velune.core.errors.provider import ProviderAuthenticationError
|
|
8
|
+
from velune.core.types.model import CapabilityLevel, ModelCapabilityProfile, ModelDescriptor
|
|
9
|
+
from velune.providers.adapters.openai import OpenAIProvider
|
|
10
|
+
from velune.providers.keystore import get_key
|
|
11
|
+
|
|
12
|
+
_REFERER_HEADERS = {
|
|
13
|
+
"HTTP-Referer": "Velune CLI",
|
|
14
|
+
"X-Title": "Velune CLI",
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class OpenRouterProvider(OpenAIProvider):
    """OpenRouter provider — routes to many upstream models via the OpenAI API shape.

    Inherits inference, streaming, embedding and health checks from
    ``OpenAIProvider``; overrides client construction (extra headers) and
    model listing (fetched from the API).
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://openrouter.ai/api/v1",
    ) -> None:
        """Resolve the OpenRouter key and configure the endpoint.

        Args:
            api_key: Explicit key. When falsy, the key stored under
                ``"openrouter"`` in the keystore is used.
            base_url: Root URL of the OpenRouter API.
        """
        resolved_key = api_key or get_key("openrouter")
        super().__init__(api_key=resolved_key, base_url=base_url)
        if not resolved_key:
            # OpenAIProvider.__init__ substitutes the stored *OpenAI* key for a
            # falsy api_key. That credential must never be sent to OpenRouter,
            # so clear it and let initialize() report the missing key instead.
            self._api_key = None

    @property
    def provider_id(self) -> str:
        """Return the provider slug, ``"openrouter"``."""
        return "openrouter"

    async def initialize(self) -> None:
        """Create the HTTP client with the OpenRouter-specific headers.

        Raises:
            ProviderAuthenticationError: If no OpenRouter API key is available.
        """
        if not self._api_key:
            raise ProviderAuthenticationError(
                "OpenRouter API key not found — set OPENROUTER_API_KEY or run: velune config set-key openrouter"
            )
        if not self.client:
            headers = {
                "Authorization": f"Bearer {self._api_key}",
                **_REFERER_HEADERS,
            }
            self.client = httpx.AsyncClient(
                base_url=self._base_url,
                headers=headers,
                timeout=300.0,
            )

    async def list_models(self) -> list[ModelDescriptor]:
        """Fetch the current model catalogue from the OpenRouter API.

        Returns:
            One descriptor per entry in the response's ``data`` array, or an
            empty list if the request or parsing fails.

        Raises:
            ProviderAuthenticationError: If no OpenRouter API key is available.
        """
        await self.initialize()
        assert self.client is not None
        try:
            resp = await self.client.get("/models")
            resp.raise_for_status()
            data = resp.json()
            return [self._parse_model(m) for m in data.get("data", [])]
        except Exception:
            # Deliberate best-effort: an unreachable or malformed catalogue
            # yields "no models" rather than an error.
            return []

    def _parse_model(self, raw: dict) -> ModelDescriptor:
        """Convert one raw catalogue entry into a ``ModelDescriptor``.

        Args:
            raw: A single element of the ``data`` array returned by ``/models``.

        Returns:
            A descriptor with fixed default capability levels; the per-token
            prompt price is scaled by 1000 into ``cost_per_1k_tokens`` and left
            ``None`` when missing, zero, or unparsable.
        """
        model_id = raw.get("id", "unknown")
        context = raw.get("context_length") or 4096
        try:
            # ``pricing`` may be null and the price a non-numeric string; a bad
            # price must not discard the model (or, via the caller's blanket
            # except, the whole catalogue).
            pricing = raw.get("pricing") or {}
            cost_prompt = float(pricing.get("prompt") or 0) * 1000
        except (AttributeError, TypeError, ValueError):
            cost_prompt = 0.0
        profile = ModelCapabilityProfile(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            instruction_following=CapabilityLevel.ADVANCED,
        )
        return ModelDescriptor(
            model_id=model_id,
            display_name=raw.get("name") or model_id,
            provider_id="openrouter",
            context_length=context,
            capabilities=profile,
            is_local=False,
            cost_per_1k_tokens=cost_prompt if cost_prompt > 0 else None,
            tags=["cloud", "openrouter"],
            metadata={"raw": raw},
        )
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Together.AI provider adapter — OpenAI-compatible REST API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
6
|
+
from velune.core.types.provider import ProviderCapabilities
|
|
7
|
+
from velune.providers.adapters.openai import OpenAIProvider
|
|
8
|
+
from velune.providers.keystore import get_key
|
|
9
|
+
|
|
10
|
+
def _capability_map(
    *,
    coding: CapabilityLevel,
    reasoning: CapabilityLevel,
    planning: CapabilityLevel,
    summarization: CapabilityLevel,
    instruction_following: CapabilityLevel,
    tool_use: CapabilityLevel,
    long_context: CapabilityLevel,
) -> dict[str, CapabilityLevel]:
    """Build a capability mapping with the seven keys used by the catalogue."""
    return {
        "coding": coding,
        "reasoning": reasoning,
        "planning": planning,
        "summarization": summarization,
        "instruction_following": instruction_following,
        "tool_use": tool_use,
        "long_context": long_context,
    }


# Static catalogue of Together.AI models exposed by TogetherProvider.
TOGETHER_MODELS: list[ModelDescriptor] = [
    # Llama 3.3 70B (turbo).
    ModelDescriptor(
        model_id="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        display_name="Llama 3.3 70B Instruct Turbo",
        provider_id="together",
        context_length=131072,
        capabilities=_capability_map(
            coding=CapabilityLevel.ADVANCED,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.EXPERT,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.ADVANCED,
        ),
        speed_tier="fast",
        is_local=False,
        cost_per_1k_tokens=0.00088,
        tags=["cloud", "together", "llama", "turbo"],
    ),
    # Llama 3.2 11B vision.
    ModelDescriptor(
        model_id="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
        display_name="Llama 3.2 11B Vision Instruct",
        provider_id="together",
        context_length=131072,
        capabilities=_capability_map(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.INTERMEDIATE,
        ),
        speed_tier="fast",
        is_local=False,
        cost_per_1k_tokens=0.00018,
        tags=["cloud", "together", "llama", "vision", "cheap"],
    ),
    # Qwen 2.5 Coder 32B.
    ModelDescriptor(
        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        display_name="Qwen 2.5 Coder 32B Instruct",
        provider_id="together",
        context_length=131072,
        capabilities=_capability_map(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.ADVANCED,
            planning=CapabilityLevel.ADVANCED,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.ADVANCED,
        ),
        speed_tier="medium",
        is_local=False,
        cost_per_1k_tokens=0.0008,
        tags=["cloud", "together", "qwen", "coding"],
    ),
    # DeepSeek R1.
    ModelDescriptor(
        model_id="deepseek-ai/DeepSeek-R1",
        display_name="DeepSeek R1",
        provider_id="together",
        context_length=163840,
        capabilities=_capability_map(
            coding=CapabilityLevel.EXPERT,
            reasoning=CapabilityLevel.EXPERT,
            planning=CapabilityLevel.EXPERT,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.EXPERT,
            tool_use=CapabilityLevel.ADVANCED,
            long_context=CapabilityLevel.ADVANCED,
        ),
        speed_tier="slow",
        is_local=False,
        cost_per_1k_tokens=0.003,
        tags=["cloud", "together", "deepseek", "reasoning"],
    ),
    # Mistral 7B Instruct v0.3.
    ModelDescriptor(
        model_id="mistralai/Mistral-7B-Instruct-v0.3",
        display_name="Mistral 7B Instruct v0.3",
        provider_id="together",
        context_length=32768,
        capabilities=_capability_map(
            coding=CapabilityLevel.INTERMEDIATE,
            reasoning=CapabilityLevel.INTERMEDIATE,
            planning=CapabilityLevel.INTERMEDIATE,
            summarization=CapabilityLevel.ADVANCED,
            instruction_following=CapabilityLevel.ADVANCED,
            tool_use=CapabilityLevel.INTERMEDIATE,
            long_context=CapabilityLevel.BASIC,
        ),
        speed_tier="fast",
        is_local=False,
        cost_per_1k_tokens=0.0002,
        tags=["cloud", "together", "mistral", "cheap"],
    ),
]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class TogetherProvider(OpenAIProvider):
    """Together.AI — 50+ open models via OpenAI-compatible inference."""

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.together.xyz/v1",
    ) -> None:
        """Set up credentials, endpoint and capability flags.

        The parent constructor is intentionally not called: every attribute it
        would set is assigned here, with the key looked up under ``"together"``.
        """
        key = api_key or get_key("together")
        # Unwrap SecretStr-like values into a plain string.
        self._api_key = key.get_secret_value() if hasattr(key, "get_secret_value") else key
        self._base_url = base_url
        self.client = None
        self._capabilities = ProviderCapabilities(
            supports_streaming=True,
            supports_function_calling=True,
            supports_embeddings=False,
            max_context_window=131072,
        )

    @property
    def provider_id(self) -> str:
        """Return the provider slug, ``"together"``."""
        return "together"

    async def list_models(self) -> list[ModelDescriptor]:
        """Return a fresh list holding the static ``TOGETHER_MODELS`` entries."""
        return [*TOGETHER_MODELS]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""xAI (Grok) provider adapter — OpenAI-compatible endpoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
6
|
+
from velune.providers.adapters.openai import OpenAIProvider
|
|
7
|
+
from velune.providers.keystore import get_key
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class XAIProvider(OpenAIProvider):
    """xAI Grok provider. Wire-compatible with the OpenAI chat API.

    Inherits client setup, inference, streaming and health checks from
    ``OpenAIProvider``; only the endpoint, key lookup and model list differ.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.x.ai/v1",
    ) -> None:
        """Resolve the xAI key and configure the endpoint.

        Args:
            api_key: Explicit key. When falsy, the key stored under ``"xai"``
                in the keystore is used.
            base_url: Root URL of the xAI API.
        """
        resolved_key = api_key or get_key("xai")
        super().__init__(api_key=resolved_key, base_url=base_url)
        if not resolved_key:
            # OpenAIProvider.__init__ substitutes the stored *OpenAI* key for a
            # falsy api_key. That credential must never be sent to xAI, so
            # clear it; initialize() then fails with an authentication error.
            self._api_key = None

    @property
    def provider_id(self) -> str:
        """Return the provider slug, ``"xai"``."""
        return "xai"

    async def list_models(self) -> list[ModelDescriptor]:
        """Return the built-in (hard-coded) Grok model lineup."""
        return [
            ModelDescriptor(
                model_id="grok-2",
                display_name="Grok 2",
                provider_id="xai",
                context_length=131072,
                capabilities={
                    "coding": CapabilityLevel.ADVANCED,
                    "reasoning": CapabilityLevel.ADVANCED,
                    "planning": CapabilityLevel.ADVANCED,
                    "summarization": CapabilityLevel.ADVANCED,
                    "instruction_following": CapabilityLevel.EXPERT,
                    "tool_use": CapabilityLevel.ADVANCED,
                    "long_context": CapabilityLevel.ADVANCED,
                },
                is_local=False,
                tags=["cloud", "xai"],
            ),
            ModelDescriptor(
                model_id="grok-2-mini",
                display_name="Grok 2 Mini",
                provider_id="xai",
                context_length=131072,
                capabilities={
                    "coding": CapabilityLevel.INTERMEDIATE,
                    "reasoning": CapabilityLevel.INTERMEDIATE,
                    "planning": CapabilityLevel.INTERMEDIATE,
                    "summarization": CapabilityLevel.ADVANCED,
                    "instruction_following": CapabilityLevel.ADVANCED,
                    "tool_use": CapabilityLevel.INTERMEDIATE,
                    "long_context": CapabilityLevel.ADVANCED,
                },
                is_local=False,
                tags=["cloud", "xai", "mini"],
            ),
        ]
|
velune/providers/base.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Provider abstraction base interfaces and capabilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from typing import Protocol, runtime_checkable
|
|
8
|
+
|
|
9
|
+
from velune.core.types.inference import InferenceRequest, InferenceResponse, StreamChunk
|
|
10
|
+
from velune.core.types.model import ModelDescriptor
|
|
11
|
+
from velune.core.types.provider import ProviderCapabilities, ProviderHealth
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@runtime_checkable
class ModelProvider(Protocol):
    """Structural contract that every LLM / embedding adapter satisfies.

    Members are grouped as identity, lifecycle, then model operations. Because
    the protocol is ``runtime_checkable``, ``isinstance`` only verifies that
    the members exist, not their signatures.
    """

    # --- identity -----------------------------------------------------------

    @property
    def provider_id(self) -> str:
        """Unique slug identifying the provider (e.g. 'ollama', 'openai')."""
        ...

    def get_capabilities(self) -> ProviderCapabilities:
        """Return the provider's capability description."""
        ...

    # --- lifecycle ----------------------------------------------------------

    async def initialize(self) -> None:
        """Set up connections and perform any warm-up work."""
        ...

    async def shutdown(self) -> None:
        """Release connections and other held resources."""
        ...

    async def health_check(self) -> ProviderHealth:
        """Probe the host or API and report the provider's current state."""
        ...

    # --- model operations ---------------------------------------------------

    async def list_models(self) -> list[ModelDescriptor]:
        """Return descriptors for the models this provider offers."""
        ...

    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run one non-streaming completion and return the full response."""
        ...

    # NOTE(review): declared as ``async def`` returning ``AsyncIterator``; an
    # implementation written as an async generator is iterated without
    # ``await`` — confirm the declaration matches how adapters are consumed.
    async def stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Run a completion and deliver it incrementally as stream chunks."""
        ...

    async def embed(self, texts: list[str], model_id: str) -> list[list[float]]:
        """Embed a batch of texts. Raises NotImplementedError if unsupported."""
        ...
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class InferenceEngine(ABC):
    """Abstract base for engines that execute inference requests.

    Concrete engines must implement both the one-shot and the streaming entry
    point before they can be instantiated.
    """

    @abstractmethod
    async def infer(self, request: InferenceRequest) -> InferenceResponse:
        """Run the request to completion and return the whole response."""
        ...

    @abstractmethod
    async def infer_stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
        """Run the request and deliver the response as stream chunks."""
        ...
|
|
63
|
+
|
|
64
|
+
@abstractmethod
|
|
65
|
+
async def infer_stream(self, request: InferenceRequest) -> AsyncIterator[StreamChunk]:
|
|
66
|
+
"""Perform streaming inference."""
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class EmbeddingProvider(ABC):
    """Abstract base for components that turn text into embedding vectors."""

    @abstractmethod
    async def embed(self, text: str) -> list[float]:
        """Return the embedding vector for one text."""
        ...

    @abstractmethod
    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per text in the batch."""
        ...

    @abstractmethod
    def get_dimension(self) -> int:
        """Return the width (number of components) of produced vectors."""
        ...
|