velune-cli 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velune/__init__.py +5 -0
- velune/__main__.py +6 -0
- velune/cli/__init__.py +5 -0
- velune/cli/app.py +208 -0
- velune/cli/autocomplete.py +80 -0
- velune/cli/banner.py +60 -0
- velune/cli/commands/__init__.py +32 -0
- velune/cli/commands/ask.py +175 -0
- velune/cli/commands/base.py +16 -0
- velune/cli/commands/chat.py +228 -0
- velune/cli/commands/config.py +224 -0
- velune/cli/commands/daemon.py +88 -0
- velune/cli/commands/doctor.py +721 -0
- velune/cli/commands/init.py +170 -0
- velune/cli/commands/mcp.py +82 -0
- velune/cli/commands/memory.py +293 -0
- velune/cli/commands/models.py +683 -0
- velune/cli/commands/preflight.py +95 -0
- velune/cli/commands/run.py +270 -0
- velune/cli/commands/setup.py +184 -0
- velune/cli/commands/workspace.py +249 -0
- velune/cli/context.py +36 -0
- velune/cli/councilmodel_ui.py +199 -0
- velune/cli/display/council_view.py +254 -0
- velune/cli/display/memory_view.py +126 -0
- velune/cli/display/panels.py +35 -0
- velune/cli/display/progress.py +25 -0
- velune/cli/display/themes.py +25 -0
- velune/cli/main.py +15 -0
- velune/cli/model_selector.py +51 -0
- velune/cli/modes.py +86 -0
- velune/cli/pull_ui.py +123 -0
- velune/cli/registry.py +80 -0
- velune/cli/rendering/__init__.py +5 -0
- velune/cli/rendering/error_panel.py +79 -0
- velune/cli/rendering/markdown.py +63 -0
- velune/cli/repl.py +1855 -0
- velune/cli/session_manager.py +71 -0
- velune/cli/slash_commands.py +37 -0
- velune/cli/theme.py +8 -0
- velune/cognition/__init__.py +23 -0
- velune/cognition/agents/__init__.py +7 -0
- velune/cognition/agents/coder.py +209 -0
- velune/cognition/agents/planner.py +156 -0
- velune/cognition/agents/reviewer.py +195 -0
- velune/cognition/arbitrator.py +220 -0
- velune/cognition/architecture.py +415 -0
- velune/cognition/budget.py +65 -0
- velune/cognition/council/__init__.py +47 -0
- velune/cognition/council/base.py +217 -0
- velune/cognition/council/challenger.py +74 -0
- velune/cognition/council/coder.py +79 -0
- velune/cognition/council/critic_agent.py +43 -0
- velune/cognition/council/critic_configs.py +111 -0
- velune/cognition/council/critics.py +41 -0
- velune/cognition/council/debate.py +46 -0
- velune/cognition/council/factory.py +140 -0
- velune/cognition/council/messages.py +56 -0
- velune/cognition/council/planner.py +124 -0
- velune/cognition/council/reviewer.py +74 -0
- velune/cognition/council/synthesizer.py +67 -0
- velune/cognition/council/tiers.py +188 -0
- velune/cognition/council_orchestrator.py +282 -0
- velune/cognition/firewall.py +354 -0
- velune/cognition/module.py +46 -0
- velune/cognition/orchestrator.py +1205 -0
- velune/cognition/personality.py +238 -0
- velune/cognition/state.py +104 -0
- velune/cognition/style_resolver.py +64 -0
- velune/cognition/verification.py +205 -0
- velune/context/__init__.py +28 -0
- velune/context/assembler.py +240 -0
- velune/context/budget.py +97 -0
- velune/context/extractive.py +95 -0
- velune/context/prompt_adaptation.py +480 -0
- velune/context/sections.py +99 -0
- velune/context/token_counter.py +134 -0
- velune/context/utilization.py +33 -0
- velune/context/window.py +63 -0
- velune/core/__init__.py +89 -0
- velune/core/background.py +5 -0
- velune/core/config/__init__.py +37 -0
- velune/core/errors/__init__.py +90 -0
- velune/core/errors/catalog.py +188 -0
- velune/core/errors/execution.py +31 -0
- velune/core/errors/memory.py +25 -0
- velune/core/errors/orchestration.py +31 -0
- velune/core/errors/provider.py +37 -0
- velune/core/event_loop.py +35 -0
- velune/core/logging.py +83 -0
- velune/core/paths.py +165 -0
- velune/core/runtime.py +113 -0
- velune/core/startup_profiler.py +56 -0
- velune/core/task_registry.py +117 -0
- velune/core/trace.py +83 -0
- velune/core/types/__init__.py +48 -0
- velune/core/types/agent.py +53 -0
- velune/core/types/context.py +42 -0
- velune/core/types/inference.py +38 -0
- velune/core/types/memory.py +42 -0
- velune/core/types/model.py +70 -0
- velune/core/types/provider.py +62 -0
- velune/core/types/repository.py +38 -0
- velune/core/types/task.py +61 -0
- velune/core/types/workspace.py +28 -0
- velune/daemon/client.py +13 -0
- velune/daemon/server.py +127 -0
- velune/daemon/transport.py +179 -0
- velune/events.py +204 -0
- velune/execution/__init__.py +22 -0
- velune/execution/benchmarker.py +315 -0
- velune/execution/cancellation.py +53 -0
- velune/execution/checkpointer.py +130 -0
- velune/execution/command_spec.py +165 -0
- velune/execution/diff_preview.py +197 -0
- velune/execution/executor.py +181 -0
- velune/execution/module.py +18 -0
- velune/execution/multi_diff.py +67 -0
- velune/execution/path_guard.py +74 -0
- velune/execution/planner.py +91 -0
- velune/execution/rollback.py +89 -0
- velune/execution/sandbox.py +268 -0
- velune/execution/validator.py +115 -0
- velune/hardware/__init__.py +1 -0
- velune/hardware/detector.py +192 -0
- velune/kernel/__init__.py +55 -0
- velune/kernel/bootstrap.py +125 -0
- velune/kernel/config.py +426 -0
- velune/kernel/entrypoint.py +78 -0
- velune/kernel/health.py +54 -0
- velune/kernel/lifecycle.py +143 -0
- velune/kernel/module.py +17 -0
- velune/kernel/modules.py +23 -0
- velune/kernel/registry.py +96 -0
- velune/kernel/schemas.py +28 -0
- velune/main.py +9 -0
- velune/mcp/__init__.py +9 -0
- velune/mcp/client.py +115 -0
- velune/mcp/config.py +19 -0
- velune/mcp/server.py +624 -0
- velune/memory/__init__.py +32 -0
- velune/memory/compaction.py +506 -0
- velune/memory/embedding_pipeline.py +241 -0
- velune/memory/lifecycle.py +680 -0
- velune/memory/module.py +218 -0
- velune/memory/prioritizer.py +67 -0
- velune/memory/storage/episodic_schema.sql +53 -0
- velune/memory/storage/lancedb_store.py +282 -0
- velune/memory/storage/sqlite_manager.py +369 -0
- velune/memory/storage/sqlite_pool.py +149 -0
- velune/memory/tiers/episodic.py +588 -0
- velune/memory/tiers/graph.py +378 -0
- velune/memory/tiers/lineage.py +416 -0
- velune/memory/tiers/semantic.py +475 -0
- velune/memory/tiers/working.py +168 -0
- velune/memory/vitality.py +132 -0
- velune/models/__init__.py +15 -0
- velune/models/family.py +76 -0
- velune/models/module.py +20 -0
- velune/models/probes.py +192 -0
- velune/models/profile_cache.py +84 -0
- velune/models/profiler.py +108 -0
- velune/models/registry.py +251 -0
- velune/models/scorer.py +233 -0
- velune/models/specializations.py +205 -0
- velune/orchestration/__init__.py +19 -0
- velune/orchestration/engine.py +239 -0
- velune/orchestration/module.py +15 -0
- velune/orchestration/role_assignments.py +82 -0
- velune/orchestration/schemas.py +98 -0
- velune/plugins/__init__.py +20 -0
- velune/plugins/hooks.py +50 -0
- velune/plugins/loader.py +161 -0
- velune/plugins/registry.py +56 -0
- velune/plugins/schemas.py +21 -0
- velune/providers/__init__.py +23 -0
- velune/providers/adapters/anthropic.py +257 -0
- velune/providers/adapters/fireworks.py +115 -0
- velune/providers/adapters/google.py +234 -0
- velune/providers/adapters/groq.py +151 -0
- velune/providers/adapters/huggingface.py +210 -0
- velune/providers/adapters/llamacpp.py +208 -0
- velune/providers/adapters/lmstudio.py +175 -0
- velune/providers/adapters/ollama.py +233 -0
- velune/providers/adapters/openai.py +213 -0
- velune/providers/adapters/openrouter.py +81 -0
- velune/providers/adapters/together.py +134 -0
- velune/providers/adapters/xai.py +60 -0
- velune/providers/base.py +86 -0
- velune/providers/benchmarker.py +138 -0
- velune/providers/discovery/__init__.py +33 -0
- velune/providers/discovery/anthropic.py +79 -0
- velune/providers/discovery/benchmarks.py +44 -0
- velune/providers/discovery/classifier.py +69 -0
- velune/providers/discovery/fireworks.py +95 -0
- velune/providers/discovery/gguf.py +88 -0
- velune/providers/discovery/google.py +95 -0
- velune/providers/discovery/gpu.py +117 -0
- velune/providers/discovery/groq.py +21 -0
- velune/providers/discovery/huggingface.py +67 -0
- velune/providers/discovery/lmstudio.py +80 -0
- velune/providers/discovery/ollama.py +162 -0
- velune/providers/discovery/openai.py +96 -0
- velune/providers/discovery/openrouter.py +113 -0
- velune/providers/discovery/scanner.py +115 -0
- velune/providers/discovery/together.py +114 -0
- velune/providers/discovery/xai.py +57 -0
- velune/providers/health.py +67 -0
- velune/providers/health_monitor.py +169 -0
- velune/providers/keystore.py +142 -0
- velune/providers/local_paths.py +49 -0
- velune/providers/local_resolver.py +229 -0
- velune/providers/module.py +51 -0
- velune/providers/ollama_manager.py +193 -0
- velune/providers/registry.py +220 -0
- velune/providers/router.py +255 -0
- velune/providers/task_classifier.py +288 -0
- velune/py.typed +0 -0
- velune/repository/__init__.py +33 -0
- velune/repository/analyzer.py +127 -0
- velune/repository/ast_parser.py +822 -0
- velune/repository/blast_radius.py +298 -0
- velune/repository/boundary_classifier.py +295 -0
- velune/repository/cognition.py +316 -0
- velune/repository/grapher.py +179 -0
- velune/repository/import_graph.py +263 -0
- velune/repository/incremental_indexer.py +275 -0
- velune/repository/index_state.py +96 -0
- velune/repository/indexer.py +243 -0
- velune/repository/module.py +17 -0
- velune/repository/parser.py +474 -0
- velune/repository/project_type.py +300 -0
- velune/repository/rename_journal.py +287 -0
- velune/repository/scanner.py +193 -0
- velune/repository/schemas.py +102 -0
- velune/repository/symbol_registry.py +365 -0
- velune/repository/tracker.py +252 -0
- velune/retrieval/__init__.py +27 -0
- velune/retrieval/cache.py +110 -0
- velune/retrieval/fast_path.py +391 -0
- velune/retrieval/graph.py +124 -0
- velune/retrieval/hybrid.py +271 -0
- velune/retrieval/keyword.py +131 -0
- velune/retrieval/module.py +26 -0
- velune/retrieval/pipeline.py +303 -0
- velune/retrieval/reranker.py +102 -0
- velune/retrieval/schemas.py +59 -0
- velune/retrieval/slow_path.py +364 -0
- velune/retrieval/vector.py +203 -0
- velune/telemetry/__init__.py +59 -0
- velune/telemetry/cognition.py +267 -0
- velune/telemetry/cost_estimator.py +92 -0
- velune/telemetry/debug.py +304 -0
- velune/telemetry/doctor.py +244 -0
- velune/telemetry/logging.py +286 -0
- velune/telemetry/spans.py +277 -0
- velune/telemetry/token_tracker.py +140 -0
- velune/telemetry/usage_tracker.py +340 -0
- velune/tools/__init__.py +41 -0
- velune/tools/base/registry.py +87 -0
- velune/tools/base/tool.py +63 -0
- velune/tools/code/navigate.py +116 -0
- velune/tools/code/search.py +123 -0
- velune/tools/filesystem/read.py +75 -0
- velune/tools/filesystem/search.py +136 -0
- velune/tools/filesystem/write.py +163 -0
- velune/tools/git/history.py +177 -0
- velune/tools/git/operations.py +122 -0
- velune/tools/git/state.py +121 -0
- velune/tools/module.py +81 -0
- velune/tools/terminal/execute.py +72 -0
- velune/tools/terminal/history.py +47 -0
- velune/tools/web/fetch.py +55 -0
- velune/tools/web/validator.py +122 -0
- velune_cli-0.9.0.dist-info/METADATA +518 -0
- velune_cli-0.9.0.dist-info/RECORD +279 -0
- velune_cli-0.9.0.dist-info/WHEEL +4 -0
- velune_cli-0.9.0.dist-info/entry_points.txt +2 -0
- velune_cli-0.9.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Vitality classification for memory retention and retrieval.
|
|
2
|
+
|
|
3
|
+
Phase 2a: Recency-based vitality states (LIVE/ZOMBIE/ARCHIVED) determine
|
|
4
|
+
whether a memory is eligible for inclusion in context retrieval.
|
|
5
|
+
|
|
6
|
+
Classification is session-relative: a turn's vitality depends on how many
|
|
7
|
+
sessions have passed since it was recorded.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import enum
|
|
13
|
+
import logging
|
|
14
|
+
import time
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("velune.memory.vitality")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Vitality(enum.Enum):
|
|
21
|
+
"""Recency-based memory state for retrieval eligibility."""
|
|
22
|
+
|
|
23
|
+
LIVE = "live" # Turn from last 3 sessions; always retrieved
|
|
24
|
+
ZOMBIE = "zombie" # Turn from 4-10 sessions ago; retrieved only on LIVE miss
|
|
25
|
+
ARCHIVED = "archived" # Turn > 10 sessions old; never retrieved (unless explicit)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class VitalityClassifier:
|
|
29
|
+
"""Classify memory vitality based on session distance and age."""
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
live_window: int = 3,
|
|
34
|
+
zombie_window: int = 10,
|
|
35
|
+
ttl_seconds: float = 2_592_000, # 30 days default
|
|
36
|
+
) -> None:
|
|
37
|
+
"""Initialize classifier with vitality thresholds.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
live_window:
|
|
42
|
+
Sessions from current: 0-N are LIVE (default 3 sessions).
|
|
43
|
+
zombie_window:
|
|
44
|
+
Sessions from current: (N+1)-M are ZOMBIE; >M are ARCHIVED (default 10).
|
|
45
|
+
ttl_seconds:
|
|
46
|
+
Max age in seconds before forced ARCHIVED (default 30 days).
|
|
47
|
+
"""
|
|
48
|
+
self.live_window = live_window
|
|
49
|
+
self.zombie_window = zombie_window
|
|
50
|
+
self.ttl_seconds = ttl_seconds
|
|
51
|
+
|
|
52
|
+
def classify_turn(
|
|
53
|
+
self,
|
|
54
|
+
turn: Any,
|
|
55
|
+
current_session_index: int,
|
|
56
|
+
turn_session_index: int,
|
|
57
|
+
now: float | None = None,
|
|
58
|
+
) -> Vitality:
|
|
59
|
+
"""Classify a turn's vitality based on session distance and age.
|
|
60
|
+
|
|
61
|
+
Parameters
|
|
62
|
+
----------
|
|
63
|
+
turn:
|
|
64
|
+
The turn object with a 'created_at' timestamp attribute.
|
|
65
|
+
current_session_index:
|
|
66
|
+
The ordinal index of the current session (0 = oldest).
|
|
67
|
+
turn_session_index:
|
|
68
|
+
The ordinal index of the session containing this turn.
|
|
69
|
+
now:
|
|
70
|
+
Current timestamp; defaults to time.time().
|
|
71
|
+
|
|
72
|
+
Returns
|
|
73
|
+
-------
|
|
74
|
+
Vitality:
|
|
75
|
+
One of LIVE, ZOMBIE, ARCHIVED.
|
|
76
|
+
"""
|
|
77
|
+
if now is None:
|
|
78
|
+
now = time.time()
|
|
79
|
+
|
|
80
|
+
# Session distance: how many sessions old is this turn?
|
|
81
|
+
session_distance = current_session_index - turn_session_index
|
|
82
|
+
|
|
83
|
+
# Age-based cutoff: if turn is older than TTL, it's archived.
|
|
84
|
+
age = now - getattr(turn, "created_at", now)
|
|
85
|
+
if age > self.ttl_seconds:
|
|
86
|
+
return Vitality.ARCHIVED
|
|
87
|
+
|
|
88
|
+
# Session-based classification.
|
|
89
|
+
if session_distance <= self.live_window:
|
|
90
|
+
return Vitality.LIVE
|
|
91
|
+
if session_distance <= self.zombie_window:
|
|
92
|
+
return Vitality.ZOMBIE
|
|
93
|
+
return Vitality.ARCHIVED
|
|
94
|
+
|
|
95
|
+
def should_include(
|
|
96
|
+
self,
|
|
97
|
+
vitality: Vitality,
|
|
98
|
+
fallback_to_zombie: bool = False,
|
|
99
|
+
) -> bool:
|
|
100
|
+
"""Return True if a memory with this vitality should be retrieved.
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
vitality:
|
|
105
|
+
The turn's vitality state.
|
|
106
|
+
fallback_to_zombie:
|
|
107
|
+
If True and no LIVE results found, include ZOMBIE (not default).
|
|
108
|
+
|
|
109
|
+
Returns
|
|
110
|
+
-------
|
|
111
|
+
bool:
|
|
112
|
+
True if the memory is eligible for retrieval.
|
|
113
|
+
"""
|
|
114
|
+
if vitality == Vitality.LIVE:
|
|
115
|
+
return True
|
|
116
|
+
if fallback_to_zombie and vitality == Vitality.ZOMBIE:
|
|
117
|
+
return True
|
|
118
|
+
return False
|
|
119
|
+
|
|
120
|
+
def decay_factor(self, vitality: Vitality) -> float:
|
|
121
|
+
"""Return a trust/confidence decay factor for this vitality.
|
|
122
|
+
|
|
123
|
+
LIVE memories have full confidence; ZOMBIE memories are discounted.
|
|
124
|
+
ARCHIVED memories should not appear, but if they do, score them low.
|
|
125
|
+
"""
|
|
126
|
+
match vitality:
|
|
127
|
+
case Vitality.LIVE:
|
|
128
|
+
return 1.0
|
|
129
|
+
case Vitality.ZOMBIE:
|
|
130
|
+
return 0.6
|
|
131
|
+
case Vitality.ARCHIVED:
|
|
132
|
+
return 0.2
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Model intelligence and capability layer."""
|
|
2
|
+
|
|
3
|
+
from velune.models.profiler import ModelProfile, ModelProfiler
|
|
4
|
+
from velune.models.registry import ModelCapabilityRegistry
|
|
5
|
+
from velune.models.scorer import ModelScorer
|
|
6
|
+
from velune.models.specializations import CouncilRole, ModelSpecializationMapper
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ModelCapabilityRegistry",
|
|
10
|
+
"ModelProfile",
|
|
11
|
+
"ModelProfiler",
|
|
12
|
+
"ModelScorer",
|
|
13
|
+
"CouncilRole",
|
|
14
|
+
"ModelSpecializationMapper",
|
|
15
|
+
]
|
velune/models/family.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Model family detection and classification.
|
|
2
|
+
|
|
3
|
+
Identifies model families by ID pattern to enable prompt format adaptation.
|
|
4
|
+
Different families respond better to different prompt structures:
|
|
5
|
+
- Qwen: ChatML format with <|im_start|>/<|im_end|>
|
|
6
|
+
- DeepSeek: Specialized thinking prompt structure
|
|
7
|
+
- Llama3: [INST] and <<SYS>> markers
|
|
8
|
+
- Phi: Shorter, more direct prompts
|
|
9
|
+
- Mistral: [INST] markers with different system placement
|
|
10
|
+
- Gemma: Direct format similar to Llama
|
|
11
|
+
- Cloud APIs: Standard message list format
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from enum import StrEnum
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("velune.models.family")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ModelFamily(StrEnum):
|
|
23
|
+
"""Enumeration of model families with distinct prompt formats."""
|
|
24
|
+
|
|
25
|
+
QWEN = "qwen"
|
|
26
|
+
DEEPSEEK = "deepseek"
|
|
27
|
+
LLAMA3 = "llama3"
|
|
28
|
+
PHI = "phi"
|
|
29
|
+
MISTRAL = "mistral"
|
|
30
|
+
GEMMA = "gemma"
|
|
31
|
+
CLAUDE = "claude"
|
|
32
|
+
GPT = "gpt"
|
|
33
|
+
GEMINI = "gemini"
|
|
34
|
+
UNKNOWN = "unknown"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def detect_family(model_id: str) -> ModelFamily:
|
|
38
|
+
"""Detect model family from model ID string (case-insensitive).
|
|
39
|
+
|
|
40
|
+
Parameters
|
|
41
|
+
----------
|
|
42
|
+
model_id:
|
|
43
|
+
The model identifier, e.g., "qwen:7b", "Claude-3", "deepseek-r1"
|
|
44
|
+
|
|
45
|
+
Returns
|
|
46
|
+
-------
|
|
47
|
+
ModelFamily:
|
|
48
|
+
The detected family, or UNKNOWN if no match.
|
|
49
|
+
"""
|
|
50
|
+
if not model_id:
|
|
51
|
+
return ModelFamily.UNKNOWN
|
|
52
|
+
|
|
53
|
+
model_id_lower = model_id.lower()
|
|
54
|
+
|
|
55
|
+
# Check for exact or prefix matches (order matters for specificity)
|
|
56
|
+
if "qwen" in model_id_lower:
|
|
57
|
+
return ModelFamily.QWEN
|
|
58
|
+
if "deepseek" in model_id_lower or "deepseek-r1" in model_id_lower:
|
|
59
|
+
return ModelFamily.DEEPSEEK
|
|
60
|
+
if "llama" in model_id_lower:
|
|
61
|
+
return ModelFamily.LLAMA3
|
|
62
|
+
if "phi" in model_id_lower:
|
|
63
|
+
return ModelFamily.PHI
|
|
64
|
+
if "mistral" in model_id_lower:
|
|
65
|
+
return ModelFamily.MISTRAL
|
|
66
|
+
if "gemma" in model_id_lower:
|
|
67
|
+
return ModelFamily.GEMMA
|
|
68
|
+
if "claude" in model_id_lower:
|
|
69
|
+
return ModelFamily.CLAUDE
|
|
70
|
+
if "gpt" in model_id_lower or "gpt-4" in model_id_lower or "gpt-3.5" in model_id_lower:
|
|
71
|
+
return ModelFamily.GPT
|
|
72
|
+
if "gemini" in model_id_lower:
|
|
73
|
+
return ModelFamily.GEMINI
|
|
74
|
+
|
|
75
|
+
logger.debug("Unknown model family for model_id: %s", model_id)
|
|
76
|
+
return ModelFamily.UNKNOWN
|
velune/models/module.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from velune.kernel.bootstrap import RuntimeEnvironment, SubsystemModule
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _create_model_registry(env: RuntimeEnvironment):
    """Build the model capability registry for the given runtime environment.

    Besides returning the new registry, this registers the registry's
    ``scanner`` in ``env.container`` under "runtime.model_discovery".
    """
    # Imported at call time rather than at module import time.
    from velune.models.registry import ModelCapabilityRegistry

    capability_registry = ModelCapabilityRegistry()

    # Register the scanner property for backward compatibility
    env.container.register_instance(
        "runtime.model_discovery",
        capability_registry.scanner,
    )
    return capability_registry


# Subsystem descriptors contributed by the models package: a single
# "model_registry" module built by the factory above.
MODEL_MODULES = [
    SubsystemModule(
        name="model_registry",
        factory=_create_model_registry,
        container_key="runtime.model_registry",
        lifecycle_key="models",
    )
]
|
velune/models/probes.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Empirical capability probes and model prober engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ProbeResult:
    """Outcome of one capability probe run against a model."""

    # Name of the capability that was probed.
    capability: str
    # Heuristic score, 0.0 to 1.0.
    score: float
    # Latency of the probe call in milliseconds.
    latency_ms: float
    # Whether the probe counted as passed.
    passed: bool
    # Free-form detail text; empty when nothing was recorded.
    details: str = ""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Prompt for the coding probe: asks for a bare Sieve of Eratosthenes function.
CODING_PROBE = (
    "Write a Python function that finds all prime numbers up to n using the "
    "Sieve of Eratosthenes. Return only the function, no explanation."
)

# Prompt for the reasoning probe: a transitive syllogism answered Yes/No.
REASONING_PROBE = (
    "If all bloops are razzles, and all razzles are lazzles, are all bloops "
    "lazzles? Answer with just Yes or No, then one sentence explanation."
)

# Prompt for the instruction-following probe: demands one exact JSON object.
INSTRUCTION_PROBE = (
    'Respond with ONLY the JSON object {"status": "ok", "count": 42}. '
    "Nothing else."
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _score_coding_response(response: str) -> float:
|
|
29
|
+
"""Score coding probe response (0.0 to 1.0)."""
|
|
30
|
+
score = 0.0
|
|
31
|
+
if "def " in response:
|
|
32
|
+
score += 0.3
|
|
33
|
+
if "range(" in response:
|
|
34
|
+
score += 0.2
|
|
35
|
+
if "sieve" in response.lower() or ("for" in response and "%" in response):
|
|
36
|
+
score += 0.3
|
|
37
|
+
if "return" in response:
|
|
38
|
+
score += 0.2
|
|
39
|
+
return min(1.0, score)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _score_reasoning_response(response: str) -> float:
|
|
43
|
+
"""Score reasoning probe response (0.0 to 1.0)."""
|
|
44
|
+
cleaned = response.strip().lower()
|
|
45
|
+
if not cleaned:
|
|
46
|
+
return 0.0
|
|
47
|
+
|
|
48
|
+
score = 0.0
|
|
49
|
+
if "yes" in cleaned:
|
|
50
|
+
score += 0.7
|
|
51
|
+
if len(cleaned) > 10:
|
|
52
|
+
score += 0.3
|
|
53
|
+
return min(1.0, score)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _score_instruction_response(response: str) -> float:
|
|
57
|
+
"""Score instruction following probe response (0.0 to 1.0)."""
|
|
58
|
+
cleaned = response.strip()
|
|
59
|
+
# Clean possible markdown block wrappers
|
|
60
|
+
for prefix in ("```json", "```"):
|
|
61
|
+
if cleaned.startswith(prefix):
|
|
62
|
+
cleaned = cleaned[len(prefix) :]
|
|
63
|
+
if cleaned.endswith("```"):
|
|
64
|
+
cleaned = cleaned[:-3]
|
|
65
|
+
cleaned = cleaned.strip()
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
import json
|
|
69
|
+
|
|
70
|
+
data = json.loads(cleaned)
|
|
71
|
+
if isinstance(data, dict):
|
|
72
|
+
if data.get("status") == "ok" and data.get("count") == 42:
|
|
73
|
+
return 1.0
|
|
74
|
+
return 0.5
|
|
75
|
+
except Exception:
|
|
76
|
+
pass
|
|
77
|
+
return 0.0
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ModelProber:
    """Runs lightweight capability probes against a specific model via its provider."""

    def __init__(self, provider: Any, model_id: str) -> None:
        """Store the provider to call and the ID of the model to probe."""
        self.provider = provider
        self.model_id = model_id

    async def _run_probe(
        self,
        capability: str,
        prompt: str,
        max_tokens: int,
        scorer: Any,
    ) -> ProbeResult:
        """Send one probe prompt and turn the reply into a ProbeResult.

        Parameters
        ----------
        capability:
            Label stored on the result ("coding", "reasoning", "instruction").
        prompt:
            User message sent to the model.
        max_tokens:
            Token limit passed on the inference request.
        scorer:
            Callable taking the response text and returning a 0.0-1.0 score.

        Returns
        -------
        ProbeResult:
            On success: the score, the measured latency, ``passed`` set when
            the score is above 0.5, and the first 100 characters of the reply.
            When there is no provider or the call raises: score 0.0, latency
            -1.0, ``passed`` False and the reason in ``details``.
        """
        if not self.provider:
            return ProbeResult(capability, 0.0, -1.0, False, "Provider not available")

        from velune.core.types.inference import InferenceRequest

        start = time.perf_counter()
        try:
            req = InferenceRequest(
                model_id=self.model_id,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=max_tokens,
            )
            response = await self.provider.infer(req)
            latency_ms = (time.perf_counter() - start) * 1000.0
            score = scorer(response.content)
            return ProbeResult(
                capability, score, latency_ms, score > 0.5, response.content[:100]
            )
        except Exception as e:
            # A probe must never raise: a failing model is reported as a
            # failed result so the remaining probes still run.
            return ProbeResult(capability, 0.0, -1.0, False, str(e))

    async def run_coding_probe(self) -> ProbeResult:
        """Run the coding capability probe."""
        return await self._run_probe("coding", CODING_PROBE, 200, _score_coding_response)

    async def run_reasoning_probe(self) -> ProbeResult:
        """Run the deductive reasoning capability probe."""
        return await self._run_probe(
            "reasoning", REASONING_PROBE, 100, _score_reasoning_response
        )

    async def run_instruction_probe(self) -> ProbeResult:
        """Run the strict JSON instruction following capability probe."""
        return await self._run_probe(
            "instruction", INSTRUCTION_PROBE, 100, _score_instruction_response
        )

    async def run_all_probes(self) -> dict[str, ProbeResult]:
        """Run all capability probes in parallel.

        Returns a dict keyed by "coding", "reasoning" and "instruction".
        """
        import asyncio

        coding, reasoning, instruction = await asyncio.gather(
            self.run_coding_probe(),
            self.run_reasoning_probe(),
            self.run_instruction_probe(),
        )
        return {"coding": coding, "reasoning": reasoning, "instruction": instruction}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class FastProbe:
    """Single lightweight probe to validate a model is responding."""

    PING_PROMPT = "Reply with exactly the word: PONG"
    TIMEOUT = 10.0

    async def ping(self, provider: Any, model_id: str) -> bool:
        """Returns True if model responds within timeout.

        "Responds" means the reply content is non-empty after stripping
        whitespace. A timeout or any other exception from the call yields
        False.
        """
        import asyncio

        from velune.core.types.inference import InferenceRequest

        try:
            ping_request = InferenceRequest(
                model_id=model_id,
                messages=[{"role": "user", "content": self.PING_PROMPT}],
                temperature=0.0,
                max_tokens=5,
            )
            reply = await asyncio.wait_for(
                provider.infer(ping_request),
                timeout=self.TIMEOUT,
            )
            answered = bool(reply.content.strip())
        except Exception:
            answered = False
        return answered
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Model profile caching layer for empirical benchmark results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ModelProfileCache:
    """Persists probe results to avoid re-probing on every invocation.

    The cache is a single JSON file mapping ``"<provider_id>/<model_id>"`` to
    ``{"probed_at": <epoch seconds>, "probes": {...}}``.
    """

    CACHE_TTL_HOURS = 168  # Re-probe weekly (7 days * 24 hours = 168 hours)

    def __init__(self, cache_path: Path) -> None:
        """Remember *cache_path* and create its parent directory if needed."""
        self.cache_path = cache_path
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)

    def _load(self) -> dict:
        """Return the cache file's contents, or ``{}`` when unusable.

        "Unusable" covers a missing file, an unreadable or corrupt file, and
        valid JSON whose top level is not an object.
        """
        if not self.cache_path.exists():
            return {}
        try:
            # Read with the same encoding the file is written with.
            data = json.loads(self.cache_path.read_text(encoding="utf-8"))
        except Exception:
            # Best effort: a broken cache only costs a re-probe.
            return {}
        return data if isinstance(data, dict) else {}

    def get(self, model_id: str, provider_id: str) -> dict | None:
        """Retrieve cached probe results if present and fresh.

        Returns the stored entry (with "probed_at" and "probes" keys), or
        ``None`` when there is no entry, the entry is malformed, or it is
        older than ``CACHE_TTL_HOURS``.
        """
        entry = self._load().get(f"{provider_id}/{model_id}")
        if not entry or not isinstance(entry, dict):
            return None

        try:
            age_hours = (time.time() - entry["probed_at"]) / 3600.0
        except (KeyError, TypeError):
            return None

        if age_hours > self.CACHE_TTL_HOURS:
            return None  # Stale
        return entry

    def set(self, model_id: str, provider_id: str, probe_results: dict) -> None:
        """Cache probe results locally, converting dataclass results to dictionaries.

        Parameters
        ----------
        model_id, provider_id:
            Together form the cache key ``"<provider_id>/<model_id>"``.
        probe_results:
            Mapping of capability name to a result. Dataclass instances are
            converted with ``dataclasses.asdict``, dicts are stored as-is, and
            any other object is read through its ``score``, ``latency_ms``,
            ``passed`` and ``details`` attributes.

        Raises
        ------
        TypeError, ValueError:
            If the results cannot be serialized to JSON.
        OSError:
            If the cache file cannot be written.

        The existing cache file is left untouched and no temporary file is
        left behind when an error is raised.
        """
        import dataclasses
        import os
        import tempfile

        data = self._load()

        serialized_probes = {}
        for cap, result in probe_results.items():
            if dataclasses.is_dataclass(result):
                serialized_probes[cap] = dataclasses.asdict(result)
            elif isinstance(result, dict):
                serialized_probes[cap] = result
            else:
                # Fallback mapping
                serialized_probes[cap] = {
                    "capability": cap,
                    "score": getattr(result, "score", 0.0),
                    "latency_ms": getattr(result, "latency_ms", -1.0),
                    "passed": getattr(result, "passed", False),
                    "details": getattr(result, "details", ""),
                }

        data[f"{provider_id}/{model_id}"] = {
            "probed_at": time.time(),
            "probes": serialized_probes,
        }

        # Atomically write to temp file, then rename/replace. The temp file
        # lives next to the target so the replace stays on one filesystem.
        temp_file = tempfile.NamedTemporaryFile(
            "w", dir=self.cache_path.parent, delete=False, encoding="utf-8"
        )
        temp_file_name = temp_file.name
        try:
            with temp_file:
                json.dump(data, temp_file, indent=2)
            os.replace(temp_file_name, str(self.cache_path))
        except BaseException:
            # Covers serialization failures as well as a failed replace; the
            # file is already closed here, so it can be removed everywhere.
            try:
                os.remove(temp_file_name)
            except OSError:
                # Already gone or not removable; the original error matters more.
                pass
            raise
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Model profiling and dynamic metrics analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
from velune.core.types.model import CapabilityLevel, ModelDescriptor
|
|
10
|
+
from velune.providers.base import ModelProvider
|
|
11
|
+
from velune.providers.benchmarker import ModelBenchmarkMetrics, ProviderBenchmarker
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelProfile(BaseModel):
    """Profile representing capability, speed, and real-time execution statistics."""

    # Identity of the profiled model.
    model_id: str
    provider_id: str

    # Latency statistics in milliseconds, and how many samples back them.
    avg_latency_ms: float = 0.0
    p95_latency_ms: float = 0.0
    sample_count: int = 0

    # Benchmark metrics.
    tps: float = 0.0  # Tokens per second
    ttft_ms: float = 0.0  # Time to first token
    # Structured-output compliance. NOTE(review): defaults to 1.0, so this
    # looks like a 0.0-1.0 fraction rather than a percentage - confirm.
    json_validity: float = 1.0

    # Epoch timestamp, set to the current time when the profile is created.
    last_updated: float = Field(default_factory=time.time)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ModelProfiler:
    """Measures accuracy, response speed, and structure-matching capabilities."""

    def __init__(self) -> None:
        """Start with no profiles and no latency samples."""
        # Both maps are keyed by "<provider_id>/<model_id>".
        self._profiles: dict[str, ModelProfile] = {}
        self._latency_samples: dict[str, list[float]] = {}

    def record_execution(self, provider_id: str, model_id: str, latency_ms: float) -> None:
        """Record real-time execution latency to build statistical latency profiles.

        Appends *latency_ms* to the model's sample window (capped at the 100
        most recent samples), then stores the window's mean, 95th-percentile
        value and size on the model's profile, creating the profile if needed.
        """
        key = f"{provider_id}/{model_id}"

        window = self._latency_samples.setdefault(key, [])
        window.append(latency_ms)

        # Enforce rolling history bounds to prevent memory bloat
        if len(window) > 100:
            del window[0]

        # The window holds at least the sample just appended, so the division
        # and the percentile index below are always valid.
        count = len(window)
        ordered = sorted(window)
        mean_latency = sum(window) / count
        p95_latency = ordered[int(count * 0.95)]

        if key not in self._profiles:
            self._profiles[key] = ModelProfile(
                model_id=model_id,
                provider_id=provider_id,
                avg_latency_ms=mean_latency,
                p95_latency_ms=p95_latency,
                sample_count=count,
            )
            return

        existing = self._profiles[key]
        existing.avg_latency_ms = mean_latency
        existing.p95_latency_ms = p95_latency
        existing.sample_count = count
        existing.last_updated = time.time()

    async def profile_model(
        self, provider: ModelProvider, descriptor: ModelDescriptor
    ) -> ModelProfile:
        """Actively benchmark an operational provider model for performance and structure.

        Runs a ``ProviderBenchmarker`` for the descriptor's model and copies
        its ``tps``, ``ttft_ms`` and ``json_validity`` onto the model's
        profile, creating the profile if needed. When ``json_validity`` is
        below 0.5 and the descriptor's capabilities expose ``tool_use``, that
        capability is set to ``CapabilityLevel.NONE`` on the descriptor.
        """
        key = f"{descriptor.provider_id}/{descriptor.model_id}"

        # Run benchmarks
        metrics: ModelBenchmarkMetrics = await ProviderBenchmarker(
            provider, descriptor.model_id
        ).evaluate()

        # Build or update the profile
        profile = self._profiles.get(key)
        if not profile:
            profile = ModelProfile(
                model_id=descriptor.model_id,
                provider_id=descriptor.provider_id,
            )
            self._profiles[key] = profile

        profile.tps = metrics.tps
        profile.ttft_ms = metrics.ttft_ms
        profile.json_validity = metrics.json_validity
        profile.last_updated = time.time()

        # Update capability profile levels based on empirical benchmark results
        capabilities = getattr(descriptor, "capabilities", None)
        if capabilities and hasattr(capabilities, "tool_use"):
            if metrics.json_validity < 0.5:
                # Degrade tool use if model repeatedly fails structure test
                capabilities.tool_use = CapabilityLevel.NONE

        return profile

    def get_profile(self, provider_id: str, model_id: str) -> ModelProfile | None:
        """Look up the recorded profile for a model, or None if there is none."""
        return self._profiles.get(f"{provider_id}/{model_id}")

    def list_profiles(self) -> list[ModelProfile]:
        """Enumerate all active profiles as a new list."""
        return [*self._profiles.values()]
|