split-stack 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- split_stack/__init__.py +106 -0
- split_stack/__main__.py +4 -0
- split_stack/advice.py +12 -0
- split_stack/benchmark.py +97 -0
- split_stack/cli.py +690 -0
- split_stack/community_picks.py +247 -0
- split_stack/compare.py +194 -0
- split_stack/complexity.py +77 -0
- split_stack/discovery.py +288 -0
- split_stack/hints.py +102 -0
- split_stack/local_models.py +63 -0
- split_stack/model_guide.py +273 -0
- split_stack/model_registry.py +314 -0
- split_stack/models.py +77 -0
- split_stack/ollama_errors.py +30 -0
- split_stack/ollama_generate.py +135 -0
- split_stack/poc_models.py +131 -0
- split_stack/presets.py +75 -0
- split_stack/quantization.py +137 -0
- split_stack/requirements.py +287 -0
- split_stack/routing.py +96 -0
- split_stack/session.py +259 -0
- split_stack/setup_wizard.py +259 -0
- split_stack/startup_tips.py +169 -0
- split_stack/tiering.py +66 -0
- split_stack/validation.py +85 -0
- split_stack-0.2.0.dist-info/METADATA +364 -0
- split_stack-0.2.0.dist-info/RECORD +32 -0
- split_stack-0.2.0.dist-info/WHEEL +5 -0
- split_stack-0.2.0.dist-info/entry_points.txt +2 -0
- split_stack-0.2.0.dist-info/licenses/LICENSE +21 -0
- split_stack-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from split_stack.discovery import discover_models
|
|
8
|
+
from split_stack.model_registry import normalize_deployment_profile
|
|
9
|
+
from split_stack.presets import RECOMMENDED_STACKS, assign_recommended_tiers, list_recommended_stacks, recommended_models
|
|
10
|
+
from split_stack.tiering import describe_tiers
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class SetupPlan:
    """Immutable description of what a setup run would do for one profile."""

    # Normalized deployment profile name.
    profile: str
    # Description text taken from the recommended stack.
    description: str
    # Every model tag listed by the recommended stack.
    models: tuple[str, ...]
    # Stack tags that were matched against the discovered models.
    installed: tuple[str, ...]
    # Stack tags that were not matched and would have to be pulled.
    missing: tuple[str, ...]
    # Where the generated config file is to be written.
    config_path: Path
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class SetupResult:
    """Immutable outcome of a setup run."""

    # Profile the run was performed for.
    profile: str
    # Config file location associated with the run.
    config_path: Path
    # Tags downloaded during this run.
    pulled: tuple[str, ...]
    # Tags that were needed but not downloaded.
    skipped: tuple[str, ...]
    # Tags that were already available before the run.
    already_present: tuple[str, ...]
    # Tier slot name -> model tag (or None when the slot is empty).
    tiers: dict[str, str | None]
    # True when the user declined the download prompt.
    cancelled: bool = False
    # True when nothing was downloaded or written on purpose.
    dry_run: bool = False
    # Joined error text, or None when nothing went wrong.
    error: str | None = None

    @property
    def ready(self) -> bool:
        """Tell whether the run ended without an error and without cancellation."""
        if self.cancelled:
            return False
        return self.error is None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _repo_root() -> Path:
    """Return the third ancestor directory of this module's resolved path."""
    module_file = Path(__file__).resolve()
    # parents[0] is the containing directory, so parents[2] is two levels above it.
    return module_file.parents[2]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def default_config_path() -> Path:
    """Return ``split-stack.models.json`` inside the current working directory."""
    return Path.cwd().joinpath("split-stack.models.json")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def model_is_installed(tag: str, installed: list[str]) -> bool:
    """Decide whether ``tag`` is covered by any name in ``installed``.

    Comparison is case-insensitive. A name matches when it equals the tag,
    when it starts with ``"<tag>:"``, or, only for tags that themselves
    contain a colon, when the tag occurs anywhere inside the name.
    """
    needle = tag.lower()
    prefix = f"{needle}:"
    has_version = ":" in needle
    return any(
        candidate == needle
        or candidate.startswith(prefix)
        or (has_version and needle in candidate)
        for candidate in (entry.lower() for entry in installed)
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def plan_setup(
    profile: str,
    *,
    base_url: str = "http://127.0.0.1:11434",
    config_path: Path | None = None,
) -> SetupPlan:
    """Work out which recommended models are present and which are missing.

    The profile is normalized, its entry is looked up in
    ``RECOMMENDED_STACKS``, and each stack tag is checked against the result
    of ``discover_models(base_url=...)`` with ``model_is_installed``.

    Args:
        profile: Profile name or alias accepted by ``normalize_deployment_profile``.
        base_url: Base URL handed to ``discover_models``.
        config_path: Target config file; falls back to ``default_config_path()``.

    Returns:
        A ``SetupPlan`` with the stack tags split into installed and missing,
        each keeping the stack's own order.
    """
    resolved = normalize_deployment_profile(profile)
    recommended = RECOMMENDED_STACKS[resolved]
    available = discover_models(base_url=base_url)

    found: list[str] = []
    absent: list[str] = []
    for tag in recommended.models:
        bucket = found if model_is_installed(tag, available) else absent
        bucket.append(tag)

    target = config_path if config_path else default_config_path()
    return SetupPlan(
        profile=resolved,
        description=recommended.description,
        models=recommended.models,
        installed=tuple(found),
        missing=tuple(absent),
        config_path=target,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def pull_model(
    tag: str,
    *,
    base_url: str = "http://127.0.0.1:11434",
    timeout_seconds: int = 3600,
) -> None:
    """Request a model pull with a single non-streaming POST.

    Sends ``{"name": tag, "stream": False}`` as JSON to ``<base_url>/api/pull``
    and raises through ``raise_for_status()`` on an HTTP error status.

    Args:
        tag: Model tag to pull.
        base_url: Server base URL; a trailing slash is stripped.
        timeout_seconds: Timeout passed to ``requests.post``.

    Raises:
        RuntimeError: When the optional ``requests`` package cannot be imported.
    """
    try:
        import requests
    except ImportError as exc:
        message = "pull_model requires optional dependency: pip install split-stack[ollama]"
        raise RuntimeError(message) from exc

    endpoint = f"{base_url.rstrip('/')}/api/pull"
    body = {"name": tag, "stream": False}
    reply = requests.post(endpoint, json=body, timeout=timeout_seconds)
    reply.raise_for_status()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def write_setup_config(profile: str, config_path: Path) -> None:
    """Write the setup config JSON for ``profile`` to ``config_path``.

    The payload starts from ``config/models.example.json`` under
    ``_repo_root()`` when that file exists, otherwise from ``{"models": []}``.
    The ``deployment_profile`` and ``comment`` keys are then set, parent
    directories are created as needed, and the file is written as indented
    UTF-8 JSON with a trailing newline.
    """
    resolved = normalize_deployment_profile(profile)
    template = _repo_root() / "config" / "models.example.json"
    payload = (
        json.loads(template.read_text(encoding="utf-8"))
        if template.is_file()
        else {"models": []}
    )
    payload["deployment_profile"] = resolved
    payload["comment"] = f"Generated by stack setup ({resolved}). Edit models[] if your tags differ."

    config_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2) + "\n"
    config_path.write_text(serialized, encoding="utf-8")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def prompt_profile_choice() -> str:
    """Interactively ask which recommended GPU preset to use.

    Prints the numbered presets from ``list_recommended_stacks()`` and keeps
    prompting until the answer resolves:

    * an empty answer selects ``"workstation_12gb"``;
    * a decimal number within range selects that list entry;
    * anything else is lowercased and passed to
      ``normalize_deployment_profile``; a ``ValueError`` from it prints a hint
      and prompts again.

    Returns:
        The selected profile name.
    """
    stacks = list_recommended_stacks()
    print("Choose your GPU preset:")
    for index, stack in enumerate(stacks, start=1):
        print(f" {index}. {stack.profile} — {stack.description}")
    print("")
    while True:
        raw = input(f"Preset [1-{len(stacks)}] (default 2 = workstation_12gb): ").strip()
        if not raw:
            return "workstation_12gb"
        # isdecimal(), not isdigit(): isdigit() is also true for characters
        # such as superscript digits, which int() rejects with a ValueError
        # that nothing here would catch.
        if raw.isdecimal():
            choice = int(raw)
            if 1 <= choice <= len(stacks):
                return stacks[choice - 1].profile
        lowered = raw.lower()
        try:
            return normalize_deployment_profile(lowered)
        except ValueError:
            print("Enter a preset number or name like workstation_12gb / 12gb.")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def prompt_download_consent(*, missing: tuple[str, ...]) -> bool:
    """Ask the user to approve downloading the tags in ``missing``.

    With nothing missing no question is asked and the answer is ``True``.
    Otherwise the tags are listed and only a reply of ``y`` or ``yes``
    (case-insensitive, surrounding whitespace ignored) counts as consent.
    """
    if missing:
        print("")
        print(f"Ollama will download {len(missing)} model(s) (large files, one-time):")
        for tag in missing:
            print(f" - {tag}")
        print("")
        reply = input("Proceed with download? [y/N]: ")
        return reply.strip().lower() in ("y", "yes")
    return True
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def run_setup(
    profile: str | None = None,
    *,
    base_url: str = "http://127.0.0.1:11434",
    config_path: Path | None = None,
    assume_yes: bool = False,
    dry_run: bool = False,
    interactive: bool = True,
) -> SetupResult:
    """Run the setup flow: plan, optionally confirm, pull models, write config.

    Args:
        profile: Profile to set up. When empty, the user is asked if
            ``interactive`` is true; otherwise ``"workstation_12gb"`` is used.
        base_url: Base URL used for planning and for pulling models.
        config_path: Config file target; defaults to ``default_config_path()``.
        assume_yes: Skip the download confirmation prompt.
        dry_run: Report the plan and tiers without pulling or writing anything.
        interactive: Allow prompting; the consent prompt is only shown when true.

    Returns:
        A ``SetupResult``. Planning failures, pull failures and config-writing
        failures are reported through its ``error`` field rather than raised.
    """
    selected = profile
    if interactive and not selected:
        selected = prompt_profile_choice()
    selected = selected or "workstation_12gb"

    target = config_path or default_config_path()
    try:
        plan = plan_setup(selected, base_url=base_url, config_path=target)
    except Exception as exc:
        return SetupResult(
            profile=selected,
            config_path=target,
            pulled=(),
            skipped=(),
            already_present=(),
            tiers={},
            error=str(exc),
        )

    def outcome(**fields):
        # Every result after planning shares these three plan-derived fields.
        return SetupResult(
            profile=plan.profile,
            config_path=plan.config_path,
            already_present=plan.installed,
            **fields,
        )

    if dry_run:
        return outcome(
            pulled=(),
            skipped=plan.missing,
            tiers=describe_tiers(assign_recommended_tiers(plan.profile)),
            dry_run=True,
        )

    needs_consent = bool(plan.missing) and interactive and not assume_yes
    if needs_consent and not prompt_download_consent(missing=plan.missing):
        return outcome(pulled=(), skipped=plan.missing, tiers={}, cancelled=True)

    fetched: list[str] = []
    problems: list[str] = []
    for tag in plan.missing:
        try:
            print(f"Pulling {tag}...", flush=True)
            pull_model(tag, base_url=base_url)
        except Exception as exc:
            # One failed pull is recorded; the remaining tags are still tried.
            problems.append(f"{tag}: {exc}")
        else:
            fetched.append(tag)

    # The config is written even when some pulls failed.
    try:
        write_setup_config(plan.profile, plan.config_path)
        tier_view = describe_tiers(assign_recommended_tiers(plan.profile))
    except Exception as exc:
        problems.append(str(exc))
        tier_view = {}

    if not problems:
        return outcome(pulled=tuple(fetched), skipped=(), tiers=tier_view)

    leftover = tuple(tag for tag in plan.missing if tag not in fetched)
    return outcome(
        pulled=tuple(fetched),
        skipped=leftover,
        tiers=tier_view,
        error="; ".join(problems),
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def format_setup_summary(result: SetupResult) -> str:
    """Render a ``SetupResult`` as newline-separated ``key=value`` style text.

    Profile and config path always appear. Tag lists, tiers, status markers
    and the error are appended only when they are non-empty or set.
    """
    out = [f"profile={result.profile}", f"config={result.config_path}"]

    tag_sections = (
        ("already_present", result.already_present),
        ("pulled", result.pulled),
        ("not_installed", result.skipped),
    )
    for label, tags in tag_sections:
        if tags:
            out.append(f"{label}={','.join(tags)}")

    if result.tiers:
        out.append("tiers:")
        # Empty slots are shown as a dash.
        out.extend(f" {slot}: {model or '-'}" for slot, model in result.tiers.items())

    if result.cancelled:
        out.append("status=cancelled (no downloads)")
    if result.dry_run:
        out.append("status=dry-run")
    if result.error:
        out.append(f"error={result.error}")
    return "\n".join(out)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Import-time and CLI tips: scan local models and surface community recommendations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("split_stack")
|
|
10
|
+
|
|
11
|
+
_emitted = False
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _import_tips_mode() -> str:
    """Read ``SPLIT_STACK_IMPORT_TIPS`` (default ``"off"``), trimmed and lowercased."""
    raw = os.environ.get("SPLIT_STACK_IMPORT_TIPS", "off")
    return raw.strip().lower()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _is_disabled(mode: str) -> bool:
    """Tell whether ``mode`` is one of the values that switch import tips off."""
    off_values = frozenset(("0", "false", "no", "off", "quiet"))
    return mode in off_values
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _should_echo_stderr(mode: str) -> bool:
    """Decide whether tips should also be printed to stderr for ``mode``.

    Explicit "on" values always echo. ``"auto"`` echoes only when stderr is
    a TTY and neither the package logger nor the root logger has handlers.
    Every other value does not echo.
    """
    always_echo = frozenset(("1", "true", "yes", "stderr", "on"))
    if mode in always_echo:
        return True
    if mode == "auto" and sys.stderr.isatty():
        # With a handler on either logger the tips are not echoed as well.
        return not (logger.handlers or logging.getLogger().handlers)
    return False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _default_report_profile() -> str:
    """Return the profile reported by ``split_stack.session.default_profile_from_env``."""
    # Imported inside the function, so split_stack.session is loaded only on first use.
    from split_stack.session import default_profile_from_env as read_env_profile

    return read_env_profile()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def model_recommendation_report(
    *,
    profile: str | None = None,
    include_api: bool = False,
    base_url: str = "http://127.0.0.1:11434",
) -> list[str]:
    """Build human-readable lines about installed vs recommended models.

    Args:
        profile: Deployment profile name or alias. When empty, the value of
            ``_default_report_profile()`` is used.
        include_api: When true, installed models come from
            ``list_model_inventory`` (API and disk combined); otherwise only
            ``discover_models_from_disk`` is consulted.
        base_url: Base URL passed to ``list_model_inventory``.

    Returns:
        Report lines. When no models are found, a short starter hint is
        returned instead of the full report.
    """
    from split_stack.community_picks import (
        focus_stack,
        recommended_models_for_tier,
        vram_tier_for_profile,
    )
    from split_stack.discovery import audit_model_folders, discover_models_from_disk, list_model_inventory
    from split_stack.model_registry import normalize_deployment_profile

    profile_name = normalize_deployment_profile(profile or _default_report_profile())
    # Look the tier up from the resolved profile, not the raw argument, so the
    # profile and tier printed in the header always describe the same preset
    # (the raw argument may be None or an alias).
    vram_tier = vram_tier_for_profile(profile_name)

    if include_api:
        inventory = list_model_inventory(base_url=base_url)
        installed = sorted(set(inventory.api_models) | set(inventory.disk_models))
        primary = inventory.manifest_roots[0] if inventory.manifest_roots else None
        audit = None
    else:
        installed = discover_models_from_disk()
        audit = audit_model_folders()
        primary = audit.get("primary_root")

    lines: list[str] = []
    if not installed:
        lines.append("split-stack: no local Ollama models found on disk.")
        lines.append(
            " Starter agent stack: ollama pull gemma4:e4b && "
            "ollama pull qwen3:8b && ollama pull qwen3:14b"
        )
        lines.append(" Then: stack models --include-disk")
        return lines

    header = f"split-stack: {len(installed)} local model(s)"
    if primary:
        header += f" under {primary}"
    lines.append(f"{header} (profile {profile_name}, tier {vram_tier}).")

    stack = focus_stack("agentic", vram_tier=vram_tier)
    if stack and stack.models:
        have = [name for name in stack.models if name in installed]
        missing_stack = [name for name in stack.models if name not in installed]
        lines.append(f" Reddit agent stack: {', '.join(stack.models)}")
        if have:
            lines.append(f" Installed from stack: {', '.join(have)}")
        if missing_stack:
            lines.append(f" Pull for routing spread: {', '.join(missing_stack)}")

    installed_lower = {name.lower() for name in installed}
    recommended = recommended_models_for_tier(vram_tier=vram_tier)
    missing_picks: list[str] = []
    for model_name in recommended:
        lowered = model_name.lower()
        if lowered in installed_lower:
            continue
        # Loose match: a pick counts as present when it is a substring or
        # prefix of an installed name.
        if any(lowered in name or name.startswith(lowered) for name in installed_lower):
            continue
        missing_picks.append(model_name)

    if missing_picks:
        preview = ", ".join(missing_picks[:6])
        if len(missing_picks) > 6:
            preview += ", ..."
        lines.append(f" Community picks to explore: {preview}")

    extras = sorted(
        name
        for name in installed
        if stack and name not in stack.models and name not in recommended
    )
    if extras:
        preview = ", ".join(extras[:6])
        if len(extras) > 6:
            preview += ", ..."
        lines.append(f" Also on disk (not in default stack): {preview}")

    # Reuse the audit taken on the disk-only path instead of running it twice.
    if audit is None:
        audit = audit_model_folders()
    duplicate_tags = audit.get("duplicate_tags") or []
    if duplicate_tags:
        lines.append(
            " Duplicate tags across folders: "
            f"{', '.join(duplicate_tags)} — keep one Ollama models directory."
        )

    lines.append(" Commands: stack models --include-disk | stack tips | stack stacks")
    return lines
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def emit_import_tips(
    *,
    profile: str | None = None,
    include_api: bool = False,
    base_url: str = "http://127.0.0.1:11434",
) -> None:
    """Log model recommendations once per process (controlled by SPLIT_STACK_IMPORT_TIPS).

    Does nothing when tips were already emitted or the mode is a disabled
    value. Otherwise the report lines are logged at INFO level and, when
    ``_should_echo_stderr`` agrees, also printed to stderr. A failure while
    building the report is logged at DEBUG level and swallowed.
    """
    global _emitted
    if _emitted:
        return

    mode = _import_tips_mode()
    if _is_disabled(mode):
        return

    # Set before building the report, so a failed attempt is not retried.
    _emitted = True
    try:
        report = model_recommendation_report(
            profile=profile or _default_report_profile(),
            include_api=include_api,
            base_url=base_url,
        )
    except Exception as exc:
        logger.debug("split-stack import tips skipped: %s", exc)
    else:
        for entry in report:
            logger.info(entry)
        if _should_echo_stderr(mode):
            print("\n".join(report), file=sys.stderr)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def reset_import_tips_for_tests() -> None:
    """Clear the once-per-process flag so a test can call emit_import_tips again."""
    global _emitted
    _emitted = False
|
split_stack/tiering.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from split_stack.model_registry import ModelRegistry, load_registry, model_weight
|
|
4
|
+
from split_stack.models import TierMap
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _is_code_specialist(name: str) -> bool:
    """Tell whether ``name`` looks like a code-focused model tag.

    Names containing ``"reasoning"`` never count; otherwise any known coder
    marker appearing in the lowercased name is enough.
    """
    text = name.lower()
    if "reasoning" in text:
        return False
    for marker in ("codellama", "deepseek-coder", "starcoder", "codegemma", "coder"):
        if marker in text:
            return True
    return False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _is_reasoning_specialist(name: str) -> bool:
    """Tell whether ``name`` carries one of the reasoning-model markers."""
    text = name.lower()
    return any(marker in text for marker in ("deepseek-r1", "reasoning", ":r1"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def assign_tiers(model_names: list[str], registry: ModelRegistry | None = None) -> TierMap:
    """Map model names onto the simple/medium/complex/reasoning/code slots.

    Code specialists are set aside for the ``code`` slot and reasoning
    specialists for the ``reasoning`` slot; the remaining "core" models are
    ordered by ``model_weight`` to fill simple (first), medium (second, or
    first when there is only one) and complex (last). When a split would
    leave no general or no core models, the split is undone for that step.

    Args:
        model_names: Candidate model names; must not be empty.
        registry: Registry used for weights; ``load_registry()`` when falsy.

    Raises:
        ValueError: If ``model_names`` is empty.
    """
    if not model_names:
        raise ValueError("model_names must contain at least one model")

    reg = registry or load_registry()

    def weight(name: str):
        return model_weight(name, reg)

    def last_by_weight(names: list[str]) -> str:
        # sorted(...)[-1] rather than max(): on equal weights the later entry wins.
        return sorted(names, key=weight)[-1]

    coders: list[str] = []
    general: list[str] = []
    for name in model_names:
        (coders if _is_code_specialist(name) else general).append(name)
    if not general:
        # Only code models were given: treat all of them as general instead.
        general, coders = list(model_names), []

    reasoners: list[str] = []
    core: list[str] = []
    for name in general:
        (reasoners if _is_reasoning_specialist(name) else core).append(name)
    if not core:
        core = list(general)

    by_weight = sorted(core, key=weight)
    first, last = by_weight[0], by_weight[-1]
    second = by_weight[1] if len(by_weight) > 1 else first

    return TierMap(
        simple=first,
        medium=second,
        complex=last,
        reasoning=last_by_weight(reasoners) if reasoners else last,
        code=last_by_weight(coders) if coders else None,
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def describe_tiers(tiers: TierMap) -> dict[str, str | None]:
    """Return the five tier slots of ``tiers`` as a plain dict, in slot order."""
    slots = ("simple", "medium", "complex", "reasoning", "code")
    return {slot: getattr(tiers, slot) for slot in slots}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Session and tier-map validation — warnings, not hard failures."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from split_stack.model_registry import load_registry, model_weight
|
|
6
|
+
from split_stack.models import TierMap
|
|
7
|
+
from split_stack.tiering import describe_tiers
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _is_reasoning_specialist(name: str) -> bool:
    """Tell whether the lowercased ``name`` contains a reasoning-model marker."""
    text = name.lower()
    for marker in ("deepseek-r1", "reasoning", ":r1"):
        if marker in text:
            return True
    return False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _is_code_specialist(name: str) -> bool:
    """Tell whether ``name`` looks like a code model and is not a reasoning one."""
    text = name.lower()
    markers = ("codellama", "deepseek-coder", "starcoder", "codegemma", "coder")
    # A "reasoning" name is never classified as a code specialist.
    return "reasoning" not in text and any(marker in text for marker in markers)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def validate_tier_map(
    tiers: TierMap,
    models: list[str] | tuple[str, ...],
    *,
    profile: str | None = None,
) -> list[str]:
    """Return human-readable warnings about a tier ladder.

    The checks are advisory: too few models, several slots sharing one
    model, no reasoning or code specialist among ``models``, a simple slot
    heavier than the medium slot, and a simple slot that is not the lightest
    model by ``model_weight``.

    Args:
        tiers: Tier assignment to inspect.
        models: Model names the tiers were built from; may be empty.
        profile: Profile forwarded to ``load_registry``.

    Returns:
        A list of warning strings; empty when nothing was flagged.
    """
    warnings: list[str] = []
    model_list = list(models)
    registry = load_registry(profile=profile)

    if len(model_list) < 2:
        warnings.append("Fewer than two models — routing cannot spread across tiers.")

    slot_models = {
        tiers.simple,
        tiers.medium,
        tiers.complex,
        tiers.reasoning,
    }
    if tiers.code:
        slot_models.add(tiers.code)
    unique_slots = len(slot_models)
    if unique_slots < min(3, len(model_list)):
        warnings.append(
            "Multiple tier slots map to the same model — spread is mostly cosmetic."
        )

    reasoning_specialists = [name for name in model_list if _is_reasoning_specialist(name)]
    if not reasoning_specialists and tiers.reasoning == tiers.complex:
        warnings.append(
            "No reasoning specialist in models= — hint='reason' uses the complex model "
            f"({tiers.complex}), not a dedicated reasoner."
        )

    code_specialists = [name for name in model_list if _is_code_specialist(name)]
    if not code_specialists:
        warnings.append(
            "No code specialist in models= — hint='code' uses the complex tier "
            f"({tiers.complex}) unless the prompt looks like code."
        )

    try:
        simple_w = model_weight(tiers.simple, registry)
        medium_w = model_weight(tiers.medium, registry)
        if simple_w > medium_w:
            warnings.append(
                f"Simple slot ({tiers.simple}, weight {simple_w}) is heavier than "
                f"medium ({tiers.medium}, weight {medium_w}) — check registry rows for "
                "unknown tags (heuristic weight 1000)."
            )
    except Exception:
        # Deliberate best effort: a failed weight lookup only skips this check.
        pass

    # Check the length first: indexing model_list[0] on an empty list would
    # raise IndexError, turning an advisory check into a hard failure.
    if len(model_list) >= 2 and tiers.simple != model_list[0]:
        ranked = sorted(model_list, key=lambda name: model_weight(name, registry))
        if tiers.simple != ranked[0]:
            warnings.append(
                f"Simple slot is {tiers.simple}; lightest installed tag is {ranked[0]}."
            )

    return warnings
|