split-stack 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,273 @@
1
+ """Model guide: map agent hints and installed models to routing tiers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from split_stack.community_picks import (
8
+ community_index_for_model,
9
+ community_note_for_model,
10
+ recommended_models_for_tier,
11
+ vram_tier_for_profile,
12
+ )
13
+ from split_stack.hints import list_hints
14
+ from split_stack.model_registry import load_registry, resolve_discovered_models
15
+ from split_stack.routing import route_prompt
16
+ from split_stack.tiering import assign_tiers, describe_tiers
17
+
18
# One representative prompt per hint id, in the same spirit as the compare POC
# steps. build_model_guide routes these prompts to show where each hint lands.
HINT_EXAMPLES: dict[str, str] = dict(
    lookup="what is JWT in one sentence?",
    explain="compare session cookies vs JWT for a small SaaS API",
    design="design a webhook retry strategy with idempotency keys",
    code="refactor this auth module for unit tests",
    reason="prove this token expiry policy step by step",
)
26
+
27
# Human-readable label for each tier key; copied into every ModelGuide.
TIER_LABELS: dict[str, str] = dict(
    simple="Simple — fast, cheap steps",
    medium="Medium — summarise and compare",
    complex="Complex — design and heavy generation",
    reasoning="Reasoning — proofs and step-by-step logic",
    code="Code — implement, refactor, debug",
)
34
+
35
# Fallback "best for" blurb per model family, used by _best_for_text when no
# community note/hints and no name-based speciality apply.
FAMILY_BEST_FOR: dict[str, str] = dict(
    gemma="Lookup and quick answers; smallest Gemma tag in your stack",
    qwen="General spine — explain on 8B, design on 14B+",
    llama="Lightweight lookup on 1B/3B; mid tiers on 8B+",
    phi="Reasoning and careful step-by-step (especially phi4-reasoning)",
    deepseek="Reasoning (R1) or code (coder) specialists",
    mistral="Solid medium-tier general work",
    starcoder="Code-only slot when present",
)
44
+
45
+
46
@dataclass(frozen=True)
class HintRoute:
    """Where a single agent hint lands in the active stack.

    build_model_guide creates one instance per entry returned by
    ``list_hints()``, routing that hint's example prompt through
    ``route_prompt``.
    """

    # Hint identifier (the ``id`` key of the hint entry).
    hint_id: str
    # Display label and summary, copied from the hint entry.
    label: str
    summary: str
    # Value of the tier chosen by the router for the example prompt.
    tier: str
    # Model chosen by the router for the example prompt.
    model: str
    # Prompt that was routed: the HINT_EXAMPLES entry, or the hint summary.
    example_prompt: str
54
+
55
+
56
@dataclass(frozen=True)
class ModelCard:
    """Display record for one model shown in the guide.

    build_model_guide emits a card for every resolved model in the pool and
    for every recommended model that is not installed.
    """

    # Model tag.
    name: str
    # Family from the registry/heuristic; for not-installed recommendations
    # it is the part of the tag before ":".
    family: str | None
    # Relative size used for sorting; 0 for not-installed recommendations.
    weight: int
    # VRAM estimate in GB, or None when unknown.
    vram_gb: int | None
    # Tier keys this model is assigned to.
    tier_slots: tuple[str, ...]
    # Hint ids routed to this model (empty when not in the active stack).
    hints: tuple[str, ...]
    # One-line description produced by _best_for_text.
    best_for: str
    # Whether the model is part of the active stack.
    in_stack: bool
    # Whether the model passed the registry's VRAM check.
    vram_ok: bool
    # Community metadata for this model, when available.
    community_note: str | None = None
    community_hints: tuple[str, ...] = ()
    # False for recommended-but-missing models.
    installed: bool = True
    # Locations of the tag when it was found in more than one place.
    duplicate_locations: tuple[str, ...] = ()
    # "installed", "duplicate" or "recommended" as set by build_model_guide.
    status: str = "installed"
72
+
73
+
74
@dataclass(frozen=True)
class ModelGuide:
    """Complete result of build_model_guide: routes, tiers and model cards."""

    # Active stack, in the order the caller supplied it.
    stack: tuple[str, ...]
    # Tier key -> assigned model (or None) for the active stack.
    tiers: dict[str, str | None]
    # Tier key -> display label (a copy of TIER_LABELS).
    tier_labels: dict[str, str]
    # One route per known hint.
    hint_routes: tuple[HintRoute, ...]
    # Sorted cards for installed and recommended models.
    models: tuple[ModelCard, ...]
    # VRAM tier derived from the deployment profile.
    vram_tier: str | None = None
    # Result of the model-folder audit.
    audit: dict[str, object] | None = None
    # Recommended model names that are not installed.
    missing_recommended: tuple[str, ...] = ()
84
+
85
+
86
+ def _tier_slots_for_model(name: str, tiers: dict[str, str | None]) -> list[str]:
87
+ slots: list[str] = []
88
+ for key, value in tiers.items():
89
+ if value == name:
90
+ slots.append(key)
91
+ return slots
92
+
93
+
94
+ def _hints_for_model(name: str, hint_routes: tuple[HintRoute, ...]) -> list[str]:
95
+ return [item.hint_id for item in hint_routes if item.model == name]
96
+
97
+
98
+ def _best_for_text(
99
+ *,
100
+ name: str,
101
+ family: str | None,
102
+ tier_slots: list[str],
103
+ in_stack: bool,
104
+ community_note: str | None,
105
+ community_hints: tuple[str, ...],
106
+ installed: bool,
107
+ ) -> str:
108
+ parts: list[str] = []
109
+ if community_note:
110
+ parts.append(community_note)
111
+ elif community_hints:
112
+ parts.append(f"Community pick for: {', '.join(community_hints)}")
113
+ if not installed:
114
+ return parts[0] if parts else "Recommended by community — not installed yet"
115
+
116
+ lowered = name.lower()
117
+ if "reasoning" in lowered or "deepseek-r1" in lowered:
118
+ parts.append("Reasoning steps — proofs and step-by-step logic")
119
+ elif any(token in lowered for token in ("coder", "codellama", "starcoder")):
120
+ parts.append("Code steps — refactor, debug, implement")
121
+ elif family and family in FAMILY_BEST_FOR and not parts:
122
+ parts.append(FAMILY_BEST_FOR[family])
123
+
124
+ if not in_stack:
125
+ suffix = "Installed but not in your active stack"
126
+ return f"{parts[0]} — {suffix}" if parts else suffix
127
+ if "simple" in tier_slots:
128
+ parts.append("Routed for lookup hints")
129
+ elif "medium" in tier_slots and "complex" not in tier_slots:
130
+ parts.append("Routed for explain hints")
131
+ elif "complex" in tier_slots:
132
+ parts.append("Routed for design/code hints")
133
+ elif "reasoning" in tier_slots:
134
+ parts.append("Routed for reason hints")
135
+ return " · ".join(dict.fromkeys(p for p in parts if p))
136
+
137
+
138
def build_model_guide(
    stack: list[str],
    *,
    pool: list[str] | None = None,
    config_path: str | None = None,
    profile: str = "workstation_12gb",
) -> ModelGuide:
    """Build hint routes for the active stack and cards for all models in pool.

    Args:
        stack: Models that make up the active stack; must not be empty.
        pool: Every installed model to describe. Defaults to ``stack``.
        config_path: Optional configuration path forwarded to the registry and
            community-pick lookups.
        profile: Deployment profile used to derive the community VRAM tier.

    Returns:
        A ModelGuide holding the stack's tier assignment, one HintRoute per
        known hint, and sorted cards for installed models plus recommended
        models that are not installed.

    Raises:
        ValueError: If ``stack`` is empty.
    """
    if not stack:
        raise ValueError("stack must contain at least one model")

    # NOTE(review): `profile` only feeds the community VRAM tier; the registry
    # is loaded without it. Confirm that this is intended.
    registry = load_registry(config_path)
    vram_tier = vram_tier_for_profile(profile, config_path=config_path)
    # Function-scope import, kept as in the original (presumably to avoid an
    # import cycle or eager filesystem work — confirm).
    from split_stack.discovery import audit_model_folders, model_locations_by_tag

    locations = model_locations_by_tag()
    audit = audit_model_folders()
    recommended = recommended_models_for_tier(vram_tier=vram_tier, config_path=config_path)
    tiers_map = assign_tiers(stack, registry=registry)
    tiers = describe_tiers(tiers_map)

    def route_for(hint) -> HintRoute:
        """Route the example prompt of one hint entry through the active stack."""
        hint_id = hint["id"]
        prompt = HINT_EXAMPLES.get(hint_id, hint["summary"])
        tier, model = route_prompt(prompt, tiers_map, hint=hint_id)
        return HintRoute(
            hint_id=hint_id,
            label=hint["label"],
            summary=hint["summary"],
            tier=tier.value,
            model=model,
            example_prompt=prompt,
        )

    hint_routes_tuple = tuple(route_for(hint) for hint in list_hints())

    catalog = stack if pool is None else pool
    resolved = resolve_discovered_models(sorted(set(catalog)), registry=registry)
    active = set(stack)

    # Models outside the stack are slotted against a tiering of the whole
    # catalog, which is only computed when the catalog has at least two models.
    if len(catalog) >= 2:
        full_tiers = describe_tiers(assign_tiers(list(catalog), registry=registry))
    else:
        full_tiers = tiers

    community_kwargs = {"vram_tier": vram_tier, "config_path": config_path}
    seen_names: set[str] = set()
    cards: list[ModelCard] = []

    for item in resolved:
        seen_names.add(item.name)
        in_stack = item.name in active
        tier_slots = _tier_slots_for_model(item.name, tiers if in_stack else full_tiers)
        route_hints = _hints_for_model(item.name, hint_routes_tuple) if in_stack else []
        comm_hints = community_index_for_model(item.name, **community_kwargs)
        comm_note = community_note_for_model(item.name, **community_kwargs)
        locs = locations.get(item.name, ())
        duplicated = len(locs) > 1
        description = _best_for_text(
            name=item.name,
            family=item.family,
            tier_slots=tier_slots,
            in_stack=in_stack,
            community_note=comm_note,
            community_hints=comm_hints,
            installed=True,
        )
        cards.append(
            ModelCard(
                name=item.name,
                family=item.family,
                weight=item.weight,
                vram_gb=item.vram_gb,
                tier_slots=tuple(tier_slots),
                hints=tuple(route_hints),
                best_for=description,
                in_stack=in_stack,
                vram_ok=item.vram_ok,
                community_note=comm_note,
                community_hints=comm_hints,
                installed=True,
                duplicate_locations=tuple(locs) if duplicated else (),
                status="duplicate" if duplicated else "installed",
            )
        )

    installed_lower = {known.lower() for known in seen_names}
    missing: list[str] = []
    for model_name, note in recommended.items():
        wanted = model_name.lower()
        # A recommendation counts as installed when its name occurs inside any
        # installed tag; this covers exact and prefix matches as well.
        if any(wanted in known for known in installed_lower):
            continue
        missing.append(model_name)
        comm_hints = community_index_for_model(model_name, **community_kwargs)
        family_guess = model_name.split(":")[0]
        cards.append(
            ModelCard(
                name=model_name,
                family=family_guess,
                weight=0,
                vram_gb=None,
                tier_slots=(),
                hints=(),
                best_for=_best_for_text(
                    name=model_name,
                    family=family_guess,
                    tier_slots=[],
                    in_stack=False,
                    community_note=note,
                    community_hints=comm_hints,
                    installed=False,
                ),
                in_stack=False,
                vram_ok=True,
                community_note=note,
                community_hints=comm_hints,
                installed=False,
                status="recommended",
            )
        )

    def card_order(card: ModelCard) -> tuple[bool, bool, bool, int, str]:
        """Order: installed, then duplicates, then recommended; stack first, then by size and name."""
        return (
            card.status != "installed",
            card.status == "recommended",
            not card.in_stack,
            card.weight,
            card.name,
        )

    cards.sort(key=card_order)

    return ModelGuide(
        stack=tuple(stack),
        tiers=tiers,
        tier_labels=dict(TIER_LABELS),
        hint_routes=hint_routes_tuple,
        models=tuple(cards),
        vram_tier=vram_tier,
        audit=audit,
        missing_recommended=tuple(missing),
    )
@@ -0,0 +1,314 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
# VRAM budget in GB assumed when nothing else is configured.
DEFAULT_ASSUMED_VRAM_GB = 12
# Profile used when no profile name is supplied.
DEFAULT_DEPLOYMENT_PROFILE = "workstation_12gb"
11
+
12
+
13
@dataclass(frozen=True)
class DeploymentProfileSpec:
    """Static description of one deployment profile preset."""

    # Canonical profile name; also the key in DEPLOYMENT_PROFILES.
    name: str
    # VRAM budget in GB, or None when the profile has no budget.
    assumed_vram_gb: int | None
    # Whether models are checked against the VRAM budget.
    apply_vram_filter: bool
    # Human-readable summary of the preset.
    description: str
19
+
20
+
21
# Built-in deployment presets, keyed by their canonical name. Workstation
# presets carry a VRAM budget; the datacenter preset disables the VRAM filter.
DEPLOYMENT_PROFILES: dict[str, DeploymentProfileSpec] = {
    preset.name: preset
    for preset in (
        DeploymentProfileSpec("workstation_8gb", 8, True, "8 GB GPU workstation preset"),
        DeploymentProfileSpec("workstation_12gb", 12, True, "12 GB GPU workstation preset (default)"),
        DeploymentProfileSpec("workstation_16gb", 16, True, "16 GB GPU workstation preset"),
        DeploymentProfileSpec(
            "workstation_24gb",
            24,
            True,
            "24 GB GPU workstation preset (4090, 3090 class)",
        ),
        DeploymentProfileSpec(
            "workstation_32gb",
            32,
            True,
            "32 GB GPU workstation preset (5090 class; top single-GPU tier)",
        ),
        DeploymentProfileSpec(
            "datacenter",
            None,
            False,
            "Private inference fleet; custom model catalog, no VRAM filter",
        ),
    )
}
59
+
60
# Short names accepted by normalize_deployment_profile: "workstation" maps to
# the default preset and "<N>gb" maps to "workstation_<N>gb".
_PROFILE_ALIASES: dict[str, str] = {"workstation": DEFAULT_DEPLOYMENT_PROFILE}
_PROFILE_ALIASES.update(
    {f"{size}gb": f"workstation_{size}gb" for size in (8, 12, 16, 24, 32)}
)
68
+
69
+ _BUILTIN_RAW: list[dict[str, object]] = [
70
+ {"match": "gemma4:e4b", "weight": 4000, "vram_gb": 4, "family": "gemma"},
71
+ {"match": "gemma4:12b", "weight": 12000, "vram_gb": 10, "family": "gemma"},
72
+ {"match": "gemma4:26b-a4b", "weight": 26000, "vram_gb": 20, "family": "gemma"},
73
+ {"match": "gemma4:26b", "weight": 26000, "vram_gb": 22, "family": "gemma"},
74
+ {"match": "gemma4:31b", "weight": 31000, "vram_gb": 28, "family": "gemma"},
75
+ {"match": "gemma3:4b", "weight": 4000, "vram_gb": 4, "family": "gemma"},
76
+ {"match": "gemma3:12b", "weight": 12000, "vram_gb": 10, "family": "gemma"},
77
+ {"match": "qwen3:4b", "weight": 4000, "vram_gb": 4, "family": "qwen"},
78
+ {"match": "qwen3:8b", "weight": 8000, "vram_gb": 6, "family": "qwen"},
79
+ {"match": "qwen3:14b", "weight": 14000, "vram_gb": 10, "family": "qwen"},
80
+ {"match": "qwen3:30b", "weight": 30000, "vram_gb": 20, "family": "qwen"},
81
+ {"match": "qwen3:30b-a3b", "weight": 30000, "vram_gb": 20, "family": "qwen"},
82
+ {"match": "llama3.2:1b", "weight": 1000, "vram_gb": 2, "family": "llama"},
83
+ {"match": "llama3.2:3b", "weight": 3000, "vram_gb": 3, "family": "llama"},
84
+ {"match": "llama3.1:8b", "weight": 8000, "vram_gb": 6, "family": "llama"},
85
+ {"match": "llama3.1:70b", "weight": 70000, "vram_gb": 48, "family": "llama"},
86
+ {"match": "mistral:7b", "weight": 7000, "vram_gb": 5, "family": "mistral"},
87
+ {"match": "mistral-nemo", "weight": 12000, "vram_gb": 8, "family": "mistral"},
88
+ {"match": "phi3:mini", "weight": 3800, "vram_gb": 4, "family": "phi"},
89
+ {"match": "phi4", "weight": 14000, "vram_gb": 10, "family": "phi"},
90
+ {"match": "phi4-reasoning", "weight": 14000, "vram_gb": 10, "family": "phi"},
91
+ {"match": "deepseek-coder:6.7b", "weight": 7000, "vram_gb": 6, "family": "deepseek"},
92
+ {"match": "deepseek-r1", "weight": 14000, "vram_gb": 10, "family": "deepseek"},
93
+ {"match": "deepseek-coder", "weight": 7000, "vram_gb": 6, "family": "deepseek"},
94
+ {"match": "codellama", "weight": 7000, "vram_gb": 6, "family": "llama"},
95
+ {"match": "starcoder2", "weight": 7000, "vram_gb": 6, "family": "starcoder"},
96
+ {"match": ":e4b", "weight": 4000, "vram_gb": 4, "family": "gemma"},
97
+ {"match": ":e2b", "weight": 2000, "vram_gb": 3, "family": "gemma"},
98
+ ]
99
+
100
+
101
@dataclass(frozen=True)
class ModelEntry:
    """One catalog rule mapping a name token to size metadata."""

    # Token looked up (case-insensitively) inside model names.
    match: str
    # Relative size used for ordering models.
    weight: int
    # VRAM estimate in GB, when known.
    vram_gb: int | None = None
    # Model family, when known.
    family: str | None = None
107
+
108
+
109
@dataclass(frozen=True)
class ModelRegistry:
    """Model catalog combined with the deployment profile it is evaluated under."""

    # Canonical deployment profile name.
    profile: str
    # VRAM budget in GB; None when no budget applies.
    assumed_vram_gb: int | None
    # Whether models are checked against ``assumed_vram_gb``.
    apply_vram_filter: bool
    # Catalog rules consulted by infer_model_profile.
    entries: tuple[ModelEntry, ...]
115
+
116
+
117
@dataclass(frozen=True)
class ResolvedModel:
    """Size and VRAM profile inferred for one concrete model name."""

    # Model name exactly as supplied by the caller.
    name: str
    # Relative size used for ordering.
    weight: int
    # VRAM estimate in GB after the quantization adjustment, or None.
    vram_gb: int | None
    # Model family, when it could be determined.
    family: str | None
    # Whether the model fits the registry's VRAM budget (True when unfiltered).
    vram_ok: bool
    # "registry" when a catalog entry matched, otherwise "heuristic".
    source: str
    # Normalized quantization mode; None when it is the default mode.
    quant_mode: str | None = None
126
+
127
+
128
def _entries_from_raw(raw: list[dict[str, object]]) -> tuple[ModelEntry, ...]:
    """Convert raw catalog records into ModelEntry objects, preserving order.

    ``match`` and ``weight`` are required keys. ``vram_gb`` becomes None when
    missing or None, and ``family`` becomes None when missing or falsy.
    """
    converted: list[ModelEntry] = []
    for record in raw:
        vram = record.get("vram_gb")
        family = record.get("family")
        converted.append(
            ModelEntry(
                match=str(record["match"]),
                weight=int(record["weight"]),  # type: ignore[arg-type]
                vram_gb=None if vram is None else int(vram),  # type: ignore[arg-type]
                family=str(family) if family else None,
            )
        )
    return tuple(converted)
138
+
139
+
140
def normalize_deployment_profile(name: str | None) -> str:
    """Resolve a user-supplied profile name or alias to its canonical name.

    Args:
        name: Profile name or alias; surrounding whitespace and letter case are
            ignored. None or an empty string selects the default profile.

    Returns:
        A key of DEPLOYMENT_PROFILES.

    Raises:
        ValueError: If the name is neither a known profile nor a known alias.
    """
    if not name:
        return DEFAULT_DEPLOYMENT_PROFILE

    key = name.strip().lower()
    if key in DEPLOYMENT_PROFILES:
        return key

    alias_target = _PROFILE_ALIASES.get(key)
    if alias_target is not None:
        return alias_target

    valid = ", ".join(sorted(DEPLOYMENT_PROFILES))
    raise ValueError(f"Unknown deployment profile '{name}'. Valid profiles: {valid}")
150
+
151
+
152
def list_deployment_profiles() -> tuple[DeploymentProfileSpec, ...]:
    """Return every built-in deployment profile, ordered by profile name."""
    ordered: list[DeploymentProfileSpec] = []
    for profile_name in sorted(DEPLOYMENT_PROFILES):
        ordered.append(DEPLOYMENT_PROFILES[profile_name])
    return tuple(ordered)
154
+
155
+
156
def _default_registry() -> ModelRegistry:
    """Build the registry for the default profile with the built-in catalog."""
    default_spec = DEPLOYMENT_PROFILES[DEFAULT_DEPLOYMENT_PROFILE]
    builtin_entries = _entries_from_raw(_BUILTIN_RAW)
    return ModelRegistry(
        profile=default_spec.name,
        assumed_vram_gb=default_spec.assumed_vram_gb,
        apply_vram_filter=default_spec.apply_vram_filter,
        entries=builtin_entries,
    )
164
+
165
+
166
def _registry_from_payload(
    payload: dict[str, object],
    *,
    profile_override: str | None = None,
) -> ModelRegistry:
    """Build a registry from a parsed configuration payload.

    Args:
        payload: Parsed configuration. The keys read are ``deployment_profile``
            (or ``profile``), ``models`` and ``assumed_vram_gb``.
        profile_override: Profile that takes precedence over the payload's own
            profile and over its ``assumed_vram_gb``.

    Returns:
        A ModelRegistry using the payload's ``models`` when present and
        non-empty, otherwise the built-in catalog.

    Raises:
        ValueError: If the selected profile name is unknown.
    """
    requested = profile_override
    if not requested:
        configured_profile = payload.get("deployment_profile") or payload.get("profile") or ""
        # An empty string becomes None so that the default profile is chosen.
        requested = str(configured_profile) or None
    profile_name = normalize_deployment_profile(requested)
    spec = DEPLOYMENT_PROFILES[profile_name]

    raw_models = payload.get("models")
    entries = _entries_from_raw(list(raw_models) if raw_models else _BUILTIN_RAW)

    if not spec.apply_vram_filter:
        assumed_vram_gb = None
    elif profile_override:
        # An explicit override ignores any budget stored in the payload.
        assumed_vram_gb = spec.assumed_vram_gb
    else:
        configured_vram = payload.get("assumed_vram_gb")
        assumed_vram_gb = spec.assumed_vram_gb if configured_vram is None else int(configured_vram)

    return ModelRegistry(
        profile=profile_name,
        assumed_vram_gb=assumed_vram_gb,
        apply_vram_filter=spec.apply_vram_filter,
        entries=entries,
    )
193
+
194
+
195
def config_search_paths(explicit: str | None = None) -> list[Path]:
    """List candidate configuration files in lookup order.

    The order is: the explicit path (if given), the path named by the
    ``SPLIT_STACK_MODELS_CONFIG`` environment variable (if set and non-empty),
    ``split-stack.models.json`` in the current directory, and finally
    ``~/.config/split-stack/models.json``. The paths are not checked for
    existence.
    """
    explicit_part = [Path(explicit)] if explicit else []
    env_value = os.environ.get("SPLIT_STACK_MODELS_CONFIG")
    env_part = [Path(env_value)] if env_value else []
    fallback_part = [
        Path.cwd() / "split-stack.models.json",
        Path.home() / ".config" / "split-stack" / "models.json",
    ]
    return [*explicit_part, *env_part, *fallback_part]
209
+
210
+
211
def load_registry(
    config_path: str | None = None,
    *,
    profile: str | None = None,
) -> ModelRegistry:
    """Load the model registry from the first existing configuration file.

    Args:
        config_path: Optional explicit configuration path, tried first.
        profile: Optional deployment profile that overrides the configured one.

    Returns:
        The registry built from the first candidate file that exists. When no
        file exists, the built-in catalog under ``profile``, or the default
        registry when no profile is given.

    Raises:
        ValueError: If the selected profile name is unknown.
    """
    config_file = next(
        (candidate for candidate in config_search_paths(config_path) if candidate.is_file()),
        None,
    )
    if config_file is not None:
        # "utf-8-sig" tolerates a leading byte-order mark in the JSON file.
        payload = json.loads(config_file.read_text(encoding="utf-8-sig"))
        return _registry_from_payload(payload, profile_override=profile)

    if not profile:
        return _default_registry()

    spec = DEPLOYMENT_PROFILES[normalize_deployment_profile(profile)]
    return ModelRegistry(
        profile=spec.name,
        assumed_vram_gb=spec.assumed_vram_gb,
        apply_vram_filter=spec.apply_vram_filter,
        entries=_default_registry().entries,
    )
230
+
231
+
232
+ def _heuristic_weight(name: str) -> int:
233
+ lowered = name.lower()
234
+ match = re.search(r":(\d+)b", lowered)
235
+ if match:
236
+ return int(match.group(1)) * 1000
237
+ match = re.search(r":e(\d+)b", lowered)
238
+ if match:
239
+ return int(match.group(1)) * 1000
240
+ if "70b" in lowered:
241
+ return 70000
242
+ if "30b" in lowered or "32b" in lowered or "34b" in lowered:
243
+ return 30000
244
+ return 1000
245
+
246
+
247
def infer_model_profile(
    name: str,
    registry: ModelRegistry | None = None,
    *,
    quant_mode: str | None = None,
) -> ResolvedModel:
    """Infer weight, family and VRAM fit for a single model name.

    The registry entry whose match token is the longest one contained in the
    lower-cased name wins; on equal length the earlier entry is kept. Without
    any match, weight and family are guessed from the name and the VRAM
    estimate is ``max(3, weight // 1000)``.

    Args:
        name: Model name to profile.
        registry: Registry to consult; the default registry when omitted.
        quant_mode: Optional quantization mode used to adjust the VRAM estimate.

    Returns:
        The resolved profile. ``vram_ok`` is always True when the registry does
        not filter by VRAM or has no VRAM budget.
    """
    reg = registry or _default_registry()
    needle = name.lower()

    candidates = [entry for entry in reg.entries if entry.match.lower() in needle]
    # max() keeps the first of several equally long tokens.
    best = max(candidates, key=lambda entry: len(entry.match.lower()), default=None)

    if best is None:
        source = "heuristic"
        weight = _heuristic_weight(name)
        family = _guess_family(name)
        vram_gb = max(3, weight // 1000)
    else:
        source = "registry"
        weight = best.weight
        family = best.family
        vram_gb = best.vram_gb

    from split_stack.quantization import adjust_vram_for_quant, normalize_quant_mode

    mode = normalize_quant_mode(quant_mode)
    effective_vram = adjust_vram_for_quant(name, vram_gb, mode)

    if reg.apply_vram_filter and reg.assumed_vram_gb is not None:
        # An unknown VRAM requirement is treated as fitting.
        vram_ok = effective_vram is None or effective_vram <= reg.assumed_vram_gb
    else:
        vram_ok = True

    return ResolvedModel(
        name=name,
        weight=weight,
        vram_gb=effective_vram,
        family=family,
        vram_ok=vram_ok,
        source=source,
        quant_mode=None if mode == "default" else mode,
    )
289
+
290
+
291
+ def _guess_family(name: str) -> str | None:
292
+ lowered = name.lower()
293
+ for family in ("qwen", "gemma", "llama", "mistral", "phi", "deepseek"):
294
+ if family in lowered:
295
+ return family
296
+ return None
297
+
298
+
299
def resolve_discovered_models(
    model_names: list[str],
    *,
    registry: ModelRegistry | None = None,
    only_vram_ok: bool = False,
    quant_mode: str | None = None,
) -> list[ResolvedModel]:
    """Profile every model name and return the results ordered by weight.

    Args:
        model_names: Names of the discovered models.
        registry: Registry to consult; the default registry when omitted.
        only_vram_ok: Drop models that do not pass the VRAM check.
        quant_mode: Optional quantization mode forwarded to each profile.

    Returns:
        Resolved models in ascending weight; equal weights keep input order.
    """
    reg = registry or _default_registry()
    profiles: list[ResolvedModel] = []
    for model_name in model_names:
        resolved_model = infer_model_profile(model_name, reg, quant_mode=quant_mode)
        if only_vram_ok and not resolved_model.vram_ok:
            continue
        profiles.append(resolved_model)
    profiles.sort(key=lambda resolved_model: resolved_model.weight)
    return profiles
311
+
312
+
313
def model_weight(name: str, registry: ModelRegistry | None = None) -> int:
    """Return only the inferred weight of ``name``."""
    resolved_model = infer_model_profile(name, registry)
    return resolved_model.weight
split_stack/models.py ADDED
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+
7
class ComplexityTier(str, Enum):
    """Routing tiers; each member is also a plain string equal to its value."""

    SIMPLE = "simple"
    MEDIUM = "medium"
    COMPLEX = "complex"
    REASONING = "reasoning"
12
+
13
+
14
class StepKind(str, Enum):
    """Kinds of agent step; each member is also a plain string equal to its value."""

    LOOKUP = "lookup"
    EXPLAIN = "explain"
    DESIGN = "design"
    CODE = "code"
    REASON = "reason"
    # Deprecated aliases, kept so that older values still parse.
    WORK = "work"
    BUILD = "build"
23
+
24
+
25
@dataclass(frozen=True)
class TierMap:
    """Model assigned to each routing tier, plus an optional code model."""

    simple: str
    medium: str
    complex: str
    reasoning: str
    # Optional dedicated code model; not reachable through for_tier.
    code: str | None = None

    def for_tier(self, tier: ComplexityTier) -> str:
        """Return the model assigned to ``tier``.

        Raises:
            KeyError: If ``tier`` is not one of the ComplexityTier members.
        """
        # Each ComplexityTier value is also the name of the matching field.
        by_tier = {member: getattr(self, member.value) for member in ComplexityTier}
        return by_tier[tier]
41
+
42
+
43
@dataclass(frozen=True)
class RouteDecision:
    """Full routing outcome for logging, CLI explain, and agent-loop telemetry."""

    # Chosen tier and the model serving it.
    tier: ComplexityTier
    model: str
    # Hint and step kind associated with the decision, if any.
    hint: str | None
    step_kind: str | None
    # Labels describing where the tier and the model choice came from.
    tier_source: str
    model_source: str
    # Explanations collected while routing.
    reasons: tuple[str, ...]
    # Tier key -> model (or None) mapping in effect for the decision.
    tiers: dict[str, str | None]

    def to_dict(self) -> dict[str, object]:
        """Return the decision as a plain dict.

        The tier is reduced to its value and ``reasons`` becomes a list; the
        ``tiers`` mapping is passed through without copying.
        """
        payload: dict[str, object] = dict(
            tier=self.tier.value,
            model=self.model,
            hint=self.hint,
            step_kind=self.step_kind,
            tier_source=self.tier_source,
            model_source=self.model_source,
            reasons=list(self.reasons),
            tiers=self.tiers,
        )
        return payload

    def as_tuple(self) -> tuple[ComplexityTier, str]:
        """Return the compact ``(tier, model)`` pair."""
        return (self.tier, self.model)
70
+
71
+
72
@dataclass(frozen=True)
class StackAdvice:
    """Immutable bundle of stack advice values.

    NOTE(review): nothing in view shows how the fields are used; the comments
    below are read from the field names only — confirm against the producer.
    """

    # Presumably the model recommended for Cursor.
    cursor_model: str
    # Presumably the routes suggested for prose work and for local work.
    prose_path: str
    local_path: str
    # Presumably set when a warning about a Cursor override should be shown.
    warn_cursor_override: bool
@@ -0,0 +1,30 @@
1
+ """Actionable error messages for Ollama HTTP failures."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def format_ollama_error(
    exc: BaseException,
    *,
    model: str,
    base_url: str = "http://127.0.0.1:11434",
) -> str:
    """Turn a requests/Ollama failure into a short, actionable message.

    Args:
        exc: The exception that was raised.
        model: Model the request was made for.
        base_url: Ollama base URL quoted in the message.

    Returns:
        A specific hint for timeouts, connection failures and HTTP errors that
        carry a response; ``str(exc)`` for anything else, and also when
        ``requests`` cannot be imported.
    """
    try:
        import requests
    except ImportError:
        return str(exc)

    # Timeouts are tested before connection errors, so an exception that is
    # both gets the timeout message.
    if isinstance(exc, requests.Timeout):
        return f"Ollama request timed out for model '{model}' at {base_url}."
    if isinstance(exc, requests.ConnectionError):
        return f"Ollama not reachable at {base_url}. Start Ollama first."

    response = exc.response if isinstance(exc, requests.HTTPError) else None
    if response is None:
        return str(exc)

    status = response.status_code
    if status == 404:
        return f"Model '{model}' not found. Run: ollama pull {model}"
    return f"Ollama HTTP {status} for model '{model}': {response.reason}"