split-stack 0.2.0__tar.gz → 0.3.0__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (62)
  1. {split_stack-0.2.0/src/split_stack.egg-info → split_stack-0.3.0}/PKG-INFO +1 -1
  2. {split_stack-0.2.0 → split_stack-0.3.0}/pyproject.toml +1 -1
  3. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/__init__.py +4 -1
  4. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/cli.py +62 -0
  5. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/discovery.py +1 -1
  6. split_stack-0.3.0/src/split_stack/poc_models.py +201 -0
  7. split_stack-0.3.0/src/split_stack/stack_health.py +360 -0
  8. {split_stack-0.2.0 → split_stack-0.3.0/src/split_stack.egg-info}/PKG-INFO +1 -1
  9. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack.egg-info/SOURCES.txt +2 -0
  10. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_hints.py +14 -1
  11. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_setup_cli.py +23 -0
  12. split_stack-0.3.0/tests/test_stack_health.py +112 -0
  13. split_stack-0.2.0/src/split_stack/poc_models.py +0 -131
  14. {split_stack-0.2.0 → split_stack-0.3.0}/LICENSE +0 -0
  15. {split_stack-0.2.0 → split_stack-0.3.0}/README.md +0 -0
  16. {split_stack-0.2.0 → split_stack-0.3.0}/setup.cfg +0 -0
  17. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/__main__.py +0 -0
  18. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/advice.py +0 -0
  19. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/benchmark.py +0 -0
  20. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/community_picks.py +0 -0
  21. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/compare.py +0 -0
  22. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/complexity.py +0 -0
  23. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/hints.py +0 -0
  24. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/local_models.py +0 -0
  25. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/model_guide.py +0 -0
  26. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/model_registry.py +0 -0
  27. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/models.py +0 -0
  28. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/ollama_errors.py +0 -0
  29. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/ollama_generate.py +0 -0
  30. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/presets.py +0 -0
  31. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/quantization.py +0 -0
  32. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/requirements.py +0 -0
  33. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/routing.py +0 -0
  34. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/session.py +0 -0
  35. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/setup_wizard.py +0 -0
  36. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/startup_tips.py +0 -0
  37. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/tiering.py +0 -0
  38. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack/validation.py +0 -0
  39. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack.egg-info/dependency_links.txt +0 -0
  40. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack.egg-info/entry_points.txt +0 -0
  41. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack.egg-info/requires.txt +0 -0
  42. {split_stack-0.2.0 → split_stack-0.3.0}/src/split_stack.egg-info/top_level.txt +0 -0
  43. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_advice.py +0 -0
  44. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_benchmark.py +0 -0
  45. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_benchmark_cli.py +0 -0
  46. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_cli.py +0 -0
  47. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_community_picks.py +0 -0
  48. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_compare.py +0 -0
  49. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_complexity.py +0 -0
  50. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_discovery_audit.py +0 -0
  51. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_explain.py +0 -0
  52. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_model_guide.py +0 -0
  53. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_model_registry.py +0 -0
  54. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_presets.py +0 -0
  55. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_quantization.py +0 -0
  56. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_quickstart.py +0 -0
  57. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_requirements.py +0 -0
  58. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_routing.py +0 -0
  59. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_session.py +0 -0
  60. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_setup.py +0 -0
  61. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_startup_tips.py +0 -0
  62. {split_stack-0.2.0 → split_stack-0.3.0}/tests/test_tiering.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: split-stack
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Python routing library for local LLM agent loops: score prompts, map tiers to model names, embed in your runner.
5
5
  Author: Eddie Baumel
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "split-stack"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "Python routing library for local LLM agent loops: score prompts, map tiers to model names, embed in your runner."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -46,11 +46,12 @@ from split_stack.session import (
46
46
  route,
47
47
  session_warnings,
48
48
  )
49
+ from split_stack.stack_health import check_stack_health, format_stack_health
49
50
  from split_stack.startup_tips import emit_import_tips, model_recommendation_report
50
51
  from split_stack.tiering import assign_tiers, describe_tiers
51
52
  from split_stack.validation import validate_tier_map
52
53
 
53
- __version__ = "0.2.0"
54
+ __version__ = "0.3.0"
54
55
 
55
56
  __all__ = [
56
57
  "ComplexityTier",
@@ -98,6 +99,8 @@ __all__ = [
98
99
  "score_prompt",
99
100
  "session_warnings",
100
101
  "stack_recommendation",
102
+ "check_stack_health",
103
+ "format_stack_health",
101
104
  "usage_requirements",
102
105
  "validate_tier_map",
103
106
  "__version__",
@@ -20,6 +20,7 @@ from split_stack.ollama_generate import ask_prompt_json, route_prompt_json
20
20
  from split_stack.requirements import UsageProfile, list_usage_profiles, usage_requirements
21
21
  from split_stack.presets import assign_recommended_tiers, list_recommended_stacks, recommended_models
22
22
  from split_stack.setup_wizard import format_setup_summary, plan_setup, run_setup
23
+ from split_stack.stack_health import check_stack_health, format_stack_health
23
24
  from split_stack.tiering import assign_tiers, describe_tiers
24
25
 
25
26
 
@@ -159,7 +160,47 @@ def _cmd_profiles(args: argparse.Namespace) -> int:
159
160
  return 0
160
161
 
161
162
 
163
+ def _cmd_doctor_check_stack(args: argparse.Namespace) -> int:
164
+ models = None
165
+ if args.models:
166
+ models = [part.strip() for part in args.models.split(",") if part.strip()]
167
+ report = check_stack_health(
168
+ profile=args.profile,
169
+ vram_gb=args.vram_gb,
170
+ quant=args.quant,
171
+ base_url=args.base_url,
172
+ models=models,
173
+ )
174
+ if args.json:
175
+ payload = {
176
+ "ready": report.ready,
177
+ "profile": report.profile,
178
+ "vram_gb": report.vram_gb,
179
+ "quant": report.quant,
180
+ "recommended": list(report.recommended),
181
+ "resolved": list(report.resolved),
182
+ "missing": list(report.missing),
183
+ "pool_size": report.pool_size,
184
+ "inventory_note": report.inventory_note,
185
+ "findings": [
186
+ {
187
+ "level": item.level,
188
+ "code": item.code,
189
+ "message": item.message,
190
+ "models": list(item.models),
191
+ }
192
+ for item in report.findings
193
+ ],
194
+ }
195
+ return _emit_json(payload)
196
+ print(format_stack_health(report))
197
+ return 0 if report.ready else 1
198
+
199
+
162
200
  def _cmd_doctor(args: argparse.Namespace) -> int:
201
+ if args.check_stack:
202
+ return _cmd_doctor_check_stack(args)
203
+
163
204
  advice = stack_recommendation(cursor_override_enabled=False)
164
205
  print(f"Cursor model: {advice.cursor_model}")
165
206
  print(f"Prose path: {advice.prose_path}")
@@ -505,6 +546,27 @@ def main(argv: list[str] | None = None) -> int:
505
546
  help="Path to split-stack.models.json (or set SPLIT_STACK_MODELS_CONFIG)",
506
547
  )
507
548
  _add_quant_arg(doctor_parser)
549
+ doctor_parser.add_argument(
550
+ "--check-stack",
551
+ action="store_true",
552
+ help="Offline stack health: missing models, duplicates, routing spread (exit 1 if not ready)",
553
+ )
554
+ doctor_parser.add_argument(
555
+ "--vram-gb",
556
+ type=int,
557
+ choices=[8, 12, 16, 24, 32],
558
+ help="GPU VRAM for recommended stack (alternative to --profile)",
559
+ )
560
+ doctor_parser.add_argument(
561
+ "--base-url",
562
+ default="http://127.0.0.1:11434",
563
+ help="Ollama base URL for inventory scan",
564
+ )
565
+ doctor_parser.add_argument(
566
+ "--models",
567
+ help="Comma-separated stack override when using --check-stack",
568
+ )
569
+ doctor_parser.add_argument("--json", action="store_true", help="JSON output (with --check-stack)")
508
570
  doctor_parser.set_defaults(handler=_cmd_doctor)
509
571
 
510
572
  requirements_parser = subparsers.add_parser(
@@ -240,7 +240,7 @@ def audit_model_folders(
240
240
  primary = home if home.is_dir() else None
241
241
  return {
242
242
  "primary_root": str(primary) if primary else None,
243
- "scan_roots": list(manifest_search_paths()),
243
+ "scan_roots": [str(path) for path in manifest_search_paths()],
244
244
  "tag_count": len(locations),
245
245
  "locations": {tag: list(paths) for tag, paths in locations.items()},
246
246
  "duplicates": duplicates,
@@ -0,0 +1,201 @@
1
+ """Workstation stack helpers for demos and compare POC."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from split_stack.discovery import list_model_inventory
8
+ from split_stack.presets import RECOMMENDED_STACKS, recommended_models
9
+ from split_stack.quantization import QAT_STACK_ADDITIONS, normalize_quant_mode
10
+ from split_stack.session import profile_for_vram_gb
11
+
12
+ DEFAULT_POC_STACK = ["gemma4:e4b", "qwen3:8b", "qwen3:14b"]
13
+
14
+ VRAM_OPTIONS: tuple[tuple[int, str], ...] = (
15
+ (8, "8 GB"),
16
+ (12, "12 GB"),
17
+ (16, "16 GB"),
18
+ (24, "24 GB"),
19
+ (32, "32 GB"),
20
+ )
21
+
22
+ QUANT_OPTIONS: tuple[tuple[str, str], ...] = (
23
+ ("default", "Default (PTQ)"),
24
+ ("qat", "Gemma QAT (int4)"),
25
+ ("qat_mobile", "Gemma mobile QAT"),
26
+ ("bf16", "BF16 (full size)"),
27
+ )
28
+
29
+ _PRESET_VRAM_ALIASES: dict[str, int] = {
30
+ "mixed_12gb": 12,
31
+ "mixed_16gb": 16,
32
+ "qwen_only": 12,
33
+ "recommended_12gb": 12,
34
+ "from_inventory": 0,
35
+ }
36
+
37
+
38
+ @dataclass(frozen=True)
39
+ class WorkstationStack:
40
+ vram_gb: int
41
+ profile: str
42
+ quant: str
43
+ models: tuple[str, ...]
44
+ description: str
45
+ notes: tuple[str, ...]
46
+
47
+
48
+ def list_vram_options() -> tuple[tuple[int, str], ...]:
49
+ return VRAM_OPTIONS
50
+
51
+
52
+ def list_quant_options() -> tuple[tuple[str, str], ...]:
53
+ return QUANT_OPTIONS
54
+
55
+
56
+ def recommended_stack_for_vram(
57
+ vram_gb: int,
58
+ *,
59
+ quant: str | None = None,
60
+ ) -> WorkstationStack:
61
+ profile = profile_for_vram_gb(vram_gb)
62
+ stack = RECOMMENDED_STACKS[profile]
63
+ quant_mode = normalize_quant_mode(quant)
64
+ models = recommended_models(profile, quant=quant_mode)
65
+ notes: list[str] = []
66
+ if quant_mode == "qat":
67
+ extras = QAT_STACK_ADDITIONS.get(profile, ())
68
+ if extras:
69
+ notes.append(
70
+ f"QAT adds {', '.join(extras)} on {vram_gb} GB — Gemma-only int4 runtime sizes."
71
+ )
72
+ elif quant_mode == "bf16":
73
+ notes.append("BF16 uses full Gemma pull sizes — prefer 24 GB+ or datacenter.")
74
+ return WorkstationStack(
75
+ vram_gb=vram_gb,
76
+ profile=profile,
77
+ quant=quant_mode,
78
+ models=tuple(models),
79
+ description=stack.description,
80
+ notes=tuple(notes),
81
+ )
82
+
83
+
84
+ def models_for_preset(
85
+ preset_id: str,
86
+ *,
87
+ base_url: str = "http://127.0.0.1:11434",
88
+ profile: str | None = None,
89
+ quant: str | None = None,
90
+ ) -> list[str]:
91
+ if preset_id == "from_inventory":
92
+ inventory = list_model_inventory(base_url=base_url)
93
+ if inventory.suggested_stack:
94
+ return list(inventory.suggested_stack)
95
+ return list(DEFAULT_POC_STACK)
96
+ if preset_id == "qwen_only":
97
+ return ["qwen3:4b", "qwen3:8b", "qwen3:14b"]
98
+ if preset_id == "community_agentic":
99
+ from split_stack.community_picks import focus_stack, vram_tier_for_profile
100
+
101
+ tier = vram_tier_for_profile(profile or "workstation_12gb")
102
+ focus = focus_stack("agentic", vram_tier=tier)
103
+ if focus and focus.models:
104
+ return list(focus.models)
105
+ return list(DEFAULT_POC_STACK)
106
+ vram = _PRESET_VRAM_ALIASES.get(preset_id)
107
+ if vram:
108
+ return list(recommended_stack_for_vram(vram, quant=quant).models)
109
+ raise ValueError(f"Unknown stack preset '{preset_id}'.")
110
+
111
+
112
+ def available_model_pool(
113
+ *,
114
+ base_url: str = "http://127.0.0.1:11434",
115
+ source: str = "both",
116
+ ) -> tuple[list[str], str | None]:
117
+ inventory = list_model_inventory(base_url=base_url)
118
+ if source == "api":
119
+ pool = list(inventory.api_models)
120
+ elif source == "disk":
121
+ pool = list(inventory.disk_models)
122
+ else:
123
+ pool = sorted(set(inventory.api_models) | set(inventory.disk_models))
124
+ return pool, inventory.note
125
+
126
+
127
+ def resolve_stack_against_pool(
128
+ desired: list[str],
129
+ installed: list[str],
130
+ ) -> tuple[list[str], list[str], str | None]:
131
+ installed_set = set(installed)
132
+ matched = [name for name in desired if name in installed_set]
133
+ missing = [name for name in desired if name not in installed_set]
134
+ if len(matched) >= 2:
135
+ return matched, missing, None
136
+
137
+ if installed:
138
+ from split_stack.model_registry import load_registry, model_weight
139
+
140
+ registry = load_registry()
141
+ ranked = sorted(installed, key=lambda name: model_weight(name, registry))
142
+ if len(ranked) >= 2:
143
+ warning = (
144
+ f"Recommended stack not fully installed ({', '.join(desired)}). "
145
+ f"Using: {', '.join(ranked)}"
146
+ )
147
+ return ranked, missing, warning
148
+ warning = (
149
+ f"Recommended stack not fully installed. "
150
+ f"Using only {ranked[0]} — need 2+ models for routing spread."
151
+ )
152
+ return ranked, missing, warning
153
+
154
+ return desired, missing, f"Using recommended list (not verified against disk): {', '.join(desired)}"
155
+
156
+
157
+ def resolve_installed_stack(
158
+ installed: list[str],
159
+ *,
160
+ preset_id: str = "mixed_12gb",
161
+ base_url: str = "http://127.0.0.1:11434",
162
+ vram_gb: int | None = None,
163
+ quant: str | None = None,
164
+ models: list[str] | None = None,
165
+ ) -> tuple[list[str], str | None]:
166
+ if models:
167
+ desired = models
168
+ elif vram_gb is not None:
169
+ desired = list(recommended_stack_for_vram(vram_gb, quant=quant).models)
170
+ else:
171
+ desired = models_for_preset(preset_id, base_url=base_url, quant=quant)
172
+ resolved, _missing, warning = resolve_stack_against_pool(desired, installed)
173
+ return resolved, warning
174
+
175
+
176
+ def stack_payload(
177
+ *,
178
+ vram_gb: int = 16,
179
+ quant: str | None = "qat",
180
+ base_url: str = "http://127.0.0.1:11434",
181
+ source: str = "both",
182
+ models_override: list[str] | None = None,
183
+ ) -> dict[str, object]:
184
+ stack = recommended_stack_for_vram(vram_gb, quant=quant)
185
+ desired = list(models_override) if models_override else list(stack.models)
186
+ pool, inventory_note = available_model_pool(base_url=base_url, source=source)
187
+ resolved, missing, warning = resolve_stack_against_pool(desired, pool)
188
+ return {
189
+ "ready": True,
190
+ "vram_gb": vram_gb,
191
+ "profile": stack.profile,
192
+ "quant": stack.quant,
193
+ "description": stack.description,
194
+ "notes": list(stack.notes),
195
+ "models": desired,
196
+ "resolved_models": resolved,
197
+ "missing_models": missing,
198
+ "warning": warning,
199
+ "inventory_note": inventory_note,
200
+ "pool_size": len(pool),
201
+ }
@@ -0,0 +1,360 @@
1
+ """Offline stack health checks — missing models, duplicates, routing spread, quant honesty."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from split_stack.discovery import audit_model_folders, list_model_inventory
8
+ from split_stack.poc_models import resolve_stack_against_pool, stack_payload
9
+ from split_stack.presets import recommended_models
10
+ from split_stack.quantization import adjust_vram_for_quant, normalize_quant_mode, pull_guidance_lines
11
+ from split_stack.session import default_profile_from_env, profile_for_vram_gb
12
+
13
+
14
+ @dataclass(frozen=True)
15
+ class ModelTagInfo:
16
+ name: str
17
+ size_bytes: int
18
+ quantization_level: str | None
19
+
20
+
21
+ @dataclass(frozen=True)
22
+ class StackHealthFinding:
23
+ level: str # ok, warn, error
24
+ code: str
25
+ message: str
26
+ models: tuple[str, ...] = ()
27
+
28
+
29
+ @dataclass(frozen=True)
30
+ class StackHealthReport:
31
+ ready: bool
32
+ profile: str
33
+ vram_gb: int | None
34
+ quant: str
35
+ recommended: tuple[str, ...]
36
+ resolved: tuple[str, ...]
37
+ missing: tuple[str, ...]
38
+ pool_size: int
39
+ findings: tuple[StackHealthFinding, ...]
40
+ inventory_note: str | None = None
41
+
42
+
43
+ def check_stack_health(
44
+ *,
45
+ profile: str | None = None,
46
+ vram_gb: int | None = None,
47
+ quant: str | None = None,
48
+ base_url: str = "http://127.0.0.1:11434",
49
+ models: list[str] | None = None,
50
+ source: str = "both",
51
+ ) -> StackHealthReport:
52
+ """Check recommended stack against local inventory (offline; no upstream registry)."""
53
+ quant_mode = normalize_quant_mode(quant)
54
+ if vram_gb is not None:
55
+ resolved_profile = profile_for_vram_gb(vram_gb)
56
+ else:
57
+ resolved_profile = profile or default_profile_from_env()
58
+ vram_gb = _vram_for_profile(resolved_profile)
59
+
60
+ if models:
61
+ recommended = tuple(models)
62
+ payload = stack_payload(
63
+ vram_gb=vram_gb or 12,
64
+ quant=quant_mode,
65
+ base_url=base_url,
66
+ source=source,
67
+ models_override=list(models),
68
+ )
69
+ resolved = tuple(payload.get("resolved_models") or ())
70
+ missing = tuple(payload.get("missing_models") or ())
71
+ inventory_note = payload.get("inventory_note")
72
+ if isinstance(inventory_note, str):
73
+ pass
74
+ else:
75
+ inventory_note = None
76
+ pool_size = int(payload.get("pool_size") or 0)
77
+ else:
78
+ recommended = tuple(recommended_models(resolved_profile, quant=quant_mode))
79
+ inventory = list_model_inventory(base_url=base_url)
80
+ if source == "api":
81
+ pool = list(inventory.api_models)
82
+ elif source == "disk":
83
+ pool = list(inventory.disk_models)
84
+ else:
85
+ pool = sorted(set(inventory.api_models) | set(inventory.disk_models))
86
+ resolved_list, missing_list, _warning = resolve_stack_against_pool(
87
+ list(recommended),
88
+ pool,
89
+ )
90
+ resolved = tuple(resolved_list)
91
+ missing = tuple(missing_list)
92
+ inventory_note = inventory.note
93
+ pool_size = len(pool)
94
+
95
+ findings: list[StackHealthFinding] = []
96
+
97
+ for name in recommended:
98
+ if name in missing:
99
+ findings.append(
100
+ StackHealthFinding(
101
+ level="error",
102
+ code="missing",
103
+ message=f"{name} is recommended but not found in local inventory.",
104
+ models=(name,),
105
+ )
106
+ )
107
+ elif name in resolved:
108
+ findings.append(
109
+ StackHealthFinding(
110
+ level="ok",
111
+ code="present",
112
+ message=f"{name} is installed.",
113
+ models=(name,),
114
+ )
115
+ )
116
+
117
+ if len(resolved) < 2:
118
+ findings.append(
119
+ StackHealthFinding(
120
+ level="error",
121
+ code="routing_spread",
122
+ message=(
123
+ f"Only {len(resolved)} model(s) available for routing "
124
+ f"({', '.join(resolved) or 'none'}). Need at least 2 for tier spread."
125
+ ),
126
+ models=resolved,
127
+ )
128
+ )
129
+ elif missing:
130
+ findings.append(
131
+ StackHealthFinding(
132
+ level="warn",
133
+ code="partial_stack",
134
+ message=(
135
+ f"Using {len(resolved)} installed model(s); "
136
+ f"{len(missing)} recommended tag(s) missing."
137
+ ),
138
+ models=resolved,
139
+ )
140
+ )
141
+ else:
142
+ findings.append(
143
+ StackHealthFinding(
144
+ level="ok",
145
+ code="stack_complete",
146
+ message=f"All {len(recommended)} recommended model(s) are installed.",
147
+ models=resolved,
148
+ )
149
+ )
150
+
151
+ audit = audit_model_folders()
152
+ duplicate_tags = audit.get("duplicate_tags") or []
153
+ if duplicate_tags:
154
+ dup_list = tuple(str(tag) for tag in duplicate_tags)
155
+ findings.append(
156
+ StackHealthFinding(
157
+ level="warn",
158
+ code="duplicate_tags",
159
+ message=(
160
+ f"Duplicate tags across Ollama folders: {', '.join(dup_list)}. "
161
+ "Keep one models directory or run audit cleanup."
162
+ ),
163
+ models=dup_list,
164
+ )
165
+ )
166
+
167
+ findings.extend(
168
+ _quant_mismatch_findings(
169
+ quant_mode=quant_mode,
170
+ model_names=tuple(name for name in resolved if name not in missing),
171
+ base_url=base_url,
172
+ )
173
+ )
174
+
175
+ ready = len(resolved) >= 2
176
+ return StackHealthReport(
177
+ ready=ready,
178
+ profile=resolved_profile,
179
+ vram_gb=vram_gb,
180
+ quant=quant_mode,
181
+ recommended=recommended,
182
+ resolved=resolved,
183
+ missing=missing,
184
+ pool_size=pool_size,
185
+ findings=tuple(findings),
186
+ inventory_note=inventory_note,
187
+ )
188
+
189
+
190
+ def format_stack_health(report: StackHealthReport) -> str:
191
+ lines: list[str] = []
192
+ vram_label = f"{report.vram_gb} GB" if report.vram_gb is not None else report.profile
193
+ lines.append(f"Stack health ({vram_label}, quant={report.quant})")
194
+ lines.append(f" Recommended: {', '.join(report.recommended) or '-'}")
195
+ lines.append(f" Resolved: {', '.join(report.resolved) or '-'}")
196
+ if report.missing:
197
+ lines.append(f" Missing: {', '.join(report.missing)}")
198
+ lines.append(f" Inventory: {report.pool_size} tag(s) seen (API + disk)")
199
+ if report.inventory_note:
200
+ lines.append(f" Note: {report.inventory_note}")
201
+ lines.append("")
202
+ for item in report.findings:
203
+ prefix = {"ok": "OK", "warn": "WARN", "error": "ERROR"}.get(item.level, item.level.upper())
204
+ lines.append(f" [{prefix}] {item.message}")
205
+ lines.append("")
206
+ if report.ready:
207
+ lines.append("Routing: ready (2+ models)")
208
+ else:
209
+ lines.append("Routing: not ready — install more models or adjust profile/VRAM.")
210
+ return "\n".join(lines)
211
+
212
+
213
+ def _vram_for_profile(profile: str) -> int | None:
214
+ from split_stack.model_registry import DEPLOYMENT_PROFILES
215
+
216
+ spec = DEPLOYMENT_PROFILES.get(profile)
217
+ if spec is None:
218
+ return None
219
+ return spec.assumed_vram_gb
220
+
221
+
222
+ def _fetch_ollama_tag_info(
223
+ *,
224
+ base_url: str = "http://127.0.0.1:11434",
225
+ ) -> dict[str, ModelTagInfo]:
226
+ try:
227
+ import requests
228
+ except ImportError:
229
+ return {}
230
+
231
+ url = f"{base_url.rstrip('/')}/api/tags"
232
+ try:
233
+ response = requests.get(url, timeout=5)
234
+ response.raise_for_status()
235
+ except Exception:
236
+ return {}
237
+
238
+ out: dict[str, ModelTagInfo] = {}
239
+ payload = response.json() or {}
240
+ for item in payload.get("models") or []:
241
+ name = (item.get("name") or "").strip()
242
+ if not name:
243
+ continue
244
+ details = item.get("details") or {}
245
+ quant = details.get("quantization_level")
246
+ out[name] = ModelTagInfo(
247
+ name=name,
248
+ size_bytes=int(item.get("size") or 0),
249
+ quantization_level=str(quant) if quant else None,
250
+ )
251
+ return out
252
+
253
+
254
+ def _is_gemma_tag(name: str) -> bool:
255
+ family = name.split(":")[0].lower()
256
+ return family.startswith("gemma")
257
+
258
+
259
+ def _tag_suggests_qat(name: str) -> bool:
260
+ lowered = name.lower()
261
+ markers = ("qat", "ud-q4", "ud_q4", "unsloth", "gemma-4-qat")
262
+ return any(marker in lowered for marker in markers)
263
+
264
+
265
+ def _tag_suggests_bf16(name: str) -> bool:
266
+ lowered = name.lower()
267
+ return "bf16" in lowered or "-it-bf16" in lowered
268
+
269
+
270
+ def _quant_mismatch_findings(
271
+ *,
272
+ quant_mode: str,
273
+ model_names: tuple[str, ...],
274
+ base_url: str,
275
+ ) -> list[StackHealthFinding]:
276
+ if quant_mode == "default":
277
+ return []
278
+
279
+ tag_info = _fetch_ollama_tag_info(base_url=base_url)
280
+ if not tag_info:
281
+ return [
282
+ StackHealthFinding(
283
+ level="warn",
284
+ code="quant_check_skipped",
285
+ message=(
286
+ "Quant check skipped — Ollama /api/tags unreachable or "
287
+ "install split-stack[ollama] for requests."
288
+ ),
289
+ )
290
+ ]
291
+
292
+ findings: list[StackHealthFinding] = []
293
+ gemma_tags = [name for name in model_names if _is_gemma_tag(name)]
294
+ if not gemma_tags:
295
+ return findings
296
+
297
+ for name in gemma_tags:
298
+ info = tag_info.get(name)
299
+ if info is None:
300
+ continue
301
+
302
+ expected_gb = adjust_vram_for_quant(name, base_vram_gb=999, quant_mode=quant_mode)
303
+ size_gb = info.size_bytes / (1024**3) if info.size_bytes else 0.0
304
+ quant_label = info.quantization_level or "unknown"
305
+
306
+ if quant_mode == "bf16":
307
+ if not _tag_suggests_bf16(name) and quant_label not in {"F16", "BF16", "FP16"}:
308
+ findings.append(
309
+ StackHealthFinding(
310
+ level="warn",
311
+ code="quant_mismatch",
312
+ message=(
313
+ f"{name}: quant=bf16 but installed as {quant_label} "
314
+ f"({size_gb:.1f} GB on disk). VRAM sizing may be wrong."
315
+ ),
316
+ models=(name,),
317
+ )
318
+ )
319
+ continue
320
+
321
+ # qat / qat_mobile — expect smaller runtime than library Q4_K_M pulls
322
+ if _tag_suggests_qat(name):
323
+ findings.append(
324
+ StackHealthFinding(
325
+ level="ok",
326
+ code="quant_ok",
327
+ message=f"{name}: tag looks QAT-aligned ({quant_label}, {size_gb:.1f} GB).",
328
+ models=(name,),
329
+ )
330
+ )
331
+ continue
332
+
333
+ oversized = expected_gb is not None and size_gb > expected_gb * 1.35
334
+ library_ptq = quant_label in {"Q4_K_M", "Q4_0", "Q5_K_M", "Q5_0"}
335
+ if oversized or library_ptq:
336
+ expected_text = f"~{expected_gb} GB runtime" if expected_gb else "smaller QAT runtime"
337
+ findings.append(
338
+ StackHealthFinding(
339
+ level="warn",
340
+ code="quant_mismatch",
341
+ message=(
342
+ f"{name}: quant={quant_mode} expects {expected_text} but installed "
343
+ f"{quant_label} at {size_gb:.1f} GB — likely library PTQ, not QAT."
344
+ ),
345
+ models=(name,),
346
+ )
347
+ )
348
+
349
+ if any(item.code == "quant_mismatch" for item in findings):
350
+ hint = pull_guidance_lines(quant_mode)
351
+ if hint:
352
+ findings.append(
353
+ StackHealthFinding(
354
+ level="warn",
355
+ code="quant_hint",
356
+ message=hint[0],
357
+ )
358
+ )
359
+
360
+ return findings
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: split-stack
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Python routing library for local LLM agent loops: score prompts, map tiers to model names, embed in your runner.
5
5
  Author: Eddie Baumel
6
6
  License: MIT
@@ -24,6 +24,7 @@ src/split_stack/requirements.py
24
24
  src/split_stack/routing.py
25
25
  src/split_stack/session.py
26
26
  src/split_stack/setup_wizard.py
27
+ src/split_stack/stack_health.py
27
28
  src/split_stack/startup_tips.py
28
29
  src/split_stack/tiering.py
29
30
  src/split_stack/validation.py
@@ -53,5 +54,6 @@ tests/test_routing.py
53
54
  tests/test_session.py
54
55
  tests/test_setup.py
55
56
  tests/test_setup_cli.py
57
+ tests/test_stack_health.py
56
58
  tests/test_startup_tips.py
57
59
  tests/test_tiering.py
@@ -1,7 +1,7 @@
1
1
  from split_stack.discovery import discover_models_from_disk, list_model_inventory, manifest_search_paths
2
2
  from split_stack.hints import canonical_hint_id, list_hints, normalize_step_kind
3
3
  from split_stack.models import StepKind
4
- from split_stack.poc_models import models_for_preset, resolve_installed_stack
4
+ from split_stack.poc_models import models_for_preset, recommended_stack_for_vram, resolve_installed_stack, stack_payload
5
5
 
6
6
 
7
7
  def test_hint_catalog_has_five_entries():
@@ -29,6 +29,19 @@ def test_resolve_installed_stack_falls_back():
29
29
  assert warning is not None
30
30
 
31
31
 
32
+ def test_recommended_stack_for_vram_16gb_qat_adds_gemma26():
33
+ stack = recommended_stack_for_vram(16, quant="qat")
34
+ assert stack.profile == "workstation_16gb"
35
+ assert "gemma4:26b-a4b" in stack.models
36
+
37
+
38
+ def test_stack_payload_vram_quant_keys():
39
+ payload = stack_payload(vram_gb=16, quant="qat")
40
+ assert payload["vram_gb"] == 16
41
+ assert payload["quant"] == "qat"
42
+ assert "gemma4:26b-a4b" in payload["models"]
43
+
44
+
32
45
  def test_discover_models_from_disk_finds_user_layout():
33
46
  roots = manifest_search_paths()
34
47
  disk = discover_models_from_disk()
@@ -28,3 +28,26 @@ def test_stack_setup_json(mock_run_setup, capsys):
28
28
  assert payload["ready"] is True
29
29
  assert payload["profile"] == "workstation_12gb"
30
30
  assert "qwen3:14b" in payload["pulled"]
31
+
32
+
33
+ def test_stack_doctor_check_stack_json(capsys):
34
+ from split_stack.discovery import ModelInventory
35
+
36
+ inventory = ModelInventory(
37
+ api_models=("gemma4:e4b", "qwen3:8b", "qwen3:14b", "deepseek-r1:8b", "gemma4:26b-a4b"),
38
+ disk_models=(),
39
+ manifest_roots=(),
40
+ suggested_stack=("gemma4:e4b", "qwen3:8b", "qwen3:14b"),
41
+ note=None,
42
+ )
43
+ with patch("split_stack.stack_health.list_model_inventory", return_value=inventory):
44
+ with patch(
45
+ "split_stack.stack_health.audit_model_folders",
46
+ return_value={"duplicate_tags": []},
47
+ ):
48
+ exit_code = main(["doctor", "--check-stack", "--vram-gb", "16", "--quant", "qat", "--json"])
49
+ payload = json.loads(capsys.readouterr().out.strip())
50
+ assert exit_code == 0
51
+ assert payload["ready"] is True
52
+ assert payload["profile"] == "workstation_16gb"
53
+ assert payload["quant"] == "qat"
@@ -0,0 +1,112 @@
1
+ from unittest.mock import patch
2
+
3
+ from split_stack.discovery import ModelInventory
4
+ from split_stack.stack_health import ModelTagInfo, check_stack_health, format_stack_health
5
+
6
+
7
def test_check_stack_health_all_present():
    """With every 12 GB stack model installed, the report is ready and complete."""
    installed = ("gemma4:e4b", "qwen3:8b", "qwen3:14b", "deepseek-r1:8b")
    fake_inventory = ModelInventory(
        api_models=installed,
        disk_models=installed,
        manifest_roots=("/models",),
        suggested_stack=("gemma4:e4b", "qwen3:8b", "qwen3:14b"),
        note=None,
    )

    with patch(
        "split_stack.stack_health.list_model_inventory", return_value=fake_inventory
    ), patch(
        "split_stack.stack_health.audit_model_folders",
        return_value={"duplicate_tags": []},
    ):
        health = check_stack_health(profile="workstation_12gb", quant="default")

    assert health.ready is True
    assert health.missing == ()
    assert "gemma4:e4b" in health.resolved
    finding_codes = {finding.code for finding in health.findings}
    assert "stack_complete" in finding_codes
    assert "routing_spread" not in finding_codes
27
+
28
+
29
def test_check_stack_health_missing_and_routing_blocked():
    """A single installed model yields 'missing' and 'routing_spread' findings."""
    only_model = ("qwen3:8b",)
    fake_inventory = ModelInventory(
        api_models=only_model,
        disk_models=only_model,
        manifest_roots=("/models",),
        suggested_stack=only_model,
        note=None,
    )

    with patch(
        "split_stack.stack_health.list_model_inventory", return_value=fake_inventory
    ), patch(
        "split_stack.stack_health.audit_model_folders",
        return_value={"duplicate_tags": []},
    ):
        health = check_stack_health(profile="workstation_12gb", quant="default")

    assert health.ready is False
    assert "gemma4:e4b" in health.missing
    finding_codes = {finding.code for finding in health.findings}
    assert "missing" in finding_codes
    assert "routing_spread" in finding_codes
48
+
49
+
50
def test_check_stack_health_duplicate_tags_warn():
    """Duplicate tags reported by the folder audit surface as a finding."""
    ladder = ("gemma4:e4b", "qwen3:8b", "qwen3:14b")
    fake_inventory = ModelInventory(
        api_models=ladder,
        disk_models=ladder,
        manifest_roots=("/a", "/b"),
        suggested_stack=ladder,
        note=None,
    )

    with patch(
        "split_stack.stack_health.list_model_inventory", return_value=fake_inventory
    ), patch(
        "split_stack.stack_health.audit_model_folders",
        return_value={"duplicate_tags": ["qwen3:8b"]},
    ):
        health = check_stack_health(vram_gb=16, quant="qat")

    assert any(finding.code == "duplicate_tags" for finding in health.findings)
    assert health.profile == "workstation_16gb"
66
+
67
+
68
def test_format_stack_health_includes_routing_line():
    """The text report shows the header, routing status and the inventory note."""
    fake_inventory = ModelInventory(
        api_models=("qwen3:8b",),
        disk_models=(),
        manifest_roots=(),
        suggested_stack=("qwen3:8b",),
        note="Ollama API unreachable.",
    )

    with patch(
        "split_stack.stack_health.list_model_inventory", return_value=fake_inventory
    ), patch(
        "split_stack.stack_health.audit_model_folders",
        return_value={"duplicate_tags": []},
    ):
        health = check_stack_health(profile="workstation_12gb")

    rendered = format_stack_health(health)
    assert "Stack health" in rendered
    assert "Routing: not ready" in rendered
    assert "Ollama API unreachable" in rendered
86
+
87
+
88
def test_quant_mismatch_warns_library_gemma_with_qat_mode():
    """A Q4_K_M Gemma tag checked in QAT mode produces a 'quant_mismatch' finding."""
    fake_inventory = ModelInventory(
        api_models=("gemma4:e4b", "qwen3:8b", "qwen3:14b", "deepseek-r1:8b"),
        disk_models=(),
        manifest_roots=(),
        suggested_stack=(),
        note=None,
    )
    # Only the Gemma tag gets metadata; it carries a non-QAT quantization level.
    gemma_info = ModelTagInfo(
        name="gemma4:e4b",
        size_bytes=9_608_350_718,
        quantization_level="Q4_K_M",
    )
    tag_info = {"gemma4:e4b": gemma_info}

    with patch(
        "split_stack.stack_health.list_model_inventory", return_value=fake_inventory
    ), patch(
        "split_stack.stack_health.audit_model_folders",
        return_value={"duplicate_tags": []},
    ), patch(
        "split_stack.stack_health._fetch_ollama_tag_info", return_value=tag_info
    ):
        health = check_stack_health(profile="workstation_12gb", quant="qat")

    finding_codes = {finding.code for finding in health.findings}
    assert "quant_mismatch" in finding_codes
    assert any("gemma4:e4b" in finding.message for finding in health.findings)
@@ -1,131 +0,0 @@
1
- """Default model stacks for POC demos and compare benchmarks."""
2
-
3
- from __future__ import annotations
4
-
5
- from dataclasses import dataclass
6
-
7
- from split_stack.community_picks import focus_stack, vram_tier_for_profile
8
- from split_stack.discovery import list_model_inventory
9
- from split_stack.presets import recommended_models
10
-
11
# Three-model ladder used by the "mixed_12gb" preset and returned by
# models_for_preset() whenever a dynamic preset resolves to nothing.
DEFAULT_POC_STACK = ["gemma4:e4b", "qwen3:8b", "qwen3:14b"]
# Single-family ladder backing the "qwen_only" preset.
QWEN_ONLY_STACK = ["qwen3:4b", "qwen3:8b", "qwen3:14b"]
13
-
14
-
15
@dataclass(frozen=True)
class StackPreset:
    """Immutable description of a named model stack offered to the user."""

    # Identifier matched against ``preset_id`` in models_for_preset().
    id: str
    # Human-readable title for the preset.
    label: str
    # Model tags in the preset; left empty for presets that
    # models_for_preset() resolves at call time.
    models: tuple[str, ...]
    # Short free-text explanation of the preset.
    description: str
21
-
22
-
23
# Catalog of selectable presets. Built once at import time, so
# recommended_models("workstation_12gb") is evaluated when the module loads.
STACK_PRESETS: tuple[StackPreset, ...] = (
    StackPreset(
        id="mixed_12gb",
        label="Mixed 12 GB (Gemma + Qwen)",
        models=tuple(DEFAULT_POC_STACK),
        description="Gemma lookup, Qwen 8B medium, Qwen 14B complex",
    ),
    StackPreset(
        id="qwen_only",
        label="Qwen only (4B / 8B / 14B)",
        models=tuple(QWEN_ONLY_STACK),
        description="Single-family ladder",
    ),
    StackPreset(
        id="recommended_12gb",
        label="Full 12 GB specialist",
        models=tuple(recommended_models("workstation_12gb")),
        description="Gemma + Qwen + DeepSeek R1 for reasoning",
    ),
    # The two presets below carry no static models: models_for_preset()
    # special-cases their ids and computes the list on demand.
    StackPreset(
        id="community_agentic",
        label="Reddit agentic (M tier)",
        models=tuple(),
        description="r/LocalLLaMA Apr 2026 — Gemma lookup + Qwen spine for agent loops",
    ),
    StackPreset(
        id="from_inventory",
        label="From your Ollama (auto ladder)",
        models=tuple(),
        description="Picks small/mid/large tags from API + disk manifests",
    ),
)
55
-
56
-
57
def list_stack_presets() -> tuple[StackPreset, ...]:
    """Return the module-level ``STACK_PRESETS`` catalog unchanged."""
    return STACK_PRESETS
59
-
60
-
61
def models_for_preset(
    preset_id: str,
    *,
    base_url: str = "http://127.0.0.1:11434",
    profile: str = "workstation_12gb",
) -> list[str]:
    """Return the model tags for ``preset_id`` as a new list.

    Two preset ids are resolved dynamically; every other id is looked up in
    ``STACK_PRESETS``.

    Args:
        preset_id: Identifier of the preset to resolve.
        base_url: Passed to ``list_model_inventory``; only used by the
            ``"from_inventory"`` preset.
        profile: Passed to ``vram_tier_for_profile``; only used by the
            ``"community_agentic"`` preset.

    Returns:
        The preset's models. The dynamic presets fall back to a copy of
        ``DEFAULT_POC_STACK`` when they resolve to nothing.

    Raises:
        ValueError: If ``preset_id`` matches no entry in ``STACK_PRESETS``.
    """
    if preset_id == "from_inventory":
        suggested = list_model_inventory(base_url=base_url).suggested_stack
        return list(suggested) if suggested else list(DEFAULT_POC_STACK)

    if preset_id == "community_agentic":
        community = focus_stack("agentic", vram_tier=vram_tier_for_profile(profile))
        if community and community.models:
            return list(community.models)
        return list(DEFAULT_POC_STACK)

    # Static presets: the first catalog entry with a matching id wins.
    preset = next((entry for entry in STACK_PRESETS if entry.id == preset_id), None)
    if preset is not None:
        return list(preset.models)

    known_ids = ", ".join(entry.id for entry in STACK_PRESETS)
    raise ValueError(f"Unknown stack preset '{preset_id}'. Valid: {known_ids}")
83
-
84
-
85
def available_model_pool(
    *,
    base_url: str = "http://127.0.0.1:11434",
    source: str = "both",
) -> tuple[list[str], str | None]:
    """Return model names from the Ollama API, disk manifests, or both.

    Args:
        base_url: Passed to ``list_model_inventory``.
        source: ``"api"`` or ``"disk"`` selects that list as-is; any other
            value (including the default ``"both"``) yields the sorted,
            de-duplicated union of the two.

    Returns:
        A ``(names, note)`` pair, where ``note`` is the inventory's ``note``
        attribute passed through unchanged.
    """
    inv = list_model_inventory(base_url=base_url)
    if source == "api":
        return list(inv.api_models), inv.note
    if source == "disk":
        return list(inv.disk_models), inv.note
    combined = {*inv.api_models, *inv.disk_models}
    return sorted(combined), inv.note
99
-
100
-
101
def resolve_installed_stack(
    installed: list[str],
    *,
    preset_id: str = "mixed_12gb",
    base_url: str = "http://127.0.0.1:11434",
) -> tuple[list[str], str | None]:
    """Pick preset models that exist in the installed pool; warn on fallback.

    Args:
        installed: Model names available locally.
        preset_id: Preset whose models are wanted (see ``models_for_preset``).
        base_url: Forwarded to ``models_for_preset``.

    Returns:
        A ``(models, warning)`` pair:

        * at least two preset models installed: those models in preset
          order, with ``None`` as the warning;
        * otherwise, if anything is installed: every installed model sorted
          by ``model_weight``, with a warning describing the fallback;
        * nothing installed: the unverified preset list with a warning.
    """
    desired = models_for_preset(preset_id, base_url=base_url)
    available = set(installed)
    present = [model for model in desired if model in available]
    if len(present) >= 2:
        return present, None

    if not installed:
        return desired, f"Using preset list (not verified): {', '.join(desired)}"

    # Imported lazily, as in the original: only needed on the fallback path.
    from split_stack.model_registry import load_registry, model_weight

    registry = load_registry()
    by_weight = sorted(installed, key=lambda model: model_weight(model, registry))
    if len(by_weight) < 2:
        message = (
            f"Preset '{preset_id}' not fully available. "
            f"Using only {by_weight[0]} — need 2+ models for compare spread."
        )
    else:
        message = (
            f"Preset '{preset_id}' not fully available ({', '.join(desired)}). "
            f"Using: {', '.join(by_weight)}"
        )
    return by_weight, message
File without changes
File without changes
File without changes