split-stack 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,259 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from split_stack.discovery import discover_models
8
+ from split_stack.model_registry import normalize_deployment_profile
9
+ from split_stack.presets import RECOMMENDED_STACKS, assign_recommended_tiers, list_recommended_stacks, recommended_models
10
+ from split_stack.tiering import describe_tiers
11
+
12
+
13
@dataclass(frozen=True)
class SetupPlan:
    """Immutable description of what a setup run would do for one profile.

    Instances are produced by ``plan_setup`` before anything is downloaded
    or written to disk.
    """

    # Normalized deployment profile name.
    profile: str
    # Description copied from the recommended stack entry.
    description: str
    # Every model tag listed by the recommended stack.
    models: tuple[str, ...]
    # Tags from ``models`` that were detected as already installed.
    installed: tuple[str, ...]
    # Tags from ``models`` that were not detected.
    missing: tuple[str, ...]
    # Destination of the generated config file.
    config_path: Path
21
+
22
+
23
@dataclass(frozen=True)
class SetupResult:
    """Outcome of a ``run_setup`` call: completed, dry run, cancelled or failed."""

    profile: str
    config_path: Path
    pulled: tuple[str, ...]  # tags downloaded during this run
    skipped: tuple[str, ...]  # missing tags that were not downloaded
    already_present: tuple[str, ...]  # tags found installed before the run
    tiers: dict[str, str | None]  # tier slot -> model tag; empty when not computed
    cancelled: bool = False
    dry_run: bool = False
    error: str | None = None

    @property
    def ready(self) -> bool:
        """Return True unless the run was cancelled or recorded an error.

        ``dry_run`` is not consulted, so a dry run without an error is ready.
        """
        if self.cancelled:
            return False
        return self.error is None
38
+
39
+
40
def _repo_root() -> Path:
    """Return the third ancestor directory (``parents[2]``) of this module file."""
    module_file = Path(__file__).resolve()
    return module_file.parents[2]
42
+
43
+
44
def default_config_path() -> Path:
    """Return the path of ``split-stack.models.json`` in the current working directory."""
    return Path.cwd().joinpath("split-stack.models.json")
46
+
47
+
48
def model_is_installed(tag: str, installed: list[str]) -> bool:
    """Return True when *tag* matches any name in *installed*, ignoring case.

    A name matches when it equals the tag, when it starts with the tag
    followed by ``:`` (the wanted tag carries no version suffix), or, only
    for tags that themselves contain ``:``, when the tag occurs anywhere
    inside the name.
    """
    target = tag.lower()
    untagged_prefix = f"{target}:"
    allow_substring = ":" in target

    def matches(candidate: str) -> bool:
        if candidate == target or candidate.startswith(untagged_prefix):
            return True
        return allow_substring and target in candidate

    return any(matches(name.lower()) for name in installed)
57
+
58
+
59
def plan_setup(
    profile: str,
    *,
    base_url: str = "http://127.0.0.1:11434",
    config_path: Path | None = None,
) -> SetupPlan:
    """Work out which models of a profile's recommended stack still need pulling.

    Args:
        profile: Profile name or alias; normalized before lookup.
        base_url: Base URL handed to ``discover_models``.
        config_path: Target config file; falls back to ``default_config_path()``.

    Returns:
        A ``SetupPlan`` with the stack's tags split into installed and missing.
    """
    profile_name = normalize_deployment_profile(profile)
    stack = RECOMMENDED_STACKS[profile_name]
    available = discover_models(base_url=base_url)

    # Split the stack's tags in one pass, keeping the stack's order in both lists.
    present: list[str] = []
    missing: list[str] = []
    for tag in stack.models:
        bucket = present if model_is_installed(tag, available) else missing
        bucket.append(tag)

    return SetupPlan(
        profile=profile_name,
        description=stack.description,
        models=stack.models,
        installed=tuple(present),
        missing=tuple(missing),
        config_path=config_path or default_config_path(),
    )
78
+
79
+
80
def pull_model(
    tag: str,
    *,
    base_url: str = "http://127.0.0.1:11434",
    timeout_seconds: int = 3600,
) -> None:
    """POST a non-streaming pull request for *tag* to ``<base_url>/api/pull``.

    ``requests`` is imported lazily because it is an optional dependency.

    Raises:
        RuntimeError: If ``requests`` cannot be imported.
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    try:
        import requests
    except ImportError as exc:
        raise RuntimeError(
            "pull_model requires optional dependency: pip install split-stack[ollama]"
        ) from exc

    endpoint = f"{base_url.rstrip('/')}/api/pull"
    body = {"name": tag, "stream": False}
    reply = requests.post(endpoint, json=body, timeout=timeout_seconds)
    reply.raise_for_status()
99
+
100
+
101
def write_setup_config(profile: str, config_path: Path) -> None:
    """Write a models config for *profile* to *config_path* as indented JSON.

    The payload starts from ``config/models.example.json`` under
    ``_repo_root()`` when that file exists, otherwise from an empty
    ``models`` list. ``deployment_profile`` and ``comment`` are then set, and
    missing parent directories of *config_path* are created.
    """
    profile_name = normalize_deployment_profile(profile)
    template_path = _repo_root() / "config" / "models.example.json"

    payload = (
        json.loads(template_path.read_text(encoding="utf-8"))
        if template_path.is_file()
        else {"models": []}
    )
    payload["deployment_profile"] = profile_name
    payload["comment"] = (
        f"Generated by stack setup ({profile_name}). Edit models[] if your tags differ."
    )

    config_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2) + "\n"
    config_path.write_text(serialized, encoding="utf-8")
112
+
113
+
114
def prompt_profile_choice() -> str:
    """Interactively ask which recommended preset to use and return its profile name.

    An empty answer selects ``workstation_12gb``. A number inside the listed
    range selects that entry. Anything else is passed to
    ``normalize_deployment_profile``, and the question is repeated while that
    raises ``ValueError``.
    """
    stacks = list_recommended_stacks()
    total = len(stacks)

    print("Choose your GPU preset:")
    for position, entry in enumerate(stacks, start=1):
        print(f" {position}. {entry.profile} — {entry.description}")
    print("")

    question = f"Preset [1-{total}] (default 2 = workstation_12gb): "
    while True:
        answer = input(question).strip()
        if answer == "":
            return "workstation_12gb"
        if answer.isdigit() and 1 <= int(answer) <= total:
            return stacks[int(answer) - 1].profile
        # Out-of-range numbers fall through to the name/alias lookup below.
        try:
            return normalize_deployment_profile(answer.lower())
        except ValueError:
            print("Enter a preset number or name like workstation_12gb / 12gb.")
133
+
134
+
135
def prompt_download_consent(*, missing: tuple[str, ...]) -> bool:
    """Ask the user to confirm downloading *missing*; return True on consent.

    Returns True without prompting when nothing is missing. Otherwise only
    the answers ``y`` and ``yes`` (any case, surrounding whitespace ignored)
    count as consent.
    """
    if not missing:
        return True

    banner = [
        "",
        f"Ollama will download {len(missing)} model(s) (large files, one-time):",
        *(f" - {tag}" for tag in missing),
        "",
    ]
    for line in banner:
        print(line)

    reply = input("Proceed with download? [y/N]: ")
    return reply.strip().lower() in {"y", "yes"}
145
+
146
+
147
def run_setup(
    profile: str | None = None,
    *,
    base_url: str = "http://127.0.0.1:11434",
    config_path: Path | None = None,
    assume_yes: bool = False,
    dry_run: bool = False,
    interactive: bool = True,
) -> SetupResult:
    """Plan, optionally confirm, pull missing models and write the config.

    Args:
        profile: Profile to set up. When falsy, the user is prompted if
            *interactive* is true; otherwise ``workstation_12gb`` is used.
        base_url: Base URL used for discovery and pulls.
        config_path: Target config file; defaults to ``default_config_path()``.
        assume_yes: Skip the download confirmation prompt.
        dry_run: Report what would happen without pulling or writing.
        interactive: Allow prompts. When false, no confirmation is requested.

    Returns:
        A ``SetupResult``. Failures are reported through its ``error`` field
        rather than raised.
    """
    if profile:
        chosen = profile
    elif interactive:
        chosen = prompt_profile_choice()
    else:
        chosen = None
    chosen = chosen or "workstation_12gb"

    path = config_path or default_config_path()
    try:
        plan = plan_setup(chosen, base_url=base_url, config_path=path)
    except Exception as exc:
        return SetupResult(
            profile=chosen,
            config_path=path,
            pulled=(),
            skipped=(),
            already_present=(),
            tiers={},
            error=str(exc),
        )

    def outcome(**overrides) -> SetupResult:
        # Shared defaults for every result built from a successful plan.
        fields = {
            "profile": plan.profile,
            "config_path": plan.config_path,
            "pulled": (),
            "skipped": (),
            "already_present": plan.installed,
            "tiers": {},
        }
        fields.update(overrides)
        return SetupResult(**fields)

    if dry_run:
        preview_tiers = describe_tiers(assign_recommended_tiers(plan.profile))
        return outcome(skipped=plan.missing, tiers=preview_tiers, dry_run=True)

    must_confirm = bool(plan.missing) and interactive and not assume_yes
    if must_confirm and not prompt_download_consent(missing=plan.missing):
        return outcome(skipped=plan.missing, cancelled=True)

    pulled: list[str] = []
    errors: list[str] = []
    for tag in plan.missing:
        try:
            print(f"Pulling {tag}...", flush=True)
            pull_model(tag, base_url=base_url)
        except Exception as exc:
            # One failed pull must not stop the remaining tags.
            errors.append(f"{tag}: {exc}")
        else:
            pulled.append(tag)

    # The config is written even when some pulls failed.
    try:
        write_setup_config(plan.profile, plan.config_path)
        tiers = describe_tiers(assign_recommended_tiers(plan.profile))
    except Exception as exc:
        errors.append(str(exc))
        tiers = {}

    if not errors:
        return outcome(pulled=tuple(pulled), tiers=tiers)

    not_pulled = tuple(tag for tag in plan.missing if tag not in pulled)
    return outcome(
        pulled=tuple(pulled),
        skipped=not_pulled,
        tiers=tiers,
        error="; ".join(errors),
    )
236
+
237
+
238
def format_setup_summary(result: SetupResult) -> str:
    """Render *result* as newline-separated ``key=value`` style summary lines.

    Profile and config path are always present. Model lists, tiers and
    status/error lines appear only when the corresponding field is truthy.
    """
    lines = [f"profile={result.profile}", f"config={result.config_path}"]

    tag_groups = (
        ("already_present", result.already_present),
        ("pulled", result.pulled),
        ("not_installed", result.skipped),
    )
    for label, tags in tag_groups:
        if tags:
            lines.append(f"{label}={','.join(tags)}")

    if result.tiers:
        lines.append("tiers:")
        lines.extend(f" {key}: {value or '-'}" for key, value in result.tiers.items())

    if result.cancelled:
        lines.append("status=cancelled (no downloads)")
    if result.dry_run:
        lines.append("status=dry-run")
    if result.error:
        lines.append(f"error={result.error}")

    return "\n".join(lines)
@@ -0,0 +1,169 @@
1
+ """Import-time and CLI tips: scan local models and surface community recommendations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+
9
+ logger = logging.getLogger("split_stack")
10
+
11
+ _emitted = False
12
+
13
+
14
def _import_tips_mode() -> str:
    """Return ``SPLIT_STACK_IMPORT_TIPS`` trimmed and lowercased (``"off"`` if unset)."""
    raw = os.getenv("SPLIT_STACK_IMPORT_TIPS", "off")
    return raw.strip().lower()
16
+
17
+
18
def _is_disabled(mode: str) -> bool:
    """Return True when *mode* is one of the values that switch import tips off."""
    off_values = frozenset({"0", "false", "no", "off", "quiet"})
    return mode in off_values
20
+
21
+
22
def _should_echo_stderr(mode: str) -> bool:
    """Decide whether tips should also be printed to stderr.

    Explicit "on" values always echo. ``auto`` echoes only when stderr is a
    TTY and neither the ``split_stack`` logger nor the root logger has any
    handler attached. Every other mode never echoes.
    """
    forced_on = mode in {"1", "true", "yes", "stderr", "on"}
    if forced_on or mode != "auto":
        return forced_on

    # auto mode from here on
    if not sys.stderr.isatty():
        return False
    configured_handlers = logger.handlers or logging.getLogger().handlers
    return not configured_handlers
33
+
34
+
35
def _default_report_profile() -> str:
    """Return the default profile resolved by ``split_stack.session``.

    The import happens at call time rather than when this module is loaded.
    """
    from split_stack.session import default_profile_from_env as resolve_default

    return resolve_default()
39
+
40
+
41
def model_recommendation_report(
    *,
    profile: str | None = None,
    include_api: bool = False,
    base_url: str = "http://127.0.0.1:11434",
) -> list[str]:
    """Build human-readable lines about installed vs recommended models.

    Args:
        profile: Deployment profile name or alias. When falsy, the value of
            ``_default_report_profile()`` is used.
        include_api: When true, combine the models returned by
            ``list_model_inventory(base_url=...)`` (API and disk); otherwise
            only ``discover_models_from_disk()`` is consulted.
        base_url: Base URL forwarded to ``list_model_inventory``.

    Returns:
        The report lines. When no model is installed, only a short
        getting-started hint is returned.
    """
    from split_stack.community_picks import (
        focus_stack,
        recommended_models_for_tier,
        vram_tier_for_profile,
    )
    from split_stack.discovery import audit_model_folders, discover_models_from_disk, list_model_inventory
    from split_stack.model_registry import normalize_deployment_profile

    profile_name = normalize_deployment_profile(profile or _default_report_profile())
    # Derive the tier from the resolved profile, not the raw argument, so the
    # tier always belongs to the profile shown in the header (also when
    # *profile* is None and the default is used).
    vram_tier = vram_tier_for_profile(profile_name)

    audit = None
    if include_api:
        inventory = list_model_inventory(base_url=base_url)
        installed = sorted(set(inventory.api_models) | set(inventory.disk_models))
        primary = inventory.manifest_roots[0] if inventory.manifest_roots else None
    else:
        installed = discover_models_from_disk()
        audit = audit_model_folders()
        primary = audit.get("primary_root")

    lines: list[str] = []
    if not installed:
        lines.append("split-stack: no local Ollama models found on disk.")
        lines.append(
            " Starter agent stack: ollama pull gemma4:e4b && "
            "ollama pull qwen3:8b && ollama pull qwen3:14b"
        )
        lines.append(" Then: stack models --include-disk")
        return lines

    header = f"split-stack: {len(installed)} local model(s)"
    if primary:
        header += f" under {primary}"
    lines.append(f"{header} (profile {profile_name}, tier {vram_tier}).")

    stack = focus_stack("agentic", vram_tier=vram_tier)
    if stack and stack.models:
        have = [name for name in stack.models if name in installed]
        missing_stack = [name for name in stack.models if name not in installed]
        lines.append(f" Reddit agent stack: {', '.join(stack.models)}")
        if have:
            lines.append(f" Installed from stack: {', '.join(have)}")
        if missing_stack:
            lines.append(f" Pull for routing spread: {', '.join(missing_stack)}")

    # Community picks are compared case-insensitively and loosely: a pick
    # counts as installed when it appears inside any installed name.
    installed_lower = {name.lower() for name in installed}
    recommended = recommended_models_for_tier(vram_tier=vram_tier)
    missing_picks: list[str] = []
    for model_name in recommended:
        lowered = model_name.lower()
        if lowered in installed_lower:
            continue
        if any(lowered in name or name.startswith(lowered) for name in installed_lower):
            continue
        missing_picks.append(model_name)

    if missing_picks:
        preview = ", ".join(missing_picks[:6])
        if len(missing_picks) > 6:
            preview += ", ..."
        lines.append(f" Community picks to explore: {preview}")

    extras = sorted(
        name
        for name in installed
        if stack and name not in stack.models and name not in recommended
    )
    if extras:
        preview = ", ".join(extras[:6])
        if len(extras) > 6:
            preview += ", ..."
        lines.append(f" Also on disk (not in default stack): {preview}")

    # Reuse the audit from the disk-only path instead of scanning a second time.
    if audit is None:
        audit = audit_model_folders()
    duplicate_tags = audit.get("duplicate_tags") or []
    if duplicate_tags:
        lines.append(
            " Duplicate tags across folders: "
            f"{', '.join(duplicate_tags)} — keep one Ollama models directory."
        )

    lines.append(" Commands: stack models --include-disk | stack tips | stack stacks")
    return lines
131
+
132
+
133
def emit_import_tips(
    *,
    profile: str | None = None,
    include_api: bool = False,
    base_url: str = "http://127.0.0.1:11434",
) -> None:
    """Log model recommendations once per process (controlled by SPLIT_STACK_IMPORT_TIPS).

    Does nothing when tips were already emitted or the mode is a disabling
    value. The once-only flag is set before the report is built, so a failed
    report is logged at debug level and not retried. Lines are logged at info
    level and, depending on the mode, also printed to stderr.
    """
    global _emitted
    if _emitted:
        return

    mode = _import_tips_mode()
    if _is_disabled(mode):
        return

    _emitted = True
    resolved_profile = profile or _default_report_profile()
    try:
        report = model_recommendation_report(
            profile=resolved_profile,
            include_api=include_api,
            base_url=base_url,
        )
    except Exception as exc:
        logger.debug("split-stack import tips skipped: %s", exc)
        return

    for entry in report:
        logger.info(entry)

    if not _should_echo_stderr(mode):
        return
    print("\n".join(report), file=sys.stderr)
164
+
165
+
166
def reset_import_tips_for_tests() -> None:
    """Clear the once-per-process flag so a test can call ``emit_import_tips`` again."""
    global _emitted
    _emitted = False
split_stack/tiering.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ from split_stack.model_registry import ModelRegistry, load_registry, model_weight
4
+ from split_stack.models import TierMap
5
+
6
+
7
def _is_code_specialist(name: str) -> bool:
    """Return True when *name* looks like a coding model.

    The check is a case-insensitive substring match against known coder
    markers. Any name containing ``reasoning`` is never a code specialist.
    """
    lowered = name.lower()
    code_markers = ("codellama", "deepseek-coder", "starcoder", "codegemma", "coder")
    return "reasoning" not in lowered and any(marker in lowered for marker in code_markers)
13
+
14
+
15
def _is_reasoning_specialist(name: str) -> bool:
    """Return True when *name* contains ``deepseek-r1``, ``reasoning`` or ``:r1`` (any case)."""
    lowered = name.lower()
    reasoning_markers = ("deepseek-r1", "reasoning", ":r1")
    return any(marker in lowered for marker in reasoning_markers)
18
+
19
+
20
def assign_tiers(model_names: list[str], registry: ModelRegistry | None = None) -> TierMap:
    """Map *model_names* onto the simple/medium/complex/reasoning/code slots.

    Code specialists are set aside for the ``code`` slot and reasoning
    specialists for the ``reasoning`` slot, unless that would leave no model
    for the core ladder. The remaining models are ranked by ``model_weight``:
    lightest is ``simple``, second lightest is ``medium``, heaviest is
    ``complex``. Without a reasoning specialist, ``reasoning`` reuses the
    complex model; without a code specialist, ``code`` is ``None``.

    Raises:
        ValueError: If *model_names* is empty.
    """
    if not model_names:
        raise ValueError("model_names must contain at least one model")

    reg = registry or load_registry()

    def weight(name: str):
        return model_weight(name, reg)

    code_models = [name for name in model_names if _is_code_specialist(name)]
    general_models = [name for name in model_names if name not in code_models]
    if not general_models:
        # Only coders were supplied: let them fill the general ladder instead.
        general_models, code_models = list(model_names), []

    reasoning_models = [name for name in general_models if _is_reasoning_specialist(name)]
    core_models = [
        name for name in general_models if name not in reasoning_models
    ] or list(general_models)

    ranked = sorted(core_models, key=weight)
    simple, complex_model = ranked[0], ranked[-1]
    medium = ranked[1] if len(ranked) > 1 else ranked[0]

    # sorted(...)[-1] rather than max(): on equal weights it picks the last
    # candidate, which max() would not.
    reasoning = sorted(reasoning_models, key=weight)[-1] if reasoning_models else complex_model
    code = sorted(code_models, key=weight)[-1] if code_models else None

    return TierMap(
        simple=simple,
        medium=medium,
        complex=complex_model,
        reasoning=reasoning,
        code=code,
    )
57
+
58
+
59
def describe_tiers(tiers: TierMap) -> dict[str, str | None]:
    """Return the five tier slots of *tiers* as a plain ``slot -> model`` dict."""
    slots = ("simple", "medium", "complex", "reasoning", "code")
    return {slot: getattr(tiers, slot) for slot in slots}
@@ -0,0 +1,85 @@
1
+ """Session and tier-map validation — warnings, not hard failures."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from split_stack.model_registry import load_registry, model_weight
6
+ from split_stack.models import TierMap
7
+ from split_stack.tiering import describe_tiers
8
+
9
+
10
def _is_reasoning_specialist(name: str) -> bool:
    """Return True when the lowercased *name* carries a reasoning-model marker."""
    lowered = name.lower()
    for marker in ("deepseek-r1", "reasoning", ":r1"):
        if marker in lowered:
            return True
    return False
13
+
14
+
15
def _is_code_specialist(name: str) -> bool:
    """Return True when the lowercased *name* carries a coder marker.

    Names containing ``reasoning`` are excluded before markers are checked.
    """
    lowered = name.lower()
    if "reasoning" in lowered:
        return False
    for marker in ("codellama", "deepseek-coder", "starcoder", "codegemma", "coder"):
        if marker in lowered:
            return True
    return False
21
+
22
+
23
def validate_tier_map(
    tiers: TierMap,
    models: list[str] | tuple[str, ...],
    *,
    profile: str | None = None,
) -> list[str]:
    """Return human-readable warnings about a tier ladder.

    Args:
        tiers: The tier assignment to inspect.
        models: The model tags the ladder was built from. May be empty.
        profile: Forwarded to ``load_registry`` to pick the registry.

    Returns:
        A list of warning strings, empty when nothing looks off. Checks cover
        too few models, slots sharing a model, missing reasoning or code
        specialists, a simple slot heavier than the medium slot, and a simple
        slot that is not the lightest model.
    """
    warnings: list[str] = []
    model_list = list(models)
    registry = load_registry(profile=profile)

    if len(model_list) < 2:
        warnings.append("Fewer than two models — routing cannot spread across tiers.")

    slot_models = {
        tiers.simple,
        tiers.medium,
        tiers.complex,
        tiers.reasoning,
    }
    if tiers.code:
        slot_models.add(tiers.code)
    unique_slots = len(slot_models)
    if unique_slots < min(3, len(model_list)):
        warnings.append(
            "Multiple tier slots map to the same model — spread is mostly cosmetic."
        )

    reasoning_specialists = [name for name in model_list if _is_reasoning_specialist(name)]
    if not reasoning_specialists and tiers.reasoning == tiers.complex:
        warnings.append(
            "No reasoning specialist in models= — hint='reason' uses the complex model "
            f"({tiers.complex}), not a dedicated reasoner."
        )

    code_specialists = [name for name in model_list if _is_code_specialist(name)]
    if not code_specialists:
        warnings.append(
            "No code specialist in models= — hint='code' uses the complex tier "
            f"({tiers.complex}) unless the prompt looks like code."
        )

    try:
        simple_w = model_weight(tiers.simple, registry)
        medium_w = model_weight(tiers.medium, registry)
        if simple_w > medium_w:
            warnings.append(
                f"Simple slot ({tiers.simple}, weight {simple_w}) is heavier than "
                f"medium ({tiers.medium}, weight {medium_w}) — check registry rows for "
                "unknown tags (heuristic weight 1000)."
            )
    except Exception:
        # Best effort: a failed weight lookup only skips this one warning.
        pass

    # Check the length first: indexing model_list[0] on an empty list raised
    # IndexError before the length guard was ever evaluated.
    if len(model_list) >= 2 and tiers.simple != model_list[0]:
        ranked = sorted(model_list, key=lambda name: model_weight(name, registry))
        if tiers.simple != ranked[0]:
            warnings.append(
                f"Simple slot is {tiers.simple}; lightest installed tag is {ranked[0]}."
            )

    return warnings