split-stack 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+ from typing import Callable
7
+
8
+
9
class UsageProfile(str, Enum):
    """Usage scenarios for which split-stack lists prerequisites.

    Members are ``str`` instances as well, so each one compares equal to its
    plain string value.
    """

    CORE = "core"
    OLLAMA_DISCOVERY = "ollama_discovery"
    LOCAL_ASSISTANT = "local_assistant"
    CLI_DOCTOR = "cli_doctor"
16
+
17
+
18
@dataclass(frozen=True)
class Prerequisite:
    """A single requirement of a usage profile, optionally with a probe result.

    Instances are immutable. ``satisfied`` keeps its ``None`` default when no
    probe was run for the item.
    """

    id: str  # key used to look up a checker for this item
    description: str  # human-readable statement of the requirement
    kind: str  # category label such as "runtime", "package" or "service"
    required: bool  # False marks the item as optional for the profile
    install_command: str | None = None  # command or URL for obtaining the item
    verify_hint: str | None = None  # command or tip for verifying it manually
    satisfied: bool | None = None  # probe outcome; None means "not checked"
27
+
28
+
29
@dataclass(frozen=True)
class ProfileRequirements:
    """The prerequisites of one usage profile plus its display text."""

    profile: UsageProfile
    title: str
    summary: str
    prerequisites: tuple[Prerequisite, ...]

    @property
    def ready(self) -> bool:
        """Whether no required prerequisite is known to be unsatisfied.

        Optional items are ignored, and required items that were never probed
        (``satisfied is None``) do not count against readiness.
        """
        for entry in self.prerequisites:
            if entry.required and entry.satisfied is False:
                return False
        return True
39
+
40
+
41
def _python_ok() -> bool:
    """Report whether the running interpreter is Python 3.10 or newer."""
    minimum_version = (3, 10)
    return sys.version_info >= minimum_version
43
+
44
+
45
def _requests_ok() -> bool:
    """Report whether the ``requests`` package can be imported."""
    try:
        import requests  # noqa: F401
    except ImportError:
        return False
    else:
        return True
51
+
52
+
53
def _ollama_ok(base_url: str = "http://127.0.0.1:11434") -> bool:
    """Probe an Ollama server and report whether it lists at least one model.

    Sends ``GET <base_url>/api/tags`` with a 2-second timeout and returns the
    truthiness of the ``"models"`` entry in the JSON reply. Every failure
    (``requests`` not importable, request error, HTTP error status, or a body
    that cannot be read as expected) produces ``False`` instead of raising.
    """
    if not _requests_ok():
        return False
    try:
        import requests

        endpoint = f"{base_url.rstrip('/')}/api/tags"
        reply = requests.get(endpoint, timeout=2)
        reply.raise_for_status()
        body = reply.json()
        if not body:
            # An empty/null JSON body is treated like an empty object.
            body = {}
        models = body.get("models")
    except Exception:
        # Best-effort probe: anything going wrong means "not ok".
        return False
    return bool(models)
65
+
66
+
67
def _catalog() -> dict[UsageProfile, tuple[Prerequisite, ...]]:
    """Build the prerequisite table, keyed by usage profile.

    Every entry is returned with ``satisfied`` at its default (unprobed).
    The Python requirement is identical for all profiles and is created once;
    items that differ only in wording or flags come from the local factories.
    """
    python = Prerequisite(
        id="python",
        description="Python 3.10 or newer",
        kind="runtime",
        required=True,
        install_command="https://www.python.org/downloads/",
        verify_hint="python --version",
    )

    def package(
        description: str,
        *,
        install_command: str = "python -m pip install split-stack",
        verify_hint: str = "python -c \"import split_stack\"",
    ) -> Prerequisite:
        """The split-stack package requirement with per-profile wording."""
        return Prerequisite(
            id="split_stack",
            description=description,
            kind="package",
            required=True,
            install_command=install_command,
            verify_hint=verify_hint,
        )

    def requests_lib(description: str, *, required: bool = True) -> Prerequisite:
        """The ``requests`` dependency, required or optional per profile."""
        return Prerequisite(
            id="requests",
            description=description,
            kind="package",
            required=required,
            install_command="python -m pip install split-stack[ollama]",
            verify_hint="python -c \"import requests\"",
        )

    def ollama_service(
        description: str,
        *,
        verify_hint: str = "ollama list",
        required: bool = True,
    ) -> Prerequisite:
        """The Ollama service requirement."""
        return Prerequisite(
            id="ollama",
            description=description,
            kind="service",
            required=required,
            install_command="https://ollama.com/download",
            verify_hint=verify_hint,
        )

    def pulled_models(description: str, verify_hint: str) -> Prerequisite:
        """The requirement that models are available in Ollama."""
        return Prerequisite(
            id="ollama_models",
            description=description,
            kind="input",
            required=True,
            verify_hint=verify_hint,
        )

    running_on_localhost = "Ollama installed and running on localhost:11434"

    return {
        UsageProfile.CORE: (
            python,
            package("split-stack package installed"),
            Prerequisite(
                id="model_names",
                description="A list of model names for assign_tiers() (from config or your provider)",
                kind="input",
                required=True,
                verify_hint='assign_tiers(["qwen3:4b", "qwen3:8b"])',
            ),
        ),
        UsageProfile.OLLAMA_DISCOVERY: (
            python,
            package("split-stack package installed"),
            requests_lib("requests library (optional extra)"),
            ollama_service(running_on_localhost),
            pulled_models(
                "At least one model pulled in Ollama (e.g. ollama pull qwen3:8b)",
                "ollama list",
            ),
        ),
        UsageProfile.LOCAL_ASSISTANT: (
            python,
            package(
                "split-stack installed in editable mode from repo checkout",
                install_command="python -m pip install -e .",
            ),
            requests_lib("requests library for Ollama HTTP calls"),
            ollama_service(running_on_localhost, verify_hint="ollama serve"),
            pulled_models(
                "Two or more models recommended so tiers differ (small + large)",
                "ollama pull qwen3:4b && ollama pull qwen3:14b",
            ),
            Prerequisite(
                id="gpu_ram",
                description="Enough VRAM/RAM to run your largest pulled model",
                kind="hardware",
                required=True,
                verify_hint="Start with smaller models if inference is slow or fails",
            ),
        ),
        UsageProfile.CLI_DOCTOR: (
            python,
            package(
                "split-stack package installed (includes stack CLI)",
                verify_hint="stack doctor",
            ),
            requests_lib("requests library for Ollama tier detection", required=False),
            ollama_service(
                "Ollama running locally (optional; doctor skips if unavailable)",
                required=False,
            ),
        ),
    }
218
+
219
+
220
# Display text for each usage profile, stored as (title, summary) pairs.
_PROFILE_META: dict[UsageProfile, tuple[str, str]] = {
    UsageProfile.CORE: (
        "Core library",
        "Use score_prompt(), assign_tiers(), and route_prompt() with your own model list.",
    ),
    UsageProfile.OLLAMA_DISCOVERY: (
        "Ollama discovery",
        "Call discover_models() to read model tags from a local Ollama instance.",
    ),
    UsageProfile.LOCAL_ASSISTANT: (
        "Local work assistant example",
        "Run examples/local_work_assistant/app.py for auto-tiered local Q&A.",
    ),
    UsageProfile.CLI_DOCTOR: (
        "CLI doctor",
        "Run stack doctor for split-stack guidance and optional Ollama tier output.",
    ),
}
238
+
239
+
240
def list_usage_profiles() -> list[UsageProfile]:
    """Return all usage profiles in the order the enum defines them."""
    return [profile for profile in UsageProfile]
242
+
243
+
244
def _ollama_running(base_url: str = "http://127.0.0.1:11434") -> bool:
    """Report whether an Ollama server answers at ``base_url``.

    Unlike ``_ollama_ok`` this does not look at the model list, so a running
    Ollama with nothing pulled yet still counts as running.

    Returns ``False`` when ``requests`` is not importable, the request fails,
    or the server replies with an HTTP error status.
    """
    try:
        import requests
    except ImportError:
        return False
    try:
        response = requests.get(f"{base_url.rstrip('/')}/api/tags", timeout=2)
        response.raise_for_status()
    except requests.RequestException:
        # Connection refused, timeout, or HTTP error status: not running.
        return False
    return True


# Probe functions keyed by ``Prerequisite.id``. Ids without an entry here
# (for example "model_names" and "gpu_ram") are left unchecked.
_CHECKERS: dict[str, Callable[[], bool]] = {
    "python": _python_ok,
    # Always satisfied; presumably because this code only runs once the
    # package is importable. NOTE(review): confirm that is the intent.
    "split_stack": lambda: True,
    "requests": _requests_ok,
    # "Service is running" and "models are pulled" are separate prerequisites:
    # only the latter depends on the model list. Mapping both to _ollama_ok
    # reported a running-but-empty Ollama as not running.
    "ollama": _ollama_running,
    "ollama_models": _ollama_ok,
}
251
+
252
+
253
def usage_requirements(
    profile: UsageProfile = UsageProfile.CORE,
    *,
    check: bool = False,
) -> ProfileRequirements:
    """Collect the prerequisites of ``profile``.

    With ``check=True`` every prerequisite whose id has a registered checker
    is probed on the local machine (Python version, requests, Ollama) and the
    result is stored in ``satisfied``. Without it, and for ids that have no
    checker, ``satisfied`` is ``None``.
    """
    title, summary = _PROFILE_META[profile]

    def probe(entry: Prerequisite) -> bool | None:
        """Run the checker for ``entry`` if probing is on and one exists."""
        if check and entry.id in _CHECKERS:
            return _CHECKERS[entry.id]()
        return None

    # Entries are frozen, so each one is rebuilt with its probe result.
    probed = tuple(
        Prerequisite(
            id=entry.id,
            description=entry.description,
            kind=entry.kind,
            required=entry.required,
            install_command=entry.install_command,
            verify_hint=entry.verify_hint,
            satisfied=probe(entry),
        )
        for entry in _catalog()[profile]
    )

    return ProfileRequirements(
        profile=profile,
        title=title,
        summary=summary,
        prerequisites=probed,
    )
split_stack/routing.py ADDED
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ from split_stack.complexity import (
4
+ looks_like_code,
5
+ resolve_tier,
6
+ score_prompt,
7
+ )
8
+ from split_stack.hints import normalize_step_kind, prefer_code_model
9
+ from split_stack.models import ComplexityTier, RouteDecision, StepKind, TierMap
10
+ from split_stack.tiering import describe_tiers
11
+
12
+
13
def route_prompt(
    prompt: str,
    tiers: TierMap,
    *,
    hint: str | StepKind | None = None,
) -> tuple[ComplexityTier, str]:
    """Pick a complexity tier and model for ``prompt``.

    Delegates to ``explain_route`` and reduces the resulting decision with
    its ``as_tuple()`` method.
    """
    decision = explain_route(prompt, tiers, hint=hint)
    return decision.as_tuple()
21
+
22
+
23
def explain_route(
    prompt: str,
    tiers: TierMap,
    *,
    hint: str | StepKind | None = None,
) -> RouteDecision:
    """Route ``prompt`` and record why the tier and model were chosen.

    The tier comes from ``resolve_tier`` when a hint is supplied and from
    ``score_prompt`` otherwise. The model is the code slot when code routing
    applies and a code model is configured; in every other case it is the
    model of the chosen tier. Each step appends a line to the decision's
    ``reasons``.
    """
    trace: list[str] = []

    # Stage 1: complexity tier.
    if hint is None:
        step_kind: StepKind | None = None
        tier_source = "heuristic"
        tier = score_prompt(prompt)
        trace.append(f"no hint — keyword/heuristic scoring → tier {tier.value}")
        word_count = len((prompt or "").split())
        if word_count > 80:
            trace.append("prompt length > 80 tokens influenced complex tier")
    else:
        step_kind = normalize_step_kind(hint)
        tier = resolve_tier(prompt, hint=step_kind)
        tier_source = "hint"
        trace.append(f"hint={step_kind.value} maps to tier {tier.value}")

    # Stage 2: model.
    wants_code = _should_use_code_model(prompt, tier, hint, step_kind, tiers)
    if not wants_code:
        model = tiers.for_tier(tier)
        model_source = "tier_slot"
        if tier == ComplexityTier.REASONING and tiers.reasoning == tiers.complex:
            trace.append(
                f"reasoning tier → {model} (no separate reasoning model; complex fallback)"
            )
        else:
            trace.append(f"{tier.value} slot → {model}")
    elif tiers.code:
        model = tiers.code
        model_source = "code_slot"
        if prefer_code_model(hint) or step_kind == StepKind.CODE:
            shown_hint = step_kind.value if step_kind else hint
            trace.append(f"code specialist {model} (hint={shown_hint})")
        else:
            trace.append(f"code specialist {model} (prompt looks like code)")
    else:
        # Code routing was requested but no code model is configured.
        model = tiers.for_tier(tier)
        model_source = "tier_slot"
        trace.append(
            f"code-like prompt but no code slot — using {tier.value} model {model}"
        )

    return RouteDecision(
        tier=tier,
        model=model,
        # Enum hints are reported by value; string hints pass through as given.
        hint=hint.value if isinstance(hint, StepKind) else hint,
        step_kind=step_kind.value if step_kind else None,
        tier_source=tier_source,
        model_source=model_source,
        reasons=tuple(trace),
        tiers=describe_tiers(tiers),
    )
81
+
82
+
83
def _should_use_code_model(
    prompt: str,
    tier: ComplexityTier,
    hint: str | StepKind | None,
    step_kind: StepKind | None,
    tiers: TierMap,
) -> bool:
    """Decide whether routing should go for the code-specialist model.

    An explicit code signal (``prefer_code_model(hint)`` or a ``CODE`` step
    kind) always wins, even when no code model is configured. Without one,
    the prompt must look like code, a code model must be configured, and the
    tier must be medium or complex.
    """
    explicit_code = prefer_code_model(hint) or step_kind == StepKind.CODE
    if explicit_code:
        return True

    eligible = tiers.code is not None and tier in (
        ComplexityTier.COMPLEX,
        ComplexityTier.MEDIUM,
    )
    if not eligible:
        return False
    return looks_like_code(prompt)
split_stack/session.py ADDED
@@ -0,0 +1,259 @@
1
+ """Minimal session: set VRAM once, route every call."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+
8
+ from split_stack.model_registry import normalize_deployment_profile
9
+ from split_stack.models import ComplexityTier, RouteDecision, TierMap
10
+ from split_stack.presets import recommended_models
11
+ from split_stack.routing import explain_route, route_prompt
12
+ from split_stack.tiering import assign_tiers, describe_tiers
13
+ from split_stack.validation import validate_tier_map
14
+
15
+
16
@dataclass(frozen=True)
class Session:
    """Immutable record of the routing configuration set up by ``configure()``."""

    profile: str  # deployment profile name
    vram_gb: int | None  # VRAM in GB, or None when not supplied
    quant: str  # quantization mode
    models: tuple[str, ...]  # model names the session works with
    tiers: TierMap  # tier map used for routing
    warnings: tuple[str, ...] = ()  # warnings collected while configuring
    note: str | None = None  # optional remark about how models were chosen
25
+
26
+
27
# Module-wide active session; stays None until configure() stores one.
_session: Session | None = None
28
+
29
+
30
def profile_for_vram_gb(vram_gb: int) -> str:
    """Translate a discrete GPU VRAM size into a deployment profile name.

    The smallest workstation profile whose ceiling is at least ``vram_gb`` is
    chosen; anything above 32 GB maps to ``"datacenter"``.
    """
    ceilings = (
        (8, "workstation_8gb"),
        (12, "workstation_12gb"),
        (16, "workstation_16gb"),
        (24, "workstation_24gb"),
        (32, "workstation_32gb"),
    )
    for ceiling, name in ceilings:
        if vram_gb <= ceiling:
            return name
    return "datacenter"
43
+
44
+
45
def _vram_from_env() -> int | None:
    """Read a positive integer VRAM size from ``SPLIT_STACK_VRAM_GB``.

    Returns ``None`` when the variable is unset, blank, not an integer, or
    not greater than zero.
    """
    text = os.environ.get("SPLIT_STACK_VRAM_GB", "").strip()
    if not text:
        return None
    try:
        gigabytes = int(text)
    except ValueError:
        return None
    if gigabytes <= 0:
        return None
    return gigabytes
54
+
55
+
56
def _profile_from_env() -> str | None:
    """Read ``SPLIT_STACK_PROFILE``, stripped; ``None`` when unset or blank."""
    value = os.environ.get("SPLIT_STACK_PROFILE", "").strip()
    if value:
        return value
    return None
59
+
60
+
61
def default_profile_from_env(*, fallback: str = "workstation_12gb") -> str:
    """Derive the deployment profile from the environment.

    ``SPLIT_STACK_PROFILE`` takes precedence and is normalized; otherwise
    ``SPLIT_STACK_VRAM_GB`` is mapped to a profile; with neither available
    ``fallback`` is returned as given.
    """
    explicit = _profile_from_env()
    if explicit:
        return normalize_deployment_profile(explicit)

    vram = _vram_from_env()
    return fallback if vram is None else profile_for_vram_gb(vram)
70
+
71
+
72
def _quant_from_env() -> str | None:
    """Return whatever ``split_stack.quantization.quant_from_env()`` reports."""
    # Imported at call time rather than module import time; presumably to
    # avoid an import cycle (TODO confirm).
    from split_stack import quantization

    return quantization.quant_from_env()
76
+
77
+
78
def _resolve_models(
    profile: str,
    models: list[str] | None,
    *,
    quant: str | None = None,
) -> tuple[list[str], str | None]:
    """Choose the model list for a session and an optional explanatory note.

    Resolution order:

    1. A non-empty ``models`` argument is returned unchanged, without a note.
    2. If models are discovered on disk: the recommended models that are
       installed when there are at least two of them; otherwise up to three
       installed models ordered by ``model_weight``; otherwise the single
       installed model.
    3. With nothing discovered (or discovery failing), the recommended stack
       itself, noted as unverified.
    """
    if models:
        return models, None

    from split_stack.quantization import normalize_quant_mode

    desired = recommended_models(profile, quant=quant)
    note: str | None = None
    mode = normalize_quant_mode(quant)
    if mode == "qat" and len(desired) > len(recommended_models(profile)):
        # NOTE(review): this note is replaced or dropped on every return path
        # below, so it never reaches the caller. Confirm whether it was meant
        # to be surfaced.
        note = "QAT stack: added Gemma 4 models that fit at int4 runtime sizes."

    try:
        from split_stack.discovery import discover_models_from_disk

        disk = discover_models_from_disk()
    except Exception:
        # Discovery is best-effort; any failure counts as nothing installed.
        disk = []

    if not disk:
        return desired, "Using recommended stack (not verified against disk)."

    matched = [name for name in desired if name in disk]
    if len(matched) >= 2:
        return matched, None

    if len(disk) < 2:
        note = f"Using installed models only: {', '.join(disk)}"
        return disk, note

    from split_stack.model_registry import load_registry, model_weight

    registry = load_registry(profile=profile)
    ranked = sorted(disk, key=lambda name: model_weight(name, registry))
    chosen = ranked[:3]
    note = (
        f"Recommended stack not fully installed ({', '.join(desired)}). "
        f"Using: {', '.join(chosen)}"
    )
    return chosen, note
119
+
120
+
121
def configure(
    *,
    vram_gb: int | None = None,
    profile: str | None = None,
    models: list[str] | None = None,
    tiers: TierMap | None = None,
    quant: str | None = None,
) -> Session:
    """Set the default profile and tier map for ``route()`` and ``explain()``.

    Progressive control:
    - ``configure(vram_gb=16)`` — preset profile and recommended models
    - ``configure(..., models=[...])`` — explicit model list, auto tier ladder
    - ``configure(..., models=[...], tiers=...)`` — explicit ladder (power users)

    Arguments left as ``None`` fall back to the environment
    (``SPLIT_STACK_PROFILE``, ``SPLIT_STACK_VRAM_GB`` and the quantization
    setting). The resulting session is stored module-wide and returned.

    Raises:
        ValueError: if neither a profile nor a VRAM size can be determined,
            if no models resolve for the profile, or if ``tiers`` names a
            model that is missing from an explicitly passed ``models``.
    """
    global _session
    from split_stack.quantization import normalize_quant_mode

    quant_mode = normalize_quant_mode(_quant_from_env() if quant is None else quant)

    # Environment fallbacks for whatever the caller left out.
    if profile is None:
        profile = _profile_from_env()
    if vram_gb is None:
        vram_gb = _vram_from_env()

    # A named profile wins over one derived from the VRAM size.
    if profile is not None:
        profile = normalize_deployment_profile(profile)
    elif vram_gb is not None:
        profile = profile_for_vram_gb(vram_gb)
    else:
        raise ValueError(
            "Pass vram_gb=16 (or profile='workstation_16gb'), "
            "or set SPLIT_STACK_VRAM_GB / SPLIT_STACK_PROFILE."
        )

    resolved_models, note = _resolve_models(profile, models, quant=quant_mode)
    if not resolved_models:
        raise ValueError(f"No models for profile {profile}")

    if tiers is None:
        tier_map = assign_tiers(resolved_models)
    else:
        tier_map = tiers
        if models is not None:
            # An explicit ladder may only reference the explicit model list.
            slots = (
                tier_map.simple,
                tier_map.medium,
                tier_map.complex,
                tier_map.reasoning,
                tier_map.code,
            )
            unknown = {name for name in slots if name and name not in resolved_models}
            if unknown:
                raise ValueError(
                    f"tiers= references models not in models=: {', '.join(sorted(unknown))}"
                )

    _session = Session(
        profile=profile,
        vram_gb=vram_gb,
        quant=quant_mode,
        models=tuple(resolved_models),
        tiers=tier_map,
        warnings=tuple(validate_tier_map(tier_map, resolved_models, profile=profile)),
        note=note,
    )
    return _session
194
+
195
+
196
def get_session() -> Session | None:
    """Return the module-level session, or ``None`` if none has been stored."""
    return _session
198
+
199
+
200
def session_warnings() -> tuple[str, ...]:
    """Warnings from the last ``configure()`` (empty if none)."""
    active = _session
    if not active:
        return ()
    return active.warnings
204
+
205
+
206
def _ensure_session() -> Session:
    """Return the active session, configuring from the environment if needed.

    When nothing is configured yet and either ``SPLIT_STACK_VRAM_GB`` or
    ``SPLIT_STACK_PROFILE`` yields a value, ``configure()`` is run with its
    defaults.

    Raises:
        RuntimeError: if no session exists and the environment provides
            neither a VRAM size nor a profile.
    """
    if _session is not None:
        return _session

    if _vram_from_env() is None and _profile_from_env() is None:
        raise RuntimeError(
            "split_stack.configure(vram_gb=16) first, or set SPLIT_STACK_VRAM_GB."
        )

    configure()
    session = _session
    assert session is not None
    return session
218
+
219
+
220
def route(
    prompt: str,
    *,
    hint: str | None = None,
) -> tuple[ComplexityTier, str]:
    """Route one prompt using the configured session. Call ``configure()`` first."""
    tiers = _ensure_session().tiers
    return route_prompt(prompt, tiers, hint=hint)
228
+
229
+
230
def explain(
    prompt: str,
    *,
    hint: str | None = None,
) -> RouteDecision:
    """Route with a full decision trace (logging, CLI, tests)."""
    tiers = _ensure_session().tiers
    return explain_route(prompt, tiers, hint=hint)
238
+
239
+
240
def describe_session() -> dict[str, object]:
    """Snapshot of the active session for logs and ``stack explain`` without a prompt.

    Returns ``{"configured": False}`` when no session exists. Tuple fields
    are copied into lists in the snapshot.
    """
    active = get_session()
    if active is None:
        return {"configured": False}

    snapshot: dict[str, object] = {"configured": True}
    snapshot["profile"] = active.profile
    snapshot["vram_gb"] = active.vram_gb
    snapshot["quant"] = active.quant
    snapshot["models"] = list(active.models)
    snapshot["tiers"] = describe_tiers(active.tiers)
    snapshot["warnings"] = list(active.warnings)
    snapshot["note"] = active.note
    return snapshot
255
+
256
+
257
def reset_session_for_tests() -> None:
    """Clear the module-level session so no configuration is active."""
    global _session
    _session = None