split-stack 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ """Community model picks — editable JSON, sourced from r/LocalLLaMA megathreads."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from dataclasses import dataclass
8
+ from functools import lru_cache
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ _PACKAGE_DEFAULT = Path(__file__).resolve().parents[2] / "config" / "community_picks.json"
13
+
14
+
15
@dataclass(frozen=True)
class CommunityPick:
    """A single community-recommended model for one hint and VRAM tier."""

    # Model name as written in the community picks config.
    model: str
    # Free-text remark attached to the pick; empty when the config gives none.
    note: str
    # 1-based position of the pick inside its tier list.
    rank: int = 1
20
+
21
+
22
@dataclass(frozen=True)
class HintCommunityGuide:
    """Immutable record grouping the community picks of one hint.

    The visible code in this module never instantiates it; the field names
    mirror the keys of the per-hint dicts built by ``build_community_guide``.
    """

    # Hint identifier (for example "lookup" or "code").
    hint_id: str
    # Category label read from the hint's "reddit_category" config key.
    reddit_category: str
    # VRAM tier code the picks were selected for.
    vram_tier: str
    # Picks for that hint and tier.
    picks: tuple[CommunityPick, ...]
28
+
29
+
30
@dataclass(frozen=True)
class FocusStack:
    """A named bundle of models resolved for one VRAM tier."""

    # Key of the stack under "focus_stacks" in the config.
    id: str
    # Display label; ``focus_stack`` falls back to the id when the config has none.
    label: str
    # Longer description; empty when the config has none.
    description: str
    # Model names selected for the requested tier.
    models: tuple[str, ...]
36
+
37
+
38
def config_search_paths(explicit: str | None = None) -> list[Path]:
    """Return candidate config locations, highest priority first.

    Candidates, in order:

    1. ``explicit``, when given and non-empty.
    2. The ``SPLIT_STACK_COMMUNITY_CONFIG`` environment variable, when set to
       a non-blank value.
    3. ``split-stack.community.json`` in the current working directory.
    4. ``config/community_picks.json`` under the current working directory.
    5. The packaged default (``_PACKAGE_DEFAULT``).

    Each candidate is user-expanded and resolved. Duplicates are dropped,
    keeping the first occurrence. Existence is not checked here.

    Args:
        explicit: Optional caller-supplied path to try first.

    Returns:
        Resolved, de-duplicated paths in priority order.
    """
    candidates: list[Path] = []
    if explicit:
        candidates.append(Path(explicit))
    env_path = os.environ.get("SPLIT_STACK_COMMUNITY_CONFIG", "").strip()
    if env_path:
        candidates.append(Path(env_path))
    cwd = Path.cwd()
    candidates += [
        cwd / "split-stack.community.json",
        cwd / "config" / "community_picks.json",
        _PACKAGE_DEFAULT,
    ]

    # A dict keeps insertion order and gives O(1) duplicate detection.
    ordered: dict[Path, None] = {}
    for candidate in candidates:
        try:
            resolved = candidate.expanduser().resolve()
        except (OSError, RuntimeError):
            # expanduser() raises RuntimeError when no home directory can be
            # determined, and resolve() raises RuntimeError on symlink loops
            # before Python 3.13. One unusable candidate must not abort the
            # search, so it is skipped like an OSError.
            continue
        ordered.setdefault(resolved, None)
    return list(ordered)
64
+
65
+
66
@lru_cache(maxsize=4)
def _load_raw(config_path: str | None = None) -> dict[str, Any]:
    """Load the first community picks config that exists on disk.

    Candidates come from ``config_search_paths(config_path)``; the first one
    that is a regular file is parsed as JSON (a UTF-8 BOM is tolerated).

    The result is memoised per ``config_path`` value. The environment
    variable and working directory are therefore only consulted on a cache
    miss, and every caller receives the same dict object.

    Args:
        config_path: Optional explicit config location to try first.

    Returns:
        The parsed top-level JSON object.

    Raises:
        FileNotFoundError: No candidate path is an existing file.
        ValueError: The file's top level is not a JSON object. Invalid JSON
            raises ``json.JSONDecodeError``, which is also a ``ValueError``.
    """
    for path in config_search_paths(config_path):
        if not path.is_file():
            continue
        data = json.loads(path.read_text(encoding="utf-8-sig"))
        if not isinstance(data, dict):
            # Fail here with the offending file named, instead of letting
            # callers hit AttributeError on ``.get`` later.
            raise ValueError(
                f"community picks config {path} must contain a JSON object "
                f"at the top level, got {type(data).__name__}"
            )
        return data
    raise FileNotFoundError(
        "community picks config not found. Copy config/community_picks.json "
        "or set SPLIT_STACK_COMMUNITY_CONFIG."
    )
75
+
76
+
77
def load_community_config(config_path: str | None = None) -> dict[str, Any]:
    """Return the parsed community picks config.

    Delegates to the cached ``_load_raw`` loader and hands its result back
    unchanged, so the returned dict is the same object the cache holds.
    """
    config = _load_raw(config_path)
    return config
79
+
80
+
81
def vram_tier_for_profile(profile: str, *, config_path: str | None = None) -> str:
    """Translate a hardware profile name into its VRAM tier code.

    The lookup table is the config's ``profile_to_vram_tier`` mapping.
    Profiles missing from it resolve to ``"M"``.
    """
    tier_by_profile = _load_raw(config_path).get("profile_to_vram_tier", {})
    tier = tier_by_profile.get(profile, "M")
    return str(tier)
85
+
86
+
87
def picks_for_hint(
    hint_id: str,
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> tuple[CommunityPick, ...]:
    """Return the community picks configured for a hint at a VRAM tier.

    Picks are read from ``hints[hint_id]["picks"][vram_tier]``. When that
    list is missing or empty and the tier is not ``"M"``, the ``"M"`` list is
    used instead. Each entry is either a bare model name or an object with
    ``model`` and optional ``note`` keys.

    Args:
        hint_id: Hint identifier to look up.
        vram_tier: Tier code to select; defaults to ``"M"``.
        config_path: Optional explicit config location.

    Returns:
        Picks in config order. ``rank`` is the 1-based position of the entry
        in the config list, so ranks can have gaps when entries are dropped.
        Entries with an empty model name are dropped, as are entries that
        are neither a string nor an object.
    """
    raw = _load_raw(config_path)
    # ``or {}`` / ``or []`` also cover keys explicitly set to null in the
    # hand-edited JSON, which would otherwise crash on the next lookup.
    hint_block = (raw.get("hints") or {}).get(hint_id) or {}
    picks_by_tier = hint_block.get("picks") or {}
    entries = picks_by_tier.get(vram_tier) or []
    if not entries and vram_tier != "M":
        entries = picks_by_tier.get("M") or []

    result: list[CommunityPick] = []
    for rank, entry in enumerate(entries, start=1):
        if isinstance(entry, str):
            model, note = entry, ""
        elif isinstance(entry, dict):
            model = str(entry.get("model", ""))
            note = str(entry.get("note", ""))
        else:
            # Malformed entry (null, number, list, ...): skip it, the same
            # way empty model names are silently dropped.
            continue
        if model:
            result.append(CommunityPick(model=model, note=note, rank=rank))
    return tuple(result)
111
+
112
+
113
def focus_stack(
    focus_id: str,
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> FocusStack | None:
    """Build the focus stack named ``focus_id`` for a VRAM tier.

    The model list comes from the stack's ``by_vram`` mapping. An absent or
    empty list for ``vram_tier`` falls back to the ``"M"`` list, then to no
    models at all.

    Returns:
        The resolved stack, or ``None`` when the config has no (non-empty)
        entry for ``focus_id``.
    """
    block = _load_raw(config_path).get("focus_stacks", {}).get(focus_id)
    if block:
        models_by_tier = block.get("by_vram", {})
        names = models_by_tier.get(vram_tier) or models_by_tier.get("M") or []
        return FocusStack(
            id=focus_id,
            label=str(block.get("label", focus_id)),
            description=str(block.get("description", "")),
            models=tuple(map(str, names)),
        )
    return None
131
+
132
+
133
def list_focus_stacks(
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> tuple[FocusStack, ...]:
    """Return every configured focus stack that has models for the tier.

    Stacks are resolved through ``focus_stack`` in config order. Stacks that
    resolve to ``None`` or to an empty model list are left out.
    """
    raw = _load_raw(config_path)
    resolved = (
        focus_stack(focus_id, vram_tier=vram_tier, config_path=config_path)
        for focus_id in raw.get("focus_stacks", {})
    )
    return tuple(stack for stack in resolved if stack and stack.models)
145
+
146
+
147
def community_index_for_model(
    model_name: str,
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> tuple[str, ...]:
    """List the hint ids whose community picks mention this model.

    Matching is case-insensitive. A pick matches when its model name occurs
    anywhere inside ``model_name``. Only the hints "lookup", "explain",
    "design", "code" and "reason" are examined, in that order.
    """
    wanted = model_name.lower()
    matched: list[str] = []
    for hint_id in ("lookup", "explain", "design", "code", "reason"):
        picks = picks_for_hint(hint_id, vram_tier=vram_tier, config_path=config_path)
        # Equality and prefix matches are special cases of substring
        # containment, so a single ``in`` test covers all three.
        if any(pick.model.lower() in wanted for pick in picks):
            matched.append(hint_id)
    return tuple(matched)
163
+
164
+
165
def recommended_models_for_tier(
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> dict[str, str]:
    """Collapse all picks for a tier into a ``model -> note`` mapping.

    Agent-stack hints are walked first ("lookup", "explain", "design",
    "code", "reason"), then the ``creative_rp`` picks under
    ``not_in_agent_stack``. The first note seen for a model wins. Hint picks
    without a note get a generated "Community pick for <hint>" note.
    """
    notes: dict[str, str] = {}
    for hint_id in ("lookup", "explain", "design", "code", "reason"):
        for pick in picks_for_hint(hint_id, vram_tier=vram_tier, config_path=config_path):
            if pick.model in notes:
                continue
            notes[pick.model] = pick.note or f"Community pick for {hint_id}"

    creative = _load_raw(config_path).get("not_in_agent_stack", {}).get("creative_rp", {})
    for entry in creative.get("picks", {}).get(vram_tier, []):
        if isinstance(entry, dict):
            model = str(entry.get("model", ""))
            note = str(entry.get("note", ""))
        else:
            model = str(entry)
            note = "Creative / RP (separate from agent stack)"
        if model:
            # Never overrides a note already recorded for the model.
            notes.setdefault(model, note)
    return notes
189
+
190
+
191
def community_note_for_model(
    model_name: str,
    *,
    vram_tier: str = "M",
    config_path: str | None = None,
) -> str | None:
    """Find the community note that applies to ``model_name``.

    An exact key match is preferred. Otherwise the first recommended model
    (in mapping order) whose lower-cased name occurs inside the lower-cased
    ``model_name`` supplies the note. Returns ``None`` when nothing matches.
    """
    notes = recommended_models_for_tier(vram_tier=vram_tier, config_path=config_path)
    try:
        return notes[model_name]
    except KeyError:
        pass

    wanted = model_name.lower()
    # A prefix match is a special case of containment, so one test suffices.
    return next(
        (note for key, note in notes.items() if key.lower() in wanted),
        None,
    )
205
+
206
+
207
def build_community_guide(
    *,
    profile: str = "workstation_12gb",
    config_path: str | None = None,
) -> dict[str, Any]:
    """Assemble the CLI/demo payload for a workstation profile.

    The profile is mapped to a VRAM tier. The payload then carries, for that
    tier, the picks of every configured hint, the non-empty focus stacks and
    the creative/RP picks, alongside the config's ``source`` and the tier's
    label.

    Returns:
        A dict with the keys ``source``, ``vram_tier``, ``vram_tier_label``,
        ``profile``, ``hints``, ``focus_stacks`` and ``creative_rp``.
        Creative entries that are already dicts are passed through as-is;
        bare entries are wrapped as ``{"model": ..., "note": ""}``.
    """
    raw = _load_raw(config_path)
    tier = vram_tier_for_profile(profile, config_path=config_path)

    hint_rows: list[dict[str, Any]] = []
    for hint_id, block in raw.get("hints", {}).items():
        picks = picks_for_hint(hint_id, vram_tier=tier, config_path=config_path)
        pick_rows = [
            {"model": pick.model, "note": pick.note, "rank": pick.rank}
            for pick in picks
        ]
        hint_rows.append(
            {
                "hint_id": hint_id,
                "reddit_category": block.get("reddit_category", ""),
                "vram_tier": tier,
                "picks": pick_rows,
            }
        )

    creative_block = raw.get("not_in_agent_stack", {}).get("creative_rp", {})
    creative_picks = creative_block.get("picks", {}).get(tier, [])

    source = raw.get("source", "")
    # Falls back to the bare tier code when no label is configured.
    tier_label = raw.get("vram_tiers", {}).get(tier, tier)
    stack_rows = [
        {
            "id": stack.id,
            "label": stack.label,
            "description": stack.description,
            "models": list(stack.models),
        }
        for stack in list_focus_stacks(vram_tier=tier, config_path=config_path)
    ]
    creative_rows = [
        entry if isinstance(entry, dict) else {"model": entry, "note": ""}
        for entry in creative_picks
    ]

    return {
        "source": source,
        "vram_tier": tier,
        "vram_tier_label": tier_label,
        "profile": profile,
        "hints": hint_rows,
        "focus_stacks": stack_rows,
        "creative_rp": creative_rows,
    }
split_stack/compare.py ADDED
@@ -0,0 +1,194 @@
1
+ """Side-by-side compare: split-stack routing vs always-largest baseline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import dataclass
7
+
8
+ from split_stack.model_registry import load_registry, model_weight
9
+ from split_stack.poc_models import DEFAULT_POC_STACK
10
+ from split_stack.routing import route_prompt
11
+ from split_stack.tiering import assign_tiers
12
+
13
+ DEFAULT_MODELS = list(DEFAULT_POC_STACK)
14
+
15
+
16
class CompareRunError(RuntimeError):
    """Raised when a live compare run fails on one specific agent step.

    Attributes:
        step: Name of the step that was running.
        model: Name of the model that was being called.
    """

    def __init__(self, step: str, model: str, message: str) -> None:
        detail = f"Failed on step '{step}' (model {model}): {message}"
        super().__init__(detail)
        self.step = step
        self.model = model
23
+
24
+
25
@dataclass(frozen=True)
class CompareStep:
    """One prompt of the compare scenario."""

    # Short identifier shown in the report's "step" column.
    name: str
    # Prompt text handed to the router (and to the models in live runs).
    prompt: str
    # Optional routing hint forwarded to ``route_prompt``; None means no hint.
    hint: str | None = None
30
+
31
+
32
# Built-in five-step scenario used by ``run_compare`` when no steps are given.
DEFAULT_STEPS: tuple[CompareStep, ...] = (
    CompareStep(
        name="understand_goal",
        prompt="Summarise the user goal: add auth to a Flask API",
        hint="explain",
    ),
    CompareStep(
        name="quick_lookup",
        prompt="what is JWT in one sentence?",
        hint="lookup",
    ),
    CompareStep(
        name="compare_options",
        prompt="compare session cookies vs JWT for a small SaaS API",
        hint="explain",
    ),
    CompareStep(
        name="design",
        prompt="design a webhook retry strategy with idempotency keys",
        hint="design",
    ),
    CompareStep(
        name="reason",
        prompt="prove this token expiry policy step by step",
        hint="reason",
    ),
)
39
+
40
+
41
@dataclass(frozen=True)
class CompareRow:
    """Outcome of one compare step: routed choice versus the baseline."""

    # Name of the step this row describes.
    step: str
    # Value of the tier the router selected.
    routed_tier: str
    # Model the router selected for the step.
    routed_model: str
    # Model the always-largest baseline uses.
    baseline_model: str
    # Wall-clock latency of each call in milliseconds; None in dry runs.
    routed_latency_ms: int | None = None
    baseline_latency_ms: int | None = None
49
+
50
+
51
@dataclass(frozen=True)
class CompareSummary:
    """Aggregate figures over all rows of a compare run."""

    # Model used for every baseline call.
    baseline_model: str
    # Number of distinct models the router picked.
    routed_models_used: int
    # Number of models the baseline used (``_build_summary`` always sets 1).
    baseline_models_used: int
    # Steps whose routed model differs from the baseline model.
    steps_avoided_largest: int
    # Total number of steps in the run.
    total_steps: int
    # Summed latencies in milliseconds; None when no latencies were measured.
    routed_total_latency_ms: int | None = None
    baseline_total_latency_ms: int | None = None
60
+
61
+
62
@dataclass(frozen=True)
class CompareReport:
    """Complete result of a compare run."""

    # Model names that took part in the run.
    models: tuple[str, ...]
    # One row per executed step, in step order.
    rows: tuple[CompareRow, ...]
    # Aggregates derived from ``rows``.
    summary: CompareSummary
67
+
68
+
69
def largest_model(model_names: list[str]) -> str:
    """Return the name with the greatest registry weight.

    Weights come from ``model_weight`` evaluated against a freshly loaded
    registry. On ties the earliest name wins, and an empty list raises
    ``ValueError`` (both are the semantics of ``max``).
    """
    registry = load_registry()

    def weight_of(name: str):
        return model_weight(name, registry)

    return max(model_names, key=weight_of)
72
+
73
+
74
def _build_summary(rows: tuple[CompareRow, ...], baseline_model: str) -> CompareSummary:
    """Fold the per-step rows into a ``CompareSummary``.

    Latency totals are produced only when the first row carries a routed
    latency. A missing latency on an individual row counts as 0. Otherwise
    both totals stay ``None``.
    """
    # The first row decides whether this was a live (timed) run.
    timed = bool(rows) and rows[0].routed_latency_ms is not None
    routed_total = sum(row.routed_latency_ms or 0 for row in rows) if timed else None
    baseline_total = sum(row.baseline_latency_ms or 0 for row in rows) if timed else None

    distinct_routed = {row.routed_model for row in rows}
    avoided = sum(row.routed_model != row.baseline_model for row in rows)

    return CompareSummary(
        baseline_model=baseline_model,
        routed_models_used=len(distinct_routed),
        baseline_models_used=1,
        steps_avoided_largest=avoided,
        total_steps=len(rows),
        routed_total_latency_ms=routed_total,
        baseline_total_latency_ms=baseline_total,
    )
91
+
92
+
93
def run_compare(
    *,
    steps: tuple[CompareStep, ...] = DEFAULT_STEPS,
    model_names: list[str] | None = None,
    base_url: str = "http://127.0.0.1:11434",
    dry_run: bool = True,
    timeout_seconds: int = 90,
) -> CompareReport:
    """Route every step and compare it with the always-largest baseline.

    Args:
        steps: Steps to run, in order.
        model_names: Models to choose from; ``DEFAULT_MODELS`` when falsy.
        base_url: Server URL passed to ``generate_text`` in live runs.
        dry_run: When true, only routing decisions are recorded and no model
            is called. When false, each step is generated once with the
            routed model and once with the baseline model, and both calls
            are timed.
        timeout_seconds: Per-call timeout passed to ``generate_text``.

    Returns:
        The report with one row per step plus the derived summary.

    Raises:
        CompareRunError: A live generation call raised ``RuntimeError``. The
            error names the step and the model that was being called.
    """
    models = model_names or list(DEFAULT_MODELS)
    tiers = assign_tiers(models)
    baseline = largest_model(models)

    generate_text = None
    if not dry_run:
        # Imported lazily so dry runs do not need the generation module.
        from split_stack.ollama_generate import generate_text as _generate_text

        generate_text = _generate_text

    def timed_call(step: CompareStep, model: str) -> int:
        """Run one generation and return its latency in whole milliseconds."""
        started = time.perf_counter()
        try:
            generate_text(
                model,
                step.prompt,
                base_url=base_url,
                timeout_seconds=timeout_seconds,
            )
        except RuntimeError as exc:
            raise CompareRunError(step.name, model, str(exc)) from exc
        return int((time.perf_counter() - started) * 1000)

    rows: list[CompareRow] = []
    for step in steps:
        tier, routed_model = route_prompt(step.prompt, tiers, hint=step.hint)

        routed_ms: int | None = None
        baseline_ms: int | None = None
        if generate_text is not None:
            # The routed call always runs before the baseline call.
            routed_ms = timed_call(step, routed_model)
            baseline_ms = timed_call(step, baseline)

        rows.append(
            CompareRow(
                step=step.name,
                routed_tier=tier.value,
                routed_model=routed_model,
                baseline_model=baseline,
                routed_latency_ms=routed_ms,
                baseline_latency_ms=baseline_ms,
            )
        )

    frozen_rows = tuple(rows)
    return CompareReport(
        models=tuple(models),
        rows=frozen_rows,
        summary=_build_summary(frozen_rows, baseline),
    )
157
+
158
+
159
def format_compare_text(report: CompareReport) -> str:
    """Render a compare report as a plain-text table plus a summary.

    Each step becomes one table line. Rows that carry a routed latency get an
    extra line with both latencies. The summary ends with the latency totals
    when the report has them.
    """
    summary = report.summary
    # One template for header and data lines keeps the columns aligned.
    line_template = "{:<18} | {:<12} | {:<12} | {}"

    out = [
        f"Compare: split-stack vs always-largest ({summary.baseline_model})",
        "",
        line_template.format("step", "routed tier", "routed model", "baseline model"),
    ]

    for row in report.rows:
        out.append(
            line_template.format(
                row.step, row.routed_tier, row.routed_model, row.baseline_model
            )
        )
        if row.routed_latency_ms is None:
            continue
        out.append(
            f" routed_latency_ms={row.routed_latency_ms} "
            f"baseline_latency_ms={row.baseline_latency_ms}"
        )

    out += [
        "",
        "Summary:",
        f" split-stack: {summary.routed_models_used} models used, "
        f"{summary.steps_avoided_largest}/{summary.total_steps} steps avoided largest",
        f" baseline: {summary.baseline_models_used} model used, "
        f"{summary.total_steps}/{summary.total_steps} on largest",
    ]
    if summary.routed_total_latency_ms is not None:
        out.append(f" routed total latency: {summary.routed_total_latency_ms} ms")
        out.append(f" baseline total latency: {summary.baseline_total_latency_ms} ms")
    return "\n".join(out)
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from split_stack.hints import normalize_step_kind, prefer_code_model, tier_from_step_kind
4
+ from split_stack.models import ComplexityTier, StepKind
5
+
6
# Marker tuples for the prompt heuristics below. All are matched as plain
# substrings of the lower-cased prompt.

# Any hit makes ``resolve_tier`` return the REASONING tier.
DEFAULT_REASONING_MARKERS = ("step by step", "reason carefully", "prove", "formalize", "rigorous")

# Any hit makes ``resolve_tier`` return the COMPLEX tier.
DEFAULT_COMPLEX_MARKERS = ("architecture", "distributed", "tradeoff", "debug", "refactor", "design")

# Any hit makes ``resolve_tier`` return the MEDIUM tier.
DEFAULT_MEDIUM_MARKERS = ("explain", "summarise", "summarize", "compare", "outline", "plan")

# Any hit makes ``looks_like_code`` return True.
DEFAULT_CODE_MARKERS = (
    "refactor",
    "debug",
    "implement",
    "function",
    # Trailing space presumably keeps words such as "classify" from matching.
    "class ",
    "traceback",
    "syntax error",
    "unit test",
    "pytest",
    # Markdown code fence.
    "```",
)
41
+
42
+
43
def score_prompt(prompt: str) -> ComplexityTier:
    """Classify a prompt from its text alone, without a step hint."""
    return resolve_tier(prompt, hint=None)
45
+
46
+
47
def resolve_tier(
    prompt: str,
    *,
    hint: StepKind | str | None = None,
) -> ComplexityTier:
    """Pick the complexity tier for a prompt.

    A hint that ``normalize_step_kind`` recognises decides the tier on its
    own. Otherwise the lower-cased prompt is checked in this order:

    1. Empty prompt -> SIMPLE.
    2. Any reasoning marker -> REASONING.
    3. At most 8 whitespace-separated words and a trailing "?" -> SIMPLE.
    4. Any complex marker, or more than 80 words -> COMPLEX.
    5. Any medium marker, or more than 25 words -> MEDIUM.
    6. Everything else -> SIMPLE.
    """
    if hint is not None:
        step_kind = normalize_step_kind(hint)
        if step_kind is not None:
            return tier_from_step_kind(step_kind)

    text = (prompt or "").strip().lower()
    if not text:
        return ComplexityTier.SIMPLE

    def mentions(markers: tuple[str, ...]) -> bool:
        # NOTE(review): plain substring test, so "prove" also matches inside
        # "improve" or "disprove".
        return any(marker in text for marker in markers)

    if mentions(DEFAULT_REASONING_MARKERS):
        return ComplexityTier.REASONING

    word_count = len(text.split())
    # Short questions are treated as lookups before the complex markers run.
    if word_count <= 8 and text.endswith("?"):
        return ComplexityTier.SIMPLE
    if mentions(DEFAULT_COMPLEX_MARKERS) or word_count > 80:
        return ComplexityTier.COMPLEX
    if mentions(DEFAULT_MEDIUM_MARKERS) or word_count > 25:
        return ComplexityTier.MEDIUM
    return ComplexityTier.SIMPLE
73
+
74
+
75
def looks_like_code(prompt: str) -> bool:
    """Report whether the prompt contains any code marker.

    The check is a case-insensitive substring search over
    ``DEFAULT_CODE_MARKERS``. A ``None`` or empty prompt yields ``False``.
    """
    lowered = (prompt or "").lower()
    for marker in DEFAULT_CODE_MARKERS:
        if marker in lowered:
            return True
    return False