PyPI - split-stack - Versions diffs - 0.2.0__py3-none-any.whl - Mend

split-stack 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

split_stack/__init__.py +106 -0
split_stack/__main__.py +4 -0
split_stack/advice.py +12 -0
split_stack/benchmark.py +97 -0
split_stack/cli.py +690 -0
split_stack/community_picks.py +247 -0
split_stack/compare.py +194 -0
split_stack/complexity.py +77 -0
split_stack/discovery.py +288 -0
split_stack/hints.py +102 -0
split_stack/local_models.py +63 -0
split_stack/model_guide.py +273 -0
split_stack/model_registry.py +314 -0
split_stack/models.py +77 -0
split_stack/ollama_errors.py +30 -0
split_stack/ollama_generate.py +135 -0
split_stack/poc_models.py +131 -0
split_stack/presets.py +75 -0
split_stack/quantization.py +137 -0
split_stack/requirements.py +287 -0
split_stack/routing.py +96 -0
split_stack/session.py +259 -0
split_stack/setup_wizard.py +259 -0
split_stack/startup_tips.py +169 -0
split_stack/tiering.py +66 -0
split_stack/validation.py +85 -0
split_stack-0.2.0.dist-info/METADATA +364 -0
split_stack-0.2.0.dist-info/RECORD +32 -0
split_stack-0.2.0.dist-info/WHEEL +5 -0
split_stack-0.2.0.dist-info/entry_points.txt +2 -0
split_stack-0.2.0.dist-info/licenses/LICENSE +21 -0
split_stack-0.2.0.dist-info/top_level.txt +1 -0

split_stack/community_picks.py ADDED Viewed

@@ -0,0 +1,247 @@
+"""Community model picks — editable JSON, sourced from r/LocalLLaMA megathreads."""
+from __future__ import annotations
+import json
+import os
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+# Last-resort config location: config/community_picks.json two levels above the package directory.
+# NOTE(review): parents[2] presumably matches a source-checkout layout — confirm the JSON
+# is actually present at this path for installed wheels.
+_PACKAGE_DEFAULT = Path(__file__).resolve().parents[2] / "config" / "community_picks.json"
+@dataclass(frozen=True)
+class CommunityPick:
+    """One recommended model read from the community picks config."""
+    model: str  # model name exactly as written in the config
+    note: str  # free-text remark attached to the pick; may be empty
+    rank: int = 1  # picks_for_hint sets this to the 1-based position in the tier list
+@dataclass(frozen=True)
+class HintCommunityGuide:
+    """Community picks for a single hint at one VRAM tier.
+
+    The field names mirror the per-hint dicts assembled by build_community_guide.
+    """
+    hint_id: str  # key of the hint inside the config's "hints" mapping
+    reddit_category: str  # the hint block's "reddit_category" value
+    vram_tier: str  # tier key the picks were resolved for (e.g. "M")
+    picks: tuple[CommunityPick, ...]  # picks for that hint/tier
+@dataclass(frozen=True)
+class FocusStack:
+    """A named group of models for one focus area, resolved for a VRAM tier."""
+    id: str  # key of the stack inside the config's "focus_stacks" mapping
+    label: str  # display label; focus_stack falls back to the id when the config has none
+    description: str  # free-text description; may be empty
+    models: tuple[str, ...]  # model names for the requested tier
+def config_search_paths(explicit: str | None = None) -> list[Path]:
+    """List candidate config files, highest priority first.
+
+    Order: the *explicit* path (if given), the ``SPLIT_STACK_COMMUNITY_CONFIG``
+    environment variable (if non-blank), two locations under the current
+    working directory, then the package default. Each candidate is
+    user-expanded and resolved; duplicates keep their first position and
+    candidates whose resolution raises ``OSError`` are dropped.
+    """
+    candidates: list[Path] = []
+    if explicit:
+        candidates.append(Path(explicit))
+    override = os.environ.get("SPLIT_STACK_COMMUNITY_CONFIG", "").strip()
+    if override:
+        candidates.append(Path(override))
+    candidates.append(Path.cwd() / "split-stack.community.json")
+    candidates.append(Path.cwd() / "config" / "community_picks.json")
+    candidates.append(_PACKAGE_DEFAULT)
+    # A dict preserves insertion order, so it doubles as an ordered set here.
+    unique: dict[Path, None] = {}
+    for candidate in candidates:
+        try:
+            unique.setdefault(candidate.expanduser().resolve())
+        except OSError:
+            pass
+    return list(unique)
+@lru_cache(maxsize=4)
+def _load_raw(config_path: str | None = None) -> dict[str, Any]:
+    """Parse the first existing file among the config search paths.
+
+    The result is memoised per *config_path* argument, so later changes to
+    the environment variable, the working directory or the file contents
+    are not picked up for an argument that has already been loaded.
+
+    Raises:
+        FileNotFoundError: none of the candidate paths is a regular file.
+    """
+    existing = next(
+        (candidate for candidate in config_search_paths(config_path) if candidate.is_file()),
+        None,
+    )
+    if existing is None:
+        raise FileNotFoundError(
+            "community picks config not found. Copy config/community_picks.json "
+            "or set SPLIT_STACK_COMMUNITY_CONFIG."
+        )
+    # utf-8-sig tolerates a leading byte-order mark in hand-edited files.
+    document = existing.read_text(encoding="utf-8-sig")
+    return json.loads(document)
+def load_community_config(config_path: str | None = None) -> dict[str, Any]:
+    """Return the parsed community picks configuration.
+
+    Args:
+        config_path: optional explicit config file; see config_search_paths.
+
+    Returns:
+        A private deep copy of the parsed JSON. ``_load_raw`` memoises the
+        parsed object, so handing out that object directly would let a
+        caller's mutation leak into every later lookup in this module.
+
+    Raises:
+        FileNotFoundError: no config file could be located.
+    """
+    import copy
+    return copy.deepcopy(_load_raw(config_path))
+def vram_tier_for_profile(profile: str, *, config_path: str | None = None) -> str:
+    """Look up the VRAM tier for *profile* in ``profile_to_vram_tier``; unknown profiles map to "M"."""
+    tier_by_profile = _load_raw(config_path).get("profile_to_vram_tier", {})
+    tier = tier_by_profile.get(profile, "M")
+    return str(tier)
+def picks_for_hint(
+    hint_id: str,
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> tuple[CommunityPick, ...]:
+    """Return the community picks for *hint_id* at *vram_tier*.
+
+    Falls back to the "M" tier when the requested tier has no entries.
+    Entries may be bare model-name strings or objects with "model" and
+    "note" keys. ``rank`` is the entry's 1-based position in the config
+    list. Entries without a model name are dropped.
+
+    The config is hand-editable JSON, so malformed input is tolerated:
+    a ``null`` container is treated as empty, a ``null`` "model"/"note"
+    is treated as missing (rather than becoming the string "None"), and
+    entries that are neither a string nor an object are skipped.
+
+    Raises:
+        FileNotFoundError: no config file could be located.
+    """
+    raw = _load_raw(config_path)
+    hint_block = (raw.get("hints") or {}).get(hint_id) or {}
+    picks_by_tier = hint_block.get("picks") or {}
+    tier_picks = picks_by_tier.get(vram_tier) or []
+    if not tier_picks and vram_tier != "M":
+        tier_picks = picks_by_tier.get("M") or []
+    result: list[CommunityPick] = []
+    for index, item in enumerate(tier_picks, start=1):
+        if isinstance(item, str):
+            model, note = item, ""
+        elif isinstance(item, dict):
+            raw_model = item.get("model")
+            raw_note = item.get("note")
+            model = "" if raw_model is None else str(raw_model)
+            note = "" if raw_note is None else str(raw_note)
+        else:
+            # Not a recognised entry shape; skip it but keep later ranks positional.
+            continue
+        if model:
+            result.append(CommunityPick(model=model, note=note, rank=index))
+    return tuple(result)
+def focus_stack(
+    focus_id: str,
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> FocusStack | None:
+    """Build the FocusStack for *focus_id*, or return None when it is absent or empty.
+
+    Models come from the stack's ``by_vram`` entry for *vram_tier*, falling
+    back to the "M" entry and then to no models at all.
+    """
+    entry = _load_raw(config_path).get("focus_stacks", {}).get(focus_id)
+    if not entry:
+        return None
+    per_tier = entry.get("by_vram", {})
+    names = per_tier.get(vram_tier) or per_tier.get("M") or []
+    return FocusStack(
+        id=focus_id,
+        label=str(entry.get("label", focus_id)),
+        description=str(entry.get("description", "")),
+        models=tuple(map(str, names)),
+    )
+def list_focus_stacks(
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> tuple[FocusStack, ...]:
+    """Return every configured focus stack that has at least one model for *vram_tier*.
+
+    Stacks are yielded in config order; stacks that resolve to None or to
+    an empty model list are left out.
+    """
+    focus_ids = _load_raw(config_path).get("focus_stacks", {})
+    resolved = (
+        focus_stack(focus_id, vram_tier=vram_tier, config_path=config_path)
+        for focus_id in focus_ids
+    )
+    return tuple(stack for stack in resolved if stack and stack.models)
+def community_index_for_model(
+    model_name: str,
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> tuple[str, ...]:
+    """Return the hint ids whose community picks mention *model_name*.
+
+    A pick matches when its lower-cased name occurs anywhere inside the
+    lower-cased *model_name* (this covers exact and prefix matches too).
+    Only the five built-in hint ids are checked, in a fixed order.
+    """
+    needle = model_name.lower()
+    matched: list[str] = []
+    for hint_id in ("lookup", "explain", "design", "code", "reason"):
+        candidates = picks_for_hint(hint_id, vram_tier=vram_tier, config_path=config_path)
+        if any(pick.model.lower() in needle for pick in candidates):
+            matched.append(hint_id)
+    return tuple(matched)
+def recommended_models_for_tier(
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> dict[str, str]:
+    """Map each picked model to a note for *vram_tier*.
+
+    Agent-stack picks are visited hint by hint and the first occurrence of
+    a model wins; a pick with no note gets a generic "Community pick for
+    <hint>" note. Creative/RP picks are added afterwards for models not
+    already present.
+    """
+    notes: dict[str, str] = {}
+    for hint_id in ("lookup", "explain", "design", "code", "reason"):
+        for pick in picks_for_hint(hint_id, vram_tier=vram_tier, config_path=config_path):
+            notes.setdefault(pick.model, pick.note or f"Community pick for {hint_id}")
+    creative = _load_raw(config_path).get("not_in_agent_stack", {}).get("creative_rp", {})
+    for entry in creative.get("picks", {}).get(vram_tier, []):
+        if isinstance(entry, dict):
+            name = str(entry.get("model", ""))
+            remark = str(entry.get("note", ""))
+        else:
+            name = str(entry)
+            remark = "Creative / RP (separate from agent stack)"
+        if name:
+            notes.setdefault(name, remark)
+    return notes
+def community_note_for_model(
+    model_name: str,
+    *,
+    vram_tier: str = "M",
+    config_path: str | None = None,
+) -> str | None:
+    """Return the community note for *model_name*, or None when nothing matches.
+
+    An exact key match is preferred; otherwise the first recommended model
+    whose lower-cased name occurs inside the lower-cased *model_name* wins.
+    """
+    notes = recommended_models_for_tier(vram_tier=vram_tier, config_path=config_path)
+    try:
+        return notes[model_name]
+    except KeyError:
+        pass
+    needle = model_name.lower()
+    return next((remark for key, remark in notes.items() if key.lower() in needle), None)
+def build_community_guide(
+    *,
+    profile: str = "workstation_12gb",
+    config_path: str | None = None,
+) -> dict[str, Any]:
+    """Assemble a plain-dict guide for *profile*: per-hint picks, focus stacks and creative picks.
+
+    The profile is first mapped to a VRAM tier; every section of the
+    payload is then resolved for that tier.
+    """
+    raw = _load_raw(config_path)
+    tier = vram_tier_for_profile(profile, config_path=config_path)
+    hint_rows: list[dict[str, Any]] = []
+    for hint_id, block in raw.get("hints", {}).items():
+        tier_picks = picks_for_hint(hint_id, vram_tier=tier, config_path=config_path)
+        row: dict[str, Any] = {
+            "hint_id": hint_id,
+            "reddit_category": block.get("reddit_category", ""),
+            "vram_tier": tier,
+            "picks": [
+                {"model": pick.model, "note": pick.note, "rank": pick.rank}
+                for pick in tier_picks
+            ],
+        }
+        hint_rows.append(row)
+    creative_block = raw.get("not_in_agent_stack", {}).get("creative_rp", {})
+    creative_entries = creative_block.get("picks", {}).get(tier, [])
+    stack_rows = [
+        {
+            "id": stack.id,
+            "label": stack.label,
+            "description": stack.description,
+            "models": list(stack.models),
+        }
+        for stack in list_focus_stacks(vram_tier=tier, config_path=config_path)
+    ]
+    # Bare-string creative entries are normalised to the same shape as object entries.
+    creative_rows = [
+        entry if isinstance(entry, dict) else {"model": entry, "note": ""}
+        for entry in creative_entries
+    ]
+    return {
+        "source": raw.get("source", ""),
+        "vram_tier": tier,
+        "vram_tier_label": raw.get("vram_tiers", {}).get(tier, tier),
+        "profile": profile,
+        "hints": hint_rows,
+        "focus_stacks": stack_rows,
+        "creative_rp": creative_rows,
+    }

split_stack/compare.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""Side-by-side compare: split-stack routing vs always-largest baseline."""
+from __future__ import annotations
+import time
+from dataclasses import dataclass
+from split_stack.model_registry import load_registry, model_weight
+from split_stack.poc_models import DEFAULT_POC_STACK
+from split_stack.routing import route_prompt
+from split_stack.tiering import assign_tiers
+# Model names used by run_compare when the caller passes no model_names (a list copy of DEFAULT_POC_STACK).
+DEFAULT_MODELS = list(DEFAULT_POC_STACK)
+class CompareRunError(RuntimeError):
+    """Raised when a live compare run fails while executing one step.
+
+    Attributes:
+        step: name of the step that failed.
+        model: model that was being called when the failure happened.
+    """
+    def __init__(self, step: str, model: str, message: str) -> None:
+        detail = f"Failed on step '{step}' (model {model}): {message}"
+        super().__init__(detail)
+        self.step = step
+        self.model = model
+@dataclass(frozen=True)
+class CompareStep:
+    """One step of the compare scenario: a named prompt with an optional routing hint."""
+    name: str  # step label shown in reports and in CompareRunError
+    prompt: str  # text sent to the router (and to the models in live mode)
+    hint: str | None = None  # forwarded to route_prompt as its hint argument
+# Built-in five-step scenario used by run_compare when no steps are supplied;
+# each entry is (step name, prompt, routing hint).
+DEFAULT_STEPS: tuple[CompareStep, ...] = (
+    CompareStep("understand_goal", "Summarise the user goal: add auth to a Flask API", "explain"),
+    CompareStep("quick_lookup", "what is JWT in one sentence?", "lookup"),
+    CompareStep("compare_options", "compare session cookies vs JWT for a small SaaS API", "explain"),
+    CompareStep("design", "design a webhook retry strategy with idempotency keys", "design"),
+    CompareStep("reason", "prove this token expiry policy step by step", "reason"),
+)
+@dataclass(frozen=True)
+class CompareRow:
+    """Outcome of one compare step: what routing chose versus the baseline model."""
+    step: str  # CompareStep.name
+    routed_tier: str  # value of the tier returned by route_prompt
+    routed_model: str  # model chosen by routing for this step
+    baseline_model: str  # the always-largest model used for comparison
+    routed_latency_ms: int | None = None  # wall-clock ms of the routed call; None in dry runs
+    baseline_latency_ms: int | None = None  # wall-clock ms of the baseline call; None in dry runs
+@dataclass(frozen=True)
+class CompareSummary:
+    """Aggregate figures over all rows of a compare run."""
+    baseline_model: str  # the always-largest model
+    routed_models_used: int  # number of distinct models routing selected
+    baseline_models_used: int  # _build_summary always sets this to 1
+    steps_avoided_largest: int  # rows whose routed model differs from the baseline
+    total_steps: int  # number of rows
+    routed_total_latency_ms: int | None = None  # summed routed latency; None without timings
+    baseline_total_latency_ms: int | None = None  # summed baseline latency; None without timings
+@dataclass(frozen=True)
+class CompareReport:
+    """Full result of run_compare: the models considered, one row per step, and the summary."""
+    models: tuple[str, ...]  # model names the run was configured with
+    rows: tuple[CompareRow, ...]  # per-step results, in step order
+    summary: CompareSummary  # aggregates computed from rows
+def largest_model(model_names: list[str]) -> str:
+    """Return the entry of *model_names* with the greatest registry weight.
+
+    Raises:
+        ValueError: *model_names* is empty (propagated from ``max``).
+    """
+    registry = load_registry()
+    def weight_of(name: str):
+        return model_weight(name, registry)
+    return max(model_names, key=weight_of)
+def _build_summary(rows: tuple[CompareRow, ...], baseline_model: str) -> CompareSummary:
+    """Aggregate *rows* into a CompareSummary.
+
+    Latency totals are only produced when the first row carries a routed
+    latency; missing per-row values then count as zero.
+    """
+    distinct_routed = len({row.routed_model for row in rows})
+    avoided = len([row for row in rows if row.routed_model != row.baseline_model])
+    has_timings = bool(rows) and rows[0].routed_latency_ms is not None
+    if has_timings:
+        routed_total = sum(row.routed_latency_ms or 0 for row in rows)
+        baseline_total = sum(row.baseline_latency_ms or 0 for row in rows)
+    else:
+        routed_total = None
+        baseline_total = None
+    return CompareSummary(
+        baseline_model=baseline_model,
+        routed_models_used=distinct_routed,
+        baseline_models_used=1,
+        steps_avoided_largest=avoided,
+        total_steps=len(rows),
+        routed_total_latency_ms=routed_total,
+        baseline_total_latency_ms=baseline_total,
+    )
+def run_compare(
+    *,
+    steps: tuple[CompareStep, ...] = DEFAULT_STEPS,
+    model_names: list[str] | None = None,
+    base_url: str = "http://127.0.0.1:11434",
+    dry_run: bool = True,
+    timeout_seconds: int = 90,
+) -> CompareReport:
+    """Route every step and compare the choice against the always-largest model.
+
+    In a dry run only the routing decision is recorded. Otherwise each
+    step's prompt is sent first to the routed model and then to the
+    baseline model, and the wall-clock time of each call is stored in
+    milliseconds.
+
+    Raises:
+        CompareRunError: a live call raised ``RuntimeError``; the error
+            names the step and the model that was being called.
+    """
+    models = model_names or list(DEFAULT_MODELS)
+    tiers = assign_tiers(models)
+    baseline = largest_model(models)
+    live_generate = None
+    if not dry_run:
+        # Imported lazily so dry runs never load the generation module.
+        from split_stack.ollama_generate import generate_text as live_generate
+    def timed_call(model: str, prompt: str) -> int:
+        started = time.perf_counter()
+        live_generate(
+            model,
+            prompt,
+            base_url=base_url,
+            timeout_seconds=timeout_seconds,
+        )
+        return int((time.perf_counter() - started) * 1000)
+    collected: list[CompareRow] = []
+    for step in steps:
+        tier, routed_model = route_prompt(step.prompt, tiers, hint=step.hint)
+        routed_ms: int | None = None
+        baseline_ms: int | None = None
+        if live_generate is not None:
+            try:
+                routed_ms = timed_call(routed_model, step.prompt)
+            except RuntimeError as exc:
+                raise CompareRunError(step.name, routed_model, str(exc)) from exc
+            try:
+                baseline_ms = timed_call(baseline, step.prompt)
+            except RuntimeError as exc:
+                raise CompareRunError(step.name, baseline, str(exc)) from exc
+        collected.append(
+            CompareRow(
+                step=step.name,
+                routed_tier=tier.value,
+                routed_model=routed_model,
+                baseline_model=baseline,
+                routed_latency_ms=routed_ms,
+                baseline_latency_ms=baseline_ms,
+            )
+        )
+    rows = tuple(collected)
+    return CompareReport(
+        models=tuple(models),
+        rows=rows,
+        summary=_build_summary(rows, baseline),
+    )
+def format_compare_text(report: CompareReport) -> str:
+    """Render *report* as a plain-text table followed by a summary section.
+
+    Each row gets one table line; rows that carry a routed latency get an
+    extra indented line with both latencies. Total latencies are appended
+    to the summary only when the summary has them.
+    """
+    summary = report.summary
+    out = [
+        f"Compare: split-stack vs always-largest ({summary.baseline_model})",
+        "",
+        f"{'step':<18} | {'routed tier':<12} | {'routed model':<12} | baseline model",
+    ]
+    for row in report.rows:
+        out.append(
+            f"{row.step:<18} | {row.routed_tier:<12} | "
+            f"{row.routed_model:<12} | {row.baseline_model}"
+        )
+        if row.routed_latency_ms is None:
+            continue
+        out.append(
+            f"  routed_latency_ms={row.routed_latency_ms} "
+            f"baseline_latency_ms={row.baseline_latency_ms}"
+        )
+    out += [
+        "",
+        "Summary:",
+        f"  split-stack:  {summary.routed_models_used} models used, "
+        f"{summary.steps_avoided_largest}/{summary.total_steps} steps avoided largest",
+        f"  baseline:     {summary.baseline_models_used} model used, "
+        f"{summary.total_steps}/{summary.total_steps} on largest",
+    ]
+    if summary.routed_total_latency_ms is not None:
+        out += [
+            f"  routed total latency:   {summary.routed_total_latency_ms} ms",
+            f"  baseline total latency: {summary.baseline_total_latency_ms} ms",
+        ]
+    return "\n".join(out)

split_stack/complexity.py ADDED Viewed

@@ -0,0 +1,77 @@
+from __future__ import annotations
+from split_stack.hints import normalize_step_kind, prefer_code_model, tier_from_step_kind
+from split_stack.models import ComplexityTier, StepKind
+# Marker tuples for the prompt heuristics below. Every marker is lower-case and is
+# tested as a plain substring of the lower-cased prompt.
+# Any hit sends the prompt to the REASONING tier (checked first in resolve_tier).
+DEFAULT_REASONING_MARKERS = (
+    "step by step",
+    "reason carefully",
+    "prove",
+    "formalize",
+    "rigorous",
+)
+# Any hit sends the prompt to the COMPLEX tier.
+DEFAULT_COMPLEX_MARKERS = (
+    "architecture",
+    "distributed",
+    "tradeoff",
+    "debug",
+    "refactor",
+    "design",
+)
+# Any hit sends the prompt to the MEDIUM tier.
+DEFAULT_MEDIUM_MARKERS = (
+    "explain",
+    "summarise",
+    "summarize",
+    "compare",
+    "outline",
+    "plan",
+)
+# Used by looks_like_code only; not consulted by resolve_tier.
+# NOTE(review): the trailing space in "class " presumably avoids hits on words
+# such as "classify" — confirm intent.
+DEFAULT_CODE_MARKERS = (
+    "refactor",
+    "debug",
+    "implement",
+    "function",
+    "class ",
+    "traceback",
+    "syntax error",
+    "unit test",
+    "pytest",
+    "```",
+)
+def score_prompt(prompt: str) -> ComplexityTier:
+    """Classify *prompt* from its text alone; shorthand for resolve_tier without a hint."""
+    tier = resolve_tier(prompt)
+    return tier
+def resolve_tier(
+    prompt: str,
+    *,
+    hint: StepKind | str | None = None,
+) -> ComplexityTier:
+    """Pick a complexity tier for *prompt*, letting a recognised *hint* override the text.
+
+    Without a usable hint the lower-cased prompt is checked in this order:
+    empty -> SIMPLE; reasoning marker -> REASONING; a question of at most
+    eight words -> SIMPLE; complex marker or more than 80 words -> COMPLEX;
+    medium marker or more than 25 words -> MEDIUM; otherwise SIMPLE.
+
+    Markers are matched as plain substrings, so a marker also fires inside
+    a longer word (for example "prove" inside "improve").
+    """
+    if hint is not None:
+        kind = normalize_step_kind(hint)
+        if kind is not None:
+            return tier_from_step_kind(kind)
+    text = (prompt or "").strip().lower()
+    if not text:
+        return ComplexityTier.SIMPLE
+    def mentions(markers: tuple[str, ...]) -> bool:
+        return any(marker in text for marker in markers)
+    if mentions(DEFAULT_REASONING_MARKERS):
+        return ComplexityTier.REASONING
+    word_count = len(text.split())
+    if text.endswith("?") and word_count <= 8:
+        return ComplexityTier.SIMPLE
+    if word_count > 80 or mentions(DEFAULT_COMPLEX_MARKERS):
+        return ComplexityTier.COMPLEX
+    if word_count > 25 or mentions(DEFAULT_MEDIUM_MARKERS):
+        return ComplexityTier.MEDIUM
+    return ComplexityTier.SIMPLE
+def looks_like_code(prompt: str) -> bool:
+    """Return True when the lower-cased *prompt* contains any code marker as a substring."""
+    lowered = (prompt or "").lower()
+    for marker in DEFAULT_CODE_MARKERS:
+        if marker in lowered:
+            return True
+    return False