split-stack 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- split_stack/__init__.py +106 -0
- split_stack/__main__.py +4 -0
- split_stack/advice.py +12 -0
- split_stack/benchmark.py +97 -0
- split_stack/cli.py +690 -0
- split_stack/community_picks.py +247 -0
- split_stack/compare.py +194 -0
- split_stack/complexity.py +77 -0
- split_stack/discovery.py +288 -0
- split_stack/hints.py +102 -0
- split_stack/local_models.py +63 -0
- split_stack/model_guide.py +273 -0
- split_stack/model_registry.py +314 -0
- split_stack/models.py +77 -0
- split_stack/ollama_errors.py +30 -0
- split_stack/ollama_generate.py +135 -0
- split_stack/poc_models.py +131 -0
- split_stack/presets.py +75 -0
- split_stack/quantization.py +137 -0
- split_stack/requirements.py +287 -0
- split_stack/routing.py +96 -0
- split_stack/session.py +259 -0
- split_stack/setup_wizard.py +259 -0
- split_stack/startup_tips.py +169 -0
- split_stack/tiering.py +66 -0
- split_stack/validation.py +85 -0
- split_stack-0.2.0.dist-info/METADATA +364 -0
- split_stack-0.2.0.dist-info/RECORD +32 -0
- split_stack-0.2.0.dist-info/WHEEL +5 -0
- split_stack-0.2.0.dist-info/entry_points.txt +2 -0
- split_stack-0.2.0.dist-info/licenses/LICENSE +21 -0
- split_stack-0.2.0.dist-info/top_level.txt +1 -0
split_stack/__init__.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from split_stack.advice import stack_recommendation
|
|
2
|
+
from split_stack.benchmark import format_markdown_table, run_benchmark
|
|
3
|
+
from split_stack.complexity import resolve_tier, score_prompt
|
|
4
|
+
from split_stack.hints import list_hints, normalize_step_kind
|
|
5
|
+
from split_stack.discovery import discover_models
|
|
6
|
+
from split_stack.local_models import assign_tiers_from_local, list_local_models
|
|
7
|
+
from split_stack.model_registry import (
|
|
8
|
+
DeploymentProfileSpec,
|
|
9
|
+
ModelEntry,
|
|
10
|
+
ModelRegistry,
|
|
11
|
+
list_deployment_profiles,
|
|
12
|
+
load_registry,
|
|
13
|
+
model_weight,
|
|
14
|
+
normalize_deployment_profile,
|
|
15
|
+
)
|
|
16
|
+
from split_stack.models import ComplexityTier, RouteDecision, StackAdvice, StepKind, TierMap
|
|
17
|
+
from split_stack.presets import (
|
|
18
|
+
assign_recommended_tiers,
|
|
19
|
+
list_recommended_stacks,
|
|
20
|
+
recommended_models,
|
|
21
|
+
)
|
|
22
|
+
from split_stack.requirements import (
|
|
23
|
+
Prerequisite,
|
|
24
|
+
ProfileRequirements,
|
|
25
|
+
UsageProfile,
|
|
26
|
+
list_usage_profiles,
|
|
27
|
+
usage_requirements,
|
|
28
|
+
)
|
|
29
|
+
from split_stack.quantization import (
|
|
30
|
+
QUANT_MODES,
|
|
31
|
+
adjust_vram_for_quant,
|
|
32
|
+
expand_models_for_quant,
|
|
33
|
+
normalize_quant_mode,
|
|
34
|
+
pull_guidance_lines,
|
|
35
|
+
quant_from_env,
|
|
36
|
+
)
|
|
37
|
+
from split_stack.routing import explain_route, route_prompt
|
|
38
|
+
from split_stack.session import (
|
|
39
|
+
Session,
|
|
40
|
+
configure,
|
|
41
|
+
default_profile_from_env,
|
|
42
|
+
describe_session,
|
|
43
|
+
explain,
|
|
44
|
+
get_session,
|
|
45
|
+
profile_for_vram_gb,
|
|
46
|
+
route,
|
|
47
|
+
session_warnings,
|
|
48
|
+
)
|
|
49
|
+
from split_stack.startup_tips import emit_import_tips, model_recommendation_report
|
|
50
|
+
from split_stack.tiering import assign_tiers, describe_tiers
|
|
51
|
+
from split_stack.validation import validate_tier_map
|
|
52
|
+
|
|
53
|
+
__version__ = "0.2.0"
|
|
54
|
+
|
|
55
|
+
# Public package surface. Every name this module imports purely for re-export
# is listed here, so ``from split_stack import *`` and static analysers see the
# same API as plain attribute access on the package.
__all__ = [
    "ComplexityTier",
    "DeploymentProfileSpec",
    "ModelEntry",
    "ModelRegistry",
    "Prerequisite",
    "ProfileRequirements",
    "QUANT_MODES",
    "StackAdvice",
    "StepKind",
    "RouteDecision",
    "Session",
    "TierMap",
    "UsageProfile",
    "assign_recommended_tiers",
    "assign_tiers",
    "assign_tiers_from_local",
    "adjust_vram_for_quant",
    "configure",
    "default_profile_from_env",
    "describe_session",
    "describe_tiers",
    "discover_models",
    "emit_import_tips",
    "expand_models_for_quant",
    "explain",
    "explain_route",
    "format_markdown_table",
    "get_session",
    "list_local_models",
    "list_deployment_profiles",
    "list_hints",
    "list_recommended_stacks",
    "load_registry",
    "normalize_quant_mode",
    "normalize_step_kind",
    "normalize_deployment_profile",
    "list_usage_profiles",
    "model_recommendation_report",
    "model_weight",
    "profile_for_vram_gb",
    "pull_guidance_lines",
    "quant_from_env",
    "recommended_models",
    "route",
    "route_prompt",
    "resolve_tier",
    "run_benchmark",
    "score_prompt",
    "session_warnings",
    "stack_recommendation",
    "usage_requirements",
    "validate_tier_map",
    "__version__",
]
|
|
105
|
+
|
|
106
|
+
# Embedded library use: no import-time stderr. Use ``stack tips`` or ``SPLIT_STACK_IMPORT_TIPS=on``.
|
split_stack/__main__.py
ADDED
split_stack/advice.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from split_stack.models import StackAdvice
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def stack_recommendation(cursor_override_enabled: bool = False) -> StackAdvice:
    """Build the fixed stack advice record.

    Args:
        cursor_override_enabled: Passed through unchanged as the
            ``warn_cursor_override`` field of the result.

    Returns:
        A ``StackAdvice`` whose ``cursor_model``, ``prose_path`` and
        ``local_path`` fields are constant text.
    """
    advice_fields = {
        "cursor_model": "Auto",
        "prose_path": "Use Claude/ChatGPT native apps for prose-heavy work",
        "local_path": "Use local Ollama for private quick questions and scripts",
        "warn_cursor_override": cursor_override_enabled,
    }
    return StackAdvice(**advice_fields)
|
split_stack/benchmark.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Fixed prompt suite for routing benchmarks (no inference required)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from split_stack.poc_models import DEFAULT_POC_STACK
|
|
8
|
+
from split_stack.routing import route_prompt
|
|
9
|
+
from split_stack.tiering import assign_tiers
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class BenchmarkPrompt:
    """One immutable entry of the fixed benchmark prompt suite."""

    # Short identifier of the entry (the built-in suite uses "b01".."b10").
    id: str
    # Prompt text that gets routed.
    prompt: str
    # Free-form label describing what the prompt is meant to exercise.
    note: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Default model list for benchmarks: the POC stack plus one extra model.
DEFAULT_MODELS = [*DEFAULT_POC_STACK, "qwen3:30b-a3b"]
|
|
20
|
+
|
|
21
|
+
# Fixed suite of (id, prompt, note) triples, materialised as BenchmarkPrompt
# records in declaration order.
BENCHMARK_PROMPTS: tuple[BenchmarkPrompt, ...] = tuple(
    BenchmarkPrompt(*fields)
    for fields in (
        ("b01", "what is caching?", "definition"),
        ("b02", "what is an API?", "definition"),
        ("b03", "define idempotency in one paragraph", "short explain"),
        ("b04", "explain eventual consistency for a junior dev", "medium explain"),
        ("b05", "compare Redis vs Memcached for session storage", "compare"),
        ("b06", "outline a plan for adding logging to a Flask app", "plan"),
        ("b07", "debug why webhook retries duplicate orders", "debug keyword"),
        ("b08", "design a distributed retry strategy for webhooks", "architecture"),
        ("b09", "refactor this auth module for testability", "refactor keyword"),
        ("b10", "prove this retry policy step by step with edge cases", "reasoning"),
    )
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class BenchmarkRow:
    """Immutable result record for a single routed benchmark prompt."""

    # Identifier copied from the originating benchmark prompt.
    id: str
    # Prompt text that was routed.
    prompt: str
    # Tier label chosen for the prompt.
    tier: str
    # Name of the model the prompt was routed to.
    model: str
    # Note copied from the originating benchmark prompt.
    note: str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class BenchmarkReport:
    """Immutable outcome of a benchmark run: the models used and one row per prompt."""

    # Model names the run was configured with.
    models: tuple[str, ...]
    # One result row per benchmark prompt, in suite order.
    rows: tuple[BenchmarkRow, ...]

    @property
    def tier_counts(self) -> dict[str, int]:
        """Return how many rows landed in each tier, keyed by tier label.

        Keys appear in the order each tier is first seen in ``rows``.
        """
        tally: dict[str, int] = {}
        for entry in self.rows:
            label = entry.tier
            tally[label] = tally[label] + 1 if label in tally else 1
        return tally
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def run_benchmark(
    model_names: list[str] | None = None,
) -> BenchmarkReport:
    """Route every prompt of the fixed suite and collect the decisions.

    Args:
        model_names: Models to build the tier map from. ``None`` or an empty
            list falls back to a copy of ``DEFAULT_MODELS``.

    Returns:
        A ``BenchmarkReport`` holding the models used and one ``BenchmarkRow``
        per entry of ``BENCHMARK_PROMPTS``, in suite order.
    """
    chosen = model_names if model_names else list(DEFAULT_MODELS)
    tier_map = assign_tiers(chosen)

    def _row_for(case: BenchmarkPrompt) -> BenchmarkRow:
        # route_prompt yields a (tier, model) pair; only the tier's value is stored.
        picked_tier, picked_model = route_prompt(case.prompt, tier_map)
        return BenchmarkRow(
            id=case.id,
            prompt=case.prompt,
            tier=picked_tier.value,
            model=picked_model,
            note=case.note,
        )

    collected = tuple(_row_for(case) for case in BENCHMARK_PROMPTS)
    return BenchmarkReport(models=tuple(chosen), rows=collected)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def format_markdown_table(report: BenchmarkReport) -> str:
    """Render the report rows as a Markdown table.

    The table has the columns ``id``, ``tier``, ``model`` and ``note``; the
    prompt text itself is not included. Lines are joined with ``"\\n"`` and
    there is no trailing newline.
    """
    header = (
        "| id | tier | model | note |",
        "| --- | --- | --- | --- |",
    )
    body = (
        f"| {entry.id} | {entry.tier} | {entry.model} | {entry.note} |"
        for entry in report.rows
    )
    return "\n".join((*header, *body))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def naive_single_model(report: BenchmarkReport) -> str:
    """Return the model assigned to the ``complex`` tier for the report's models.

    The tier map is rebuilt from ``report.models``; the result stands in for an
    always-use-the-biggest-model policy.
    """
    return assign_tiers(list(report.models)).complex
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def routed_model_mix(report: BenchmarkReport) -> dict[str, int]:
    """Count how many report rows were routed to each model.

    Keys are model names in the order they first appear in ``report.rows``.
    """
    mix: dict[str, int] = {}
    for entry in report.rows:
        mix.setdefault(entry.model, 0)
        mix[entry.model] += 1
    return mix
|