somm-skill 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ build/
8
+ dist/
9
+
10
+ # Environments
11
+ .venv/
12
+ .env
13
+ .env.*
14
+
15
+ # Tooling caches
16
+ .pytest_cache/
17
+ .mypy_cache/
18
+ .ruff_cache/
19
+
20
+ # Local Claude session id log (per-machine, not source of truth)
21
+ sessions.txt
22
+
23
+ # Local data (never commit telemetry)
24
+ .somm/
25
+ *.sqlite
26
+ *.sqlite-wal
27
+ *.sqlite-shm
28
+
29
+ # Author-local notes not for open source
30
+ notes/
31
+ .claude/
32
+
33
+ # Editor
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+
38
+ # Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: somm-skill
3
+ Version: 0.6.1
4
+ Summary: somm onboarding skill templates for coding agents (Claude, Codex, Cursor, Windsurf)
5
+ Project-URL: Homepage, https://github.com/lavallee/somm
6
+ Project-URL: Repository, https://github.com/lavallee/somm
7
+ Project-URL: Issues, https://github.com/lavallee/somm/issues
8
+ Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
9
+ Author: Marc Lavallee
10
+ License: MIT
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Description-Content-Type: text/markdown
18
+
19
+ # somm-skill
20
+
21
+ Onboarding guidance for coding agents working in projects that use `somm`.
22
+
23
+ `SKILL.md` is the canonical content — a Claude Code skill. Agent-specific
24
+ variants (Codex, Cursor, Windsurf, …) live under `templates/` and are derived
25
+ from the same core guidance.
26
+
27
+ ## Installation
28
+
29
+ For Claude Code:
30
+
31
+ ```bash
32
+ mkdir -p ~/.claude/skills/somm
33
+ cp packages/somm-skill/SKILL.md ~/.claude/skills/somm/SKILL.md
34
+ ```
35
+
36
+ Or, once `somm mcp install --client=claude-code` lands in D2:
37
+
38
+ ```bash
39
+ somm mcp install --client=claude-code # installs MCP + skill in one step
40
+ ```
41
+
42
+ For other agents:
43
+
44
+ - **Cursor:** copy `templates/cursor.md` into `.cursor/rules/` in your project.
45
+ - **Windsurf:** copy `templates/windsurf.md` into `.windsurf/rules/`.
46
+ - **Codex:** copy `templates/codex.md` into your project's Codex config.
47
+
48
+ (Agent-specific templates ship in D2+; as of 0.6.1 the package has only the canonical `SKILL.md` and its companion `SOMMELIER.md`.)
@@ -0,0 +1,30 @@
1
+ # somm-skill
2
+
3
+ Onboarding guidance for coding agents working in projects that use `somm`.
4
+
5
+ `SKILL.md` is the canonical content — a Claude Code skill. Agent-specific
6
+ variants (Codex, Cursor, Windsurf, …) live under `templates/` and are derived
7
+ from the same core guidance.
8
+
9
+ ## Installation
10
+
11
+ For Claude Code:
12
+
13
+ ```bash
14
+ mkdir -p ~/.claude/skills/somm
15
+ cp packages/somm-skill/SKILL.md ~/.claude/skills/somm/SKILL.md
16
+ ```
17
+
18
+ Or, once `somm mcp install --client=claude-code` lands in D2:
19
+
20
+ ```bash
21
+ somm mcp install --client=claude-code # installs MCP + skill in one step
22
+ ```
23
+
24
+ For other agents:
25
+
26
+ - **Cursor:** copy `templates/cursor.md` into `.cursor/rules/` in your project.
27
+ - **Windsurf:** copy `templates/windsurf.md` into `.windsurf/rules/`.
28
+ - **Codex:** copy `templates/codex.md` into your project's Codex config.
29
+
30
+ (Agent-specific templates ship in D2+; as of 0.6.1 the package has only the canonical `SKILL.md` and its companion `SOMMELIER.md`.)
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "somm-skill"
3
+ version = "0.6.1"
4
+ description = "somm onboarding skill templates for coding agents (Claude, Codex, Cursor, Windsurf)"
5
+ requires-python = ">=3.12"
6
+ license = { text = "MIT" }
7
+ readme = "README.md"
8
+ authors = [{ name = "Marc Lavallee" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Programming Language :: Python :: 3.12",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Topic :: Software Development :: Libraries",
15
+ ]
16
+ dependencies = []
17
+
18
+ [project.urls]
19
+ Homepage = "https://github.com/lavallee/somm"
20
+ Repository = "https://github.com/lavallee/somm"
21
+ Issues = "https://github.com/lavallee/somm/issues"
22
+ Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
23
+
24
+ [build-system]
25
+ requires = ["hatchling"]
26
+ build-backend = "hatchling.build"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["src/somm_skill"]
@@ -0,0 +1,153 @@
1
+ ---
2
+ name: somm
3
+ description: Use when writing or modifying LLM-calling code in a Python project. Guides you to `somm.llm()` instead of raw provider SDKs, keeps telemetry and provenance consistent across projects, and surfaces model recommendations grounded in real local telemetry.
4
+ ---
5
+
6
+ # somm — LLM call guidance for coding agents
7
+
8
+ You are working in a Python project that uses **somm**, a self-hosted LLM
9
+ telemetry + routing layer. This skill ensures the code you write records
10
+ useful telemetry and benefits from somm's intelligence loop.
11
+
12
+ ## When this applies
13
+
14
+ Trigger when you are about to:
15
+ - Call an LLM (chat completion, embedding, structured output, streaming).
16
+ - Add a new LLM-using feature or endpoint.
17
+ - Refactor an existing LLM wrapper in the project.
18
+ - Choose between models or providers.
19
+ - Tune a prompt.
20
+
21
+ ## Rules
22
+
23
+ ### 1. Use `somm.llm()` — not raw provider SDKs
24
+
25
+ ```python
26
+ import somm
27
+
28
+ llm = somm.llm(project="my-project")
29
+ result = llm.generate(
30
+ prompt="Extract contacts from the text below...",
31
+ workload="contact_extract", # required — tags telemetry
32
+ max_tokens=256,
33
+ )
34
+ print(result.text)
35
+ ```
36
+
37
+ Do **not** reach for `anthropic.Anthropic()`, `openai.OpenAI()`, raw `httpx`,
38
+ or provider-specific SDKs directly in project code. somm wraps them with
39
+ telemetry, routing, cost tracking, and provenance for free.
40
+
41
+ ### 2. Tag every call with a `workload`
42
+
43
+ A workload is the *task*, not the call. "extract_contacts_from_article" is a
44
+ workload; "call_anthropic" is not. Use lowercase snake_case names that stay
45
+ stable over time.
46
+
47
+ Register workloads before use (outside the hot path):
48
+
49
+ ```python
50
+ # run once per workload, at app startup or in a migration
51
+ somm.llm().repo.register_workload(
52
+ name="contact_extract",
53
+ project="my-project",
54
+ description="Pull person names + emails from unstructured text",
55
+ privacy_class=somm.PrivacyClass.INTERNAL,
56
+ )
57
+ ```
58
+
59
+ In `observe` mode (default) somm auto-registers unknown workloads and warns.
60
+ In `strict` mode it raises `SommStrictMode`.
61
+
62
+ ### 3. Stamp provenance on stored data
63
+
64
+ When an LLM result lands in your project's DB, stamp the provenance on the
65
+ row:
66
+
67
+ ```python
68
+ row["llm_provenance"] = {
69
+ "call_id": result.call_id,
70
+ "provider": result.provider,
71
+ "model": result.model,
72
+ "workload": "contact_extract",
73
+ }
74
+ ```
75
+
76
+ This lets you later answer "which model generated this row" without guessing.
77
+
78
+ ### 4. Check outcomes
79
+
80
+ `somm.Outcome` is a typed enum. Use `result.mark()` to tag quality signals:
81
+
82
+ ```python
83
+ data = somm.extract_json(result.text)
84
+ if data is None:
85
+ result.mark(somm.Outcome.BAD_JSON)
86
+ elif not data.get("contacts"):
87
+ result.mark(somm.Outcome.OFF_TASK)
88
+ else:
89
+ result.mark(somm.Outcome.OK)
90
+ ```
91
+
92
+ ### 5. Before choosing a model, ask somm
93
+
94
+ When `somm_recommend` or `somm_advise` is available, call one of them
95
+ before hand-picking a model. somm has telemetry from your real
96
+ workloads + pricing/capability intel from the provider APIs — it knows
97
+ more than your training data does. Do not default to "Claude because
98
+ that's what the user asked for." Ask which model fits the workload's
99
+ cost/quality profile *as of today*.
100
+
101
+ For free-form model advice ("what vision model should I use?",
102
+ "cheapest option for long context?"), the dedicated [sommelier
103
+ skill](./SOMMELIER.md) covers the full recall → advise → record loop
104
+ with cross-project decision memory. Load it when the conversation
105
+ shifts from coding to model choice.
106
+
107
+ ### 6. Streaming and structured output
108
+
109
+ - `llm.stream(prompt, workload=...)` for streamed responses.
110
+ - `llm.extract_structured(prompt, workload=...)` returns `dict | list`,
111
+ handling markdown fences, brace extraction, and provider quirks.
112
+
113
+ Do **not** implement your own JSON repair loop. somm already has one.
114
+
115
+ ### 7. Never ship these patterns
116
+
117
+ - **Raw provider SDK imports** (`from anthropic import ...`) in project code.
118
+ - **Hardcoded model names** outside config — route via workload + provider preference.
119
+ - **Inline retry loops** — routing handles cooldowns and fallback.
120
+ - **Prompt concatenation as strings** for long-lived prompts — use
121
+ `somm.prompt(workload, version="latest")` (D2+) so versions are tracked.
122
+ - **API keys in code or logs** — somm's adapters strip auth headers before
123
+ any telemetry write. Keep it that way.
124
+
125
+ ## When somm-service is running
126
+
127
+ If `somm serve` is running (usually `localhost:7878`), you can link to it in
128
+ PR descriptions or error messages: the dashboard shows the current call's
129
+ place in the workload's rollup. The service is optional — the library works
130
+ without it.
131
+
132
+ ## When the MCP is connected
133
+
134
+ If the user has configured `somm-mcp` in this agent, you can call:
135
+ - `somm_stats` — telemetry roll-up for the current project.
136
+ - `somm_recommend` — model recommendations grounded in local shadow-eval
137
+ data, with cold-start sommelier fallback when data is sparse.
138
+ - `somm_advise` — free-form candidate ranking over `model_intel` +
139
+ capability filters + past decisions. See [SOMMELIER.md](./SOMMELIER.md).
140
+ - `somm_record_decision` / `somm_search_decisions` — cross-project
141
+ advisory memory for model choices.
142
+ - `somm_compare` — run a prompt through N models side-by-side.
143
+ - `somm_replay` — replay a past call against a different model.
144
+
145
+ Call these *before* deciding on a model for new LLM code.
146
+
147
+ ## If you can't use somm
148
+
149
+ If the project intentionally doesn't use somm (e.g., a pre-existing integration
150
+ test harness with its own LLM stub), don't force it. But:
151
+ - Note this in a PR comment so the user can decide later.
152
+ - Still stamp `somm.provenance()`-shaped metadata on stored rows if feasible
153
+ — the schema is self-documenting.
@@ -0,0 +1,170 @@
1
+ ---
2
+ name: somm-sommelier
3
+ description: Use when the user asks for model advice ("what should I use for X", "good vision models", "cheapest option for long-context") or is choosing between providers/models for an LLM workload. Consults somm's local model intel + prior cross-project decisions, presents ranked options with reasoning, and records the outcome so future sessions can build on it.
4
+ ---
5
+
6
+ # somm sommelier — model advisor for coding agents
7
+
8
+ You are helping the user pick a model for a specific use case. somm has
9
+ local model-intel and a cross-project memory of past decisions — use them
10
+ instead of guessing from your training data.
11
+
12
+ This skill assumes `somm-mcp` is configured as an MCP server in this
13
+ agent. The relevant tools are `somm_advise`, `somm_record_decision`,
14
+ `somm_search_decisions`, plus the existing `somm_recommend`,
15
+ `somm_compare`, and `somm_stats`.
16
+
17
+ ## The loop
18
+
19
+ Three phases: **recall → advise → record**.
20
+
21
+ ### 1. Recall
22
+
23
+ Before recommending anything, call `somm_search_decisions` with the
24
+ user's question. If somm already has a decision for a substantively
25
+ similar question — from any project — surface it first. The user may
26
+ want to reuse, revisit, or supersede it.
27
+
28
+ ```
29
+ somm_search_decisions(
30
+ question="good free vision models on openrouter",
31
+ scope="global", # decisions mirror globally by default
32
+ limit=5,
33
+ )
34
+ ```
35
+
36
+ Do not treat past decisions as authoritative — model intel changes. But
37
+ *do* acknowledge them: "Last time in `captionapp` (2026-04-10) we picked
38
+ `openrouter/google/gemma-3-27b-it:free` because … — still relevant?"
39
+
40
+ ### 2. Advise
41
+
42
+ Call `somm_advise` with the user's constraints extracted from the
43
+ conversation:
44
+
45
+ ```
46
+ somm_advise(
47
+ question="good free vision models on openrouter",
48
+ capabilities=["vision"],
49
+ providers=["openrouter"],
50
+ required_output_modalities=["text"], # for captioning — excludes audio-gen
51
+ free_only=True,
52
+ workload="critique_visual", # optional — boosts ranking with shadow-eval scores
53
+ limit=8,
54
+ )
55
+ ```
56
+
57
+ The response carries ranked candidates with `reasons` — a list of
58
+ human-readable factors the sommelier weighed. Present them verbatim
59
+ rather than restating in your own words; the reasons are
60
+ already terse (cheap in tokens) and calibrated.
61
+
62
+ **0.2.2 constraint knobs (all optional):**
63
+
64
+ - `required_output_modalities` — drop candidates whose output modality
65
+ isn't in this set. Pass `["text"]` for captioning / QA workloads to
66
+ exclude audio-gen or image-gen models that happen to accept image
67
+ inputs. `["image"]` for generation.
68
+ - `exclude_models` — fnmatch-style patterns against
69
+ `"<provider>/<model>"`. Inline blocklist when you hit a bad candidate
70
+ without waiting for a release.
71
+ - `include_meta_routers` — off by default. Router meta-models
72
+ (`openrouter/auto`, `openrouter/free`) pick a backend at inference
73
+ time, so they're non-deterministic and inherit capability claims from
74
+ whatever they route to. Opt in only when you specifically want one.
75
+ - `unknown_capability_penalty` — `0.9` by default. Models with
76
+ confirmed capabilities outrank models where we can't confirm. Set to
77
+ `1.0` to preserve pre-0.2.2 behavior (unknown == known-yes).
78
+
79
+ **Guidelines for turning the response into a conversation:**
80
+
81
+ - Show the top 3 with their reasons, not all 8.
82
+ - When shadow-eval data exists (`shadow_score` is not null), lead with
83
+ that — it's the only candidate-level quality signal somm has.
84
+ - If `prior_decisions` came back, cite them alongside the live
85
+ candidates: "Candidate X matches what we picked in project Y." Note
86
+ that as of 0.2.2 the sommelier also *weighs* matching priors into the
87
+ score — you'll see `prior(<project> <date>): chose — ×1.10` or
88
+ `flagged — ×0.50` in the reasons list. Weight decays with age
89
+ (half-life ~90 days).
90
+ - If `candidates` is empty, read `note` — 0.2.2 returns a filter
91
+ breakdown like "Filtered out: 3 wrong output modality, 2 meta-router"
92
+ that tells you exactly which constraint to loosen.
93
+
94
+ ### 3. Record
95
+
96
+ When the user commits to a choice ("let's go with X", "do that"), call
97
+ `somm_record_decision` immediately. Do NOT wait for the end of the
98
+ session; decisions are the advisory memory other sessions inherit.
99
+
100
+ ```
101
+ somm_record_decision(
102
+ question="good free vision models on openrouter",
103
+ rationale="Picked gemma-3-27b because it has the biggest context
104
+ window among free vision models and matches the
105
+ chart-critique workload profile",
106
+ candidates=<candidates list from somm_advise, verbatim>,
107
+ chosen_provider="openrouter",
108
+ chosen_model="google/gemma-3-27b-it:free",
109
+ workload="critique_visual",
110
+ constraints=<constraints dict from somm_advise response>,
111
+ agent="claude-code",
112
+ )
113
+ ```
114
+
115
+ Rationale is the single most valuable field. Keep it short (≤ 3
116
+ sentences) and capture the *reason*, not just a summary — e.g. "picked X
117
+ because Y beats Z on …". Future sessions read this.
118
+
119
+ ## When NOT to use sommelier
120
+
121
+ - **Hard user intent.** If the user says "use Claude Haiku", don't
122
+ pop up a candidate list. Just record a decision with rationale
123
+ "user-specified" if it's a new choice worth remembering.
124
+ - **Inside a hot loop.** Sommelier is for choices, not for every call.
125
+ Once a workload has a chosen model, put it in the workload's default
126
+ and move on.
127
+ - **Private workloads.** Decisions themselves don't leak prompt bodies,
128
+ but if the *question text* contains sensitive info, phrase the
129
+ question abstractly ("pick a model for sensitive-workload-X") rather
130
+ than verbatim.
131
+
132
+ ## Retrospectives
133
+
134
+ If the user reports back later ("the gemma-3 pick wasn't great — kept
135
+ missing colour contrasts"), search for the original decision and call
136
+ `mark_decision_outcome` (surfaced via `somm_record_decision` with
137
+ `supersedes=<id>` once that lands, or via the repository directly in
138
+ the interim). A retrospective note on a decision is how the memory
139
+ self-corrects.
140
+
141
+ ## Example conversation shape
142
+
143
+ > **User:** I need to add chart screenshots to the critique flow. What
144
+ > vision model should I use? Free would be ideal.
145
+ >
146
+ > **Agent:** *(calls `somm_search_decisions(question="vision model for
147
+ > chart critique", scope="global")`)* — no prior decisions found.
148
+ >
149
+ > *(calls `somm_advise(question="...", capabilities=["vision"], free_only=True)`)*
150
+ >
151
+ > Here are the three strongest candidates somm knows about right now:
152
+ >
153
+ > - `openrouter/google/gemma-3-27b-it:free` — free, 128k ctx, vision✓, seen today
154
+ > - `openrouter/meta-llama/llama-3.2-11b-vision-instruct:free` — free,
155
+ > 128k ctx, vision✓
156
+ > - `ollama/llava:13b` — free (local), vision✓
157
+ >
158
+ > The gemma option has the widest context window, which helps if the
159
+ > chart comes with a lot of surrounding text. The ollama option keeps
160
+ > the data on your machine — which matters if the workload might end up
161
+ > `privacy_class=PRIVATE`. Which trade-off fits?
162
+ >
163
+ > **User:** Let's try gemma-3 first.
164
+ >
165
+ > **Agent:** *(calls `somm_record_decision(...)` with rationale "picked
166
+ > gemma-3-27b for chart critique — biggest free vision context; keep
167
+ > llava as fallback if privacy constraints tighten")*
168
+ >
169
+ > Recorded. Decision id: `<uuid>`. This will surface next time
170
+ > anyone — in this project or another — asks a similar question.
@@ -0,0 +1 @@
1
+ """somm-skill — onboarding skill content + templates for coding agents."""
File without changes