somm 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. somm-0.6.1/.gitignore +38 -0
  2. somm-0.6.1/PKG-INFO +44 -0
  3. somm-0.6.1/README.md +20 -0
  4. somm-0.6.1/pyproject.toml +39 -0
  5. somm-0.6.1/src/somm/__init__.py +36 -0
  6. somm-0.6.1/src/somm/capabilities.py +296 -0
  7. somm-0.6.1/src/somm/cli.py +1026 -0
  8. somm-0.6.1/src/somm/client.py +1464 -0
  9. somm-0.6.1/src/somm/compat/__init__.py +53 -0
  10. somm-0.6.1/src/somm/compat/generic.py +161 -0
  11. somm-0.6.1/src/somm/compat/openai_compat.py +177 -0
  12. somm-0.6.1/src/somm/errors.py +177 -0
  13. somm-0.6.1/src/somm/hooks.py +105 -0
  14. somm-0.6.1/src/somm/plan_governor.py +75 -0
  15. somm-0.6.1/src/somm/prompts.py +170 -0
  16. somm-0.6.1/src/somm/provenance.py +36 -0
  17. somm-0.6.1/src/somm/providers/__init__.py +46 -0
  18. somm-0.6.1/src/somm/providers/_openai_compat.py +492 -0
  19. somm-0.6.1/src/somm/providers/anthropic.py +234 -0
  20. somm-0.6.1/src/somm/providers/base.py +124 -0
  21. somm-0.6.1/src/somm/providers/claude_cli.py +112 -0
  22. somm-0.6.1/src/somm/providers/codex_cli.py +97 -0
  23. somm-0.6.1/src/somm/providers/deepseek.py +44 -0
  24. somm-0.6.1/src/somm/providers/gemini.py +73 -0
  25. somm-0.6.1/src/somm/providers/minimax.py +29 -0
  26. somm-0.6.1/src/somm/providers/ollama.py +293 -0
  27. somm-0.6.1/src/somm/providers/openai.py +17 -0
  28. somm-0.6.1/src/somm/providers/openrouter.py +296 -0
  29. somm-0.6.1/src/somm/providers/perplexity.py +59 -0
  30. somm-0.6.1/src/somm/py.typed +0 -0
  31. somm-0.6.1/src/somm/routing.py +376 -0
  32. somm-0.6.1/src/somm/slots.py +84 -0
  33. somm-0.6.1/src/somm/sommelier.py +831 -0
  34. somm-0.6.1/src/somm/telemetry.py +241 -0
  35. somm-0.6.1/tests/test_budget_gate.py +180 -0
  36. somm-0.6.1/tests/test_cli.py +250 -0
  37. somm-0.6.1/tests/test_cli_spend.py +252 -0
  38. somm-0.6.1/tests/test_compat.py +247 -0
  39. somm-0.6.1/tests/test_cross_project.py +215 -0
  40. somm-0.6.1/tests/test_embed.py +229 -0
  41. somm-0.6.1/tests/test_hooks.py +129 -0
  42. somm-0.6.1/tests/test_inprocess_workers.py +95 -0
  43. somm-0.6.1/tests/test_library_ext.py +945 -0
  44. somm-0.6.1/tests/test_multimodal.py +336 -0
  45. somm-0.6.1/tests/test_no_fallback.py +174 -0
  46. somm-0.6.1/tests/test_openrouter.py +256 -0
  47. somm-0.6.1/tests/test_outcome_classification.py +65 -0
  48. somm-0.6.1/tests/test_parse_helpers.py +88 -0
  49. somm-0.6.1/tests/test_perplexity_provider.py +89 -0
  50. somm-0.6.1/tests/test_pricing_utils.py +18 -0
  51. somm-0.6.1/tests/test_provider_adapters.py +1180 -0
  52. somm-0.6.1/tests/test_routing.py +318 -0
  53. somm-0.6.1/tests/test_smoke.py +383 -0
  54. somm-0.6.1/tests/test_sommelier.py +586 -0
  55. somm-0.6.1/tests/test_streaming.py +315 -0
  56. somm-0.6.1/tests/test_workload_frontier.py +179 -0
  57. somm-0.6.1/tests/test_writer_atexit.py +139 -0
somm-0.6.1/.gitignore ADDED
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ build/
8
+ dist/
9
+
10
+ # Environments
11
+ .venv/
12
+ .env
13
+ .env.*
14
+
15
+ # Tooling caches
16
+ .pytest_cache/
17
+ .mypy_cache/
18
+ .ruff_cache/
19
+
20
+ # Local Claude session id log (per-machine, not source of truth)
21
+ sessions.txt
22
+
23
+ # Local data (never commit telemetry)
24
+ .somm/
25
+ *.sqlite
26
+ *.sqlite-wal
27
+ *.sqlite-shm
28
+
29
+ # Author-local notes not for open source
30
+ notes/
31
+ .claude/
32
+
33
+ # Editor
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+
38
+ # Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
somm-0.6.1/PKG-INFO ADDED
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: somm
3
+ Version: 0.6.1
4
+ Summary: somm — self-hosted LLM telemetry, routing, and intelligence loop (library)
5
+ Project-URL: Homepage, https://github.com/lavallee/somm
6
+ Project-URL: Repository, https://github.com/lavallee/somm
7
+ Project-URL: Issues, https://github.com/lavallee/somm/issues
8
+ Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
9
+ Author: Marc Lavallee
10
+ License: MIT
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: httpx>=0.27
18
+ Requires-Dist: somm-core==0.6.1
19
+ Provides-Extra: litellm
20
+ Requires-Dist: litellm>=1.50; extra == 'litellm'
21
+ Provides-Extra: tokenizers
22
+ Requires-Dist: tiktoken>=0.7; extra == 'tokenizers'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # somm
26
+
27
+ **Self-hosted LLM telemetry, routing, and intelligence loop.**
28
+
29
+ The main library: `SommLLM` — one call wraps telemetry, provider routing
30
+ across ten providers, tool calling, streaming, embeddings, multimodal
31
+ dispatch, cost tracking, budget gates, online evaluation, and
32
+ cross-project model memory (the sommelier). Zero-config, privacy-first,
33
+ no phone-home.
34
+
35
+ ```python
36
+ import somm
37
+
38
+ llm = somm.llm(project="my_app")
39
+ result = llm.generate(prompt="Reply with exactly: pong", workload="ping")
40
+ print(result.text, result.provider, result.cost_usd)
41
+ ```
42
+
43
+ Full documentation, design docs, and examples live in the
44
+ [somm repository](https://github.com/lavallee/somm).
somm-0.6.1/README.md ADDED
@@ -0,0 +1,20 @@
1
+ # somm
2
+
3
+ **Self-hosted LLM telemetry, routing, and intelligence loop.**
4
+
5
+ The main library: `SommLLM` — one call wraps telemetry, provider routing
6
+ across ten providers, tool calling, streaming, embeddings, multimodal
7
+ dispatch, cost tracking, budget gates, online evaluation, and
8
+ cross-project model memory (the sommelier). Zero-config, privacy-first,
9
+ no phone-home.
10
+
11
+ ```python
12
+ import somm
13
+
14
+ llm = somm.llm(project="my_app")
15
+ result = llm.generate(prompt="Reply with exactly: pong", workload="ping")
16
+ print(result.text, result.provider, result.cost_usd)
17
+ ```
18
+
19
+ Full documentation, design docs, and examples live in the
20
+ [somm repository](https://github.com/lavallee/somm).
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "somm"
3
+ version = "0.6.1"
4
+ description = "somm — self-hosted LLM telemetry, routing, and intelligence loop (library)"
5
+ requires-python = ">=3.12"
6
+ license = { text = "MIT" }
7
+ readme = "README.md"
8
+ authors = [{ name = "Marc Lavallee" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Programming Language :: Python :: 3.12",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Topic :: Software Development :: Libraries",
15
+ ]
16
+ dependencies = [
17
+ "somm-core==0.6.1",
18
+ "httpx>=0.27",
19
+ ]
20
+
21
+ [project.optional-dependencies]
22
+ litellm = ["litellm>=1.50"]
23
+ tokenizers = ["tiktoken>=0.7"]
24
+
25
+ [project.scripts]
26
+ somm = "somm.cli:main"
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/lavallee/somm"
30
+ Repository = "https://github.com/lavallee/somm"
31
+ Issues = "https://github.com/lavallee/somm/issues"
32
+ Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
33
+
34
+ [build-system]
35
+ requires = ["hatchling"]
36
+ build-backend = "hatchling.build"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/somm"]
@@ -0,0 +1,36 @@
1
+ """somm — self-hosted LLM telemetry, routing, and intelligence loop."""
2
+
3
+ from somm_core import EmbedResult, Outcome, PrivacyClass, SommResult
4
+ from somm_core.parse import extract_json
5
+
6
+ from somm import hooks
7
+ from somm.client import SommLLM, llm
8
+ from somm.errors import (
9
+ SommBadRequest,
10
+ SommBudgetExceeded,
11
+ SommError,
12
+ SommNoCapableProvider,
13
+ SommPrivacyViolation,
14
+ SommProvidersExhausted,
15
+ SommStrictMode,
16
+ )
17
+ from somm.provenance import provenance
18
+
19
+ __all__ = [
20
+ "SommLLM",
21
+ "llm",
22
+ "hooks",
23
+ "provenance",
24
+ "extract_json",
25
+ "EmbedResult",
26
+ "Outcome",
27
+ "PrivacyClass",
28
+ "SommResult",
29
+ "SommError",
30
+ "SommBadRequest",
31
+ "SommBudgetExceeded",
32
+ "SommNoCapableProvider",
33
+ "SommPrivacyViolation",
34
+ "SommProvidersExhausted",
35
+ "SommStrictMode",
36
+ ]
@@ -0,0 +1,296 @@
1
+ """Capability lookup against `model_intel.capabilities_json`.
2
+
3
+ The router consults this to skip (provider, model) pairs that can't serve a
4
+ request's required capabilities *before* making the network call. Unknown
5
+ models fall through as capable (same behavior as pre-capability somm).
6
+
7
+ Capability sources per provider:
8
+
9
+ - **OpenRouter**: populates `modality` (e.g. `"text+image->text"`) and
10
+ `architecture` (incl. `input_modalities`) on every model. We derive
11
+ `vision` from either field.
12
+ - **Anthropic / OpenAI**: no list-models API for pricing, so capabilities
13
+ come from the static pricing seed. Vision is inferred from model name
14
+ as a conservative starter — opus/sonnet/haiku 4.x and gpt-4o* support
15
+ images natively.
16
+ - **Ollama**: family-based inference; `llava`, `bakllava`, `llama3.2-vision`
17
+ and similar carry vision. Unknown models fall through as capable.
18
+ - **Minimax**: single default model; treated as capability-unknown → allow.
19
+
20
+ Adding new capability tokens (`tool_use`, `json_mode`, `thinking`, …) is
21
+ just a matter of teaching `model_has_capability` to look them up — no
22
+ schema change needed.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ from typing import TYPE_CHECKING
29
+
30
+ if TYPE_CHECKING:
31
+ from somm_core.repository import Repository
32
+
33
+
34
+ _VISION_NAME_HINTS: tuple[str, ...] = (
35
+ "claude-opus-4",
36
+ "claude-sonnet-4",
37
+ "claude-haiku-4",
38
+ "gpt-4o",
39
+ "gpt-4.1",
40
+ "gpt-5",
41
+ "llava",
42
+ "bakllava",
43
+ "vision",
44
+ "gemini",
45
+ )
46
+
47
+
48
+ # Models that consume "thinking" / reasoning tokens before emitting visible
49
+ # output. Workloads that declare ``capabilities_required=["thinking"]`` get
50
+ # routed to one of these; mechanical workloads (copyedit, simple polish)
51
+ # explicitly omit it and get steered to a non-thinking variant.
52
+ #
53
+ # Inverse: NON-thinking variants of these families exist (deepseek-v4-flash,
54
+ # claude-sonnet-4-6 without extended thinking, gemini-2.5-flash). The
55
+ # `_NON_THINKING_NAME_HINTS` list below excludes models that look like they
56
+ # want thinking but explicitly don't.
57
+ _THINKING_NAME_HINTS: tuple[str, ...] = (
58
+ "deepseek-v4-pro",
59
+ "deepseek-reasoner",
60
+ "claude-opus-4-7", # Opus families default to extended thinking
61
+ "claude-opus-4-6",
62
+ "o1-", # OpenAI o1 family
63
+ "o3-", # OpenAI o3 family
64
+ "gemini-2.5-pro",
65
+ "gemini-3-pro",
66
+ "qwq-",
67
+ "magistral",
68
+ )
69
+
70
+ _NON_THINKING_NAME_HINTS: tuple[str, ...] = (
71
+ "-flash",
72
+ "-mini",
73
+ "-haiku", # Haiku family is non-thinking by default
74
+ "deepseek-chat",
75
+ "deepseek-coder",
76
+ "gpt-4o",
77
+ "gpt-4.1",
78
+ )
79
+
80
+
81
+ # Model families that support function/tool calling. Workloads that declare
82
+ # ``capabilities_required=["tools"]`` (e.g. deepagents orchestrators) route
83
+ # only to these. Tool calling is near-universal across modern frontier and
84
+ # mid-tier models, so the list is generous and unknown models fall through
85
+ # as capable (None) rather than being blocked — there is no negative case.
86
+ # Adapters that *can't* serve tools raise SommBadRequest at call time
87
+ # instead of being filtered here.
88
+ _TOOLS_NAME_HINTS: tuple[str, ...] = (
89
+ "claude-3",
90
+ "claude-opus-4",
91
+ "claude-sonnet-4",
92
+ "claude-haiku-4",
93
+ "gpt-4", # gpt-4o, gpt-4.1, gpt-4-turbo …
94
+ "gpt-5",
95
+ "o1-",
96
+ "o3-",
97
+ "o4-",
98
+ "gemini-1.5",
99
+ "gemini-2",
100
+ "gemini-3",
101
+ "llama-3.1",
102
+ "llama-3.2",
103
+ "llama-3.3",
104
+ "llama3.1",
105
+ "llama3.2",
106
+ "llama3.3",
107
+ "llama-4",
108
+ "qwen2.5",
109
+ "qwen3",
110
+ "deepseek-chat",
111
+ "deepseek-v3",
112
+ "deepseek-v4",
113
+ "mistral",
114
+ "mixtral",
115
+ )
116
+
117
+
118
def _openrouter_has_vision(caps: dict) -> bool:
    """Report whether OpenRouter capability metadata advertises image input.

    Two fields are consulted: the scalar ``modality`` string (for example
    ``"text+image->text"``) and the ``architecture.input_modalities`` list.
    The model counts as vision-capable when either mentions ``"image"``,
    compared case-insensitively. Values of an unexpected type are ignored.
    """

    def _mentions_image(value: object) -> bool:
        # Only strings can carry a modality token; anything else is ignored.
        return isinstance(value, str) and "image" in value.lower()

    if _mentions_image(caps.get("modality")):
        return True

    architecture = caps.get("architecture")
    if not isinstance(architecture, dict):
        return False

    declared_inputs = architecture.get("input_modalities")
    if not isinstance(declared_inputs, list):
        return False

    return any(_mentions_image(entry) for entry in declared_inputs)
130
+
131
+
132
# OpenAI o-series families already listed in `_THINKING_NAME_HINTS` are
# reasoning-only: even their "-mini" tiers think before answering. Without
# this override the generic "-mini" entry in `_NON_THINKING_NAME_HINTS`
# (which is consulted first) would classify `o1-mini` / `o3-mini` as
# non-thinking.
_REASONING_ONLY_PREFIXES: tuple[str, ...] = ("o1-", "o3-")


def _thinking_verdict(lowered_model: str) -> bool | None:
    """Classify a lowercased model name as thinking (True), not (False), or unknown (None)."""
    # Strip a vendor prefix such as "openai/" so the override only fires when
    # the model's own name starts with a reasoning-only family.
    base_name = lowered_model.rsplit("/", 1)[-1]
    if base_name.startswith(_REASONING_ONLY_PREFIXES):
        return True
    # Non-thinking hints take precedence over thinking hints so that, e.g.,
    # a "-flash" tier of an otherwise reasoning family is excluded.
    if any(hint in lowered_model for hint in _NON_THINKING_NAME_HINTS):
        return False
    if any(hint in lowered_model for hint in _THINKING_NAME_HINTS):
        return True
    return None


def model_has_capability(
    repo: Repository,
    provider: str,
    model: str,
    capability: str,
) -> bool | None:
    """Return True/False if we know, None if capability data is unavailable.

    Callers treat `None` as "allow — let the provider try" per the
    capability-aware routing proposal.

    Resolution order:

    1. An empty ``capability`` is trivially satisfied (True).
    2. An explicit boolean flag in the stored ``capabilities_json``
       (e.g. ``{"vision": true}``) wins over every heuristic.
    3. Per-capability heuristics for ``vision``, ``thinking``,
       ``non-thinking`` and ``tools``.
    4. Any other capability token is unknown and yields None.

    Args:
        repo: Repository whose ``_open()`` context manager yields a DB
            connection with a ``model_intel`` table.
        provider: Provider name used as the lookup key (e.g. ``"openrouter"``).
        model: Model identifier used as the lookup key and for name hints.
        capability: Capability token to check.
    """
    if not capability:
        return True

    with repo._open() as conn:
        row = conn.execute(
            "SELECT capabilities_json FROM model_intel "
            "WHERE provider = ? AND model = ?",
            (provider, model),
        ).fetchone()

    # Missing rows, empty payloads, malformed JSON and non-object JSON all
    # collapse to "no capability data" (caps is None).
    caps: dict | None = None
    if row and row[0]:
        try:
            parsed = json.loads(row[0])
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            caps = parsed

    # Explicit per-capability flag wins if present ({"vision": true}).
    if caps is not None and capability in caps and isinstance(caps[capability], bool):
        return caps[capability]

    if capability == "vision":
        if provider == "openrouter" and caps is not None:
            return _openrouter_has_vision(caps)
        lowered = model.lower()
        if any(h in lowered for h in _VISION_NAME_HINTS):
            return True
        if provider == "ollama":
            fam = caps.get("family") if caps is not None else None
            if isinstance(fam, str) and any(h in fam.lower() for h in _VISION_NAME_HINTS):
                return True
            # Unknown ollama model → capability-unknown, let it try.
            return None
        if caps is None:
            return None
        return False

    if capability in ("thinking", "non-thinking"):
        # "thinking": reasoning-tier models think before emitting visible
        # text; routing a workload that requires thinking to one of them is
        # the difference between a calibrated answer and an empty response.
        # "non-thinking" is the exact inverse: mechanical workloads steer
        # away from reasoning models, which would burn budget on them.
        thinks = _thinking_verdict(model.lower())
        if thinks is None:
            return None  # unknown — let the provider try
        return thinks if capability == "thinking" else not thinks

    if capability == "tools":
        # Function/tool calling. Known tool-capable families return True;
        # everything else falls through as None (allow) — never False, since
        # a provider that genuinely can't serve tools raises SommBadRequest
        # at call time rather than being pre-filtered here.
        lowered = model.lower()
        if any(h in lowered for h in _TOOLS_NAME_HINTS):
            return True
        return None

    # Unknown capability — don't block.
    return None
217
+
218
+
219
def provider_can_serve(
    repo: Repository,
    provider: str,
    model: str,
    required: list[str],
) -> tuple[bool, str]:
    """Check a (provider, model) pair against every required capability.

    Returns ``(True, "")`` when nothing rules the pair out, otherwise
    ``(False, "missing_capability:<cap>")`` for the first capability that
    `model_has_capability` reports as definitely absent. A verdict of
    ``None`` (unknown) does not block the pair.
    """
    for needed in required:
        if model_has_capability(repo, provider, model, needed) is not False:
            # True or unknown: this capability does not block the pair.
            continue
        return False, f"missing_capability:{needed}"
    return True, ""
231
+
232
+
233
def model_output_modalities(
    repo: Repository,
    provider: str,
    model: str,
) -> list[str] | None:
    """Return the list of output modalities this model can produce, or None
    when we have no signal.

    The stored ``capabilities_json`` for (provider, model) is inspected in
    this order, and the first source that yields a non-empty list wins:

    1. ``architecture.output_modalities`` (OpenRouter) — list of strings.
    2. The scalar ``modality`` string (OpenRouter, e.g.
       ``"text+image->text"``) — the part after ``->`` split on ``+``.
    3. ``hf.output_modalities`` (HuggingFace enrichment) — list of strings.

    Every returned token is lowercased (``"text"``, ``"image"``, ``"audio"``,
    ``"video"``, ``"embedding"``). A missing row, an empty payload, invalid
    JSON, or a JSON value that is not an object all yield None.
    """

    def _lowered_strings(candidate: object) -> list[str]:
        # Keep only string entries of a list, lowercased; anything else → [].
        if not isinstance(candidate, list):
            return []
        return [entry.lower() for entry in candidate if isinstance(entry, str)]

    with repo._open() as conn:
        cursor = conn.execute(
            "SELECT capabilities_json FROM model_intel "
            "WHERE provider = ? AND model = ?",
            (provider, model),
        )
        row = cursor.fetchone()

    payload = row[0] if row else None
    if not payload:
        return None
    try:
        caps = json.loads(payload)
    except json.JSONDecodeError:
        return None
    if not isinstance(caps, dict):
        return None

    # 1. Direct OpenRouter architecture.output_modalities
    architecture = caps.get("architecture")
    if isinstance(architecture, dict):
        from_architecture = _lowered_strings(architecture.get("output_modalities"))
        if from_architecture:
            return from_architecture

    # 2. OpenRouter scalar modality "in+out->out"
    scalar = caps.get("modality")
    if isinstance(scalar, str) and "->" in scalar:
        produced = scalar.partition("->")[2]
        tokens = [piece.strip().lower() for piece in produced.split("+")]
        tokens = [token for token in tokens if token]
        if tokens:
            return tokens

    # 3. HuggingFace enrichment
    enrichment = caps.get("hf")
    if isinstance(enrichment, dict):
        from_hf = _lowered_strings(enrichment.get("output_modalities"))
        if from_hf:
            return from_hf

    return None