somm 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- somm-0.6.1/.gitignore +38 -0
- somm-0.6.1/PKG-INFO +44 -0
- somm-0.6.1/README.md +20 -0
- somm-0.6.1/pyproject.toml +39 -0
- somm-0.6.1/src/somm/__init__.py +36 -0
- somm-0.6.1/src/somm/capabilities.py +296 -0
- somm-0.6.1/src/somm/cli.py +1026 -0
- somm-0.6.1/src/somm/client.py +1464 -0
- somm-0.6.1/src/somm/compat/__init__.py +53 -0
- somm-0.6.1/src/somm/compat/generic.py +161 -0
- somm-0.6.1/src/somm/compat/openai_compat.py +177 -0
- somm-0.6.1/src/somm/errors.py +177 -0
- somm-0.6.1/src/somm/hooks.py +105 -0
- somm-0.6.1/src/somm/plan_governor.py +75 -0
- somm-0.6.1/src/somm/prompts.py +170 -0
- somm-0.6.1/src/somm/provenance.py +36 -0
- somm-0.6.1/src/somm/providers/__init__.py +46 -0
- somm-0.6.1/src/somm/providers/_openai_compat.py +492 -0
- somm-0.6.1/src/somm/providers/anthropic.py +234 -0
- somm-0.6.1/src/somm/providers/base.py +124 -0
- somm-0.6.1/src/somm/providers/claude_cli.py +112 -0
- somm-0.6.1/src/somm/providers/codex_cli.py +97 -0
- somm-0.6.1/src/somm/providers/deepseek.py +44 -0
- somm-0.6.1/src/somm/providers/gemini.py +73 -0
- somm-0.6.1/src/somm/providers/minimax.py +29 -0
- somm-0.6.1/src/somm/providers/ollama.py +293 -0
- somm-0.6.1/src/somm/providers/openai.py +17 -0
- somm-0.6.1/src/somm/providers/openrouter.py +296 -0
- somm-0.6.1/src/somm/providers/perplexity.py +59 -0
- somm-0.6.1/src/somm/py.typed +0 -0
- somm-0.6.1/src/somm/routing.py +376 -0
- somm-0.6.1/src/somm/slots.py +84 -0
- somm-0.6.1/src/somm/sommelier.py +831 -0
- somm-0.6.1/src/somm/telemetry.py +241 -0
- somm-0.6.1/tests/test_budget_gate.py +180 -0
- somm-0.6.1/tests/test_cli.py +250 -0
- somm-0.6.1/tests/test_cli_spend.py +252 -0
- somm-0.6.1/tests/test_compat.py +247 -0
- somm-0.6.1/tests/test_cross_project.py +215 -0
- somm-0.6.1/tests/test_embed.py +229 -0
- somm-0.6.1/tests/test_hooks.py +129 -0
- somm-0.6.1/tests/test_inprocess_workers.py +95 -0
- somm-0.6.1/tests/test_library_ext.py +945 -0
- somm-0.6.1/tests/test_multimodal.py +336 -0
- somm-0.6.1/tests/test_no_fallback.py +174 -0
- somm-0.6.1/tests/test_openrouter.py +256 -0
- somm-0.6.1/tests/test_outcome_classification.py +65 -0
- somm-0.6.1/tests/test_parse_helpers.py +88 -0
- somm-0.6.1/tests/test_perplexity_provider.py +89 -0
- somm-0.6.1/tests/test_pricing_utils.py +18 -0
- somm-0.6.1/tests/test_provider_adapters.py +1180 -0
- somm-0.6.1/tests/test_routing.py +318 -0
- somm-0.6.1/tests/test_smoke.py +383 -0
- somm-0.6.1/tests/test_sommelier.py +586 -0
- somm-0.6.1/tests/test_streaming.py +315 -0
- somm-0.6.1/tests/test_workload_frontier.py +179 -0
- somm-0.6.1/tests/test_writer_atexit.py +139 -0
somm-0.6.1/.gitignore
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
|
|
10
|
+
# Environments
|
|
11
|
+
.venv/
|
|
12
|
+
.env
|
|
13
|
+
.env.*
|
|
14
|
+
|
|
15
|
+
# Tooling caches
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
.ruff_cache/
|
|
19
|
+
|
|
20
|
+
# Local Claude session id log (per-machine, not source of truth)
|
|
21
|
+
sessions.txt
|
|
22
|
+
|
|
23
|
+
# Local data (never commit telemetry)
|
|
24
|
+
.somm/
|
|
25
|
+
*.sqlite
|
|
26
|
+
*.sqlite-wal
|
|
27
|
+
*.sqlite-shm
|
|
28
|
+
|
|
29
|
+
# Author-local notes not for open source
|
|
30
|
+
notes/
|
|
31
|
+
.claude/
|
|
32
|
+
|
|
33
|
+
# Editor
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
|
|
38
|
+
# Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
|
somm-0.6.1/PKG-INFO
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: somm
|
|
3
|
+
Version: 0.6.1
|
|
4
|
+
Summary: somm — self-hosted LLM telemetry, routing, and intelligence loop (library)
|
|
5
|
+
Project-URL: Homepage, https://github.com/lavallee/somm
|
|
6
|
+
Project-URL: Repository, https://github.com/lavallee/somm
|
|
7
|
+
Project-URL: Issues, https://github.com/lavallee/somm/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Marc Lavallee
|
|
10
|
+
License: MIT
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: httpx>=0.27
|
|
18
|
+
Requires-Dist: somm-core==0.6.1
|
|
19
|
+
Provides-Extra: litellm
|
|
20
|
+
Requires-Dist: litellm>=1.50; extra == 'litellm'
|
|
21
|
+
Provides-Extra: tokenizers
|
|
22
|
+
Requires-Dist: tiktoken>=0.7; extra == 'tokenizers'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# somm
|
|
26
|
+
|
|
27
|
+
**Self-hosted LLM telemetry, routing, and intelligence loop.**
|
|
28
|
+
|
|
29
|
+
The main library: `SommLLM` — one call wraps telemetry, provider routing
|
|
30
|
+
across ten providers, tool calling, streaming, embeddings, multimodal
|
|
31
|
+
dispatch, cost tracking, budget gates, online evaluation, and
|
|
32
|
+
cross-project model memory (the sommelier). Zero-config, privacy-first,
|
|
33
|
+
no phone-home.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import somm
|
|
37
|
+
|
|
38
|
+
llm = somm.llm(project="my_app")
|
|
39
|
+
result = llm.generate(prompt="Reply with exactly: pong", workload="ping")
|
|
40
|
+
print(result.text, result.provider, result.cost_usd)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Full documentation, design docs, and examples live in the
|
|
44
|
+
[somm repository](https://github.com/lavallee/somm).
|
somm-0.6.1/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# somm
|
|
2
|
+
|
|
3
|
+
**Self-hosted LLM telemetry, routing, and intelligence loop.**
|
|
4
|
+
|
|
5
|
+
The main library: `SommLLM` — one call wraps telemetry, provider routing
|
|
6
|
+
across ten providers, tool calling, streaming, embeddings, multimodal
|
|
7
|
+
dispatch, cost tracking, budget gates, online evaluation, and
|
|
8
|
+
cross-project model memory (the sommelier). Zero-config, privacy-first,
|
|
9
|
+
no phone-home.
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
import somm
|
|
13
|
+
|
|
14
|
+
llm = somm.llm(project="my_app")
|
|
15
|
+
result = llm.generate(prompt="Reply with exactly: pong", workload="ping")
|
|
16
|
+
print(result.text, result.provider, result.cost_usd)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Full documentation, design docs, and examples live in the
|
|
20
|
+
[somm repository](https://github.com/lavallee/somm).
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "somm"
|
|
3
|
+
version = "0.6.1"
|
|
4
|
+
description = "somm — self-hosted LLM telemetry, routing, and intelligence loop (library)"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
authors = [{ name = "Marc Lavallee" }]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 4 - Beta",
|
|
11
|
+
"License :: OSI Approved :: MIT License",
|
|
12
|
+
"Programming Language :: Python :: 3.12",
|
|
13
|
+
"Programming Language :: Python :: 3.13",
|
|
14
|
+
"Topic :: Software Development :: Libraries",
|
|
15
|
+
]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"somm-core==0.6.1",
|
|
18
|
+
"httpx>=0.27",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
litellm = ["litellm>=1.50"]
|
|
23
|
+
tokenizers = ["tiktoken>=0.7"]
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
somm = "somm.cli:main"
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/lavallee/somm"
|
|
30
|
+
Repository = "https://github.com/lavallee/somm"
|
|
31
|
+
Issues = "https://github.com/lavallee/somm/issues"
|
|
32
|
+
Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
|
|
33
|
+
|
|
34
|
+
[build-system]
|
|
35
|
+
requires = ["hatchling"]
|
|
36
|
+
build-backend = "hatchling.build"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/somm"]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""somm — self-hosted LLM telemetry, routing, and intelligence loop."""
|
|
2
|
+
|
|
3
|
+
from somm_core import EmbedResult, Outcome, PrivacyClass, SommResult
|
|
4
|
+
from somm_core.parse import extract_json
|
|
5
|
+
|
|
6
|
+
from somm import hooks
|
|
7
|
+
from somm.client import SommLLM, llm
|
|
8
|
+
from somm.errors import (
|
|
9
|
+
SommBadRequest,
|
|
10
|
+
SommBudgetExceeded,
|
|
11
|
+
SommError,
|
|
12
|
+
SommNoCapableProvider,
|
|
13
|
+
SommPrivacyViolation,
|
|
14
|
+
SommProvidersExhausted,
|
|
15
|
+
SommStrictMode,
|
|
16
|
+
)
|
|
17
|
+
from somm.provenance import provenance
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"SommLLM",
|
|
21
|
+
"llm",
|
|
22
|
+
"hooks",
|
|
23
|
+
"provenance",
|
|
24
|
+
"extract_json",
|
|
25
|
+
"EmbedResult",
|
|
26
|
+
"Outcome",
|
|
27
|
+
"PrivacyClass",
|
|
28
|
+
"SommResult",
|
|
29
|
+
"SommError",
|
|
30
|
+
"SommBadRequest",
|
|
31
|
+
"SommBudgetExceeded",
|
|
32
|
+
"SommNoCapableProvider",
|
|
33
|
+
"SommPrivacyViolation",
|
|
34
|
+
"SommProvidersExhausted",
|
|
35
|
+
"SommStrictMode",
|
|
36
|
+
]
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Capability lookup against `model_intel.capabilities_json`.
|
|
2
|
+
|
|
3
|
+
The router consults this to skip (provider, model) pairs that can't serve a
|
|
4
|
+
request's required capabilities *before* making the network call. Unknown
|
|
5
|
+
models fall through as capable (same behavior as pre-capability somm).
|
|
6
|
+
|
|
7
|
+
Capability sources per provider:
|
|
8
|
+
|
|
9
|
+
- **OpenRouter**: populates `modality` (e.g. `"text+image->text"`) and
|
|
10
|
+
`architecture` (incl. `input_modalities`) on every model. We derive
|
|
11
|
+
`vision` from either field.
|
|
12
|
+
- **Anthropic / OpenAI**: no list-models API for pricing, so capabilities
|
|
13
|
+
come from the static pricing seed. Vision is inferred from model name
|
|
14
|
+
as a conservative starter — opus/sonnet/haiku 4.x and gpt-4o* support
|
|
15
|
+
images natively.
|
|
16
|
+
- **Ollama**: family-based inference; `llava`, `bakllava`, `llama3.2-vision`
|
|
17
|
+
and similar carry vision. Unknown models fall through as capable.
|
|
18
|
+
- **Minimax**: single default model; treated as capability-unknown → allow.
|
|
19
|
+
|
|
20
|
+
Adding new capability tokens (`tool_use`, `json_mode`, `thinking`, …) is
|
|
21
|
+
just a matter of teaching `model_has_capability` to look them up — no
|
|
22
|
+
schema change needed.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
from typing import TYPE_CHECKING
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from somm_core.repository import Repository
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Lowercase substrings that mark a model as accepting image input.
# `model_has_capability` tests each hint with `in` against the lowercased
# model name and, for Ollama, against the `family` field of the stored
# capability JSON — so broad entries such as "vision" and "gemini" match any
# name that contains them.
_VISION_NAME_HINTS: tuple[str, ...] = (
    "claude-opus-4",
    "claude-sonnet-4",
    "claude-haiku-4",
    "gpt-4o",
    "gpt-4.1",
    "gpt-5",
    "llava",
    "bakllava",
    "vision",
    "gemini",
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Models that consume "thinking" / reasoning tokens before emitting visible
# output. Workloads that declare ``capabilities_required=["thinking"]`` get
# routed to one of these; mechanical workloads (copyedit, simple polish)
# explicitly omit it and get steered to a non-thinking variant.
#
# Inverse: NON-thinking variants of these families exist (deepseek-v4-flash,
# claude-sonnet-4-6 without extended thinking, gemini-2.5-flash).
# `_NON_THINKING_NAME_HINTS` below lists names that resemble a thinking
# family but are not thinking models; `model_has_capability` checks that
# list first, so it wins whenever a name matches both lists.
#
# Each hint is matched as a substring of the lowercased model name.
_THINKING_NAME_HINTS: tuple[str, ...] = (
    "deepseek-v4-pro",
    "deepseek-reasoner",
    "claude-opus-4-7",  # Opus families default to extended thinking
    "claude-opus-4-6",
    "o1-",  # OpenAI o1 family
    "o3-",  # OpenAI o3 family
    "gemini-2.5-pro",
    "gemini-3-pro",
    "qwq-",
    "magistral",
)
|
|
69
|
+
|
|
70
|
+
# Lowercase substrings marking a model as NOT a thinking/reasoning model.
# `model_has_capability` consults this list before `_THINKING_NAME_HINTS` for
# both the "thinking" and "non-thinking" capabilities, so a match here takes
# precedence over a match there.
# NOTE(review): "-mini" also matches "o1-mini" / "o3-mini", which the "o1-" /
# "o3-" thinking hints would otherwise claim — confirm that is intended.
_NON_THINKING_NAME_HINTS: tuple[str, ...] = (
    "-flash",
    "-mini",
    "-haiku",  # Haiku family is non-thinking by default
    "deepseek-chat",
    "deepseek-coder",
    "gpt-4o",
    "gpt-4.1",
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Model families that support function/tool calling. Workloads that declare
# ``capabilities_required=["tools"]`` (e.g. deepagents orchestrators) route
# only to these. Tool calling is near-universal across modern frontier and
# mid-tier models, so the list is generous and unknown models fall through
# as capable (None) rather than being blocked — there is no negative case.
# Adapters that *can't* serve tools raise SommBadRequest at call time
# instead of being filtered here.
#
# Each hint is matched as a substring of the lowercased model name, so a
# short entry such as "gpt-4" covers every name that contains it.
_TOOLS_NAME_HINTS: tuple[str, ...] = (
    "claude-3",
    "claude-opus-4",
    "claude-sonnet-4",
    "claude-haiku-4",
    "gpt-4",  # gpt-4o, gpt-4.1, gpt-4-turbo …
    "gpt-5",
    "o1-",
    "o3-",
    "o4-",
    "gemini-1.5",
    "gemini-2",
    "gemini-3",
    "llama-3.1",
    "llama-3.2",
    "llama-3.3",
    "llama3.1",
    "llama3.2",
    "llama3.3",
    "llama-4",
    "qwen2.5",
    "qwen3",
    "deepseek-chat",
    "deepseek-v3",
    "deepseek-v4",
    "mistral",
    "mixtral",
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _openrouter_has_vision(caps: dict) -> bool:
|
|
119
|
+
modality = caps.get("modality") or ""
|
|
120
|
+
if isinstance(modality, str) and "image" in modality.lower():
|
|
121
|
+
return True
|
|
122
|
+
arch = caps.get("architecture") or {}
|
|
123
|
+
if isinstance(arch, dict):
|
|
124
|
+
inputs = arch.get("input_modalities") or []
|
|
125
|
+
if isinstance(inputs, list) and any(
|
|
126
|
+
isinstance(m, str) and "image" in m.lower() for m in inputs
|
|
127
|
+
):
|
|
128
|
+
return True
|
|
129
|
+
return False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def model_has_capability(
    repo: Repository,
    provider: str,
    model: str,
    capability: str,
) -> bool | None:
    """Report whether ``(provider, model)`` supports ``capability``.

    Returns ``True``/``False`` when there is a signal and ``None`` when
    capability data is unavailable. Callers treat ``None`` as "allow — let
    the provider try" per the capability-aware routing proposal.

    Resolution order:

    1. An empty ``capability`` is trivially satisfied (``True``).
    2. A boolean stored under ``capability`` in the model's
       ``model_intel.capabilities_json`` row (e.g. ``{"vision": true}``)
       wins over every heuristic.
    3. Per-capability heuristics for ``"vision"``, ``"thinking"``,
       ``"non-thinking"`` and ``"tools"``.
    4. Any other capability token is unknown (``None``).

    A row whose JSON is missing, unparseable, or not a JSON object is
    treated the same as having no row at all.
    """
    if not capability:
        return True

    with repo._open() as conn:
        row = conn.execute(
            "SELECT capabilities_json FROM model_intel "
            "WHERE provider = ? AND model = ?",
            (provider, model),
        ).fetchone()

    caps: dict | None = None
    if row and row[0]:
        try:
            parsed = json.loads(row[0])
        except (ValueError, TypeError):
            # ValueError covers JSONDecodeError and undecodable bytes;
            # TypeError covers a column value that is not str/bytes. Bad
            # intel must not break routing, so treat it as "no data".
            parsed = None
        if isinstance(parsed, dict):
            caps = parsed

    # Explicit per-capability flag wins if present ({"vision": true}).
    if caps is not None and isinstance(caps.get(capability), bool):
        return caps[capability]

    if capability == "vision":
        if provider == "openrouter" and caps is not None:
            return _openrouter_has_vision(caps)
        lowered = model.lower()
        if any(h in lowered for h in _VISION_NAME_HINTS):
            return True
        if provider == "ollama":
            fam = caps.get("family") if caps is not None else None
            if isinstance(fam, str) and any(h in fam.lower() for h in _VISION_NAME_HINTS):
                return True
            # Unknown ollama model → capability-unknown, let it try.
            return None
        # A parsed row with no vision signal is a definite "no"; without a
        # row we cannot tell.
        return None if caps is None else False

    if capability in ("thinking", "non-thinking"):
        # "thinking": models that reason before emitting visible text.
        # Routing workloads that declare capabilities_required=["thinking"]
        # to one of these is the difference between a calibrated answer and
        # an empty response (the 8K-budget all-eaten-by-reasoning failure
        # mode from 2026-05-06).
        # "non-thinking": the inverse — workloads that DON'T need thinking
        # steer away from reasoning models, which would burn budget on a
        # mechanical task (e.g. a copyeditor pass on already-clean prose).
        #
        # The non-thinking hints are checked first for both tokens, so a
        # name matching both lists is classed as non-thinking.
        # NOTE(review): that makes "o1-mini" / "o3-mini" non-thinking via
        # "-mini" — confirm this is intended for the OpenAI o-series.
        lowered = model.lower()
        if any(h in lowered for h in _NON_THINKING_NAME_HINTS):
            return capability == "non-thinking"
        if any(h in lowered for h in _THINKING_NAME_HINTS):
            return capability == "thinking"
        return None  # unknown — let the provider try

    if capability == "tools":
        # Function/tool calling. Known tool-capable families return True;
        # everything else falls through as None (allow) — never False, since
        # a provider that genuinely can't serve tools raises SommBadRequest
        # at call time rather than being pre-filtered here.
        lowered = model.lower()
        if any(h in lowered for h in _TOOLS_NAME_HINTS):
            return True
        return None

    # Unknown capability — don't block.
    return None
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def provider_can_serve(
    repo: Repository,
    provider: str,
    model: str,
    required: list[str],
) -> tuple[bool, str]:
    """Check ``(provider, model)`` against every required capability.

    Returns ``(ok, reason)``. ``reason`` is empty when ``ok`` is True and
    ``"missing_capability:<cap>"`` for the first capability that
    ``model_has_capability`` reports as definitely unsupported. An unknown
    verdict (``None``) does not block.

    ``required`` may be empty or ``None`` (nothing to check). A bare string
    is treated as a single capability, not iterated character by character.
    """
    if not required:
        return True, ""
    if isinstance(required, str):
        # Iterating a str would test each character as an (unknown, hence
        # allowed) capability and silently skip the real check.
        required = [required]

    for cap in required:
        if model_has_capability(repo, provider, model, cap) is False:
            return False, f"missing_capability:{cap}"
    return True, ""
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _normalise_modalities(values: object) -> list[str]:
    """Strip, lowercase and de-duplicate modality tokens in first-seen order.

    Anything that is not a list, and any entry that is not a non-blank
    string, contributes nothing.
    """
    if not isinstance(values, list):
        return []
    cleaned = (v.strip().lower() for v in values if isinstance(v, str))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(token for token in cleaned if token))


def model_output_modalities(
    repo: Repository,
    provider: str,
    model: str,
) -> list[str] | None:
    """Return the output modalities this model can produce, or None when we
    have no signal.

    Signal sources, in order of preference:
    1. OpenRouter `architecture.output_modalities` — list of strings.
    2. OpenRouter `modality` scalar (`"text+image->text"`) — parse the
       right-hand side.
    3. HuggingFace `hf.output_modalities` — set by the HF intel worker
       from `pipeline_tag`.

    Returns a list of unique, stripped, lowercased modality tokens
    (`"text"`, `"image"`, `"audio"`, `"video"`, `"embedding"`) in the order
    the source listed them. Callers that want to filter for "outputs text"
    should check membership against a requested set.

    A missing row, unparseable JSON, or JSON that is not an object all
    yield None.
    """
    with repo._open() as conn:
        row = conn.execute(
            "SELECT capabilities_json FROM model_intel "
            "WHERE provider = ? AND model = ?",
            (provider, model),
        ).fetchone()

    if not row or not row[0]:
        return None
    try:
        caps = json.loads(row[0])
    except (ValueError, TypeError):
        # ValueError covers JSONDecodeError and undecodable bytes; TypeError
        # covers a column value that is not str/bytes.
        return None
    if not isinstance(caps, dict):
        return None

    # 1. Direct OpenRouter architecture.output_modalities
    arch = caps.get("architecture")
    if isinstance(arch, dict):
        tokens = _normalise_modalities(arch.get("output_modalities"))
        if tokens:
            return tokens

    # 2. OpenRouter scalar modality "in+out->out"
    modality = caps.get("modality")
    if isinstance(modality, str) and "->" in modality:
        _, _, after = modality.partition("->")
        tokens = _normalise_modalities(after.split("+"))
        if tokens:
            return tokens

    # 3. HuggingFace enrichment
    hf = caps.get("hf")
    if isinstance(hf, dict):
        tokens = _normalise_modalities(hf.get("output_modalities"))
        if tokens:
            return tokens

    return None
|