somm-service 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- somm_service-0.6.1/.gitignore +38 -0
- somm_service-0.6.1/PKG-INFO +38 -0
- somm_service-0.6.1/README.md +15 -0
- somm_service-0.6.1/pyproject.toml +38 -0
- somm_service-0.6.1/src/somm_service/__init__.py +9 -0
- somm_service-0.6.1/src/somm_service/app.py +491 -0
- somm_service-0.6.1/src/somm_service/cli.py +195 -0
- somm_service-0.6.1/src/somm_service/proxy.py +366 -0
- somm_service-0.6.1/src/somm_service/py.typed +0 -0
- somm_service-0.6.1/src/somm_service/workers/__init__.py +23 -0
- somm_service-0.6.1/src/somm_service/workers/_runner.py +211 -0
- somm_service-0.6.1/src/somm_service/workers/agent.py +498 -0
- somm_service-0.6.1/src/somm_service/workers/hf_intel.py +199 -0
- somm_service-0.6.1/src/somm_service/workers/model_intel.py +222 -0
- somm_service-0.6.1/src/somm_service/workers/shadow_eval.py +444 -0
- somm_service-0.6.1/tests/test_agent.py +285 -0
- somm_service-0.6.1/tests/test_app.py +144 -0
- somm_service-0.6.1/tests/test_hf_intel.py +215 -0
- somm_service-0.6.1/tests/test_model_intel.py +270 -0
- somm_service-0.6.1/tests/test_proxy_messages.py +441 -0
- somm_service-0.6.1/tests/test_scheduler.py +228 -0
- somm_service-0.6.1/tests/test_self_heal.py +147 -0
- somm_service-0.6.1/tests/test_shadow_eval.py +362 -0
- somm_service-0.6.1/tests/test_web_recs.py +146 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
|
|
10
|
+
# Environments
|
|
11
|
+
.venv/
|
|
12
|
+
.env
|
|
13
|
+
.env.*
|
|
14
|
+
|
|
15
|
+
# Tooling caches
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
.ruff_cache/
|
|
19
|
+
|
|
20
|
+
# Local Claude session id log (per-machine, not source of truth)
|
|
21
|
+
sessions.txt
|
|
22
|
+
|
|
23
|
+
# Local data (never commit telemetry)
|
|
24
|
+
.somm/
|
|
25
|
+
*.sqlite
|
|
26
|
+
*.sqlite-wal
|
|
27
|
+
*.sqlite-shm
|
|
28
|
+
|
|
29
|
+
# Author-local notes not for open source
|
|
30
|
+
notes/
|
|
31
|
+
.claude/
|
|
32
|
+
|
|
33
|
+
# Editor
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
|
|
38
|
+
# Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: somm-service
|
|
3
|
+
Version: 0.6.1
|
|
4
|
+
Summary: somm service — web admin, HTTP API, background workers
|
|
5
|
+
Project-URL: Homepage, https://github.com/lavallee/somm
|
|
6
|
+
Project-URL: Repository, https://github.com/lavallee/somm
|
|
7
|
+
Project-URL: Issues, https://github.com/lavallee/somm/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Marc Lavallee
|
|
10
|
+
License: MIT
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: litellm>=1.40
|
|
18
|
+
Requires-Dist: somm-core==0.6.1
|
|
19
|
+
Requires-Dist: somm==0.6.1
|
|
20
|
+
Requires-Dist: starlette>=0.40
|
|
21
|
+
Requires-Dist: uvicorn>=0.30
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# somm-service
|
|
25
|
+
|
|
26
|
+
The service tier for [somm](https://github.com/lavallee/somm) — the
|
|
27
|
+
self-hosted LLM telemetry, routing, and intelligence loop.
|
|
28
|
+
|
|
29
|
+
Adds the localhost web admin, the HTTP API, and the background
|
|
30
|
+
scheduler with three workers: model-intel refresh (pricing + context
|
|
31
|
+
windows + capabilities), online evaluation (samples production calls
|
|
32
|
+
and grades them against a gold model), and the agent worker (turns
|
|
33
|
+
telemetry + eval results into concrete recommendations).
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install somm somm-service
|
|
37
|
+
somm serve --project my_app # dashboard at localhost:7878
|
|
38
|
+
```
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# somm-service
|
|
2
|
+
|
|
3
|
+
The service tier for [somm](https://github.com/lavallee/somm) — the
|
|
4
|
+
self-hosted LLM telemetry, routing, and intelligence loop.
|
|
5
|
+
|
|
6
|
+
Adds the localhost web admin, the HTTP API, and the background
|
|
7
|
+
scheduler with three workers: model-intel refresh (pricing + context
|
|
8
|
+
windows + capabilities), online evaluation (samples production calls
|
|
9
|
+
and grades them against a gold model), and the agent worker (turns
|
|
10
|
+
telemetry + eval results into concrete recommendations).
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install somm somm-service
|
|
14
|
+
somm serve --project my_app # dashboard at localhost:7878
|
|
15
|
+
```
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "somm-service"
|
|
3
|
+
version = "0.6.1"
|
|
4
|
+
description = "somm service — web admin, HTTP API, background workers"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
authors = [{ name = "Marc Lavallee" }]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 4 - Beta",
|
|
11
|
+
"License :: OSI Approved :: MIT License",
|
|
12
|
+
"Programming Language :: Python :: 3.12",
|
|
13
|
+
"Programming Language :: Python :: 3.13",
|
|
14
|
+
"Topic :: Software Development :: Libraries",
|
|
15
|
+
]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"somm-core==0.6.1",
|
|
18
|
+
"somm==0.6.1",
|
|
19
|
+
"litellm>=1.40",
|
|
20
|
+
"starlette>=0.40",
|
|
21
|
+
"uvicorn>=0.30",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
somm-serve = "somm_service.cli:main"
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/lavallee/somm"
|
|
29
|
+
Repository = "https://github.com/lavallee/somm"
|
|
30
|
+
Issues = "https://github.com/lavallee/somm/issues"
|
|
31
|
+
Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
|
|
32
|
+
|
|
33
|
+
[build-system]
|
|
34
|
+
requires = ["hatchling"]
|
|
35
|
+
build-backend = "hatchling.build"
|
|
36
|
+
|
|
37
|
+
[tool.hatch.build.targets.wheel]
|
|
38
|
+
packages = ["src/somm_service"]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""somm-service — web admin + HTTP API + background workers.
|
|
2
|
+
|
|
3
|
+
v0.1 ships the minimal web dashboard + /api/stats. Workers (model_intel,
|
|
4
|
+
shadow_eval, agent) come in D3+.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from somm_service.app import create_app, run_server
|
|
8
|
+
|
|
9
|
+
__all__ = ["create_app", "run_server"]
|
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""Starlette app serving the web admin + HTTP API.
|
|
2
|
+
|
|
3
|
+
HTTP surface:
|
|
4
|
+
GET / HTML dashboard — status line + recs + stats
|
|
5
|
+
GET /health JSON liveness probe
|
|
6
|
+
GET /api/stats JSON roll-up (per-workload × provider × model)
|
|
7
|
+
GET /api/version JSON service + schema version
|
|
8
|
+
GET /api/recommendations JSON open recs
|
|
9
|
+
POST /api/recommendations/{id}/dismiss
|
|
10
|
+
POST /api/recommendations/{id}/apply
|
|
11
|
+
POST /v1/messages Anthropic Messages-compatible LLM proxy
|
|
12
|
+
(non-streaming v1; budget-gated; uses litellm
|
|
13
|
+
as a library; streaming + /v1/chat/completions
|
|
14
|
+
are explicit follow-ups)
|
|
15
|
+
|
|
16
|
+
Design tokens + a11y spec applied inline (v0.1 ships
|
|
17
|
+
tokens in-HTML; `packages/somm-service/web/tokens.css` lands when we extract).
|
|
18
|
+
|
|
19
|
+
`somm serve` also starts a Scheduler background thread that runs the
|
|
20
|
+
model_intel / shadow_eval / agent workers on their cadences.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import sqlite3
|
|
27
|
+
|
|
28
|
+
from somm_core import VERSION
|
|
29
|
+
from somm_core.config import Config
|
|
30
|
+
from somm_core.config import load as load_config
|
|
31
|
+
from somm_core.repository import Repository
|
|
32
|
+
from somm_core.schema import current_schema_version
|
|
33
|
+
from starlette.applications import Starlette
|
|
34
|
+
from starlette.requests import Request
|
|
35
|
+
from starlette.responses import HTMLResponse, JSONResponse
|
|
36
|
+
from starlette.routing import Route
|
|
37
|
+
|
|
38
|
+
from somm_service.proxy import messages_endpoint
|
|
39
|
+
|
|
40
|
+
_HTML_SHELL = """<!DOCTYPE html>
|
|
41
|
+
<html lang="en">
|
|
42
|
+
<head>
|
|
43
|
+
<meta charset="utf-8">
|
|
44
|
+
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
45
|
+
<title>somm · {project}</title>
|
|
46
|
+
<style>
|
|
47
|
+
:root {{
|
|
48
|
+
--font-sans: Inter, system-ui, sans-serif;
|
|
49
|
+
--font-mono: 'JetBrains Mono', 'SF Mono', Consolas, monospace;
|
|
50
|
+
--bg: #0a0a0a; --bg-alt: #111111;
|
|
51
|
+
--fg: #e8e8e8; --fg-muted: #9ca3af;
|
|
52
|
+
--border: #27272a;
|
|
53
|
+
--ok: #059669; --warn: #d97706; --danger: #dc2626;
|
|
54
|
+
--accent: #818cf8;
|
|
55
|
+
--radius: 4px;
|
|
56
|
+
}}
|
|
57
|
+
@media (prefers-color-scheme: light) {{
|
|
58
|
+
:root {{ --bg:#fafafa; --bg-alt:#fff; --fg:#1a1a1a; --fg-muted:#6b7280; --border:#e5e7eb; }}
|
|
59
|
+
}}
|
|
60
|
+
* {{ box-sizing: border-box; }}
|
|
61
|
+
body {{ font-family: var(--font-sans); background: var(--bg); color: var(--fg);
|
|
62
|
+
margin: 0; padding: 24px 32px; line-height: 1.5; }}
|
|
63
|
+
a {{ color: var(--accent); }}
|
|
64
|
+
a:focus-visible, button:focus-visible {{ outline: 2px solid var(--accent); outline-offset: 2px; }}
|
|
65
|
+
header {{ display: flex; justify-content: space-between; align-items: baseline;
|
|
66
|
+
border-bottom: 1px solid var(--border); padding-bottom: 16px; margin-bottom: 24px; }}
|
|
67
|
+
header h1 {{ font-size: 20px; margin: 0; font-weight: 600; }}
|
|
68
|
+
header .meta {{ font-family: var(--font-mono); font-size: 12px; color: var(--fg-muted); }}
|
|
69
|
+
.status {{ font-size: 16px; padding: 16px; border: 1px solid var(--border);
|
|
70
|
+
border-radius: var(--radius); background: var(--bg-alt); margin-bottom: 24px; }}
|
|
71
|
+
.status strong {{ color: var(--ok); font-family: var(--font-mono); }}
|
|
72
|
+
.status.warn strong {{ color: var(--warn); }}
|
|
73
|
+
.status.err strong {{ color: var(--danger); }}
|
|
74
|
+
h2 {{ font-size: 14px; text-transform: uppercase; letter-spacing: 0.08em;
|
|
75
|
+
color: var(--fg-muted); margin: 24px 0 12px; }}
|
|
76
|
+
ol.recs {{ list-style: none; padding: 0; margin: 0; display: flex;
|
|
77
|
+
flex-direction: column; gap: 12px; }}
|
|
78
|
+
.rec {{ padding: 16px; border: 1px solid var(--border); border-radius: var(--radius);
|
|
79
|
+
background: var(--bg-alt); }}
|
|
80
|
+
.rec-head {{ display: flex; justify-content: space-between; align-items: baseline;
|
|
81
|
+
margin-bottom: 6px; }}
|
|
82
|
+
.rec-title {{ font-weight: 600; font-family: var(--font-mono); font-size: 13px; }}
|
|
83
|
+
.rec-conf {{ color: var(--fg-muted); font-size: 12px; font-family: var(--font-mono); }}
|
|
84
|
+
.rec-impact {{ color: var(--fg); font-size: 14px; margin-bottom: 8px; }}
|
|
85
|
+
.rec-evidence summary {{ color: var(--accent); cursor: pointer; font-size: 12px;
|
|
86
|
+
font-family: var(--font-mono); }}
|
|
87
|
+
.rec-evidence[open] summary {{ margin-bottom: 8px; }}
|
|
88
|
+
.evidence-tbl {{ margin-top: 4px; font-size: 12px; }}
|
|
89
|
+
.evidence-tbl th {{ color: var(--fg-muted); font-weight: 500; padding: 4px 10px; }}
|
|
90
|
+
.evidence-tbl td {{ padding: 4px 10px; border-bottom: 1px solid var(--border); }}
|
|
91
|
+
table {{ width: 100%; border-collapse: collapse; font-family: var(--font-mono); font-size: 13px; }}
|
|
92
|
+
th, td {{ text-align: left; padding: 8px 12px; border-bottom: 1px solid var(--border); }}
|
|
93
|
+
th {{ color: var(--fg-muted); font-weight: 500; }}
|
|
94
|
+
td.num {{ text-align: right; }}
|
|
95
|
+
.empty {{ padding: 16px; color: var(--fg-muted); font-style: italic; }}
|
|
96
|
+
footer {{ margin-top: 48px; color: var(--fg-muted); font-family: var(--font-mono);
|
|
97
|
+
font-size: 12px; }}
|
|
98
|
+
</style>
|
|
99
|
+
</head>
|
|
100
|
+
<body>
|
|
101
|
+
<header>
|
|
102
|
+
<h1>somm</h1>
|
|
103
|
+
<div class="meta">project: {project} · v{version} · schema v{schema} · {window}d window</div>
|
|
104
|
+
</header>
|
|
105
|
+
|
|
106
|
+
<section aria-label="System status" role="status" aria-live="polite">
|
|
107
|
+
<div class="status {status_class}">
|
|
108
|
+
<strong>{status_label}</strong> · {hero_line}
|
|
109
|
+
</div>
|
|
110
|
+
</section>
|
|
111
|
+
|
|
112
|
+
<section aria-label="Recommendations">
|
|
113
|
+
<h2>Top recommendations</h2>
|
|
114
|
+
{recs_html}
|
|
115
|
+
</section>
|
|
116
|
+
|
|
117
|
+
<section aria-label="Evidence">
|
|
118
|
+
<h2>Calls by workload</h2>
|
|
119
|
+
{table_html}
|
|
120
|
+
</section>
|
|
121
|
+
|
|
122
|
+
<footer>
|
|
123
|
+
somm is self-hosted. Binds <code>localhost</code> only by default. Data stays on disk.
|
|
124
|
+
<br>Endpoints: <a href="/health">/health</a> · <a href="/api/stats">/api/stats</a> · <a href="/api/version">/api/version</a>
|
|
125
|
+
</footer>
|
|
126
|
+
</body>
|
|
127
|
+
</html>
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _render_table(stats: list[dict]) -> str:
    """Render the per-workload call roll-up as an HTML ``<table>``.

    Returns a placeholder ``<div class="empty">`` when ``stats`` is empty.
    The workload / provider / model cells go through ``_esc``; the numeric
    cells are interpolated directly, with missing token counts shown as 0.
    """
    if not stats:
        return (
            '<div class="empty">No calls yet. '
            "Run <code>somm.llm().generate(...)</code> in your Python code.</div>"
        )

    def _row(entry: dict) -> str:
        # Text columns first, then the right-aligned numeric columns.
        text_cells = "".join(
            f"<td>{_esc(entry[key])}</td>" for key in ("workload", "provider", "model")
        )
        numbers = (
            entry["n_calls"],
            entry["tokens_in"] or 0,
            entry["tokens_out"] or 0,
            entry["n_failed"],
        )
        numeric_cells = "".join(f"<td class='num'>{value}</td>" for value in numbers)
        return f"<tr>{text_cells}{numeric_cells}</tr>"

    header = (
        "<thead><tr>"
        "<th>workload</th><th>provider</th><th>model</th>"
        "<th class='num'>calls</th><th class='num'>tok in</th>"
        "<th class='num'>tok out</th><th class='num'>fail</th>"
        "</tr></thead>"
    )
    body = "".join(_row(entry) for entry in stats)
    return f"<table>{header}<tbody>{body}</tbody></table>"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Characters that are significant in HTML text/attribute context, mapped to the
# entities that replace them. "&" must be escaped too, otherwise already-escaped
# looking input (e.g. "&lt;") would be rendered as markup.
_ESC_MAP = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"}

# Pre-built translation table so escaping is a single pass over the string.
_ESC_TABLE = str.maketrans(_ESC_MAP)


def _esc(s: str) -> str:
    """Return ``str(s)`` with HTML-significant characters replaced by entities.

    The argument is stringified first, so numbers and ``None`` are accepted and
    come back as their ``str()`` form. Characters not in ``_ESC_MAP`` pass
    through unchanged.
    """
    return str(s).translate(_ESC_TABLE)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _list_recommendations(repo: Repository) -> list[dict]:
    """Open (undismissed, unapplied) recommendations, newest first.

    Reads at most 10 rows through ``repo._open()`` and returns one dict per
    row with the keys ``id``, ``workload_id``, ``workload``, ``action``,
    ``evidence``, ``expected_impact``, ``confidence`` and ``created_at``.

    ``evidence`` is always a dict: a missing, unparseable, or non-object
    ``evidence_json`` value becomes ``{}`` so the renderers can call ``.get``
    on it without checking. A missing workload name becomes ``"(unknown)"``,
    a missing impact ``""`` and a missing confidence ``0``.
    """
    with repo._open() as conn:
        rows = conn.execute(
            "SELECT r.id, r.workload_id, w.name, r.action, r.evidence_json, "
            " r.expected_impact, r.confidence, r.created_at "
            "FROM recommendations r "
            "LEFT JOIN workloads w ON w.id = r.workload_id "
            "WHERE r.dismissed_at IS NULL AND r.applied_at IS NULL "
            "ORDER BY r.created_at DESC LIMIT 10"
        ).fetchall()

    out = []
    for r in rows:
        raw_evidence = r[4]
        try:
            evidence = json.loads(raw_evidence) if raw_evidence else {}
        except (ValueError, TypeError):
            # ValueError covers JSONDecodeError; TypeError covers a column
            # value that is not text/bytes at all.
            evidence = {}
        if not isinstance(evidence, dict):
            # Valid JSON that is not an object (e.g. "[]") would break the
            # dict-based evidence renderers downstream.
            evidence = {}
        out.append(
            {
                "id": r[0],
                "workload_id": r[1],
                "workload": r[2] or "(unknown)",
                "action": r[3],
                "evidence": evidence,
                "expected_impact": r[5] or "",
                "confidence": r[6] or 0,
                "created_at": r[7],
            }
        )
    return out
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _render_recommendations(recs: list[dict]) -> str:
    """Render open recommendations as an ``<ol class="recs">`` of cards.

    Each card shows "workload · action", the confidence as a whole-number
    percentage, the expected impact, and a collapsible evidence section
    produced by ``_render_evidence``. An empty list yields a placeholder
    ``<div class="empty">`` instead.
    """
    if not recs:
        empty_notice = (
            '<div class="empty">No recommendations yet. '
            "Agent runs weekly; needs shadow-eval data or model_intel deltas first.</div>"
        )
        return empty_notice

    def _card(rec: dict) -> str:
        heading = _esc(rec["workload"]) + " · " + _esc(rec["action"])
        impact_text = _esc(rec["expected_impact"])
        confidence_pct = f"{rec['confidence']:.0%}"
        details = _render_evidence(rec["action"], rec["evidence"])
        pieces = (
            '<li class="rec">',
            ' <div class="rec-head">',
            f' <span class="rec-title">{heading}</span>',
            f' <span class="rec-conf">confidence {_esc(confidence_pct)}</span>',
            " </div>",
            f' <div class="rec-impact">{impact_text}</div>',
            ' <details class="rec-evidence">',
            f" <summary>evidence</summary>{details}",
            " </details>",
            "</li>",
        )
        return "".join(pieces)

    cards = "".join(_card(rec) for rec in recs)
    return f'<ol class="recs" aria-live="polite">{cards}</ol>'
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# For the actions rendered as a current-vs-candidate table: the row label shown
# to the user paired with the key read from each side of the evidence.
_EVIDENCE_TABLE_FIELDS = {
    "switch_model": (
        ("provider", "provider"),
        ("model", "model"),
        ("quality", "score"),
        ("cost_usd", "cost_usd"),
        ("latency_ms", "latency_ms"),
    ),
    "new_model_landed": (
        ("provider", "provider"),
        ("model", "model"),
        ("in $/1M", "price_in_per_1m"),
        ("out $/1M", "price_out_per_1m"),
    ),
}


def _render_evidence(action: str, evidence: dict) -> str:
    """Render a recommendation's evidence payload as an HTML fragment.

    * ``switch_model`` / ``new_model_landed``: a current-vs-candidate table
      built by ``_evidence_table``.
    * ``chronic_cooldown``: a one-paragraph summary of provider, call count
      and note.
    * anything else (or evidence that is not a dict): the evidence dumped as
      pretty-printed JSON inside ``<pre>``.

    Table cells are handed to ``_evidence_table`` as plain text because that
    function escapes every cell itself; escaping here as well would show
    ``&amp;`` instead of ``&`` in the rendered page.
    """
    is_mapping = isinstance(evidence, dict)

    fields = _EVIDENCE_TABLE_FIELDS.get(action)
    if fields is not None and is_mapping:
        current = evidence.get("current")
        candidate = evidence.get("candidate")
        # A null or non-object side renders as empty cells rather than raising.
        if not isinstance(current, dict):
            current = {}
        if not isinstance(candidate, dict):
            candidate = {}
        rows = [("", "current", "candidate")]
        rows.extend(
            (label, str(current.get(key, "")), str(candidate.get(key, "")))
            for label, key in fields
        )
        return _evidence_table(rows)

    if action == "chronic_cooldown" and is_mapping:
        return (
            "<p>"
            f"provider <code>{_esc(evidence.get('provider', ''))}</code> hit "
            f"circuit-break on {_esc(str(evidence.get('n_calls', '')))} calls. "
            f"{_esc(evidence.get('note', ''))}"
            "</p>"
        )

    return f"<pre>{_esc(json.dumps(evidence, indent=2, sort_keys=True))}</pre>"
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _evidence_table(rows: list[tuple]) -> str:
    """Build the ``evidence-tbl`` HTML table from ``rows``.

    ``rows[0]`` supplies the header cells and every following tuple becomes
    one body row. Each cell is passed through ``_esc`` before being embedded.
    """
    header_cells, *body_rows = rows

    thead = "".join(f"<th>{_esc(cell)}</th>" for cell in header_cells)

    rendered_rows = []
    for row in body_rows:
        cells = "".join(f"<td>{_esc(cell)}</td>" for cell in row)
        rendered_rows.append(f"<tr>{cells}</tr>")
    tbody = "".join(rendered_rows)

    return (
        '<table class="evidence-tbl">'
        f"<thead><tr>{thead}</tr></thead>"
        f"<tbody>{tbody}</tbody>"
        "</table>"
    )
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
async def _home(request: Request) -> HTMLResponse:
    """Render the HTML dashboard: status line, open recommendations, stats table.

    The optional ``window`` query parameter selects the look-back period in
    days. A missing or non-integer value falls back to 7, so a mistyped URL
    still renders the page instead of raising.

    The status line is "NO DATA YET" with zero calls, "HEALTHY" with zero
    failures, and otherwise "NEEDS ATTENTION" (styled ``warn`` below a 20%
    failure rate, ``err`` at or above it).
    """
    from contextlib import closing

    cfg: Config = request.app.state.config
    repo: Repository = request.app.state.repo
    try:
        window = int(request.query_params.get("window", "7"))
    except ValueError:
        window = 7
    stats = repo.stats_by_workload(cfg.project, since_days=window)

    total_calls = sum(s["n_calls"] for s in stats)
    total_failed = sum(s["n_failed"] for s in stats)

    if total_calls == 0:
        status_class, status_label, hero = (
            "",
            "NO DATA YET",
            f"run somm.llm(project={cfg.project!r}).generate(...) to get started",
        )
    elif total_failed == 0:
        status_class, status_label, hero = (
            "",
            "HEALTHY",
            f"{total_calls} calls · 0 failed · {len(stats)} workload(s) active",
        )
    else:
        pct = 100 * total_failed / total_calls
        status_class, status_label, hero = (
            "warn" if pct < 20 else "err",
            "NEEDS ATTENTION",
            f"{total_calls} calls · {total_failed} failed ({pct:.1f}%)",
        )

    try:
        # A sqlite3 connection used as a context manager only manages the
        # transaction; closing() is what actually releases the handle.
        with closing(sqlite3.connect(cfg.db_path)) as conn, conn:
            schema_ver = current_schema_version(conn)
    except Exception:
        # Best effort: the dashboard should still render if the schema
        # version cannot be read; 0 is shown in that case.
        schema_ver = 0

    recs = _list_recommendations(repo)
    html = _HTML_SHELL.format(
        project=_esc(cfg.project),
        version=_esc(VERSION),
        schema=schema_ver,
        window=window,
        status_class=status_class,
        status_label=_esc(status_label),
        hero_line=_esc(hero),
        recs_html=_render_recommendations(recs),
        table_html=_render_table(stats),
    )
    return HTMLResponse(html)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
async def _health(request: Request) -> JSONResponse:
    """Liveness probe: report the project, the DB path, and whether that path exists."""
    cfg: Config = request.app.state.config
    db_path = cfg.db_path
    payload = {
        "ok": True,
        "project": cfg.project,
        "db_path": str(db_path),
        "db_exists": db_path.exists(),
    }
    return JSONResponse(payload)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
async def _api_stats(request: Request) -> JSONResponse:
    """Return the per-workload roll-up for the configured project as JSON.

    The optional ``window`` query parameter is the look-back period in days
    (default 7). A value that is not an integer yields a 400 response with an
    ``error`` message instead of an unhandled ``ValueError``.
    """
    cfg: Config = request.app.state.config
    repo: Repository = request.app.state.repo

    raw_window = request.query_params.get("window", "7")
    try:
        window = int(raw_window)
    except ValueError:
        return JSONResponse(
            {"error": f"window must be an integer number of days, got {raw_window!r}"},
            status_code=400,
        )

    stats = repo.stats_by_workload(cfg.project, since_days=window)
    return JSONResponse({"project": cfg.project, "window_days": window, "rows": stats})
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
async def _api_recommendations(request: Request) -> JSONResponse:
    """Return the open recommendations as ``{"recommendations": [...]}``."""
    open_recs = _list_recommendations(request.app.state.repo)
    return JSONResponse({"recommendations": open_recs})
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
async def _api_rec_dismiss(request: Request) -> JSONResponse:
    """Stamp ``dismissed_at`` on the recommendation named in the path.

    Only a row whose ``dismissed_at`` is still NULL is updated. The response
    is ``{"ok": True, "id": <rec_id>}`` whether or not a row was changed.
    """
    repo: Repository = request.app.state.repo
    rec_id = int(request.path_params["rec_id"])
    dismiss_sql = (
        "UPDATE recommendations SET dismissed_at = CURRENT_TIMESTAMP "
        "WHERE id = ? AND dismissed_at IS NULL"
    )
    with repo._open() as conn:
        conn.execute(dismiss_sql, (rec_id,))
    return JSONResponse({"ok": True, "id": rec_id})
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
async def _api_rec_apply(request: Request) -> JSONResponse:
    """Stamp ``applied_at`` on the recommendation named in the path.

    Only a row whose ``applied_at`` is still NULL is updated. The response is
    ``{"ok": True, "id": <rec_id>}`` whether or not a row was changed.
    """
    repo: Repository = request.app.state.repo
    rec_id = int(request.path_params["rec_id"])
    apply_sql = (
        "UPDATE recommendations SET applied_at = CURRENT_TIMESTAMP "
        "WHERE id = ? AND applied_at IS NULL"
    )
    with repo._open() as conn:
        conn.execute(apply_sql, (rec_id,))
    return JSONResponse({"ok": True, "id": rec_id})
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
async def _api_version(request: Request) -> JSONResponse:
    """Return the service version, the DB schema version and the project name.

    ``schema_version`` is 0 when the schema version cannot be read for any
    reason, so this endpoint does not fail on a missing or broken database.
    """
    from contextlib import closing

    cfg: Config = request.app.state.config
    try:
        # A sqlite3 connection used as a context manager only manages the
        # transaction; closing() is what actually releases the handle.
        with closing(sqlite3.connect(cfg.db_path)) as conn, conn:
            schema_ver = current_schema_version(conn)
    except Exception:
        # Deliberate best effort: report 0 rather than turn a version probe
        # into a 500.
        schema_ver = 0
    return JSONResponse(
        {
            "version": VERSION,
            "schema_version": schema_ver,
            "project": cfg.project,
        }
    )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def create_app(config: Config | None = None) -> Starlette:
    """Build the Starlette application.

    Uses ``config`` when given, otherwise whatever ``load_config()`` returns.
    The config and a ``Repository`` opened on its ``db_path`` are stored on
    ``app.state`` (as ``config`` and ``repo``) for the request handlers.
    """
    cfg = config or load_config()
    repo = Repository(cfg.db_path)

    post_only = ["POST"]
    routes = [
        Route("/", _home),
        Route("/health", _health),
        Route("/api/stats", _api_stats),
        Route("/api/version", _api_version),
        Route("/api/recommendations", _api_recommendations),
        Route("/api/recommendations/{rec_id:int}/dismiss", _api_rec_dismiss, methods=post_only),
        Route("/api/recommendations/{rec_id:int}/apply", _api_rec_apply, methods=post_only),
        Route("/v1/messages", messages_endpoint, methods=post_only),
    ]

    app = Starlette(debug=False, routes=routes)
    app.state.config = cfg
    app.state.repo = repo
    return app
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _build_workers_factory(cfg: Config, repo: Repository):
    """Return a ``factory(job_name)`` callable that builds the worker for a job.

    Known job names are ``model_intel``, ``shadow_eval`` and ``agent``; any
    other name makes the factory return ``None``. A new worker instance is
    constructed on every call.
    """
    from somm.client import build_default_providers

    from somm_service.workers import (
        AgentWorker,
        ModelIntelWorker,
        ShadowEvalWorker,
    )

    builders = {
        "model_intel": lambda: ModelIntelWorker(repo, ollama_url=cfg.ollama_url),
        # Same chain SommLLM builds — shadow grading can reach every
        # provider the library can (gemini, deepseek, CLI executors, …).
        "shadow_eval": lambda: ShadowEvalWorker(repo, providers=build_default_providers(cfg)),
        "agent": lambda: AgentWorker(repo),
    }

    def factory(job_name: str):
        build = builders.get(job_name)
        if build is None:
            return None
        return build()

    return factory
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def start_inprocess_scheduler(cfg: Config, repo: Repository):
    """Start the background scheduler inside the current process.

    This is what `somm serve` runs, minus the web server — for library-only
    deployments that still want the intelligence loop (model-intel refresh,
    online-eval grading, recommendations) without a dedicated service.
    Enabled from the library via SOMM_INPROCESS_WORKERS=1.

    Returns the already-started Scheduler; the caller is responsible for
    calling ``stop()`` on it.
    """
    from somm_service.workers import Scheduler

    worker_factory = _build_workers_factory(cfg, repo)
    running = Scheduler(repo, worker_factory)
    running.start()
    return running
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def run_server(
    config: Config | None = None,
    host: str = "127.0.0.1",
    port: int = 7878,
    log_level: str = "info",
    enable_scheduler: bool = True,
) -> None:
    """Run the web admin + API server under uvicorn.

    Prints a warning when ``host`` is not one of the loopback names. Unless
    ``enable_scheduler`` is false, a Scheduler is started first, stored on
    ``app.state.scheduler``, and stopped again when uvicorn returns or raises.
    """
    import uvicorn

    app = create_app(config)
    cfg: Config = app.state.config
    repo: Repository = app.state.repo

    loopback_hosts = ("127.0.0.1", "localhost", "::1")
    if host not in loopback_hosts:
        print(
            "\n⚠️ somm serve is binding to a non-localhost address.\n"
            " Trace data stays in plain SQLite files on disk.\n"
            " Only do this if you know what you're doing.\n"
        )

    scheduler = None
    if enable_scheduler:
        # Same construction + start sequence the library-only path uses.
        scheduler = start_inprocess_scheduler(cfg, repo)
        app.state.scheduler = scheduler

    try:
        uvicorn.run(app, host=host, port=port, log_level=log_level)
    finally:
        if scheduler is not None:
            scheduler.stop()
|