somm-service 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ build/
8
+ dist/
9
+
10
+ # Environments
11
+ .venv/
12
+ .env
13
+ .env.*
14
+
15
+ # Tooling caches
16
+ .pytest_cache/
17
+ .mypy_cache/
18
+ .ruff_cache/
19
+
20
+ # Local Claude session id log (per-machine, not source of truth)
21
+ sessions.txt
22
+
23
+ # Local data (never commit telemetry)
24
+ .somm/
25
+ *.sqlite
26
+ *.sqlite-wal
27
+ *.sqlite-shm
28
+
29
+ # Author-local notes not for open source
30
+ notes/
31
+ .claude/
32
+
33
+ # Editor
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+
38
+ # Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.4
2
+ Name: somm-service
3
+ Version: 0.6.1
4
+ Summary: somm service — web admin, HTTP API, background workers
5
+ Project-URL: Homepage, https://github.com/lavallee/somm
6
+ Project-URL: Repository, https://github.com/lavallee/somm
7
+ Project-URL: Issues, https://github.com/lavallee/somm/issues
8
+ Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
9
+ Author: Marc Lavallee
10
+ License: MIT
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: litellm>=1.40
18
+ Requires-Dist: somm-core==0.6.1
19
+ Requires-Dist: somm==0.6.1
20
+ Requires-Dist: starlette>=0.40
21
+ Requires-Dist: uvicorn>=0.30
22
+ Description-Content-Type: text/markdown
23
+
24
+ # somm-service
25
+
26
+ The service tier for [somm](https://github.com/lavallee/somm) — the
27
+ self-hosted LLM telemetry, routing, and intelligence loop.
28
+
29
+ Adds the localhost web admin, the HTTP API, and the background
30
+ scheduler with three workers: model-intel refresh (pricing + context
31
+ windows + capabilities), online evaluation (samples production calls
32
+ and grades them against a gold model), and the agent worker (turns
33
+ telemetry + eval results into concrete recommendations).
34
+
35
+ ```bash
36
+ pip install somm somm-service
37
+ somm serve --project my_app # dashboard at localhost:7878
38
+ ```
@@ -0,0 +1,15 @@
1
+ # somm-service
2
+
3
+ The service tier for [somm](https://github.com/lavallee/somm) — the
4
+ self-hosted LLM telemetry, routing, and intelligence loop.
5
+
6
+ Adds the localhost web admin, the HTTP API, and the background
7
+ scheduler with three workers: model-intel refresh (pricing + context
8
+ windows + capabilities), online evaluation (samples production calls
9
+ and grades them against a gold model), and the agent worker (turns
10
+ telemetry + eval results into concrete recommendations).
11
+
12
+ ```bash
13
+ pip install somm somm-service
14
+ somm serve --project my_app # dashboard at localhost:7878
15
+ ```
@@ -0,0 +1,38 @@
1
+ [project]
2
+ name = "somm-service"
3
+ version = "0.6.1"
4
+ description = "somm service — web admin, HTTP API, background workers"
5
+ requires-python = ">=3.12"
6
+ license = { text = "MIT" }
7
+ readme = "README.md"
8
+ authors = [{ name = "Marc Lavallee" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Programming Language :: Python :: 3.12",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Topic :: Software Development :: Libraries",
15
+ ]
16
+ dependencies = [
17
+ "somm-core==0.6.1",
18
+ "somm==0.6.1",
19
+ "litellm>=1.40",
20
+ "starlette>=0.40",
21
+ "uvicorn>=0.30",
22
+ ]
23
+
24
+ [project.scripts]
25
+ somm-serve = "somm_service.cli:main"
26
+
27
+ [project.urls]
28
+ Homepage = "https://github.com/lavallee/somm"
29
+ Repository = "https://github.com/lavallee/somm"
30
+ Issues = "https://github.com/lavallee/somm/issues"
31
+ Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
32
+
33
+ [build-system]
34
+ requires = ["hatchling"]
35
+ build-backend = "hatchling.build"
36
+
37
+ [tool.hatch.build.targets.wheel]
38
+ packages = ["src/somm_service"]
@@ -0,0 +1,9 @@
1
+ """somm-service — web admin + HTTP API + background workers.
2
+
3
+ Ships the web dashboard, the HTTP API (including /api/stats), and the
4
+ background workers (model_intel, shadow_eval, agent) run by the scheduler.
5
+ """
6
+
7
+ from somm_service.app import create_app, run_server
8
+
9
+ __all__ = ["create_app", "run_server"]
@@ -0,0 +1,491 @@
1
+ """Starlette app serving the web admin + HTTP API.
2
+
3
+ HTTP surface:
4
+ GET / HTML dashboard — status line + recs + stats
5
+ GET /health JSON liveness probe
6
+ GET /api/stats JSON roll-up (per-workload × provider × model)
7
+ GET /api/version JSON service + schema version
8
+ GET /api/recommendations JSON open recs
9
+ POST /api/recommendations/{id}/dismiss
10
+ POST /api/recommendations/{id}/apply
11
+ POST /v1/messages Anthropic Messages-compatible LLM proxy
12
+ (non-streaming v1; budget-gated; uses litellm
13
+ as a library; streaming + /v1/chat/completions
14
+ are explicit follow-ups)
15
+
16
+ Design tokens + a11y spec applied inline (v0.1 ships
17
+ tokens in-HTML; `packages/somm-service/web/tokens.css` lands when we extract).
18
+
19
+ `somm serve` also starts a Scheduler background thread that runs the
20
+ model_intel / shadow_eval / agent workers on their cadences.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import sqlite3
27
+
28
+ from somm_core import VERSION
29
+ from somm_core.config import Config
30
+ from somm_core.config import load as load_config
31
+ from somm_core.repository import Repository
32
+ from somm_core.schema import current_schema_version
33
+ from starlette.applications import Starlette
34
+ from starlette.requests import Request
35
+ from starlette.responses import HTMLResponse, JSONResponse
36
+ from starlette.routing import Route
37
+
38
+ from somm_service.proxy import messages_endpoint
39
+
40
+ _HTML_SHELL = """<!DOCTYPE html>
41
+ <html lang="en">
42
+ <head>
43
+ <meta charset="utf-8">
44
+ <meta name="viewport" content="width=device-width,initial-scale=1">
45
+ <title>somm · {project}</title>
46
+ <style>
47
+ :root {{
48
+ --font-sans: Inter, system-ui, sans-serif;
49
+ --font-mono: 'JetBrains Mono', 'SF Mono', Consolas, monospace;
50
+ --bg: #0a0a0a; --bg-alt: #111111;
51
+ --fg: #e8e8e8; --fg-muted: #9ca3af;
52
+ --border: #27272a;
53
+ --ok: #059669; --warn: #d97706; --danger: #dc2626;
54
+ --accent: #818cf8;
55
+ --radius: 4px;
56
+ }}
57
+ @media (prefers-color-scheme: light) {{
58
+ :root {{ --bg:#fafafa; --bg-alt:#fff; --fg:#1a1a1a; --fg-muted:#6b7280; --border:#e5e7eb; }}
59
+ }}
60
+ * {{ box-sizing: border-box; }}
61
+ body {{ font-family: var(--font-sans); background: var(--bg); color: var(--fg);
62
+ margin: 0; padding: 24px 32px; line-height: 1.5; }}
63
+ a {{ color: var(--accent); }}
64
+ a:focus-visible, button:focus-visible {{ outline: 2px solid var(--accent); outline-offset: 2px; }}
65
+ header {{ display: flex; justify-content: space-between; align-items: baseline;
66
+ border-bottom: 1px solid var(--border); padding-bottom: 16px; margin-bottom: 24px; }}
67
+ header h1 {{ font-size: 20px; margin: 0; font-weight: 600; }}
68
+ header .meta {{ font-family: var(--font-mono); font-size: 12px; color: var(--fg-muted); }}
69
+ .status {{ font-size: 16px; padding: 16px; border: 1px solid var(--border);
70
+ border-radius: var(--radius); background: var(--bg-alt); margin-bottom: 24px; }}
71
+ .status strong {{ color: var(--ok); font-family: var(--font-mono); }}
72
+ .status.warn strong {{ color: var(--warn); }}
73
+ .status.err strong {{ color: var(--danger); }}
74
+ h2 {{ font-size: 14px; text-transform: uppercase; letter-spacing: 0.08em;
75
+ color: var(--fg-muted); margin: 24px 0 12px; }}
76
+ ol.recs {{ list-style: none; padding: 0; margin: 0; display: flex;
77
+ flex-direction: column; gap: 12px; }}
78
+ .rec {{ padding: 16px; border: 1px solid var(--border); border-radius: var(--radius);
79
+ background: var(--bg-alt); }}
80
+ .rec-head {{ display: flex; justify-content: space-between; align-items: baseline;
81
+ margin-bottom: 6px; }}
82
+ .rec-title {{ font-weight: 600; font-family: var(--font-mono); font-size: 13px; }}
83
+ .rec-conf {{ color: var(--fg-muted); font-size: 12px; font-family: var(--font-mono); }}
84
+ .rec-impact {{ color: var(--fg); font-size: 14px; margin-bottom: 8px; }}
85
+ .rec-evidence summary {{ color: var(--accent); cursor: pointer; font-size: 12px;
86
+ font-family: var(--font-mono); }}
87
+ .rec-evidence[open] summary {{ margin-bottom: 8px; }}
88
+ .evidence-tbl {{ margin-top: 4px; font-size: 12px; }}
89
+ .evidence-tbl th {{ color: var(--fg-muted); font-weight: 500; padding: 4px 10px; }}
90
+ .evidence-tbl td {{ padding: 4px 10px; border-bottom: 1px solid var(--border); }}
91
+ table {{ width: 100%; border-collapse: collapse; font-family: var(--font-mono); font-size: 13px; }}
92
+ th, td {{ text-align: left; padding: 8px 12px; border-bottom: 1px solid var(--border); }}
93
+ th {{ color: var(--fg-muted); font-weight: 500; }}
94
+ td.num {{ text-align: right; }}
95
+ .empty {{ padding: 16px; color: var(--fg-muted); font-style: italic; }}
96
+ footer {{ margin-top: 48px; color: var(--fg-muted); font-family: var(--font-mono);
97
+ font-size: 12px; }}
98
+ </style>
99
+ </head>
100
+ <body>
101
+ <header>
102
+ <h1>somm</h1>
103
+ <div class="meta">project: {project} · v{version} · schema v{schema} · {window}d window</div>
104
+ </header>
105
+
106
+ <section aria-label="System status" role="status" aria-live="polite">
107
+ <div class="status {status_class}">
108
+ <strong>{status_label}</strong> · {hero_line}
109
+ </div>
110
+ </section>
111
+
112
+ <section aria-label="Recommendations">
113
+ <h2>Top recommendations</h2>
114
+ {recs_html}
115
+ </section>
116
+
117
+ <section aria-label="Evidence">
118
+ <h2>Calls by workload</h2>
119
+ {table_html}
120
+ </section>
121
+
122
+ <footer>
123
+ somm is self-hosted. Binds <code>localhost</code> only by default. Data stays on disk.
124
+ <br>Endpoints: <a href="/health">/health</a> · <a href="/api/stats">/api/stats</a> · <a href="/api/version">/api/version</a>
125
+ </footer>
126
+ </body>
127
+ </html>
128
+ """
129
+
130
+
131
+ def _render_table(stats: list[dict]) -> str:
132
+ if not stats:
133
+ return '<div class="empty">No calls yet. Run <code>somm.llm().generate(...)</code> in your Python code.</div>'
134
+ rows = []
135
+ for s in stats:
136
+ rows.append(
137
+ "<tr>"
138
+ f"<td>{_esc(s['workload'])}</td>"
139
+ f"<td>{_esc(s['provider'])}</td>"
140
+ f"<td>{_esc(s['model'])}</td>"
141
+ f"<td class='num'>{s['n_calls']}</td>"
142
+ f"<td class='num'>{s['tokens_in'] or 0}</td>"
143
+ f"<td class='num'>{s['tokens_out'] or 0}</td>"
144
+ f"<td class='num'>{s['n_failed']}</td>"
145
+ "</tr>"
146
+ )
147
+ return (
148
+ "<table>"
149
+ "<thead><tr>"
150
+ "<th>workload</th><th>provider</th><th>model</th>"
151
+ "<th class='num'>calls</th><th class='num'>tok in</th>"
152
+ "<th class='num'>tok out</th><th class='num'>fail</th>"
153
+ "</tr></thead>"
154
+ f"<tbody>{''.join(rows)}</tbody>"
155
+ "</table>"
156
+ )
157
+
158
+
159
+ _ESC_MAP = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#x27;"}
160
+
161
+
162
+ def _esc(s: str) -> str:
163
+ return "".join(_ESC_MAP.get(c, c) for c in str(s))
164
+
165
+
166
def _list_recommendations(repo: Repository) -> list[dict]:
    """Return up to 10 open recommendations, newest first.

    "Open" means neither dismissed nor applied. Each entry is a plain dict
    with the workload name joined in ("(unknown)" when the join finds none)
    and the evidence JSON decoded; empty or undecodable evidence becomes ``{}``.
    """
    query = (
        "SELECT r.id, r.workload_id, w.name, r.action, r.evidence_json, "
        " r.expected_impact, r.confidence, r.created_at "
        "FROM recommendations r "
        "LEFT JOIN workloads w ON w.id = r.workload_id "
        "WHERE r.dismissed_at IS NULL AND r.applied_at IS NULL "
        "ORDER BY r.created_at DESC LIMIT 10"
    )
    with repo._open() as conn:
        records = conn.execute(query).fetchall()

    def _decode(raw):
        # Evidence is best-effort: a malformed payload must not hide the rec.
        if not raw:
            return {}
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return {}

    return [
        {
            "id": record[0],
            "workload_id": record[1],
            "workload": record[2] or "(unknown)",
            "action": record[3],
            "evidence": _decode(record[4]),
            "expected_impact": record[5] or "",
            "confidence": record[6] or 0,
            "created_at": record[7],
        }
        for record in records
    ]
196
+
197
+
198
+ def _render_recommendations(recs: list[dict]) -> str:
199
+ if not recs:
200
+ return (
201
+ '<div class="empty">No recommendations yet. '
202
+ "Agent runs weekly; needs shadow-eval data or model_intel deltas first.</div>"
203
+ )
204
+ items = []
205
+ for r in recs:
206
+ title = _esc(r["workload"]) + " · " + _esc(r["action"])
207
+ impact = _esc(r["expected_impact"])
208
+ confidence = f"{r['confidence']:.0%}"
209
+ evidence_detail = _render_evidence(r["action"], r["evidence"])
210
+ items.append(
211
+ f'<li class="rec">'
212
+ f' <div class="rec-head">'
213
+ f' <span class="rec-title">{title}</span>'
214
+ f' <span class="rec-conf">confidence {_esc(confidence)}</span>'
215
+ f" </div>"
216
+ f' <div class="rec-impact">{impact}</div>'
217
+ f' <details class="rec-evidence">'
218
+ f" <summary>evidence</summary>{evidence_detail}"
219
+ f" </details>"
220
+ f"</li>"
221
+ )
222
+ return f'<ol class="recs" aria-live="polite">{"".join(items)}</ol>'
223
+
224
+
225
def _render_evidence(action: str, evidence: dict) -> str:
    """Render a recommendation's evidence payload as an HTML fragment.

    ``switch_model`` and ``new_model_landed`` produce a current-vs-candidate
    comparison table, ``chronic_cooldown`` produces a short paragraph, and
    anything else (including an evidence payload that is not a JSON object)
    falls back to an escaped, pretty-printed JSON dump.

    Cell values are handed to ``_evidence_table`` unescaped because that
    helper escapes every cell itself; escaping here as well would
    double-escape (``&`` would render as ``&amp;amp;``).
    """
    if isinstance(evidence, dict):
        if action == "switch_model":
            return _evidence_comparison(
                evidence,
                [
                    ("provider", "provider"),
                    ("model", "model"),
                    ("quality", "score"),
                    ("cost_usd", "cost_usd"),
                    ("latency_ms", "latency_ms"),
                ],
            )
        if action == "new_model_landed":
            return _evidence_comparison(
                evidence,
                [
                    ("provider", "provider"),
                    ("model", "model"),
                    ("in $/1M", "price_in_per_1m"),
                    ("out $/1M", "price_out_per_1m"),
                ],
            )
        if action == "chronic_cooldown":
            return (
                "<p>"
                f"provider <code>{_esc(evidence.get('provider', ''))}</code> hit "
                f"circuit-break on {_esc(str(evidence.get('n_calls', '')))} calls. "
                f"{_esc(evidence.get('note', ''))}"
                "</p>"
            )
    return f"<pre>{_esc(json.dumps(evidence, indent=2, sort_keys=True))}</pre>"


def _evidence_comparison(evidence: dict, fields: list[tuple[str, str]]) -> str:
    """Build a current-vs-candidate table; ``fields`` is (row label, evidence key) pairs."""
    current = evidence.get("current")
    candidate = evidence.get("candidate")
    # Evidence is stored JSON: a missing, null, or non-object side renders as blanks.
    if not isinstance(current, dict):
        current = {}
    if not isinstance(candidate, dict):
        candidate = {}
    rows = [("", "current", "candidate")]
    rows.extend(
        (label, str(current.get(key, "")), str(candidate.get(key, "")))
        for label, key in fields
    )
    return _evidence_table(rows)
270
+
271
+
272
+ def _evidence_table(rows: list[tuple]) -> str:
273
+ head = rows[0]
274
+ body = rows[1:]
275
+ thead = "".join(f"<th>{_esc(c)}</th>" for c in head)
276
+ tbody = "".join("<tr>" + "".join(f"<td>{_esc(c)}</td>" for c in row) + "</tr>" for row in body)
277
+ return (
278
+ f'<table class="evidence-tbl"><thead><tr>{thead}</tr></thead><tbody>{tbody}</tbody></table>'
279
+ )
280
+
281
+
282
async def _home(request: Request) -> HTMLResponse:
    """Serve the HTML dashboard: status line, open recommendations, stats table.

    The optional ``window`` query parameter selects how many days of calls
    are rolled up (default 7). A non-integer value falls back to the default
    rather than surfacing as a 500. The schema version shown in the header is
    best-effort and reads as 0 when the database cannot be inspected.
    """
    cfg: Config = request.app.state.config
    repo: Repository = request.app.state.repo
    try:
        window = int(request.query_params.get("window", "7"))
    except ValueError:
        # Untrusted query string: degrade to the default window.
        window = 7
    stats = repo.stats_by_workload(cfg.project, since_days=window)

    total_calls = sum(s["n_calls"] for s in stats)
    total_failed = sum(s["n_failed"] for s in stats)

    if total_calls == 0:
        status_class, status_label, hero = (
            "",
            "NO DATA YET",
            f"run somm.llm(project={cfg.project!r}).generate(...) to get started",
        )
    elif total_failed == 0:
        status_class, status_label, hero = (
            "",
            "HEALTHY",
            f"{total_calls} calls · 0 failed · {len(stats)} workload(s) active",
        )
    else:
        pct = 100 * total_failed / total_calls
        status_class, status_label, hero = (
            # Under 20% failures is a warning; anything above is an error.
            "warn" if pct < 20 else "err",
            "NEEDS ATTENTION",
            f"{total_calls} calls · {total_failed} failed ({pct:.1f}%)",
        )

    try:
        conn = sqlite3.connect(cfg.db_path)
        try:
            # `with conn` only manages the transaction; it does not close the
            # connection, so close it explicitly on every request.
            with conn:
                schema_ver = current_schema_version(conn)
        finally:
            conn.close()
    except Exception:
        schema_ver = 0

    recs = _list_recommendations(repo)
    html = _HTML_SHELL.format(
        project=_esc(cfg.project),
        version=_esc(VERSION),
        schema=schema_ver,
        window=window,
        status_class=status_class,
        status_label=_esc(status_label),
        hero_line=_esc(hero),
        recs_html=_render_recommendations(recs),
        table_html=_render_table(stats),
    )
    return HTMLResponse(html)
330
+
331
+
332
async def _health(request: Request) -> JSONResponse:
    """Liveness probe: report the project, the database path, and whether it exists."""
    cfg: Config = request.app.state.config
    payload = {
        "ok": True,
        "project": cfg.project,
        "db_path": str(cfg.db_path),
        "db_exists": cfg.db_path.exists(),
    }
    return JSONResponse(payload)
342
+
343
+
344
async def _api_stats(request: Request) -> JSONResponse:
    """Return the per-workload roll-up as JSON.

    The optional ``window`` query parameter is the number of days to include
    (default 7). A non-integer value yields a 400 response with an ``error``
    message instead of an unhandled ``ValueError`` (500).
    """
    cfg: Config = request.app.state.config
    repo: Repository = request.app.state.repo
    try:
        window = int(request.query_params.get("window", "7"))
    except ValueError:
        return JSONResponse(
            {"error": "window must be an integer number of days"},
            status_code=400,
        )
    stats = repo.stats_by_workload(cfg.project, since_days=window)
    return JSONResponse({"project": cfg.project, "window_days": window, "rows": stats})
350
+
351
+
352
async def _api_recommendations(request: Request) -> JSONResponse:
    """Return the open recommendations as JSON under the ``recommendations`` key."""
    open_recs = _list_recommendations(request.app.state.repo)
    return JSONResponse({"recommendations": open_recs})
355
+
356
+
357
async def _api_rec_dismiss(request: Request) -> JSONResponse:
    """Mark a recommendation as dismissed.

    Only rows that are not already dismissed are stamped. The response is
    ``{"ok": True, "id": ...}`` whether or not a row was updated.
    """
    repo: Repository = request.app.state.repo
    rec_id = int(request.path_params["rec_id"])
    statement = (
        "UPDATE recommendations SET dismissed_at = CURRENT_TIMESTAMP "
        "WHERE id = ? AND dismissed_at IS NULL"
    )
    with repo._open() as conn:
        conn.execute(statement, (rec_id,))
    return JSONResponse({"ok": True, "id": rec_id})
367
+
368
+
369
async def _api_rec_apply(request: Request) -> JSONResponse:
    """Mark a recommendation as applied.

    Only rows that are not already applied are stamped. The response is
    ``{"ok": True, "id": ...}`` whether or not a row was updated.
    """
    repo: Repository = request.app.state.repo
    rec_id = int(request.path_params["rec_id"])
    statement = (
        "UPDATE recommendations SET applied_at = CURRENT_TIMESTAMP "
        "WHERE id = ? AND applied_at IS NULL"
    )
    with repo._open() as conn:
        conn.execute(statement, (rec_id,))
    return JSONResponse({"ok": True, "id": rec_id})
379
+
380
+
381
async def _api_version(request: Request) -> JSONResponse:
    """Return the service version, the database schema version, and the project.

    The schema version is best-effort: any failure while opening or reading
    the database is reported as ``schema_version: 0``.
    """
    cfg: Config = request.app.state.config
    try:
        conn = sqlite3.connect(cfg.db_path)
        try:
            # `with conn` only manages the transaction; it does not close the
            # connection, so close it explicitly on every request.
            with conn:
                schema_ver = current_schema_version(conn)
        finally:
            conn.close()
    except Exception:
        schema_ver = 0
    return JSONResponse(
        {
            "version": VERSION,
            "schema_version": schema_ver,
            "project": cfg.project,
        }
    )
395
+
396
+
397
def create_app(config: Config | None = None) -> Starlette:
    """Build the Starlette application.

    Uses ``config`` when given, otherwise the loaded default configuration.
    The config and a ``Repository`` opened on its ``db_path`` are stored on
    ``app.state`` for the request handlers.
    """
    cfg = config or load_config()
    repo = Repository(cfg.db_path)

    read_routes = [
        Route("/", _home),
        Route("/health", _health),
        Route("/api/stats", _api_stats),
        Route("/api/version", _api_version),
        Route("/api/recommendations", _api_recommendations),
    ]
    write_routes = [
        Route("/api/recommendations/{rec_id:int}/dismiss", _api_rec_dismiss, methods=["POST"]),
        Route("/api/recommendations/{rec_id:int}/apply", _api_rec_apply, methods=["POST"]),
        Route("/v1/messages", messages_endpoint, methods=["POST"]),
    ]

    app = Starlette(debug=False, routes=read_routes + write_routes)
    app.state.config = cfg
    app.state.repo = repo
    return app
416
+
417
+
418
+ def _build_workers_factory(cfg: Config, repo: Repository):
419
+ """Create a factory that returns a worker instance for a given job name."""
420
+ from somm.client import build_default_providers
421
+
422
+ from somm_service.workers import (
423
+ AgentWorker,
424
+ ModelIntelWorker,
425
+ ShadowEvalWorker,
426
+ )
427
+
428
+ def factory(job_name: str):
429
+ if job_name == "model_intel":
430
+ return ModelIntelWorker(repo, ollama_url=cfg.ollama_url)
431
+ if job_name == "shadow_eval":
432
+ # Same chain SommLLM builds — shadow grading can reach every
433
+ # provider the library can (gemini, deepseek, CLI executors, …).
434
+ return ShadowEvalWorker(repo, providers=build_default_providers(cfg))
435
+ if job_name == "agent":
436
+ return AgentWorker(repo)
437
+ return None
438
+
439
+ return factory
440
+
441
+
442
def start_inprocess_scheduler(cfg: Config, repo: Repository):
    """Start the background scheduler in the current process, without the web server.

    Intended for library-only deployments that still want the intelligence
    loop (model-intel refresh, online-eval grading, recommendations) without
    a dedicated service; enabled from the library via
    SOMM_INPROCESS_WORKERS=1.

    Returns the started Scheduler. The caller is responsible for calling
    ``stop()`` on it.
    """
    from somm_service.workers import Scheduler

    worker_factory = _build_workers_factory(cfg, repo)
    running = Scheduler(repo, worker_factory)
    running.start()
    return running
456
+
457
+
458
def run_server(
    config: Config | None = None,
    host: str = "127.0.0.1",
    port: int = 7878,
    log_level: str = "info",
    enable_scheduler: bool = True,
) -> None:
    """Run the web admin + API server under uvicorn.

    Prints a warning when ``host`` is not a loopback address. Unless
    ``enable_scheduler`` is false, the background scheduler is started before
    serving, exposed as ``app.state.scheduler``, and stopped again when the
    server exits.
    """
    import uvicorn

    app = create_app(config)
    cfg: Config = app.state.config
    repo: Repository = app.state.repo

    loopback_hosts = ("127.0.0.1", "localhost", "::1")
    if host not in loopback_hosts:
        print(
            "\n⚠️ somm serve is binding to a non-localhost address.\n"
            " Trace data stays in plain SQLite files on disk.\n"
            " Only do this if you know what you're doing.\n"
        )

    scheduler = None
    if enable_scheduler:
        # Same construction + start sequence as the library-only entry point.
        scheduler = start_inprocess_scheduler(cfg, repo)
        app.state.scheduler = scheduler

    try:
        uvicorn.run(app, host=host, port=port, log_level=log_level)
    finally:
        if scheduler is not None:
            scheduler.stop()