switchboard-local 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- switchboard/__init__.py +8 -0
- switchboard/app/__init__.py +1 -0
- switchboard/app/api/__init__.py +1 -0
- switchboard/app/api/admin.py +54 -0
- switchboard/app/api/chat.py +22 -0
- switchboard/app/api/health.py +18 -0
- switchboard/app/api/personal.py +113 -0
- switchboard/app/api/ui.py +387 -0
- switchboard/app/backends/__init__.py +12 -0
- switchboard/app/backends/base.py +27 -0
- switchboard/app/backends/cli_agents.py +224 -0
- switchboard/app/backends/ollama_backend.py +124 -0
- switchboard/app/backends/registry.py +61 -0
- switchboard/app/core/__init__.py +1 -0
- switchboard/app/core/config.py +82 -0
- switchboard/app/core/errors.py +28 -0
- switchboard/app/core/logging.py +10 -0
- switchboard/app/main.py +66 -0
- switchboard/app/models/__init__.py +1 -0
- switchboard/app/models/api.py +48 -0
- switchboard/app/models/backends.py +80 -0
- switchboard/app/models/capabilities.py +67 -0
- switchboard/app/models/catalogue.py +107 -0
- switchboard/app/models/internal.py +98 -0
- switchboard/app/models/personal.py +303 -0
- switchboard/app/models/policy.py +72 -0
- switchboard/app/models/sessions.py +50 -0
- switchboard/app/models/telemetry.py +271 -0
- switchboard/app/providers/__init__.py +1 -0
- switchboard/app/providers/anthropic_provider.py +66 -0
- switchboard/app/providers/base.py +29 -0
- switchboard/app/providers/lmstudio.py +57 -0
- switchboard/app/providers/manual.py +27 -0
- switchboard/app/providers/mock.py +37 -0
- switchboard/app/providers/ollama.py +49 -0
- switchboard/app/providers/openai_provider.py +54 -0
- switchboard/app/providers/registry.py +38 -0
- switchboard/app/services/__init__.py +1 -0
- switchboard/app/services/answer_quality.py +284 -0
- switchboard/app/services/capabilities.py +451 -0
- switchboard/app/services/chat_completion.py +140 -0
- switchboard/app/services/classifier.py +733 -0
- switchboard/app/services/compression_layer.py +96 -0
- switchboard/app/services/container.py +86 -0
- switchboard/app/services/context_compression.py +149 -0
- switchboard/app/services/core_factory.py +166 -0
- switchboard/app/services/cost.py +36 -0
- switchboard/app/services/deterministic_tools.py +284 -0
- switchboard/app/services/finance_providers.py +230 -0
- switchboard/app/services/finance_tool.py +217 -0
- switchboard/app/services/learned_router.py +211 -0
- switchboard/app/services/llm_router.py +175 -0
- switchboard/app/services/local_runtime.py +165 -0
- switchboard/app/services/news_tool.py +218 -0
- switchboard/app/services/personal_switchboard.py +1338 -0
- switchboard/app/services/policy_engine.py +109 -0
- switchboard/app/services/provider_status.py +20 -0
- switchboard/app/services/response_sanitizer.py +133 -0
- switchboard/app/services/router.py +224 -0
- switchboard/app/services/runtime_context.py +70 -0
- switchboard/app/services/semantic_memory.py +240 -0
- switchboard/app/services/sensitivity_escalator.py +128 -0
- switchboard/app/services/session_context.py +199 -0
- switchboard/app/services/status_intents.py +56 -0
- switchboard/app/services/switchboard_core.py +1301 -0
- switchboard/app/services/telemetry.py +80 -0
- switchboard/app/services/tool_dispatcher.py +170 -0
- switchboard/app/services/tools.py +319 -0
- switchboard/app/services/web_search_providers.py +93 -0
- switchboard/app/services/web_search_tool.py +144 -0
- switchboard/app/storage/__init__.py +1 -0
- switchboard/app/storage/db.py +71 -0
- switchboard/app/storage/repositories.py +669 -0
- switchboard/app/utils/__init__.py +1 -0
- switchboard/app/utils/ids.py +7 -0
- switchboard/app/utils/redaction.py +37 -0
- switchboard/app/utils/secret_patterns.py +95 -0
- switchboard/app/utils/time.py +11 -0
- switchboard/cli.py +1484 -0
- switchboard/config/__init__.py +1 -0
- switchboard/config/models.yaml +295 -0
- switchboard/config/personal.example.yaml +117 -0
- switchboard/config/personal.yaml +117 -0
- switchboard/config/policies.yaml +49 -0
- switchboard/config/router_weights.json +4645 -0
- switchboard/config/sensitivity_weights.json +3101 -0
- switchboard/config/tool_dispatcher_weights.json +7733 -0
- switchboard/evals/__init__.py +12 -0
- switchboard/evals/datasets.py +864 -0
- switchboard/evals/mock_adapters.py +129 -0
- switchboard/evals/quality_bench.py +501 -0
- switchboard/evals/quality_dataset.py +1776 -0
- switchboard/evals/real_providers.py +183 -0
- switchboard/evals/real_smoke.py +473 -0
- switchboard/evals/reports.py +146 -0
- switchboard/evals/runner.py +374 -0
- switchboard/evals/scorers.py +47 -0
- switchboard/evals/types.py +155 -0
- switchboard/training/__init__.py +1 -0
- switchboard/training/augment.py +104 -0
- switchboard/training/external_datasets.py +253 -0
- switchboard/training/feedback_loop.py +458 -0
- switchboard/training/router_dataset.py +480 -0
- switchboard/training/sensitivity_dataset.py +166 -0
- switchboard/training/tool_dispatcher_dataset.py +224 -0
- switchboard/training/train_router.py +258 -0
- switchboard_local-0.1.0.dist-info/METADATA +270 -0
- switchboard_local-0.1.0.dist-info/RECORD +112 -0
- switchboard_local-0.1.0.dist-info/WHEEL +5 -0
- switchboard_local-0.1.0.dist-info/entry_points.txt +3 -0
- switchboard_local-0.1.0.dist-info/licenses/LICENSE +21 -0
- switchboard_local-0.1.0.dist-info/top_level.txt +1 -0
switchboard/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""FastAPI application package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API routers."""
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, HTTPException, Query, Request, status
|
|
4
|
+
|
|
5
|
+
from switchboard.app.models.catalogue import ModelProfile
|
|
6
|
+
from switchboard.app.models.telemetry import TelemetryRead
|
|
7
|
+
from switchboard.app.services.container import ServiceContainer
|
|
8
|
+
|
|
9
|
+
# Router collecting the admin endpoints; its routes are registered under the "/admin" prefix.
router = APIRouter(prefix="/admin", tags=["admin"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/models", response_model=list[ModelProfile])
async def list_models(request: Request) -> list[ModelProfile]:
    """Return the ``models`` list held by the application's model catalogue."""
    services: ServiceContainer = request.app.state.container
    catalogue = services.catalogue
    return catalogue.models
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@router.get("/requests", response_model=list[TelemetryRead])
async def list_requests(
    request: Request,
    limit: int = Query(default=100, ge=1, le=1000),
) -> list[TelemetryRead]:
    """List telemetry records from the telemetry repository.

    ``limit`` is validated to the range 1-1000 (default 100) and forwarded to
    the repository's ``list`` call.
    """
    services: ServiceContainer = request.app.state.container
    repository = services.telemetry.repository
    return repository.list(limit=limit)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@router.get("/requests/{request_id}", response_model=TelemetryRead)
async def get_request(request_id: str, request: Request) -> TelemetryRead:
    """Fetch one telemetry record by id.

    Raises:
        HTTPException: 404 when the repository returns ``None`` for the id.
    """
    services: ServiceContainer = request.app.state.container
    found = services.telemetry.repository.get(request_id)
    if found is not None:
        return found
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="request not found")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.get("/metrics/summary")
async def metrics_summary(request: Request) -> dict[str, object]:
    """Return the telemetry repository's summary dictionary unchanged."""
    services: ServiceContainer = request.app.state.container
    repository = services.telemetry.repository
    return repository.summary()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@router.get("/metrics/savings")
async def metrics_savings(request: Request) -> dict[str, object]:
    """Report cost figures next to the frontier baseline model's id.

    The cost and request-count values are copied verbatim from the telemetry
    summary; the baseline model comes from the catalogue's
    ``frontier_baseline()``.
    """
    services: ServiceContainer = request.app.state.container
    totals = services.telemetry.repository.summary()
    frontier = services.catalogue.frontier_baseline()

    report: dict[str, object] = {
        "baseline": "everything_goes_to_frontier_model",
        "baseline_model_id": frontier.model_id,
    }
    # Copy the summary figures through under the same key names.
    for key in (
        "estimated_total_cost_usd",
        "estimated_baseline_cost_usd",
        "estimated_savings_usd",
        "total_requests",
    ):
        report[key] = totals[key]
    return report
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Request
|
|
4
|
+
|
|
5
|
+
from switchboard.app.core.errors import streaming_not_implemented
|
|
6
|
+
from switchboard.app.models.api import ChatCompletionRequest, ChatCompletionResponse
|
|
7
|
+
from switchboard.app.services.chat_completion import ChatCompletionService
|
|
8
|
+
from switchboard.app.services.container import ServiceContainer
|
|
9
|
+
|
|
10
|
+
# Router for the chat endpoints; its routes are registered under the "/v1" prefix.
router = APIRouter(prefix="/v1", tags=["chat"])
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(
    payload: ChatCompletionRequest,
    request: Request,
) -> ChatCompletionResponse:
    """Run a non-streaming chat completion for ``payload``.

    Requests with ``stream`` set are rejected by raising whatever
    ``streaming_not_implemented()`` returns; all others are delegated to
    ``ChatCompletionService.complete``.
    """
    if not payload.stream:
        services: ServiceContainer = request.app.state.container
        completion_service = ChatCompletionService(services)
        return await completion_service.complete(payload)
    raise streaming_not_implemented()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Request
|
|
4
|
+
|
|
5
|
+
from switchboard.app.services.container import ServiceContainer
|
|
6
|
+
|
|
7
|
+
# Router for the health endpoint; no prefix, so routes are registered at their literal paths.
router = APIRouter(tags=["health"])
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@router.get("/health")
async def health(request: Request) -> dict[str, object]:
    """Return a static "ok" status plus the environment name and enabled-model count."""
    services: ServiceContainer = request.app.state.container
    environment = services.settings.environment
    enabled_count = len(services.catalogue.enabled_models())
    return {
        "status": "ok",
        "product": "Switchboard",
        "environment": environment,
        "enabled_models": enabled_count,
    }
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, HTTPException, Query, Request
|
|
6
|
+
|
|
7
|
+
from switchboard.app.models.personal import (
|
|
8
|
+
FeedbackCreate,
|
|
9
|
+
FeedbackRead,
|
|
10
|
+
PersonalAskResponse,
|
|
11
|
+
PersonalMemoryCreate,
|
|
12
|
+
PersonalMemoryRead,
|
|
13
|
+
PersonalModelRead,
|
|
14
|
+
PersonalPromptRequest,
|
|
15
|
+
PersonalRouteResponse,
|
|
16
|
+
)
|
|
17
|
+
from switchboard.app.models.telemetry import PersonalTelemetryRead
|
|
18
|
+
from switchboard.app.services.container import ServiceContainer
|
|
19
|
+
from switchboard.app.services.personal_switchboard import (
|
|
20
|
+
PersonalRoutingError,
|
|
21
|
+
PersonalSwitchboardService,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Router for the personal endpoints; its routes are registered under the "/personal" prefix.
router = APIRouter(prefix="/personal", tags=["personal"])
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def personal_service(request: Request) -> PersonalSwitchboardService:
    """Build a ``PersonalSwitchboardService`` around the app's service container."""
    services: ServiceContainer = request.app.state.container
    service = PersonalSwitchboardService(services)
    return service
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@router.get("/health")
async def personal_health(request: Request) -> dict[str, object]:
    """Return an "ok" status together with the personal profile and preference flags."""
    services: ServiceContainer = request.app.state.container
    personal = services.personal_config
    profile = personal.profile
    preferences = personal.preferences

    report: dict[str, object] = {
        "status": "ok",
        "product": "Switchboard",
        "environment": services.settings.environment,
    }
    # Identity values come from the configured profile ...
    report["user_id"] = profile.user_id
    report["default_project"] = profile.default_project
    # ... and the routing flags from the configured preferences.
    report["local_first"] = preferences.local_first
    report["allow_cloud"] = preferences.allow_cloud
    report["private_mode"] = preferences.private_mode
    return report
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@router.post("/route", response_model=PersonalRouteResponse)
async def route_prompt(payload: PersonalPromptRequest, request: Request) -> PersonalRouteResponse:
    """Return the personal service's routing result for ``payload``.

    Raises:
        HTTPException: 400 with code ``PERSONAL_ROUTING_ERROR`` when the
            service raises ``PersonalRoutingError``.
    """
    try:
        # Service construction stays inside the try so its errors are mapped too.
        service = personal_service(request)
        decision = service.route(payload)
    except PersonalRoutingError as exc:
        error_body = {"code": "PERSONAL_ROUTING_ERROR", "message": str(exc)}
        raise HTTPException(status_code=400, detail=error_body) from exc
    return decision
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@router.post("/ask", response_model=PersonalAskResponse)
async def ask_prompt(payload: PersonalPromptRequest, request: Request) -> PersonalAskResponse:
    """Await the personal service's ``ask`` for ``payload`` and return its result.

    Raises:
        HTTPException: 400 with code ``PERSONAL_ROUTING_ERROR`` when the
            service raises ``PersonalRoutingError``.
    """
    try:
        # Service construction stays inside the try so its errors are mapped too.
        service = personal_service(request)
        reply = await service.ask(payload)
    except PersonalRoutingError as exc:
        error_body = {"code": "PERSONAL_ROUTING_ERROR", "message": str(exc)}
        raise HTTPException(status_code=400, detail=error_body) from exc
    return reply
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@router.get("/models", response_model=list[PersonalModelRead])
async def list_personal_models(request: Request) -> list[PersonalModelRead]:
    """Return the model list produced by the personal service's ``models()``."""
    service = personal_service(request)
    return service.models()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@router.get("/usage")
async def usage(request: Request) -> dict[str, object]:
    """Return the dictionary produced by the personal service's ``usage()``."""
    service = personal_service(request)
    return service.usage()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@router.get("/savings")
async def savings(
    request: Request,
    days: int = Query(default=7, ge=1, le=365),
    since: str | None = None,
) -> dict[str, object]:
    """Return the personal service's savings report for a time window.

    Args:
        request: Incoming request; used to reach the service container.
        days: Window length in days (1-365, default 7). Ignored when
            ``since`` is supplied.
        since: Optional ISO-format date/datetime string. When given, it is
            parsed with ``datetime.fromisoformat`` and passed on as the window
            start, with ``days`` forwarded as ``None``.

    Raises:
        HTTPException: 400 when ``since`` is not a valid ISO-format value.
    """
    since_dt: datetime | None = None
    if since:
        try:
            since_dt = datetime.fromisoformat(since)
        except ValueError as exc:
            # A malformed query value is a client error, not a server fault:
            # answer 400 instead of letting the ValueError surface as a 500.
            raise HTTPException(
                status_code=400,
                detail={
                    "code": "INVALID_SINCE",
                    "message": "since must be an ISO 8601 date or datetime.",
                },
            ) from exc
    return personal_service(request).savings(
        days=None if since_dt is not None else days,
        since=since_dt,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@router.get("/history", response_model=list[PersonalTelemetryRead])
async def history(
    request: Request,
    limit: int = Query(default=100, ge=1, le=1000),
) -> list[PersonalTelemetryRead]:
    """Return the personal service's history, capped at ``limit`` (1-1000, default 100)."""
    service = personal_service(request)
    return service.history(limit=limit)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@router.post("/memory", response_model=PersonalMemoryRead)
async def add_memory(payload: PersonalMemoryCreate, request: Request) -> PersonalMemoryRead:
    """Pass ``payload`` to the personal service's ``add_memory`` and return its result."""
    service = personal_service(request)
    return service.add_memory(payload)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@router.get("/memory/search", response_model=list[PersonalMemoryRead])
async def search_memory(
    request: Request,
    q: str,
    project: str | None = None,
) -> list[PersonalMemoryRead]:
    """Search memories for the query ``q``, forwarding the optional ``project`` value."""
    service = personal_service(request)
    return service.search_memory(q, project=project)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@router.post("/feedback", response_model=FeedbackRead)
async def add_feedback(payload: FeedbackCreate, request: Request) -> FeedbackRead:
    """Pass ``payload`` to the personal service's ``add_feedback`` and return its result."""
    service = personal_service(request)
    return service.add_feedback(payload)
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, HTTPException, Request, status
|
|
8
|
+
from fastapi.responses import StreamingResponse
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from switchboard.app.models.backends import SwitchboardResponse, backend_display_name
|
|
12
|
+
from switchboard.app.models.personal import FeedbackCreate, FeedbackRead
|
|
13
|
+
from switchboard.app.services.container import ServiceContainer
|
|
14
|
+
from switchboard.app.services.core_factory import build_configured_core_service
|
|
15
|
+
from switchboard.app.services.personal_switchboard import PersonalSwitchboardService
|
|
16
|
+
from switchboard.app.services.switchboard_core import SwitchboardCoreService
|
|
17
|
+
|
|
18
|
+
# Router for the UI endpoints; no prefix, so routes are registered at their literal paths.
router = APIRouter(tags=["ui"])
|
|
19
|
+
|
|
20
|
+
# Backend values accepted from the UI, mapped to the backend name handed to
# the core service's ``ask``. "auto" maps to None.
# NOTE(review): None presumably lets the core pick the backend itself — confirm.
BACKEND_BY_UI_VALUE: dict[str, str | None] = {
    "auto": None,
    "codex": "codex",
    "claude": "claude-code",
    "ollama": "ollama",
}
|
|
26
|
+
|
|
27
|
+
# Reverse lookup: backend name -> the value the UI uses for it.
# Only "claude-code" differs from its UI value ("claude").
UI_VALUE_BY_BACKEND = {
    "codex": "codex",
    "claude-code": "claude",
    "ollama": "ollama",
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class UiChatRequest(BaseModel):
    """Request body for the UI chat endpoints."""

    # User message; must contain at least one character.
    message: str = Field(min_length=1)
    # Requested backend as a UI value; defaults to "auto".
    backend: str = "auto"
    # Optional session id forwarded to the core service.
    session_id: str | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class UiChatResponse(BaseModel):
    """Response body of the non-streaming UI chat endpoint."""

    # Session id reported by the core response ("" when it reported none).
    session_id: str
    # Answer text, stripped of surrounding whitespace.
    answer: str
    # Backend name taken from the core response.
    backend: str
    # Model name to show in the UI.
    display_model: str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class UiHistoryMessage(BaseModel):
    """One stored chat message as returned by the history endpoint."""

    message_id: str
    # The history endpoint only emits "user" and "assistant" roles.
    role: str
    content: str
    display_model: str | None = None
    backend: str | None = None
    # Taken from the message metadata; None when the metadata has no request id.
    request_id: str | None = None
    # ISO-format timestamp string.
    created_at: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class UiHistoryResponse(BaseModel):
    """Response body of the chat history endpoint."""

    # The session id that was queried.
    session_id: str
    # Messages of that session; empty when the session is unknown.
    messages: list[UiHistoryMessage]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class UiFeedbackRequest(BaseModel):
    """Request body for the UI feedback endpoint."""

    # Id of the request being rated; must contain at least one character.
    request_id: str = Field(min_length=1)
    # Rating label; the endpoint accepts good, too-weak, wrong-route, or bad.
    rating: str = Field(min_length=1)
    # Optional free-text note.
    note: str | None = None
    # Thumbs-down disambiguation: "bad_answer" or "wrong_model".
    detail: str | None = None
    corrected_backend: str | None = None  # ollama | codex | claude-code
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def core_service(request: Request) -> SwitchboardCoreService:
    """Build the configured core service for this request.

    The service is created by ``build_configured_core_service`` from the app's
    container, with the process's current working directory as ``cwd``.
    """
    services: ServiceContainer = request.app.state.container
    working_dir = Path.cwd()
    return build_configured_core_service(services, cwd=working_dir)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def ui_backend_name(backend: str) -> str:
    """Translate a backend name to its UI value; unknown names pass through unchanged."""
    ui_value = UI_VALUE_BY_BACKEND.get(backend, backend)
    return ui_value
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def display_model_name(backend: str) -> str:
    """Return the display name that ``backend_display_name`` gives for ``backend``."""
    label = backend_display_name(backend)
    return label
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def response_display_model_name(response: SwitchboardResponse) -> str:
    """Pick the model name to display for ``response``.

    For the "switchboard" and "time" backends a non-empty ``selected_model``
    is shown as-is; in every other case the backend's display name is used.
    """
    handled_internally = response.backend in {"switchboard", "time"}
    preferred = response.selected_model if handled_internally else None
    return preferred or display_model_name(response.backend)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def clean_backend_error(response: SwitchboardResponse) -> str:
    """Turn a failed response's raw error text into a user-facing message.

    The lower-cased ``error_message`` is matched against known substrings in a
    fixed order (privacy block, timeout, unavailable / not found, missing
    Ollama chat model, no configured model); the first match decides the
    message. Anything unmatched yields a generic fallback.
    """
    ui_name = ui_backend_name(response.backend)
    labels = {"codex": "Codex", "claude": "Claude", "ollama": "Ollama"}
    label = labels.get(ui_name, "The selected backend")
    lowered = (response.error_message or "").lower()

    def mentions(*needles: str) -> bool:
        # True when any of the given substrings occurs in the error text.
        return any(needle in lowered for needle in needles)

    if mentions("private mode", "sensitive content"):
        return (
            "Private mode blocked this request for the selected model. "
            "Choose Ollama or redact sensitive details."
        )

    if mentions("timed out"):
        return (
            f"{label} timed out. Try a shorter prompt, increase the timeout from "
            "the CLI, or choose another model."
        )

    if mentions("unavailable", "not found"):
        # Backend-specific guidance first, generic wording otherwise.
        per_backend = {
            "ollama": "Ollama is not running. Start Ollama or choose another model.",
            "codex": (
                "Codex is not available. Please install and authenticate Codex, "
                "or choose another model."
            ),
            "claude": (
                "Claude is not available. Please install and authenticate Claude Code, "
                "or choose another model."
            ),
        }
        if ui_name in per_backend:
            return per_backend[ui_name]
        return f"{label} is not available. Choose another model."

    if mentions("no enabled ollama chat model"):
        return "Ollama has no enabled chat model. Install or enable a chat model first."

    if mentions("no configured switchboard model"):
        return "No Switchboard model is available. Install Codex, Claude Code, or Ollama."

    return "Something went wrong. Please try again or choose another model."
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def validated_message_and_backend(payload: UiChatRequest) -> tuple[str, str]:
    """Normalise and validate the message and backend choice of a chat request.

    Returns:
        The whitespace-stripped message and the stripped, lower-cased backend
        value.

    Raises:
        HTTPException: 400 when the message is blank after stripping, or when
            the backend value is not a key of ``BACKEND_BY_UI_VALUE``.
    """
    text = payload.message.strip()
    if text == "":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"message": "Enter a message before sending."},
        )

    choice = payload.backend.strip().lower()
    if choice in BACKEND_BY_UI_VALUE:
        return text, choice

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail={"message": "Choose Auto, Codex, Claude, or Ollama."},
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def ask_switchboard(
    payload: UiChatRequest,
    request: Request,
) -> SwitchboardResponse:
    """Validate a UI chat request and forward it to the core service.

    The call is made with project "ui" and metadata recording the UI surface
    and the backend value the user requested.
    """
    text, choice = validated_message_and_backend(payload)
    call_metadata = {"surface": "ui", "requested_backend": choice}
    service = core_service(request)
    return service.ask(
        text,
        backend=BACKEND_BY_UI_VALUE[choice],
        project="ui",
        metadata=call_metadata,
        session_id=payload.session_id,
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def response_payload(response: SwitchboardResponse) -> UiChatResponse:
    """Convert a core response into the UI chat response model.

    A missing session id becomes "" and missing content becomes an empty
    answer; the answer text is stripped of surrounding whitespace.
    """
    session = response.session_id or ""
    answer_text = (response.content or "").strip()
    return UiChatResponse(
        session_id=session,
        answer=answer_text,
        backend=response.backend,
        display_model=response_display_model_name(response),
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def stream_event(event_type: str, **payload: object) -> str:
    """Serialise one stream event as a single newline-terminated JSON line.

    The "type" key comes first, followed by the keyword arguments in the
    order they were passed.
    """
    event: dict[str, object] = {"type": event_type}
    event.update(payload)
    line = json.dumps(event)
    return f"{line}\n"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def answer_chunks(answer: str, chunk_size: int = 24) -> Iterator[str]:
    """Yield consecutive slices of ``answer`` that are at most ``chunk_size`` long.

    An empty ``answer`` yields nothing; the final slice may be shorter.
    """
    offsets = range(0, len(answer), chunk_size)
    yield from (answer[offset : offset + chunk_size] for offset in offsets)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def stream_chat_response(response: SwitchboardResponse) -> Iterator[str]:
    """Yield the newline-delimited JSON events that make up one chat stream.

    Every stream opens with a "start" event. A failed response then produces a
    single "error" event. A successful one produces "metadata", one "chunk"
    event per slice of the answer, and a closing "done" event that repeats the
    metadata fields.
    """
    yield stream_event("start", session_id=response.session_id)

    if response.success:
        ui_payload = response_payload(response)
        # Shared by the "metadata" and "done" events; key order is kept as emitted.
        summary = {
            "session_id": ui_payload.session_id,
            "backend": ui_payload.backend,
            "display_model": ui_payload.display_model,
            "request_id": response.request_id,
            "routing_reason": response.routing_reason,
            "latency_ms": response.latency_ms,
            "cost_type": response.cost_type.value,
            "selected_model": response.selected_model,
        }
        yield stream_event("metadata", **summary)

        # An empty answer is replaced by a placeholder sentence.
        text = ui_payload.answer if ui_payload.answer else "No answer returned."
        for piece in answer_chunks(text):
            yield stream_event("chunk", text=piece)

        yield stream_event("done", **summary)
        return

    failure = {
        "message": clean_backend_error(response),
        "backend": response.backend,
        "display_model": response_display_model_name(response),
        "session_id": response.session_id,
    }
    yield stream_event("error", **failure)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@router.post("/api/chat", response_model=UiChatResponse)
def chat(payload: UiChatRequest, request: Request) -> UiChatResponse:
    """Answer a UI chat message in a single, non-streaming response.

    Raises:
        HTTPException: 502 carrying a cleaned-up error message, the backend,
            the display model and the session id when the core response is
            not successful.
    """
    response = ask_switchboard(payload, request)
    if response.success:
        return response_payload(response)

    failure = {
        "message": clean_backend_error(response),
        "backend": response.backend,
        "display_model": response_display_model_name(response),
        "session_id": response.session_id,
    }
    raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=failure)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@router.post("/api/chat/stream")
def chat_stream(payload: UiChatRequest, request: Request) -> StreamingResponse:
    """Answer a UI chat message as an ``application/x-ndjson`` event stream.

    The core service is called before the response starts; the finished
    answer is then emitted as events by ``stream_chat_response``.
    """
    response = ask_switchboard(payload, request)
    events = stream_chat_response(response)
    return StreamingResponse(events, media_type="application/x-ndjson")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@router.get("/api/chat/history", response_model=UiHistoryResponse)
def chat_history(session_id: str, request: Request) -> UiHistoryResponse:
    """Return the user and assistant messages stored for ``session_id``.

    An unknown session yields an empty message list rather than an error.
    Messages with any other role are left out.
    """
    services: ServiceContainer = request.app.state.container
    if services.context_store.get_session(session_id) is None:
        return UiHistoryResponse(session_id=session_id, messages=[])

    visible: list[UiHistoryMessage] = []
    for record in services.context_store.list_messages(session_id):
        if record.role not in {"user", "assistant"}:
            continue
        # A missing or empty request id in the metadata is reported as None.
        linked_request = str(record.metadata.get("request_id") or "") or None
        visible.append(
            UiHistoryMessage(
                message_id=record.message_id,
                role=record.role,
                content=record.content,
                display_model=record.display_model,
                backend=record.backend,
                request_id=linked_request,
                created_at=record.created_at.isoformat(),
            )
        )
    return UiHistoryResponse(session_id=session_id, messages=visible)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# Backend names accepted as ``corrected_backend`` on a "wrong_model" feedback.
VALID_CORRECTED_BACKENDS = ("ollama", "codex", "claude-code")
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@router.post("/api/chat/feedback", response_model=FeedbackRead)
def chat_feedback(payload: UiFeedbackRequest, request: Request) -> FeedbackRead:
    """Record a rating for an earlier request and snapshot it as a feedback example.

    Raises:
        HTTPException: 400 when the rating is not one of the accepted labels,
            or when ``detail`` is "wrong_model" without a valid
            ``corrected_backend``.
    """
    services: ServiceContainer = request.app.state.container

    accepted_ratings = {"good", "too-weak", "wrong-route", "bad"}
    rating = payload.rating.strip().lower()
    if rating not in accepted_ratings:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"message": "Rating must be good, too-weak, wrong-route, or bad."},
        )

    # Blank optional fields are treated the same as absent ones.
    detail = (payload.detail or "").strip().lower() or None
    corrected = (payload.corrected_backend or "").strip().lower() or None

    # Reject before anything is stored: a wrong-model verdict without a
    # valid correction cannot train the router but would still count
    # toward the retrain threshold.
    if detail == "wrong_model" and corrected not in VALID_CORRECTED_BACKENDS:
        allowed = ", ".join(VALID_CORRECTED_BACKENDS)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"message": f"corrected_backend must be one of: {allowed}."},
        )

    target_request = payload.request_id.strip()
    feedback = FeedbackCreate(request_id=target_request, rating=rating, note=payload.note)
    stored = PersonalSwitchboardService(services).add_feedback(feedback)

    _store_feedback_example(
        services,
        request_id=target_request,
        rating=rating,
        detail=detail,
        corrected_backend=corrected,
    )
    return stored
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _store_feedback_example(
    container: ServiceContainer,
    *,
    request_id: str,
    rating: str,
    detail: str | None,
    corrected_backend: str | None,
) -> None:
    """Closed feedback loop: snapshot (prompt, context, response) for
    thumbs-downs and trigger gated retraining at the configured threshold.

    Nothing is stored when ``store_feedback_examples`` is disabled in the
    preferences or when the rating is "good".

    Data integrity: feedback for a request_id with no recorded metric stores
    nothing (there is nothing to learn from), and repeat feedback for the
    same request_id replaces the earlier example (latest verdict wins), so
    the retrain threshold counts distinct requests only.

    Privacy: requests flagged sensitive at routing time (private-mode reroute
    or learned sensitivity escalation) never get a context snapshot, so their
    examples store context_text="". The prompt itself is still stored on an
    explicit thumbs-down because a "wrong model" correction can only train
    the router from the (prompt, corrected label) pair — submitting that
    correction is the user's deliberate choice.

    Failure handling: this is best-effort. Any exception raised while storing
    the example or triggering retraining is logged and swallowed so the
    caller's feedback request never fails because of it.
    """
    import logging

    preferences = container.personal_config.preferences
    if not preferences.store_feedback_examples or rating == "good":
        return
    try:
        from switchboard.app.models.telemetry import FeedbackExampleRecord
        from switchboard.training.feedback_loop import (
            FeedbackExampleStore,
            maybe_trigger_retraining,
        )

        engine = container.memory_repository.engine
        metric = container.backend_metrics_repository.get(request_id)
        if metric is None:
            # Unknown request: storing an empty example would only pad the
            # retrain threshold with noise.
            return
        backend = metric.backend
        route_type = str(metric.metadata.get("route_type") or "") or None
        sensitive = bool(
            metric.metadata.get("private_mode_rerouted")
            or metric.metadata.get("sensitivity_escalated")
        )
        prompt_text = ""
        response_text = ""
        session_id = str(metric.metadata.get("session_id") or "")
        if session_id:
            # Recover the prompt/response pair from the session messages that
            # carry this request id in their metadata.
            for message in container.context_store.list_messages(session_id):
                if str(message.metadata.get("request_id") or "") == request_id:
                    if message.role == "user":
                        prompt_text = message.content
                    elif message.role == "assistant":
                        response_text = message.content
        store = FeedbackExampleStore(engine)
        store.add_example(
            FeedbackExampleRecord(
                request_id=request_id,
                rating=rating,
                detail=detail,
                # A correction is only meaningful for a wrong-model verdict.
                corrected_backend=corrected_backend if detail == "wrong_model" else None,
                prompt=prompt_text,
                context_text="" if sensitive else store.get_recent_context(request_id),
                response_text=response_text,
                route_type=route_type,
                backend=backend,
            )
        )
        maybe_trigger_retraining(
            engine=engine,
            threshold=preferences.feedback_retrain_threshold,
            weights_path=preferences.router_weights_path,
        )
    except Exception:  # feedback storage must never fail the click
        # Still best-effort, but no longer silent: leave a traceback in the
        # log so a broken feedback store or retraining run can be diagnosed.
        logging.getLogger(__name__).exception(
            "Failed to store feedback example for request %s", request_id
        )
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from switchboard.app.backends.base import AgentAdapter
|
|
2
|
+
from switchboard.app.backends.cli_agents import ClaudeCodeCliAdapter, CodexCliAdapter
|
|
3
|
+
from switchboard.app.backends.ollama_backend import OllamaAdapter
|
|
4
|
+
from switchboard.app.backends.registry import BackendRegistry
|
|
5
|
+
|
|
6
|
+
# Names re-exported by this package: the adapter base class, the registry,
# and the concrete adapters imported above.
__all__ = [
    "AgentAdapter",
    "BackendRegistry",
    "ClaudeCodeCliAdapter",
    "CodexCliAdapter",
    "OllamaAdapter",
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from switchboard.app.models.backends import (
|
|
6
|
+
BackendCostType,
|
|
7
|
+
BackendInfo,
|
|
8
|
+
SwitchboardRequest,
|
|
9
|
+
SwitchboardResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AgentAdapter(ABC):
    """Abstract interface that a backend adapter has to implement.

    Subclasses are expected to provide the ``name`` and ``cost_type``
    attributes declared below and to implement all three abstract methods.
    """

    # Declared without values here; presumably set by each concrete adapter.
    name: str
    cost_type: BackendCostType

    @abstractmethod
    def is_available(self) -> bool:
        """Return whether this backend is available."""
        raise NotImplementedError

    @abstractmethod
    def availability(self) -> BackendInfo:
        """Return a ``BackendInfo`` describing this backend's availability."""
        raise NotImplementedError

    @abstractmethod
    def ask(self, request: SwitchboardRequest) -> SwitchboardResponse:
        """Handle ``request`` and return the backend's ``SwitchboardResponse``."""
        raise NotImplementedError
|