truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,11 +12,12 @@ All data is persisted to the database.
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
15
16
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
17
|
|
|
17
18
|
from ..core.model_monitoring import ModelMonitoringService
|
|
18
|
-
from ..db import
|
|
19
|
-
from ..schemas.base import
|
|
19
|
+
from ..db import get_db_session
|
|
20
|
+
from ..schemas.base import MessageResponse
|
|
20
21
|
from ..schemas.model_monitoring import (
|
|
21
22
|
AcknowledgeAlertRequest,
|
|
22
23
|
AlertHandlerListResponse,
|
|
@@ -46,7 +47,7 @@ from ..schemas.model_monitoring import (
|
|
|
46
47
|
router = APIRouter(prefix="/model-monitoring", tags=["model-monitoring"])
|
|
47
48
|
|
|
48
49
|
|
|
49
|
-
def get_service(session: AsyncSession = Depends(
|
|
50
|
+
def get_service(session: AsyncSession = Depends(get_db_session)) -> ModelMonitoringService:
|
|
50
51
|
"""Get model monitoring service instance."""
|
|
51
52
|
return ModelMonitoringService(session)
|
|
52
53
|
|
|
@@ -56,13 +57,13 @@ def get_service(session: AsyncSession = Depends(get_session)) -> ModelMonitoring
|
|
|
56
57
|
# =============================================================================
|
|
57
58
|
|
|
58
59
|
|
|
59
|
-
@router.get("/models", response_model=
|
|
60
|
+
@router.get("/models", response_model=RegisteredModelListResponse)
|
|
60
61
|
async def list_models(
|
|
61
62
|
status: ModelStatus | None = None,
|
|
62
63
|
offset: int = Query(0, ge=0),
|
|
63
64
|
limit: int = Query(50, ge=1, le=100),
|
|
64
65
|
service: ModelMonitoringService = Depends(get_service),
|
|
65
|
-
):
|
|
66
|
+
) -> RegisteredModelListResponse:
|
|
66
67
|
"""List all registered models."""
|
|
67
68
|
status_filter = status.value if status else None
|
|
68
69
|
models, total = await service.list_models(
|
|
@@ -88,21 +89,19 @@ async def list_models(
|
|
|
88
89
|
for m in models
|
|
89
90
|
]
|
|
90
91
|
|
|
91
|
-
return
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
limit=limit,
|
|
97
|
-
)
|
|
92
|
+
return RegisteredModelListResponse(
|
|
93
|
+
items=items,
|
|
94
|
+
total=total,
|
|
95
|
+
offset=offset,
|
|
96
|
+
limit=limit,
|
|
98
97
|
)
|
|
99
98
|
|
|
100
99
|
|
|
101
|
-
@router.post("/models", response_model=
|
|
100
|
+
@router.post("/models", response_model=RegisteredModelResponse, status_code=201)
|
|
102
101
|
async def register_model(
|
|
103
102
|
request: RegisterModelRequest,
|
|
104
103
|
service: ModelMonitoringService = Depends(get_service),
|
|
105
|
-
):
|
|
104
|
+
) -> RegisteredModelResponse:
|
|
106
105
|
"""Register a new model for monitoring."""
|
|
107
106
|
model = await service.register_model(
|
|
108
107
|
name=request.name,
|
|
@@ -112,60 +111,56 @@ async def register_model(
|
|
|
112
111
|
metadata=request.metadata,
|
|
113
112
|
)
|
|
114
113
|
|
|
115
|
-
return
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
updated_at=model.updated_at,
|
|
130
|
-
)
|
|
114
|
+
return RegisteredModelResponse(
|
|
115
|
+
id=model.id,
|
|
116
|
+
name=model.name,
|
|
117
|
+
version=model.version,
|
|
118
|
+
description=model.description or "",
|
|
119
|
+
status=ModelStatus(model.status),
|
|
120
|
+
config=model.config,
|
|
121
|
+
metadata=model.metadata_json or {},
|
|
122
|
+
prediction_count=model.prediction_count,
|
|
123
|
+
last_prediction_at=model.last_prediction_at,
|
|
124
|
+
current_drift_score=model.current_drift_score,
|
|
125
|
+
health_score=model.health_score,
|
|
126
|
+
created_at=model.created_at,
|
|
127
|
+
updated_at=model.updated_at,
|
|
131
128
|
)
|
|
132
129
|
|
|
133
130
|
|
|
134
|
-
@router.get("/models/{model_id}", response_model=
|
|
131
|
+
@router.get("/models/{model_id}", response_model=RegisteredModelResponse)
|
|
135
132
|
async def get_model(
|
|
136
133
|
model_id: str,
|
|
137
134
|
service: ModelMonitoringService = Depends(get_service),
|
|
138
|
-
):
|
|
135
|
+
) -> RegisteredModelResponse:
|
|
139
136
|
"""Get a registered model by ID."""
|
|
140
137
|
model = await service.get_model(model_id)
|
|
141
138
|
if not model:
|
|
142
139
|
raise HTTPException(status_code=404, detail="Model not found")
|
|
143
140
|
|
|
144
|
-
return
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
updated_at=model.updated_at,
|
|
159
|
-
)
|
|
141
|
+
return RegisteredModelResponse(
|
|
142
|
+
id=model.id,
|
|
143
|
+
name=model.name,
|
|
144
|
+
version=model.version,
|
|
145
|
+
description=model.description or "",
|
|
146
|
+
status=ModelStatus(model.status),
|
|
147
|
+
config=model.config,
|
|
148
|
+
metadata=model.metadata_json or {},
|
|
149
|
+
prediction_count=model.prediction_count,
|
|
150
|
+
last_prediction_at=model.last_prediction_at,
|
|
151
|
+
current_drift_score=model.current_drift_score,
|
|
152
|
+
health_score=model.health_score,
|
|
153
|
+
created_at=model.created_at,
|
|
154
|
+
updated_at=model.updated_at,
|
|
160
155
|
)
|
|
161
156
|
|
|
162
157
|
|
|
163
|
-
@router.put("/models/{model_id}", response_model=
|
|
158
|
+
@router.put("/models/{model_id}", response_model=RegisteredModelResponse)
|
|
164
159
|
async def update_model(
|
|
165
160
|
model_id: str,
|
|
166
161
|
request: UpdateModelRequest,
|
|
167
162
|
service: ModelMonitoringService = Depends(get_service),
|
|
168
|
-
):
|
|
163
|
+
) -> RegisteredModelResponse:
|
|
169
164
|
"""Update a registered model."""
|
|
170
165
|
updates = {}
|
|
171
166
|
if request.name is not None:
|
|
@@ -185,62 +180,60 @@ async def update_model(
|
|
|
185
180
|
if not model:
|
|
186
181
|
raise HTTPException(status_code=404, detail="Model not found")
|
|
187
182
|
|
|
188
|
-
return
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
updated_at=model.updated_at,
|
|
203
|
-
)
|
|
183
|
+
return RegisteredModelResponse(
|
|
184
|
+
id=model.id,
|
|
185
|
+
name=model.name,
|
|
186
|
+
version=model.version,
|
|
187
|
+
description=model.description or "",
|
|
188
|
+
status=ModelStatus(model.status),
|
|
189
|
+
config=model.config,
|
|
190
|
+
metadata=model.metadata_json or {},
|
|
191
|
+
prediction_count=model.prediction_count,
|
|
192
|
+
last_prediction_at=model.last_prediction_at,
|
|
193
|
+
current_drift_score=model.current_drift_score,
|
|
194
|
+
health_score=model.health_score,
|
|
195
|
+
created_at=model.created_at,
|
|
196
|
+
updated_at=model.updated_at,
|
|
204
197
|
)
|
|
205
198
|
|
|
206
199
|
|
|
207
|
-
@router.delete("/models/{model_id}")
|
|
200
|
+
@router.delete("/models/{model_id}", response_model=MessageResponse)
|
|
208
201
|
async def delete_model(
|
|
209
202
|
model_id: str,
|
|
210
203
|
service: ModelMonitoringService = Depends(get_service),
|
|
211
|
-
):
|
|
204
|
+
) -> MessageResponse:
|
|
212
205
|
"""Delete a registered model."""
|
|
213
206
|
deleted = await service.delete_model(model_id)
|
|
214
207
|
if not deleted:
|
|
215
208
|
raise HTTPException(status_code=404, detail="Model not found")
|
|
216
209
|
|
|
217
|
-
return
|
|
210
|
+
return MessageResponse(message="Model deleted")
|
|
218
211
|
|
|
219
212
|
|
|
220
|
-
@router.post("/models/{model_id}/pause")
|
|
213
|
+
@router.post("/models/{model_id}/pause", response_model=MessageResponse)
|
|
221
214
|
async def pause_model(
|
|
222
215
|
model_id: str,
|
|
223
216
|
service: ModelMonitoringService = Depends(get_service),
|
|
224
|
-
):
|
|
217
|
+
) -> MessageResponse:
|
|
225
218
|
"""Pause monitoring for a model."""
|
|
226
219
|
model = await service.pause_model(model_id)
|
|
227
220
|
if not model:
|
|
228
221
|
raise HTTPException(status_code=404, detail="Model not found")
|
|
229
222
|
|
|
230
|
-
return
|
|
223
|
+
return MessageResponse(message="Model monitoring paused")
|
|
231
224
|
|
|
232
225
|
|
|
233
|
-
@router.post("/models/{model_id}/resume")
|
|
226
|
+
@router.post("/models/{model_id}/resume", response_model=MessageResponse)
|
|
234
227
|
async def resume_model(
|
|
235
228
|
model_id: str,
|
|
236
229
|
service: ModelMonitoringService = Depends(get_service),
|
|
237
|
-
):
|
|
230
|
+
) -> MessageResponse:
|
|
238
231
|
"""Resume monitoring for a model."""
|
|
239
232
|
model = await service.resume_model(model_id)
|
|
240
233
|
if not model:
|
|
241
234
|
raise HTTPException(status_code=404, detail="Model not found")
|
|
242
235
|
|
|
243
|
-
return
|
|
236
|
+
return MessageResponse(message="Model monitoring resumed")
|
|
244
237
|
|
|
245
238
|
|
|
246
239
|
# =============================================================================
|
|
@@ -250,13 +243,14 @@ async def resume_model(
|
|
|
250
243
|
|
|
251
244
|
@router.post(
|
|
252
245
|
"/models/{model_id}/predictions",
|
|
253
|
-
response_model=
|
|
246
|
+
response_model=RecordPredictionResponse,
|
|
247
|
+
status_code=201,
|
|
254
248
|
)
|
|
255
249
|
async def record_prediction(
|
|
256
250
|
model_id: str,
|
|
257
251
|
request: RecordPredictionRequest,
|
|
258
252
|
service: ModelMonitoringService = Depends(get_service),
|
|
259
|
-
):
|
|
253
|
+
) -> RecordPredictionResponse:
|
|
260
254
|
"""Record a model prediction."""
|
|
261
255
|
try:
|
|
262
256
|
prediction = await service.record_prediction(
|
|
@@ -268,35 +262,31 @@ async def record_prediction(
|
|
|
268
262
|
metadata=request.metadata,
|
|
269
263
|
)
|
|
270
264
|
|
|
271
|
-
return
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
recorded_at=prediction.recorded_at,
|
|
276
|
-
)
|
|
265
|
+
return RecordPredictionResponse(
|
|
266
|
+
id=prediction.id,
|
|
267
|
+
model_id=model_id,
|
|
268
|
+
recorded_at=prediction.recorded_at,
|
|
277
269
|
)
|
|
278
270
|
except ValueError as e:
|
|
279
271
|
raise HTTPException(status_code=404, detail=str(e))
|
|
280
272
|
|
|
281
273
|
|
|
282
|
-
@router.get("/models/{model_id}/metrics", response_model=
|
|
274
|
+
@router.get("/models/{model_id}/metrics", response_model=MetricsResponse)
|
|
283
275
|
async def get_model_metrics(
|
|
284
276
|
model_id: str,
|
|
285
277
|
hours: int = Query(24, ge=1, le=168),
|
|
286
278
|
service: ModelMonitoringService = Depends(get_service),
|
|
287
|
-
):
|
|
279
|
+
) -> MetricsResponse:
|
|
288
280
|
"""Get metrics for a model."""
|
|
289
281
|
try:
|
|
290
282
|
metrics_data = await service.get_model_metrics(model_id, hours)
|
|
291
283
|
|
|
292
|
-
return
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
data_points=metrics_data["data_points"],
|
|
299
|
-
)
|
|
284
|
+
return MetricsResponse(
|
|
285
|
+
model_id=metrics_data["model_id"],
|
|
286
|
+
model_name=metrics_data["model_name"],
|
|
287
|
+
time_range_hours=metrics_data["time_range_hours"],
|
|
288
|
+
metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
|
|
289
|
+
data_points=metrics_data["data_points"],
|
|
300
290
|
)
|
|
301
291
|
except ValueError as e:
|
|
302
292
|
raise HTTPException(status_code=404, detail=str(e))
|
|
@@ -307,14 +297,14 @@ async def get_model_metrics(
|
|
|
307
297
|
# =============================================================================
|
|
308
298
|
|
|
309
299
|
|
|
310
|
-
@router.get("/rules", response_model=
|
|
300
|
+
@router.get("/rules", response_model=AlertRuleListResponse)
|
|
311
301
|
async def list_alert_rules(
|
|
312
302
|
model_id: str | None = None,
|
|
313
303
|
active_only: bool = False,
|
|
314
304
|
offset: int = Query(0, ge=0),
|
|
315
305
|
limit: int = Query(50, ge=1, le=100),
|
|
316
306
|
service: ModelMonitoringService = Depends(get_service),
|
|
317
|
-
):
|
|
307
|
+
) -> AlertRuleListResponse:
|
|
318
308
|
"""List all alert rules."""
|
|
319
309
|
rules = await service.get_alert_rules(model_id=model_id, active_only=active_only)
|
|
320
310
|
|
|
@@ -339,21 +329,19 @@ async def list_alert_rules(
|
|
|
339
329
|
for r in paginated
|
|
340
330
|
]
|
|
341
331
|
|
|
342
|
-
return
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
limit=limit,
|
|
348
|
-
)
|
|
332
|
+
return AlertRuleListResponse(
|
|
333
|
+
items=items,
|
|
334
|
+
total=total,
|
|
335
|
+
offset=offset,
|
|
336
|
+
limit=limit,
|
|
349
337
|
)
|
|
350
338
|
|
|
351
339
|
|
|
352
|
-
@router.post("/rules", response_model=
|
|
340
|
+
@router.post("/rules", response_model=AlertRuleResponse, status_code=201)
|
|
353
341
|
async def create_alert_rule(
|
|
354
342
|
request: CreateAlertRuleRequest,
|
|
355
343
|
service: ModelMonitoringService = Depends(get_service),
|
|
356
|
-
):
|
|
344
|
+
) -> AlertRuleResponse:
|
|
357
345
|
"""Create a new alert rule."""
|
|
358
346
|
try:
|
|
359
347
|
rule = await service.create_alert_rule(
|
|
@@ -364,30 +352,28 @@ async def create_alert_rule(
|
|
|
364
352
|
severity=request.severity.value,
|
|
365
353
|
)
|
|
366
354
|
|
|
367
|
-
return
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
updated_at=rule.updated_at,
|
|
380
|
-
)
|
|
355
|
+
return AlertRuleResponse(
|
|
356
|
+
id=rule.id,
|
|
357
|
+
name=rule.name,
|
|
358
|
+
model_id=rule.model_id,
|
|
359
|
+
rule_type=rule.rule_type,
|
|
360
|
+
severity=AlertSeverity(rule.severity),
|
|
361
|
+
config=rule.config,
|
|
362
|
+
is_active=rule.is_active,
|
|
363
|
+
last_triggered_at=rule.last_triggered_at,
|
|
364
|
+
trigger_count=rule.trigger_count,
|
|
365
|
+
created_at=rule.created_at,
|
|
366
|
+
updated_at=rule.updated_at,
|
|
381
367
|
)
|
|
382
368
|
except ValueError as e:
|
|
383
369
|
raise HTTPException(status_code=404, detail=str(e))
|
|
384
370
|
|
|
385
371
|
|
|
386
|
-
@router.get("/rules/{rule_id}", response_model=
|
|
372
|
+
@router.get("/rules/{rule_id}", response_model=AlertRuleResponse)
|
|
387
373
|
async def get_alert_rule(
|
|
388
374
|
rule_id: str,
|
|
389
375
|
service: ModelMonitoringService = Depends(get_service),
|
|
390
|
-
):
|
|
376
|
+
) -> AlertRuleResponse:
|
|
391
377
|
"""Get an alert rule by ID."""
|
|
392
378
|
rules = await service.get_alert_rules()
|
|
393
379
|
rule = next((r for r in rules if r.id == rule_id), None)
|
|
@@ -395,29 +381,27 @@ async def get_alert_rule(
|
|
|
395
381
|
if not rule:
|
|
396
382
|
raise HTTPException(status_code=404, detail="Alert rule not found")
|
|
397
383
|
|
|
398
|
-
return
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
updated_at=rule.updated_at,
|
|
411
|
-
)
|
|
384
|
+
return AlertRuleResponse(
|
|
385
|
+
id=rule.id,
|
|
386
|
+
name=rule.name,
|
|
387
|
+
model_id=rule.model_id,
|
|
388
|
+
rule_type=rule.rule_type,
|
|
389
|
+
severity=AlertSeverity(rule.severity),
|
|
390
|
+
config=rule.config,
|
|
391
|
+
is_active=rule.is_active,
|
|
392
|
+
last_triggered_at=rule.last_triggered_at,
|
|
393
|
+
trigger_count=rule.trigger_count,
|
|
394
|
+
created_at=rule.created_at,
|
|
395
|
+
updated_at=rule.updated_at,
|
|
412
396
|
)
|
|
413
397
|
|
|
414
398
|
|
|
415
|
-
@router.put("/rules/{rule_id}", response_model=
|
|
399
|
+
@router.put("/rules/{rule_id}", response_model=AlertRuleResponse)
|
|
416
400
|
async def update_alert_rule(
|
|
417
401
|
rule_id: str,
|
|
418
402
|
request: UpdateAlertRuleRequest,
|
|
419
403
|
service: ModelMonitoringService = Depends(get_service),
|
|
420
|
-
):
|
|
404
|
+
) -> AlertRuleResponse:
|
|
421
405
|
"""Update an alert rule."""
|
|
422
406
|
updates = {}
|
|
423
407
|
if request.name is not None:
|
|
@@ -433,34 +417,32 @@ async def update_alert_rule(
|
|
|
433
417
|
if not rule:
|
|
434
418
|
raise HTTPException(status_code=404, detail="Alert rule not found")
|
|
435
419
|
|
|
436
|
-
return
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
updated_at=rule.updated_at,
|
|
449
|
-
)
|
|
420
|
+
return AlertRuleResponse(
|
|
421
|
+
id=rule.id,
|
|
422
|
+
name=rule.name,
|
|
423
|
+
model_id=rule.model_id,
|
|
424
|
+
rule_type=rule.rule_type,
|
|
425
|
+
severity=AlertSeverity(rule.severity),
|
|
426
|
+
config=rule.config,
|
|
427
|
+
is_active=rule.is_active,
|
|
428
|
+
last_triggered_at=rule.last_triggered_at,
|
|
429
|
+
trigger_count=rule.trigger_count,
|
|
430
|
+
created_at=rule.created_at,
|
|
431
|
+
updated_at=rule.updated_at,
|
|
450
432
|
)
|
|
451
433
|
|
|
452
434
|
|
|
453
|
-
@router.delete("/rules/{rule_id}")
|
|
435
|
+
@router.delete("/rules/{rule_id}", response_model=MessageResponse)
|
|
454
436
|
async def delete_alert_rule(
|
|
455
437
|
rule_id: str,
|
|
456
438
|
service: ModelMonitoringService = Depends(get_service),
|
|
457
|
-
):
|
|
439
|
+
) -> MessageResponse:
|
|
458
440
|
"""Delete an alert rule."""
|
|
459
441
|
deleted = await service.delete_alert_rule(rule_id)
|
|
460
442
|
if not deleted:
|
|
461
443
|
raise HTTPException(status_code=404, detail="Alert rule not found")
|
|
462
444
|
|
|
463
|
-
return
|
|
445
|
+
return MessageResponse(message="Alert rule deleted")
|
|
464
446
|
|
|
465
447
|
|
|
466
448
|
# =============================================================================
|
|
@@ -468,13 +450,13 @@ async def delete_alert_rule(
|
|
|
468
450
|
# =============================================================================
|
|
469
451
|
|
|
470
452
|
|
|
471
|
-
@router.get("/handlers", response_model=
|
|
453
|
+
@router.get("/handlers", response_model=AlertHandlerListResponse)
|
|
472
454
|
async def list_alert_handlers(
|
|
473
455
|
active_only: bool = False,
|
|
474
456
|
offset: int = Query(0, ge=0),
|
|
475
457
|
limit: int = Query(50, ge=1, le=100),
|
|
476
458
|
service: ModelMonitoringService = Depends(get_service),
|
|
477
|
-
):
|
|
459
|
+
) -> AlertHandlerListResponse:
|
|
478
460
|
"""List all alert handlers."""
|
|
479
461
|
handlers = await service.get_alert_handlers(active_only=active_only)
|
|
480
462
|
|
|
@@ -498,21 +480,19 @@ async def list_alert_handlers(
|
|
|
498
480
|
for h in paginated
|
|
499
481
|
]
|
|
500
482
|
|
|
501
|
-
return
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
limit=limit,
|
|
507
|
-
)
|
|
483
|
+
return AlertHandlerListResponse(
|
|
484
|
+
items=items,
|
|
485
|
+
total=total,
|
|
486
|
+
offset=offset,
|
|
487
|
+
limit=limit,
|
|
508
488
|
)
|
|
509
489
|
|
|
510
490
|
|
|
511
|
-
@router.post("/handlers", response_model=
|
|
491
|
+
@router.post("/handlers", response_model=AlertHandlerResponse, status_code=201)
|
|
512
492
|
async def create_alert_handler(
|
|
513
493
|
request: CreateAlertHandlerRequest,
|
|
514
494
|
service: ModelMonitoringService = Depends(get_service),
|
|
515
|
-
):
|
|
495
|
+
) -> AlertHandlerResponse:
|
|
516
496
|
"""Create a new alert handler."""
|
|
517
497
|
handler = await service.create_alert_handler(
|
|
518
498
|
name=request.name,
|
|
@@ -520,28 +500,26 @@ async def create_alert_handler(
|
|
|
520
500
|
config=request.config,
|
|
521
501
|
)
|
|
522
502
|
|
|
523
|
-
return
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
updated_at=handler.updated_at,
|
|
535
|
-
)
|
|
503
|
+
return AlertHandlerResponse(
|
|
504
|
+
id=handler.id,
|
|
505
|
+
name=handler.name,
|
|
506
|
+
handler_type=handler.handler_type,
|
|
507
|
+
config=handler.config,
|
|
508
|
+
is_active=handler.is_active,
|
|
509
|
+
last_sent_at=handler.last_sent_at,
|
|
510
|
+
send_count=handler.send_count,
|
|
511
|
+
failure_count=handler.failure_count,
|
|
512
|
+
created_at=handler.created_at,
|
|
513
|
+
updated_at=handler.updated_at,
|
|
536
514
|
)
|
|
537
515
|
|
|
538
516
|
|
|
539
|
-
@router.put("/handlers/{handler_id}", response_model=
|
|
517
|
+
@router.put("/handlers/{handler_id}", response_model=AlertHandlerResponse)
|
|
540
518
|
async def update_alert_handler(
|
|
541
519
|
handler_id: str,
|
|
542
520
|
request: UpdateAlertHandlerRequest,
|
|
543
521
|
service: ModelMonitoringService = Depends(get_service),
|
|
544
|
-
):
|
|
522
|
+
) -> AlertHandlerResponse:
|
|
545
523
|
"""Update an alert handler."""
|
|
546
524
|
updates = {}
|
|
547
525
|
if request.name is not None:
|
|
@@ -555,33 +533,47 @@ async def update_alert_handler(
|
|
|
555
533
|
if not handler:
|
|
556
534
|
raise HTTPException(status_code=404, detail="Alert handler not found")
|
|
557
535
|
|
|
558
|
-
return
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
updated_at=handler.updated_at,
|
|
570
|
-
)
|
|
536
|
+
return AlertHandlerResponse(
|
|
537
|
+
id=handler.id,
|
|
538
|
+
name=handler.name,
|
|
539
|
+
handler_type=handler.handler_type,
|
|
540
|
+
config=handler.config,
|
|
541
|
+
is_active=handler.is_active,
|
|
542
|
+
last_sent_at=handler.last_sent_at,
|
|
543
|
+
send_count=handler.send_count,
|
|
544
|
+
failure_count=handler.failure_count,
|
|
545
|
+
created_at=handler.created_at,
|
|
546
|
+
updated_at=handler.updated_at,
|
|
571
547
|
)
|
|
572
548
|
|
|
573
549
|
|
|
574
|
-
@router.delete("/handlers/{handler_id}")
|
|
550
|
+
@router.delete("/handlers/{handler_id}", response_model=MessageResponse)
|
|
575
551
|
async def delete_alert_handler(
|
|
576
552
|
handler_id: str,
|
|
577
553
|
service: ModelMonitoringService = Depends(get_service),
|
|
578
|
-
):
|
|
554
|
+
) -> MessageResponse:
|
|
579
555
|
"""Delete an alert handler."""
|
|
580
556
|
deleted = await service.delete_alert_handler(handler_id)
|
|
581
557
|
if not deleted:
|
|
582
558
|
raise HTTPException(status_code=404, detail="Alert handler not found")
|
|
583
559
|
|
|
584
|
-
return
|
|
560
|
+
return MessageResponse(message="Alert handler deleted")
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
@router.post("/handlers/{handler_id}/test")
|
|
564
|
+
async def test_alert_handler(
|
|
565
|
+
handler_id: str,
|
|
566
|
+
service: ModelMonitoringService = Depends(get_service),
|
|
567
|
+
) -> dict:
|
|
568
|
+
"""Test an alert handler by sending a test notification.
|
|
569
|
+
|
|
570
|
+
Validates the handler configuration and simulates sending a test notification.
|
|
571
|
+
"""
|
|
572
|
+
try:
|
|
573
|
+
result = await service.test_alert_handler(handler_id)
|
|
574
|
+
return result
|
|
575
|
+
except ValueError as e:
|
|
576
|
+
raise HTTPException(status_code=404, detail=str(e))
|
|
585
577
|
|
|
586
578
|
|
|
587
579
|
# =============================================================================
|
|
@@ -589,7 +581,7 @@ async def delete_alert_handler(
|
|
|
589
581
|
# =============================================================================
|
|
590
582
|
|
|
591
583
|
|
|
592
|
-
@router.get("/alerts", response_model=
|
|
584
|
+
@router.get("/alerts", response_model=AlertListResponse)
|
|
593
585
|
async def list_alerts(
|
|
594
586
|
model_id: str | None = None,
|
|
595
587
|
active_only: bool = False,
|
|
@@ -597,7 +589,7 @@ async def list_alerts(
|
|
|
597
589
|
offset: int = Query(0, ge=0),
|
|
598
590
|
limit: int = Query(50, ge=1, le=100),
|
|
599
591
|
service: ModelMonitoringService = Depends(get_service),
|
|
600
|
-
):
|
|
592
|
+
) -> AlertListResponse:
|
|
601
593
|
"""List alerts."""
|
|
602
594
|
severity_filter = severity.value if severity else None
|
|
603
595
|
alerts, total = await service.get_alerts(
|
|
@@ -628,75 +620,68 @@ async def list_alerts(
|
|
|
628
620
|
for a in alerts
|
|
629
621
|
]
|
|
630
622
|
|
|
631
|
-
return
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
limit=limit,
|
|
637
|
-
)
|
|
623
|
+
return AlertListResponse(
|
|
624
|
+
items=items,
|
|
625
|
+
total=total,
|
|
626
|
+
offset=offset,
|
|
627
|
+
limit=limit,
|
|
638
628
|
)
|
|
639
629
|
|
|
640
630
|
|
|
641
|
-
@router.post("/alerts/{alert_id}/acknowledge")
|
|
631
|
+
@router.post("/alerts/{alert_id}/acknowledge", response_model=AlertInstance)
|
|
642
632
|
async def acknowledge_alert(
|
|
643
633
|
alert_id: str,
|
|
644
634
|
request: AcknowledgeAlertRequest,
|
|
645
635
|
service: ModelMonitoringService = Depends(get_service),
|
|
646
|
-
):
|
|
636
|
+
) -> AlertInstance:
|
|
647
637
|
"""Acknowledge an alert."""
|
|
648
638
|
alert = await service.acknowledge_alert(alert_id, request.actor)
|
|
649
639
|
if not alert:
|
|
650
640
|
raise HTTPException(status_code=404, detail="Alert not found")
|
|
651
641
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
updated_at=alert.updated_at,
|
|
669
|
-
)
|
|
670
|
-
)
|
|
642
|
+
return AlertInstance(
|
|
643
|
+
id=alert.id,
|
|
644
|
+
rule_id=alert.rule_id,
|
|
645
|
+
model_id=alert.model_id,
|
|
646
|
+
severity=AlertSeverity(alert.severity),
|
|
647
|
+
message=alert.message,
|
|
648
|
+
metric_value=alert.metric_value,
|
|
649
|
+
threshold_value=alert.threshold_value,
|
|
650
|
+
acknowledged=alert.acknowledged,
|
|
651
|
+
acknowledged_by=alert.acknowledged_by,
|
|
652
|
+
acknowledged_at=alert.acknowledged_at,
|
|
653
|
+
resolved=alert.resolved,
|
|
654
|
+
resolved_at=alert.resolved_at,
|
|
655
|
+
created_at=alert.created_at,
|
|
656
|
+
updated_at=alert.updated_at,
|
|
657
|
+
)
|
|
671
658
|
|
|
672
659
|
|
|
673
|
-
@router.post("/alerts/{alert_id}/resolve")
|
|
660
|
+
@router.post("/alerts/{alert_id}/resolve", response_model=AlertInstance)
|
|
674
661
|
async def resolve_alert(
|
|
675
662
|
alert_id: str,
|
|
676
663
|
service: ModelMonitoringService = Depends(get_service),
|
|
677
|
-
):
|
|
664
|
+
) -> AlertInstance:
|
|
678
665
|
"""Resolve an alert."""
|
|
679
666
|
alert = await service.resolve_alert(alert_id)
|
|
680
667
|
if not alert:
|
|
681
668
|
raise HTTPException(status_code=404, detail="Alert not found")
|
|
682
669
|
|
|
683
|
-
return
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
updated_at=alert.updated_at,
|
|
699
|
-
)
|
|
670
|
+
return AlertInstance(
|
|
671
|
+
id=alert.id,
|
|
672
|
+
rule_id=alert.rule_id,
|
|
673
|
+
model_id=alert.model_id,
|
|
674
|
+
severity=AlertSeverity(alert.severity),
|
|
675
|
+
message=alert.message,
|
|
676
|
+
metric_value=alert.metric_value,
|
|
677
|
+
threshold_value=alert.threshold_value,
|
|
678
|
+
acknowledged=alert.acknowledged,
|
|
679
|
+
acknowledged_by=alert.acknowledged_by,
|
|
680
|
+
acknowledged_at=alert.acknowledged_at,
|
|
681
|
+
resolved=alert.resolved,
|
|
682
|
+
resolved_at=alert.resolved_at,
|
|
683
|
+
created_at=alert.created_at,
|
|
684
|
+
updated_at=alert.updated_at,
|
|
700
685
|
)
|
|
701
686
|
|
|
702
687
|
|
|
@@ -705,31 +690,29 @@ async def resolve_alert(
|
|
|
705
690
|
# =============================================================================
|
|
706
691
|
|
|
707
692
|
|
|
708
|
-
@router.get("/overview", response_model=
|
|
693
|
+
@router.get("/overview", response_model=MonitoringOverview)
|
|
709
694
|
async def get_monitoring_overview(
|
|
710
695
|
service: ModelMonitoringService = Depends(get_service),
|
|
711
|
-
):
|
|
696
|
+
) -> MonitoringOverview:
|
|
712
697
|
"""Get monitoring overview for dashboard."""
|
|
713
698
|
overview = await service.get_monitoring_overview()
|
|
714
699
|
|
|
715
|
-
return
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
avg_latency_ms=overview["avg_latency_ms"],
|
|
724
|
-
)
|
|
700
|
+
return MonitoringOverview(
|
|
701
|
+
total_models=overview["total_models"],
|
|
702
|
+
active_models=overview["active_models"],
|
|
703
|
+
degraded_models=overview["degraded_models"],
|
|
704
|
+
total_predictions_24h=overview["total_predictions_24h"],
|
|
705
|
+
active_alerts=overview["active_alerts"],
|
|
706
|
+
models_with_drift=overview["models_with_drift"],
|
|
707
|
+
avg_latency_ms=overview["avg_latency_ms"],
|
|
725
708
|
)
|
|
726
709
|
|
|
727
710
|
|
|
728
|
-
@router.get("/models/{model_id}/dashboard", response_model=
|
|
711
|
+
@router.get("/models/{model_id}/dashboard", response_model=ModelDashboardData)
|
|
729
712
|
async def get_model_dashboard(
|
|
730
713
|
model_id: str,
|
|
731
714
|
service: ModelMonitoringService = Depends(get_service),
|
|
732
|
-
):
|
|
715
|
+
) -> ModelDashboardData:
|
|
733
716
|
"""Get dashboard data for a specific model."""
|
|
734
717
|
try:
|
|
735
718
|
dashboard = await service.get_model_dashboard(model_id)
|
|
@@ -737,52 +720,50 @@ async def get_model_dashboard(
|
|
|
737
720
|
model_data = dashboard["model"]
|
|
738
721
|
metrics_data = dashboard["metrics"]
|
|
739
722
|
|
|
740
|
-
return
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
health_status=dashboard["health_status"],
|
|
785
|
-
)
|
|
723
|
+
return ModelDashboardData(
|
|
724
|
+
model=RegisteredModelResponse(
|
|
725
|
+
id=model_data["id"],
|
|
726
|
+
name=model_data["name"],
|
|
727
|
+
version=model_data["version"],
|
|
728
|
+
description=model_data["description"] or "",
|
|
729
|
+
status=ModelStatus(model_data["status"]),
|
|
730
|
+
config=model_data["config"],
|
|
731
|
+
metadata=model_data["metadata"] or {},
|
|
732
|
+
prediction_count=model_data["prediction_count"],
|
|
733
|
+
last_prediction_at=model_data["last_prediction_at"],
|
|
734
|
+
current_drift_score=model_data["current_drift_score"],
|
|
735
|
+
health_score=model_data["health_score"],
|
|
736
|
+
created_at=model_data["created_at"],
|
|
737
|
+
updated_at=model_data["updated_at"],
|
|
738
|
+
),
|
|
739
|
+
metrics=MetricsResponse(
|
|
740
|
+
model_id=metrics_data["model_id"],
|
|
741
|
+
model_name=metrics_data["model_name"],
|
|
742
|
+
time_range_hours=metrics_data["time_range_hours"],
|
|
743
|
+
metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
|
|
744
|
+
data_points=metrics_data["data_points"],
|
|
745
|
+
),
|
|
746
|
+
active_alerts=[
|
|
747
|
+
AlertInstance(
|
|
748
|
+
id=a["id"],
|
|
749
|
+
rule_id=a["rule_id"],
|
|
750
|
+
model_id=a["model_id"],
|
|
751
|
+
severity=AlertSeverity(a["severity"]),
|
|
752
|
+
message=a["message"],
|
|
753
|
+
metric_value=a["metric_value"],
|
|
754
|
+
threshold_value=a["threshold_value"],
|
|
755
|
+
acknowledged=a["acknowledged"],
|
|
756
|
+
acknowledged_by=a["acknowledged_by"],
|
|
757
|
+
acknowledged_at=a["acknowledged_at"],
|
|
758
|
+
resolved=a["resolved"],
|
|
759
|
+
resolved_at=a["resolved_at"],
|
|
760
|
+
created_at=a["created_at"],
|
|
761
|
+
updated_at=a["updated_at"],
|
|
762
|
+
)
|
|
763
|
+
for a in dashboard["active_alerts"]
|
|
764
|
+
],
|
|
765
|
+
recent_predictions=dashboard["recent_predictions"],
|
|
766
|
+
health_status=dashboard["health_status"],
|
|
786
767
|
)
|
|
787
768
|
except ValueError as e:
|
|
788
769
|
raise HTTPException(status_code=404, detail=str(e))
|
|
@@ -792,14 +773,157 @@ async def get_model_dashboard(
|
|
|
792
773
|
async def evaluate_model_rules(
|
|
793
774
|
model_id: str,
|
|
794
775
|
service: ModelMonitoringService = Depends(get_service),
|
|
795
|
-
):
|
|
776
|
+
) -> dict:
|
|
796
777
|
"""Evaluate all active rules for a model and create alerts if triggered."""
|
|
797
778
|
alerts = await service.evaluate_rules(model_id)
|
|
798
779
|
|
|
799
|
-
return
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
780
|
+
return {
|
|
781
|
+
"model_id": model_id,
|
|
782
|
+
"alerts_created": len(alerts),
|
|
783
|
+
"alert_ids": [a.id for a in alerts],
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
# =============================================================================
|
|
788
|
+
# Truthound Integration - Drift Detection
|
|
789
|
+
# =============================================================================
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
class DriftDetectionRequest(BaseModel):
|
|
793
|
+
"""Request for drift detection using truthound th.compare()."""
|
|
794
|
+
|
|
795
|
+
reference_source_id: str = Field(..., description="Source ID for reference/baseline data")
|
|
796
|
+
current_source_id: str = Field(..., description="Source ID for current data to compare")
|
|
797
|
+
method: str = Field(
|
|
798
|
+
default="auto",
|
|
799
|
+
description="Drift detection method (auto, psi, ks, js, wasserstein, chi2, etc.)",
|
|
800
|
+
)
|
|
801
|
+
columns: list[str] | None = Field(
|
|
802
|
+
default=None,
|
|
803
|
+
description="Specific columns to check (default: all)",
|
|
805
804
|
)
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
class DriftDetectionResponse(BaseModel):
|
|
808
|
+
"""Response from drift detection."""
|
|
809
|
+
|
|
810
|
+
model_id: str
|
|
811
|
+
method: str
|
|
812
|
+
has_drift: bool
|
|
813
|
+
overall_score: float
|
|
814
|
+
drift_threshold: float
|
|
815
|
+
drifted_columns: list[str]
|
|
816
|
+
column_scores: dict[str, float]
|
|
817
|
+
timestamp: str
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
@router.post(
|
|
821
|
+
"/models/{model_id}/detect-drift",
|
|
822
|
+
response_model=DriftDetectionResponse,
|
|
823
|
+
summary="Detect drift for a model",
|
|
824
|
+
description="""
|
|
825
|
+
Compute drift score using truthound th.compare().
|
|
826
|
+
|
|
827
|
+
Available methods:
|
|
828
|
+
- auto: Auto-select best method based on column type
|
|
829
|
+
- psi: Population Stability Index (<0.1 stable, 0.1-0.25 small drift, >0.25 significant)
|
|
830
|
+
- ks: Kolmogorov-Smirnov test
|
|
831
|
+
- js: Jensen-Shannon divergence
|
|
832
|
+
- wasserstein: Earth Mover's Distance
|
|
833
|
+
- chi2: Chi-squared (categorical)
|
|
834
|
+
- kl: Kullback-Leibler divergence
|
|
835
|
+
- cvm: Cramér-von Mises test
|
|
836
|
+
- anderson: Anderson-Darling test
|
|
837
|
+
- hellinger: Hellinger distance
|
|
838
|
+
- bhattacharyya: Bhattacharyya distance
|
|
839
|
+
- tv: Total Variation distance
|
|
840
|
+
- energy: Energy distance
|
|
841
|
+
- mmd: Maximum Mean Discrepancy
|
|
842
|
+
""",
|
|
843
|
+
)
|
|
844
|
+
async def detect_model_drift(
|
|
845
|
+
model_id: str,
|
|
846
|
+
request: DriftDetectionRequest,
|
|
847
|
+
service: ModelMonitoringService = Depends(get_service),
|
|
848
|
+
) -> DriftDetectionResponse:
|
|
849
|
+
"""Detect drift between reference and current data for a model."""
|
|
850
|
+
from truthound_dashboard.core.services import SourceService
|
|
851
|
+
from truthound_dashboard.db import get_async_session
|
|
852
|
+
|
|
853
|
+
# Get source data
|
|
854
|
+
async with get_async_session() as session:
|
|
855
|
+
source_service = SourceService(session)
|
|
856
|
+
|
|
857
|
+
reference_source = await source_service.get_source(request.reference_source_id)
|
|
858
|
+
if reference_source is None:
|
|
859
|
+
raise HTTPException(status_code=404, detail=f"Reference source '{request.reference_source_id}' not found")
|
|
860
|
+
|
|
861
|
+
current_source = await source_service.get_source(request.current_source_id)
|
|
862
|
+
if current_source is None:
|
|
863
|
+
raise HTTPException(status_code=404, detail=f"Current source '{request.current_source_id}' not found")
|
|
864
|
+
|
|
865
|
+
try:
|
|
866
|
+
result = await service.compute_drift_score(
|
|
867
|
+
model_id=model_id,
|
|
868
|
+
reference_data=reference_source.connection_string,
|
|
869
|
+
current_data=current_source.connection_string,
|
|
870
|
+
method=request.method,
|
|
871
|
+
columns=request.columns,
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
return DriftDetectionResponse(**result)
|
|
875
|
+
except ValueError as e:
|
|
876
|
+
raise HTTPException(status_code=404, detail=str(e))
|
|
877
|
+
except Exception as e:
|
|
878
|
+
raise HTTPException(status_code=500, detail=f"Drift detection failed: {str(e)}")
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
# =============================================================================
|
|
882
|
+
# Truthound Integration - Quality Metrics
|
|
883
|
+
# =============================================================================
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
class QualityMetricsResponse(BaseModel):
|
|
887
|
+
"""Response from quality metrics computation."""
|
|
888
|
+
|
|
889
|
+
model_id: str
|
|
890
|
+
enabled: bool
|
|
891
|
+
has_data: bool = False
|
|
892
|
+
model_type: str | None = None
|
|
893
|
+
sample_count: int | None = None
|
|
894
|
+
time_range_hours: int | None = None
|
|
895
|
+
metrics: dict[str, float | None] | None = None
|
|
896
|
+
message: str | None = None
|
|
897
|
+
timestamp: str | None = None
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
@router.get(
|
|
901
|
+
"/models/{model_id}/quality-metrics",
|
|
902
|
+
response_model=QualityMetricsResponse,
|
|
903
|
+
summary="Get quality metrics for a model",
|
|
904
|
+
description="""
|
|
905
|
+
Compute quality metrics from predictions with actual values.
|
|
906
|
+
|
|
907
|
+
For classification models:
|
|
908
|
+
- accuracy: Overall accuracy
|
|
909
|
+
- precision: Precision (binary only)
|
|
910
|
+
- recall: Recall (binary only)
|
|
911
|
+
- f1_score: F1 score (binary only)
|
|
912
|
+
|
|
913
|
+
For regression models:
|
|
914
|
+
- mae: Mean Absolute Error
|
|
915
|
+
- mse: Mean Squared Error
|
|
916
|
+
- rmse: Root Mean Squared Error
|
|
917
|
+
""",
|
|
918
|
+
)
|
|
919
|
+
async def get_model_quality_metrics(
|
|
920
|
+
model_id: str,
|
|
921
|
+
hours: int = Query(default=24, ge=1, le=168, description="Time range in hours"),
|
|
922
|
+
service: ModelMonitoringService = Depends(get_service),
|
|
923
|
+
) -> QualityMetricsResponse:
|
|
924
|
+
"""Get quality metrics for a model."""
|
|
925
|
+
try:
|
|
926
|
+
result = await service.compute_quality_metrics(model_id, hours=hours)
|
|
927
|
+
return QualityMetricsResponse(**result)
|
|
928
|
+
except ValueError as e:
|
|
929
|
+
raise HTTPException(status_code=404, detail=str(e))
|