truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
- truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,805 @@
|
|
|
1
|
+
"""API endpoints for ML Model Monitoring.
|
|
2
|
+
|
|
3
|
+
Provides REST API for:
|
|
4
|
+
- Model registration and management
|
|
5
|
+
- Prediction recording and metrics
|
|
6
|
+
- Alert rules and handlers
|
|
7
|
+
- Dashboard data
|
|
8
|
+
|
|
9
|
+
All data is persisted to the database.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
15
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
|
+
|
|
17
|
+
from ..core.model_monitoring import ModelMonitoringService
|
|
18
|
+
from ..db import get_session
|
|
19
|
+
from ..schemas.base import DataResponse
|
|
20
|
+
from ..schemas.model_monitoring import (
|
|
21
|
+
AcknowledgeAlertRequest,
|
|
22
|
+
AlertHandlerListResponse,
|
|
23
|
+
AlertHandlerResponse,
|
|
24
|
+
AlertInstance,
|
|
25
|
+
AlertListResponse,
|
|
26
|
+
AlertRuleListResponse,
|
|
27
|
+
AlertRuleResponse,
|
|
28
|
+
AlertSeverity,
|
|
29
|
+
CreateAlertHandlerRequest,
|
|
30
|
+
CreateAlertRuleRequest,
|
|
31
|
+
MetricsResponse,
|
|
32
|
+
MetricSummary,
|
|
33
|
+
ModelDashboardData,
|
|
34
|
+
ModelStatus,
|
|
35
|
+
MonitoringOverview,
|
|
36
|
+
RecordPredictionRequest,
|
|
37
|
+
RecordPredictionResponse,
|
|
38
|
+
RegisteredModelListResponse,
|
|
39
|
+
RegisteredModelResponse,
|
|
40
|
+
RegisterModelRequest,
|
|
41
|
+
UpdateAlertHandlerRequest,
|
|
42
|
+
UpdateAlertRuleRequest,
|
|
43
|
+
UpdateModelRequest,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
router = APIRouter(prefix="/model-monitoring", tags=["model-monitoring"])
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_service(session: AsyncSession = Depends(get_session)) -> ModelMonitoringService:
    """FastAPI dependency that builds the model monitoring service.

    Args:
        session: Async database session, injected via ``Depends(get_session)``.

    Returns:
        A ``ModelMonitoringService`` constructed with ``session``.
    """
    monitoring_service = ModelMonitoringService(session)
    return monitoring_service
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# =============================================================================
|
|
55
|
+
# Model Registration Endpoints
|
|
56
|
+
# =============================================================================
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@router.get("/models", response_model=DataResponse[RegisteredModelListResponse])
async def list_models(
    status: ModelStatus | None = None,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of registered models, optionally filtered by status.

    Args:
        status: When given, its ``.value`` is passed to the service as a filter.
        offset: Number of records to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping a ``RegisteredModelListResponse`` holding the
        page items, the total reported by the service, and the echoed
        ``offset``/``limit``.
    """

    def _to_response(record):
        # Map one stored model record onto the API response schema.
        return RegisteredModelResponse(
            id=record.id,
            name=record.name,
            version=record.version,
            description=record.description or "",
            status=ModelStatus(record.status),
            config=record.config,
            metadata=record.metadata_json or {},
            prediction_count=record.prediction_count,
            last_prediction_at=record.last_prediction_at,
            current_drift_score=record.current_drift_score,
            health_score=record.health_score,
            created_at=record.created_at,
            updated_at=record.updated_at,
        )

    status_filter = None if not status else status.value
    models, total = await service.list_models(
        status=status_filter,
        offset=offset,
        limit=limit,
    )

    page = RegisteredModelListResponse(
        items=[_to_response(record) for record in models],
        total=total,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=page)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@router.post("/models", response_model=DataResponse[RegisteredModelResponse])
async def register_model(
    request: RegisterModelRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Register a new model for monitoring and return the stored record.

    Args:
        request: Registration payload (name, version, description, optional
            config, metadata).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the ``RegisteredModelResponse`` built from the
        record returned by the service.
    """
    # The config schema is serialised to a plain dict; a missing config is
    # forwarded as None.
    config_payload = None
    if request.config:
        config_payload = request.config.model_dump()

    model = await service.register_model(
        name=request.name,
        version=request.version,
        description=request.description,
        config=config_payload,
        metadata=request.metadata,
    )

    registered = RegisteredModelResponse(
        id=model.id,
        name=model.name,
        version=model.version,
        description=model.description or "",
        status=ModelStatus(model.status),
        config=model.config,
        metadata=model.metadata_json or {},
        prediction_count=model.prediction_count,
        last_prediction_at=model.last_prediction_at,
        current_drift_score=model.current_drift_score,
        health_score=model.health_score,
        created_at=model.created_at,
        updated_at=model.updated_at,
    )
    return DataResponse(data=registered)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@router.get("/models/{model_id}", response_model=DataResponse[RegisteredModelResponse])
async def get_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Fetch a single registered model by its ID.

    Args:
        model_id: Identifier of the model to look up.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the model as a ``RegisteredModelResponse``.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    model = await service.get_model(model_id)
    if not model:
        raise HTTPException(status_code=404, detail="Model not found")

    response_fields = {
        "id": model.id,
        "name": model.name,
        "version": model.version,
        "description": model.description or "",
        "status": ModelStatus(model.status),
        "config": model.config,
        "metadata": model.metadata_json or {},
        "prediction_count": model.prediction_count,
        "last_prediction_at": model.last_prediction_at,
        "current_drift_score": model.current_drift_score,
        "health_score": model.health_score,
        "created_at": model.created_at,
        "updated_at": model.updated_at,
    }
    return DataResponse(data=RegisteredModelResponse(**response_fields))
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@router.put("/models/{model_id}", response_model=DataResponse[RegisteredModelResponse])
async def update_model(
    model_id: str,
    request: UpdateModelRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to a registered model.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        model_id: Identifier of the model to update.
        request: Partial update payload.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated ``RegisteredModelResponse``.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """

    def _unchanged(value):
        return value

    # (service keyword, raw request value, conversion applied when present).
    # Note that request.metadata is sent under the "metadata_json" keyword.
    field_specs = (
        ("name", request.name, _unchanged),
        ("version", request.version, _unchanged),
        ("description", request.description, _unchanged),
        ("status", request.status, lambda state: state.value),
        ("config", request.config, lambda cfg: cfg.model_dump()),
        ("metadata_json", request.metadata, _unchanged),
    )
    updates = {
        key: convert(raw)
        for key, raw, convert in field_specs
        if raw is not None
    }

    model = await service.update_model(model_id, **updates)
    if not model:
        raise HTTPException(status_code=404, detail="Model not found")

    updated = RegisteredModelResponse(
        id=model.id,
        name=model.name,
        version=model.version,
        description=model.description or "",
        status=ModelStatus(model.status),
        config=model.config,
        metadata=model.metadata_json or {},
        prediction_count=model.prediction_count,
        last_prediction_at=model.last_prediction_at,
        current_drift_score=model.current_drift_score,
        health_score=model.health_score,
        created_at=model.created_at,
        updated_at=model.updated_at,
    )
    return DataResponse(data=updated)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@router.delete("/models/{model_id}")
async def delete_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete a registered model.

    Args:
        model_id: Identifier of the model to delete.
        service: Injected model monitoring service.

    Returns:
        A dict with ``success`` and ``message`` keys.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    if not await service.delete_model(model_id):
        raise HTTPException(status_code=404, detail="Model not found")

    outcome = {"success": True, "message": "Model deleted"}
    return outcome
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@router.post("/models/{model_id}/pause")
async def pause_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Pause monitoring for a model.

    Args:
        model_id: Identifier of the model to pause.
        service: Injected model monitoring service.

    Returns:
        A dict with ``success`` and ``message`` keys.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    if not await service.pause_model(model_id):
        raise HTTPException(status_code=404, detail="Model not found")

    outcome = {"success": True, "message": "Model monitoring paused"}
    return outcome
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@router.post("/models/{model_id}/resume")
async def resume_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Resume monitoring for a model.

    Args:
        model_id: Identifier of the model to resume.
        service: Injected model monitoring service.

    Returns:
        A dict with ``success`` and ``message`` keys.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    if not await service.resume_model(model_id):
        raise HTTPException(status_code=404, detail="Model not found")

    outcome = {"success": True, "message": "Model monitoring resumed"}
    return outcome
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# =============================================================================
|
|
247
|
+
# Prediction Recording Endpoints
|
|
248
|
+
# =============================================================================
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@router.post(
    "/models/{model_id}/predictions",
    response_model=DataResponse[RecordPredictionResponse],
)
async def record_prediction(
    model_id: str,
    request: RecordPredictionRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Record a model prediction.

    Args:
        model_id: Identifier of the model the prediction belongs to.
        request: Prediction payload (features, prediction, actual, latency,
            metadata).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping a ``RecordPredictionResponse`` with the new
        prediction's ID, the model ID, and the recording timestamp.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``; the error
            text is used as the response detail.
    """
    # Only the service call lives in the try block. Pydantic's ValidationError
    # subclasses ValueError, so building the response inside the try would
    # turn a server-side response-validation failure into a misleading 404.
    try:
        prediction = await service.record_prediction(
            model_id=model_id,
            features=request.features,
            prediction=request.prediction,
            actual=request.actual,
            latency_ms=request.latency_ms,
            metadata=request.metadata,
        )
    except ValueError as e:
        # Chain the cause so the original traceback is kept in logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=RecordPredictionResponse(
            id=prediction.id,
            model_id=model_id,
            recorded_at=prediction.recorded_at,
        )
    )
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
@router.get("/models/{model_id}/metrics", response_model=DataResponse[MetricsResponse])
async def get_model_metrics(
    model_id: str,
    hours: int = Query(24, ge=1, le=168),
    service: ModelMonitoringService = Depends(get_service),
):
    """Get metrics for a model over a recent time window.

    Args:
        model_id: Identifier of the model.
        hours: Size of the look-back window in hours (1-168, default 24).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping a ``MetricsResponse`` built from the mapping
        returned by the service (keys ``model_id``, ``model_name``,
        ``time_range_hours``, ``metrics``, ``data_points``).

    Raises:
        HTTPException: 404 when the service raises ``ValueError``; the error
            text is used as the response detail.
    """
    # Only the service call lives in the try block. Pydantic's ValidationError
    # subclasses ValueError, so constructing MetricSummary/MetricsResponse
    # inside the try would report a malformed metrics payload as a 404.
    try:
        metrics_data = await service.get_model_metrics(model_id, hours)
    except ValueError as e:
        # Chain the cause so the original traceback is kept in logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=MetricsResponse(
            model_id=metrics_data["model_id"],
            model_name=metrics_data["model_name"],
            time_range_hours=metrics_data["time_range_hours"],
            metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
            data_points=metrics_data["data_points"],
        )
    )
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
# =============================================================================
|
|
306
|
+
# Alert Rule Endpoints
|
|
307
|
+
# =============================================================================
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@router.get("/rules", response_model=DataResponse[AlertRuleListResponse])
async def list_alert_rules(
    model_id: str | None = None,
    active_only: bool = False,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alert rules.

    The service returns every matching rule; the page is sliced here.

    Args:
        model_id: Optional model ID forwarded to the service as a filter.
        active_only: Forwarded to the service as its ``active_only`` flag.
        offset: Number of rules to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping an ``AlertRuleListResponse`` with the page
        items, the pre-pagination total, and the echoed ``offset``/``limit``.
    """

    def _to_response(rule):
        # Map one stored rule onto the API response schema.
        return AlertRuleResponse(
            id=rule.id,
            name=rule.name,
            model_id=rule.model_id,
            rule_type=rule.rule_type,
            severity=AlertSeverity(rule.severity),
            config=rule.config,
            is_active=rule.is_active,
            last_triggered_at=rule.last_triggered_at,
            trigger_count=rule.trigger_count,
            created_at=rule.created_at,
            updated_at=rule.updated_at,
        )

    rules = await service.get_alert_rules(model_id=model_id, active_only=active_only)

    # The total is taken before slicing so it counts all matching rules.
    total = len(rules)
    window = slice(offset, offset + limit)
    page_rules = list(rules)[window]

    listing = AlertRuleListResponse(
        items=[_to_response(rule) for rule in page_rules],
        total=total,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@router.post("/rules", response_model=DataResponse[AlertRuleResponse])
async def create_alert_rule(
    request: CreateAlertRuleRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Create a new alert rule.

    Args:
        request: Rule definition (model ID, name, rule type, config, severity).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the created rule as an ``AlertRuleResponse``.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``; the error
            text is used as the response detail.
    """
    # Only the service call lives in the try block. Pydantic's ValidationError
    # subclasses ValueError, so building the response inside the try would
    # turn a server-side response-validation failure into a misleading 404.
    try:
        rule = await service.create_alert_rule(
            model_id=request.model_id,
            name=request.name,
            rule_type=request.rule_type.value,
            config=request.config,
            severity=request.severity.value,
        )
    except ValueError as e:
        # Chain the cause so the original traceback is kept in logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=AlertRuleResponse(
            id=rule.id,
            name=rule.name,
            model_id=rule.model_id,
            rule_type=rule.rule_type,
            severity=AlertSeverity(rule.severity),
            config=rule.config,
            is_active=rule.is_active,
            last_triggered_at=rule.last_triggered_at,
            trigger_count=rule.trigger_count,
            created_at=rule.created_at,
            updated_at=rule.updated_at,
        )
    )
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
@router.get("/rules/{rule_id}", response_model=DataResponse[AlertRuleResponse])
async def get_alert_rule(
    rule_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Fetch a single alert rule by its ID.

    Args:
        rule_id: Identifier of the rule to look up.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the rule as an ``AlertRuleResponse``.

    Raises:
        HTTPException: 404 when no rule with ``rule_id`` is found.
    """
    # NOTE(review): this loads every rule and scans for the ID; a direct
    # lookup on the service would be cheaper if one exists - confirm.
    rule = None
    for candidate in await service.get_alert_rules():
        if candidate.id == rule_id:
            rule = candidate
            break

    if not rule:
        raise HTTPException(status_code=404, detail="Alert rule not found")

    response_fields = {
        "id": rule.id,
        "name": rule.name,
        "model_id": rule.model_id,
        "rule_type": rule.rule_type,
        "severity": AlertSeverity(rule.severity),
        "config": rule.config,
        "is_active": rule.is_active,
        "last_triggered_at": rule.last_triggered_at,
        "trigger_count": rule.trigger_count,
        "created_at": rule.created_at,
        "updated_at": rule.updated_at,
    }
    return DataResponse(data=AlertRuleResponse(**response_fields))
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
@router.put("/rules/{rule_id}", response_model=DataResponse[AlertRuleResponse])
async def update_alert_rule(
    rule_id: str,
    request: UpdateAlertRuleRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to an alert rule.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        rule_id: Identifier of the rule to update.
        request: Partial update payload (name, severity, config, is_active).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated ``AlertRuleResponse``.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """

    def _unchanged(value):
        return value

    # (service keyword, raw request value, conversion applied when present).
    field_specs = (
        ("name", request.name, _unchanged),
        ("severity", request.severity, lambda level: level.value),
        ("config", request.config, _unchanged),
        ("is_active", request.is_active, _unchanged),
    )
    updates = {
        key: convert(raw)
        for key, raw, convert in field_specs
        if raw is not None
    }

    rule = await service.update_alert_rule(rule_id, **updates)
    if not rule:
        raise HTTPException(status_code=404, detail="Alert rule not found")

    updated = AlertRuleResponse(
        id=rule.id,
        name=rule.name,
        model_id=rule.model_id,
        rule_type=rule.rule_type,
        severity=AlertSeverity(rule.severity),
        config=rule.config,
        is_active=rule.is_active,
        last_triggered_at=rule.last_triggered_at,
        trigger_count=rule.trigger_count,
        created_at=rule.created_at,
        updated_at=rule.updated_at,
    )
    return DataResponse(data=updated)
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
@router.delete("/rules/{rule_id}")
async def delete_alert_rule(
    rule_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete an alert rule.

    Args:
        rule_id: Identifier of the rule to delete.
        service: Injected model monitoring service.

    Returns:
        A dict with ``success`` and ``message`` keys.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    if not await service.delete_alert_rule(rule_id):
        raise HTTPException(status_code=404, detail="Alert rule not found")

    outcome = {"success": True, "message": "Alert rule deleted"}
    return outcome
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
# =============================================================================
|
|
467
|
+
# Alert Handler Endpoints
|
|
468
|
+
# =============================================================================
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
@router.get("/handlers", response_model=DataResponse[AlertHandlerListResponse])
async def list_alert_handlers(
    active_only: bool = False,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alert handlers.

    The service returns every matching handler; the page is sliced here.

    Args:
        active_only: Forwarded to the service as its ``active_only`` flag.
        offset: Number of handlers to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping an ``AlertHandlerListResponse`` with the page
        items, the pre-pagination total, and the echoed ``offset``/``limit``.
    """

    def _to_response(handler):
        # Map one stored handler onto the API response schema.
        return AlertHandlerResponse(
            id=handler.id,
            name=handler.name,
            handler_type=handler.handler_type,
            config=handler.config,
            is_active=handler.is_active,
            last_sent_at=handler.last_sent_at,
            send_count=handler.send_count,
            failure_count=handler.failure_count,
            created_at=handler.created_at,
            updated_at=handler.updated_at,
        )

    handlers = await service.get_alert_handlers(active_only=active_only)

    # The total is taken before slicing so it counts all matching handlers.
    total = len(handlers)
    window = slice(offset, offset + limit)
    page_handlers = list(handlers)[window]

    listing = AlertHandlerListResponse(
        items=[_to_response(handler) for handler in page_handlers],
        total=total,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
@router.post("/handlers", response_model=DataResponse[AlertHandlerResponse])
async def create_alert_handler(
    request: CreateAlertHandlerRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Create a new alert handler and return the stored record.

    Args:
        request: Handler definition (name, handler type, config).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the created ``AlertHandlerResponse``.
    """
    handler = await service.create_alert_handler(
        name=request.name,
        handler_type=request.handler_type.value,
        config=request.config,
    )

    created = AlertHandlerResponse(
        id=handler.id,
        name=handler.name,
        handler_type=handler.handler_type,
        config=handler.config,
        is_active=handler.is_active,
        last_sent_at=handler.last_sent_at,
        send_count=handler.send_count,
        failure_count=handler.failure_count,
        created_at=handler.created_at,
        updated_at=handler.updated_at,
    )
    return DataResponse(data=created)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
@router.put("/handlers/{handler_id}", response_model=DataResponse[AlertHandlerResponse])
async def update_alert_handler(
    handler_id: str,
    request: UpdateAlertHandlerRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to an alert handler.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        handler_id: Identifier of the handler to update.
        request: Partial update payload (name, config, is_active).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated ``AlertHandlerResponse``.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    requested = {
        "name": request.name,
        "config": request.config,
        "is_active": request.is_active,
    }
    updates = {key: value for key, value in requested.items() if value is not None}

    handler = await service.update_alert_handler(handler_id, **updates)
    if not handler:
        raise HTTPException(status_code=404, detail="Alert handler not found")

    updated = AlertHandlerResponse(
        id=handler.id,
        name=handler.name,
        handler_type=handler.handler_type,
        config=handler.config,
        is_active=handler.is_active,
        last_sent_at=handler.last_sent_at,
        send_count=handler.send_count,
        failure_count=handler.failure_count,
        created_at=handler.created_at,
        updated_at=handler.updated_at,
    )
    return DataResponse(data=updated)
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
@router.delete("/handlers/{handler_id}")
async def delete_alert_handler(
    handler_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete an alert handler.

    Args:
        handler_id: Identifier of the handler to delete.
        service: Injected model monitoring service.

    Returns:
        A dict with ``success`` and ``message`` keys.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    if not await service.delete_alert_handler(handler_id):
        raise HTTPException(status_code=404, detail="Alert handler not found")

    outcome = {"success": True, "message": "Alert handler deleted"}
    return outcome
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
# =============================================================================
|
|
588
|
+
# Alert Instance Endpoints
|
|
589
|
+
# =============================================================================
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
@router.get("/alerts", response_model=DataResponse[AlertListResponse])
async def list_alerts(
    model_id: str | None = None,
    active_only: bool = False,
    severity: AlertSeverity | None = None,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alerts, optionally filtered.

    The filters (``model_id``, ``active_only``, ``severity``) and the paging
    window (``offset``, ``limit``) are passed straight to the service; the
    severity enum is forwarded as its ``.value``.

    Returns:
        ``DataResponse`` wrapping an ``AlertListResponse`` with the converted
        items, the total reported by the service, and the requested
        ``offset``/``limit``.
    """
    # The service receives the raw enum value (or None for "no filter").
    if severity:
        severity_filter = severity.value
    else:
        severity_filter = None

    records, total_count = await service.get_alerts(
        model_id=model_id,
        active_only=active_only,
        severity=severity_filter,
        offset=offset,
        limit=limit,
    )

    # Convert each service record into the API schema.
    converted = [
        AlertInstance(
            id=record.id,
            rule_id=record.rule_id,
            model_id=record.model_id,
            severity=AlertSeverity(record.severity),
            message=record.message,
            metric_value=record.metric_value,
            threshold_value=record.threshold_value,
            acknowledged=record.acknowledged,
            acknowledged_by=record.acknowledged_by,
            acknowledged_at=record.acknowledged_at,
            resolved=record.resolved,
            resolved_at=record.resolved_at,
            created_at=record.created_at,
            updated_at=record.updated_at,
        )
        for record in records
    ]

    page = AlertListResponse(
        items=converted,
        total=total_count,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=page)
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
@router.post("/alerts/{alert_id}/acknowledge")
async def acknowledge_alert(
    alert_id: str,
    request: AcknowledgeAlertRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Acknowledge an alert on behalf of ``request.actor``.

    Returns:
        ``DataResponse`` wrapping the alert as returned by the service.

    Raises:
        HTTPException: 404 when the service returns a falsy result for
            ``alert_id``.
    """
    alert = await service.acknowledge_alert(alert_id, request.actor)
    if not alert:
        raise HTTPException(status_code=404, detail="Alert not found")

    # Always return the alert. The response used to be built only when
    # ``alert.acknowledged`` was truthy, so any other outcome fell off the end
    # of the function and the client received a 200 with a ``null`` body.
    return DataResponse(
        data=AlertInstance(
            id=alert.id,
            rule_id=alert.rule_id,
            model_id=alert.model_id,
            severity=AlertSeverity(alert.severity),
            message=alert.message,
            metric_value=alert.metric_value,
            threshold_value=alert.threshold_value,
            acknowledged=alert.acknowledged,
            acknowledged_by=alert.acknowledged_by,
            acknowledged_at=alert.acknowledged_at,
            resolved=alert.resolved,
            resolved_at=alert.resolved_at,
            created_at=alert.created_at,
            updated_at=alert.updated_at,
        )
    )
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
@router.post("/alerts/{alert_id}/resolve")
async def resolve_alert(
    alert_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Mark an alert as resolved and return it.

    Returns:
        ``DataResponse`` wrapping the alert as returned by the service.

    Raises:
        HTTPException: 404 when the service returns a falsy result for
            ``alert_id``.
    """
    resolved_alert = await service.resolve_alert(alert_id)
    if not resolved_alert:
        raise HTTPException(status_code=404, detail="Alert not found")

    payload = AlertInstance(
        id=resolved_alert.id,
        rule_id=resolved_alert.rule_id,
        model_id=resolved_alert.model_id,
        severity=AlertSeverity(resolved_alert.severity),
        message=resolved_alert.message,
        metric_value=resolved_alert.metric_value,
        threshold_value=resolved_alert.threshold_value,
        acknowledged=resolved_alert.acknowledged,
        acknowledged_by=resolved_alert.acknowledged_by,
        acknowledged_at=resolved_alert.acknowledged_at,
        resolved=resolved_alert.resolved,
        resolved_at=resolved_alert.resolved_at,
        created_at=resolved_alert.created_at,
        updated_at=resolved_alert.updated_at,
    )
    return DataResponse(data=payload)
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
# =============================================================================
|
|
704
|
+
# Dashboard Endpoints
|
|
705
|
+
# =============================================================================
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
@router.get("/overview", response_model=DataResponse[MonitoringOverview])
async def get_monitoring_overview(
    service: ModelMonitoringService = Depends(get_service),
):
    """Return the monitoring summary used by the dashboard.

    The service result is indexed by key; each listed key is copied into the
    ``MonitoringOverview`` field of the same name.
    """
    summary = await service.get_monitoring_overview()

    # Keys are read in this order; a missing key raises KeyError.
    field_names = (
        "total_models",
        "active_models",
        "degraded_models",
        "total_predictions_24h",
        "active_alerts",
        "models_with_drift",
        "avg_latency_ms",
    )
    values = {name: summary[name] for name in field_names}

    return DataResponse(data=MonitoringOverview(**values))
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
@router.get("/models/{model_id}/dashboard", response_model=DataResponse[ModelDashboardData])
async def get_model_dashboard(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Get dashboard data for a specific model.

    Fetches the dashboard mapping from the service and converts its
    ``model``, ``metrics`` and ``active_alerts`` entries into the API schema;
    ``recent_predictions`` and ``health_status`` are passed through unchanged.

    Raises:
        HTTPException: 404 when the service call raises ``ValueError``
            (presumably "model not found" -- confirm against the service).
            Errors raised while building the response are not converted and
            propagate to the framework.
    """
    # Keep the try body limited to the service call. Building the response can
    # itself raise ValueError (invalid enum value, schema validation), and
    # those failures must not be reported to the client as "not found".
    try:
        dashboard = await service.get_model_dashboard(model_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    model_data = dashboard["model"]
    metrics_data = dashboard["metrics"]

    return DataResponse(
        data=ModelDashboardData(
            model=RegisteredModelResponse(
                id=model_data["id"],
                name=model_data["name"],
                version=model_data["version"],
                # Falsy description/metadata are normalised to empty values.
                description=model_data["description"] or "",
                status=ModelStatus(model_data["status"]),
                config=model_data["config"],
                metadata=model_data["metadata"] or {},
                prediction_count=model_data["prediction_count"],
                last_prediction_at=model_data["last_prediction_at"],
                current_drift_score=model_data["current_drift_score"],
                health_score=model_data["health_score"],
                created_at=model_data["created_at"],
                updated_at=model_data["updated_at"],
            ),
            metrics=MetricsResponse(
                model_id=metrics_data["model_id"],
                model_name=metrics_data["model_name"],
                time_range_hours=metrics_data["time_range_hours"],
                metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
                data_points=metrics_data["data_points"],
            ),
            active_alerts=[
                AlertInstance(
                    id=a["id"],
                    rule_id=a["rule_id"],
                    model_id=a["model_id"],
                    severity=AlertSeverity(a["severity"]),
                    message=a["message"],
                    metric_value=a["metric_value"],
                    threshold_value=a["threshold_value"],
                    acknowledged=a["acknowledged"],
                    acknowledged_by=a["acknowledged_by"],
                    acknowledged_at=a["acknowledged_at"],
                    resolved=a["resolved"],
                    resolved_at=a["resolved_at"],
                    created_at=a["created_at"],
                    updated_at=a["updated_at"],
                )
                for a in dashboard["active_alerts"]
            ],
            recent_predictions=dashboard["recent_predictions"],
            health_status=dashboard["health_status"],
        )
    )
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
@router.post("/models/{model_id}/evaluate-rules")
async def evaluate_model_rules(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Run rule evaluation for a model and summarise the resulting alerts.

    Returns:
        ``DataResponse`` wrapping a dict with the ``model_id``, the number of
        alerts returned by the service, and their ids.
    """
    triggered = await service.evaluate_rules(model_id)

    summary = {
        "model_id": model_id,
        "alerts_created": len(triggered),
        "alert_ids": [alert.id for alert in triggered],
    }
    return DataResponse(data=summary)
|