truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,805 @@
1
+ """API endpoints for ML Model Monitoring.
2
+
3
+ Provides REST API for:
4
+ - Model registration and management
5
+ - Prediction recording and metrics
6
+ - Alert rules and handlers
7
+ - Dashboard data
8
+
9
+ All data is persisted to the database.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from fastapi import APIRouter, Depends, HTTPException, Query
15
+ from sqlalchemy.ext.asyncio import AsyncSession
16
+
17
+ from ..core.model_monitoring import ModelMonitoringService
18
+ from ..db import get_session
19
+ from ..schemas.base import DataResponse
20
+ from ..schemas.model_monitoring import (
21
+ AcknowledgeAlertRequest,
22
+ AlertHandlerListResponse,
23
+ AlertHandlerResponse,
24
+ AlertInstance,
25
+ AlertListResponse,
26
+ AlertRuleListResponse,
27
+ AlertRuleResponse,
28
+ AlertSeverity,
29
+ CreateAlertHandlerRequest,
30
+ CreateAlertRuleRequest,
31
+ MetricsResponse,
32
+ MetricSummary,
33
+ ModelDashboardData,
34
+ ModelStatus,
35
+ MonitoringOverview,
36
+ RecordPredictionRequest,
37
+ RecordPredictionResponse,
38
+ RegisteredModelListResponse,
39
+ RegisteredModelResponse,
40
+ RegisterModelRequest,
41
+ UpdateAlertHandlerRequest,
42
+ UpdateAlertRuleRequest,
43
+ UpdateModelRequest,
44
+ )
45
+
46
+ router = APIRouter(prefix="/model-monitoring", tags=["model-monitoring"])
47
+
48
+
49
def get_service(session: AsyncSession = Depends(get_session)) -> ModelMonitoringService:
    """Build the model monitoring service for the current request.

    Args:
        session: Async database session, injected through ``get_session``.

    Returns:
        A ``ModelMonitoringService`` constructed with that session.
    """
    monitoring_service = ModelMonitoringService(session)
    return monitoring_service
52
+
53
+
54
+ # =============================================================================
55
+ # Model Registration Endpoints
56
+ # =============================================================================
57
+
58
+
59
@router.get("/models", response_model=DataResponse[RegisteredModelListResponse])
async def list_models(
    status: ModelStatus | None = None,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of registered models, optionally filtered by status.

    Args:
        status: When given, its enum value is forwarded to the service as a filter.
        offset: Number of records to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the page of models plus the total reported
        by the service and the paging parameters that were used.
    """
    # The service receives the raw enum value, or None when no filter is set.
    if status:
        status_filter = status.value
    else:
        status_filter = None

    records, total_count = await service.list_models(
        status=status_filter, offset=offset, limit=limit
    )

    items = []
    for record in records:
        items.append(
            RegisteredModelResponse(
                id=record.id,
                name=record.name,
                version=record.version,
                description=record.description or "",
                status=ModelStatus(record.status),
                config=record.config,
                metadata=record.metadata_json or {},
                prediction_count=record.prediction_count,
                last_prediction_at=record.last_prediction_at,
                current_drift_score=record.current_drift_score,
                health_score=record.health_score,
                created_at=record.created_at,
                updated_at=record.updated_at,
            )
        )

    listing = RegisteredModelListResponse(
        items=items,
        total=total_count,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
99
+
100
+
101
@router.post("/models", response_model=DataResponse[RegisteredModelResponse])
async def register_model(
    request: RegisterModelRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Register a new model for monitoring and return the stored record.

    Args:
        request: Name, version, description, optional config and metadata.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the newly registered model.
    """
    created = await service.register_model(
        name=request.name,
        version=request.version,
        description=request.description,
        # The config schema is dumped to a plain dict; a missing config stays None.
        config=request.config.model_dump() if request.config else None,
        metadata=request.metadata,
    )

    payload = RegisteredModelResponse(
        id=created.id,
        name=created.name,
        version=created.version,
        description=created.description or "",
        status=ModelStatus(created.status),
        config=created.config,
        metadata=created.metadata_json or {},
        prediction_count=created.prediction_count,
        last_prediction_at=created.last_prediction_at,
        current_drift_score=created.current_drift_score,
        health_score=created.health_score,
        created_at=created.created_at,
        updated_at=created.updated_at,
    )
    return DataResponse(data=payload)
132
+
133
+
134
@router.get("/models/{model_id}", response_model=DataResponse[RegisteredModelResponse])
async def get_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Fetch a single registered model by its ID.

    Args:
        model_id: Identifier of the model to look up.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the model.

    Raises:
        HTTPException: 404 when the service returns no model.
    """
    found = await service.get_model(model_id)
    if not found:
        raise HTTPException(status_code=404, detail="Model not found")

    payload = RegisteredModelResponse(
        id=found.id,
        name=found.name,
        version=found.version,
        description=found.description or "",
        status=ModelStatus(found.status),
        config=found.config,
        metadata=found.metadata_json or {},
        prediction_count=found.prediction_count,
        last_prediction_at=found.last_prediction_at,
        current_drift_score=found.current_drift_score,
        health_score=found.health_score,
        created_at=found.created_at,
        updated_at=found.updated_at,
    )
    return DataResponse(data=payload)
161
+
162
+
163
@router.put("/models/{model_id}", response_model=DataResponse[RegisteredModelResponse])
async def update_model(
    model_id: str,
    request: UpdateModelRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to a registered model.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        model_id: Identifier of the model to update.
        request: Fields to change; ``None`` means "leave untouched".
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated model.

    Raises:
        HTTPException: 404 when the service returns no model.
    """
    updates = {}

    # Plain fields are copied through under their own name.
    for field_name in ("name", "version", "description"):
        field_value = getattr(request, field_name)
        if field_value is not None:
            updates[field_name] = field_value

    # The remaining fields need a conversion or a different key.
    if request.status is not None:
        updates["status"] = request.status.value
    if request.config is not None:
        updates["config"] = request.config.model_dump()
    if request.metadata is not None:
        updates["metadata_json"] = request.metadata

    updated = await service.update_model(model_id, **updates)
    if not updated:
        raise HTTPException(status_code=404, detail="Model not found")

    payload = RegisteredModelResponse(
        id=updated.id,
        name=updated.name,
        version=updated.version,
        description=updated.description or "",
        status=ModelStatus(updated.status),
        config=updated.config,
        metadata=updated.metadata_json or {},
        prediction_count=updated.prediction_count,
        last_prediction_at=updated.last_prediction_at,
        current_drift_score=updated.current_drift_score,
        health_score=updated.health_score,
        created_at=updated.created_at,
        updated_at=updated.updated_at,
    )
    return DataResponse(data=payload)
205
+
206
+
207
@router.delete("/models/{model_id}")
async def delete_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete a registered model.

    Returns:
        A success envelope when the service reports the deletion.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await service.delete_model(model_id)
    if was_deleted:
        return {"success": True, "message": "Model deleted"}

    raise HTTPException(status_code=404, detail="Model not found")
218
+
219
+
220
@router.post("/models/{model_id}/pause")
async def pause_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Pause monitoring for a model.

    Returns:
        A success envelope when the service returns the paused model.

    Raises:
        HTTPException: 404 when the service returns no model.
    """
    paused = await service.pause_model(model_id)
    if paused:
        return {"success": True, "message": "Model monitoring paused"}

    raise HTTPException(status_code=404, detail="Model not found")
231
+
232
+
233
@router.post("/models/{model_id}/resume")
async def resume_model(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Resume monitoring for a model.

    Returns:
        A success envelope when the service returns the resumed model.

    Raises:
        HTTPException: 404 when the service returns no model.
    """
    resumed = await service.resume_model(model_id)
    if resumed:
        return {"success": True, "message": "Model monitoring resumed"}

    raise HTTPException(status_code=404, detail="Model not found")
244
+
245
+
246
+ # =============================================================================
247
+ # Prediction Recording Endpoints
248
+ # =============================================================================
249
+
250
+
251
@router.post(
    "/models/{model_id}/predictions",
    response_model=DataResponse[RecordPredictionResponse],
)
async def record_prediction(
    model_id: str,
    request: RecordPredictionRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Record a model prediction.

    Args:
        model_id: Identifier of the model the prediction belongs to.
        request: Features, prediction, optional actual value, latency and metadata.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the stored prediction's ID, the model ID
        and the recording timestamp.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``
            (presumably for an unknown model; confirm against the service).
    """
    # Only the service call is guarded. Building the response can also raise
    # ValueError (pydantic's ValidationError subclasses it), and such a
    # failure must not be reported to the client as "404 Not Found".
    try:
        prediction = await service.record_prediction(
            model_id=model_id,
            features=request.features,
            prediction=request.prediction,
            actual=request.actual,
            latency_ms=request.latency_ms,
            metadata=request.metadata,
        )
    except ValueError as e:
        # Chain the cause so the original traceback is kept in the logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=RecordPredictionResponse(
            id=prediction.id,
            model_id=model_id,
            recorded_at=prediction.recorded_at,
        )
    )
280
+
281
+
282
@router.get("/models/{model_id}/metrics", response_model=DataResponse[MetricsResponse])
async def get_model_metrics(
    model_id: str,
    hours: int = Query(24, ge=1, le=168),
    service: ModelMonitoringService = Depends(get_service),
):
    """Get metrics for a model over a recent time window.

    Args:
        model_id: Identifier of the model.
        hours: Size of the look-back window in hours (1-168, default 24).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the metric summaries and data points
        returned by the service.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``
            (presumably for an unknown model; confirm against the service).
    """
    # Only the service call is guarded. A ValueError raised while building
    # the response (e.g. MetricSummary validation) is a server-side fault
    # and must not be reported to the client as "404 Not Found".
    try:
        metrics_data = await service.get_model_metrics(model_id, hours)
    except ValueError as e:
        # Chain the cause so the original traceback is kept in the logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=MetricsResponse(
            model_id=metrics_data["model_id"],
            model_name=metrics_data["model_name"],
            time_range_hours=metrics_data["time_range_hours"],
            metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
            data_points=metrics_data["data_points"],
        )
    )
303
+
304
+
305
+ # =============================================================================
306
+ # Alert Rule Endpoints
307
+ # =============================================================================
308
+
309
+
310
@router.get("/rules", response_model=DataResponse[AlertRuleListResponse])
async def list_alert_rules(
    model_id: str | None = None,
    active_only: bool = False,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alert rules.

    Args:
        model_id: Optional model filter forwarded to the service.
        active_only: Forwarded to the service as its ``active_only`` flag.
        offset: Number of rules to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the requested slice of rules, with
        ``total`` set to the number of rules before slicing.
    """
    all_rules = await service.get_alert_rules(model_id=model_id, active_only=active_only)

    # The service returns the full result set; paging happens here in memory.
    total = len(all_rules)
    page_end = offset + limit
    page = list(all_rules)[offset:page_end]

    items = []
    for rule in page:
        items.append(
            AlertRuleResponse(
                id=rule.id,
                name=rule.name,
                model_id=rule.model_id,
                rule_type=rule.rule_type,
                severity=AlertSeverity(rule.severity),
                config=rule.config,
                is_active=rule.is_active,
                last_triggered_at=rule.last_triggered_at,
                trigger_count=rule.trigger_count,
                created_at=rule.created_at,
                updated_at=rule.updated_at,
            )
        )

    listing = AlertRuleListResponse(
        items=items,
        total=total,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
350
+
351
+
352
@router.post("/rules", response_model=DataResponse[AlertRuleResponse])
async def create_alert_rule(
    request: CreateAlertRuleRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Create a new alert rule.

    Args:
        request: Target model ID, rule name, rule type, config and severity.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the created rule.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``
            (presumably for an unknown model; confirm against the service).
    """
    # Only the service call is guarded. AlertSeverity(...) and the response
    # schema can raise ValueError too, and such a failure must not be
    # reported to the client as "404 Not Found".
    try:
        rule = await service.create_alert_rule(
            model_id=request.model_id,
            name=request.name,
            rule_type=request.rule_type.value,
            config=request.config,
            severity=request.severity.value,
        )
    except ValueError as e:
        # Chain the cause so the original traceback is kept in the logs.
        raise HTTPException(status_code=404, detail=str(e)) from e

    return DataResponse(
        data=AlertRuleResponse(
            id=rule.id,
            name=rule.name,
            model_id=rule.model_id,
            rule_type=rule.rule_type,
            severity=AlertSeverity(rule.severity),
            config=rule.config,
            is_active=rule.is_active,
            last_triggered_at=rule.last_triggered_at,
            trigger_count=rule.trigger_count,
            created_at=rule.created_at,
            updated_at=rule.updated_at,
        )
    )
384
+
385
+
386
@router.get("/rules/{rule_id}", response_model=DataResponse[AlertRuleResponse])
async def get_alert_rule(
    rule_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Fetch a single alert rule by its ID.

    Args:
        rule_id: Identifier of the rule to look up.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the matching rule.

    Raises:
        HTTPException: 404 when no rule with that ID is found.
    """
    # NOTE(review): every rule is loaded and scanned for the ID; a direct
    # lookup on the service would avoid that if one exists.
    candidates = await service.get_alert_rules()

    rule = None
    for candidate in candidates:
        if candidate.id == rule_id:
            rule = candidate
            break

    if not rule:
        raise HTTPException(status_code=404, detail="Alert rule not found")

    payload = AlertRuleResponse(
        id=rule.id,
        name=rule.name,
        model_id=rule.model_id,
        rule_type=rule.rule_type,
        severity=AlertSeverity(rule.severity),
        config=rule.config,
        is_active=rule.is_active,
        last_triggered_at=rule.last_triggered_at,
        trigger_count=rule.trigger_count,
        created_at=rule.created_at,
        updated_at=rule.updated_at,
    )
    return DataResponse(data=payload)
413
+
414
+
415
@router.put("/rules/{rule_id}", response_model=DataResponse[AlertRuleResponse])
async def update_alert_rule(
    rule_id: str,
    request: UpdateAlertRuleRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to an alert rule.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        rule_id: Identifier of the rule to update.
        request: Fields to change; ``None`` means "leave untouched".
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated rule.

    Raises:
        HTTPException: 404 when the service returns no rule.
    """
    updates = {}
    if request.name is not None:
        updates["name"] = request.name
    if request.severity is not None:
        # The service receives the raw enum value.
        updates["severity"] = request.severity.value
    for field_name in ("config", "is_active"):
        field_value = getattr(request, field_name)
        if field_value is not None:
            updates[field_name] = field_value

    updated = await service.update_alert_rule(rule_id, **updates)
    if not updated:
        raise HTTPException(status_code=404, detail="Alert rule not found")

    payload = AlertRuleResponse(
        id=updated.id,
        name=updated.name,
        model_id=updated.model_id,
        rule_type=updated.rule_type,
        severity=AlertSeverity(updated.severity),
        config=updated.config,
        is_active=updated.is_active,
        last_triggered_at=updated.last_triggered_at,
        trigger_count=updated.trigger_count,
        created_at=updated.created_at,
        updated_at=updated.updated_at,
    )
    return DataResponse(data=payload)
451
+
452
+
453
@router.delete("/rules/{rule_id}")
async def delete_alert_rule(
    rule_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete an alert rule.

    Returns:
        A success envelope when the service reports the deletion.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await service.delete_alert_rule(rule_id)
    if was_deleted:
        return {"success": True, "message": "Alert rule deleted"}

    raise HTTPException(status_code=404, detail="Alert rule not found")
464
+
465
+
466
+ # =============================================================================
467
+ # Alert Handler Endpoints
468
+ # =============================================================================
469
+
470
+
471
@router.get("/handlers", response_model=DataResponse[AlertHandlerListResponse])
async def list_alert_handlers(
    active_only: bool = False,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alert handlers.

    Args:
        active_only: Forwarded to the service as its ``active_only`` flag.
        offset: Number of handlers to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the requested slice of handlers, with
        ``total`` set to the number of handlers before slicing.
    """
    all_handlers = await service.get_alert_handlers(active_only=active_only)

    # The service returns the full result set; paging happens here in memory.
    total = len(all_handlers)
    page_end = offset + limit
    page = list(all_handlers)[offset:page_end]

    items = []
    for handler in page:
        items.append(
            AlertHandlerResponse(
                id=handler.id,
                name=handler.name,
                handler_type=handler.handler_type,
                config=handler.config,
                is_active=handler.is_active,
                last_sent_at=handler.last_sent_at,
                send_count=handler.send_count,
                failure_count=handler.failure_count,
                created_at=handler.created_at,
                updated_at=handler.updated_at,
            )
        )

    listing = AlertHandlerListResponse(
        items=items,
        total=total,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
509
+
510
+
511
@router.post("/handlers", response_model=DataResponse[AlertHandlerResponse])
async def create_alert_handler(
    request: CreateAlertHandlerRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Create a new alert handler and return the stored record.

    Args:
        request: Handler name, handler type and config.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the created handler.
    """
    created = await service.create_alert_handler(
        name=request.name,
        # The service receives the raw enum value.
        handler_type=request.handler_type.value,
        config=request.config,
    )

    payload = AlertHandlerResponse(
        id=created.id,
        name=created.name,
        handler_type=created.handler_type,
        config=created.config,
        is_active=created.is_active,
        last_sent_at=created.last_sent_at,
        send_count=created.send_count,
        failure_count=created.failure_count,
        created_at=created.created_at,
        updated_at=created.updated_at,
    )
    return DataResponse(data=payload)
537
+
538
+
539
@router.put("/handlers/{handler_id}", response_model=DataResponse[AlertHandlerResponse])
async def update_alert_handler(
    handler_id: str,
    request: UpdateAlertHandlerRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Apply a partial update to an alert handler.

    Only request fields that are not ``None`` are forwarded to the service.

    Args:
        handler_id: Identifier of the handler to update.
        request: Fields to change; ``None`` means "leave untouched".
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the updated handler.

    Raises:
        HTTPException: 404 when the service returns no handler.
    """
    # All updatable fields are forwarded unchanged under their own name.
    updates = {}
    for field_name in ("name", "config", "is_active"):
        field_value = getattr(request, field_name)
        if field_value is not None:
            updates[field_name] = field_value

    updated = await service.update_alert_handler(handler_id, **updates)
    if not updated:
        raise HTTPException(status_code=404, detail="Alert handler not found")

    payload = AlertHandlerResponse(
        id=updated.id,
        name=updated.name,
        handler_type=updated.handler_type,
        config=updated.config,
        is_active=updated.is_active,
        last_sent_at=updated.last_sent_at,
        send_count=updated.send_count,
        failure_count=updated.failure_count,
        created_at=updated.created_at,
        updated_at=updated.updated_at,
    )
    return DataResponse(data=payload)
572
+
573
+
574
@router.delete("/handlers/{handler_id}")
async def delete_alert_handler(
    handler_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Delete an alert handler.

    Returns:
        A success envelope when the service reports the deletion.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await service.delete_alert_handler(handler_id)
    if was_deleted:
        return {"success": True, "message": "Alert handler deleted"}

    raise HTTPException(status_code=404, detail="Alert handler not found")
585
+
586
+
587
+ # =============================================================================
588
+ # Alert Instance Endpoints
589
+ # =============================================================================
590
+
591
+
592
@router.get("/alerts", response_model=DataResponse[AlertListResponse])
async def list_alerts(
    model_id: str | None = None,
    active_only: bool = False,
    severity: AlertSeverity | None = None,
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    service: ModelMonitoringService = Depends(get_service),
):
    """Return one page of alerts, optionally filtered.

    Args:
        model_id: Optional model filter forwarded to the service.
        active_only: Forwarded to the service as its ``active_only`` flag.
        severity: When given, its enum value is forwarded as a filter.
        offset: Number of alerts to skip (>= 0).
        limit: Page size (1-100).
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the page of alerts plus the total reported
        by the service and the paging parameters that were used.
    """
    # The service receives the raw enum value, or None when no filter is set.
    if severity:
        severity_filter = severity.value
    else:
        severity_filter = None

    records, total_count = await service.get_alerts(
        model_id=model_id,
        active_only=active_only,
        severity=severity_filter,
        offset=offset,
        limit=limit,
    )

    items = []
    for record in records:
        items.append(
            AlertInstance(
                id=record.id,
                rule_id=record.rule_id,
                model_id=record.model_id,
                severity=AlertSeverity(record.severity),
                message=record.message,
                metric_value=record.metric_value,
                threshold_value=record.threshold_value,
                acknowledged=record.acknowledged,
                acknowledged_by=record.acknowledged_by,
                acknowledged_at=record.acknowledged_at,
                resolved=record.resolved,
                resolved_at=record.resolved_at,
                created_at=record.created_at,
                updated_at=record.updated_at,
            )
        )

    listing = AlertListResponse(
        items=items,
        total=total_count,
        offset=offset,
        limit=limit,
    )
    return DataResponse(data=listing)
639
+
640
+
641
@router.post("/alerts/{alert_id}/acknowledge")
async def acknowledge_alert(
    alert_id: str,
    request: AcknowledgeAlertRequest,
    service: ModelMonitoringService = Depends(get_service),
):
    """Acknowledge an alert and return its current state.

    Args:
        alert_id: Identifier of the alert to acknowledge.
        request: Carries ``actor``, which is passed to the service as the
            acknowledging party.
        service: Injected model monitoring service.

    Returns:
        ``DataResponse`` wrapping the alert as returned by the service.

    Raises:
        HTTPException: 404 when the service returns no alert.
    """
    alert = await service.acknowledge_alert(alert_id, request.actor)
    if not alert:
        raise HTTPException(status_code=404, detail="Alert not found")

    # Always return the alert. The response used to be built only inside
    # `if alert.acknowledged:`, so any other outcome fell through and the
    # endpoint answered 200 with a null body.
    return DataResponse(
        data=AlertInstance(
            id=alert.id,
            rule_id=alert.rule_id,
            model_id=alert.model_id,
            severity=AlertSeverity(alert.severity),
            message=alert.message,
            metric_value=alert.metric_value,
            threshold_value=alert.threshold_value,
            acknowledged=alert.acknowledged,
            acknowledged_by=alert.acknowledged_by,
            acknowledged_at=alert.acknowledged_at,
            resolved=alert.resolved,
            resolved_at=alert.resolved_at,
            created_at=alert.created_at,
            updated_at=alert.updated_at,
        )
    )
671
+
672
+
673
@router.post("/alerts/{alert_id}/resolve")
async def resolve_alert(
    alert_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Resolve an alert and return its current state.

    Args:
        alert_id: Identifier of the alert, taken from the URL path.
        service: Monitoring service resolved through ``get_service``.

    Returns:
        A ``DataResponse`` holding the alert converted to ``AlertInstance``.

    Raises:
        HTTPException: 404 when the service returns no alert for ``alert_id``.
    """
    record = await service.resolve_alert(alert_id)
    if not record:
        raise HTTPException(status_code=404, detail="Alert not found")

    # Map the service record onto the API schema field by field.
    payload = AlertInstance(
        id=record.id,
        rule_id=record.rule_id,
        model_id=record.model_id,
        severity=AlertSeverity(record.severity),
        message=record.message,
        metric_value=record.metric_value,
        threshold_value=record.threshold_value,
        acknowledged=record.acknowledged,
        acknowledged_by=record.acknowledged_by,
        acknowledged_at=record.acknowledged_at,
        resolved=record.resolved,
        resolved_at=record.resolved_at,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
    return DataResponse(data=payload)
701
+
702
+
703
+ # =============================================================================
704
+ # Dashboard Endpoints
705
+ # =============================================================================
706
+
707
+
708
@router.get("/overview", response_model=DataResponse[MonitoringOverview])
async def get_monitoring_overview(
    service: ModelMonitoringService = Depends(get_service),
):
    """Return the monitoring overview used by the dashboard.

    Args:
        service: Monitoring service resolved through ``get_service``.

    Returns:
        A ``DataResponse`` holding a ``MonitoringOverview`` built from the
        mapping the service returns.
    """
    stats = await service.get_monitoring_overview()

    # Keys copied from the service result, listed in the order they are read.
    field_names = (
        "total_models",
        "active_models",
        "degraded_models",
        "total_predictions_24h",
        "active_alerts",
        "models_with_drift",
        "avg_latency_ms",
    )
    summary = MonitoringOverview(**{name: stats[name] for name in field_names})
    return DataResponse(data=summary)
726
+
727
+
728
@router.get("/models/{model_id}/dashboard", response_model=DataResponse[ModelDashboardData])
async def get_model_dashboard(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Return dashboard data for a specific model.

    Args:
        model_id: Identifier of the model, taken from the URL path.
        service: Monitoring service resolved through ``get_service``.

    Returns:
        A ``DataResponse`` holding ``ModelDashboardData``: the model, its
        metrics, its active alerts, recent predictions and health status,
        all taken from the mapping the service returns.

    Raises:
        HTTPException: 404 when the service call raises ``ValueError``; the
            exception text becomes the response detail.
    """
    # Only the service call is guarded. Enum conversion and response-model
    # validation below can raise ValueError too, and with a wider ``try`` those
    # failures were misreported to the client as "404 not found".
    # NOTE(review): presumably the service raises ValueError for an unknown
    # model_id -- confirm against ModelMonitoringService.
    try:
        dashboard = await service.get_model_dashboard(model_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    model_data = dashboard["model"]
    metrics_data = dashboard["metrics"]

    return DataResponse(
        data=ModelDashboardData(
            model=RegisteredModelResponse(
                id=model_data["id"],
                name=model_data["name"],
                version=model_data["version"],
                # Falsy description/metadata are normalised to empty values.
                description=model_data["description"] or "",
                status=ModelStatus(model_data["status"]),
                config=model_data["config"],
                metadata=model_data["metadata"] or {},
                prediction_count=model_data["prediction_count"],
                last_prediction_at=model_data["last_prediction_at"],
                current_drift_score=model_data["current_drift_score"],
                health_score=model_data["health_score"],
                created_at=model_data["created_at"],
                updated_at=model_data["updated_at"],
            ),
            metrics=MetricsResponse(
                model_id=metrics_data["model_id"],
                model_name=metrics_data["model_name"],
                time_range_hours=metrics_data["time_range_hours"],
                metrics=[MetricSummary(**m) for m in metrics_data["metrics"]],
                data_points=metrics_data["data_points"],
            ),
            active_alerts=[
                AlertInstance(
                    id=a["id"],
                    rule_id=a["rule_id"],
                    model_id=a["model_id"],
                    severity=AlertSeverity(a["severity"]),
                    message=a["message"],
                    metric_value=a["metric_value"],
                    threshold_value=a["threshold_value"],
                    acknowledged=a["acknowledged"],
                    acknowledged_by=a["acknowledged_by"],
                    acknowledged_at=a["acknowledged_at"],
                    resolved=a["resolved"],
                    resolved_at=a["resolved_at"],
                    created_at=a["created_at"],
                    updated_at=a["updated_at"],
                )
                for a in dashboard["active_alerts"]
            ],
            recent_predictions=dashboard["recent_predictions"],
            health_status=dashboard["health_status"],
        )
    )
789
+
790
+
791
@router.post("/models/{model_id}/evaluate-rules")
async def evaluate_model_rules(
    model_id: str,
    service: ModelMonitoringService = Depends(get_service),
):
    """Run rule evaluation for a model and report the resulting alerts.

    Args:
        model_id: Identifier of the model, taken from the URL path.
        service: Monitoring service resolved through ``get_service``.

    Returns:
        A ``DataResponse`` whose data is a dict with the model id, the number
        of alerts returned by the service, and the ids of those alerts.
    """
    triggered = await service.evaluate_rules(model_id)

    # Build the summary key by key, in the same order the response lists them.
    summary = {"model_id": model_id}
    summary["alerts_created"] = len(triggered)
    summary["alert_ids"] = [alert.id for alert in triggered]

    return DataResponse(data=summary)