truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,870 @@
1
+ """Unified Alerts Service.
2
+
3
+ Aggregates alerts from all monitoring systems:
4
+ - Model monitoring alerts
5
+ - Drift monitoring alerts
6
+ - Anomaly detection alerts (generated from high anomaly rates)
7
+ - Validation failures
8
+
9
+ Provides unified view, correlation, and management.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import uuid
15
+ from collections.abc import Sequence
16
+ from datetime import datetime, timedelta
17
+ from typing import Any
18
+
19
+ from sqlalchemy import and_, func, or_, select
20
+ from sqlalchemy.ext.asyncio import AsyncSession
21
+
22
+ from ..db import (
23
+ AnomalyDetection,
24
+ BaseRepository,
25
+ DriftAlert,
26
+ DriftMonitor,
27
+ ModelAlert,
28
+ MonitoredModel,
29
+ Source,
30
+ Validation,
31
+ )
32
+ from ..schemas.unified_alerts import (
33
+ AlertCorrelation,
34
+ AlertCountBySource,
35
+ AlertCountBySeverity,
36
+ AlertCountByStatus,
37
+ AlertSeverity,
38
+ AlertSource,
39
+ AlertStatus,
40
+ AlertSummary,
41
+ AlertTrendPoint,
42
+ UnifiedAlertResponse,
43
+ )
44
+
45
+
46
+ # =============================================================================
47
+ # Helper functions
48
+ # =============================================================================
49
+
50
+
51
def _generate_alert_id(source: AlertSource, source_id: str) -> str:
    """Build the unified alert ID for an alert coming from ``source``.

    The result is the source's ``value`` and the source-specific ID
    joined by a single colon.
    """
    return "{}:{}".format(source.value, source_id)
54
+
55
+
56
def _parse_alert_id(unified_id: str) -> tuple[AlertSource, str]:
    """Split a unified alert ID into its source and source-specific ID.

    Only the first colon acts as the separator, so the source-specific
    part may itself contain colons.

    Raises:
        ValueError: If ``unified_id`` contains no colon. The
            ``AlertSource(...)`` lookup may also raise for an unknown
            prefix (presumably ``ValueError``, as for an enum lookup —
            confirm against the schema definition).
    """
    prefix, separator, original_id = unified_id.partition(":")
    if not separator:
        raise ValueError(f"Invalid unified alert ID: {unified_id}")
    return AlertSource(prefix), original_id
62
+
63
+
64
def _map_model_severity(severity: str) -> AlertSeverity:
    """Translate a model-monitoring severity string to the unified scale.

    Matching is case-insensitive. ``"warning"`` is reported as ``HIGH``;
    any value other than ``"critical"``, ``"warning"`` or ``"info"``
    falls back to ``MEDIUM``.
    """
    label = severity.lower()
    if label == "critical":
        return AlertSeverity.CRITICAL
    if label == "warning":
        return AlertSeverity.HIGH
    if label == "info":
        return AlertSeverity.INFO
    return AlertSeverity.MEDIUM
72
+
73
+
74
def _map_drift_severity(severity: str) -> AlertSeverity:
    """Translate a drift-monitoring severity string to the unified scale.

    Matching is case-insensitive; the five known labels map one-to-one
    and anything else falls back to ``MEDIUM``.
    """
    wanted = severity.lower()
    known_levels = (
        ("critical", AlertSeverity.CRITICAL),
        ("high", AlertSeverity.HIGH),
        ("medium", AlertSeverity.MEDIUM),
        ("low", AlertSeverity.LOW),
        ("info", AlertSeverity.INFO),
    )
    for label, unified in known_levels:
        if label == wanted:
            return unified
    return AlertSeverity.MEDIUM
84
+
85
+
86
def _map_drift_status(status: str) -> AlertStatus:
    """Translate a drift alert status string to the unified status.

    Matching is case-insensitive; an unrecognised status is treated as
    ``OPEN``.
    """
    known_statuses = dict(
        open=AlertStatus.OPEN,
        acknowledged=AlertStatus.ACKNOWLEDGED,
        resolved=AlertStatus.RESOLVED,
        ignored=AlertStatus.IGNORED,
    )
    try:
        return known_statuses[status.lower()]
    except KeyError:
        return AlertStatus.OPEN
95
+
96
+
97
+ # =============================================================================
98
+ # Unified Alerts Service
99
+ # =============================================================================
100
+
101
+
102
+ class UnifiedAlertsService:
103
+ """Service for unified alert management.
104
+
105
+ Aggregates alerts from multiple sources and provides
106
+ a unified interface for querying and managing them.
107
+ """
108
+
109
def __init__(self, session: AsyncSession) -> None:
    """Bind the service to a database session.

    Args:
        session: Async database session; stored on the instance and
            used by the service's queries and commits.
    """
    self.session = session
116
+
117
async def get_all_alerts(
    self,
    *,
    source: AlertSource | None = None,
    severity: AlertSeverity | None = None,
    status: AlertStatus | None = None,
    source_name: str | None = None,
    time_range_hours: int | None = 24,
    offset: int = 0,
    limit: int = 50,
) -> tuple[list[UnifiedAlertResponse], int]:
    """Collect alerts from every monitoring system into one page.

    Alerts are gathered per source, filtered, sorted newest first and
    then paginated in memory.

    Args:
        source: Restrict the result to one alert source.
        severity: Restrict the result to one severity.
        status: Restrict the result to one status.
        source_name: Case-insensitive substring that the alert's
            source name must contain.
        time_range_hours: Only include alerts created within this many
            hours before now. A falsy value (``None`` or ``0``)
            disables the time filter.
        offset: Pagination offset.
        limit: Pagination limit.

    Returns:
        Tuple of (page of alerts, total number of matching alerts
        before pagination).
    """
    since = (
        datetime.utcnow() - timedelta(hours=time_range_hours)
        if time_range_hours
        else None
    )

    # Listed in the order their results are concatenated; the sort below
    # is stable, so this order decides ties on created_at.
    fetchers = (
        (AlertSource.MODEL, self._get_model_alerts),
        (AlertSource.DRIFT, self._get_drift_alerts),
        (AlertSource.ANOMALY, self._get_anomaly_alerts),
        (AlertSource.VALIDATION, self._get_validation_alerts),
    )

    collected: list[UnifiedAlertResponse] = []
    for origin, fetch in fetchers:
        if source is None or source == origin:
            collected += await fetch(
                severity=severity,
                status=status,
                cutoff=since,
            )

    # Optional partial match on the source name
    if source_name:
        needle = source_name.lower()
        collected = [
            item for item in collected if needle in item.source_name.lower()
        ]

    # Newest first
    ordered = sorted(collected, key=lambda item: item.created_at, reverse=True)

    return ordered[offset : offset + limit], len(ordered)
193
+
194
async def get_alert_by_id(self, alert_id: str) -> UnifiedAlertResponse | None:
    """Look up one alert by its unified ID.

    Args:
        alert_id: Unified alert ID.

    Returns:
        The alert, or None when the ID cannot be parsed or no loader
        exists for its source. Otherwise the result is whatever the
        source-specific loader returns.
    """
    try:
        origin, original_id = _parse_alert_id(alert_id)
    except ValueError:
        return None

    # One loader per alert source
    loaders = {
        AlertSource.MODEL: self._get_single_model_alert,
        AlertSource.DRIFT: self._get_single_drift_alert,
        AlertSource.ANOMALY: self._get_single_anomaly_alert,
        AlertSource.VALIDATION: self._get_single_validation_alert,
    }
    loader = loaders.get(origin)
    if loader is None:
        return None
    return await loader(original_id)
218
+
219
async def get_alert_summary(self, time_range_hours: int = 24) -> AlertSummary:
    """Compute summary statistics over recent alerts.

    The counts are computed over at most 10000 alerts (the page
    requested below), while ``total_alerts`` is the unpaginated total.
    The trend always consists of the 25 hour-aligned buckets ending
    with the current hour, independent of ``time_range_hours``.

    Args:
        time_range_hours: Time range for summary.

    Returns:
        Alert summary.
    """
    alerts, total = await self.get_all_alerts(
        time_range_hours=time_range_hours,
        limit=10000,  # large page so the stats see every alert in range
    )

    # (enum member, counter attribute) pairs for each breakdown
    severity_fields = (
        (AlertSeverity.CRITICAL, "critical"),
        (AlertSeverity.HIGH, "high"),
        (AlertSeverity.MEDIUM, "medium"),
        (AlertSeverity.LOW, "low"),
        (AlertSeverity.INFO, "info"),
    )
    source_fields = (
        (AlertSource.MODEL, "model"),
        (AlertSource.DRIFT, "drift"),
        (AlertSource.ANOMALY, "anomaly"),
        (AlertSource.VALIDATION, "validation"),
    )
    status_fields = (
        (AlertStatus.OPEN, "open"),
        (AlertStatus.ACKNOWLEDGED, "acknowledged"),
        (AlertStatus.RESOLVED, "resolved"),
        (AlertStatus.IGNORED, "ignored"),
    )

    def bump(counter: Any, value: Any, fields: Any) -> str | None:
        """Increment the counter attribute paired with ``value``, if any."""
        for member, field_name in fields:
            if value == member:
                setattr(counter, field_name, getattr(counter, field_name) + 1)
                return field_name
        return None

    by_severity = AlertCountBySeverity()
    by_source = AlertCountBySource()
    by_status = AlertCountByStatus()
    active_count = 0
    source_counts: dict[str, int] = {}

    for alert in alerts:
        bump(by_severity, alert.severity, severity_fields)
        bump(by_source, alert.source, source_fields)
        # Open and acknowledged alerts both count as active
        if bump(by_status, alert.status, status_fields) in ("open", "acknowledged"):
            active_count += 1
        source_counts[alert.source_name] = (
            source_counts.get(alert.source_name, 0) + 1
        )

    # Hourly trend, oldest bucket first
    current = datetime.utcnow()
    trend = []
    for hours_ago in reversed(range(25)):
        bucket_start = (current - timedelta(hours=hours_ago)).replace(
            minute=0, second=0, microsecond=0
        )
        bucket_end = bucket_start + timedelta(hours=1)
        hits = len(
            [a for a in alerts if bucket_start <= a.created_at < bucket_end]
        )
        trend.append(AlertTrendPoint(timestamp=bucket_start, count=hits))

    # Five source names with the most alerts
    ranked = sorted(
        source_counts.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_sources = [{"name": name, "count": count} for name, count in ranked[:5]]

    return AlertSummary(
        total_alerts=total,
        active_alerts=active_count,
        by_severity=by_severity,
        by_source=by_source,
        by_status=by_status,
        trend_24h=trend,
        top_sources=top_sources,
    )
309
+
310
async def acknowledge_alert(
    self,
    alert_id: str,
    actor: str,
    message: str = "",
) -> UnifiedAlertResponse | None:
    """Mark an alert as acknowledged.

    Model and drift alerts are updated and committed. Validation and
    anomaly alerts are derived and read-only, so they are returned
    unchanged.

    Args:
        alert_id: Unified alert ID.
        actor: Who is acknowledging.
        message: Optional message; only stored for drift alerts (as
            the alert's notes), and only when non-empty.

    Returns:
        The alert after the operation, or None if the ID is invalid or
        the underlying model/drift alert does not exist.
    """
    try:
        origin, original_id = _parse_alert_id(alert_id)
    except ValueError:
        return None

    acknowledged_at = datetime.utcnow()

    if origin == AlertSource.MODEL:
        lookup = await self.session.execute(
            select(ModelAlert).where(ModelAlert.id == original_id)
        )
        model_alert = lookup.scalar_one_or_none()
        if not model_alert:
            return None
        model_alert.acknowledged = True
        model_alert.acknowledged_by = actor
        model_alert.acknowledged_at = acknowledged_at
        await self.session.commit()
        return await self._get_single_model_alert(original_id)

    if origin == AlertSource.DRIFT:
        lookup = await self.session.execute(
            select(DriftAlert).where(DriftAlert.id == original_id)
        )
        drift_alert = lookup.scalar_one_or_none()
        if not drift_alert:
            return None
        drift_alert.status = "acknowledged"
        drift_alert.acknowledged_by = actor
        drift_alert.acknowledged_at = acknowledged_at
        if message:
            drift_alert.notes = message
        await self.session.commit()
        return await self._get_single_drift_alert(original_id)

    if origin == AlertSource.VALIDATION:
        # Read-only: status is derived from validation results
        return await self._get_single_validation_alert(original_id)

    if origin == AlertSource.ANOMALY:
        # Read-only: derived from anomaly detection results
        return await self._get_single_anomaly_alert(original_id)

    return None
368
+
369
async def resolve_alert(
    self,
    alert_id: str,
    actor: str = "",
    message: str = "",
) -> UnifiedAlertResponse | None:
    """Mark an alert as resolved.

    Only model and drift alerts can be resolved; validation and anomaly
    alerts are derived/read-only and yield None.

    Args:
        alert_id: Unified alert ID.
        actor: Who is resolving (accepted but not stored by this
            method).
        message: Optional resolution message; only stored for drift
            alerts (as the alert's notes), and only when non-empty.

    Returns:
        The updated alert, or None if the ID is invalid, the alert does
        not exist, or its source cannot be resolved.
    """
    try:
        origin, original_id = _parse_alert_id(alert_id)
    except ValueError:
        return None

    resolved_at = datetime.utcnow()

    if origin == AlertSource.MODEL:
        lookup = await self.session.execute(
            select(ModelAlert).where(ModelAlert.id == original_id)
        )
        model_alert = lookup.scalar_one_or_none()
        if not model_alert:
            return None
        model_alert.resolved = True
        model_alert.resolved_at = resolved_at
        await self.session.commit()
        return await self._get_single_model_alert(original_id)

    if origin == AlertSource.DRIFT:
        lookup = await self.session.execute(
            select(DriftAlert).where(DriftAlert.id == original_id)
        )
        drift_alert = lookup.scalar_one_or_none()
        if not drift_alert:
            return None
        drift_alert.status = "resolved"
        drift_alert.resolved_at = resolved_at
        if message:
            drift_alert.notes = message
        await self.session.commit()
        return await self._get_single_drift_alert(original_id)

    # Every other source is derived/read-only
    return None
418
+
419
async def bulk_acknowledge(
    self,
    alert_ids: list[str],
    actor: str,
    message: str = "",
) -> tuple[int, int, list[str]]:
    """Acknowledge several alerts one after another.

    Each ID is passed to ``acknowledge_alert`` in order; an ID counts
    as failed when that call returns a falsy result.

    Args:
        alert_ids: List of alert IDs.
        actor: Who is acknowledging.
        message: Optional message.

    Returns:
        Tuple of (success_count, failed_count, failed_ids).
    """
    outcomes = [
        (alert_id, await self.acknowledge_alert(alert_id, actor, message))
        for alert_id in alert_ids
    ]
    failed_ids = [alert_id for alert_id, outcome in outcomes if not outcome]
    return len(outcomes) - len(failed_ids), len(failed_ids), failed_ids
446
+
447
async def bulk_resolve(
    self,
    alert_ids: list[str],
    actor: str,
    message: str = "",
) -> tuple[int, int, list[str]]:
    """Resolve several alerts one after another.

    Each ID is passed to ``resolve_alert`` in order; an ID counts as
    failed when that call returns a falsy result.

    Args:
        alert_ids: List of alert IDs.
        actor: Who is resolving.
        message: Optional message.

    Returns:
        Tuple of (success_count, failed_count, failed_ids).
    """
    outcomes = [
        (alert_id, await self.resolve_alert(alert_id, actor, message))
        for alert_id in alert_ids
    ]
    failed_ids = [alert_id for alert_id, outcome in outcomes if not outcome]
    return len(outcomes) - len(failed_ids), len(failed_ids), failed_ids
474
+
475
async def get_alert_correlations(
    self,
    alert_id: str,
    time_window_hours: int = 1,
) -> list[AlertCorrelation]:
    """Get correlated alerts for a given alert.

    Looks for other alerts created within ``time_window_hours`` before
    or after the given alert and groups them into:
    - alerts with the same source name (score 0.9)
    - remaining alerts with the same severity (score 0.6)

    Each group lists at most 10 related alerts.

    Args:
        alert_id: Alert to find correlations for.
        time_window_hours: Time window for correlation.

    Returns:
        List of correlations; empty if the alert is unknown or nothing
        else falls inside the window.
    """
    alert = await self.get_alert_by_id(alert_id)
    if not alert:
        return []

    # Time window for correlation, centred on the alert itself
    window = timedelta(hours=time_window_hours)
    time_start = alert.created_at - window
    time_end = alert.created_at + window

    # get_all_alerts() measures its range backwards from *now*, not from
    # the alert. The lookback therefore has to reach time_start, or an
    # old alert would never find its neighbours. It is never narrower
    # than the previous fixed "generous" window.
    age_seconds = (datetime.utcnow() - time_start).total_seconds()
    lookback_hours = max(
        int(age_seconds // 3600) + 1,
        time_window_hours * 2 + 24,
    )

    all_alerts, total = await self.get_all_alerts(
        time_range_hours=lookback_hours,
        limit=1000,
    )
    if total > len(all_alerts):
        # Results are newest first, so a truncated page drops exactly the
        # older alerts that an old alert's window needs; refetch them all.
        all_alerts, _ = await self.get_all_alerts(
            time_range_hours=lookback_hours,
            limit=total,
        )

    # Filter to time window and exclude self
    window_alerts = [
        a for a in all_alerts
        if time_start <= a.created_at <= time_end and a.id != alert_id
    ]

    correlations: list[AlertCorrelation] = []

    # Group by source name (same data source/model)
    same_source_alerts = [
        a for a in window_alerts
        if a.source_name == alert.source_name
    ]
    if same_source_alerts:
        correlations.append(
            AlertCorrelation(
                alert_id=alert_id,
                related_alerts=same_source_alerts[:10],  # Limit
                correlation_type="same_source",
                correlation_score=0.9,
                common_factors=[f"Same source: {alert.source_name}"],
            )
        )

    # Group by same severity in the time window. Alerts already reported
    # in the same-source group are exactly those sharing the source
    # name, so excluding by name avoids a list scan per alert.
    same_severity_alerts = [
        a for a in window_alerts
        if a.severity == alert.severity and a.source_name != alert.source_name
    ]
    if same_severity_alerts:
        correlations.append(
            AlertCorrelation(
                alert_id=alert_id,
                related_alerts=same_severity_alerts[:10],
                correlation_type="temporal_severity",
                correlation_score=0.6,
                common_factors=[
                    f"Same severity: {alert.severity.value}",
                    f"Within {time_window_hours}h window",
                ],
            )
        )

    return correlations
551
+
552
+ # =========================================================================
553
+ # Private methods for fetching from each source
554
+ # =========================================================================
555
+
556
async def _get_model_alerts(
    self,
    severity: AlertSeverity | None = None,
    status: AlertStatus | None = None,
    cutoff: datetime | None = None,
) -> list[UnifiedAlertResponse]:
    """Get alerts from model monitoring.

    Model alerts have no status column; their unified status is derived
    from the ``resolved`` and ``acknowledged`` flags, with resolved
    taking precedence. They can therefore only be OPEN, ACKNOWLEDGED or
    RESOLVED.

    Args:
        severity: Only return alerts whose mapped severity equals this.
        status: Only return alerts with this unified status. Any status
            a model alert cannot have (e.g. IGNORED) yields no alerts.
        cutoff: Only return alerts created at or after this time.

    Returns:
        Matching model alerts in unified form.
    """
    filters = []
    if cutoff:
        filters.append(ModelAlert.created_at >= cutoff)
    # The `== True` / `== False` comparisons build SQL expressions and
    # must stay as operators, not `is` / `not`.
    if status == AlertStatus.OPEN:
        filters.append(ModelAlert.acknowledged == False)  # noqa: E712
        filters.append(ModelAlert.resolved == False)  # noqa: E712
    elif status == AlertStatus.ACKNOWLEDGED:
        filters.append(ModelAlert.acknowledged == True)  # noqa: E712
        filters.append(ModelAlert.resolved == False)  # noqa: E712
    elif status == AlertStatus.RESOLVED:
        filters.append(ModelAlert.resolved == True)  # noqa: E712
    elif status:
        # A status model alerts never report (e.g. IGNORED). Previously
        # no filter was added here, so every model alert was returned.
        return []

    query = select(ModelAlert)
    if filters:
        query = query.where(and_(*filters))

    result = await self.session.execute(query)
    alerts = result.scalars().all()

    # Resolve model names in one query
    model_ids = {a.model_id for a in alerts}
    model_names = {}
    if model_ids:
        models_result = await self.session.execute(
            select(MonitoredModel).where(MonitoredModel.id.in_(model_ids))
        )
        model_names = {model.id: model.name for model in models_result.scalars()}

    unified = []
    for alert in alerts:
        # Severity is filtered after mapping onto the unified scale
        alert_severity = _map_model_severity(alert.severity)
        if severity and alert_severity != severity:
            continue

        # Resolved wins over acknowledged
        if alert.resolved:
            alert_status = AlertStatus.RESOLVED
        elif alert.acknowledged:
            alert_status = AlertStatus.ACKNOWLEDGED
        else:
            alert_status = AlertStatus.OPEN

        unified.append(
            UnifiedAlertResponse(
                id=_generate_alert_id(AlertSource.MODEL, alert.id),
                source=AlertSource.MODEL,
                source_id=alert.id,
                source_name=model_names.get(alert.model_id, "Unknown Model"),
                severity=alert_severity,
                status=alert_status,
                title=f"Model Alert: {alert.message[:50]}",
                message=alert.message,
                details={
                    "rule_id": alert.rule_id,
                    "metric_value": alert.metric_value,
                    "threshold_value": alert.threshold_value,
                },
                acknowledged_at=alert.acknowledged_at,
                acknowledged_by=alert.acknowledged_by,
                resolved_at=alert.resolved_at,
                created_at=alert.created_at,
                updated_at=alert.updated_at,
            )
        )

    return unified
629
+
630
async def _get_drift_alerts(
    self,
    severity: AlertSeverity | None = None,
    status: AlertStatus | None = None,
    cutoff: datetime | None = None,
) -> list[UnifiedAlertResponse]:
    """Fetch drift-monitoring alerts in unified form.

    ``cutoff`` and ``status`` are applied in the query (status is
    matched against the enum's ``value``); ``severity`` is applied
    afterwards, on the mapped unified severity.
    """
    conditions = []
    if cutoff:
        conditions.append(DriftAlert.created_at >= cutoff)
    if status:
        conditions.append(DriftAlert.status == status.value)

    statement = select(DriftAlert)
    if conditions:
        statement = statement.where(and_(*conditions))

    rows = (await self.session.execute(statement)).scalars().all()

    # Look up the names of all referenced monitors in one query
    monitor_names = {}
    monitor_ids = {row.monitor_id for row in rows}
    if monitor_ids:
        monitors_result = await self.session.execute(
            select(DriftMonitor).where(DriftMonitor.id.in_(monitor_ids))
        )
        for monitor in monitors_result.scalars():
            monitor_names[monitor.id] = monitor.name

    unified = []
    for row in rows:
        unified_severity = _map_drift_severity(row.severity)
        if severity and unified_severity != severity:
            continue

        details = {
            "comparison_id": row.comparison_id,
            "drift_percentage": row.drift_percentage,
            "drifted_columns": row.drifted_columns,
        }
        unified.append(
            UnifiedAlertResponse(
                id=_generate_alert_id(AlertSource.DRIFT, row.id),
                source=AlertSource.DRIFT,
                source_id=row.id,
                source_name=monitor_names.get(row.monitor_id, "Unknown Monitor"),
                severity=unified_severity,
                status=_map_drift_status(row.status),
                title=f"Drift Alert: {row.drift_percentage:.1f}% drift detected",
                message=row.message,
                details=details,
                acknowledged_at=row.acknowledged_at,
                acknowledged_by=row.acknowledged_by,
                resolved_at=row.resolved_at,
                created_at=row.created_at,
                updated_at=row.updated_at,
            )
        )

    return unified
690
+
691
async def _get_anomaly_alerts(
    self,
    severity: AlertSeverity | None = None,
    status: AlertStatus | None = None,
    cutoff: datetime | None = None,
) -> list[UnifiedAlertResponse]:
    """Build unified alerts from successful anomaly detection runs.

    A detection becomes an alert when its anomaly rate is at least 0.1:
    a rate >= 0.3 maps to CRITICAL, >= 0.2 to HIGH and >= 0.1 to MEDIUM.
    These alerts are derived on the fly, so each one is reported as OPEN.

    Args:
        severity: When given, keep only alerts of exactly this severity.
        status: When given, keep only alerts in this status. Any value
            other than ``AlertStatus.OPEN`` yields an empty list.
        cutoff: When given, skip detections created before this time.

    Returns:
        One ``UnifiedAlertResponse`` per qualifying detection, in the
        order the detections were returned by the query.
    """
    # Derived alerts are always OPEN, so a filter on any other status can
    # never match; answer before issuing any database query.
    if status and status != AlertStatus.OPEN:
        return []

    filters = [AnomalyDetection.status == "success"]
    if cutoff:
        filters.append(AnomalyDetection.created_at >= cutoff)

    result = await self.session.execute(
        select(AnomalyDetection).where(and_(*filters))
    )
    detections = result.scalars().all()

    # Resolve display names for every referenced source with one query.
    source_names = {}
    source_ids = {d.source_id for d in detections}
    if source_ids:
        sources_result = await self.session.execute(
            select(Source).where(Source.id.in_(source_ids))
        )
        source_names = {
            source.id: source.name for source in sources_result.scalars()
        }

    # Single source of truth for the rate -> severity mapping, checked
    # from the highest floor down so the first match wins.
    severity_bands = (
        (0.3, AlertSeverity.CRITICAL),
        (0.2, AlertSeverity.HIGH),
        (0.1, AlertSeverity.MEDIUM),
    )

    unified = []
    for detection in detections:
        rate = detection.anomaly_rate
        # Missing or zero rates never produce an alert.
        if not rate:
            continue

        # No band matches for rates below the lowest floor (or NaN),
        # in which case the detection is skipped.
        alert_severity = next(
            (level for floor, level in severity_bands if rate >= floor),
            None,
        )
        if alert_severity is None:
            continue

        if severity and alert_severity != severity:
            continue

        pct = rate * 100
        unified.append(
            UnifiedAlertResponse(
                id=_generate_alert_id(AlertSource.ANOMALY, detection.id),
                source=AlertSource.ANOMALY,
                source_id=detection.id,
                source_name=source_names.get(detection.source_id, "Unknown Source"),
                severity=alert_severity,
                status=AlertStatus.OPEN,
                title=f"High Anomaly Rate: {pct:.1f}%",
                message=f"Anomaly detection found {detection.anomaly_count} anomalies "
                f"out of {detection.total_rows} rows ({pct:.1f}% rate) "
                f"using {detection.algorithm} algorithm.",
                details={
                    "algorithm": detection.algorithm,
                    "anomaly_count": detection.anomaly_count,
                    "total_rows": detection.total_rows,
                    "anomaly_rate": rate,
                    "columns_analyzed": detection.columns_analyzed,
                },
                created_at=detection.created_at,
                updated_at=detection.updated_at,
            )
        )

    return unified
768
+
769
async def _get_validation_alerts(
    self,
    severity: AlertSeverity | None = None,
    status: AlertStatus | None = None,
    cutoff: datetime | None = None,
) -> list[UnifiedAlertResponse]:
    """Build unified alerts from validations whose status is "failed".

    Severity is derived from the issue list of each validation: any issue
    with severity "critical" makes the alert CRITICAL, otherwise any
    "high" issue makes it HIGH, otherwise it is MEDIUM. These alerts are
    derived on the fly, so each one is reported as OPEN.

    Args:
        severity: When given, keep only alerts of exactly this severity.
        status: When given, keep only alerts in this status. Any value
            other than ``AlertStatus.OPEN`` yields an empty list.
        cutoff: When given, skip validations created before this time.

    Returns:
        One ``UnifiedAlertResponse`` per qualifying validation, in the
        order the validations were returned by the query.
    """
    # Derived alerts are always OPEN, so a filter on any other status can
    # never match; answer before issuing any database query.
    if status and status != AlertStatus.OPEN:
        return []

    filters = [Validation.status == "failed"]
    if cutoff:
        filters.append(Validation.created_at >= cutoff)

    result = await self.session.execute(
        select(Validation).where(and_(*filters))
    )
    validations = result.scalars().all()

    # Resolve display names for every referenced source with one query.
    source_names = {}
    source_ids = {v.source_id for v in validations}
    if source_ids:
        sources_result = await self.session.execute(
            select(Source).where(Source.id.in_(source_ids))
        )
        source_names = {
            source.id: source.name for source in sources_result.scalars()
        }

    unified = []
    for validation in validations:
        # Issues are read with .get("severity"), i.e. treated as mappings.
        issues = validation.issues or []
        critical_count = sum(1 for i in issues if i.get("severity") == "critical")
        high_count = sum(1 for i in issues if i.get("severity") == "high")

        if critical_count > 0:
            alert_severity = AlertSeverity.CRITICAL
        elif high_count > 0:
            alert_severity = AlertSeverity.HIGH
        else:
            alert_severity = AlertSeverity.MEDIUM

        if severity and alert_severity != severity:
            continue

        # NOTE(review): whether pass_rate can be None is not visible from
        # here - confirm against the model. Formatting None with ".1f"
        # raises TypeError and would abort the whole listing, so fall back
        # to a placeholder instead.
        pass_rate = validation.pass_rate
        pass_rate_text = f"{pass_rate:.1f}%" if pass_rate is not None else "N/A"

        unified.append(
            UnifiedAlertResponse(
                id=_generate_alert_id(AlertSource.VALIDATION, validation.id),
                source=AlertSource.VALIDATION,
                source_id=validation.id,
                source_name=source_names.get(validation.source_id, "Unknown Source"),
                severity=alert_severity,
                status=AlertStatus.OPEN,
                title=f"Validation Failed: {len(issues)} issues",
                message=f"Validation failed with {critical_count} critical, "
                f"{high_count} high severity issues. "
                f"Pass rate: {pass_rate_text}",
                details={
                    "pass_rate": pass_rate,
                    "total_issues": len(issues),
                    "critical_issues": critical_count,
                    "high_issues": high_count,
                },
                created_at=validation.created_at,
                updated_at=validation.updated_at,
            )
        )

    return unified
839
+
840
+ async def _get_single_model_alert(self, source_id: str) -> UnifiedAlertResponse | None:
841
+ """Get a single model monitoring alert."""
842
+ alerts = await self._get_model_alerts()
843
+ for alert in alerts:
844
+ if alert.source_id == source_id:
845
+ return alert
846
+ return None
847
+
848
+ async def _get_single_drift_alert(self, source_id: str) -> UnifiedAlertResponse | None:
849
+ """Get a single drift alert."""
850
+ alerts = await self._get_drift_alerts()
851
+ for alert in alerts:
852
+ if alert.source_id == source_id:
853
+ return alert
854
+ return None
855
+
856
+ async def _get_single_anomaly_alert(self, source_id: str) -> UnifiedAlertResponse | None:
857
+ """Get a single anomaly alert."""
858
+ alerts = await self._get_anomaly_alerts()
859
+ for alert in alerts:
860
+ if alert.source_id == source_id:
861
+ return alert
862
+ return None
863
+
864
+ async def _get_single_validation_alert(self, source_id: str) -> UnifiedAlertResponse | None:
865
+ """Get a single validation alert."""
866
+ alerts = await self._get_validation_alerts()
867
+ for alert in alerts:
868
+ if alert.source_id == source_id:
869
+ return alert
870
+ return None