truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/unified_alerts.py (new file)

@@ -0,0 +1,870 @@

```python
"""Unified Alerts Service.

Aggregates alerts from all monitoring systems:
- Model monitoring alerts
- Drift monitoring alerts
- Anomaly detection alerts (generated from high anomaly rates)
- Validation failures

Provides unified view, correlation, and management.
"""

from __future__ import annotations

import uuid
from collections.abc import Sequence
from datetime import datetime, timedelta
from typing import Any

from sqlalchemy import and_, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession

from ..db import (
    AnomalyDetection,
    BaseRepository,
    DriftAlert,
    DriftMonitor,
    ModelAlert,
    MonitoredModel,
    Source,
    Validation,
)
from ..schemas.unified_alerts import (
    AlertCorrelation,
    AlertCountBySource,
    AlertCountBySeverity,
    AlertCountByStatus,
    AlertSeverity,
    AlertSource,
    AlertStatus,
    AlertSummary,
    AlertTrendPoint,
    UnifiedAlertResponse,
)


# =============================================================================
# Helper functions
# =============================================================================


def _generate_alert_id(source: AlertSource, source_id: str) -> str:
    """Generate a unified alert ID from source info."""
    return f"{source.value}:{source_id}"


def _parse_alert_id(unified_id: str) -> tuple[AlertSource, str]:
    """Parse a unified alert ID into source and original ID."""
    parts = unified_id.split(":", 1)
    if len(parts) != 2:
        raise ValueError(f"Invalid unified alert ID: {unified_id}")
    return AlertSource(parts[0]), parts[1]


def _map_model_severity(severity: str) -> AlertSeverity:
    """Map model monitoring severity to unified severity."""
    mapping = {
        "critical": AlertSeverity.CRITICAL,
        "warning": AlertSeverity.HIGH,
        "info": AlertSeverity.INFO,
    }
    return mapping.get(severity.lower(), AlertSeverity.MEDIUM)


def _map_drift_severity(severity: str) -> AlertSeverity:
    """Map drift severity to unified severity."""
    mapping = {
        "critical": AlertSeverity.CRITICAL,
        "high": AlertSeverity.HIGH,
        "medium": AlertSeverity.MEDIUM,
        "low": AlertSeverity.LOW,
        "info": AlertSeverity.INFO,
    }
    return mapping.get(severity.lower(), AlertSeverity.MEDIUM)


def _map_drift_status(status: str) -> AlertStatus:
    """Map drift status to unified status."""
    mapping = {
        "open": AlertStatus.OPEN,
        "acknowledged": AlertStatus.ACKNOWLEDGED,
        "resolved": AlertStatus.RESOLVED,
        "ignored": AlertStatus.IGNORED,
    }
    return mapping.get(status.lower(), AlertStatus.OPEN)


# =============================================================================
# Unified Alerts Service
# =============================================================================


class UnifiedAlertsService:
    """Service for unified alert management.

    Aggregates alerts from multiple sources and provides
    a unified interface for querying and managing them.
    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the service.

        Args:
            session: Database session.
        """
        self.session = session

    async def get_all_alerts(
        self,
        *,
        source: AlertSource | None = None,
        severity: AlertSeverity | None = None,
        status: AlertStatus | None = None,
        source_name: str | None = None,
        time_range_hours: int | None = 24,
        offset: int = 0,
        limit: int = 50,
    ) -> tuple[list[UnifiedAlertResponse], int]:
        """Get all alerts from all sources.

        Args:
            source: Filter by alert source.
            severity: Filter by severity.
            status: Filter by status.
            source_name: Filter by source name (partial match).
            time_range_hours: Filter by time range.
            offset: Pagination offset.
            limit: Pagination limit.

        Returns:
            Tuple of (alerts, total count).
        """
        alerts: list[UnifiedAlertResponse] = []
        cutoff = None
        if time_range_hours:
            cutoff = datetime.utcnow() - timedelta(hours=time_range_hours)

        # Gather alerts from all sources
        if source is None or source == AlertSource.MODEL:
            model_alerts = await self._get_model_alerts(
                severity=severity,
                status=status,
                cutoff=cutoff,
            )
            alerts.extend(model_alerts)

        if source is None or source == AlertSource.DRIFT:
            drift_alerts = await self._get_drift_alerts(
                severity=severity,
                status=status,
                cutoff=cutoff,
            )
            alerts.extend(drift_alerts)

        if source is None or source == AlertSource.ANOMALY:
            anomaly_alerts = await self._get_anomaly_alerts(
                severity=severity,
                status=status,
                cutoff=cutoff,
            )
            alerts.extend(anomaly_alerts)

        if source is None or source == AlertSource.VALIDATION:
            validation_alerts = await self._get_validation_alerts(
                severity=severity,
                status=status,
                cutoff=cutoff,
            )
            alerts.extend(validation_alerts)

        # Filter by source name if provided
        if source_name:
            source_name_lower = source_name.lower()
            alerts = [a for a in alerts if source_name_lower in a.source_name.lower()]

        # Sort by created_at descending
        alerts.sort(key=lambda a: a.created_at, reverse=True)

        # Get total and paginate
        total = len(alerts)
        paginated = alerts[offset : offset + limit]

        return paginated, total

    async def get_alert_by_id(self, alert_id: str) -> UnifiedAlertResponse | None:
        """Get a specific unified alert by ID.

        Args:
            alert_id: Unified alert ID.

        Returns:
            Alert if found, None otherwise.
        """
        try:
            source, source_id = _parse_alert_id(alert_id)
        except ValueError:
            return None

        if source == AlertSource.MODEL:
            return await self._get_single_model_alert(source_id)
        elif source == AlertSource.DRIFT:
            return await self._get_single_drift_alert(source_id)
        elif source == AlertSource.ANOMALY:
            return await self._get_single_anomaly_alert(source_id)
        elif source == AlertSource.VALIDATION:
            return await self._get_single_validation_alert(source_id)

        return None

    async def get_alert_summary(self, time_range_hours: int = 24) -> AlertSummary:
        """Get summary statistics for alerts.

        Args:
            time_range_hours: Time range for summary.

        Returns:
            Alert summary.
        """
        alerts, total = await self.get_all_alerts(
            time_range_hours=time_range_hours,
            limit=10000,  # Get all for stats
        )

        # Count by severity
        by_severity = AlertCountBySeverity()
        for alert in alerts:
            if alert.severity == AlertSeverity.CRITICAL:
                by_severity.critical += 1
            elif alert.severity == AlertSeverity.HIGH:
                by_severity.high += 1
            elif alert.severity == AlertSeverity.MEDIUM:
                by_severity.medium += 1
            elif alert.severity == AlertSeverity.LOW:
                by_severity.low += 1
            elif alert.severity == AlertSeverity.INFO:
                by_severity.info += 1

        # Count by source
        by_source = AlertCountBySource()
        for alert in alerts:
            if alert.source == AlertSource.MODEL:
                by_source.model += 1
            elif alert.source == AlertSource.DRIFT:
                by_source.drift += 1
            elif alert.source == AlertSource.ANOMALY:
                by_source.anomaly += 1
            elif alert.source == AlertSource.VALIDATION:
                by_source.validation += 1

        # Count by status
        by_status = AlertCountByStatus()
        active_count = 0
        for alert in alerts:
            if alert.status == AlertStatus.OPEN:
                by_status.open += 1
                active_count += 1
            elif alert.status == AlertStatus.ACKNOWLEDGED:
                by_status.acknowledged += 1
                active_count += 1
            elif alert.status == AlertStatus.RESOLVED:
                by_status.resolved += 1
            elif alert.status == AlertStatus.IGNORED:
                by_status.ignored += 1

        # Generate trend data (hourly for last 24h)
        trend = []
        now = datetime.utcnow()
        for hours_ago in range(24, -1, -1):
            point_time = now - timedelta(hours=hours_ago)
            point_start = point_time.replace(minute=0, second=0, microsecond=0)
            point_end = point_start + timedelta(hours=1)
            count = sum(
                1
                for a in alerts
                if point_start <= a.created_at < point_end
            )
            trend.append(AlertTrendPoint(timestamp=point_start, count=count))

        # Top sources with most alerts
        source_counts: dict[str, int] = {}
        for alert in alerts:
            key = alert.source_name
            source_counts[key] = source_counts.get(key, 0) + 1

        top_sources = sorted(
            [{"name": k, "count": v} for k, v in source_counts.items()],
            key=lambda x: x["count"],
            reverse=True,
        )[:5]

        return AlertSummary(
            total_alerts=total,
            active_alerts=active_count,
            by_severity=by_severity,
            by_source=by_source,
            by_status=by_status,
            trend_24h=trend,
            top_sources=top_sources,
        )

    async def acknowledge_alert(
        self,
        alert_id: str,
        actor: str,
        message: str = "",
    ) -> UnifiedAlertResponse | None:
        """Acknowledge an alert.

        Args:
            alert_id: Unified alert ID.
            actor: Who is acknowledging.
            message: Optional message.

        Returns:
            Updated alert if found.
        """
        try:
            source, source_id = _parse_alert_id(alert_id)
        except ValueError:
            return None

        now = datetime.utcnow()

        if source == AlertSource.MODEL:
            result = await self.session.execute(
                select(ModelAlert).where(ModelAlert.id == source_id)
            )
            alert = result.scalar_one_or_none()
            if alert:
                alert.acknowledged = True
                alert.acknowledged_by = actor
                alert.acknowledged_at = now
                await self.session.commit()
                return await self._get_single_model_alert(source_id)

        elif source == AlertSource.DRIFT:
            result = await self.session.execute(
                select(DriftAlert).where(DriftAlert.id == source_id)
            )
            alert = result.scalar_one_or_none()
            if alert:
                alert.status = "acknowledged"
                alert.acknowledged_by = actor
                alert.acknowledged_at = now
                if message:
                    alert.notes = message
                await self.session.commit()
                return await self._get_single_drift_alert(source_id)

        elif source == AlertSource.VALIDATION:
            # Validation alerts are read-only status derived from results
            return await self._get_single_validation_alert(source_id)

        elif source == AlertSource.ANOMALY:
            # Anomaly alerts are derived from detection results (read-only)
            return await self._get_single_anomaly_alert(source_id)

        return None

    async def resolve_alert(
        self,
        alert_id: str,
        actor: str = "",
        message: str = "",
    ) -> UnifiedAlertResponse | None:
        """Resolve an alert.

        Args:
            alert_id: Unified alert ID.
            actor: Who is resolving.
            message: Optional resolution message.

        Returns:
            Updated alert if found.
        """
        try:
            source, source_id = _parse_alert_id(alert_id)
        except ValueError:
            return None

        now = datetime.utcnow()

        if source == AlertSource.MODEL:
            result = await self.session.execute(
                select(ModelAlert).where(ModelAlert.id == source_id)
            )
            alert = result.scalar_one_or_none()
            if alert:
                alert.resolved = True
                alert.resolved_at = now
                await self.session.commit()
                return await self._get_single_model_alert(source_id)

        elif source == AlertSource.DRIFT:
            result = await self.session.execute(
                select(DriftAlert).where(DriftAlert.id == source_id)
            )
            alert = result.scalar_one_or_none()
            if alert:
                alert.status = "resolved"
                alert.resolved_at = now
                if message:
                    alert.notes = message
                await self.session.commit()
                return await self._get_single_drift_alert(source_id)

        # Validation and anomaly alerts are derived/read-only
        return None

    async def bulk_acknowledge(
        self,
        alert_ids: list[str],
        actor: str,
        message: str = "",
    ) -> tuple[int, int, list[str]]:
        """Bulk acknowledge alerts.

        Args:
            alert_ids: List of alert IDs.
            actor: Who is acknowledging.
            message: Optional message.

        Returns:
            Tuple of (success_count, failed_count, failed_ids).
        """
        success = 0
        failed_ids = []

        for alert_id in alert_ids:
            result = await self.acknowledge_alert(alert_id, actor, message)
            if result:
                success += 1
            else:
                failed_ids.append(alert_id)

        return success, len(failed_ids), failed_ids

    async def bulk_resolve(
        self,
        alert_ids: list[str],
        actor: str,
        message: str = "",
    ) -> tuple[int, int, list[str]]:
        """Bulk resolve alerts.

        Args:
            alert_ids: List of alert IDs.
            actor: Who is resolving.
            message: Optional message.

        Returns:
            Tuple of (success_count, failed_count, failed_ids).
        """
        success = 0
        failed_ids = []

        for alert_id in alert_ids:
            result = await self.resolve_alert(alert_id, actor, message)
            if result:
                success += 1
            else:
                failed_ids.append(alert_id)

        return success, len(failed_ids), failed_ids

    async def get_alert_correlations(
        self,
        alert_id: str,
        time_window_hours: int = 1,
    ) -> list[AlertCorrelation]:
        """Get correlated alerts for a given alert.

        Looks for alerts from:
        - Same source (data source, model)
        - Similar time frame

        Args:
            alert_id: Alert to find correlations for.
            time_window_hours: Time window for correlation.

        Returns:
            List of correlations.
        """
        alert = await self.get_alert_by_id(alert_id)
        if not alert:
            return []

        correlations = []

        # Time window for correlation
        time_start = alert.created_at - timedelta(hours=time_window_hours)
        time_end = alert.created_at + timedelta(hours=time_window_hours)

        # Get all alerts in time window
        all_alerts, _ = await self.get_all_alerts(
            time_range_hours=time_window_hours * 2 + 24,  # Generous window
            limit=1000,
        )

        # Filter to time window and exclude self
        window_alerts = [
            a for a in all_alerts
            if time_start <= a.created_at <= time_end and a.id != alert_id
        ]

        # Group by source name (same data source/model)
        same_source_alerts = [
            a for a in window_alerts
            if a.source_name == alert.source_name
        ]
        if same_source_alerts:
            correlations.append(
                AlertCorrelation(
                    alert_id=alert_id,
                    related_alerts=same_source_alerts[:10],  # Limit
                    correlation_type="same_source",
                    correlation_score=0.9,
                    common_factors=[f"Same source: {alert.source_name}"],
                )
            )

        # Group by similar severity in time window
        same_severity_alerts = [
            a for a in window_alerts
            if a.severity == alert.severity and a not in same_source_alerts
        ]
        if same_severity_alerts:
            correlations.append(
                AlertCorrelation(
                    alert_id=alert_id,
                    related_alerts=same_severity_alerts[:10],
                    correlation_type="temporal_severity",
                    correlation_score=0.6,
                    common_factors=[
                        f"Same severity: {alert.severity.value}",
                        f"Within {time_window_hours}h window",
                    ],
                )
            )

        return correlations

    # =========================================================================
    # Private methods for fetching from each source
    # =========================================================================

    async def _get_model_alerts(
        self,
        severity: AlertSeverity | None = None,
        status: AlertStatus | None = None,
        cutoff: datetime | None = None,
    ) -> list[UnifiedAlertResponse]:
        """Get alerts from model monitoring."""
        filters = []
        if cutoff:
            filters.append(ModelAlert.created_at >= cutoff)
        if status:
            if status == AlertStatus.OPEN:
                filters.append(ModelAlert.acknowledged == False)
                filters.append(ModelAlert.resolved == False)
            elif status == AlertStatus.ACKNOWLEDGED:
                filters.append(ModelAlert.acknowledged == True)
                filters.append(ModelAlert.resolved == False)
            elif status == AlertStatus.RESOLVED:
                filters.append(ModelAlert.resolved == True)

        query = select(ModelAlert)
        if filters:
            query = query.where(and_(*filters))

        result = await self.session.execute(query)
        alerts = result.scalars().all()

        # Get model names
        model_ids = {a.model_id for a in alerts}
        model_names = {}
        if model_ids:
            models_result = await self.session.execute(
                select(MonitoredModel).where(MonitoredModel.id.in_(model_ids))
            )
            for model in models_result.scalars():
                model_names[model.id] = model.name

        unified = []
        for alert in alerts:
            alert_severity = _map_model_severity(alert.severity)
            if severity and alert_severity != severity:
                continue

            alert_status = AlertStatus.OPEN
            if alert.resolved:
                alert_status = AlertStatus.RESOLVED
            elif alert.acknowledged:
                alert_status = AlertStatus.ACKNOWLEDGED

            unified.append(
                UnifiedAlertResponse(
                    id=_generate_alert_id(AlertSource.MODEL, alert.id),
                    source=AlertSource.MODEL,
                    source_id=alert.id,
                    source_name=model_names.get(alert.model_id, "Unknown Model"),
                    severity=alert_severity,
                    status=alert_status,
                    title=f"Model Alert: {alert.message[:50]}",
                    message=alert.message,
                    details={
                        "rule_id": alert.rule_id,
                        "metric_value": alert.metric_value,
                        "threshold_value": alert.threshold_value,
                    },
                    acknowledged_at=alert.acknowledged_at,
                    acknowledged_by=alert.acknowledged_by,
                    resolved_at=alert.resolved_at,
                    created_at=alert.created_at,
                    updated_at=alert.updated_at,
                )
            )

        return unified

    async def _get_drift_alerts(
        self,
        severity: AlertSeverity | None = None,
        status: AlertStatus | None = None,
        cutoff: datetime | None = None,
    ) -> list[UnifiedAlertResponse]:
        """Get alerts from drift monitoring."""
        filters = []
        if cutoff:
            filters.append(DriftAlert.created_at >= cutoff)
        if status:
            filters.append(DriftAlert.status == status.value)

        query = select(DriftAlert)
        if filters:
            query = query.where(and_(*filters))

        result = await self.session.execute(query)
        alerts = result.scalars().all()

        # Get monitor names
        monitor_ids = {a.monitor_id for a in alerts}
        monitor_names = {}
        if monitor_ids:
            monitors_result = await self.session.execute(
                select(DriftMonitor).where(DriftMonitor.id.in_(monitor_ids))
            )
            for monitor in monitors_result.scalars():
                monitor_names[monitor.id] = monitor.name

        unified = []
        for alert in alerts:
            alert_severity = _map_drift_severity(alert.severity)
            if severity and alert_severity != severity:
                continue

            unified.append(
                UnifiedAlertResponse(
                    id=_generate_alert_id(AlertSource.DRIFT, alert.id),
                    source=AlertSource.DRIFT,
                    source_id=alert.id,
                    source_name=monitor_names.get(alert.monitor_id, "Unknown Monitor"),
                    severity=alert_severity,
                    status=_map_drift_status(alert.status),
                    title=f"Drift Alert: {alert.drift_percentage:.1f}% drift detected",
                    message=alert.message,
                    details={
                        "comparison_id": alert.comparison_id,
                        "drift_percentage": alert.drift_percentage,
                        "drifted_columns": alert.drifted_columns,
                    },
                    acknowledged_at=alert.acknowledged_at,
                    acknowledged_by=alert.acknowledged_by,
                    resolved_at=alert.resolved_at,
                    created_at=alert.created_at,
                    updated_at=alert.updated_at,
                )
            )

        return unified

    async def _get_anomaly_alerts(
        self,
        severity: AlertSeverity | None = None,
        status: AlertStatus | None = None,
        cutoff: datetime | None = None,
    ) -> list[UnifiedAlertResponse]:
        """Get alerts derived from anomaly detection results.

        Creates alerts when anomaly rate exceeds thresholds.
        """
        filters = [AnomalyDetection.status == "success"]
        if cutoff:
            filters.append(AnomalyDetection.created_at >= cutoff)

        query = select(AnomalyDetection).where(and_(*filters))
        result = await self.session.execute(query)
        detections = result.scalars().all()

        # Get source names
        source_ids = {d.source_id for d in detections}
        source_names = {}
        if source_ids:
            sources_result = await self.session.execute(
                select(Source).where(Source.id.in_(source_ids))
            )
            for source in sources_result.scalars():
                source_names[source.id] = source.name

        unified = []
        for detection in detections:
            # Only create alerts for high anomaly rates
            if not detection.anomaly_rate or detection.anomaly_rate < 0.1:
                continue

            # Determine severity based on anomaly rate
            if detection.anomaly_rate >= 0.3:
                alert_severity = AlertSeverity.CRITICAL
            elif detection.anomaly_rate >= 0.2:
                alert_severity = AlertSeverity.HIGH
            elif detection.anomaly_rate >= 0.1:
                alert_severity = AlertSeverity.MEDIUM
            else:
                continue

            if severity and alert_severity != severity:
                continue

            # Anomaly alerts are always open (derived state)
            if status and status != AlertStatus.OPEN:
                continue

            pct = detection.anomaly_rate * 100
            unified.append(
                UnifiedAlertResponse(
                    id=_generate_alert_id(AlertSource.ANOMALY, detection.id),
                    source=AlertSource.ANOMALY,
                    source_id=detection.id,
                    source_name=source_names.get(detection.source_id, "Unknown Source"),
                    severity=alert_severity,
                    status=AlertStatus.OPEN,
                    title=f"High Anomaly Rate: {pct:.1f}%",
                    message=f"Anomaly detection found {detection.anomaly_count} anomalies "
                    f"out of {detection.total_rows} rows ({pct:.1f}% rate) "
                    f"using {detection.algorithm} algorithm.",
                    details={
                        "algorithm": detection.algorithm,
                        "anomaly_count": detection.anomaly_count,
                        "total_rows": detection.total_rows,
                        "anomaly_rate": detection.anomaly_rate,
                        "columns_analyzed": detection.columns_analyzed,
                    },
                    created_at=detection.created_at,
                    updated_at=detection.updated_at,
                )
            )

        return unified

    async def _get_validation_alerts(
        self,
        severity: AlertSeverity | None = None,
        status: AlertStatus | None = None,
        cutoff: datetime | None = None,
    ) -> list[UnifiedAlertResponse]:
        """Get alerts from validation failures."""
        filters = [Validation.status == "failed"]
        if cutoff:
            filters.append(Validation.created_at >= cutoff)

        query = select(Validation).where(and_(*filters))
        result = await self.session.execute(query)
        validations = result.scalars().all()

        # Get source names
        source_ids = {v.source_id for v in validations}
        source_names = {}
        if source_ids:
            sources_result = await self.session.execute(
                select(Source).where(Source.id.in_(source_ids))
            )
            for source in sources_result.scalars():
                source_names[source.id] = source.name

        unified = []
        for validation in validations:
            # Determine severity based on critical/high issue counts
            issues = validation.issues or []
            critical_count = sum(1 for i in issues if i.get("severity") == "critical")
            high_count = sum(1 for i in issues if i.get("severity") == "high")

            if critical_count > 0:
                alert_severity = AlertSeverity.CRITICAL
            elif high_count > 0:
                alert_severity = AlertSeverity.HIGH
            else:
                alert_severity = AlertSeverity.MEDIUM

            if severity and alert_severity != severity:
                continue

            # Validation alerts are always open (derived state)
            if status and status != AlertStatus.OPEN:
                continue

            unified.append(
                UnifiedAlertResponse(
                    id=_generate_alert_id(AlertSource.VALIDATION, validation.id),
                    source=AlertSource.VALIDATION,
                    source_id=validation.id,
                    source_name=source_names.get(validation.source_id, "Unknown Source"),
                    severity=alert_severity,
                    status=AlertStatus.OPEN,
                    title=f"Validation Failed: {len(issues)} issues",
                    message=f"Validation failed with {critical_count} critical, "
                    f"{high_count} high severity issues. "
                    f"Pass rate: {validation.pass_rate:.1f}%",
                    details={
                        "pass_rate": validation.pass_rate,
                        "total_issues": len(issues),
                        "critical_issues": critical_count,
                        "high_issues": high_count,
                    },
                    created_at=validation.created_at,
                    updated_at=validation.updated_at,
                )
            )

        return unified

    async def _get_single_model_alert(self, source_id: str) -> UnifiedAlertResponse | None:
        """Get a single model monitoring alert."""
        alerts = await self._get_model_alerts()
        for alert in alerts:
            if alert.source_id == source_id:
                return alert
        return None

    async def _get_single_drift_alert(self, source_id: str) -> UnifiedAlertResponse | None:
        """Get a single drift alert."""
        alerts = await self._get_drift_alerts()
        for alert in alerts:
            if alert.source_id == source_id:
                return alert
        return None

    async def _get_single_anomaly_alert(self, source_id: str) -> UnifiedAlertResponse | None:
        """Get a single anomaly alert."""
        alerts = await self._get_anomaly_alerts()
        for alert in alerts:
            if alert.source_id == source_id:
                return alert
        return None

    async def _get_single_validation_alert(self, source_id: str) -> UnifiedAlertResponse | None:
        """Get a single validation alert."""
        alerts = await self._get_validation_alerts()
        for alert in alerts:
            if alert.source_id == source_id:
                return alert
        return None
```
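For orientation, the sketch below shows one way the new `UnifiedAlertsService` could be driven from application code. Only the class, its method signatures, and the `AlertSummary`/`UnifiedAlertResponse` fields come from the diff above; the async engine URL, the `async_sessionmaker` setup, and the `main()` wrapper are illustrative assumptions, not part of the truthound-dashboard API.

```python
# Hypothetical usage sketch -- engine URL and session factory are assumptions
# for illustration; UnifiedAlertsService and its methods are from the 1.4.0 diff.
import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from truthound_dashboard.core.unified_alerts import UnifiedAlertsService


async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///dashboard.db")
    session_factory = async_sessionmaker(engine, expire_on_commit=False)

    async with session_factory() as session:
        service = UnifiedAlertsService(session)

        # Aggregate alerts across model, drift, anomaly and validation sources
        alerts, total = await service.get_all_alerts(time_range_hours=24, limit=20)
        print(f"{total} alerts in the last 24h, showing {len(alerts)}")

        # Summary statistics: counts by severity/source/status plus hourly trend
        summary = await service.get_alert_summary(time_range_hours=24)
        print(f"active: {summary.active_alerts} of {summary.total_alerts}")

        # Acknowledge the newest alert; note that anomaly and validation
        # alerts are derived state and are read-only in acknowledge_alert()
        if alerts:
            await service.acknowledge_alert(alerts[0].id, actor="oncall", message="investigating")


if __name__ == "__main__":
    asyncio.run(main())
```

Unified IDs are simply `"{source}:{source_id}"` strings (see `_generate_alert_id`), so an alert returned by `get_all_alerts` can be passed straight back to `get_alert_by_id`, `acknowledge_alert`, `resolve_alert`, or `get_alert_correlations`.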