truthound-dashboard 1.3.1-py3-none-any.whl → 1.4.0-py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/anomaly_explainer.py
@@ -0,0 +1,633 @@
+"""Anomaly explainability service using SHAP/LIME.
+
+This module provides interpretability for ML-based anomaly detection results
+using SHAP (SHapley Additive exPlanations) and LIME (Local Interpretable
+Model-agnostic Explanations).
+
+Features:
+- SHAP TreeExplainer for tree-based models (Isolation Forest)
+- SHAP KernelExplainer as fallback for other models
+- Feature importance ranking
+- Local explanations per anomaly
+- Human-readable summary generation
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+from collections.abc import Sequence
+from datetime import datetime
+from typing import Any
+
+import numpy as np
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from truthound_dashboard.db.models import AnomalyDetection, AnomalyExplanation, Source
+
+
+class AnomalyExplainerService:
+    """Service for generating SHAP/LIME explanations for anomaly detections.
+
+    This service provides interpretability for ML-based anomaly detection
+    results, helping users understand why specific rows were flagged as
+    anomalies.
+    """
+
+    def __init__(self, session: AsyncSession) -> None:
+        """Initialize the explainer service.
+
+        Args:
+            session: Database session for persistence.
+        """
+        self.session = session
+        self._model_cache: dict[str, Any] = {}
+
+    async def explain_anomaly(
+        self,
+        detection_id: str,
+        row_indices: list[int],
+        *,
+        max_features: int = 10,
+        sample_background: int = 100,
+    ) -> dict[str, Any]:
+        """Generate SHAP explanations for specific anomaly rows.
+
+        Args:
+            detection_id: ID of the anomaly detection run.
+            row_indices: List of row indices to explain.
+            max_features: Maximum features to include in explanation.
+            sample_background: Background samples for SHAP KernelExplainer.
+
+        Returns:
+            Dictionary containing explanations with feature contributions.
+
+        Raises:
+            ValueError: If detection not found or has no results.
+        """
+        # Get the detection record
+        detection = await self._get_detection(detection_id)
+        if detection is None:
+            raise ValueError(f"Detection '{detection_id}' not found")
+
+        if detection.status != "success":
+            raise ValueError(f"Detection status is '{detection.status}', not 'success'")
+
+        # Get the source and load data
+        source = await self._get_source(detection.source_id)
+        if source is None:
+            raise ValueError(f"Source '{detection.source_id}' not found")
+
+        # Generate explanations
+        try:
+            explanations = await self._generate_explanations(
+                detection=detection,
+                source=source,
+                row_indices=row_indices,
+                max_features=max_features,
+                sample_background=sample_background,
+            )
+
+            # Save explanations to database
+            await self._save_explanations(detection_id, row_indices, explanations)
+
+            return explanations
+
+        except Exception as e:
+            # Log and return error
+            return {
+                "detection_id": detection_id,
+                "row_indices": row_indices,
+                "error": str(e),
+                "explanations": [],
+            }
+
+    async def get_cached_explanations(
+        self,
+        detection_id: str,
+        row_indices: list[int] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get cached explanations for a detection.
+
+        Args:
+            detection_id: ID of the anomaly detection.
+            row_indices: Optional list of specific row indices to retrieve.
+
+        Returns:
+            List of cached explanation dictionaries.
+        """
+        query = select(AnomalyExplanation).where(
+            AnomalyExplanation.detection_id == detection_id
+        )
+
+        if row_indices:
+            query = query.where(AnomalyExplanation.row_index.in_(row_indices))
+
+        query = query.order_by(AnomalyExplanation.row_index)
+
+        result = await self.session.execute(query)
+        explanations = result.scalars().all()
+
+        return [self._explanation_to_dict(exp) for exp in explanations]
+
+    async def _get_detection(self, detection_id: str) -> AnomalyDetection | None:
+        """Get an anomaly detection by ID."""
+        result = await self.session.execute(
+            select(AnomalyDetection).where(AnomalyDetection.id == detection_id)
+        )
+        return result.scalar_one_or_none()
+
+    async def _get_source(self, source_id: str) -> Source | None:
+        """Get a source by ID."""
+        result = await self.session.execute(
+            select(Source).where(Source.id == source_id)
+        )
+        return result.scalar_one_or_none()
+
+    async def _generate_explanations(
+        self,
+        detection: AnomalyDetection,
+        source: Source,
+        row_indices: list[int],
+        max_features: int,
+        sample_background: int,
+    ) -> dict[str, Any]:
+        """Generate SHAP explanations for anomaly rows.
+
+        This method uses the appropriate SHAP explainer based on the
+        algorithm used for detection.
+        """
+        try:
+            import truthound as th
+
+            # Load data
+            df = th.read(source.config)
+
+            # Get columns that were analyzed
+            columns = detection.columns_analyzed or list(
+                df.select_dtypes(include=[np.number]).columns
+            )
+
+            # Filter to analyzed columns and handle NaN
+            df_analyze = df[columns].select_dtypes(include=[np.number])
+            df_clean = df_analyze.fillna(df_analyze.mean())
+
+            if df_clean.empty:
+                return {
+                    "detection_id": detection.id,
+                    "row_indices": row_indices,
+                    "error": "No numeric columns to explain",
+                    "explanations": [],
+                }
+
+            # Get the rows to explain
+            valid_indices = [i for i in row_indices if i < len(df_clean)]
+            if not valid_indices:
+                return {
+                    "detection_id": detection.id,
+                    "row_indices": row_indices,
+                    "error": "No valid row indices",
+                    "explanations": [],
+                }
+
+            X = df_clean.values
+            X_explain = X[valid_indices]
+            feature_names = list(df_clean.columns)
+
+            # Generate SHAP values based on algorithm
+            shap_values = self._compute_shap_values(
+                X=X,
+                X_explain=X_explain,
+                algorithm=detection.algorithm,
+                config=detection.config,
+                sample_background=sample_background,
+            )
+
+            # Build explanation results
+            explanations = []
+            for i, row_idx in enumerate(valid_indices):
+                # Get feature contributions for this row
+                row_shap = shap_values[i] if i < len(shap_values) else np.zeros(len(feature_names))
+                row_values = X_explain[i] if i < len(X_explain) else np.zeros(len(feature_names))
+
+                # Create feature contributions
+                contributions = []
+                for j, (fname, shap_val, feat_val) in enumerate(
+                    zip(feature_names, row_shap, row_values)
+                ):
+                    contributions.append({
+                        "feature": fname,
+                        "value": float(feat_val),
+                        "shap_value": float(shap_val),
+                        "contribution": float(abs(shap_val)),
+                    })
+
+                # Sort by absolute contribution
+                contributions.sort(key=lambda x: x["contribution"], reverse=True)
+
+                # Limit to max features
+                top_contributions = contributions[:max_features]
+
+                # Get anomaly score from detection result
+                anomaly_score = self._get_anomaly_score(detection, row_idx)
+
+                # Generate summary text
+                summary = self._generate_summary(top_contributions, anomaly_score)
+
+                explanations.append({
+                    "row_index": row_idx,
+                    "anomaly_score": anomaly_score,
+                    "feature_contributions": top_contributions,
+                    "total_shap": float(np.sum(row_shap)),
+                    "summary": summary,
+                })
+
+            return {
+                "detection_id": detection.id,
+                "algorithm": detection.algorithm,
+                "row_indices": valid_indices,
+                "feature_names": feature_names,
+                "explanations": explanations,
+                "generated_at": datetime.utcnow().isoformat(),
+            }
+
+        except ImportError:
+            # Fallback: generate mock explanations
+            return self._generate_mock_explanations(
+                detection, row_indices, max_features
+            )
+
+    def _compute_shap_values(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values using the appropriate explainer.
+
+        Args:
+            X: Full feature matrix for background data.
+            X_explain: Feature matrix for rows to explain.
+            algorithm: Detection algorithm used.
+            config: Algorithm configuration.
+            sample_background: Number of background samples.
+
+        Returns:
+            Array of SHAP values for each row and feature.
+        """
+        try:
+            import shap
+
+            # Use TreeExplainer for tree-based models
+            if algorithm == "isolation_forest":
+                return self._compute_isolation_forest_shap(
+                    X, X_explain, config, sample_background
+                )
+
+            # Use KernelExplainer as fallback
+            return self._compute_kernel_shap(X, X_explain, algorithm, config, sample_background)
+
+        except ImportError:
+            # SHAP not installed, use permutation importance
+            return self._compute_permutation_importance(X, X_explain, algorithm, config)
+
+    def _compute_isolation_forest_shap(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values for Isolation Forest using TreeExplainer."""
+        import shap
+        from sklearn.ensemble import IsolationForest
+
+        config = config or {}
+
+        # Build and train Isolation Forest
+        clf = IsolationForest(
+            n_estimators=config.get("n_estimators", 100),
+            contamination=config.get("contamination", 0.1),
+            max_samples=config.get("max_samples", "auto"),
+            random_state=config.get("random_state", 42),
+        )
+        clf.fit(X)
+
+        # Use TreeExplainer for efficient SHAP calculation
+        explainer = shap.TreeExplainer(clf)
+        shap_values = explainer.shap_values(X_explain)
+
+        return np.array(shap_values)
+
+    def _compute_kernel_shap(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values using KernelExplainer (model-agnostic)."""
+        import shap
+        from sklearn.preprocessing import StandardScaler
+
+        config = config or {}
+
+        # Scale data
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(X)
+        X_explain_scaled = scaler.transform(X_explain)
+
+        # Build model based on algorithm
+        model = self._build_model(algorithm, config)
+        model.fit(X_scaled)
+
+        # Get prediction function
+        if hasattr(model, "score_samples"):
+            predict_fn = lambda x: -model.score_samples(x)
+        elif hasattr(model, "decision_function"):
+            predict_fn = lambda x: -model.decision_function(x)
+        else:
+            predict_fn = lambda x: model.fit_predict(x).astype(float)
+
+        # Sample background data
+        background_size = min(sample_background, len(X_scaled))
+        background_indices = np.random.choice(
+            len(X_scaled), background_size, replace=False
+        )
+        background = X_scaled[background_indices]
+
+        # Create KernelExplainer
+        explainer = shap.KernelExplainer(predict_fn, background)
+
+        # Compute SHAP values
+        shap_values = explainer.shap_values(X_explain_scaled, nsamples=100)
+
+        return np.array(shap_values)
+
+    def _build_model(self, algorithm: str, config: dict[str, Any]) -> Any:
+        """Build the appropriate sklearn model for the algorithm."""
+        if algorithm == "isolation_forest":
+            from sklearn.ensemble import IsolationForest
+            return IsolationForest(
+                n_estimators=config.get("n_estimators", 100),
+                contamination=config.get("contamination", 0.1),
+                random_state=config.get("random_state", 42),
+            )
+
+        elif algorithm == "lof":
+            from sklearn.neighbors import LocalOutlierFactor
+            return LocalOutlierFactor(
+                n_neighbors=config.get("n_neighbors", 20),
+                contamination=config.get("contamination", 0.1),
+                novelty=False,
+            )
+
+        elif algorithm == "one_class_svm":
+            from sklearn.svm import OneClassSVM
+            return OneClassSVM(
+                kernel=config.get("kernel", "rbf"),
+                nu=config.get("nu", 0.1),
+                gamma=config.get("gamma", "scale"),
+            )
+
+        elif algorithm == "dbscan":
+            from sklearn.cluster import DBSCAN
+            return DBSCAN(
+                eps=config.get("eps", 0.5),
+                min_samples=config.get("min_samples", 5),
+            )
+
+        else:
+            # Default to Isolation Forest
+            from sklearn.ensemble import IsolationForest
+            return IsolationForest(random_state=42)
+
+    def _compute_permutation_importance(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+    ) -> np.ndarray:
+        """Fallback: compute approximate feature importance via permutation."""
+        config = config or {}
+
+        # Build and train model
+        model = self._build_model(algorithm, config)
+
+        from sklearn.preprocessing import StandardScaler
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(X)
+        X_explain_scaled = scaler.transform(X_explain)
+
+        model.fit(X_scaled)
+
+        # Get base predictions/scores
+        if hasattr(model, "score_samples"):
+            base_scores = -model.score_samples(X_explain_scaled)
+        elif hasattr(model, "decision_function"):
+            base_scores = -model.decision_function(X_explain_scaled)
+        else:
+            base_scores = np.zeros(len(X_explain_scaled))
+
+        # Compute permutation importance for each feature
+        n_features = X_explain_scaled.shape[1]
+        importances = np.zeros((len(X_explain_scaled), n_features))
+
+        for j in range(n_features):
+            X_permuted = X_explain_scaled.copy()
+            # Permute column j
+            X_permuted[:, j] = np.random.permutation(X_permuted[:, j])
+
+            if hasattr(model, "score_samples"):
+                permuted_scores = -model.score_samples(X_permuted)
+            elif hasattr(model, "decision_function"):
+                permuted_scores = -model.decision_function(X_permuted)
+            else:
+                permuted_scores = np.zeros(len(X_permuted))
+
+            # Importance is the change in score
+            importances[:, j] = permuted_scores - base_scores
+
+        return importances
+
+    def _get_anomaly_score(
+        self,
+        detection: AnomalyDetection,
+        row_index: int,
+    ) -> float:
+        """Get the anomaly score for a specific row from detection results."""
+        if detection.result_json and "anomalies" in detection.result_json:
+            for anomaly in detection.result_json["anomalies"]:
+                if anomaly.get("row_index") == row_index:
+                    return anomaly.get("anomaly_score", 0.0)
+        return 0.0
+
+    def _generate_summary(
+        self,
+        contributions: list[dict[str, Any]],
+        anomaly_score: float,
+    ) -> str:
+        """Generate human-readable summary of why a row is anomalous.
+
+        Args:
+            contributions: Feature contributions sorted by importance.
+            anomaly_score: Overall anomaly score for the row.
+
+        Returns:
+            Human-readable summary string.
+        """
+        if not contributions:
+            return "No significant features identified."
+
+        # Classify anomaly severity
+        if anomaly_score >= 0.9:
+            severity = "highly anomalous"
+        elif anomaly_score >= 0.7:
+            severity = "moderately anomalous"
+        elif anomaly_score >= 0.5:
+            severity = "slightly anomalous"
+        else:
+            severity = "borderline anomalous"
+
+        # Get top contributing features
+        top_features = contributions[:3]
+        feature_descriptions = []
+
+        for feat in top_features:
+            name = feat["feature"]
+            value = feat["value"]
+            shap_val = feat["shap_value"]
+
+            # Describe contribution direction
+            direction = "unusually high" if shap_val > 0 else "unusually low"
+            feature_descriptions.append(
+                f"{name} ({value:.2f}) is {direction}"
+            )
+
+        if len(feature_descriptions) == 1:
+            features_text = feature_descriptions[0]
+        elif len(feature_descriptions) == 2:
+            features_text = " and ".join(feature_descriptions)
+        else:
+            features_text = (
+                ", ".join(feature_descriptions[:-1])
+                + f", and {feature_descriptions[-1]}"
+            )
+
+        return f"This row is {severity} (score: {anomaly_score:.3f}). The main contributing factors are: {features_text}."
+
+    def _generate_mock_explanations(
+        self,
+        detection: AnomalyDetection,
+        row_indices: list[int],
+        max_features: int,
+    ) -> dict[str, Any]:
+        """Generate mock explanations when SHAP/sklearn is not available."""
+        import random
+
+        columns = detection.columns_analyzed or ["feature_1", "feature_2", "feature_3"]
+
+        explanations = []
+        for row_idx in row_indices:
+            anomaly_score = self._get_anomaly_score(detection, row_idx)
+            if anomaly_score == 0:
+                anomaly_score = random.uniform(0.5, 1.0)
+
+            contributions = []
+            for col in columns[:max_features]:
+                shap_val = random.uniform(-1.0, 1.0)
+                contributions.append({
+                    "feature": col,
+                    "value": random.uniform(-100, 100),
+                    "shap_value": shap_val,
+                    "contribution": abs(shap_val),
+                })
+
+            contributions.sort(key=lambda x: x["contribution"], reverse=True)
+            summary = self._generate_summary(contributions[:3], anomaly_score)
+
+            explanations.append({
+                "row_index": row_idx,
+                "anomaly_score": anomaly_score,
+                "feature_contributions": contributions,
+                "total_shap": sum(c["shap_value"] for c in contributions),
+                "summary": summary,
+            })
+
+        return {
+            "detection_id": detection.id,
+            "algorithm": detection.algorithm,
+            "row_indices": row_indices,
+            "feature_names": columns,
+            "explanations": explanations,
+            "generated_at": datetime.utcnow().isoformat(),
+            "mock": True,
+        }
+
+    async def _save_explanations(
+        self,
+        detection_id: str,
+        row_indices: list[int],
+        explanations_data: dict[str, Any],
+    ) -> None:
+        """Save explanations to database for caching."""
+        for explanation in explanations_data.get("explanations", []):
+            row_idx = explanation["row_index"]
+
+            # Check if explanation already exists
+            existing = await self.session.execute(
+                select(AnomalyExplanation).where(
+                    AnomalyExplanation.detection_id == detection_id,
+                    AnomalyExplanation.row_index == row_idx,
+                )
+            )
+            existing_exp = existing.scalar_one_or_none()
+
+            if existing_exp:
+                # Update existing
+                existing_exp.anomaly_score = explanation["anomaly_score"]
+                existing_exp.feature_contributions = explanation["feature_contributions"]
+                existing_exp.total_shap = explanation["total_shap"]
+                existing_exp.summary = explanation["summary"]
+                existing_exp.generated_at = datetime.utcnow()
+            else:
+                # Create new
+                new_explanation = AnomalyExplanation(
+                    detection_id=detection_id,
+                    row_index=row_idx,
+                    anomaly_score=explanation["anomaly_score"],
+                    feature_contributions=explanation["feature_contributions"],
+                    total_shap=explanation["total_shap"],
+                    summary=explanation["summary"],
+                )
+                self.session.add(new_explanation)
+
+        await self.session.flush()
+
+    def _explanation_to_dict(self, explanation: AnomalyExplanation) -> dict[str, Any]:
+        """Convert AnomalyExplanation model to dictionary."""
+        return {
+            "id": explanation.id,
+            "detection_id": explanation.detection_id,
+            "row_index": explanation.row_index,
+            "anomaly_score": explanation.anomaly_score,
+            "feature_contributions": explanation.feature_contributions,
+            "total_shap": explanation.total_shap,
+            "summary": explanation.summary,
+            "generated_at": (
+                explanation.generated_at.isoformat()
+                if explanation.generated_at
+                else None
+            ),
+        }
+
+
+# Singleton-style factory for dependency injection
+def get_anomaly_explainer_service(session: AsyncSession) -> AnomalyExplainerService:
+    """Factory function to get AnomalyExplainerService instance."""
+    return AnomalyExplainerService(session)