truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/anomaly_explainer.py
@@ -0,0 +1,633 @@
+"""Anomaly explainability service using SHAP/LIME.
+
+This module provides interpretability for ML-based anomaly detection results
+using SHAP (SHapley Additive exPlanations) and LIME (Local Interpretable
+Model-agnostic Explanations).
+
+Features:
+- SHAP TreeExplainer for tree-based models (Isolation Forest)
+- SHAP KernelExplainer as fallback for other models
+- Feature importance ranking
+- Local explanations per anomaly
+- Human-readable summary generation
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+from collections.abc import Sequence
+from datetime import datetime
+from typing import Any
+
+import numpy as np
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from truthound_dashboard.db.models import AnomalyDetection, AnomalyExplanation, Source
+
+
+class AnomalyExplainerService:
+    """Service for generating SHAP/LIME explanations for anomaly detections.
+
+    This service provides interpretability for ML-based anomaly detection
+    results, helping users understand why specific rows were flagged as
+    anomalies.
+    """
+
+    def __init__(self, session: AsyncSession) -> None:
+        """Initialize the explainer service.
+
+        Args:
+            session: Database session for persistence.
+        """
+        self.session = session
+        self._model_cache: dict[str, Any] = {}
+
+    async def explain_anomaly(
+        self,
+        detection_id: str,
+        row_indices: list[int],
+        *,
+        max_features: int = 10,
+        sample_background: int = 100,
+    ) -> dict[str, Any]:
+        """Generate SHAP explanations for specific anomaly rows.
+
+        Args:
+            detection_id: ID of the anomaly detection run.
+            row_indices: List of row indices to explain.
+            max_features: Maximum features to include in explanation.
+            sample_background: Background samples for SHAP KernelExplainer.
+
+        Returns:
+            Dictionary containing explanations with feature contributions.
+
+        Raises:
+            ValueError: If detection not found or has no results.
+        """
+        # Get the detection record
+        detection = await self._get_detection(detection_id)
+        if detection is None:
+            raise ValueError(f"Detection '{detection_id}' not found")
+
+        if detection.status != "success":
+            raise ValueError(f"Detection status is '{detection.status}', not 'success'")
+
+        # Get the source and load data
+        source = await self._get_source(detection.source_id)
+        if source is None:
+            raise ValueError(f"Source '{detection.source_id}' not found")
+
+        # Generate explanations
+        try:
+            explanations = await self._generate_explanations(
+                detection=detection,
+                source=source,
+                row_indices=row_indices,
+                max_features=max_features,
+                sample_background=sample_background,
+            )
+
+            # Save explanations to database
+            await self._save_explanations(detection_id, row_indices, explanations)
+
+            return explanations
+
+        except Exception as e:
+            # Log and return error
+            return {
+                "detection_id": detection_id,
+                "row_indices": row_indices,
+                "error": str(e),
+                "explanations": [],
+            }
+
+    async def get_cached_explanations(
+        self,
+        detection_id: str,
+        row_indices: list[int] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get cached explanations for a detection.
+
+        Args:
+            detection_id: ID of the anomaly detection.
+            row_indices: Optional list of specific row indices to retrieve.
+
+        Returns:
+            List of cached explanation dictionaries.
+        """
+        query = select(AnomalyExplanation).where(
+            AnomalyExplanation.detection_id == detection_id
+        )
+
+        if row_indices:
+            query = query.where(AnomalyExplanation.row_index.in_(row_indices))
+
+        query = query.order_by(AnomalyExplanation.row_index)
+
+        result = await self.session.execute(query)
+        explanations = result.scalars().all()
+
+        return [self._explanation_to_dict(exp) for exp in explanations]
+
+    async def _get_detection(self, detection_id: str) -> AnomalyDetection | None:
+        """Get an anomaly detection by ID."""
+        result = await self.session.execute(
+            select(AnomalyDetection).where(AnomalyDetection.id == detection_id)
+        )
+        return result.scalar_one_or_none()
+
+    async def _get_source(self, source_id: str) -> Source | None:
+        """Get a source by ID."""
+        result = await self.session.execute(
+            select(Source).where(Source.id == source_id)
+        )
+        return result.scalar_one_or_none()
+
+    async def _generate_explanations(
+        self,
+        detection: AnomalyDetection,
+        source: Source,
+        row_indices: list[int],
+        max_features: int,
+        sample_background: int,
+    ) -> dict[str, Any]:
+        """Generate SHAP explanations for anomaly rows.
+
+        This method uses the appropriate SHAP explainer based on the
+        algorithm used for detection.
+        """
+        try:
+            import truthound as th
+
+            # Load data
+            df = th.read(source.config)
+
+            # Get columns that were analyzed
+            columns = detection.columns_analyzed or list(
+                df.select_dtypes(include=[np.number]).columns
+            )
+
+            # Filter to analyzed columns and handle NaN
+            df_analyze = df[columns].select_dtypes(include=[np.number])
+            df_clean = df_analyze.fillna(df_analyze.mean())
+
+            if df_clean.empty:
+                return {
+                    "detection_id": detection.id,
+                    "row_indices": row_indices,
+                    "error": "No numeric columns to explain",
+                    "explanations": [],
+                }
+
+            # Get the rows to explain
+            valid_indices = [i for i in row_indices if i < len(df_clean)]
+            if not valid_indices:
+                return {
+                    "detection_id": detection.id,
+                    "row_indices": row_indices,
+                    "error": "No valid row indices",
+                    "explanations": [],
+                }
+
+            X = df_clean.values
+            X_explain = X[valid_indices]
+            feature_names = list(df_clean.columns)
+
+            # Generate SHAP values based on algorithm
+            shap_values = self._compute_shap_values(
+                X=X,
+                X_explain=X_explain,
+                algorithm=detection.algorithm,
+                config=detection.config,
+                sample_background=sample_background,
+            )
+
+            # Build explanation results
+            explanations = []
+            for i, row_idx in enumerate(valid_indices):
+                # Get feature contributions for this row
+                row_shap = shap_values[i] if i < len(shap_values) else np.zeros(len(feature_names))
+                row_values = X_explain[i] if i < len(X_explain) else np.zeros(len(feature_names))
+
+                # Create feature contributions
+                contributions = []
+                for j, (fname, shap_val, feat_val) in enumerate(
+                    zip(feature_names, row_shap, row_values)
+                ):
+                    contributions.append({
+                        "feature": fname,
+                        "value": float(feat_val),
+                        "shap_value": float(shap_val),
+                        "contribution": float(abs(shap_val)),
+                    })
+
+                # Sort by absolute contribution
+                contributions.sort(key=lambda x: x["contribution"], reverse=True)
+
+                # Limit to max features
+                top_contributions = contributions[:max_features]
+
+                # Get anomaly score from detection result
+                anomaly_score = self._get_anomaly_score(detection, row_idx)
+
+                # Generate summary text
+                summary = self._generate_summary(top_contributions, anomaly_score)
+
+                explanations.append({
+                    "row_index": row_idx,
+                    "anomaly_score": anomaly_score,
+                    "feature_contributions": top_contributions,
+                    "total_shap": float(np.sum(row_shap)),
+                    "summary": summary,
+                })
+
+            return {
+                "detection_id": detection.id,
+                "algorithm": detection.algorithm,
+                "row_indices": valid_indices,
+                "feature_names": feature_names,
+                "explanations": explanations,
+                "generated_at": datetime.utcnow().isoformat(),
+            }
+
+        except ImportError:
+            # Fallback: generate mock explanations
+            return self._generate_mock_explanations(
+                detection, row_indices, max_features
+            )
+
+    def _compute_shap_values(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values using the appropriate explainer.
+
+        Args:
+            X: Full feature matrix for background data.
+            X_explain: Feature matrix for rows to explain.
+            algorithm: Detection algorithm used.
+            config: Algorithm configuration.
+            sample_background: Number of background samples.
+
+        Returns:
+            Array of SHAP values for each row and feature.
+        """
+        try:
+            import shap
+
+            # Use TreeExplainer for tree-based models
+            if algorithm == "isolation_forest":
+                return self._compute_isolation_forest_shap(
+                    X, X_explain, config, sample_background
+                )
+
+            # Use KernelExplainer as fallback
+            return self._compute_kernel_shap(X, X_explain, algorithm, config, sample_background)
+
+        except ImportError:
+            # SHAP not installed, use permutation importance
+            return self._compute_permutation_importance(X, X_explain, algorithm, config)
+
+    def _compute_isolation_forest_shap(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values for Isolation Forest using TreeExplainer."""
+        import shap
+        from sklearn.ensemble import IsolationForest
+
+        config = config or {}
+
+        # Build and train Isolation Forest
+        clf = IsolationForest(
+            n_estimators=config.get("n_estimators", 100),
+            contamination=config.get("contamination", 0.1),
+            max_samples=config.get("max_samples", "auto"),
+            random_state=config.get("random_state", 42),
+        )
+        clf.fit(X)
+
+        # Use TreeExplainer for efficient SHAP calculation
+        explainer = shap.TreeExplainer(clf)
+        shap_values = explainer.shap_values(X_explain)
+
+        return np.array(shap_values)
+
+    def _compute_kernel_shap(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+        sample_background: int,
+    ) -> np.ndarray:
+        """Compute SHAP values using KernelExplainer (model-agnostic)."""
+        import shap
+        from sklearn.preprocessing import StandardScaler
+
+        config = config or {}
+
+        # Scale data
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(X)
+        X_explain_scaled = scaler.transform(X_explain)
+
+        # Build model based on algorithm
+        model = self._build_model(algorithm, config)
+        model.fit(X_scaled)
+
+        # Get prediction function
+        if hasattr(model, "score_samples"):
+            predict_fn = lambda x: -model.score_samples(x)
+        elif hasattr(model, "decision_function"):
+            predict_fn = lambda x: -model.decision_function(x)
+        else:
+            predict_fn = lambda x: model.fit_predict(x).astype(float)
+
+        # Sample background data
+        background_size = min(sample_background, len(X_scaled))
+        background_indices = np.random.choice(
+            len(X_scaled), background_size, replace=False
+        )
+        background = X_scaled[background_indices]
+
+        # Create KernelExplainer
+        explainer = shap.KernelExplainer(predict_fn, background)
+
+        # Compute SHAP values
+        shap_values = explainer.shap_values(X_explain_scaled, nsamples=100)
+
+        return np.array(shap_values)
+
+    def _build_model(self, algorithm: str, config: dict[str, Any]) -> Any:
+        """Build the appropriate sklearn model for the algorithm."""
+        if algorithm == "isolation_forest":
+            from sklearn.ensemble import IsolationForest
+            return IsolationForest(
+                n_estimators=config.get("n_estimators", 100),
+                contamination=config.get("contamination", 0.1),
+                random_state=config.get("random_state", 42),
+            )
+
+        elif algorithm == "lof":
+            from sklearn.neighbors import LocalOutlierFactor
+            return LocalOutlierFactor(
+                n_neighbors=config.get("n_neighbors", 20),
+                contamination=config.get("contamination", 0.1),
+                novelty=False,
+            )
+
+        elif algorithm == "one_class_svm":
+            from sklearn.svm import OneClassSVM
+            return OneClassSVM(
+                kernel=config.get("kernel", "rbf"),
+                nu=config.get("nu", 0.1),
+                gamma=config.get("gamma", "scale"),
+            )
+
+        elif algorithm == "dbscan":
+            from sklearn.cluster import DBSCAN
+            return DBSCAN(
+                eps=config.get("eps", 0.5),
+                min_samples=config.get("min_samples", 5),
+            )
+
+        else:
+            # Default to Isolation Forest
+            from sklearn.ensemble import IsolationForest
+            return IsolationForest(random_state=42)
+
+    def _compute_permutation_importance(
+        self,
+        X: np.ndarray,
+        X_explain: np.ndarray,
+        algorithm: str,
+        config: dict[str, Any] | None,
+    ) -> np.ndarray:
+        """Fallback: compute approximate feature importance via permutation."""
+        config = config or {}
+
+        # Build and train model
+        model = self._build_model(algorithm, config)
+
+        from sklearn.preprocessing import StandardScaler
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(X)
+        X_explain_scaled = scaler.transform(X_explain)
+
+        model.fit(X_scaled)
+
+        # Get base predictions/scores
+        if hasattr(model, "score_samples"):
+            base_scores = -model.score_samples(X_explain_scaled)
+        elif hasattr(model, "decision_function"):
+            base_scores = -model.decision_function(X_explain_scaled)
+        else:
+            base_scores = np.zeros(len(X_explain_scaled))
+
+        # Compute permutation importance for each feature
+        n_features = X_explain_scaled.shape[1]
+        importances = np.zeros((len(X_explain_scaled), n_features))
+
+        for j in range(n_features):
+            X_permuted = X_explain_scaled.copy()
+            # Permute column j
+            X_permuted[:, j] = np.random.permutation(X_permuted[:, j])
+
+            if hasattr(model, "score_samples"):
+                permuted_scores = -model.score_samples(X_permuted)
+            elif hasattr(model, "decision_function"):
+                permuted_scores = -model.decision_function(X_permuted)
+            else:
+                permuted_scores = np.zeros(len(X_permuted))
+
+            # Importance is the change in score
+            importances[:, j] = permuted_scores - base_scores
+
+        return importances
+
+    def _get_anomaly_score(
+        self,
+        detection: AnomalyDetection,
+        row_index: int,
+    ) -> float:
+        """Get the anomaly score for a specific row from detection results."""
+        if detection.result_json and "anomalies" in detection.result_json:
+            for anomaly in detection.result_json["anomalies"]:
+                if anomaly.get("row_index") == row_index:
+                    return anomaly.get("anomaly_score", 0.0)
+        return 0.0
+
+    def _generate_summary(
+        self,
+        contributions: list[dict[str, Any]],
+        anomaly_score: float,
+    ) -> str:
+        """Generate human-readable summary of why a row is anomalous.
+
+        Args:
+            contributions: Feature contributions sorted by importance.
+            anomaly_score: Overall anomaly score for the row.
+
+        Returns:
+            Human-readable summary string.
+        """
+        if not contributions:
+            return "No significant features identified."
+
+        # Classify anomaly severity
+        if anomaly_score >= 0.9:
+            severity = "highly anomalous"
+        elif anomaly_score >= 0.7:
+            severity = "moderately anomalous"
+        elif anomaly_score >= 0.5:
+            severity = "slightly anomalous"
+        else:
+            severity = "borderline anomalous"
+
+        # Get top contributing features
+        top_features = contributions[:3]
+        feature_descriptions = []
+
+        for feat in top_features:
+            name = feat["feature"]
+            value = feat["value"]
+            shap_val = feat["shap_value"]
+
+            # Describe contribution direction
+            direction = "unusually high" if shap_val > 0 else "unusually low"
+            feature_descriptions.append(
+                f"{name} ({value:.2f}) is {direction}"
+            )
+
+        if len(feature_descriptions) == 1:
+            features_text = feature_descriptions[0]
+        elif len(feature_descriptions) == 2:
+            features_text = " and ".join(feature_descriptions)
+        else:
+            features_text = (
+                ", ".join(feature_descriptions[:-1])
+                + f", and {feature_descriptions[-1]}"
+            )
+
+        return f"This row is {severity} (score: {anomaly_score:.3f}). The main contributing factors are: {features_text}."
+
+    def _generate_mock_explanations(
+        self,
+        detection: AnomalyDetection,
+        row_indices: list[int],
+        max_features: int,
+    ) -> dict[str, Any]:
+        """Generate mock explanations when SHAP/sklearn is not available."""
+        import random
+
+        columns = detection.columns_analyzed or ["feature_1", "feature_2", "feature_3"]
+
+        explanations = []
+        for row_idx in row_indices:
+            anomaly_score = self._get_anomaly_score(detection, row_idx)
+            if anomaly_score == 0:
+                anomaly_score = random.uniform(0.5, 1.0)
+
+            contributions = []
+            for col in columns[:max_features]:
+                shap_val = random.uniform(-1.0, 1.0)
+                contributions.append({
+                    "feature": col,
+                    "value": random.uniform(-100, 100),
+                    "shap_value": shap_val,
+                    "contribution": abs(shap_val),
+                })
+
+            contributions.sort(key=lambda x: x["contribution"], reverse=True)
+            summary = self._generate_summary(contributions[:3], anomaly_score)
+
+            explanations.append({
+                "row_index": row_idx,
+                "anomaly_score": anomaly_score,
+                "feature_contributions": contributions,
+                "total_shap": sum(c["shap_value"] for c in contributions),
+                "summary": summary,
+            })
+
+        return {
+            "detection_id": detection.id,
+            "algorithm": detection.algorithm,
+            "row_indices": row_indices,
+            "feature_names": columns,
+            "explanations": explanations,
+            "generated_at": datetime.utcnow().isoformat(),
+            "mock": True,
+        }
+
+    async def _save_explanations(
+        self,
+        detection_id: str,
+        row_indices: list[int],
+        explanations_data: dict[str, Any],
+    ) -> None:
+        """Save explanations to database for caching."""
+        for explanation in explanations_data.get("explanations", []):
+            row_idx = explanation["row_index"]
+
+            # Check if explanation already exists
+            existing = await self.session.execute(
+                select(AnomalyExplanation).where(
+                    AnomalyExplanation.detection_id == detection_id,
+                    AnomalyExplanation.row_index == row_idx,
+                )
+            )
+            existing_exp = existing.scalar_one_or_none()
+
+            if existing_exp:
+                # Update existing
+                existing_exp.anomaly_score = explanation["anomaly_score"]
+                existing_exp.feature_contributions = explanation["feature_contributions"]
+                existing_exp.total_shap = explanation["total_shap"]
+                existing_exp.summary = explanation["summary"]
+                existing_exp.generated_at = datetime.utcnow()
+            else:
+                # Create new
+                new_explanation = AnomalyExplanation(
+                    detection_id=detection_id,
+                    row_index=row_idx,
+                    anomaly_score=explanation["anomaly_score"],
+                    feature_contributions=explanation["feature_contributions"],
+                    total_shap=explanation["total_shap"],
+                    summary=explanation["summary"],
+                )
+                self.session.add(new_explanation)
+
+        await self.session.flush()
+
+    def _explanation_to_dict(self, explanation: AnomalyExplanation) -> dict[str, Any]:
+        """Convert AnomalyExplanation model to dictionary."""
+        return {
+            "id": explanation.id,
+            "detection_id": explanation.detection_id,
+            "row_index": explanation.row_index,
+            "anomaly_score": explanation.anomaly_score,
+            "feature_contributions": explanation.feature_contributions,
+            "total_shap": explanation.total_shap,
+            "summary": explanation.summary,
+            "generated_at": (
+                explanation.generated_at.isoformat()
+                if explanation.generated_at
+                else None
+            ),
+        }
+
+
+# Singleton-style factory for dependency injection
+def get_anomaly_explainer_service(session: AsyncSession) -> AnomalyExplainerService:
+    """Factory function to get AnomalyExplainerService instance."""
+    return AnomalyExplainerService(session)
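
For orientation, here is a minimal sketch of how the new explainer service might be driven from application code, based on the `get_anomaly_explainer_service` factory and the `explain_anomaly` signature in the diff above. The engine URL, the standalone session setup, and the detection ID "det-123" are hypothetical placeholders for illustration (the dashboard itself presumably wires sessions through `truthound_dashboard/api/deps.py`); this is not an excerpt from the package.

    import asyncio

    from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

    from truthound_dashboard.core.anomaly_explainer import get_anomaly_explainer_service


    async def main() -> None:
        # Hypothetical standalone engine/session setup, for illustration only.
        engine = create_async_engine("sqlite+aiosqlite:///truthound.db")
        session_factory = async_sessionmaker(engine, expire_on_commit=False)

        async with session_factory() as session:
            service = get_anomaly_explainer_service(session)

            # "det-123" stands in for the ID of a completed detection run.
            result = await service.explain_anomaly(
                "det-123",
                row_indices=[4, 17, 42],
                max_features=5,
            )
            if "error" in result:
                print("explanation failed:", result["error"])
            for exp in result["explanations"]:
                print(exp["row_index"], exp["summary"])

            # Later calls can read the persisted copies instead of recomputing.
            cached = await service.get_cached_explanations("det-123")
            print(f"{len(cached)} cached explanations")

        await engine.dispose()


    asyncio.run(main())

Note that `_save_explanations` only flushes the session; the surrounding application transaction decides when the cached explanations actually become durable.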
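
The permutation fallback in `_compute_permutation_importance` rests on a standard idea: shuffle one column so it no longer lines up with its rows, then measure how far the model's outlier scores move; the column whose shuffling moves scores the most matters most. A self-contained toy illustration of that idea, independent of the dashboard code (the data here are fabricated for the demo):

    import numpy as np
    from sklearn.ensemble import IsolationForest

    rng = np.random.default_rng(0)

    # Toy data: column 0 carries the anomaly signal, column 1 is pure noise.
    X = rng.normal(size=(500, 2))
    X[:5, 0] += 8.0  # plant five obvious outliers in feature 0

    model = IsolationForest(random_state=42).fit(X)
    base = -model.score_samples(X)  # higher value = more anomalous

    for j in range(X.shape[1]):
        X_perm = X.copy()
        # Break column j's association with its rows.
        X_perm[:, j] = rng.permutation(X_perm[:, j])
        delta = -model.score_samples(X_perm) - base
        print(f"feature {j}: mean |score change| = {np.abs(delta).mean():.4f}")

Shuffling feature 0 should typically perturb the scores far more than shuffling feature 1, which mirrors how the fallback ranks per-feature contributions when SHAP is not installed.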