truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -248,10 +248,12 @@ class AnomalyDetectionService:
248
248
  Detection results dictionary.
249
249
  """
250
250
  try:
251
- import truthound as th
251
+ from truthound.datasources import get_datasource
252
252
 
253
- # Load data from source
254
- df = th.read(source.config)
253
+ # Load data from source using truthound datasources factory
254
+ # The source.config contains the path or connection info
255
+ datasource = get_datasource(source.config.get("path", source.config))
256
+ df = datasource.to_polars_lazyframe().collect()
255
257
 
256
258
  # Get columns to analyze
257
259
  columns = None
@@ -311,7 +313,9 @@ class AnomalyDetectionService:
311
313
  sample_size: int | None,
312
314
  params: dict[str, Any],
313
315
  ) -> dict[str, Any]:
314
- """Run the specified anomaly detection algorithm.
316
+ """Run the specified anomaly detection algorithm using truthound.ml.
317
+
318
+ Uses truthound.ml.anomaly_models when available, falls back to sklearn.
315
319
 
316
320
  Args:
317
321
  df: DataFrame to analyze.
@@ -360,6 +364,8 @@ class AnomalyDetectionService:
360
364
  result = self._run_statistical(df_analyze, params)
361
365
  elif algorithm == "autoencoder":
362
366
  result = self._run_autoencoder(df_analyze, params)
367
+ elif algorithm == "ensemble":
368
+ result = self._run_ensemble(df_analyze, params)
363
369
  else:
364
370
  raise ValueError(f"Unknown algorithm: {algorithm}")
365
371
 
@@ -412,32 +418,67 @@ class AnomalyDetectionService:
412
418
  df: Any,
413
419
  params: dict[str, Any],
414
420
  ) -> dict[str, Any]:
415
- """Run Isolation Forest algorithm."""
416
- from sklearn.ensemble import IsolationForest
421
+ """Run Isolation Forest algorithm using truthound.ml."""
417
422
  import numpy as np
418
423
 
419
424
  # Get parameters with defaults
420
425
  n_estimators = params.get("n_estimators", 100)
421
426
  contamination = params.get("contamination", 0.1)
422
- max_samples = params.get("max_samples", "auto")
427
+ max_samples = params.get("max_samples", 256)
423
428
  random_state = params.get("random_state", 42)
424
429
 
425
430
  # Handle NaN values
426
431
  df_clean = df.fillna(df.mean())
427
432
 
428
- clf = IsolationForest(
429
- n_estimators=n_estimators,
430
- contamination=contamination,
431
- max_samples=max_samples,
432
- random_state=random_state,
433
- )
434
- predictions = clf.fit_predict(df_clean)
435
- scores = -clf.score_samples(df_clean) # Higher = more anomalous
433
+ try:
434
+ from truthound.ml.anomaly_models.isolation_forest import (
435
+ IsolationForestDetector,
436
+ IsolationForestConfig,
437
+ )
438
+ import polars as pl
436
439
 
437
- return {
438
- "is_anomaly": predictions == -1,
439
- "scores": scores,
440
- }
440
+ # Create truthound detector
441
+ config = IsolationForestConfig(
442
+ n_estimators=n_estimators,
443
+ max_samples=max_samples if isinstance(max_samples, int) else 256,
444
+ columns=list(df_clean.columns),
445
+ )
446
+
447
+ detector = IsolationForestDetector(config)
448
+
449
+ # Convert to Polars for truthound
450
+ pl_df = pl.from_pandas(df_clean).lazy()
451
+ detector.fit(pl_df)
452
+
453
+ # Get predictions
454
+ result = detector.predict(pl_df)
455
+
456
+ # Extract scores and anomaly flags
457
+ is_anomaly = np.array([score.is_anomaly for score in result])
458
+ scores = np.array([score.score for score in result])
459
+
460
+ return {
461
+ "is_anomaly": is_anomaly,
462
+ "scores": scores,
463
+ }
464
+
465
+ except ImportError:
466
+ # Fallback to sklearn
467
+ from sklearn.ensemble import IsolationForest
468
+
469
+ clf = IsolationForest(
470
+ n_estimators=n_estimators,
471
+ contamination=contamination,
472
+ max_samples=max_samples,
473
+ random_state=random_state,
474
+ )
475
+ predictions = clf.fit_predict(df_clean)
476
+ scores = -clf.score_samples(df_clean) # Higher = more anomalous
477
+
478
+ return {
479
+ "is_anomaly": predictions == -1,
480
+ "scores": scores,
481
+ }
441
482
 
442
483
  def _run_lof(
443
484
  self,
@@ -446,6 +487,7 @@ class AnomalyDetectionService:
446
487
  ) -> dict[str, Any]:
447
488
  """Run Local Outlier Factor algorithm."""
448
489
  from sklearn.neighbors import LocalOutlierFactor
490
+ from sklearn.preprocessing import StandardScaler
449
491
  import numpy as np
450
492
 
451
493
  n_neighbors = params.get("n_neighbors", 20)
@@ -453,7 +495,6 @@ class AnomalyDetectionService:
453
495
  algorithm = params.get("algorithm", "auto")
454
496
 
455
497
  # Handle NaN values and scale
456
- from sklearn.preprocessing import StandardScaler
457
498
  df_clean = df.fillna(df.mean())
458
499
  scaler = StandardScaler()
459
500
  df_scaled = scaler.fit_transform(df_clean)
@@ -512,6 +553,7 @@ class AnomalyDetectionService:
512
553
  """Run DBSCAN algorithm."""
513
554
  from sklearn.cluster import DBSCAN
514
555
  from sklearn.preprocessing import StandardScaler
556
+ from sklearn.metrics import pairwise_distances
515
557
  import numpy as np
516
558
 
517
559
  eps = params.get("eps", 0.5)
@@ -534,15 +576,14 @@ class AnomalyDetectionService:
534
576
  is_anomaly = labels == -1
535
577
 
536
578
  # Calculate distance-based scores (distance to nearest cluster centroid)
537
- from sklearn.metrics import pairwise_distances
538
579
  scores = np.zeros(len(df_scaled))
539
580
  if not is_anomaly.all():
540
581
  # Get centroids of each cluster
541
582
  unique_labels = set(labels) - {-1}
542
583
  if unique_labels:
543
584
  centroids = np.array([
544
- df_scaled[labels == l].mean(axis=0)
545
- for l in unique_labels
585
+ df_scaled[labels == label].mean(axis=0)
586
+ for label in unique_labels
546
587
  ])
547
588
  distances = pairwise_distances(df_scaled, centroids, metric=metric)
548
589
  scores = distances.min(axis=1)
@@ -557,7 +598,7 @@ class AnomalyDetectionService:
557
598
  df: Any,
558
599
  params: dict[str, Any],
559
600
  ) -> dict[str, Any]:
560
- """Run statistical anomaly detection."""
601
+ """Run statistical anomaly detection using truthound.ml."""
561
602
  import numpy as np
562
603
 
563
604
  method = params.get("method", "zscore")
@@ -566,49 +607,201 @@ class AnomalyDetectionService:
566
607
  # Handle NaN values
567
608
  df_clean = df.fillna(df.mean())
568
609
 
569
- if method == "zscore":
570
- mean = df_clean.mean()
571
- std = df_clean.std()
572
- z_scores = np.abs((df_clean - mean) / std)
573
- # Take max z-score across all columns for each row
574
- max_z = z_scores.max(axis=1)
575
- is_anomaly = max_z > threshold
576
- scores = max_z.values
577
-
578
- elif method == "iqr":
579
- q1 = df_clean.quantile(0.25)
580
- q3 = df_clean.quantile(0.75)
581
- iqr = q3 - q1
582
- lower = q1 - threshold * iqr
583
- upper = q3 + threshold * iqr
584
- is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
585
- is_anomaly = is_outlier.values
586
- # Score based on distance from bounds
587
- scores = np.zeros(len(df_clean))
588
- for col in df_clean.columns:
589
- col_scores = np.maximum(
590
- (lower[col] - df_clean[col]) / iqr[col],
591
- (df_clean[col] - upper[col]) / iqr[col],
592
- )
593
- col_scores = np.maximum(col_scores, 0)
594
- scores = np.maximum(scores, col_scores.values)
595
-
596
- elif method == "mad":
597
- median = df_clean.median()
598
- mad = np.abs(df_clean - median).median()
599
- # Modified z-score using MAD
600
- modified_z = 0.6745 * (df_clean - median) / mad
601
- max_z = np.abs(modified_z).max(axis=1)
602
- is_anomaly = max_z > threshold
603
- scores = max_z.values
610
+ try:
611
+ from truthound.ml.anomaly_models.statistical import (
612
+ StatisticalAnomalyDetector,
613
+ StatisticalConfig,
614
+ )
615
+ import polars as pl
616
+
617
+ # Create truthound detector
618
+ config = StatisticalConfig(
619
+ z_threshold=threshold,
620
+ iqr_multiplier=threshold if method == "iqr" else 1.5,
621
+ use_robust_stats=(method == "mad"),
622
+ per_column=True,
623
+ columns=list(df_clean.columns),
624
+ )
604
625
 
605
- else:
606
- raise ValueError(f"Unknown statistical method: {method}")
626
+ detector = StatisticalAnomalyDetector(config)
607
627
 
608
- return {
609
- "is_anomaly": np.array(is_anomaly),
610
- "scores": np.array(scores),
611
- }
628
+ # Convert to Polars for truthound
629
+ pl_df = pl.from_pandas(df_clean).lazy()
630
+ detector.fit(pl_df)
631
+
632
+ # Get predictions
633
+ result = detector.predict(pl_df)
634
+
635
+ # Extract scores and anomaly flags
636
+ is_anomaly = np.array([score.is_anomaly for score in result])
637
+ scores = np.array([score.score for score in result])
638
+
639
+ return {
640
+ "is_anomaly": is_anomaly,
641
+ "scores": scores,
642
+ }
643
+
644
+ except ImportError:
645
+ # Fallback to manual implementation
646
+ if method == "zscore":
647
+ mean = df_clean.mean()
648
+ std = df_clean.std()
649
+ z_scores = np.abs((df_clean - mean) / std)
650
+ # Take max z-score across all columns for each row
651
+ max_z = z_scores.max(axis=1)
652
+ is_anomaly = max_z > threshold
653
+ scores = max_z.values
654
+
655
+ elif method == "iqr":
656
+ q1 = df_clean.quantile(0.25)
657
+ q3 = df_clean.quantile(0.75)
658
+ iqr = q3 - q1
659
+ lower = q1 - threshold * iqr
660
+ upper = q3 + threshold * iqr
661
+ is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
662
+ is_anomaly = is_outlier.values
663
+ # Score based on distance from bounds
664
+ scores = np.zeros(len(df_clean))
665
+ for col in df_clean.columns:
666
+ col_scores = np.maximum(
667
+ (lower[col] - df_clean[col]) / iqr[col],
668
+ (df_clean[col] - upper[col]) / iqr[col],
669
+ )
670
+ col_scores = np.maximum(col_scores, 0)
671
+ scores = np.maximum(scores, col_scores.values)
672
+
673
+ elif method == "mad":
674
+ median = df_clean.median()
675
+ mad = np.abs(df_clean - median).median()
676
+ # Modified z-score using MAD
677
+ modified_z = 0.6745 * (df_clean - median) / mad
678
+ max_z = np.abs(modified_z).max(axis=1)
679
+ is_anomaly = max_z > threshold
680
+ scores = max_z.values
681
+
682
+ else:
683
+ raise ValueError(f"Unknown statistical method: {method}")
684
+
685
+ return {
686
+ "is_anomaly": np.array(is_anomaly),
687
+ "scores": np.array(scores),
688
+ }
689
+
690
+ def _run_ensemble(
691
+ self,
692
+ df: Any,
693
+ params: dict[str, Any],
694
+ ) -> dict[str, Any]:
695
+ """Run ensemble anomaly detection using truthound.ml."""
696
+ import numpy as np
697
+
698
+ strategy = params.get("strategy", "weighted_average")
699
+ weights = params.get("weights", [0.3, 0.3, 0.4])
700
+ vote_threshold = params.get("vote_threshold", 0.5)
701
+
702
+ # Handle NaN values
703
+ df_clean = df.fillna(df.mean())
704
+
705
+ try:
706
+ from truthound.ml.anomaly_models.ensemble import (
707
+ EnsembleAnomalyDetector,
708
+ EnsembleConfig,
709
+ EnsembleStrategy,
710
+ )
711
+ from truthound.ml.anomaly_models.statistical import (
712
+ StatisticalAnomalyDetector,
713
+ StatisticalConfig,
714
+ )
715
+ from truthound.ml.anomaly_models.isolation_forest import (
716
+ IsolationForestDetector,
717
+ IsolationForestConfig,
718
+ )
719
+ import polars as pl
720
+
721
+ # Map strategy string to enum
722
+ strategy_map = {
723
+ "average": EnsembleStrategy.AVERAGE,
724
+ "weighted_average": EnsembleStrategy.WEIGHTED_AVERAGE,
725
+ "max": EnsembleStrategy.MAX,
726
+ "min": EnsembleStrategy.MIN,
727
+ "vote": EnsembleStrategy.VOTE,
728
+ "unanimous": EnsembleStrategy.UNANIMOUS,
729
+ }
730
+
731
+ # Create ensemble config
732
+ config = EnsembleConfig(
733
+ strategy=strategy_map.get(strategy, EnsembleStrategy.WEIGHTED_AVERAGE),
734
+ weights=weights,
735
+ vote_threshold=vote_threshold,
736
+ )
737
+
738
+ ensemble = EnsembleAnomalyDetector(config)
739
+
740
+ # Add detectors
741
+ columns = list(df_clean.columns)
742
+
743
+ # Z-Score detector
744
+ zscore_config = StatisticalConfig(z_threshold=3.0, columns=columns)
745
+ ensemble.add_detector(StatisticalAnomalyDetector(zscore_config), weight=weights[0] if len(weights) > 0 else 0.33)
746
+
747
+ # IQR detector
748
+ iqr_config = StatisticalConfig(iqr_multiplier=1.5, columns=columns)
749
+ ensemble.add_detector(StatisticalAnomalyDetector(iqr_config), weight=weights[1] if len(weights) > 1 else 0.33)
750
+
751
+ # Isolation Forest detector
752
+ if_config = IsolationForestConfig(n_estimators=100, columns=columns)
753
+ ensemble.add_detector(IsolationForestDetector(if_config), weight=weights[2] if len(weights) > 2 else 0.34)
754
+
755
+ # Convert to Polars for truthound
756
+ pl_df = pl.from_pandas(df_clean).lazy()
757
+ ensemble.fit(pl_df)
758
+
759
+ # Get predictions
760
+ result = ensemble.predict(pl_df)
761
+
762
+ # Extract scores and anomaly flags
763
+ is_anomaly = np.array([score.is_anomaly for score in result])
764
+ scores = np.array([score.score for score in result])
765
+
766
+ return {
767
+ "is_anomaly": is_anomaly,
768
+ "scores": scores,
769
+ }
770
+
771
+ except ImportError:
772
+ # Fallback: run individual algorithms and combine
773
+ results = []
774
+
775
+ # Run zscore
776
+ zscore_result = self._run_statistical(df, {"method": "zscore", "threshold": 3.0})
777
+ results.append(zscore_result)
778
+
779
+ # Run IQR
780
+ iqr_result = self._run_statistical(df, {"method": "iqr", "threshold": 1.5})
781
+ results.append(iqr_result)
782
+
783
+ # Run isolation forest
784
+ if_result = self._run_isolation_forest(df, {"n_estimators": 100})
785
+ results.append(if_result)
786
+
787
+ # Combine using weighted average
788
+ combined_scores = np.zeros(len(df_clean))
789
+ for i, result in enumerate(results):
790
+ weight = weights[i] if i < len(weights) else 1.0 / len(results)
791
+ combined_scores += weight * result["scores"]
792
+
793
+ # Normalize scores
794
+ if combined_scores.max() > 0:
795
+ combined_scores = combined_scores / combined_scores.max()
796
+
797
+ # Determine anomalies based on threshold (mean + 2*std)
798
+ threshold = combined_scores.mean() + 2 * combined_scores.std()
799
+ is_anomaly = combined_scores > threshold
800
+
801
+ return {
802
+ "is_anomaly": is_anomaly,
803
+ "scores": combined_scores,
804
+ }
612
805
 
613
806
  def _run_autoencoder(
614
807
  self,
@@ -1146,11 +1339,13 @@ class AnomalyDetectionService:
1146
1339
 
1147
1340
  # Load data once
1148
1341
  try:
1149
- import truthound as th
1342
+ from truthound.datasources import get_datasource
1150
1343
  import numpy as np
1151
1344
  import pandas as pd
1152
1345
 
1153
- df = th.read(source.config)
1346
+ # Load data using truthound datasources factory
1347
+ datasource = get_datasource(source.config.get("path", source.config))
1348
+ df = datasource.to_polars_lazyframe().collect().to_pandas()
1154
1349
 
1155
1350
  # Sample if needed
1156
1351
  if sample_size and len(df) > sample_size:
@@ -159,10 +159,11 @@ class AnomalyExplainerService:
159
159
  algorithm used for detection.
160
160
  """
161
161
  try:
162
- import truthound as th
162
+ from truthound.datasources import get_datasource
163
163
 
164
- # Load data
165
- df = th.read(source.config)
164
+ # Load data using truthound datasources factory
165
+ datasource = get_datasource(source.config.get("path", source.config))
166
+ df = datasource.to_polars_lazyframe().collect().to_pandas()
166
167
 
167
168
  # Get columns that were analyzed
168
169
  columns = detection.columns_analyzed or list(
@@ -0,0 +1,67 @@
1
+ """Data quality backend implementations.
2
+
3
+ This module provides backend implementations for data quality operations.
4
+ The backends abstract away the specific library (truthound) and provide
5
+ a unified interface for the dashboard services.
6
+
7
+ Architecture:
8
+ BackendFactory
9
+
10
+ BaseDataQualityBackend (ABC)
11
+
12
+ ┌─────────────────────────────┐
13
+ │ TruthoundBackend │ MockBackend │
14
+ └─────────────────────────────┘
15
+
16
+ Usage:
17
+ from truthound_dashboard.core.backends import BackendFactory
18
+
19
+ # Get the default backend (truthound)
20
+ backend = BackendFactory.get_backend()
21
+
22
+ # Check if backend is available
23
+ if backend.is_available():
24
+ result = await backend.check("data.csv")
25
+
26
+ # Use a specific backend
27
+ backend = BackendFactory.get_backend("mock")
28
+ """
29
+
30
+ from .base import BaseDataQualityBackend
31
+ from .errors import (
32
+ BackendError,
33
+ BackendOperationError,
34
+ BackendUnavailableError,
35
+ BackendVersionError,
36
+ )
37
+ from .factory import (
38
+ BackendFactory,
39
+ get_backend,
40
+ reset_backend,
41
+ get_truthound_version,
42
+ get_backend_capabilities,
43
+ get_backend_info,
44
+ )
45
+ from .mock_backend import MockBackend
46
+ from .truthound_backend import TruthoundBackend
47
+
48
+ __all__ = [
49
+ # Base class
50
+ "BaseDataQualityBackend",
51
+ # Backend implementations
52
+ "TruthoundBackend",
53
+ "MockBackend",
54
+ # Factory
55
+ "BackendFactory",
56
+ "get_backend",
57
+ "reset_backend",
58
+ # Capability detection
59
+ "get_truthound_version",
60
+ "get_backend_capabilities",
61
+ "get_backend_info",
62
+ # Errors
63
+ "BackendError",
64
+ "BackendUnavailableError",
65
+ "BackendVersionError",
66
+ "BackendOperationError",
67
+ ]