truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +437 -10
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +11 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -248,10 +248,12 @@ class AnomalyDetectionService:
|
|
|
248
248
|
Detection results dictionary.
|
|
249
249
|
"""
|
|
250
250
|
try:
|
|
251
|
-
|
|
251
|
+
from truthound.datasources import get_datasource
|
|
252
252
|
|
|
253
|
-
# Load data from source
|
|
254
|
-
|
|
253
|
+
# Load data from source using truthound datasources factory
|
|
254
|
+
# The source.config contains the path or connection info
|
|
255
|
+
datasource = get_datasource(source.config.get("path", source.config))
|
|
256
|
+
df = datasource.to_polars_lazyframe().collect()
|
|
255
257
|
|
|
256
258
|
# Get columns to analyze
|
|
257
259
|
columns = None
|
|
@@ -311,7 +313,9 @@ class AnomalyDetectionService:
|
|
|
311
313
|
sample_size: int | None,
|
|
312
314
|
params: dict[str, Any],
|
|
313
315
|
) -> dict[str, Any]:
|
|
314
|
-
"""Run the specified anomaly detection algorithm.
|
|
316
|
+
"""Run the specified anomaly detection algorithm using truthound.ml.
|
|
317
|
+
|
|
318
|
+
Uses truthound.ml.anomaly_models when available, falls back to sklearn.
|
|
315
319
|
|
|
316
320
|
Args:
|
|
317
321
|
df: DataFrame to analyze.
|
|
@@ -360,6 +364,8 @@ class AnomalyDetectionService:
|
|
|
360
364
|
result = self._run_statistical(df_analyze, params)
|
|
361
365
|
elif algorithm == "autoencoder":
|
|
362
366
|
result = self._run_autoencoder(df_analyze, params)
|
|
367
|
+
elif algorithm == "ensemble":
|
|
368
|
+
result = self._run_ensemble(df_analyze, params)
|
|
363
369
|
else:
|
|
364
370
|
raise ValueError(f"Unknown algorithm: {algorithm}")
|
|
365
371
|
|
|
@@ -412,32 +418,67 @@ class AnomalyDetectionService:
|
|
|
412
418
|
df: Any,
|
|
413
419
|
params: dict[str, Any],
|
|
414
420
|
) -> dict[str, Any]:
|
|
415
|
-
"""Run Isolation Forest algorithm."""
|
|
416
|
-
from sklearn.ensemble import IsolationForest
|
|
421
|
+
"""Run Isolation Forest algorithm using truthound.ml."""
|
|
417
422
|
import numpy as np
|
|
418
423
|
|
|
419
424
|
# Get parameters with defaults
|
|
420
425
|
n_estimators = params.get("n_estimators", 100)
|
|
421
426
|
contamination = params.get("contamination", 0.1)
|
|
422
|
-
max_samples = params.get("max_samples",
|
|
427
|
+
max_samples = params.get("max_samples", 256)
|
|
423
428
|
random_state = params.get("random_state", 42)
|
|
424
429
|
|
|
425
430
|
# Handle NaN values
|
|
426
431
|
df_clean = df.fillna(df.mean())
|
|
427
432
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
predictions = clf.fit_predict(df_clean)
|
|
435
|
-
scores = -clf.score_samples(df_clean) # Higher = more anomalous
|
|
433
|
+
try:
|
|
434
|
+
from truthound.ml.anomaly_models.isolation_forest import (
|
|
435
|
+
IsolationForestDetector,
|
|
436
|
+
IsolationForestConfig,
|
|
437
|
+
)
|
|
438
|
+
import polars as pl
|
|
436
439
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
440
|
+
# Create truthound detector
|
|
441
|
+
config = IsolationForestConfig(
|
|
442
|
+
n_estimators=n_estimators,
|
|
443
|
+
max_samples=max_samples if isinstance(max_samples, int) else 256,
|
|
444
|
+
columns=list(df_clean.columns),
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
detector = IsolationForestDetector(config)
|
|
448
|
+
|
|
449
|
+
# Convert to Polars for truthound
|
|
450
|
+
pl_df = pl.from_pandas(df_clean).lazy()
|
|
451
|
+
detector.fit(pl_df)
|
|
452
|
+
|
|
453
|
+
# Get predictions
|
|
454
|
+
result = detector.predict(pl_df)
|
|
455
|
+
|
|
456
|
+
# Extract scores and anomaly flags
|
|
457
|
+
is_anomaly = np.array([score.is_anomaly for score in result])
|
|
458
|
+
scores = np.array([score.score for score in result])
|
|
459
|
+
|
|
460
|
+
return {
|
|
461
|
+
"is_anomaly": is_anomaly,
|
|
462
|
+
"scores": scores,
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
except ImportError:
|
|
466
|
+
# Fallback to sklearn
|
|
467
|
+
from sklearn.ensemble import IsolationForest
|
|
468
|
+
|
|
469
|
+
clf = IsolationForest(
|
|
470
|
+
n_estimators=n_estimators,
|
|
471
|
+
contamination=contamination,
|
|
472
|
+
max_samples=max_samples,
|
|
473
|
+
random_state=random_state,
|
|
474
|
+
)
|
|
475
|
+
predictions = clf.fit_predict(df_clean)
|
|
476
|
+
scores = -clf.score_samples(df_clean) # Higher = more anomalous
|
|
477
|
+
|
|
478
|
+
return {
|
|
479
|
+
"is_anomaly": predictions == -1,
|
|
480
|
+
"scores": scores,
|
|
481
|
+
}
|
|
441
482
|
|
|
442
483
|
def _run_lof(
|
|
443
484
|
self,
|
|
@@ -446,6 +487,7 @@ class AnomalyDetectionService:
|
|
|
446
487
|
) -> dict[str, Any]:
|
|
447
488
|
"""Run Local Outlier Factor algorithm."""
|
|
448
489
|
from sklearn.neighbors import LocalOutlierFactor
|
|
490
|
+
from sklearn.preprocessing import StandardScaler
|
|
449
491
|
import numpy as np
|
|
450
492
|
|
|
451
493
|
n_neighbors = params.get("n_neighbors", 20)
|
|
@@ -453,7 +495,6 @@ class AnomalyDetectionService:
|
|
|
453
495
|
algorithm = params.get("algorithm", "auto")
|
|
454
496
|
|
|
455
497
|
# Handle NaN values and scale
|
|
456
|
-
from sklearn.preprocessing import StandardScaler
|
|
457
498
|
df_clean = df.fillna(df.mean())
|
|
458
499
|
scaler = StandardScaler()
|
|
459
500
|
df_scaled = scaler.fit_transform(df_clean)
|
|
@@ -512,6 +553,7 @@ class AnomalyDetectionService:
|
|
|
512
553
|
"""Run DBSCAN algorithm."""
|
|
513
554
|
from sklearn.cluster import DBSCAN
|
|
514
555
|
from sklearn.preprocessing import StandardScaler
|
|
556
|
+
from sklearn.metrics import pairwise_distances
|
|
515
557
|
import numpy as np
|
|
516
558
|
|
|
517
559
|
eps = params.get("eps", 0.5)
|
|
@@ -534,15 +576,14 @@ class AnomalyDetectionService:
|
|
|
534
576
|
is_anomaly = labels == -1
|
|
535
577
|
|
|
536
578
|
# Calculate distance-based scores (distance to nearest cluster centroid)
|
|
537
|
-
from sklearn.metrics import pairwise_distances
|
|
538
579
|
scores = np.zeros(len(df_scaled))
|
|
539
580
|
if not is_anomaly.all():
|
|
540
581
|
# Get centroids of each cluster
|
|
541
582
|
unique_labels = set(labels) - {-1}
|
|
542
583
|
if unique_labels:
|
|
543
584
|
centroids = np.array([
|
|
544
|
-
df_scaled[labels ==
|
|
545
|
-
for
|
|
585
|
+
df_scaled[labels == label].mean(axis=0)
|
|
586
|
+
for label in unique_labels
|
|
546
587
|
])
|
|
547
588
|
distances = pairwise_distances(df_scaled, centroids, metric=metric)
|
|
548
589
|
scores = distances.min(axis=1)
|
|
@@ -557,7 +598,7 @@ class AnomalyDetectionService:
|
|
|
557
598
|
df: Any,
|
|
558
599
|
params: dict[str, Any],
|
|
559
600
|
) -> dict[str, Any]:
|
|
560
|
-
"""Run statistical anomaly detection."""
|
|
601
|
+
"""Run statistical anomaly detection using truthound.ml."""
|
|
561
602
|
import numpy as np
|
|
562
603
|
|
|
563
604
|
method = params.get("method", "zscore")
|
|
@@ -566,49 +607,201 @@ class AnomalyDetectionService:
|
|
|
566
607
|
# Handle NaN values
|
|
567
608
|
df_clean = df.fillna(df.mean())
|
|
568
609
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
|
|
585
|
-
is_anomaly = is_outlier.values
|
|
586
|
-
# Score based on distance from bounds
|
|
587
|
-
scores = np.zeros(len(df_clean))
|
|
588
|
-
for col in df_clean.columns:
|
|
589
|
-
col_scores = np.maximum(
|
|
590
|
-
(lower[col] - df_clean[col]) / iqr[col],
|
|
591
|
-
(df_clean[col] - upper[col]) / iqr[col],
|
|
592
|
-
)
|
|
593
|
-
col_scores = np.maximum(col_scores, 0)
|
|
594
|
-
scores = np.maximum(scores, col_scores.values)
|
|
595
|
-
|
|
596
|
-
elif method == "mad":
|
|
597
|
-
median = df_clean.median()
|
|
598
|
-
mad = np.abs(df_clean - median).median()
|
|
599
|
-
# Modified z-score using MAD
|
|
600
|
-
modified_z = 0.6745 * (df_clean - median) / mad
|
|
601
|
-
max_z = np.abs(modified_z).max(axis=1)
|
|
602
|
-
is_anomaly = max_z > threshold
|
|
603
|
-
scores = max_z.values
|
|
610
|
+
try:
|
|
611
|
+
from truthound.ml.anomaly_models.statistical import (
|
|
612
|
+
StatisticalAnomalyDetector,
|
|
613
|
+
StatisticalConfig,
|
|
614
|
+
)
|
|
615
|
+
import polars as pl
|
|
616
|
+
|
|
617
|
+
# Create truthound detector
|
|
618
|
+
config = StatisticalConfig(
|
|
619
|
+
z_threshold=threshold,
|
|
620
|
+
iqr_multiplier=threshold if method == "iqr" else 1.5,
|
|
621
|
+
use_robust_stats=(method == "mad"),
|
|
622
|
+
per_column=True,
|
|
623
|
+
columns=list(df_clean.columns),
|
|
624
|
+
)
|
|
604
625
|
|
|
605
|
-
|
|
606
|
-
raise ValueError(f"Unknown statistical method: {method}")
|
|
626
|
+
detector = StatisticalAnomalyDetector(config)
|
|
607
627
|
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
628
|
+
# Convert to Polars for truthound
|
|
629
|
+
pl_df = pl.from_pandas(df_clean).lazy()
|
|
630
|
+
detector.fit(pl_df)
|
|
631
|
+
|
|
632
|
+
# Get predictions
|
|
633
|
+
result = detector.predict(pl_df)
|
|
634
|
+
|
|
635
|
+
# Extract scores and anomaly flags
|
|
636
|
+
is_anomaly = np.array([score.is_anomaly for score in result])
|
|
637
|
+
scores = np.array([score.score for score in result])
|
|
638
|
+
|
|
639
|
+
return {
|
|
640
|
+
"is_anomaly": is_anomaly,
|
|
641
|
+
"scores": scores,
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
except ImportError:
|
|
645
|
+
# Fallback to manual implementation
|
|
646
|
+
if method == "zscore":
|
|
647
|
+
mean = df_clean.mean()
|
|
648
|
+
std = df_clean.std()
|
|
649
|
+
z_scores = np.abs((df_clean - mean) / std)
|
|
650
|
+
# Take max z-score across all columns for each row
|
|
651
|
+
max_z = z_scores.max(axis=1)
|
|
652
|
+
is_anomaly = max_z > threshold
|
|
653
|
+
scores = max_z.values
|
|
654
|
+
|
|
655
|
+
elif method == "iqr":
|
|
656
|
+
q1 = df_clean.quantile(0.25)
|
|
657
|
+
q3 = df_clean.quantile(0.75)
|
|
658
|
+
iqr = q3 - q1
|
|
659
|
+
lower = q1 - threshold * iqr
|
|
660
|
+
upper = q3 + threshold * iqr
|
|
661
|
+
is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
|
|
662
|
+
is_anomaly = is_outlier.values
|
|
663
|
+
# Score based on distance from bounds
|
|
664
|
+
scores = np.zeros(len(df_clean))
|
|
665
|
+
for col in df_clean.columns:
|
|
666
|
+
col_scores = np.maximum(
|
|
667
|
+
(lower[col] - df_clean[col]) / iqr[col],
|
|
668
|
+
(df_clean[col] - upper[col]) / iqr[col],
|
|
669
|
+
)
|
|
670
|
+
col_scores = np.maximum(col_scores, 0)
|
|
671
|
+
scores = np.maximum(scores, col_scores.values)
|
|
672
|
+
|
|
673
|
+
elif method == "mad":
|
|
674
|
+
median = df_clean.median()
|
|
675
|
+
mad = np.abs(df_clean - median).median()
|
|
676
|
+
# Modified z-score using MAD
|
|
677
|
+
modified_z = 0.6745 * (df_clean - median) / mad
|
|
678
|
+
max_z = np.abs(modified_z).max(axis=1)
|
|
679
|
+
is_anomaly = max_z > threshold
|
|
680
|
+
scores = max_z.values
|
|
681
|
+
|
|
682
|
+
else:
|
|
683
|
+
raise ValueError(f"Unknown statistical method: {method}")
|
|
684
|
+
|
|
685
|
+
return {
|
|
686
|
+
"is_anomaly": np.array(is_anomaly),
|
|
687
|
+
"scores": np.array(scores),
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
def _run_ensemble(
|
|
691
|
+
self,
|
|
692
|
+
df: Any,
|
|
693
|
+
params: dict[str, Any],
|
|
694
|
+
) -> dict[str, Any]:
|
|
695
|
+
"""Run ensemble anomaly detection using truthound.ml."""
|
|
696
|
+
import numpy as np
|
|
697
|
+
|
|
698
|
+
strategy = params.get("strategy", "weighted_average")
|
|
699
|
+
weights = params.get("weights", [0.3, 0.3, 0.4])
|
|
700
|
+
vote_threshold = params.get("vote_threshold", 0.5)
|
|
701
|
+
|
|
702
|
+
# Handle NaN values
|
|
703
|
+
df_clean = df.fillna(df.mean())
|
|
704
|
+
|
|
705
|
+
try:
|
|
706
|
+
from truthound.ml.anomaly_models.ensemble import (
|
|
707
|
+
EnsembleAnomalyDetector,
|
|
708
|
+
EnsembleConfig,
|
|
709
|
+
EnsembleStrategy,
|
|
710
|
+
)
|
|
711
|
+
from truthound.ml.anomaly_models.statistical import (
|
|
712
|
+
StatisticalAnomalyDetector,
|
|
713
|
+
StatisticalConfig,
|
|
714
|
+
)
|
|
715
|
+
from truthound.ml.anomaly_models.isolation_forest import (
|
|
716
|
+
IsolationForestDetector,
|
|
717
|
+
IsolationForestConfig,
|
|
718
|
+
)
|
|
719
|
+
import polars as pl
|
|
720
|
+
|
|
721
|
+
# Map strategy string to enum
|
|
722
|
+
strategy_map = {
|
|
723
|
+
"average": EnsembleStrategy.AVERAGE,
|
|
724
|
+
"weighted_average": EnsembleStrategy.WEIGHTED_AVERAGE,
|
|
725
|
+
"max": EnsembleStrategy.MAX,
|
|
726
|
+
"min": EnsembleStrategy.MIN,
|
|
727
|
+
"vote": EnsembleStrategy.VOTE,
|
|
728
|
+
"unanimous": EnsembleStrategy.UNANIMOUS,
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
# Create ensemble config
|
|
732
|
+
config = EnsembleConfig(
|
|
733
|
+
strategy=strategy_map.get(strategy, EnsembleStrategy.WEIGHTED_AVERAGE),
|
|
734
|
+
weights=weights,
|
|
735
|
+
vote_threshold=vote_threshold,
|
|
736
|
+
)
|
|
737
|
+
|
|
738
|
+
ensemble = EnsembleAnomalyDetector(config)
|
|
739
|
+
|
|
740
|
+
# Add detectors
|
|
741
|
+
columns = list(df_clean.columns)
|
|
742
|
+
|
|
743
|
+
# Z-Score detector
|
|
744
|
+
zscore_config = StatisticalConfig(z_threshold=3.0, columns=columns)
|
|
745
|
+
ensemble.add_detector(StatisticalAnomalyDetector(zscore_config), weight=weights[0] if len(weights) > 0 else 0.33)
|
|
746
|
+
|
|
747
|
+
# IQR detector
|
|
748
|
+
iqr_config = StatisticalConfig(iqr_multiplier=1.5, columns=columns)
|
|
749
|
+
ensemble.add_detector(StatisticalAnomalyDetector(iqr_config), weight=weights[1] if len(weights) > 1 else 0.33)
|
|
750
|
+
|
|
751
|
+
# Isolation Forest detector
|
|
752
|
+
if_config = IsolationForestConfig(n_estimators=100, columns=columns)
|
|
753
|
+
ensemble.add_detector(IsolationForestDetector(if_config), weight=weights[2] if len(weights) > 2 else 0.34)
|
|
754
|
+
|
|
755
|
+
# Convert to Polars for truthound
|
|
756
|
+
pl_df = pl.from_pandas(df_clean).lazy()
|
|
757
|
+
ensemble.fit(pl_df)
|
|
758
|
+
|
|
759
|
+
# Get predictions
|
|
760
|
+
result = ensemble.predict(pl_df)
|
|
761
|
+
|
|
762
|
+
# Extract scores and anomaly flags
|
|
763
|
+
is_anomaly = np.array([score.is_anomaly for score in result])
|
|
764
|
+
scores = np.array([score.score for score in result])
|
|
765
|
+
|
|
766
|
+
return {
|
|
767
|
+
"is_anomaly": is_anomaly,
|
|
768
|
+
"scores": scores,
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
except ImportError:
|
|
772
|
+
# Fallback: run individual algorithms and combine
|
|
773
|
+
results = []
|
|
774
|
+
|
|
775
|
+
# Run zscore
|
|
776
|
+
zscore_result = self._run_statistical(df, {"method": "zscore", "threshold": 3.0})
|
|
777
|
+
results.append(zscore_result)
|
|
778
|
+
|
|
779
|
+
# Run IQR
|
|
780
|
+
iqr_result = self._run_statistical(df, {"method": "iqr", "threshold": 1.5})
|
|
781
|
+
results.append(iqr_result)
|
|
782
|
+
|
|
783
|
+
# Run isolation forest
|
|
784
|
+
if_result = self._run_isolation_forest(df, {"n_estimators": 100})
|
|
785
|
+
results.append(if_result)
|
|
786
|
+
|
|
787
|
+
# Combine using weighted average
|
|
788
|
+
combined_scores = np.zeros(len(df_clean))
|
|
789
|
+
for i, result in enumerate(results):
|
|
790
|
+
weight = weights[i] if i < len(weights) else 1.0 / len(results)
|
|
791
|
+
combined_scores += weight * result["scores"]
|
|
792
|
+
|
|
793
|
+
# Normalize scores
|
|
794
|
+
if combined_scores.max() > 0:
|
|
795
|
+
combined_scores = combined_scores / combined_scores.max()
|
|
796
|
+
|
|
797
|
+
# Determine anomalies based on threshold (mean + 2*std)
|
|
798
|
+
threshold = combined_scores.mean() + 2 * combined_scores.std()
|
|
799
|
+
is_anomaly = combined_scores > threshold
|
|
800
|
+
|
|
801
|
+
return {
|
|
802
|
+
"is_anomaly": is_anomaly,
|
|
803
|
+
"scores": combined_scores,
|
|
804
|
+
}
|
|
612
805
|
|
|
613
806
|
def _run_autoencoder(
|
|
614
807
|
self,
|
|
@@ -1146,11 +1339,13 @@ class AnomalyDetectionService:
|
|
|
1146
1339
|
|
|
1147
1340
|
# Load data once
|
|
1148
1341
|
try:
|
|
1149
|
-
|
|
1342
|
+
from truthound.datasources import get_datasource
|
|
1150
1343
|
import numpy as np
|
|
1151
1344
|
import pandas as pd
|
|
1152
1345
|
|
|
1153
|
-
|
|
1346
|
+
# Load data using truthound datasources factory
|
|
1347
|
+
datasource = get_datasource(source.config.get("path", source.config))
|
|
1348
|
+
df = datasource.to_polars_lazyframe().collect().to_pandas()
|
|
1154
1349
|
|
|
1155
1350
|
# Sample if needed
|
|
1156
1351
|
if sample_size and len(df) > sample_size:
|
|
@@ -159,10 +159,11 @@ class AnomalyExplainerService:
|
|
|
159
159
|
algorithm used for detection.
|
|
160
160
|
"""
|
|
161
161
|
try:
|
|
162
|
-
|
|
162
|
+
from truthound.datasources import get_datasource
|
|
163
163
|
|
|
164
|
-
# Load data
|
|
165
|
-
|
|
164
|
+
# Load data using truthound datasources factory
|
|
165
|
+
datasource = get_datasource(source.config.get("path", source.config))
|
|
166
|
+
df = datasource.to_polars_lazyframe().collect().to_pandas()
|
|
166
167
|
|
|
167
168
|
# Get columns that were analyzed
|
|
168
169
|
columns = detection.columns_analyzed or list(
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Data quality backend implementations.
|
|
2
|
+
|
|
3
|
+
This module provides backend implementations for data quality operations.
|
|
4
|
+
The backends abstract away the specific library (truthound) and provide
|
|
5
|
+
a unified interface for the dashboard services.
|
|
6
|
+
|
|
7
|
+
Architecture:
|
|
8
|
+
BackendFactory
|
|
9
|
+
↓
|
|
10
|
+
BaseDataQualityBackend (ABC)
|
|
11
|
+
↓
|
|
12
|
+
┌─────────────────────────────┐
|
|
13
|
+
│ TruthoundBackend │ MockBackend │
|
|
14
|
+
└─────────────────────────────┘
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
from truthound_dashboard.core.backends import BackendFactory
|
|
18
|
+
|
|
19
|
+
# Get the default backend (truthound)
|
|
20
|
+
backend = BackendFactory.get_backend()
|
|
21
|
+
|
|
22
|
+
# Check if backend is available
|
|
23
|
+
if backend.is_available():
|
|
24
|
+
result = await backend.check("data.csv")
|
|
25
|
+
|
|
26
|
+
# Use a specific backend
|
|
27
|
+
backend = BackendFactory.get_backend("mock")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from .base import BaseDataQualityBackend
|
|
31
|
+
from .errors import (
|
|
32
|
+
BackendError,
|
|
33
|
+
BackendOperationError,
|
|
34
|
+
BackendUnavailableError,
|
|
35
|
+
BackendVersionError,
|
|
36
|
+
)
|
|
37
|
+
from .factory import (
|
|
38
|
+
BackendFactory,
|
|
39
|
+
get_backend,
|
|
40
|
+
reset_backend,
|
|
41
|
+
get_truthound_version,
|
|
42
|
+
get_backend_capabilities,
|
|
43
|
+
get_backend_info,
|
|
44
|
+
)
|
|
45
|
+
from .mock_backend import MockBackend
|
|
46
|
+
from .truthound_backend import TruthoundBackend
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
# Base class
|
|
50
|
+
"BaseDataQualityBackend",
|
|
51
|
+
# Backend implementations
|
|
52
|
+
"TruthoundBackend",
|
|
53
|
+
"MockBackend",
|
|
54
|
+
# Factory
|
|
55
|
+
"BackendFactory",
|
|
56
|
+
"get_backend",
|
|
57
|
+
"reset_backend",
|
|
58
|
+
# Capability detection
|
|
59
|
+
"get_truthound_version",
|
|
60
|
+
"get_backend_capabilities",
|
|
61
|
+
"get_backend_info",
|
|
62
|
+
# Errors
|
|
63
|
+
"BackendError",
|
|
64
|
+
"BackendUnavailableError",
|
|
65
|
+
"BackendVersionError",
|
|
66
|
+
"BackendOperationError",
|
|
67
|
+
]
|