PyPI - truthound-dashboard - Versions diffs - 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205) hide show

truthound_dashboard/api/alerts.py +75 -86
truthound_dashboard/api/anomaly.py +7 -13
truthound_dashboard/api/cross_alerts.py +38 -52
truthound_dashboard/api/drift.py +49 -59
truthound_dashboard/api/drift_monitor.py +234 -79
truthound_dashboard/api/enterprise_sampling.py +498 -0
truthound_dashboard/api/history.py +57 -5
truthound_dashboard/api/lineage.py +3 -48
truthound_dashboard/api/maintenance.py +104 -49
truthound_dashboard/api/mask.py +1 -2
truthound_dashboard/api/middleware.py +2 -1
truthound_dashboard/api/model_monitoring.py +435 -311
truthound_dashboard/api/notifications.py +227 -191
truthound_dashboard/api/notifications_advanced.py +21 -20
truthound_dashboard/api/observability.py +586 -0
truthound_dashboard/api/plugins.py +2 -433
truthound_dashboard/api/profile.py +199 -37
truthound_dashboard/api/quality_reporter.py +701 -0
truthound_dashboard/api/reports.py +7 -16
truthound_dashboard/api/router.py +66 -0
truthound_dashboard/api/rule_suggestions.py +5 -5
truthound_dashboard/api/scan.py +17 -19
truthound_dashboard/api/schedules.py +85 -50
truthound_dashboard/api/schema_evolution.py +6 -6
truthound_dashboard/api/schema_watcher.py +667 -0
truthound_dashboard/api/sources.py +98 -27
truthound_dashboard/api/tiering.py +1323 -0
truthound_dashboard/api/triggers.py +14 -11
truthound_dashboard/api/validations.py +12 -11
truthound_dashboard/api/versioning.py +1 -6
truthound_dashboard/core/__init__.py +129 -3
truthound_dashboard/core/actions/__init__.py +62 -0
truthound_dashboard/core/actions/custom.py +426 -0
truthound_dashboard/core/actions/notifications.py +910 -0
truthound_dashboard/core/actions/storage.py +472 -0
truthound_dashboard/core/actions/webhook.py +281 -0
truthound_dashboard/core/anomaly.py +262 -67
truthound_dashboard/core/anomaly_explainer.py +4 -3
truthound_dashboard/core/backends/__init__.py +67 -0
truthound_dashboard/core/backends/base.py +299 -0
truthound_dashboard/core/backends/errors.py +191 -0
truthound_dashboard/core/backends/factory.py +423 -0
truthound_dashboard/core/backends/mock_backend.py +451 -0
truthound_dashboard/core/backends/truthound_backend.py +718 -0
truthound_dashboard/core/checkpoint/__init__.py +87 -0
truthound_dashboard/core/checkpoint/adapters.py +814 -0
truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
truthound_dashboard/core/checkpoint/runner.py +270 -0
truthound_dashboard/core/connections.py +437 -10
truthound_dashboard/core/converters/__init__.py +14 -0
truthound_dashboard/core/converters/truthound.py +620 -0
truthound_dashboard/core/cross_alerts.py +540 -320
truthound_dashboard/core/datasource_factory.py +1672 -0
truthound_dashboard/core/drift_monitor.py +216 -20
truthound_dashboard/core/enterprise_sampling.py +1291 -0
truthound_dashboard/core/interfaces/__init__.py +225 -0
truthound_dashboard/core/interfaces/actions.py +652 -0
truthound_dashboard/core/interfaces/base.py +247 -0
truthound_dashboard/core/interfaces/checkpoint.py +676 -0
truthound_dashboard/core/interfaces/protocols.py +664 -0
truthound_dashboard/core/interfaces/reporters.py +650 -0
truthound_dashboard/core/interfaces/routing.py +646 -0
truthound_dashboard/core/interfaces/triggers.py +619 -0
truthound_dashboard/core/lineage.py +407 -71
truthound_dashboard/core/model_monitoring.py +431 -3
truthound_dashboard/core/notifications/base.py +4 -0
truthound_dashboard/core/notifications/channels.py +501 -1203
truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
truthound_dashboard/core/notifications/deduplication/service.py +131 -348
truthound_dashboard/core/notifications/dispatcher.py +202 -11
truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
truthound_dashboard/core/notifications/escalation/engine.py +168 -358
truthound_dashboard/core/notifications/routing/__init__.py +88 -128
truthound_dashboard/core/notifications/routing/engine.py +90 -317
truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
truthound_dashboard/core/notifications/throttling/builder.py +117 -255
truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
truthound_dashboard/core/phase5/collaboration.py +1 -1
truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
truthound_dashboard/core/quality_reporter.py +1359 -0
truthound_dashboard/core/report_history.py +0 -6
truthound_dashboard/core/reporters/__init__.py +175 -14
truthound_dashboard/core/reporters/adapters.py +943 -0
truthound_dashboard/core/reporters/base.py +0 -3
truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
truthound_dashboard/core/reporters/compat.py +266 -0
truthound_dashboard/core/reporters/csv_reporter.py +2 -35
truthound_dashboard/core/reporters/factory.py +526 -0
truthound_dashboard/core/reporters/interfaces.py +745 -0
truthound_dashboard/core/reporters/registry.py +1 -10
truthound_dashboard/core/scheduler.py +165 -0
truthound_dashboard/core/schema_evolution.py +3 -3
truthound_dashboard/core/schema_watcher.py +1528 -0
truthound_dashboard/core/services.py +595 -76
truthound_dashboard/core/store_manager.py +810 -0
truthound_dashboard/core/streaming_anomaly.py +169 -4
truthound_dashboard/core/tiering.py +1309 -0
truthound_dashboard/core/triggers/evaluators.py +178 -8
truthound_dashboard/core/truthound_adapter.py +2620 -197
truthound_dashboard/core/unified_alerts.py +23 -20
truthound_dashboard/db/__init__.py +8 -0
truthound_dashboard/db/database.py +8 -2
truthound_dashboard/db/models.py +944 -25
truthound_dashboard/db/repository.py +2 -0
truthound_dashboard/main.py +11 -0
truthound_dashboard/schemas/__init__.py +177 -16
truthound_dashboard/schemas/base.py +44 -23
truthound_dashboard/schemas/collaboration.py +19 -6
truthound_dashboard/schemas/cross_alerts.py +19 -3
truthound_dashboard/schemas/drift.py +61 -55
truthound_dashboard/schemas/drift_monitor.py +67 -23
truthound_dashboard/schemas/enterprise_sampling.py +653 -0
truthound_dashboard/schemas/lineage.py +0 -33
truthound_dashboard/schemas/mask.py +10 -8
truthound_dashboard/schemas/model_monitoring.py +89 -10
truthound_dashboard/schemas/notifications_advanced.py +13 -0
truthound_dashboard/schemas/observability.py +453 -0
truthound_dashboard/schemas/plugins.py +0 -280
truthound_dashboard/schemas/profile.py +154 -247
truthound_dashboard/schemas/quality_reporter.py +403 -0
truthound_dashboard/schemas/reports.py +2 -2
truthound_dashboard/schemas/rule_suggestion.py +8 -1
truthound_dashboard/schemas/scan.py +4 -24
truthound_dashboard/schemas/schedule.py +11 -3
truthound_dashboard/schemas/schema_watcher.py +727 -0
truthound_dashboard/schemas/source.py +17 -2
truthound_dashboard/schemas/tiering.py +822 -0
truthound_dashboard/schemas/triggers.py +16 -0
truthound_dashboard/schemas/unified_alerts.py +7 -0
truthound_dashboard/schemas/validation.py +0 -13
truthound_dashboard/schemas/validators/base.py +41 -21
truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
truthound_dashboard/schemas/validators/localization_validators.py +273 -0
truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
truthound_dashboard/schemas/validators/referential_validators.py +312 -0
truthound_dashboard/schemas/validators/registry.py +93 -8
truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
truthound_dashboard/schemas/versioning.py +1 -6
truthound_dashboard/static/index.html +2 -2
truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
truthound_dashboard/core/plugins/hooks/manager.py +0 -403
truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
truthound_dashboard/core/reporters/junit_reporter.py +0 -233
truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0

truthound_dashboard/core/anomaly.py CHANGED Viewed

@@ -248,10 +248,12 @@ class AnomalyDetectionService:
             Detection results dictionary.
         """
         try:
-            import truthound as th
+            from truthound.datasources import get_datasource
-            # Load data from source
-            df = th.read(source.config)
+            # Load data from source using truthound datasources factory
+            # The source.config contains the path or connection info
+            datasource = get_datasource(source.config.get("path", source.config))
+            df = datasource.to_polars_lazyframe().collect()
             # Get columns to analyze
             columns = None
@@ -311,7 +313,9 @@ class AnomalyDetectionService:
         sample_size: int | None,
         params: dict[str, Any],
     ) -> dict[str, Any]:
-        """Run the specified anomaly detection algorithm.
+        """Run the specified anomaly detection algorithm using truthound.ml.
+        Uses truthound.ml.anomaly_models when available, falls back to sklearn.
         Args:
             df: DataFrame to analyze.
@@ -360,6 +364,8 @@ class AnomalyDetectionService:
             result = self._run_statistical(df_analyze, params)
         elif algorithm == "autoencoder":
             result = self._run_autoencoder(df_analyze, params)
+        elif algorithm == "ensemble":
+            result = self._run_ensemble(df_analyze, params)
         else:
             raise ValueError(f"Unknown algorithm: {algorithm}")
@@ -412,32 +418,67 @@ class AnomalyDetectionService:
         df: Any,
         params: dict[str, Any],
     ) -> dict[str, Any]:
-        """Run Isolation Forest algorithm."""
-        from sklearn.ensemble import IsolationForest
+        """Run Isolation Forest algorithm using truthound.ml."""
         import numpy as np
         # Get parameters with defaults
         n_estimators = params.get("n_estimators", 100)
         contamination = params.get("contamination", 0.1)
-        max_samples = params.get("max_samples", "auto")
+        max_samples = params.get("max_samples", 256)
         random_state = params.get("random_state", 42)
         # Handle NaN values
         df_clean = df.fillna(df.mean())
-        clf = IsolationForest(
-            n_estimators=n_estimators,
-            contamination=contamination,
-            max_samples=max_samples,
-            random_state=random_state,
-        )
-        predictions = clf.fit_predict(df_clean)
-        scores = -clf.score_samples(df_clean)  # Higher = more anomalous
+        try:
+            from truthound.ml.anomaly_models.isolation_forest import (
+                IsolationForestDetector,
+                IsolationForestConfig,
+            )
+            import polars as pl
-        return {
-            "is_anomaly": predictions == -1,
-            "scores": scores,
-        }
+            # Create truthound detector
+            config = IsolationForestConfig(
+                n_estimators=n_estimators,
+                max_samples=max_samples if isinstance(max_samples, int) else 256,
+                columns=list(df_clean.columns),
+            )
+            detector = IsolationForestDetector(config)
+            # Convert to Polars for truthound
+            pl_df = pl.from_pandas(df_clean).lazy()
+            detector.fit(pl_df)
+            # Get predictions
+            result = detector.predict(pl_df)
+            # Extract scores and anomaly flags
+            is_anomaly = np.array([score.is_anomaly for score in result])
+            scores = np.array([score.score for score in result])
+            return {
+                "is_anomaly": is_anomaly,
+                "scores": scores,
+            }
+        except ImportError:
+            # Fallback to sklearn
+            from sklearn.ensemble import IsolationForest
+            clf = IsolationForest(
+                n_estimators=n_estimators,
+                contamination=contamination,
+                max_samples=max_samples,
+                random_state=random_state,
+            )
+            predictions = clf.fit_predict(df_clean)
+            scores = -clf.score_samples(df_clean)  # Higher = more anomalous
+            return {
+                "is_anomaly": predictions == -1,
+                "scores": scores,
+            }
     def _run_lof(
         self,
@@ -446,6 +487,7 @@ class AnomalyDetectionService:
     ) -> dict[str, Any]:
         """Run Local Outlier Factor algorithm."""
         from sklearn.neighbors import LocalOutlierFactor
+        from sklearn.preprocessing import StandardScaler
         import numpy as np
         n_neighbors = params.get("n_neighbors", 20)
@@ -453,7 +495,6 @@ class AnomalyDetectionService:
         algorithm = params.get("algorithm", "auto")
         # Handle NaN values and scale
-        from sklearn.preprocessing import StandardScaler
         df_clean = df.fillna(df.mean())
         scaler = StandardScaler()
         df_scaled = scaler.fit_transform(df_clean)
@@ -512,6 +553,7 @@ class AnomalyDetectionService:
         """Run DBSCAN algorithm."""
         from sklearn.cluster import DBSCAN
         from sklearn.preprocessing import StandardScaler
+        from sklearn.metrics import pairwise_distances
         import numpy as np
         eps = params.get("eps", 0.5)
@@ -534,15 +576,14 @@ class AnomalyDetectionService:
         is_anomaly = labels == -1
         # Calculate distance-based scores (distance to nearest cluster centroid)
-        from sklearn.metrics import pairwise_distances
         scores = np.zeros(len(df_scaled))
         if not is_anomaly.all():
             # Get centroids of each cluster
             unique_labels = set(labels) - {-1}
             if unique_labels:
                 centroids = np.array([
-                    df_scaled[labels == l].mean(axis=0)
-                    for l in unique_labels
+                    df_scaled[labels == label].mean(axis=0)
+                    for label in unique_labels
                 ])
                 distances = pairwise_distances(df_scaled, centroids, metric=metric)
                 scores = distances.min(axis=1)
@@ -557,7 +598,7 @@ class AnomalyDetectionService:
         df: Any,
         params: dict[str, Any],
     ) -> dict[str, Any]:
-        """Run statistical anomaly detection."""
+        """Run statistical anomaly detection using truthound.ml."""
         import numpy as np
         method = params.get("method", "zscore")
@@ -566,49 +607,201 @@ class AnomalyDetectionService:
         # Handle NaN values
         df_clean = df.fillna(df.mean())
-        if method == "zscore":
-            mean = df_clean.mean()
-            std = df_clean.std()
-            z_scores = np.abs((df_clean - mean) / std)
-            # Take max z-score across all columns for each row
-            max_z = z_scores.max(axis=1)
-            is_anomaly = max_z > threshold
-            scores = max_z.values
-        elif method == "iqr":
-            q1 = df_clean.quantile(0.25)
-            q3 = df_clean.quantile(0.75)
-            iqr = q3 - q1
-            lower = q1 - threshold * iqr
-            upper = q3 + threshold * iqr
-            is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
-            is_anomaly = is_outlier.values
-            # Score based on distance from bounds
-            scores = np.zeros(len(df_clean))
-            for col in df_clean.columns:
-                col_scores = np.maximum(
-                    (lower[col] - df_clean[col]) / iqr[col],
-                    (df_clean[col] - upper[col]) / iqr[col],
-                )
-                col_scores = np.maximum(col_scores, 0)
-                scores = np.maximum(scores, col_scores.values)
-        elif method == "mad":
-            median = df_clean.median()
-            mad = np.abs(df_clean - median).median()
-            # Modified z-score using MAD
-            modified_z = 0.6745 * (df_clean - median) / mad
-            max_z = np.abs(modified_z).max(axis=1)
-            is_anomaly = max_z > threshold
-            scores = max_z.values
+        try:
+            from truthound.ml.anomaly_models.statistical import (
+                StatisticalAnomalyDetector,
+                StatisticalConfig,
+            )
+            import polars as pl
+            # Create truthound detector
+            config = StatisticalConfig(
+                z_threshold=threshold,
+                iqr_multiplier=threshold if method == "iqr" else 1.5,
+                use_robust_stats=(method == "mad"),
+                per_column=True,
+                columns=list(df_clean.columns),
+            )
-        else:
-            raise ValueError(f"Unknown statistical method: {method}")
+            detector = StatisticalAnomalyDetector(config)
-        return {
-            "is_anomaly": np.array(is_anomaly),
-            "scores": np.array(scores),
-        }
+            # Convert to Polars for truthound
+            pl_df = pl.from_pandas(df_clean).lazy()
+            detector.fit(pl_df)
+            # Get predictions
+            result = detector.predict(pl_df)
+            # Extract scores and anomaly flags
+            is_anomaly = np.array([score.is_anomaly for score in result])
+            scores = np.array([score.score for score in result])
+            return {
+                "is_anomaly": is_anomaly,
+                "scores": scores,
+            }
+        except ImportError:
+            # Fallback to manual implementation
+            if method == "zscore":
+                mean = df_clean.mean()
+                std = df_clean.std()
+                z_scores = np.abs((df_clean - mean) / std)
+                # Take max z-score across all columns for each row
+                max_z = z_scores.max(axis=1)
+                is_anomaly = max_z > threshold
+                scores = max_z.values
+            elif method == "iqr":
+                q1 = df_clean.quantile(0.25)
+                q3 = df_clean.quantile(0.75)
+                iqr = q3 - q1
+                lower = q1 - threshold * iqr
+                upper = q3 + threshold * iqr
+                is_outlier = ((df_clean < lower) | (df_clean > upper)).any(axis=1)
+                is_anomaly = is_outlier.values
+                # Score based on distance from bounds
+                scores = np.zeros(len(df_clean))
+                for col in df_clean.columns:
+                    col_scores = np.maximum(
+                        (lower[col] - df_clean[col]) / iqr[col],
+                        (df_clean[col] - upper[col]) / iqr[col],
+                    )
+                    col_scores = np.maximum(col_scores, 0)
+                    scores = np.maximum(scores, col_scores.values)
+            elif method == "mad":
+                median = df_clean.median()
+                mad = np.abs(df_clean - median).median()
+                # Modified z-score using MAD
+                modified_z = 0.6745 * (df_clean - median) / mad
+                max_z = np.abs(modified_z).max(axis=1)
+                is_anomaly = max_z > threshold
+                scores = max_z.values
+            else:
+                raise ValueError(f"Unknown statistical method: {method}")
+            return {
+                "is_anomaly": np.array(is_anomaly),
+                "scores": np.array(scores),
+            }
+    def _run_ensemble(
+        self,
+        df: Any,
+        params: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Run ensemble anomaly detection using truthound.ml."""
+        import numpy as np
+        strategy = params.get("strategy", "weighted_average")
+        weights = params.get("weights", [0.3, 0.3, 0.4])
+        vote_threshold = params.get("vote_threshold", 0.5)
+        # Handle NaN values
+        df_clean = df.fillna(df.mean())
+        try:
+            from truthound.ml.anomaly_models.ensemble import (
+                EnsembleAnomalyDetector,
+                EnsembleConfig,
+                EnsembleStrategy,
+            )
+            from truthound.ml.anomaly_models.statistical import (
+                StatisticalAnomalyDetector,
+                StatisticalConfig,
+            )
+            from truthound.ml.anomaly_models.isolation_forest import (
+                IsolationForestDetector,
+                IsolationForestConfig,
+            )
+            import polars as pl
+            # Map strategy string to enum
+            strategy_map = {
+                "average": EnsembleStrategy.AVERAGE,
+                "weighted_average": EnsembleStrategy.WEIGHTED_AVERAGE,
+                "max": EnsembleStrategy.MAX,
+                "min": EnsembleStrategy.MIN,
+                "vote": EnsembleStrategy.VOTE,
+                "unanimous": EnsembleStrategy.UNANIMOUS,
+            }
+            # Create ensemble config
+            config = EnsembleConfig(
+                strategy=strategy_map.get(strategy, EnsembleStrategy.WEIGHTED_AVERAGE),
+                weights=weights,
+                vote_threshold=vote_threshold,
+            )
+            ensemble = EnsembleAnomalyDetector(config)
+            # Add detectors
+            columns = list(df_clean.columns)
+            # Z-Score detector
+            zscore_config = StatisticalConfig(z_threshold=3.0, columns=columns)
+            ensemble.add_detector(StatisticalAnomalyDetector(zscore_config), weight=weights[0] if len(weights) > 0 else 0.33)
+            # IQR detector
+            iqr_config = StatisticalConfig(iqr_multiplier=1.5, columns=columns)
+            ensemble.add_detector(StatisticalAnomalyDetector(iqr_config), weight=weights[1] if len(weights) > 1 else 0.33)
+            # Isolation Forest detector
+            if_config = IsolationForestConfig(n_estimators=100, columns=columns)
+            ensemble.add_detector(IsolationForestDetector(if_config), weight=weights[2] if len(weights) > 2 else 0.34)
+            # Convert to Polars for truthound
+            pl_df = pl.from_pandas(df_clean).lazy()
+            ensemble.fit(pl_df)
+            # Get predictions
+            result = ensemble.predict(pl_df)
+            # Extract scores and anomaly flags
+            is_anomaly = np.array([score.is_anomaly for score in result])
+            scores = np.array([score.score for score in result])
+            return {
+                "is_anomaly": is_anomaly,
+                "scores": scores,
+            }
+        except ImportError:
+            # Fallback: run individual algorithms and combine
+            results = []
+            # Run zscore
+            zscore_result = self._run_statistical(df, {"method": "zscore", "threshold": 3.0})
+            results.append(zscore_result)
+            # Run IQR
+            iqr_result = self._run_statistical(df, {"method": "iqr", "threshold": 1.5})
+            results.append(iqr_result)
+            # Run isolation forest
+            if_result = self._run_isolation_forest(df, {"n_estimators": 100})
+            results.append(if_result)
+            # Combine using weighted average
+            combined_scores = np.zeros(len(df_clean))
+            for i, result in enumerate(results):
+                weight = weights[i] if i < len(weights) else 1.0 / len(results)
+                combined_scores += weight * result["scores"]
+            # Normalize scores
+            if combined_scores.max() > 0:
+                combined_scores = combined_scores / combined_scores.max()
+            # Determine anomalies based on threshold (mean + 2*std)
+            threshold = combined_scores.mean() + 2 * combined_scores.std()
+            is_anomaly = combined_scores > threshold
+            return {
+                "is_anomaly": is_anomaly,
+                "scores": combined_scores,
+            }
     def _run_autoencoder(
         self,
@@ -1146,11 +1339,13 @@ class AnomalyDetectionService:
         # Load data once
         try:
-            import truthound as th
+            from truthound.datasources import get_datasource
             import numpy as np
             import pandas as pd
-            df = th.read(source.config)
+            # Load data using truthound datasources factory
+            datasource = get_datasource(source.config.get("path", source.config))
+            df = datasource.to_polars_lazyframe().collect().to_pandas()
             # Sample if needed
             if sample_size and len(df) > sample_size:

truthound_dashboard/core/anomaly_explainer.py CHANGED Viewed

@@ -159,10 +159,11 @@ class AnomalyExplainerService:
         algorithm used for detection.
         """
         try:
-            import truthound as th
+            from truthound.datasources import get_datasource
-            # Load data
-            df = th.read(source.config)
+            # Load data using truthound datasources factory
+            datasource = get_datasource(source.config.get("path", source.config))
+            df = datasource.to_polars_lazyframe().collect().to_pandas()
             # Get columns that were analyzed
             columns = detection.columns_analyzed or list(

truthound_dashboard/core/backends/__init__.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Data quality backend implementations.
+This module provides backend implementations for data quality operations.
+The backends abstract away the specific library (truthound) and provide
+a unified interface for the dashboard services.
+Architecture:
+    BackendFactory
+        ↓
+    BaseDataQualityBackend (ABC)
+        ↓
+    ┌─────────────────────────────┐
+    │  TruthoundBackend  │ MockBackend │
+    └─────────────────────────────┘
+Usage:
+    from truthound_dashboard.core.backends import BackendFactory
+    # Get the default backend (truthound)
+    backend = BackendFactory.get_backend()
+    # Check if backend is available
+    if backend.is_available():
+        result = await backend.check("data.csv")
+    # Use a specific backend
+    backend = BackendFactory.get_backend("mock")
+"""
+from .base import BaseDataQualityBackend
+from .errors import (
+    BackendError,
+    BackendOperationError,
+    BackendUnavailableError,
+    BackendVersionError,
+)
+from .factory import (
+    BackendFactory,
+    get_backend,
+    reset_backend,
+    get_truthound_version,
+    get_backend_capabilities,
+    get_backend_info,
+)
+from .mock_backend import MockBackend
+from .truthound_backend import TruthoundBackend
+__all__ = [
+    # Base class
+    "BaseDataQualityBackend",
+    # Backend implementations
+    "TruthoundBackend",
+    "MockBackend",
+    # Factory
+    "BackendFactory",
+    "get_backend",
+    "reset_backend",
+    # Capability detection
+    "get_truthound_version",
+    "get_backend_capabilities",
+    "get_backend_info",
+    # Errors
+    "BackendError",
+    "BackendUnavailableError",
+    "BackendVersionError",
+    "BackendOperationError",
+]

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl