truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +437 -10
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +11 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
"""Enterprise Sampling API endpoints.
|
|
2
|
+
|
|
3
|
+
This module provides REST API endpoints for truthound 1.2.10's enterprise-scale
|
|
4
|
+
sampling capabilities.
|
|
5
|
+
|
|
6
|
+
Endpoints:
|
|
7
|
+
- POST /api/v1/sampling/enterprise: Run enterprise sampling
|
|
8
|
+
- POST /api/v1/sampling/estimate-size: Estimate optimal sample size
|
|
9
|
+
- POST /api/v1/sampling/sketch: Run sketch-based estimation
|
|
10
|
+
- GET /api/v1/sampling/jobs: List sampling jobs
|
|
11
|
+
- GET /api/v1/sampling/jobs/{job_id}: Get job status
|
|
12
|
+
- POST /api/v1/sampling/jobs/{job_id}/cancel: Cancel job
|
|
13
|
+
- GET /api/v1/sampling/strategies: List available strategies
|
|
14
|
+
- GET /api/v1/sampling/quality-presets: List quality presets
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
|
23
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
24
|
+
|
|
25
|
+
from truthound_dashboard.api.deps import get_session
|
|
26
|
+
from truthound_dashboard.core.enterprise_sampling import (
|
|
27
|
+
QUALITY_PRESETS,
|
|
28
|
+
SCALE_STRATEGY_MAP,
|
|
29
|
+
classify_dataset_scale,
|
|
30
|
+
get_enterprise_sampler,
|
|
31
|
+
get_sample_size_estimator,
|
|
32
|
+
get_sketch_estimator,
|
|
33
|
+
)
|
|
34
|
+
from sqlalchemy import select
|
|
35
|
+
from truthound_dashboard.db import Source
|
|
36
|
+
from truthound_dashboard.schemas.enterprise_sampling import (
|
|
37
|
+
BlockSamplingConfig,
|
|
38
|
+
ColumnAwareSamplingConfig,
|
|
39
|
+
EnterpriseSamplingRequest,
|
|
40
|
+
EnterpriseSamplingResponse,
|
|
41
|
+
EnterpriseSamplingStrategy,
|
|
42
|
+
MemoryBudgetConfig,
|
|
43
|
+
MultiStageSamplingConfig,
|
|
44
|
+
ParallelSamplingConfig,
|
|
45
|
+
ProgressiveSamplingConfig,
|
|
46
|
+
SampleSizeEstimateRequest,
|
|
47
|
+
SampleSizeEstimateResponse,
|
|
48
|
+
SamplingJobListResponse,
|
|
49
|
+
SamplingJobStatus,
|
|
50
|
+
SamplingQuality,
|
|
51
|
+
ScaleCategory,
|
|
52
|
+
SchedulingPolicy,
|
|
53
|
+
SketchConfig,
|
|
54
|
+
SketchEstimateRequest,
|
|
55
|
+
SketchEstimateResponse,
|
|
56
|
+
SketchType,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
logger = logging.getLogger(__name__)
|
|
60
|
+
|
|
61
|
+
router = APIRouter(prefix="/sampling", tags=["Enterprise Sampling"])
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ============================================================================
|
|
65
|
+
# Response Models for API Documentation
|
|
66
|
+
# ============================================================================
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class StrategyInfo:
    """Describes one sampling strategy for API documentation responses.

    Plain attribute container; instances are flattened into dicts by the
    ``/strategies`` endpoint.
    """

    def __init__(
        self,
        name: str,
        value: str,
        description: str,
        best_for: str,
        supports_parallel: bool = False,
        supports_streaming: bool = False,
    ):
        # Human-readable name and machine-readable strategy value.
        self.name, self.value = name, value
        # Free-text guidance shown to API consumers.
        self.description, self.best_for = description, best_for
        # Capability flags surfaced in the strategy listing.
        self.supports_parallel = supports_parallel
        self.supports_streaming = supports_streaming
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Catalogue of documented sampling strategies, keyed by the strategy enum.
# Consumed by the /strategies endpoint, which flattens each StrategyInfo
# into a plain dict for the API response.
STRATEGY_DOCS = {
    EnterpriseSamplingStrategy.NONE: StrategyInfo(
        name="No Sampling",
        value="none",
        description="Use full dataset without sampling",
        best_for="Datasets < 1M rows",
    ),
    EnterpriseSamplingStrategy.RANDOM: StrategyInfo(
        name="Random Sampling",
        value="random",
        description="Simple random sampling without replacement",
        best_for="General purpose, uniform distributions",
    ),
    EnterpriseSamplingStrategy.BLOCK: StrategyInfo(
        name="Block Sampling",
        value="block",
        description="Divides data into blocks and samples proportionally from each",
        best_for="10M-100M rows, when coverage across data is important",
        # Only block sampling advertises parallel support here.
        supports_parallel=True,
    ),
    EnterpriseSamplingStrategy.MULTI_STAGE: StrategyInfo(
        name="Multi-Stage Sampling",
        value="multi_stage",
        description="Hierarchical sampling in multiple progressive passes",
        best_for="100M-1B rows, when quick estimates are acceptable",
    ),
    EnterpriseSamplingStrategy.COLUMN_AWARE: StrategyInfo(
        name="Column-Aware Sampling",
        value="column_aware",
        description="Adjusts sample size based on column type complexity",
        best_for="Datasets with mixed column types",
    ),
    EnterpriseSamplingStrategy.PROGRESSIVE: StrategyInfo(
        name="Progressive Sampling",
        value="progressive",
        description="Iteratively increases sample size until convergence",
        best_for="Exploratory analysis, early stopping when possible",
    ),
    EnterpriseSamplingStrategy.ADAPTIVE: StrategyInfo(
        name="Adaptive (Auto-Select)",
        value="adaptive",
        description="Automatically selects best strategy based on data characteristics",
        best_for="When unsure which strategy to use",
    ),
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ============================================================================
|
|
137
|
+
# Endpoints
|
|
138
|
+
# ============================================================================
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@router.post(
    "/enterprise",
    response_model=EnterpriseSamplingResponse,
    summary="Run enterprise-scale sampling",
    description="""
Execute enterprise-scale sampling on a data source.

Supports datasets from 100M to billions of rows with:
- Block sampling for parallel processing
- Multi-stage hierarchical sampling
- Column-aware adaptive sampling
- Progressive sampling with convergence detection

The response includes detailed metrics about the sampling operation.
""",
)
async def run_enterprise_sampling(
    request: EnterpriseSamplingRequest,
    db: AsyncSession = Depends(get_session),
) -> EnterpriseSamplingResponse:
    """Run enterprise-scale sampling on a data source.

    Args:
        request: Sampling configuration, including the source id.
        db: Async database session (injected).

    Returns:
        The sampler's ``EnterpriseSamplingResponse`` with metrics.

    Raises:
        HTTPException: 404 if the source does not exist, 400 for an
            unsupported file format, 500 if sampling itself fails.
    """
    # Resolve the source record; sampling needs its on-disk path.
    result = await db.execute(select(Source).where(Source.id == request.source_id))
    source = result.scalar_one_or_none()
    if not source:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source not found: {request.source_id}",
        )

    try:
        import polars as pl

        # Build a LazyFrame so large files are not materialized up front.
        data_path = source.path
        if data_path.endswith(".csv"):
            lf = pl.scan_csv(data_path)
        elif data_path.endswith(".parquet"):
            lf = pl.scan_parquet(data_path)
        elif data_path.endswith(".json"):
            # No lazy scanner for plain JSON; read eagerly, then go lazy.
            lf = pl.read_json(data_path).lazy()
        elif data_path.endswith((".jsonl", ".ndjson")):
            lf = pl.read_ndjson(data_path).lazy()
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Unsupported file format: {data_path}",
            )

        # Column count comes from the schema without touching row data.
        schema = lf.collect_schema()
        column_count = len(schema)

        # Row count: prefer the cheap lazy aggregation; fall back to a
        # full collect if pl.len() cannot be evaluated lazily.
        try:
            row_count = lf.select(pl.len()).collect().item()
        except Exception:
            row_count = len(lf.collect())

        # Delegate the actual sampling to the configured sampler.
        sampler = get_enterprise_sampler()
        response = await sampler.sample(
            config=request,
            data=lf,
            row_count=row_count,
            column_count=column_count,
        )

        return response

    except HTTPException:
        # Re-raise our own 400/404 responses unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error(f"...") lost.
        logger.exception("Enterprise sampling failed")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Sampling failed: {e}",
        ) from e
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@router.post(
    "/estimate-size",
    response_model=SampleSizeEstimateResponse,
    summary="Estimate optimal sample size",
    description="""
Calculate the optimal sample size for a given population using Cochran's formula.

Returns:
- Recommended sample size with statistical confidence
- Minimum and maximum useful sample sizes
- Estimated processing time and memory usage
- Recommended sampling strategy with rationale
""",
)
async def estimate_sample_size(
    request: SampleSizeEstimateRequest,
) -> SampleSizeEstimateResponse:
    """Delegate sample-size estimation to the shared estimator."""
    return get_sample_size_estimator().estimate(request)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@router.post(
    "/sketch",
    response_model=SketchEstimateResponse,
    summary="Run sketch-based estimation",
    description="""
Use probabilistic data structures for O(1) memory aggregations on massive datasets.

Supported sketch types:
- **HyperLogLog**: Cardinality estimation (distinct count)
- **Count-Min Sketch**: Frequency estimation (heavy hitters)
- **Bloom Filter**: Membership testing

Ideal for datasets exceeding 10B rows where exact computation is impractical.
""",
)
async def run_sketch_estimation(
    request: SketchEstimateRequest,
    db: AsyncSession = Depends(get_session),
) -> SketchEstimateResponse:
    """Run sketch-based estimation using probabilistic data structures.

    Args:
        request: Sketch configuration, including source id and columns.
        db: Async database session (injected).

    Returns:
        The estimator's ``SketchEstimateResponse``.

    Raises:
        HTTPException: 404 if the source does not exist, 400 if a
            requested column is missing, 500 on estimation failure.
    """
    # Resolve the source record; estimation needs its on-disk path.
    result = await db.execute(select(Source).where(Source.id == request.source_id))
    source = result.scalar_one_or_none()
    if not source:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source not found: {request.source_id}",
        )

    try:
        import polars as pl

        # Build a LazyFrame; CSV is assumed for unknown extensions
        # (NOTE(review): eager read_csv fallback may be expensive — confirm intent).
        data_path = source.path
        if data_path.endswith(".csv"):
            lf = pl.scan_csv(data_path)
        elif data_path.endswith(".parquet"):
            lf = pl.scan_parquet(data_path)
        else:
            lf = pl.read_csv(data_path).lazy()

        # Fail fast with a 400 if any requested column is absent.
        schema = lf.collect_schema()
        for col in request.columns:
            if col not in schema:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Column not found: {col}",
                )

        # Delegate to the sketch estimator.
        estimator = get_sketch_estimator()
        response = await estimator.estimate(request, lf)

        return response

    except HTTPException:
        # Re-raise our own 400/404 responses unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error(f"...") lost.
        logger.exception("Sketch estimation failed")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Sketch estimation failed: {e}",
        ) from e
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@router.get(
    "/jobs",
    response_model=SamplingJobListResponse,
    summary="List sampling jobs",
    description="List all active and recent sampling jobs.",
)
async def list_sampling_jobs(
    status_filter: str | None = Query(
        None,
        description="Filter by status: pending, running, completed, failed",
    ),
    limit: int = Query(50, ge=1, le=100, description="Maximum jobs to return"),
) -> SamplingJobListResponse:
    """Return sampling jobs, optionally filtered by status and capped by limit."""
    matching = get_enterprise_sampler().list_jobs()

    # Narrow to the requested status, if one was given.
    if status_filter:
        matching = [job for job in matching if job.status == status_filter]

    # "Active" means not yet finished (pending or running).
    active = sum(1 for job in matching if job.status in ("pending", "running"))

    return SamplingJobListResponse(
        jobs=matching[:limit],
        total=len(matching),
        active_count=active,
    )
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
@router.get(
    "/jobs/{job_id}",
    response_model=SamplingJobStatus,
    summary="Get job status",
    description="Get the status of a specific sampling job.",
)
async def get_job_status(job_id: str) -> SamplingJobStatus:
    """Look up one sampling job by id; respond 404 when it is unknown."""
    job = get_enterprise_sampler().get_job_status(job_id)
    if not job:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Job not found: {job_id}",
        )
    return job
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
@router.post(
    "/jobs/{job_id}/cancel",
    summary="Cancel sampling job",
    description="Cancel an active sampling job.",
)
async def cancel_sampling_job(job_id: str) -> dict[str, Any]:
    """Request cancellation of a pending or running sampling job.

    Responds 404 for unknown ids and 400 when the job has already
    finished (or was cancelled).
    """
    job = get_enterprise_sampler().get_job_status(job_id)
    if not job:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Job not found: {job_id}",
        )

    # Only jobs that have not finished can be cancelled.
    if job.status not in ("pending", "running"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job cannot be cancelled: status is {job.status}",
        )

    # NOTE: a production implementation would signal the worker;
    # currently we only flip the status flag.
    job.status = "cancelled"

    return {"job_id": job_id, "status": "cancelled", "message": "Job cancellation requested"}
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@router.get(
    "/strategies",
    summary="List available strategies",
    description="List all available enterprise sampling strategies with descriptions.",
)
async def list_strategies() -> list[dict[str, Any]]:
    """Flatten the strategy catalogue into plain dictionaries."""
    return [
        {
            "name": info.name,
            "value": info.value,
            "description": info.description,
            "best_for": info.best_for,
            "supports_parallel": info.supports_parallel,
            "supports_streaming": info.supports_streaming,
        }
        for info in STRATEGY_DOCS.values()
    ]
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
@router.get(
    "/quality-presets",
    summary="List quality presets",
    description="List available sampling quality presets with their configurations.",
)
async def list_quality_presets() -> list[dict[str, Any]]:
    """Expose quality presets together with human-readable descriptions."""
    # Static descriptions keyed by the quality enum; presets without an
    # entry fall back to an empty description.
    preset_descriptions = {
        SamplingQuality.SKETCH: "Fast approximation using probabilistic structures",
        SamplingQuality.QUICK: "Quick estimates with 90% confidence",
        SamplingQuality.STANDARD: "Balanced sampling with 95% confidence (recommended)",
        SamplingQuality.HIGH: "High accuracy with 99% confidence",
        SamplingQuality.EXACT: "Full scan without sampling",
    }

    return [
        {
            "name": quality.value,
            "description": preset_descriptions.get(quality, ""),
            "target_rows": config["target_rows"],
            "confidence_level": config["confidence_level"],
            "margin_of_error": config["margin_of_error"],
        }
        for quality, config in QUALITY_PRESETS.items()
    ]
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
@router.get(
    "/scale-categories",
    summary="List scale categories",
    description="List dataset scale categories with recommended strategies.",
)
async def list_scale_categories() -> list[dict[str, Any]]:
    """Describe every scale category with its recommended strategy."""
    # (category, row-count range label, description) triples, smallest first.
    specs = [
        (ScaleCategory.SMALL, "< 1M", "Small datasets that don't require sampling"),
        (ScaleCategory.MEDIUM, "1M - 10M", "Medium datasets suitable for column-aware sampling"),
        (ScaleCategory.LARGE, "10M - 100M", "Large datasets requiring block-based parallel sampling"),
        (ScaleCategory.XLARGE, "100M - 1B", "Extra-large datasets requiring multi-stage sampling"),
        (ScaleCategory.XXLARGE, "> 1B", "Massive datasets requiring sketches and multi-stage sampling"),
    ]

    return [
        {
            "name": category.value,
            "row_count_range": row_range,
            "recommended_strategy": SCALE_STRATEGY_MAP[category].value,
            "description": description,
        }
        for category, row_range, description in specs
    ]
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
@router.post(
    "/classify-scale",
    summary="Classify dataset scale",
    description="Classify a dataset by row count into a scale category.",
)
async def classify_scale(row_count: int = Query(..., ge=0)) -> dict[str, Any]:
    """Map a raw row count onto a scale category and suggested strategy."""
    category = classify_dataset_scale(row_count)
    # Column-aware is the fallback if the category has no mapped strategy.
    suggested = SCALE_STRATEGY_MAP.get(category, EnterpriseSamplingStrategy.COLUMN_AWARE)

    return {
        "row_count": row_count,
        "scale_category": category.value,
        "recommended_strategy": suggested.value,
    }
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
"""Validation history API endpoints.
|
|
2
2
|
|
|
3
3
|
Provides endpoints for validation history and trend analysis.
|
|
4
|
+
|
|
5
|
+
API Design: Direct Response Style
|
|
6
|
+
- Returns data directly without success wrapper
|
|
7
|
+
- Errors handled via HTTPException
|
|
4
8
|
"""
|
|
5
9
|
|
|
6
10
|
from __future__ import annotations
|
|
7
11
|
|
|
8
|
-
from typing import Annotated, Literal
|
|
12
|
+
from typing import Annotated, Any, Literal
|
|
9
13
|
|
|
10
14
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
11
16
|
|
|
12
17
|
from truthound_dashboard.core import HistoryService
|
|
13
18
|
|
|
@@ -24,9 +29,56 @@ async def get_history_service(session: SessionDep) -> HistoryService:
|
|
|
24
29
|
HistoryServiceDep = Annotated[HistoryService, Depends(get_history_service)]
|
|
25
30
|
|
|
26
31
|
|
|
32
|
+
class HistorySummary(BaseModel):
|
|
33
|
+
"""Validation history summary."""
|
|
34
|
+
|
|
35
|
+
total_runs: int
|
|
36
|
+
passed_runs: int
|
|
37
|
+
failed_runs: int
|
|
38
|
+
success_rate: float
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class TrendDataPoint(BaseModel):
|
|
42
|
+
"""Single data point in trend."""
|
|
43
|
+
|
|
44
|
+
date: str
|
|
45
|
+
success_rate: float
|
|
46
|
+
run_count: int
|
|
47
|
+
passed_count: int
|
|
48
|
+
failed_count: int
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class FailureFrequency(BaseModel):
|
|
52
|
+
"""Failure frequency item."""
|
|
53
|
+
|
|
54
|
+
issue: str
|
|
55
|
+
count: int
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class RecentValidation(BaseModel):
|
|
59
|
+
"""Recent validation item."""
|
|
60
|
+
|
|
61
|
+
id: str
|
|
62
|
+
status: str
|
|
63
|
+
passed: bool
|
|
64
|
+
has_critical: bool
|
|
65
|
+
has_high: bool
|
|
66
|
+
total_issues: int
|
|
67
|
+
created_at: str
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class HistoryResponse(BaseModel):
|
|
71
|
+
"""Validation history response."""
|
|
72
|
+
|
|
73
|
+
summary: HistorySummary
|
|
74
|
+
trend: list[TrendDataPoint]
|
|
75
|
+
failure_frequency: list[FailureFrequency]
|
|
76
|
+
recent_validations: list[RecentValidation]
|
|
77
|
+
|
|
78
|
+
|
|
27
79
|
@router.get(
|
|
28
80
|
"/sources/{source_id}/history",
|
|
29
|
-
response_model=
|
|
81
|
+
response_model=HistoryResponse,
|
|
30
82
|
summary="Get validation history",
|
|
31
83
|
description="Get validation history with trend analysis for a source.",
|
|
32
84
|
)
|
|
@@ -37,7 +89,7 @@ async def get_validation_history(
|
|
|
37
89
|
granularity: Literal["hourly", "daily", "weekly"] = Query(
|
|
38
90
|
"daily", description="Aggregation granularity"
|
|
39
91
|
),
|
|
40
|
-
) ->
|
|
92
|
+
) -> HistoryResponse:
|
|
41
93
|
"""Get validation history with trend data.
|
|
42
94
|
|
|
43
95
|
Args:
|
|
@@ -47,7 +99,7 @@ async def get_validation_history(
|
|
|
47
99
|
granularity: Aggregation granularity (hourly, daily, weekly).
|
|
48
100
|
|
|
49
101
|
Returns:
|
|
50
|
-
|
|
102
|
+
History data with summary, trend, failure_frequency, and recent_validations.
|
|
51
103
|
"""
|
|
52
104
|
try:
|
|
53
105
|
data = await service.get_history(
|
|
@@ -55,7 +107,7 @@ async def get_validation_history(
|
|
|
55
107
|
period=period,
|
|
56
108
|
granularity=granularity,
|
|
57
109
|
)
|
|
58
|
-
return
|
|
110
|
+
return HistoryResponse(**data)
|
|
59
111
|
except ValueError as e:
|
|
60
112
|
raise HTTPException(status_code=404, detail=str(e))
|
|
61
113
|
except Exception as e:
|
|
@@ -12,8 +12,6 @@ from fastapi import APIRouter, HTTPException, Path, Query, Body
|
|
|
12
12
|
from truthound_dashboard.schemas.lineage import (
|
|
13
13
|
AnomalyImpactResponse,
|
|
14
14
|
AnomalyStatus,
|
|
15
|
-
AutoDiscoverRequest,
|
|
16
|
-
AutoDiscoverResponse,
|
|
17
15
|
ImpactAnalysisRequest,
|
|
18
16
|
ImpactAnalysisResponse,
|
|
19
17
|
ImpactDirection,
|
|
@@ -368,7 +366,7 @@ async def create_edge(
|
|
|
368
366
|
HTTPException: 400 if nodes not found or edge already exists.
|
|
369
367
|
"""
|
|
370
368
|
try:
|
|
371
|
-
created = await service.create_edge(
|
|
369
|
+
created, source_node, target_node = await service.create_edge(
|
|
372
370
|
source_node_id=edge.source_node_id,
|
|
373
371
|
target_node_id=edge.target_node_id,
|
|
374
372
|
edge_type=edge.edge_type,
|
|
@@ -378,8 +376,8 @@ async def create_edge(
|
|
|
378
376
|
id=created.id,
|
|
379
377
|
source_node_id=created.source_node_id,
|
|
380
378
|
target_node_id=created.target_node_id,
|
|
381
|
-
source_node_name=
|
|
382
|
-
target_node_name=
|
|
379
|
+
source_node_name=source_node.name if source_node else None,
|
|
380
|
+
target_node_name=target_node.name if target_node else None,
|
|
383
381
|
edge_type=created.edge_type,
|
|
384
382
|
metadata=created.metadata_json,
|
|
385
383
|
created_at=created.created_at.isoformat() if created.created_at else "",
|
|
@@ -609,49 +607,6 @@ async def get_anomaly_impact(
|
|
|
609
607
|
raise HTTPException(status_code=404, detail=str(e))
|
|
610
608
|
|
|
611
609
|
|
|
612
|
-
# =============================================================================
|
|
613
|
-
# Auto-Discovery Endpoints
|
|
614
|
-
# =============================================================================
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
@router.post(
|
|
618
|
-
"/auto-discover",
|
|
619
|
-
response_model=AutoDiscoverResponse,
|
|
620
|
-
summary="Auto-discover lineage",
|
|
621
|
-
description="Auto-discover lineage from a data source",
|
|
622
|
-
)
|
|
623
|
-
async def auto_discover(
|
|
624
|
-
service: LineageServiceDep,
|
|
625
|
-
request: AutoDiscoverRequest,
|
|
626
|
-
) -> AutoDiscoverResponse:
|
|
627
|
-
"""Auto-discover lineage from a source.
|
|
628
|
-
|
|
629
|
-
Args:
|
|
630
|
-
service: Injected lineage service.
|
|
631
|
-
request: Auto-discovery request.
|
|
632
|
-
|
|
633
|
-
Returns:
|
|
634
|
-
Discovery results.
|
|
635
|
-
|
|
636
|
-
Raises:
|
|
637
|
-
HTTPException: 404 if source not found.
|
|
638
|
-
"""
|
|
639
|
-
try:
|
|
640
|
-
result = await service.auto_discover(
|
|
641
|
-
source_id=request.source_id,
|
|
642
|
-
include_fk_relations=request.include_fk_relations,
|
|
643
|
-
max_depth=request.max_depth,
|
|
644
|
-
)
|
|
645
|
-
return AutoDiscoverResponse(
|
|
646
|
-
source_id=result["source_id"],
|
|
647
|
-
discovered_nodes=result["discovered_nodes"],
|
|
648
|
-
discovered_edges=result["discovered_edges"],
|
|
649
|
-
graph=LineageGraphResponse(**result["graph"]),
|
|
650
|
-
)
|
|
651
|
-
except ValueError as e:
|
|
652
|
-
raise HTTPException(status_code=404, detail=str(e))
|
|
653
|
-
|
|
654
|
-
|
|
655
610
|
# =============================================================================
|
|
656
611
|
# Position Update Endpoints
|
|
657
612
|
# =============================================================================
|