PyPI - truthound-dashboard - Versions diffs - 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

truthound-dashboard 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205) hide show

truthound_dashboard/api/alerts.py +75 -86
truthound_dashboard/api/anomaly.py +7 -13
truthound_dashboard/api/cross_alerts.py +38 -52
truthound_dashboard/api/drift.py +49 -59
truthound_dashboard/api/drift_monitor.py +234 -79
truthound_dashboard/api/enterprise_sampling.py +498 -0
truthound_dashboard/api/history.py +57 -5
truthound_dashboard/api/lineage.py +3 -48
truthound_dashboard/api/maintenance.py +104 -49
truthound_dashboard/api/mask.py +1 -2
truthound_dashboard/api/middleware.py +2 -1
truthound_dashboard/api/model_monitoring.py +435 -311
truthound_dashboard/api/notifications.py +227 -191
truthound_dashboard/api/notifications_advanced.py +21 -20
truthound_dashboard/api/observability.py +586 -0
truthound_dashboard/api/plugins.py +2 -433
truthound_dashboard/api/profile.py +199 -37
truthound_dashboard/api/quality_reporter.py +701 -0
truthound_dashboard/api/reports.py +7 -16
truthound_dashboard/api/router.py +66 -0
truthound_dashboard/api/rule_suggestions.py +5 -5
truthound_dashboard/api/scan.py +17 -19
truthound_dashboard/api/schedules.py +85 -50
truthound_dashboard/api/schema_evolution.py +6 -6
truthound_dashboard/api/schema_watcher.py +667 -0
truthound_dashboard/api/sources.py +98 -27
truthound_dashboard/api/tiering.py +1323 -0
truthound_dashboard/api/triggers.py +14 -11
truthound_dashboard/api/validations.py +12 -11
truthound_dashboard/api/versioning.py +1 -6
truthound_dashboard/core/__init__.py +129 -3
truthound_dashboard/core/actions/__init__.py +62 -0
truthound_dashboard/core/actions/custom.py +426 -0
truthound_dashboard/core/actions/notifications.py +910 -0
truthound_dashboard/core/actions/storage.py +472 -0
truthound_dashboard/core/actions/webhook.py +281 -0
truthound_dashboard/core/anomaly.py +262 -67
truthound_dashboard/core/anomaly_explainer.py +4 -3
truthound_dashboard/core/backends/__init__.py +67 -0
truthound_dashboard/core/backends/base.py +299 -0
truthound_dashboard/core/backends/errors.py +191 -0
truthound_dashboard/core/backends/factory.py +423 -0
truthound_dashboard/core/backends/mock_backend.py +451 -0
truthound_dashboard/core/backends/truthound_backend.py +718 -0
truthound_dashboard/core/checkpoint/__init__.py +87 -0
truthound_dashboard/core/checkpoint/adapters.py +814 -0
truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
truthound_dashboard/core/checkpoint/runner.py +270 -0
truthound_dashboard/core/connections.py +437 -10
truthound_dashboard/core/converters/__init__.py +14 -0
truthound_dashboard/core/converters/truthound.py +620 -0
truthound_dashboard/core/cross_alerts.py +540 -320
truthound_dashboard/core/datasource_factory.py +1672 -0
truthound_dashboard/core/drift_monitor.py +216 -20
truthound_dashboard/core/enterprise_sampling.py +1291 -0
truthound_dashboard/core/interfaces/__init__.py +225 -0
truthound_dashboard/core/interfaces/actions.py +652 -0
truthound_dashboard/core/interfaces/base.py +247 -0
truthound_dashboard/core/interfaces/checkpoint.py +676 -0
truthound_dashboard/core/interfaces/protocols.py +664 -0
truthound_dashboard/core/interfaces/reporters.py +650 -0
truthound_dashboard/core/interfaces/routing.py +646 -0
truthound_dashboard/core/interfaces/triggers.py +619 -0
truthound_dashboard/core/lineage.py +407 -71
truthound_dashboard/core/model_monitoring.py +431 -3
truthound_dashboard/core/notifications/base.py +4 -0
truthound_dashboard/core/notifications/channels.py +501 -1203
truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
truthound_dashboard/core/notifications/deduplication/service.py +131 -348
truthound_dashboard/core/notifications/dispatcher.py +202 -11
truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
truthound_dashboard/core/notifications/escalation/engine.py +168 -358
truthound_dashboard/core/notifications/routing/__init__.py +88 -128
truthound_dashboard/core/notifications/routing/engine.py +90 -317
truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
truthound_dashboard/core/notifications/throttling/builder.py +117 -255
truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
truthound_dashboard/core/phase5/collaboration.py +1 -1
truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
truthound_dashboard/core/quality_reporter.py +1359 -0
truthound_dashboard/core/report_history.py +0 -6
truthound_dashboard/core/reporters/__init__.py +175 -14
truthound_dashboard/core/reporters/adapters.py +943 -0
truthound_dashboard/core/reporters/base.py +0 -3
truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
truthound_dashboard/core/reporters/compat.py +266 -0
truthound_dashboard/core/reporters/csv_reporter.py +2 -35
truthound_dashboard/core/reporters/factory.py +526 -0
truthound_dashboard/core/reporters/interfaces.py +745 -0
truthound_dashboard/core/reporters/registry.py +1 -10
truthound_dashboard/core/scheduler.py +165 -0
truthound_dashboard/core/schema_evolution.py +3 -3
truthound_dashboard/core/schema_watcher.py +1528 -0
truthound_dashboard/core/services.py +595 -76
truthound_dashboard/core/store_manager.py +810 -0
truthound_dashboard/core/streaming_anomaly.py +169 -4
truthound_dashboard/core/tiering.py +1309 -0
truthound_dashboard/core/triggers/evaluators.py +178 -8
truthound_dashboard/core/truthound_adapter.py +2620 -197
truthound_dashboard/core/unified_alerts.py +23 -20
truthound_dashboard/db/__init__.py +8 -0
truthound_dashboard/db/database.py +8 -2
truthound_dashboard/db/models.py +944 -25
truthound_dashboard/db/repository.py +2 -0
truthound_dashboard/main.py +11 -0
truthound_dashboard/schemas/__init__.py +177 -16
truthound_dashboard/schemas/base.py +44 -23
truthound_dashboard/schemas/collaboration.py +19 -6
truthound_dashboard/schemas/cross_alerts.py +19 -3
truthound_dashboard/schemas/drift.py +61 -55
truthound_dashboard/schemas/drift_monitor.py +67 -23
truthound_dashboard/schemas/enterprise_sampling.py +653 -0
truthound_dashboard/schemas/lineage.py +0 -33
truthound_dashboard/schemas/mask.py +10 -8
truthound_dashboard/schemas/model_monitoring.py +89 -10
truthound_dashboard/schemas/notifications_advanced.py +13 -0
truthound_dashboard/schemas/observability.py +453 -0
truthound_dashboard/schemas/plugins.py +0 -280
truthound_dashboard/schemas/profile.py +154 -247
truthound_dashboard/schemas/quality_reporter.py +403 -0
truthound_dashboard/schemas/reports.py +2 -2
truthound_dashboard/schemas/rule_suggestion.py +8 -1
truthound_dashboard/schemas/scan.py +4 -24
truthound_dashboard/schemas/schedule.py +11 -3
truthound_dashboard/schemas/schema_watcher.py +727 -0
truthound_dashboard/schemas/source.py +17 -2
truthound_dashboard/schemas/tiering.py +822 -0
truthound_dashboard/schemas/triggers.py +16 -0
truthound_dashboard/schemas/unified_alerts.py +7 -0
truthound_dashboard/schemas/validation.py +0 -13
truthound_dashboard/schemas/validators/base.py +41 -21
truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
truthound_dashboard/schemas/validators/localization_validators.py +273 -0
truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
truthound_dashboard/schemas/validators/referential_validators.py +312 -0
truthound_dashboard/schemas/validators/registry.py +93 -8
truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
truthound_dashboard/schemas/versioning.py +1 -6
truthound_dashboard/static/index.html +2 -2
truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
{truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
truthound_dashboard/core/plugins/hooks/manager.py +0 -403
truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
truthound_dashboard/core/reporters/junit_reporter.py +0 -233
truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
truthound_dashboard-1.4.3.dist-info/METADATA +0 -505
{truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
{truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
{truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0

truthound_dashboard/schemas/enterprise_sampling.py ADDED Viewed

@@ -0,0 +1,653 @@
+"""Enterprise Sampling Schemas.
+This module provides Pydantic models for truthound 1.2.10's enterprise-scale
+sampling capabilities, supporting 100M+ row datasets with:
+- Block Sampling
+- Multi-Stage Sampling
+- Column-Aware Sampling
+- Progressive Sampling
+- Probabilistic Data Structures (HyperLogLog, Count-Min Sketch, Bloom Filter)
+Architecture follows the Strategy pattern for extensibility.
+"""
+from __future__ import annotations
+from datetime import datetime
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel, ConfigDict, Field
+# ============================================================================
+# Enums
+# ============================================================================
+class ScaleCategory(str, Enum):  # str mixin: each member is a str equal to its value
+    """Dataset scale categories for automatic strategy selection."""
+    SMALL = "small"  # < 1M rows - no sampling needed
+    MEDIUM = "medium"  # 1M - 10M rows - column-aware sampling
+    LARGE = "large"  # 10M - 100M rows - block sampling
+    XLARGE = "xlarge"  # 100M - 1B rows - multi-stage sampling
+    XXLARGE = "xxlarge"  # > 1B rows - sketches + multi-stage; NOTE(review): row thresholds are not enforced by this enum
+class EnterpriseSamplingStrategy(str, Enum):  # str mixin: each member is a str equal to its value
+    """Enterprise-scale sampling strategies from truthound 1.2.10."""
+    # Basic strategies (already supported)
+    NONE = "none"
+    RANDOM = "random"
+    HEAD = "head"
+    TAIL = "tail"
+    STRATIFIED = "stratified"
+    RESERVOIR = "reservoir"
+    SYSTEMATIC = "systematic"
+    ADAPTIVE = "adaptive"  # default of EnterpriseSamplingRequest.strategy ("auto-select")
+    HASH = "hash"
+    # Enterprise strategies (new in 1.2.10)
+    BLOCK = "block"  # Block-based parallel sampling
+    MULTI_STAGE = "multi_stage"  # Hierarchical multi-stage sampling
+    COLUMN_AWARE = "column_aware"  # Type-weighted adaptive sampling
+    PROGRESSIVE = "progressive"  # Convergence-based iterative sampling
+    PARALLEL_BLOCK = "parallel_block"  # Multi-threaded block sampling
+class SamplingQuality(str, Enum):  # preset names only; the figures below are not encoded in this enum
+    """Sampling quality presets."""
+    SKETCH = "sketch"  # Fast approximation, 10K samples
+    QUICK = "quick"  # 90% confidence, 50K samples
+    STANDARD = "standard"  # 95% confidence, 100K samples (default)
+    HIGH = "high"  # 99% confidence, 500K samples
+    EXACT = "exact"  # Full scan, 100% accuracy
+class SketchType(str, Enum):  # one member per parameter group in SketchConfig (hll_*, cms_*, bloom_*)
+    """Probabilistic data structure types."""
+    HYPERLOGLOG = "hyperloglog"  # Cardinality estimation
+    COUNTMIN = "countmin"  # Frequency estimation
+    BLOOM = "bloom"  # Membership testing
+class SchedulingPolicy(str, Enum):  # str mixin: each member is a str equal to its value
+    """Parallel execution scheduling policies."""
+    ROUND_ROBIN = "round_robin"
+    WORK_STEALING = "work_stealing"
+    ADAPTIVE = "adaptive"  # default of ParallelSamplingConfig.scheduling_policy
+# ============================================================================
+# Configuration Models
+# ============================================================================
+class MemoryBudgetConfig(BaseModel):
+    """Memory budget configuration for enterprise sampling."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    max_memory_mb: int = Field(
+        default=1024,
+        ge=128,
+        le=65536,
+        description="Maximum memory in MB",
+    )
+    reserved_memory_mb: int = Field(  # NOTE(review): validated independently of max_memory_mb
+        default=256,
+        ge=64,
+        le=8192,  # may exceed max_memory_mb (e.g. max=128); no cross-field check in this model
+        description="Reserved memory for system operations",
+    )
+    gc_threshold_mb: int | None = Field(  # no ge/le bounds: zero or negative values pass validation
+        default=None,  # the "75% of max" fallback is not computed here - presumably by the consumer, TODO confirm
+        description="GC trigger threshold (default: 75% of max)",
+    )
+    backpressure_enabled: bool = Field(
+        default=True,
+        description="Enable memory backpressure",
+    )
+class ParallelSamplingConfig(BaseModel):
+    """Parallel block sampling configuration."""
+    model_config = ConfigDict(extra="forbid")
+    max_workers: int = Field(
+        default=4,
+        ge=1,
+        le=32,
+        description="Maximum parallel workers (0 = auto)",
+    )
+    enable_work_stealing: bool = Field(
+        default=True,
+        description="Enable work stealing for load balancing",
+    )
+    scheduling_policy: SchedulingPolicy = Field(
+        default=SchedulingPolicy.ADAPTIVE,
+        description="Task scheduling policy",
+    )
+    backpressure_threshold: float = Field(
+        default=0.75,
+        ge=0.5,
+        le=0.95,
+        description="Memory threshold for backpressure (0.0-1.0)",
+    )
+    chunk_timeout_seconds: float = Field(
+        default=30.0,
+        ge=1.0,
+        le=3600.0,
+        description="Timeout per block in seconds",
+    )
+class BlockSamplingConfig(BaseModel):
+    """Block sampling specific configuration."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    block_size: int = Field(
+        default=0,  # 0 is the sentinel for auto-detection, hence ge=0 rather than ge=1
+        ge=0,
+        description="Rows per block (0 = auto-detect)",
+    )
+    sample_per_block: int | None = Field(  # no lower bound: zero or negative values pass validation
+        default=None,
+        description="Samples per block (None = proportional)",
+    )
+    parallel: ParallelSamplingConfig = Field(
+        default_factory=ParallelSamplingConfig,  # fresh default config per instance
+        description="Parallel processing configuration",
+    )
+class MultiStageSamplingConfig(BaseModel):
+    """Multi-stage hierarchical sampling configuration."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    num_stages: int = Field(
+        default=3,
+        ge=2,  # at least two stages are required
+        le=5,
+        description="Number of sampling stages",
+    )
+    stage_reduction_factor: float | None = Field(  # no ge/le bounds: any float passes validation
+        default=None,
+        description="Reduction factor per stage (None = auto)",
+    )
+    early_stop_enabled: bool = Field(
+        default=True,
+        description="Enable early stopping on convergence",
+    )
+class ColumnAwareSamplingConfig(BaseModel):
+    """Column-aware adaptive sampling configuration."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    string_multiplier: float = Field(
+        default=2.0,
+        ge=1.0,  # can only match or enlarge the sample, never shrink it
+        le=5.0,
+        description="Sample multiplier for string columns",
+    )
+    categorical_multiplier: float = Field(
+        default=0.5,  # default is below 1.0, i.e. fewer samples than the numeric baseline
+        ge=0.1,
+        le=2.0,
+        description="Sample multiplier for categorical columns",
+    )
+    complex_multiplier: float = Field(
+        default=3.0,  # largest default multiplier in this model
+        ge=1.0,
+        le=10.0,
+        description="Sample multiplier for complex types (List/Struct)",
+    )
+    numeric_multiplier: float = Field(
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Baseline multiplier for numeric columns",
+    )
+class ProgressiveSamplingConfig(BaseModel):
+    """Progressive sampling with convergence detection."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    convergence_threshold: float = Field(
+        default=0.01,
+        ge=0.001,
+        le=0.1,
+        description="Convergence threshold (stop when estimates stabilize)",
+    )
+    max_stages: int = Field(
+        default=5,
+        ge=2,  # at least two stages are required
+        le=10,
+        description="Maximum number of progressive stages",
+    )
+    initial_sample_ratio: float = Field(
+        default=0.01,
+        ge=0.001,  # same [0.001, 0.1] range as convergence_threshold
+        le=0.1,
+        description="Initial sample ratio (0.01 = 1%)",
+    )
+    growth_factor: float = Field(
+        default=2.0,
+        ge=1.5,  # must be strictly above 1.0, so each stage is larger than the last
+        le=4.0,
+        description="Sample size growth factor per stage",
+    )
+class SketchConfig(BaseModel):  # all three parameter groups are always present, whatever sketch_type is
+    """Probabilistic data structure configuration."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    sketch_type: SketchType = Field(
+        default=SketchType.HYPERLOGLOG,
+        description="Type of sketch to use",
+    )
+    # HyperLogLog parameters
+    hll_precision: int = Field(
+        default=14,
+        ge=10,
+        le=18,
+        description="HyperLogLog precision (10-18, higher = more accurate)",
+    )
+    # Count-Min Sketch parameters
+    cms_width: int = Field(
+        default=2000,
+        ge=100,
+        le=100000,
+        description="Count-Min Sketch width",
+    )
+    cms_depth: int = Field(
+        default=5,
+        ge=3,
+        le=10,
+        description="Count-Min Sketch depth",
+    )
+    cms_epsilon: float | None = Field(  # NOTE(review): unbounded; precedence over cms_width is not defined here
+        default=None,
+        description="Error bound (alternative to width)",
+    )
+    cms_delta: float | None = Field(  # NOTE(review): unbounded; precedence over cms_depth is not defined here
+        default=None,
+        description="Confidence level (alternative to depth)",
+    )
+    # Bloom Filter parameters
+    bloom_capacity: int = Field(
+        default=10_000_000,
+        ge=1000,  # lower bound only; no upper limit is enforced
+        description="Expected number of items",
+    )
+    bloom_error_rate: float = Field(
+        default=0.01,
+        ge=0.0001,
+        le=0.1,
+        description="Desired false positive rate",
+    )
+# ============================================================================
+# Main Request/Response Models
+# ============================================================================
+class EnterpriseSamplingRequest(BaseModel):
+    """Request model for enterprise-scale sampling operations."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    # Basic parameters
+    source_id: str = Field(..., description="Source ID to sample from")  # the only required field
+    target_rows: int = Field(
+        default=100_000,
+        ge=1000,
+        le=10_000_000,
+        description="Target number of rows to sample",
+    )
+    quality: SamplingQuality = Field(  # NOTE(review): no link to confidence_level/target_rows is enforced here
+        default=SamplingQuality.STANDARD,
+        description="Sampling quality preset",
+    )
+    # Strategy selection
+    strategy: EnterpriseSamplingStrategy = Field(
+        default=EnterpriseSamplingStrategy.ADAPTIVE,
+        description="Sampling strategy (adaptive = auto-select)",
+    )
+    # Resource budgets
+    memory_budget: MemoryBudgetConfig = Field(
+        default_factory=MemoryBudgetConfig,  # fresh default budget per request
+        description="Memory budget configuration",
+    )
+    time_budget_seconds: float = Field(
+        default=0.0,  # 0 is the sentinel for "no limit", hence ge=0.0
+        ge=0.0,
+        le=3600.0,
+        description="Time budget in seconds (0 = unlimited)",
+    )
+    # Statistical parameters
+    confidence_level: float = Field(
+        default=0.95,
+        ge=0.80,
+        le=0.99,
+        description="Statistical confidence level",
+    )
+    margin_of_error: float = Field(
+        default=0.05,
+        ge=0.01,
+        le=0.10,
+        description="Acceptable margin of error",
+    )
+    # Adaptive parameters
+    min_sample_ratio: float = Field(  # NOTE(review): may exceed max_sample_ratio (e.g. 0.1 vs 0.01); no cross-field check
+        default=0.001,
+        ge=0.0001,
+        le=0.1,
+        description="Minimum sample ratio",
+    )
+    max_sample_ratio: float = Field(
+        default=0.10,
+        ge=0.01,  # overlaps min_sample_ratio's range [0.0001, 0.1]
+        le=1.0,
+        description="Maximum sample ratio",
+    )
+    # Reproducibility
+    seed: int | None = Field(
+        default=None,
+        description="Random seed for reproducibility",
+    )
+    # Strategy-specific configurations
+    block_config: BlockSamplingConfig | None = Field(  # optional; not tied to `strategy` by this model
+        default=None,
+        description="Block sampling configuration",
+    )
+    multi_stage_config: MultiStageSamplingConfig | None = Field(
+        default=None,
+        description="Multi-stage sampling configuration",
+    )
+    column_aware_config: ColumnAwareSamplingConfig | None = Field(
+        default=None,
+        description="Column-aware sampling configuration",
+    )
+    progressive_config: ProgressiveSamplingConfig | None = Field(
+        default=None,
+        description="Progressive sampling configuration",
+    )
+    # Sketch parameters (for XXLARGE datasets)
+    sketch_config: SketchConfig | None = Field(
+        default=None,
+        description="Probabilistic sketch configuration",
+    )
+class SamplingMetrics(BaseModel):  # none of the numeric fields below carry ge/le bounds
+    """Metrics from sampling operation."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    # Basic metrics
+    original_rows: int = Field(..., description="Original row count")
+    sampled_rows: int = Field(..., description="Sampled row count")
+    sampling_ratio: float = Field(..., description="Actual sampling ratio")  # supplied by the producer, not derived here
+    # Strategy info
+    strategy_used: EnterpriseSamplingStrategy = Field(..., description="Strategy used")
+    scale_category: ScaleCategory = Field(..., description="Dataset scale category")
+    is_sampled: bool = Field(..., description="Whether sampling was performed")
+    # Performance metrics
+    sampling_time_ms: float = Field(..., description="Total sampling time in ms")
+    throughput_rows_per_sec: float = Field(..., description="Processing throughput")
+    speedup_factor: float = Field(
+        default=1.0,
+        description="Speedup compared to full scan",
+    )
+    # Resource usage
+    peak_memory_mb: float = Field(default=0.0, description="Peak memory usage in MB")
+    workers_used: int = Field(default=1, description="Number of workers used")
+    worker_utilization: float = Field(  # the 0.0-1.0 range is documented but not validated
+        default=0.0,
+        description="Worker utilization (0.0-1.0)",
+    )
+    # Block metrics (for block-based strategies)
+    blocks_processed: int | None = Field(
+        default=None,
+        description="Number of blocks processed",
+    )
+    time_per_block_ms: float | None = Field(
+        default=None,
+        description="Average time per block",
+    )
+    # Progressive metrics
+    stages_completed: int | None = Field(
+        default=None,
+        description="Number of progressive stages",
+    )
+    converged_early: bool | None = Field(
+        default=None,
+        description="Whether converged before max stages",
+    )
+    # Backpressure metrics
+    backpressure_events: int = Field(
+        default=0,
+        description="Number of backpressure events",
+    )
+    # Statistical info
+    margin_of_error_actual: float | None = Field(
+        default=None,
+        description="Achieved margin of error",
+    )
+    confidence_achieved: float | None = Field(
+        default=None,
+        description="Achieved confidence level",
+    )
+class EnterpriseSamplingResponse(BaseModel):
+    """Response model for enterprise sampling operations."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    # Request info
+    source_id: str = Field(..., description="Source ID")
+    job_id: str = Field(..., description="Sampling job ID")
+    # Status
+    status: str = Field(..., description="Job status: pending, running, completed, failed")  # plain str: the listed values are not enforced
+    started_at: datetime = Field(..., description="Job start time")
+    completed_at: datetime | None = Field(None, description="Job completion time")  # first positional arg is the default
+    # Results
+    metrics: SamplingMetrics | None = Field(
+        None,
+        description="Sampling metrics (available when completed)",
+    )
+    sampled_data_path: str | None = Field(
+        None,
+        description="Path to sampled data file",
+    )
+    # Error info
+    error_message: str | None = Field(None, description="Error message if failed")
+class SampleSizeEstimateRequest(BaseModel):
+    """Request for sample size estimation."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    population_size: int = Field(..., ge=1, description="Total population size")  # required; must be positive
+    confidence_level: float = Field(  # same default and bounds as EnterpriseSamplingRequest.confidence_level
+        default=0.95,
+        ge=0.80,
+        le=0.99,
+        description="Desired confidence level",
+    )
+    margin_of_error: float = Field(  # same default and bounds as EnterpriseSamplingRequest.margin_of_error
+        default=0.05,
+        ge=0.01,
+        le=0.10,
+        description="Desired margin of error",
+    )
+    quality: SamplingQuality = Field(
+        default=SamplingQuality.STANDARD,
+        description="Quality preset",
+    )
+class SampleSizeEstimateResponse(BaseModel):  # every field is required; none has a default
+    """Response with sample size recommendations."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    population_size: int = Field(..., description="Input population size")
+    scale_category: ScaleCategory = Field(..., description="Dataset scale category")
+    # Recommended sizes
+    recommended_size: int = Field(..., description="Recommended sample size")
+    min_size: int = Field(..., description="Minimum acceptable sample size")  # ordering min <= recommended <= max is not validated
+    max_size: int = Field(..., description="Maximum useful sample size")
+    # Estimates
+    estimated_time_seconds: float = Field(..., description="Estimated processing time")
+    estimated_memory_mb: float = Field(..., description="Estimated memory usage")
+    speedup_factor: float = Field(..., description="Expected speedup factor")
+    # Strategy recommendation
+    recommended_strategy: EnterpriseSamplingStrategy = Field(
+        ...,
+        description="Recommended sampling strategy",
+    )
+    strategy_rationale: str = Field(..., description="Why this strategy is recommended")
+class SketchEstimateRequest(BaseModel):
+    """Request for sketch-based estimation."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    source_id: str = Field(..., description="Source ID")
+    columns: list[str] = Field(..., min_length=1, description="Columns to analyze")  # an empty list is rejected
+    sketch_type: SketchType = Field(..., description="Sketch type")  # NOTE(review): may disagree with sketch_config.sketch_type; not reconciled here
+    sketch_config: SketchConfig | None = Field(
+        None,
+        description="Sketch configuration",
+    )
+class SketchEstimateResult(BaseModel):  # per-sketch result fields are optional and default to None
+    """Result from sketch-based estimation."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    column: str = Field(..., description="Column name")
+    sketch_type: SketchType = Field(..., description="Sketch type used")
+    # HyperLogLog results
+    cardinality_estimate: int | None = Field(
+        None,
+        description="Estimated distinct count",
+    )
+    cardinality_error: float | None = Field(
+        None,
+        description="Standard error of cardinality estimate",
+    )
+    # Count-Min Sketch results
+    heavy_hitters: list[dict[str, Any]] | None = Field(  # dict keys are not fixed by this schema
+        None,
+        description="Frequent items with estimated counts",
+    )
+    # Bloom Filter results
+    membership_tests: dict[str, bool] | None = Field(
+        None,
+        description="Membership test results",
+    )
+    # Common metrics
+    memory_used_bytes: int = Field(..., description="Memory used by sketch")
+    processing_time_ms: float = Field(..., description="Processing time in ms")
+class SketchEstimateResponse(BaseModel):  # every field is required; none has a default
+    """Response with sketch-based estimates."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    source_id: str = Field(..., description="Source ID")
+    results: list[SketchEstimateResult] = Field(..., description="Results per column")  # an empty list is accepted
+    total_time_ms: float = Field(..., description="Total processing time")
+    total_memory_mb: float = Field(..., description="Total memory used")
+# ============================================================================
+# Job Management Models
+# ============================================================================
+class SamplingJobStatus(BaseModel):
+    """Sampling job status for monitoring."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    job_id: str = Field(..., description="Job ID")
+    source_id: str = Field(..., description="Source ID")
+    status: str = Field(..., description="Job status")  # plain str: no fixed set of values is enforced
+    progress: float = Field(
+        default=0.0,
+        ge=0.0,  # validated as a fraction, not a percentage
+        le=1.0,
+        description="Progress (0.0-1.0)",
+    )
+    current_stage: str | None = Field(None, description="Current processing stage")
+    started_at: datetime = Field(..., description="Start time")
+    estimated_completion: datetime | None = Field(
+        None,
+        description="Estimated completion time",
+    )
+    # Progress details
+    rows_processed: int = Field(default=0, description="Rows processed so far")
+    blocks_completed: int | None = Field(None, description="Blocks completed")  # not checked against blocks_total
+    blocks_total: int | None = Field(None, description="Total blocks")
+class SamplingJobListResponse(BaseModel):  # every field is required; none has a default
+    """Response listing sampling jobs."""
+    model_config = ConfigDict(extra="forbid")  # unknown keys are a validation error
+    jobs: list[SamplingJobStatus] = Field(..., description="List of jobs")
+    total: int = Field(..., description="Total job count")  # not checked against len(jobs)
+    active_count: int = Field(..., description="Active job count")  # not checked against total

truthound_dashboard/schemas/lineage.py CHANGED Viewed

@@ -246,39 +246,6 @@ class ImpactAnalysisResponse(BaseSchema):
     total_affected: int = Field(default=0, description="Total affected nodes")
-# =============================================================================
-# Auto-Discovery Schemas
-# =============================================================================
-class AutoDiscoverRequest(BaseSchema):
-    """Request to auto-discover lineage from a source."""
-    source_id: str = Field(..., description="Source ID to discover from")
-    include_fk_relations: bool = Field(
-        default=True,
-        description="Include foreign key relationships (for DB sources)",
-    )
-    max_depth: int = Field(
-        default=3,
-        ge=1,
-        le=10,
-        description="Maximum depth for discovery",
-    )
-class AutoDiscoverResponse(BaseSchema):
-    """Response from auto-discovery."""
-    source_id: str = Field(..., description="Source ID that was analyzed")
-    discovered_nodes: int = Field(default=0, description="Number of nodes discovered")
-    discovered_edges: int = Field(default=0, description="Number of edges discovered")
-    graph: LineageGraphResponse = Field(
-        ...,
-        description="Discovered lineage graph",
-    )
 # =============================================================================
 # Position Update Schemas
 # =============================================================================

truthound-dashboard 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

truthound-dashboard 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl