PyPI - truthound-dashboard - Versions diffs - 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

truthound_dashboard/api/alerts.py +258 -0
truthound_dashboard/api/anomaly.py +1302 -0
truthound_dashboard/api/cross_alerts.py +352 -0
truthound_dashboard/api/deps.py +143 -0
truthound_dashboard/api/drift_monitor.py +540 -0
truthound_dashboard/api/lineage.py +1151 -0
truthound_dashboard/api/maintenance.py +363 -0
truthound_dashboard/api/middleware.py +373 -1
truthound_dashboard/api/model_monitoring.py +805 -0
truthound_dashboard/api/notifications_advanced.py +2452 -0
truthound_dashboard/api/plugins.py +2096 -0
truthound_dashboard/api/profile.py +211 -14
truthound_dashboard/api/reports.py +853 -0
truthound_dashboard/api/router.py +147 -0
truthound_dashboard/api/rule_suggestions.py +310 -0
truthound_dashboard/api/schema_evolution.py +231 -0
truthound_dashboard/api/sources.py +47 -3
truthound_dashboard/api/triggers.py +190 -0
truthound_dashboard/api/validations.py +13 -0
truthound_dashboard/api/validators.py +333 -4
truthound_dashboard/api/versioning.py +309 -0
truthound_dashboard/api/websocket.py +301 -0
truthound_dashboard/core/__init__.py +27 -0
truthound_dashboard/core/anomaly.py +1395 -0
truthound_dashboard/core/anomaly_explainer.py +633 -0
truthound_dashboard/core/cache.py +206 -0
truthound_dashboard/core/cached_services.py +422 -0
truthound_dashboard/core/charts.py +352 -0
truthound_dashboard/core/connections.py +1069 -42
truthound_dashboard/core/cross_alerts.py +837 -0
truthound_dashboard/core/drift_monitor.py +1477 -0
truthound_dashboard/core/drift_sampling.py +669 -0
truthound_dashboard/core/i18n/__init__.py +42 -0
truthound_dashboard/core/i18n/detector.py +173 -0
truthound_dashboard/core/i18n/messages.py +564 -0
truthound_dashboard/core/lineage.py +971 -0
truthound_dashboard/core/maintenance.py +443 -5
truthound_dashboard/core/model_monitoring.py +1043 -0
truthound_dashboard/core/notifications/channels.py +1020 -1
truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
truthound_dashboard/core/notifications/deduplication/service.py +400 -0
truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
truthound_dashboard/core/notifications/dispatcher.py +43 -0
truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
truthound_dashboard/core/notifications/escalation/engine.py +429 -0
truthound_dashboard/core/notifications/escalation/models.py +336 -0
truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
truthound_dashboard/core/notifications/events.py +49 -0
truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
truthound_dashboard/core/notifications/metrics/base.py +528 -0
truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
truthound_dashboard/core/notifications/routing/__init__.py +169 -0
truthound_dashboard/core/notifications/routing/combinators.py +184 -0
truthound_dashboard/core/notifications/routing/config.py +375 -0
truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
truthound_dashboard/core/notifications/routing/engine.py +382 -0
truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
truthound_dashboard/core/notifications/routing/rules.py +625 -0
truthound_dashboard/core/notifications/routing/validator.py +678 -0
truthound_dashboard/core/notifications/service.py +2 -0
truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
truthound_dashboard/core/notifications/throttling/builder.py +311 -0
truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
truthound_dashboard/core/openlineage.py +1028 -0
truthound_dashboard/core/plugins/__init__.py +39 -0
truthound_dashboard/core/plugins/docs/__init__.py +39 -0
truthound_dashboard/core/plugins/docs/extractor.py +703 -0
truthound_dashboard/core/plugins/docs/renderers.py +804 -0
truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
truthound_dashboard/core/plugins/hooks/manager.py +403 -0
truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
truthound_dashboard/core/plugins/loader.py +504 -0
truthound_dashboard/core/plugins/registry.py +810 -0
truthound_dashboard/core/plugins/reporter_executor.py +588 -0
truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
truthound_dashboard/core/plugins/sandbox.py +617 -0
truthound_dashboard/core/plugins/security/__init__.py +68 -0
truthound_dashboard/core/plugins/security/analyzer.py +535 -0
truthound_dashboard/core/plugins/security/policies.py +311 -0
truthound_dashboard/core/plugins/security/protocols.py +296 -0
truthound_dashboard/core/plugins/security/signing.py +842 -0
truthound_dashboard/core/plugins/security.py +446 -0
truthound_dashboard/core/plugins/validator_executor.py +401 -0
truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
truthound_dashboard/core/plugins/versioning/semver.py +266 -0
truthound_dashboard/core/profile_comparison.py +601 -0
truthound_dashboard/core/report_history.py +570 -0
truthound_dashboard/core/reporters/__init__.py +57 -0
truthound_dashboard/core/reporters/base.py +296 -0
truthound_dashboard/core/reporters/csv_reporter.py +155 -0
truthound_dashboard/core/reporters/html_reporter.py +598 -0
truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
truthound_dashboard/core/reporters/i18n/base.py +494 -0
truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
truthound_dashboard/core/reporters/json_reporter.py +160 -0
truthound_dashboard/core/reporters/junit_reporter.py +233 -0
truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
truthound_dashboard/core/reporters/registry.py +272 -0
truthound_dashboard/core/rule_generator.py +2088 -0
truthound_dashboard/core/scheduler.py +822 -12
truthound_dashboard/core/schema_evolution.py +858 -0
truthound_dashboard/core/services.py +152 -9
truthound_dashboard/core/statistics.py +718 -0
truthound_dashboard/core/streaming_anomaly.py +883 -0
truthound_dashboard/core/triggers/__init__.py +45 -0
truthound_dashboard/core/triggers/base.py +226 -0
truthound_dashboard/core/triggers/evaluators.py +609 -0
truthound_dashboard/core/triggers/factory.py +363 -0
truthound_dashboard/core/unified_alerts.py +870 -0
truthound_dashboard/core/validation_limits.py +509 -0
truthound_dashboard/core/versioning.py +709 -0
truthound_dashboard/core/websocket/__init__.py +59 -0
truthound_dashboard/core/websocket/manager.py +512 -0
truthound_dashboard/core/websocket/messages.py +130 -0
truthound_dashboard/db/__init__.py +30 -0
truthound_dashboard/db/models.py +3375 -3
truthound_dashboard/main.py +22 -0
truthound_dashboard/schemas/__init__.py +396 -1
truthound_dashboard/schemas/anomaly.py +1258 -0
truthound_dashboard/schemas/base.py +4 -0
truthound_dashboard/schemas/cross_alerts.py +334 -0
truthound_dashboard/schemas/drift_monitor.py +890 -0
truthound_dashboard/schemas/lineage.py +428 -0
truthound_dashboard/schemas/maintenance.py +154 -0
truthound_dashboard/schemas/model_monitoring.py +374 -0
truthound_dashboard/schemas/notifications_advanced.py +1363 -0
truthound_dashboard/schemas/openlineage.py +704 -0
truthound_dashboard/schemas/plugins.py +1293 -0
truthound_dashboard/schemas/profile.py +420 -34
truthound_dashboard/schemas/profile_comparison.py +242 -0
truthound_dashboard/schemas/reports.py +285 -0
truthound_dashboard/schemas/rule_suggestion.py +434 -0
truthound_dashboard/schemas/schema_evolution.py +164 -0
truthound_dashboard/schemas/source.py +117 -2
truthound_dashboard/schemas/triggers.py +511 -0
truthound_dashboard/schemas/unified_alerts.py +223 -0
truthound_dashboard/schemas/validation.py +25 -1
truthound_dashboard/schemas/validators/__init__.py +11 -0
truthound_dashboard/schemas/validators/base.py +151 -0
truthound_dashboard/schemas/versioning.py +152 -0
truthound_dashboard/static/index.html +2 -2
{truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
{truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
{truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
{truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0

truthound_dashboard/schemas/profile.py CHANGED Viewed

@@ -5,54 +5,341 @@ This module defines schemas for data profiling API operations.
 from __future__ import annotations
-from typing import Any
+from enum import Enum
+from typing import Any, Literal
 from pydantic import Field
 from .base import BaseSchema
+# =============================================================================
+# Sampling Strategy Enums and Types
+# =============================================================================
+class SamplingStrategy(str, Enum):
+    """Sampling strategies for data profiling.
+    Supports 8+ strategies from truthound profiler:
+    - NONE: Profile all data (for small datasets < 100K rows)
+    - HEAD: First N rows (for quick previews)
+    - RANDOM: Random sampling (general purpose)
+    - SYSTEMATIC: Every Nth row (for ordered data)
+    - STRATIFIED: Maintain distribution across categories
+    - RESERVOIR: Streaming-friendly sampling
+    - ADAPTIVE: Auto-select based on data characteristics (default)
+    - HASH: Deterministic sampling for reproducibility
+    """
+    NONE = "none"
+    HEAD = "head"
+    RANDOM = "random"
+    SYSTEMATIC = "systematic"
+    STRATIFIED = "stratified"
+    RESERVOIR = "reservoir"
+    ADAPTIVE = "adaptive"
+    HASH = "hash"
+# Literal type for API validation
+SamplingStrategyType = Literal[
+    "none", "head", "random", "systematic", "stratified", "reservoir", "adaptive", "hash"
+]
+class SamplingConfig(BaseSchema):
+    """Advanced sampling configuration for profiling.
+    Provides fine-grained control over sampling behavior for large datasets.
+    """
+    strategy: SamplingStrategyType = Field(
+        default="adaptive",
+        description="Sampling strategy to use. 'adaptive' auto-selects based on data size.",
+    )
+    sample_size: int | None = Field(
+        default=None,
+        ge=100,
+        description="Target sample size. If None, auto-estimated based on confidence level.",
+    )
+    confidence_level: float = Field(
+        default=0.95,
+        ge=0.80,
+        le=0.99,
+        description="Statistical confidence level for sample size estimation (0.80-0.99).",
+    )
+    margin_of_error: float = Field(
+        default=0.03,
+        ge=0.01,
+        le=0.10,
+        description="Acceptable margin of error for statistical estimates (0.01-0.10).",
+    )
+    strata_column: str | None = Field(
+        default=None,
+        description="Column for stratified sampling to maintain distribution.",
+    )
+    seed: int | None = Field(
+        default=None,
+        description="Random seed for reproducible sampling results.",
+    )
+# =============================================================================
+# Pattern Detection Configuration
+# =============================================================================
+class PatternType(str, Enum):
+    """Supported data pattern types for detection."""
+    EMAIL = "email"
+    PHONE = "phone"
+    UUID = "uuid"
+    URL = "url"
+    IP_ADDRESS = "ip_address"
+    CREDIT_CARD = "credit_card"
+    DATE = "date"
+    DATETIME = "datetime"
+    KOREAN_RRN = "korean_rrn"
+    KOREAN_PHONE = "korean_phone"
+    SSN = "ssn"
+    POSTAL_CODE = "postal_code"
+    CURRENCY = "currency"
+    PERCENTAGE = "percentage"
+    CUSTOM = "custom"
+class PatternDetectionConfig(BaseSchema):
+    """Configuration for pattern detection during profiling.
+    Enables automatic detection of common data patterns like
+    emails, phone numbers, UUIDs, etc.
+    """
+    enabled: bool = Field(
+        default=True,
+        description="Enable pattern detection during profiling.",
+    )
+    sample_size: int = Field(
+        default=1000,
+        ge=100,
+        le=100000,
+        description="Number of values to sample for pattern detection.",
+    )
+    min_confidence: float = Field(
+        default=0.8,
+        ge=0.5,
+        le=1.0,
+        description="Minimum confidence threshold for pattern matches (0.5-1.0).",
+    )
+    patterns_to_detect: list[str] | None = Field(
+        default=None,
+        description="Specific patterns to detect. If None, detects all supported patterns.",
+    )
+# =============================================================================
+# Profile Request Schema (Enhanced)
+# =============================================================================
 class ProfileRequest(BaseSchema):
     """Request schema for data profiling.
-    Provides optional configuration for profiling operations.
-    All fields are optional with sensible defaults.
+    Provides comprehensive configuration for profiling operations including
+    sampling strategies, pattern detection, and statistical analysis options.
     """
+    # Basic sampling (backward compatible)
     sample_size: int | None = Field(
         default=None,
         ge=1,
         description="Maximum number of rows to sample for profiling. "
-        "If None, profiles all data. Useful for large datasets.",
+        "If None, profiles all data. For advanced sampling, use 'sampling' config.",
         examples=[10000, 50000, 100000],
     )
+    # Advanced sampling configuration
+    sampling: SamplingConfig | None = Field(
+        default=None,
+        description="Advanced sampling configuration. If provided, overrides sample_size.",
+    )
+    # Pattern detection configuration
+    pattern_detection: PatternDetectionConfig | None = Field(
+        default=None,
+        description="Pattern detection configuration. If None, uses default settings.",
+    )
+    # Additional profiling options
+    include_histograms: bool = Field(
+        default=True,
+        description="Include value distribution histograms in the profile.",
+    )
+    include_correlations: bool = Field(
+        default=False,
+        description="Include column correlation analysis (increases processing time).",
+    )
+    include_cardinality: bool = Field(
+        default=True,
+        description="Include cardinality estimates for high-cardinality columns.",
+    )
+# =============================================================================
+# Pattern Detection Results
+# =============================================================================
+class DetectedPattern(BaseSchema):
+    """A detected data pattern in a column."""
+    pattern_type: str = Field(
+        ...,
+        description="Type of pattern detected (email, phone, uuid, etc.)",
+    )
+    confidence: float = Field(
+        ...,
+        ge=0.0,
+        le=1.0,
+        description="Confidence score of the pattern match (0-1).",
+    )
+    match_count: int = Field(
+        ...,
+        ge=0,
+        description="Number of values matching this pattern.",
+    )
+    match_percentage: float = Field(
+        ...,
+        ge=0.0,
+        le=100.0,
+        description="Percentage of non-null values matching this pattern.",
+    )
+    sample_matches: list[str] | None = Field(
+        default=None,
+        description="Sample values matching this pattern (masked for sensitive data).",
+    )
+class HistogramBucket(BaseSchema):
+    """A bucket in a value distribution histogram."""
+    bucket: str = Field(..., description="Bucket label (range or category)")
+    count: int = Field(..., ge=0, description="Count of values in this bucket")
+    percentage: float = Field(..., ge=0.0, le=100.0, description="Percentage of total")
+# =============================================================================
+# Column Profile Schema (Enhanced)
+# =============================================================================
 class ColumnProfile(BaseSchema):
-    """Profile information for a single column."""
+    """Profile information for a single column.
+    Includes basic statistics, pattern detection results, and distribution data.
+    """
+    # Basic identification
     name: str = Field(..., description="Column name")
-    dtype: str = Field(..., description="Data type")
+    dtype: str = Field(..., description="Physical data type (string, int64, float64, etc.)")
+    # Inferred semantic type (NEW)
+    inferred_type: str | None = Field(
+        default=None,
+        description="Inferred semantic type based on pattern detection "
+        "(email, phone, uuid, url, date, currency, etc.)",
+    )
+    # Completeness metrics
     null_pct: str = Field(default="0%", description="Percentage of null values")
+    null_count: int | None = Field(default=None, description="Count of null values")
+    # Uniqueness metrics
     unique_pct: str = Field(default="0%", description="Percentage of unique values")
+    distinct_count: int | None = Field(
+        default=None,
+        description="Count of distinct values",
+    )
+    is_unique: bool | None = Field(
+        default=None,
+        description="Whether all non-null values are unique",
+    )
+    # Value range (for numeric/date columns)
     min: Any | None = Field(default=None, description="Minimum value")
     max: Any | None = Field(default=None, description="Maximum value")
+    # Statistical measures (for numeric columns)
     mean: float | None = Field(default=None, description="Mean value (numeric columns)")
     std: float | None = Field(default=None, description="Standard deviation (numeric)")
+    median: float | None = Field(default=None, description="Median value (numeric)")
+    q1: float | None = Field(default=None, description="25th percentile (Q1)")
+    q3: float | None = Field(default=None, description="75th percentile (Q3)")
+    skewness: float | None = Field(default=None, description="Skewness of distribution")
+    kurtosis: float | None = Field(default=None, description="Kurtosis of distribution")
-    # Additional statistics (optional)
-    distinct_count: int | None = Field(
+    # String-specific metrics
+    min_length: int | None = Field(default=None, description="Minimum string length")
+    max_length: int | None = Field(default=None, description="Maximum string length")
+    avg_length: float | None = Field(default=None, description="Average string length")
+    # Pattern detection results (NEW)
+    patterns: list[DetectedPattern] | None = Field(
         default=None,
-        description="Count of distinct values",
+        description="Detected data patterns (email, phone, uuid, etc.)",
+    )
+    primary_pattern: str | None = Field(
+        default=None,
+        description="The most prevalent detected pattern type",
     )
+    # Distribution data
     most_common: list[dict[str, Any]] | None = Field(
         default=None,
         description="Most common values with counts",
     )
+    histogram: list[HistogramBucket] | None = Field(
+        default=None,
+        description="Value distribution histogram",
+    )
+    # Cardinality estimate for high-cardinality columns
+    cardinality_estimate: int | None = Field(
+        default=None,
+        description="Estimated cardinality using HyperLogLog (for high-cardinality columns)",
+    )
+# =============================================================================
+# Sampling Metadata for Response
+# =============================================================================
+class SamplingMetadata(BaseSchema):
+    """Metadata about sampling used during profiling."""
+    strategy_used: str = Field(..., description="Sampling strategy that was applied")
+    sample_size: int = Field(..., description="Actual sample size used")
+    total_rows: int = Field(..., description="Total rows in the dataset")
+    sampling_ratio: float = Field(..., description="Ratio of sampled to total rows")
+    seed: int | None = Field(default=None, description="Random seed used (if applicable)")
+    confidence_level: float | None = Field(
+        default=None, description="Confidence level achieved"
+    )
+    margin_of_error: float | None = Field(
+        default=None, description="Estimated margin of error"
+    )
+# =============================================================================
+# Profile Response Schema (Enhanced)
+# =============================================================================
 class ProfileResponse(BaseSchema):
-    """Data profiling response."""
+    """Data profiling response with enhanced statistics and pattern detection."""
     source: str = Field(..., description="Source path/identifier")
     row_count: int = Field(..., ge=0, description="Total number of rows")
@@ -63,6 +350,28 @@ class ProfileResponse(BaseSchema):
         description="Profile for each column",
     )
+    # Sampling metadata (NEW)
+    sampling: SamplingMetadata | None = Field(
+        default=None,
+        description="Information about sampling applied during profiling",
+    )
+    # Pattern detection summary (NEW)
+    detected_patterns_summary: dict[str, int] | None = Field(
+        default=None,
+        description="Summary of detected patterns across all columns {pattern_type: count}",
+    )
+    # Profiling metadata (NEW)
+    profiled_at: str | None = Field(
+        default=None,
+        description="ISO timestamp when profiling was performed",
+    )
+    profiling_duration_ms: int | None = Field(
+        default=None,
+        description="Time taken to profile in milliseconds",
+    )
     # Computed properties
     @property
     def size_human(self) -> str:
@@ -74,6 +383,70 @@ class ProfileResponse(BaseSchema):
             size /= 1024
         return f"{size:.1f} PB"
+    @classmethod
+    def _build_column_profile(cls, col: dict[str, Any]) -> ColumnProfile:
+        """Build a ColumnProfile from column data dict.
+        Args:
+            col: Column data dictionary from adapter or database.
+        Returns:
+            ColumnProfile instance with all available fields.
+        """
+        # Build patterns list if present
+        patterns = None
+        if col.get("patterns"):
+            patterns = [
+                DetectedPattern(
+                    pattern_type=p.get("pattern_type", p.get("type", "unknown")),
+                    confidence=p.get("confidence", 0.0),
+                    match_count=p.get("match_count", 0),
+                    match_percentage=p.get("match_percentage", 0.0),
+                    sample_matches=p.get("sample_matches"),
+                )
+                for p in col["patterns"]
+            ]
+        # Build histogram if present
+        histogram = None
+        if col.get("histogram"):
+            histogram = [
+                HistogramBucket(
+                    bucket=h.get("bucket", ""),
+                    count=h.get("count", 0),
+                    percentage=h.get("percentage", 0.0),
+                )
+                for h in col["histogram"]
+            ]
+        return ColumnProfile(
+            name=col["name"],
+            dtype=col["dtype"],
+            inferred_type=col.get("inferred_type"),
+            null_pct=col.get("null_pct", "0%"),
+            null_count=col.get("null_count"),
+            unique_pct=col.get("unique_pct", "0%"),
+            distinct_count=col.get("distinct_count"),
+            is_unique=col.get("is_unique"),
+            min=col.get("min"),
+            max=col.get("max"),
+            mean=col.get("mean"),
+            std=col.get("std"),
+            median=col.get("median"),
+            q1=col.get("q1"),
+            q3=col.get("q3"),
+            skewness=col.get("skewness"),
+            kurtosis=col.get("kurtosis"),
+            min_length=col.get("min_length"),
+            max_length=col.get("max_length"),
+            avg_length=col.get("avg_length"),
+            patterns=patterns,
+            primary_pattern=col.get("primary_pattern"),
+            most_common=col.get("most_common"),
+            histogram=histogram,
+            cardinality_estimate=col.get("cardinality_estimate"),
+        )
     @classmethod
     def from_result(cls, result: Any) -> ProfileResponse:
         """Create response from adapter result or Profile model.
@@ -89,41 +462,50 @@ class ProfileResponse(BaseSchema):
             profile_json = result.profile_json
             source_name = profile_json.get("source", result.source_id)
             columns_data = profile_json.get("columns", [])
-            columns = [
-                ColumnProfile(
-                    name=col["name"],
-                    dtype=col["dtype"],
-                    null_pct=col.get("null_pct", "0%"),
-                    unique_pct=col.get("unique_pct", "0%"),
-                    min=col.get("min"),
-                    max=col.get("max"),
-                    mean=col.get("mean"),
-                    std=col.get("std"),
+            columns = [cls._build_column_profile(col) for col in columns_data]
+            # Build sampling metadata if present
+            sampling = None
+            if profile_json.get("sampling"):
+                s = profile_json["sampling"]
+                sampling = SamplingMetadata(
+                    strategy_used=s.get("strategy_used", "none"),
+                    sample_size=s.get("sample_size", result.row_count or 0),
+                    total_rows=s.get("total_rows", result.row_count or 0),
+                    sampling_ratio=s.get("sampling_ratio", 1.0),
+                    seed=s.get("seed"),
+                    confidence_level=s.get("confidence_level"),
+                    margin_of_error=s.get("margin_of_error"),
                 )
-                for col in columns_data
-            ]
             return cls(
                 source=source_name,
                 row_count=result.row_count or 0,
                 column_count=result.column_count or 0,
                 size_bytes=result.size_bytes or 0,
                 columns=columns,
+                sampling=sampling,
+                detected_patterns_summary=profile_json.get("detected_patterns_summary"),
+                profiled_at=profile_json.get("profiled_at"),
+                profiling_duration_ms=profile_json.get("profiling_duration_ms"),
             )
         # Handle ProfileResult (from adapter)
-        columns = [
-            ColumnProfile(
-                name=col["name"],
-                dtype=col["dtype"],
-                null_pct=col.get("null_pct", "0%"),
-                unique_pct=col.get("unique_pct", "0%"),
-                min=col.get("min"),
-                max=col.get("max"),
-                mean=col.get("mean"),
-                std=col.get("std"),
+        columns = [cls._build_column_profile(col) for col in result.columns]
+        # Build sampling metadata if present
+        sampling = None
+        if hasattr(result, "sampling") and result.sampling:
+            s = result.sampling
+            sampling = SamplingMetadata(
+                strategy_used=getattr(s, "strategy_used", "none"),
+                sample_size=getattr(s, "sample_size", result.row_count),
+                total_rows=getattr(s, "total_rows", result.row_count),
+                sampling_ratio=getattr(s, "sampling_ratio", 1.0),
+                seed=getattr(s, "seed", None),
+                confidence_level=getattr(s, "confidence_level", None),
+                margin_of_error=getattr(s, "margin_of_error", None),
             )
-            for col in result.columns
-        ]
         return cls(
             source=result.source,
@@ -131,4 +513,8 @@ class ProfileResponse(BaseSchema):
             column_count=result.column_count,
             size_bytes=result.size_bytes,
             columns=columns,
+            sampling=sampling,
+            detected_patterns_summary=getattr(result, "detected_patterns_summary", None),
+            profiled_at=getattr(result, "profiled_at", None),
+            profiling_duration_ms=getattr(result, "profiling_duration_ms", None),
         )

truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl