PyPI - truthound-dashboard - Versions diffs - 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205) hide show

truthound_dashboard/api/alerts.py +75 -86
truthound_dashboard/api/anomaly.py +7 -13
truthound_dashboard/api/cross_alerts.py +38 -52
truthound_dashboard/api/drift.py +49 -59
truthound_dashboard/api/drift_monitor.py +234 -79
truthound_dashboard/api/enterprise_sampling.py +498 -0
truthound_dashboard/api/history.py +57 -5
truthound_dashboard/api/lineage.py +3 -48
truthound_dashboard/api/maintenance.py +104 -49
truthound_dashboard/api/mask.py +1 -2
truthound_dashboard/api/middleware.py +2 -1
truthound_dashboard/api/model_monitoring.py +435 -311
truthound_dashboard/api/notifications.py +227 -191
truthound_dashboard/api/notifications_advanced.py +21 -20
truthound_dashboard/api/observability.py +586 -0
truthound_dashboard/api/plugins.py +2 -433
truthound_dashboard/api/profile.py +199 -37
truthound_dashboard/api/quality_reporter.py +701 -0
truthound_dashboard/api/reports.py +7 -16
truthound_dashboard/api/router.py +66 -0
truthound_dashboard/api/rule_suggestions.py +5 -5
truthound_dashboard/api/scan.py +17 -19
truthound_dashboard/api/schedules.py +85 -50
truthound_dashboard/api/schema_evolution.py +6 -6
truthound_dashboard/api/schema_watcher.py +667 -0
truthound_dashboard/api/sources.py +98 -27
truthound_dashboard/api/tiering.py +1323 -0
truthound_dashboard/api/triggers.py +14 -11
truthound_dashboard/api/validations.py +12 -11
truthound_dashboard/api/versioning.py +1 -6
truthound_dashboard/core/__init__.py +129 -3
truthound_dashboard/core/actions/__init__.py +62 -0
truthound_dashboard/core/actions/custom.py +426 -0
truthound_dashboard/core/actions/notifications.py +910 -0
truthound_dashboard/core/actions/storage.py +472 -0
truthound_dashboard/core/actions/webhook.py +281 -0
truthound_dashboard/core/anomaly.py +262 -67
truthound_dashboard/core/anomaly_explainer.py +4 -3
truthound_dashboard/core/backends/__init__.py +67 -0
truthound_dashboard/core/backends/base.py +299 -0
truthound_dashboard/core/backends/errors.py +191 -0
truthound_dashboard/core/backends/factory.py +423 -0
truthound_dashboard/core/backends/mock_backend.py +451 -0
truthound_dashboard/core/backends/truthound_backend.py +718 -0
truthound_dashboard/core/checkpoint/__init__.py +87 -0
truthound_dashboard/core/checkpoint/adapters.py +814 -0
truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
truthound_dashboard/core/checkpoint/runner.py +270 -0
truthound_dashboard/core/connections.py +437 -10
truthound_dashboard/core/converters/__init__.py +14 -0
truthound_dashboard/core/converters/truthound.py +620 -0
truthound_dashboard/core/cross_alerts.py +540 -320
truthound_dashboard/core/datasource_factory.py +1672 -0
truthound_dashboard/core/drift_monitor.py +216 -20
truthound_dashboard/core/enterprise_sampling.py +1291 -0
truthound_dashboard/core/interfaces/__init__.py +225 -0
truthound_dashboard/core/interfaces/actions.py +652 -0
truthound_dashboard/core/interfaces/base.py +247 -0
truthound_dashboard/core/interfaces/checkpoint.py +676 -0
truthound_dashboard/core/interfaces/protocols.py +664 -0
truthound_dashboard/core/interfaces/reporters.py +650 -0
truthound_dashboard/core/interfaces/routing.py +646 -0
truthound_dashboard/core/interfaces/triggers.py +619 -0
truthound_dashboard/core/lineage.py +407 -71
truthound_dashboard/core/model_monitoring.py +431 -3
truthound_dashboard/core/notifications/base.py +4 -0
truthound_dashboard/core/notifications/channels.py +501 -1203
truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
truthound_dashboard/core/notifications/deduplication/service.py +131 -348
truthound_dashboard/core/notifications/dispatcher.py +202 -11
truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
truthound_dashboard/core/notifications/escalation/engine.py +168 -358
truthound_dashboard/core/notifications/routing/__init__.py +88 -128
truthound_dashboard/core/notifications/routing/engine.py +90 -317
truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
truthound_dashboard/core/notifications/throttling/builder.py +117 -255
truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
truthound_dashboard/core/phase5/collaboration.py +1 -1
truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
truthound_dashboard/core/quality_reporter.py +1359 -0
truthound_dashboard/core/report_history.py +0 -6
truthound_dashboard/core/reporters/__init__.py +175 -14
truthound_dashboard/core/reporters/adapters.py +943 -0
truthound_dashboard/core/reporters/base.py +0 -3
truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
truthound_dashboard/core/reporters/compat.py +266 -0
truthound_dashboard/core/reporters/csv_reporter.py +2 -35
truthound_dashboard/core/reporters/factory.py +526 -0
truthound_dashboard/core/reporters/interfaces.py +745 -0
truthound_dashboard/core/reporters/registry.py +1 -10
truthound_dashboard/core/scheduler.py +165 -0
truthound_dashboard/core/schema_evolution.py +3 -3
truthound_dashboard/core/schema_watcher.py +1528 -0
truthound_dashboard/core/services.py +595 -76
truthound_dashboard/core/store_manager.py +810 -0
truthound_dashboard/core/streaming_anomaly.py +169 -4
truthound_dashboard/core/tiering.py +1309 -0
truthound_dashboard/core/triggers/evaluators.py +178 -8
truthound_dashboard/core/truthound_adapter.py +2620 -197
truthound_dashboard/core/unified_alerts.py +23 -20
truthound_dashboard/db/__init__.py +8 -0
truthound_dashboard/db/database.py +8 -2
truthound_dashboard/db/models.py +944 -25
truthound_dashboard/db/repository.py +2 -0
truthound_dashboard/main.py +11 -0
truthound_dashboard/schemas/__init__.py +177 -16
truthound_dashboard/schemas/base.py +44 -23
truthound_dashboard/schemas/collaboration.py +19 -6
truthound_dashboard/schemas/cross_alerts.py +19 -3
truthound_dashboard/schemas/drift.py +61 -55
truthound_dashboard/schemas/drift_monitor.py +67 -23
truthound_dashboard/schemas/enterprise_sampling.py +653 -0
truthound_dashboard/schemas/lineage.py +0 -33
truthound_dashboard/schemas/mask.py +10 -8
truthound_dashboard/schemas/model_monitoring.py +89 -10
truthound_dashboard/schemas/notifications_advanced.py +13 -0
truthound_dashboard/schemas/observability.py +453 -0
truthound_dashboard/schemas/plugins.py +0 -280
truthound_dashboard/schemas/profile.py +154 -247
truthound_dashboard/schemas/quality_reporter.py +403 -0
truthound_dashboard/schemas/reports.py +2 -2
truthound_dashboard/schemas/rule_suggestion.py +8 -1
truthound_dashboard/schemas/scan.py +4 -24
truthound_dashboard/schemas/schedule.py +11 -3
truthound_dashboard/schemas/schema_watcher.py +727 -0
truthound_dashboard/schemas/source.py +17 -2
truthound_dashboard/schemas/tiering.py +822 -0
truthound_dashboard/schemas/triggers.py +16 -0
truthound_dashboard/schemas/unified_alerts.py +7 -0
truthound_dashboard/schemas/validation.py +0 -13
truthound_dashboard/schemas/validators/base.py +41 -21
truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
truthound_dashboard/schemas/validators/localization_validators.py +273 -0
truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
truthound_dashboard/schemas/validators/referential_validators.py +312 -0
truthound_dashboard/schemas/validators/registry.py +93 -8
truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
truthound_dashboard/schemas/versioning.py +1 -6
truthound_dashboard/static/index.html +2 -2
truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
truthound_dashboard/core/plugins/hooks/manager.py +0 -403
truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
truthound_dashboard/core/reporters/junit_reporter.py +0 -233
truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
{truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0

truthound_dashboard/schemas/profile.py CHANGED Viewed

@@ -1,12 +1,15 @@
 """Profile-related Pydantic schemas.
 This module defines schemas for data profiling API operations.
+Note: truthound's th.profile() only supports (data, source) parameters.
+Advanced options like sampling strategies, pattern detection configuration,
+and correlation analysis are NOT supported by the underlying library.
 """
 from __future__ import annotations
-from enum import Enum
-from typing import Any, Literal
+from typing import Any
 from pydantic import Field
@@ -14,176 +17,79 @@ from .base import BaseSchema
 # =============================================================================
-# Sampling Strategy Enums and Types
+# Profile Request Schema (Simplified)
 # =============================================================================
-class SamplingStrategy(str, Enum):
-    """Sampling strategies for data profiling.
+class ProfileRequest(BaseSchema):
+    """Request schema for basic data profiling.
-    Supports 8+ strategies from truthound profiler:
-    - NONE: Profile all data (for small datasets < 100K rows)
-    - HEAD: First N rows (for quick previews)
-    - RANDOM: Random sampling (general purpose)
-    - SYSTEMATIC: Every Nth row (for ordered data)
-    - STRATIFIED: Maintain distribution across categories
-    - RESERVOIR: Streaming-friendly sampling
-    - ADAPTIVE: Auto-select based on data characteristics (default)
-    - HASH: Deterministic sampling for reproducibility
+    Note: truthound's th.profile() does not support advanced configuration.
+    This schema exists for API compatibility but options are not used.
+    For advanced profiling with configuration, use ProfileAdvancedRequest.
     """
-    NONE = "none"
-    HEAD = "head"
-    RANDOM = "random"
-    SYSTEMATIC = "systematic"
-    STRATIFIED = "stratified"
-    RESERVOIR = "reservoir"
-    ADAPTIVE = "adaptive"
-    HASH = "hash"
+    pass
-# Literal type for API validation
-SamplingStrategyType = Literal[
-    "none", "head", "random", "systematic", "stratified", "reservoir", "adaptive", "hash"
-]
+class ProfileAdvancedRequest(BaseSchema):
+    """Request schema for advanced data profiling with ProfilerConfig options.
-class SamplingConfig(BaseSchema):
-    """Advanced sampling configuration for profiling.
-    Provides fine-grained control over sampling behavior for large datasets.
+    This schema maps to truthound's ProfilerConfig for fine-grained control
+    over profiling behavior.
     """
-    strategy: SamplingStrategyType = Field(
-        default="adaptive",
-        description="Sampling strategy to use. 'adaptive' auto-selects based on data size.",
-    )
     sample_size: int | None = Field(
         default=None,
         ge=100,
-        description="Target sample size. If None, auto-estimated based on confidence level.",
+        description="Maximum rows to sample (None for all rows)",
     )
-    confidence_level: float = Field(
-        default=0.95,
-        ge=0.80,
-        le=0.99,
-        description="Statistical confidence level for sample size estimation (0.80-0.99).",
-    )
-    margin_of_error: float = Field(
-        default=0.03,
-        ge=0.01,
-        le=0.10,
-        description="Acceptable margin of error for statistical estimates (0.01-0.10).",
+    random_seed: int = Field(
+        default=42,
+        ge=0,
+        description="Random seed for reproducible sampling",
     )
-    strata_column: str | None = Field(
-        default=None,
-        description="Column for stratified sampling to maintain distribution.",
+    include_patterns: bool = Field(
+        default=True,
+        description="Enable pattern detection (email, phone, uuid, etc.)",
     )
-    seed: int | None = Field(
-        default=None,
-        description="Random seed for reproducible sampling results.",
+    include_correlations: bool = Field(
+        default=False,
+        description="Calculate column correlations (can be slow for many columns)",
     )
-# =============================================================================
-# Pattern Detection Configuration
-# =============================================================================
-class PatternType(str, Enum):
-    """Supported data pattern types for detection."""
-    EMAIL = "email"
-    PHONE = "phone"
-    UUID = "uuid"
-    URL = "url"
-    IP_ADDRESS = "ip_address"
-    CREDIT_CARD = "credit_card"
-    DATE = "date"
-    DATETIME = "datetime"
-    KOREAN_RRN = "korean_rrn"
-    KOREAN_PHONE = "korean_phone"
-    SSN = "ssn"
-    POSTAL_CODE = "postal_code"
-    CURRENCY = "currency"
-    PERCENTAGE = "percentage"
-    CUSTOM = "custom"
-class PatternDetectionConfig(BaseSchema):
-    """Configuration for pattern detection during profiling.
-    Enables automatic detection of common data patterns like
-    emails, phone numbers, UUIDs, etc.
-    """
-    enabled: bool = Field(
+    include_distributions: bool = Field(
         default=True,
-        description="Enable pattern detection during profiling.",
+        description="Include value distribution histograms",
     )
-    sample_size: int = Field(
+    top_n_values: int = Field(
+        default=10,
+        ge=1,
+        le=100,
+        description="Number of top values to return per column",
+    )
+    pattern_sample_size: int = Field(
         default=1000,
         ge=100,
-        le=100000,
-        description="Number of values to sample for pattern detection.",
+        le=10000,
+        description="Sample size for pattern detection",
+    )
+    correlation_threshold: float = Field(
+        default=0.7,
+        ge=0.0,
+        le=1.0,
+        description="Minimum correlation to report",
     )
-    min_confidence: float = Field(
+    min_pattern_match_ratio: float = Field(
         default=0.8,
         ge=0.5,
         le=1.0,
-        description="Minimum confidence threshold for pattern matches (0.5-1.0).",
+        description="Minimum match ratio to consider a pattern detected",
     )
-    patterns_to_detect: list[str] | None = Field(
-        default=None,
-        description="Specific patterns to detect. If None, detects all supported patterns.",
-    )
-# =============================================================================
-# Profile Request Schema (Enhanced)
-# =============================================================================
-class ProfileRequest(BaseSchema):
-    """Request schema for data profiling.
-    Provides comprehensive configuration for profiling operations including
-    sampling strategies, pattern detection, and statistical analysis options.
-    """
-    # Basic sampling (backward compatible)
-    sample_size: int | None = Field(
-        default=None,
+    n_jobs: int = Field(
+        default=1,
         ge=1,
-        description="Maximum number of rows to sample for profiling. "
-        "If None, profiles all data. For advanced sampling, use 'sampling' config.",
-        examples=[10000, 50000, 100000],
-    )
-    # Advanced sampling configuration
-    sampling: SamplingConfig | None = Field(
-        default=None,
-        description="Advanced sampling configuration. If provided, overrides sample_size.",
-    )
-    # Pattern detection configuration
-    pattern_detection: PatternDetectionConfig | None = Field(
-        default=None,
-        description="Pattern detection configuration. If None, uses default settings.",
-    )
-    # Additional profiling options
-    include_histograms: bool = Field(
-        default=True,
-        description="Include value distribution histograms in the profile.",
-    )
-    include_correlations: bool = Field(
-        default=False,
-        description="Include column correlation analysis (increases processing time).",
-    )
-    include_cardinality: bool = Field(
-        default=True,
-        description="Include cardinality estimates for high-cardinality columns.",
+        le=16,
+        description="Number of parallel jobs for profiling",
     )
@@ -231,21 +137,21 @@ class HistogramBucket(BaseSchema):
 # =============================================================================
-# Column Profile Schema (Enhanced)
+# Column Profile Schema
 # =============================================================================
 class ColumnProfile(BaseSchema):
     """Profile information for a single column.
-    Includes basic statistics, pattern detection results, and distribution data.
+    Includes basic statistics and distribution data.
     """
     # Basic identification
     name: str = Field(..., description="Column name")
     dtype: str = Field(..., description="Physical data type (string, int64, float64, etc.)")
-    # Inferred semantic type (NEW)
+    # Inferred semantic type
     inferred_type: str | None = Field(
         default=None,
         description="Inferred semantic type based on pattern detection "
@@ -285,7 +191,7 @@ class ColumnProfile(BaseSchema):
     max_length: int | None = Field(default=None, description="Maximum string length")
     avg_length: float | None = Field(default=None, description="Average string length")
-    # Pattern detection results (NEW)
+    # Pattern detection results
     patterns: list[DetectedPattern] | None = Field(
         default=None,
         description="Detected data patterns (email, phone, uuid, etc.)",
@@ -313,33 +219,12 @@ class ColumnProfile(BaseSchema):
 # =============================================================================
-# Sampling Metadata for Response
-# =============================================================================
-class SamplingMetadata(BaseSchema):
-    """Metadata about sampling used during profiling."""
-    strategy_used: str = Field(..., description="Sampling strategy that was applied")
-    sample_size: int = Field(..., description="Actual sample size used")
-    total_rows: int = Field(..., description="Total rows in the dataset")
-    sampling_ratio: float = Field(..., description="Ratio of sampled to total rows")
-    seed: int | None = Field(default=None, description="Random seed used (if applicable)")
-    confidence_level: float | None = Field(
-        default=None, description="Confidence level achieved"
-    )
-    margin_of_error: float | None = Field(
-        default=None, description="Estimated margin of error"
-    )
-# =============================================================================
-# Profile Response Schema (Enhanced)
+# Profile Response Schema
 # =============================================================================
 class ProfileResponse(BaseSchema):
-    """Data profiling response with enhanced statistics and pattern detection."""
+    """Data profiling response with statistics."""
     source: str = Field(..., description="Source path/identifier")
     row_count: int = Field(..., ge=0, description="Total number of rows")
@@ -350,19 +235,13 @@ class ProfileResponse(BaseSchema):
         description="Profile for each column",
     )
-    # Sampling metadata (NEW)
-    sampling: SamplingMetadata | None = Field(
-        default=None,
-        description="Information about sampling applied during profiling",
-    )
-    # Pattern detection summary (NEW)
+    # Pattern detection summary
     detected_patterns_summary: dict[str, int] | None = Field(
         default=None,
         description="Summary of detected patterns across all columns {pattern_type: count}",
     )
-    # Profiling metadata (NEW)
+    # Profiling metadata
     profiled_at: str | None = Field(
         default=None,
         description="ISO timestamp when profiling was performed",
@@ -384,67 +263,125 @@ class ProfileResponse(BaseSchema):
         return f"{size:.1f} PB"
     @classmethod
-    def _build_column_profile(cls, col: dict[str, Any]) -> ColumnProfile:
-        """Build a ColumnProfile from column data dict.
+    def _build_column_profile(cls, col: dict[str, Any] | Any) -> ColumnProfile:
+        """Build a ColumnProfile from column data dict or ColumnProfileResult object.
         Args:
-            col: Column data dictionary from adapter or database.
+            col: Column data dictionary from adapter or database, or ColumnProfileResult object.
         Returns:
             ColumnProfile instance with all available fields.
         """
+        # Helper function to get attribute from dict or object
+        def get_val(key: str, default: Any = None) -> Any:
+            if isinstance(col, dict):
+                return col.get(key, default)
+            return getattr(col, key, default)
         # Build patterns list if present
         patterns = None
-        if col.get("patterns"):
+        patterns_data = get_val("patterns") or get_val("detected_patterns")
+        if patterns_data:
             patterns = [
                 DetectedPattern(
-                    pattern_type=p.get("pattern_type", p.get("type", "unknown")),
-                    confidence=p.get("confidence", 0.0),
-                    match_count=p.get("match_count", 0),
-                    match_percentage=p.get("match_percentage", 0.0),
-                    sample_matches=p.get("sample_matches"),
+                    pattern_type=p.get("pattern_type", p.get("type", p.get("pattern", "unknown")))
+                    if isinstance(p, dict)
+                    else getattr(p, "pattern_type", getattr(p, "pattern", "unknown")),
+                    confidence=p.get("confidence", 0.0)
+                    if isinstance(p, dict)
+                    else getattr(p, "confidence", getattr(p, "match_ratio", 0.0)),
+                    match_count=p.get("match_count", 0) if isinstance(p, dict) else getattr(p, "match_count", 0),
+                    match_percentage=p.get("match_percentage", 0.0)
+                    if isinstance(p, dict)
+                    else getattr(p, "match_percentage", getattr(p, "match_ratio", 0.0) * 100),
+                    sample_matches=p.get("sample_matches") if isinstance(p, dict) else getattr(p, "sample_matches", None),
                 )
-                for p in col["patterns"]
+                for p in patterns_data
             ]
         # Build histogram if present
         histogram = None
-        if col.get("histogram"):
+        histogram_data = get_val("histogram")
+        if histogram_data:
             histogram = [
                 HistogramBucket(
-                    bucket=h.get("bucket", ""),
-                    count=h.get("count", 0),
-                    percentage=h.get("percentage", 0.0),
+                    bucket=h.get("bucket", "") if isinstance(h, dict) else getattr(h, "bucket", ""),
+                    count=h.get("count", 0) if isinstance(h, dict) else getattr(h, "count", 0),
+                    percentage=h.get("percentage", 0.0) if isinstance(h, dict) else getattr(h, "percentage", 0.0),
                 )
-                for h in col["histogram"]
+                for h in histogram_data
             ]
+        # Get dtype from dict or object (physical_type for ColumnProfileResult)
+        dtype = get_val("dtype") or get_val("physical_type") or "unknown"
+        # Get null_pct - format from ratio if needed
+        null_pct = get_val("null_pct", "0%")
+        if null_pct == "0%" and get_val("null_ratio") is not None:
+            null_ratio = get_val("null_ratio", 0.0)
+            null_pct = f"{null_ratio * 100:.1f}%"
+        # Get unique_pct - format from ratio if needed
+        unique_pct = get_val("unique_pct", "0%")
+        if unique_pct == "0%" and get_val("unique_ratio") is not None:
+            unique_ratio = get_val("unique_ratio", 0.0)
+            unique_pct = f"{unique_ratio * 100:.1f}%"
+        # Get distribution stats
+        distribution = get_val("distribution")
+        mean = get_val("mean")
+        std = get_val("std")
+        median = get_val("median")
+        q1 = get_val("q1")
+        q3 = get_val("q3")
+        skewness = get_val("skewness")
+        kurtosis = get_val("kurtosis")
+        min_val = get_val("min")
+        max_val = get_val("max")
+        # Extract from distribution dict if present
+        if distribution and isinstance(distribution, dict):
+            mean = mean or distribution.get("mean")
+            std = std or distribution.get("std")
+            median = median or distribution.get("median")
+            q1 = q1 or distribution.get("q1")
+            q3 = q3 or distribution.get("q3")
+            skewness = skewness or distribution.get("skewness")
+            kurtosis = kurtosis or distribution.get("kurtosis")
+            min_val = min_val or distribution.get("min")
+            max_val = max_val or distribution.get("max")
+        # Get most_common from top_values if needed
+        most_common = get_val("most_common")
+        if not most_common and get_val("top_values"):
+            most_common = get_val("top_values")
         return ColumnProfile(
-            name=col["name"],
-            dtype=col["dtype"],
-            inferred_type=col.get("inferred_type"),
-            null_pct=col.get("null_pct", "0%"),
-            null_count=col.get("null_count"),
-            unique_pct=col.get("unique_pct", "0%"),
-            distinct_count=col.get("distinct_count"),
-            is_unique=col.get("is_unique"),
-            min=col.get("min"),
-            max=col.get("max"),
-            mean=col.get("mean"),
-            std=col.get("std"),
-            median=col.get("median"),
-            q1=col.get("q1"),
-            q3=col.get("q3"),
-            skewness=col.get("skewness"),
-            kurtosis=col.get("kurtosis"),
-            min_length=col.get("min_length"),
-            max_length=col.get("max_length"),
-            avg_length=col.get("avg_length"),
+            name=get_val("name"),
+            dtype=dtype,
+            inferred_type=get_val("inferred_type"),
+            null_pct=null_pct,
+            null_count=get_val("null_count"),
+            unique_pct=unique_pct,
+            distinct_count=get_val("distinct_count"),
+            is_unique=get_val("is_unique"),
+            min=min_val,
+            max=max_val,
+            mean=mean,
+            std=std,
+            median=median,
+            q1=q1,
+            q3=q3,
+            skewness=skewness,
+            kurtosis=kurtosis,
+            min_length=get_val("min_length"),
+            max_length=get_val("max_length"),
+            avg_length=get_val("avg_length"),
             patterns=patterns,
-            primary_pattern=col.get("primary_pattern"),
-            most_common=col.get("most_common"),
+            primary_pattern=get_val("primary_pattern"),
+            most_common=most_common,
             histogram=histogram,
-            cardinality_estimate=col.get("cardinality_estimate"),
+            cardinality_estimate=get_val("cardinality_estimate"),
         )
     @classmethod
@@ -464,27 +401,12 @@ class ProfileResponse(BaseSchema):
             columns_data = profile_json.get("columns", [])
             columns = [cls._build_column_profile(col) for col in columns_data]
-            # Build sampling metadata if present
-            sampling = None
-            if profile_json.get("sampling"):
-                s = profile_json["sampling"]
-                sampling = SamplingMetadata(
-                    strategy_used=s.get("strategy_used", "none"),
-                    sample_size=s.get("sample_size", result.row_count or 0),
-                    total_rows=s.get("total_rows", result.row_count or 0),
-                    sampling_ratio=s.get("sampling_ratio", 1.0),
-                    seed=s.get("seed"),
-                    confidence_level=s.get("confidence_level"),
-                    margin_of_error=s.get("margin_of_error"),
-                )
             return cls(
                 source=source_name,
                 row_count=result.row_count or 0,
                 column_count=result.column_count or 0,
                 size_bytes=result.size_bytes or 0,
                 columns=columns,
-                sampling=sampling,
                 detected_patterns_summary=profile_json.get("detected_patterns_summary"),
                 profiled_at=profile_json.get("profiled_at"),
                 profiling_duration_ms=profile_json.get("profiling_duration_ms"),
@@ -493,27 +415,12 @@ class ProfileResponse(BaseSchema):
         # Handle ProfileResult (from adapter)
         columns = [cls._build_column_profile(col) for col in result.columns]
-        # Build sampling metadata if present
-        sampling = None
-        if hasattr(result, "sampling") and result.sampling:
-            s = result.sampling
-            sampling = SamplingMetadata(
-                strategy_used=getattr(s, "strategy_used", "none"),
-                sample_size=getattr(s, "sample_size", result.row_count),
-                total_rows=getattr(s, "total_rows", result.row_count),
-                sampling_ratio=getattr(s, "sampling_ratio", 1.0),
-                seed=getattr(s, "seed", None),
-                confidence_level=getattr(s, "confidence_level", None),
-                margin_of_error=getattr(s, "margin_of_error", None),
-            )
         return cls(
             source=result.source,
             row_count=result.row_count,
             column_count=result.column_count,
             size_bytes=result.size_bytes,
             columns=columns,
-            sampling=sampling,
             detected_patterns_summary=getattr(result, "detected_patterns_summary", None),
             profiled_at=getattr(result, "profiled_at", None),
             profiling_duration_ms=getattr(result, "profiling_duration_ms", None),

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl