truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
"""Truthound backend implementation.
|
|
2
|
+
|
|
3
|
+
This module provides the concrete implementation of the data quality
|
|
4
|
+
backend using the truthound library. All truthound imports are isolated
|
|
5
|
+
here with lazy loading for better independence.
|
|
6
|
+
|
|
7
|
+
Updated for truthound 2.x API:
|
|
8
|
+
- Uses truthound.datasources.get_datasource() for auto-detection
|
|
9
|
+
- Supports both old and new import paths for backward compatibility
|
|
10
|
+
- Uses DataSourceCapability for feature detection
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from functools import partial
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from truthound_dashboard.core.converters import TruthoundResultConverter
|
|
20
|
+
from truthound_dashboard.core.interfaces import DataInput, DataSourceCapability
|
|
21
|
+
from truthound_dashboard.core.truthound_adapter import (
|
|
22
|
+
CheckResult,
|
|
23
|
+
ColumnProfileResult,
|
|
24
|
+
CompareResult,
|
|
25
|
+
GenerateSuiteResult,
|
|
26
|
+
LearnResult,
|
|
27
|
+
MaskResult,
|
|
28
|
+
ProfileResult,
|
|
29
|
+
ScanResult,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
from .base import BaseDataQualityBackend
|
|
33
|
+
from .errors import BackendOperationError, BackendUnavailableError
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TruthoundBackend(BaseDataQualityBackend):
|
|
39
|
+
"""Truthound-based data quality backend.
|
|
40
|
+
|
|
41
|
+
This backend uses the truthound library for all data quality operations.
|
|
42
|
+
Truthound imports are lazy-loaded to allow the dashboard to start
|
|
43
|
+
even if truthound is not installed (for testing or limited functionality).
|
|
44
|
+
|
|
45
|
+
Example:
|
|
46
|
+
backend = TruthoundBackend()
|
|
47
|
+
if backend.is_available():
|
|
48
|
+
result = await backend.check("data.csv")
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(self, max_workers: int = 4) -> None:
    """Create the backend with an empty truthound module slot.

    Args:
        max_workers: Worker-thread limit, forwarded unchanged to the
            base-class initializer.
    """
    super().__init__(max_workers=max_workers)
    # Result converter used by the _convert_* helpers.
    self._converter = TruthoundResultConverter()
    # Filled in by _get_truthound() the first time the library is needed.
    self._th = None
|
|
60
|
+
|
|
61
|
+
def _get_truthound(self):
|
|
62
|
+
"""Get truthound module with lazy loading.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
Truthound module.
|
|
66
|
+
|
|
67
|
+
Raises:
|
|
68
|
+
BackendUnavailableError: If truthound is not installed.
|
|
69
|
+
"""
|
|
70
|
+
if self._th is None:
|
|
71
|
+
try:
|
|
72
|
+
import truthound as th
|
|
73
|
+
self._th = th
|
|
74
|
+
except ImportError as e:
|
|
75
|
+
raise BackendUnavailableError(
|
|
76
|
+
"truthound",
|
|
77
|
+
"Library not installed. Install with: pip install truthound"
|
|
78
|
+
) from e
|
|
79
|
+
return self._th
|
|
80
|
+
|
|
81
|
+
def is_available(self) -> bool:
    """Report whether the truthound package can be imported.

    Returns:
        True when ``import truthound`` succeeds, False when it raises
        ImportError.
    """
    try:
        import truthound  # noqa: F401 - imported only to probe availability
    except ImportError:
        return False
    return True
|
|
92
|
+
|
|
93
|
+
def get_version(self) -> str | None:
    """Look up the installed truthound version.

    Returns:
        The module's ``__version__`` attribute, or None when truthound
        cannot be imported or does not define ``__version__``.
    """
    try:
        import truthound as installed

        version = getattr(installed, "__version__", None)
    except ImportError:
        version = None
    return version
|
|
104
|
+
|
|
105
|
+
def _resolve_data_input(self, data: DataInput) -> Any:
|
|
106
|
+
"""Resolve DataInput to a format truthound can process.
|
|
107
|
+
|
|
108
|
+
Truthound 2.x accepts DataSource objects directly, so we try to
|
|
109
|
+
pass them through. For backward compatibility, we also support
|
|
110
|
+
extracting LazyFrames from DataSource objects.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
data: File path string, DataSource object, or DataFrame.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
File path string, DataSource, or DataFrame that truthound can process.
|
|
117
|
+
"""
|
|
118
|
+
if isinstance(data, str):
|
|
119
|
+
return data
|
|
120
|
+
|
|
121
|
+
# Check if it's a truthound DataSource (new API)
|
|
122
|
+
# These should be passed directly to truthound functions
|
|
123
|
+
if hasattr(data, "capabilities"):
|
|
124
|
+
# It's likely a truthound 2.x DataSource
|
|
125
|
+
return data
|
|
126
|
+
|
|
127
|
+
# Check if it's a DataSource with to_polars_lazyframe method (legacy)
|
|
128
|
+
if hasattr(data, "to_polars_lazyframe"):
|
|
129
|
+
try:
|
|
130
|
+
return data.to_polars_lazyframe()
|
|
131
|
+
except Exception:
|
|
132
|
+
# If extraction fails, try passing the object directly
|
|
133
|
+
return data
|
|
134
|
+
|
|
135
|
+
# If it's already a LazyFrame or DataFrame, return as-is
|
|
136
|
+
return data
|
|
137
|
+
|
|
138
|
+
def _get_source_capabilities(self, data: DataInput) -> set[str]:
|
|
139
|
+
"""Get capabilities from a data source if available.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
data: DataInput object.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
Set of capability names, or empty set if not available.
|
|
146
|
+
"""
|
|
147
|
+
if hasattr(data, "capabilities"):
|
|
148
|
+
try:
|
|
149
|
+
capabilities = data.capabilities
|
|
150
|
+
return {c.name for c in capabilities}
|
|
151
|
+
except Exception:
|
|
152
|
+
pass
|
|
153
|
+
return set()
|
|
154
|
+
|
|
155
|
+
async def check(
    self,
    data: DataInput,
    *,
    validators: list[str] | None = None,
    validator_config: dict[str, dict[str, Any]] | None = None,
    schema: str | None = None,
    auto_schema: bool = False,
    columns: list[str] | None = None,
    min_severity: str | None = None,
    strict: bool = False,
    parallel: bool = False,
    max_workers: int | None = None,
    pushdown: bool | None = None,
) -> CheckResult:
    """Run data validation using truthound.

    The resolved input is passed as ``source=`` when it exposes a
    ``capabilities`` attribute (truthound 2.x DataSource) and as
    ``data=`` otherwise (file path or DataFrame). ``validators``,
    ``schema``, ``auto_schema`` and ``parallel`` are always forwarded;
    the remaining options are forwarded only when set.

    Args:
        data: File path, DataSource object, or DataFrame.
        validators: List of validator names to run.
        validator_config: Per-validator configuration.
        schema: Path to schema YAML file.
        auto_schema: Auto-learn schema for validation.
        columns: Columns to validate.
        min_severity: Minimum severity to report.
        strict: Raise exception on failures.
        parallel: Use parallel execution.
        max_workers: Max threads for parallel.
        pushdown: Enable query pushdown. If None and the input is a
            DataSource whose capabilities include ``SQL_PUSHDOWN``, it is
            set to True; otherwise it is left out of the call.

    Returns:
        CheckResult with validation results.

    Raises:
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    th = self._get_truthound()
    resolved_data = self._resolve_data_input(data)

    kwargs: dict[str, Any] = {}

    if hasattr(resolved_data, "capabilities"):
        # truthound 2.x DataSource: goes in through 'source'.
        kwargs["source"] = resolved_data

        # Auto-enable pushdown only when the caller expressed no preference.
        if pushdown is None:
            source_caps = self._get_source_capabilities(resolved_data)
            if "SQL_PUSHDOWN" in source_caps:
                pushdown = True
    else:
        # File path or DataFrame: goes in through 'data'.
        kwargs["data"] = resolved_data

    kwargs.update({
        "validators": validators,
        "schema": schema,
        "auto_schema": auto_schema,
        "parallel": parallel,
    })

    if validator_config:
        kwargs["validator_config"] = validator_config
    if columns is not None:
        kwargs["columns"] = columns
    if min_severity is not None:
        kwargs["min_severity"] = min_severity
    if strict:
        kwargs["strict"] = strict
    if max_workers is not None:
        kwargs["max_workers"] = max_workers
    if pushdown is not None:
        kwargs["pushdown"] = pushdown

    try:
        func = partial(th.check, **kwargs)
        result = await self._run_in_executor(func)
        return self._convert_check_result(result)
    except Exception as e:
        # Compare the top-level package name exactly: a substring test
        # would also match this project's own "truthound_dashboard.*"
        # exceptions and mislabel them as truthound failures.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "check", str(e), original_error=e
            ) from e
        raise
|
|
245
|
+
|
|
246
|
+
async def learn(
    self,
    source: DataInput,
    *,
    infer_constraints: bool = True,
    categorical_threshold: int | None = None,
    sample_size: int | None = None,
) -> LearnResult:
    """Learn schema from data using truthound.

    ``infer_constraints`` is always forwarded to ``th.learn``;
    ``categorical_threshold`` and ``sample_size`` are forwarded only when
    they are not None.

    Args:
        source: File path or DataSource object.
        infer_constraints: Infer constraints from statistics.
        categorical_threshold: Max unique values for categorical.
        sample_size: Number of rows to sample.

    Returns:
        LearnResult with schema information.

    Raises:
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    th = self._get_truthound()
    resolved_source = self._resolve_data_input(source)

    kwargs: dict[str, Any] = {"infer_constraints": infer_constraints}
    if categorical_threshold is not None:
        kwargs["categorical_threshold"] = categorical_threshold
    if sample_size is not None:
        kwargs["sample_size"] = sample_size

    try:
        func = partial(th.learn, resolved_source, **kwargs)
        result = await self._run_in_executor(func)
        return self._convert_learn_result(result)
    except Exception as e:
        # Compare the top-level package name exactly: a substring test
        # would also match this project's own "truthound_dashboard.*"
        # exceptions and mislabel them as truthound failures.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "learn", str(e), original_error=e
            ) from e
        raise
|
|
286
|
+
|
|
287
|
+
async def profile(
    self,
    source: DataInput,
    *,
    sample_size: int | None = None,
    include_patterns: bool = True,
    include_correlations: bool = False,
    include_distributions: bool = True,
    top_n_values: int = 10,
    pattern_sample_size: int = 1000,
    correlation_threshold: float = 0.7,
    min_pattern_match_ratio: float = 0.8,
    n_jobs: int = 1,
) -> ProfileResult:
    """Run data profiling using truthound.

    Only the resolved source is passed to ``th.profile``. Every keyword
    option below is accepted so the signature stays stable, but none of
    them is currently forwarded to truthound.

    Args:
        source: File path or DataSource object.
        sample_size: Max rows to sample (not forwarded).
        include_patterns: Enable pattern detection (not forwarded).
        include_correlations: Calculate correlations (not forwarded).
        include_distributions: Include distribution stats (not forwarded).
        top_n_values: Top/bottom values per column (not forwarded).
        pattern_sample_size: Sample size for pattern matching
            (not forwarded).
        correlation_threshold: Minimum correlation to report
            (not forwarded).
        min_pattern_match_ratio: Minimum pattern match ratio
            (not forwarded).
        n_jobs: Number of parallel jobs (not forwarded).

    Returns:
        ProfileResult with profiling information.

    Raises:
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    resolved_source = self._resolve_data_input(source)

    # th.profile() handles file paths and DataFrames but does not take the
    # advanced ProfilerConfig options; those are only available via
    # DataProfiler with LazyFrame input.
    # See: .truthound_docs/python-api/core-functions.md
    th = self._get_truthound()

    try:
        func = partial(th.profile, resolved_source)
        result = await self._run_in_executor(func)
        return self._convert_profile_result(result)
    except Exception as e:
        # Same error contract as check/learn/compare/scan/mask: wrap only
        # exceptions whose class lives in the truthound package itself.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "profile", str(e), original_error=e
            ) from e
        raise
|
|
330
|
+
|
|
331
|
+
async def compare(
    self,
    baseline: DataInput,
    current: DataInput,
    *,
    columns: list[str] | None = None,
    method: str = "auto",
    threshold: float | None = None,
    sample_size: int | None = None,
) -> CompareResult:
    """Compare datasets for drift detection using truthound.

    ``columns`` and ``method`` are always forwarded to ``th.compare``;
    ``threshold`` and ``sample_size`` are forwarded only when they are
    not None.

    Args:
        baseline: Reference data.
        current: Current data to compare.
        columns: Columns to compare.
        method: Detection method.
        threshold: Drift threshold.
        sample_size: Sample size for large datasets.

    Returns:
        CompareResult with drift results.

    Raises:
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    th = self._get_truthound()

    resolved_baseline = self._resolve_data_input(baseline)
    resolved_current = self._resolve_data_input(current)

    kwargs: dict[str, Any] = {
        "columns": columns,
        "method": method,
    }
    if threshold is not None:
        kwargs["threshold"] = threshold
    if sample_size is not None:
        kwargs["sample_size"] = sample_size

    try:
        func = partial(th.compare, resolved_baseline, resolved_current, **kwargs)
        result = await self._run_in_executor(func)
        return self._convert_compare_result(result)
    except Exception as e:
        # Compare the top-level package name exactly: a substring test
        # would also match this project's own "truthound_dashboard.*"
        # exceptions and mislabel them as truthound failures.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "compare", str(e), original_error=e
            ) from e
        raise
|
|
380
|
+
|
|
381
|
+
async def scan(
    self,
    data: DataInput,
    *,
    columns: list[str] | None = None,
    regulations: list[str] | None = None,
    min_confidence: float = 0.8,
) -> ScanResult:
    """Scan for PII using truthound.

    ``th.scan`` is called with the resolved data only; ``columns``,
    ``regulations`` and ``min_confidence`` are handed to
    ``_convert_scan_result`` so filtering happens after the scan.

    Args:
        data: File path or DataSource object.
        columns: Columns to scan.
        regulations: Regulations to check.
        min_confidence: Minimum PII confidence.

    Returns:
        ScanResult with PII findings.

    Raises:
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    th = self._get_truthound()
    resolved_data = self._resolve_data_input(data)

    # Note: truthound's th.scan() does not support min_confidence, columns,
    # or regulations parameters. We filter results after scanning.
    # See: .truthound_docs/python-api/core-functions.md
    try:
        func = partial(th.scan, resolved_data)
        result = await self._run_in_executor(func)
        return self._convert_scan_result(
            result,
            min_confidence=min_confidence,
            columns=columns,
            regulations=regulations,
        )
    except Exception as e:
        # Compare the top-level package name exactly: a substring test
        # would also match this project's own "truthound_dashboard.*"
        # exceptions and mislabel them as truthound failures.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "scan", str(e), original_error=e
            ) from e
        raise
|
|
424
|
+
|
|
425
|
+
async def mask(
    self,
    data: DataInput,
    output: str,
    *,
    columns: list[str] | None = None,
    strategy: str = "redact",
) -> MaskResult:
    """Mask sensitive data using truthound.

    ``strategy`` is always forwarded to ``th.mask``; ``columns`` is
    forwarded only when it is not None. ``output`` is not given to
    ``th.mask``; it is handed to ``_convert_mask_result`` together with
    the masked frame.

    Args:
        data: File path or DataSource object.
        output: Output file path.
        columns: Columns to mask.
        strategy: Masking strategy: 'redact', 'hash', or 'fake'.

    Returns:
        MaskResult with masking details.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values.
        BackendUnavailableError: If truthound cannot be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    th = self._get_truthound()

    # Reject a bad strategy before doing any work on the input data.
    if strategy not in ("redact", "hash", "fake"):
        raise ValueError(
            f"Invalid strategy: {strategy}. Use 'redact', 'hash', or 'fake'."
        )

    resolved_data = self._resolve_data_input(data)

    kwargs: dict[str, Any] = {
        "strategy": strategy,
    }
    if columns is not None:
        kwargs["columns"] = columns

    try:
        func = partial(th.mask, resolved_data, **kwargs)
        masked_df = await self._run_in_executor(func)
        return self._convert_mask_result(data, output, masked_df, strategy, columns)
    except Exception as e:
        # Compare the top-level package name exactly: a substring test
        # would also match this project's own "truthound_dashboard.*"
        # exceptions and mislabel them as truthound failures.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "mask", str(e), original_error=e
            ) from e
        raise
|
|
471
|
+
|
|
472
|
+
async def generate_suite(
    self,
    profile: ProfileResult | dict[str, Any],
    *,
    strictness: str = "medium",
    preset: str = "default",
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    output_format: str = "yaml",
) -> GenerateSuiteResult:
    """Generate validation suite from profile using truthound.

    ``strictness`` is matched case-insensitively against 'loose',
    'medium' and 'strict'; any other value falls back to medium.
    ``include`` and ``exclude`` are forwarded only when non-empty.
    The original ``strictness`` string and ``output_format`` are handed
    to ``_convert_suite_result`` along with the generated suite.

    Args:
        profile: Profile result or dictionary. A ProfileResult is
            converted with ``to_dict()`` before being passed on.
        strictness: Rule strictness level.
        preset: Rule generation preset.
        include: Rule categories to include.
        exclude: Rule categories to exclude.
        output_format: Output format.

    Returns:
        GenerateSuiteResult with generated rules.

    Raises:
        BackendUnavailableError: If the truthound profiler modules cannot
            be imported.
        BackendOperationError: If the failure originates from an exception
            class defined in the ``truthound`` package. Other exceptions
            propagate unchanged.
    """
    # Same lazy-import contract as _get_truthound(): a missing library
    # surfaces as BackendUnavailableError, not a bare ImportError.
    try:
        from truthound.profiler import generate_suite
        from truthound.profiler.generators import Strictness
    except ImportError as e:
        raise BackendUnavailableError(
            "truthound",
            "Profiler components could not be imported. "
            "Install with: pip install truthound"
        ) from e

    strictness_map = {
        "loose": Strictness.LOOSE,
        "medium": Strictness.MEDIUM,
        "strict": Strictness.STRICT,
    }
    strictness_enum = strictness_map.get(strictness.lower(), Strictness.MEDIUM)

    if isinstance(profile, ProfileResult):
        profile_data = profile.to_dict()
    else:
        profile_data = profile

    kwargs: dict[str, Any] = {
        "strictness": strictness_enum,
        "preset": preset,
    }
    if include:
        kwargs["include"] = include
    if exclude:
        kwargs["exclude"] = exclude

    try:
        func = partial(generate_suite, profile_data, **kwargs)
        suite = await self._run_in_executor(func)
        return self._convert_suite_result(suite, strictness, output_format)
    except Exception as e:
        # Same error contract as check/learn/compare/scan/mask: wrap only
        # exceptions whose class lives in the truthound package itself.
        if str(type(e).__module__).partition(".")[0] == "truthound":
            raise BackendOperationError(
                "truthound", "generate_suite", str(e), original_error=e
            ) from e
        raise
|
|
524
|
+
|
|
525
|
+
# =========================================================================
|
|
526
|
+
# Result Conversion Methods
|
|
527
|
+
# =========================================================================
|
|
528
|
+
|
|
529
|
+
def _convert_check_result(self, result: Any) -> CheckResult:
    """Build a CheckResult from a truthound Report.

    The report is first passed through
    ``self._converter.convert_check_result``; each field named below is
    then copied one-to-one from the converted mapping.
    """
    converted = self._converter.convert_check_result(result)
    field_names = (
        "passed",
        "has_critical",
        "has_high",
        "total_issues",
        "critical_issues",
        "high_issues",
        "medium_issues",
        "low_issues",
        "source",
        "row_count",
        "column_count",
        "issues",
    )
    # Every field is required: a key missing from the mapping raises here.
    return CheckResult(**{name: converted[name] for name in field_names})
|
|
546
|
+
|
|
547
|
+
def _convert_learn_result(self, result: Any) -> LearnResult:
    """Build a LearnResult from a truthound Schema.

    The schema is first passed through
    ``self._converter.convert_learn_result``; each field named below is
    then copied one-to-one from the converted mapping.
    """
    converted = self._converter.convert_learn_result(result)
    field_names = (
        "schema",
        "schema_yaml",
        "row_count",
        "column_count",
        "columns",
    )
    # Every field is required: a key missing from the mapping raises here.
    return LearnResult(**{name: converted[name] for name in field_names})
|
|
557
|
+
|
|
558
|
+
def _convert_profile_result(self, result: Any) -> ProfileResult:
    """Build a ProfileResult from a truthound TableProfile.

    The profile is first passed through
    ``self._converter.convert_profile_result``. Each entry of its
    "columns" list becomes a ColumnProfileResult; "name" and
    "physical_type" are required per column, every other column field
    falls back to the default listed below when absent.
    """
    converted = self._converter.convert_profile_result(result)

    # Optional per-column fields and the value used when a field is absent.
    column_defaults: dict[str, Any] = {
        "inferred_type": "unknown",
        "row_count": 0,
        "null_count": 0,
        "null_ratio": 0.0,
        "empty_string_count": 0,
        "distinct_count": 0,
        "unique_ratio": 0.0,
        "is_unique": False,
        "is_constant": False,
        "distribution": None,
        "top_values": None,
        "bottom_values": None,
        "min_length": None,
        "max_length": None,
        "avg_length": None,
        "detected_patterns": None,
        "min_date": None,
        "max_date": None,
        "date_gaps": 0,
        "suggested_validators": None,
        "profile_duration_ms": 0.0,
    }

    column_profiles = []
    for entry in converted["columns"]:
        optional_fields = {
            key: entry.get(key, fallback)
            for key, fallback in column_defaults.items()
        }
        column_profiles.append(
            ColumnProfileResult(
                name=entry["name"],
                physical_type=entry["physical_type"],
                **optional_fields,
            )
        )

    # Table-level fields: the first group is required, the second optional.
    required_fields = (
        "name",
        "source",
        "row_count",
        "column_count",
        "estimated_memory_bytes",
    )
    table_defaults: dict[str, Any] = {
        "duplicate_row_count": 0,
        "duplicate_row_ratio": 0.0,
        "correlations": None,
        "profiled_at": None,
        "profile_duration_ms": 0.0,
        "size_bytes": 0,
    }

    table_fields = {key: converted[key] for key in required_fields}
    table_fields.update(
        (key, converted.get(key, fallback))
        for key, fallback in table_defaults.items()
    )
    return ProfileResult(columns=column_profiles, **table_fields)
|
|
605
|
+
|
|
606
|
+
def _convert_compare_result(self, result: Any) -> CompareResult:
    """Build a CompareResult from a truthound DriftReport.

    The report is first passed through
    ``self._converter.convert_compare_result``; each field named below
    is then copied one-to-one from the converted mapping.
    """
    converted = self._converter.convert_compare_result(result)
    field_names = (
        "baseline_source",
        "current_source",
        "baseline_rows",
        "current_rows",
        "has_drift",
        "has_high_drift",
        "total_columns",
        "drifted_columns",
        "columns",
    )
    # Every field is required: a key missing from the mapping raises here.
    return CompareResult(**{name: converted[name] for name in field_names})
|
|
620
|
+
|
|
621
|
+
def _convert_scan_result(
    self,
    result: Any,
    *,
    min_confidence: float = 0.8,
    columns: list[str] | None = None,
    regulations: list[str] | None = None,
) -> ScanResult:
    """Convert truthound PIIReport to ScanResult with optional filtering.

    Args:
        result: truthound PIIReport object.
        min_confidence: Filter findings by minimum confidence (0.0-1.0).
            A finding whose confidence equals the threshold is kept.
        columns: Filter findings to specific columns only.
        regulations: Filter findings by regulation types. Findings that
            carry no regulation info are kept regardless.

    Returns:
        ScanResult with filtered PII findings. ``columns_with_pii`` and
        ``total_findings`` are recomputed from the filtered findings; the
        violation fields are passed through from the converter unchanged.
    """
    data = self._converter.convert_scan_result(result)

    # Treat a missing findings list as "no findings" so the summary
    # computation below cannot fail on None.
    findings = data["findings"]
    if findings is None:
        findings = []

    if findings:
        # Confidence is 0-100 in findings while min_confidence is 0.0-1.0.
        # Scale the finding down rather than the threshold up:
        # `min_confidence * 100` is inexact in binary floating point
        # (0.56 * 100 == 56.00000000000001), which would wrongly drop a
        # finding sitting exactly on the threshold. `56 / 100` rounds to
        # the same float as the literal 0.56, so the boundary is exact.
        findings = [
            f for f in findings
            if f.get("confidence", 100) / 100 >= min_confidence
        ]

        # Filter by columns
        if columns:
            findings = [f for f in findings if f.get("column") in columns]

        # Filter by regulations (if finding has regulation info)
        if regulations:
            findings = [
                f for f in findings
                if not f.get("regulation") or f.get("regulation") in regulations
            ]

    # Recalculate summary stats after filtering
    columns_with_pii = len({f.get("column") for f in findings if f.get("column")})

    return ScanResult(
        source=data["source"],
        row_count=data["row_count"],
        column_count=data["column_count"],
        total_columns_scanned=data["total_columns_scanned"],
        columns_with_pii=columns_with_pii,
        total_findings=len(findings),
        has_violations=data["has_violations"],
        total_violations=data["total_violations"],
        findings=findings,
        violations=data["violations"],
    )
|
|
680
|
+
|
|
681
|
+
def _convert_mask_result(
    self,
    source: DataInput,
    output: str,
    masked_df: Any,
    strategy: str,
    columns: list[str] | None,
) -> MaskResult:
    """Build a MaskResult from the outcome of a truthound mask run.

    All arguments are forwarded, in order, to
    ``self._converter.convert_mask_result``; each field named below is
    then copied one-to-one from the mapping it returns.
    """
    converted = self._converter.convert_mask_result(
        source, output, masked_df, strategy, columns
    )
    field_names = (
        "source",
        "output_path",
        "row_count",
        "column_count",
        "columns_masked",
        "strategy",
        "original_columns",
    )
    # Every field is required: a key missing from the mapping raises here.
    return MaskResult(**{name: converted[name] for name in field_names})
|
|
702
|
+
|
|
703
|
+
def _convert_suite_result(
    self,
    suite: Any,
    strictness: str,
    output_format: str,
) -> GenerateSuiteResult:
    """Build a GenerateSuiteResult from a truthound ValidationSuite.

    The suite, strictness and output format are forwarded to
    ``self._converter.convert_suite_result``; each field named below is
    then copied one-to-one from the mapping it returns.
    """
    converted = self._converter.convert_suite_result(
        suite, strictness, output_format
    )
    field_names = (
        "rules",
        "rule_count",
        "categories",
        "strictness",
        "yaml_content",
        "json_content",
    )
    # Every field is required: a key missing from the mapping raises here.
    return GenerateSuiteResult(**{name: converted[name] for name in field_names})
|