truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +437 -10
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +11 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
"""Truthound result converters.
|
|
2
|
+
|
|
3
|
+
This module isolates all truthound-specific result object conversions.
|
|
4
|
+
It handles converting truthound's Report, Schema, TableProfile, etc.
|
|
5
|
+
into dashboard-standard result dataclasses.
|
|
6
|
+
|
|
7
|
+
By isolating conversions here, we can:
|
|
8
|
+
- Handle truthound API changes in one place
|
|
9
|
+
- Support multiple truthound versions
|
|
10
|
+
- Provide graceful fallbacks for missing attributes
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import yaml
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TruthoundResultConverter:
|
|
26
|
+
"""Converter for truthound result objects.
|
|
27
|
+
|
|
28
|
+
This class provides static methods to convert truthound-specific
|
|
29
|
+
objects into dashboard result dataclasses.
|
|
30
|
+
|
|
31
|
+
All conversions use defensive attribute access (getattr with defaults)
|
|
32
|
+
to handle different truthound versions gracefully.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
@staticmethod
|
|
36
|
+
def convert_severity(severity: Any) -> str:
    """Normalise a severity into its lowercase text form.

    Args:
        severity: Either an enum-like object exposing ``.value`` or any
            other object (typically a plain string).

    Returns:
        ``str()`` of the ``.value`` attribute when one exists, otherwise
        ``str()`` of the object itself, lowercased in both cases.
    """
    # Enum members carry the label in ``.value``; anything else is used as-is.
    raw = severity.value if hasattr(severity, "value") else severity
    return str(raw).lower()
|
|
48
|
+
|
|
49
|
+
@staticmethod
|
|
50
|
+
def convert_check_result(result: Any) -> dict[str, Any]:
|
|
51
|
+
"""Convert truthound Report to CheckResult dict.
|
|
52
|
+
|
|
53
|
+
The truthound Report contains:
|
|
54
|
+
- issues: list[ValidationIssue]
|
|
55
|
+
- source: str
|
|
56
|
+
- row_count: int
|
|
57
|
+
- column_count: int
|
|
58
|
+
- has_issues: bool
|
|
59
|
+
- has_critical: bool
|
|
60
|
+
- has_high: bool
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
result: Truthound Report object.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Dictionary with CheckResult fields.
|
|
67
|
+
"""
|
|
68
|
+
issues = getattr(result, "issues", [])
|
|
69
|
+
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
|
|
70
|
+
|
|
71
|
+
converted_issues = []
|
|
72
|
+
for issue in issues:
|
|
73
|
+
severity = TruthoundResultConverter.convert_severity(issue.severity)
|
|
74
|
+
if severity in severity_counts:
|
|
75
|
+
severity_counts[severity] += 1
|
|
76
|
+
|
|
77
|
+
converted_issues.append({
|
|
78
|
+
"column": getattr(issue, "column", ""),
|
|
79
|
+
"issue_type": getattr(issue, "issue_type", "unknown"),
|
|
80
|
+
"count": getattr(issue, "count", 0),
|
|
81
|
+
"severity": severity,
|
|
82
|
+
"details": getattr(issue, "details", None),
|
|
83
|
+
"expected": getattr(issue, "expected", None),
|
|
84
|
+
"actual": getattr(issue, "actual", None),
|
|
85
|
+
"sample_values": getattr(issue, "sample_values", None),
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
return {
|
|
89
|
+
"passed": not getattr(result, "has_issues", len(issues) > 0),
|
|
90
|
+
"has_critical": getattr(result, "has_critical", severity_counts["critical"] > 0),
|
|
91
|
+
"has_high": getattr(result, "has_high", severity_counts["high"] > 0),
|
|
92
|
+
"total_issues": len(issues),
|
|
93
|
+
"critical_issues": severity_counts["critical"],
|
|
94
|
+
"high_issues": severity_counts["high"],
|
|
95
|
+
"medium_issues": severity_counts["medium"],
|
|
96
|
+
"low_issues": severity_counts["low"],
|
|
97
|
+
"source": getattr(result, "source", ""),
|
|
98
|
+
"row_count": getattr(result, "row_count", 0),
|
|
99
|
+
"column_count": getattr(result, "column_count", 0),
|
|
100
|
+
"issues": converted_issues,
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
@staticmethod
|
|
104
|
+
def convert_learn_result(result: Any) -> dict[str, Any]:
    """Turn a truthound Schema into a LearnResult dict.

    The schema is serialised twice: as the plain dictionary returned by
    ``result.to_dict()`` (an empty dict when the object has no such
    method) and as a block-style YAML dump of that dictionary.

    Args:
        result: Truthound Schema object.

    Returns:
        Dictionary with ``schema``, ``schema_yaml``, ``row_count``,
        ``column_count`` and ``columns`` (the column names).
    """
    if hasattr(result, "to_dict"):
        as_dict = result.to_dict()
    else:
        as_dict = {}

    # Block style, original key order, non-ASCII characters kept verbatim.
    as_yaml = yaml.dump(
        as_dict,
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
    )

    # Column names are only taken from a real dict; any other container
    # yields an empty list.
    declared = getattr(result, "columns", {})
    if isinstance(declared, dict):
        names = list(declared.keys())
    else:
        names = []

    return {
        "schema": as_dict,
        "schema_yaml": as_yaml,
        "row_count": getattr(result, "row_count", None),
        "column_count": len(names),
        "columns": names,
    }
|
|
137
|
+
|
|
138
|
+
@staticmethod
|
|
139
|
+
def convert_profile_result(result: Any) -> dict[str, Any]:
|
|
140
|
+
"""Convert truthound TableProfile or ProfileReport to ProfileResult dict.
|
|
141
|
+
|
|
142
|
+
Supports both new TableProfile and legacy ProfileReport formats.
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
result: Truthound profile result object.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
Dictionary with ProfileResult fields.
|
|
149
|
+
"""
|
|
150
|
+
# Check if this is the new TableProfile or legacy ProfileReport
|
|
151
|
+
if hasattr(result, "estimated_memory_bytes"):
|
|
152
|
+
return TruthoundResultConverter._convert_table_profile(result)
|
|
153
|
+
else:
|
|
154
|
+
return TruthoundResultConverter._convert_legacy_profile(result)
|
|
155
|
+
|
|
156
|
+
@staticmethod
|
|
157
|
+
def _convert_table_profile(result: Any) -> dict[str, Any]:
|
|
158
|
+
"""Convert new truthound TableProfile to ProfileResult dict."""
|
|
159
|
+
columns = []
|
|
160
|
+
for col in getattr(result, "columns", []):
|
|
161
|
+
col_data = TruthoundResultConverter._convert_column_profile(col)
|
|
162
|
+
columns.append(col_data)
|
|
163
|
+
|
|
164
|
+
# Convert correlations
|
|
165
|
+
correlations = None
|
|
166
|
+
raw_correlations = getattr(result, "correlations", None)
|
|
167
|
+
if raw_correlations:
|
|
168
|
+
correlations = [(c[0], c[1], c[2]) for c in raw_correlations]
|
|
169
|
+
|
|
170
|
+
# Get profiled_at as ISO string
|
|
171
|
+
profiled_at = None
|
|
172
|
+
raw_profiled_at = getattr(result, "profiled_at", None)
|
|
173
|
+
if raw_profiled_at:
|
|
174
|
+
profiled_at = (
|
|
175
|
+
raw_profiled_at.isoformat()
|
|
176
|
+
if isinstance(raw_profiled_at, datetime)
|
|
177
|
+
else str(raw_profiled_at)
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
estimated_memory = getattr(result, "estimated_memory_bytes", 0)
|
|
181
|
+
|
|
182
|
+
return {
|
|
183
|
+
"name": getattr(result, "name", ""),
|
|
184
|
+
"source": getattr(result, "source", ""),
|
|
185
|
+
"row_count": getattr(result, "row_count", 0),
|
|
186
|
+
"column_count": getattr(result, "column_count", 0),
|
|
187
|
+
"estimated_memory_bytes": estimated_memory,
|
|
188
|
+
"columns": columns,
|
|
189
|
+
"duplicate_row_count": getattr(result, "duplicate_row_count", 0),
|
|
190
|
+
"duplicate_row_ratio": getattr(result, "duplicate_row_ratio", 0.0),
|
|
191
|
+
"correlations": correlations,
|
|
192
|
+
"profiled_at": profiled_at,
|
|
193
|
+
"profile_duration_ms": getattr(result, "profile_duration_ms", 0.0),
|
|
194
|
+
"size_bytes": estimated_memory,
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
@staticmethod
|
|
198
|
+
def _convert_column_profile(col: Any) -> dict[str, Any]:
|
|
199
|
+
"""Convert a single column profile."""
|
|
200
|
+
# Extract distribution stats if present
|
|
201
|
+
distribution = None
|
|
202
|
+
raw_distribution = getattr(col, "distribution", None)
|
|
203
|
+
if raw_distribution:
|
|
204
|
+
distribution = {
|
|
205
|
+
"mean": getattr(raw_distribution, "mean", None),
|
|
206
|
+
"std": getattr(raw_distribution, "std", None),
|
|
207
|
+
"min": getattr(raw_distribution, "min", None),
|
|
208
|
+
"max": getattr(raw_distribution, "max", None),
|
|
209
|
+
"median": getattr(raw_distribution, "median", None),
|
|
210
|
+
"q1": getattr(raw_distribution, "q1", None),
|
|
211
|
+
"q3": getattr(raw_distribution, "q3", None),
|
|
212
|
+
"skewness": getattr(raw_distribution, "skewness", None),
|
|
213
|
+
"kurtosis": getattr(raw_distribution, "kurtosis", None),
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
# Convert top_values
|
|
217
|
+
top_values = None
|
|
218
|
+
raw_top_values = getattr(col, "top_values", None)
|
|
219
|
+
if raw_top_values:
|
|
220
|
+
top_values = [
|
|
221
|
+
{
|
|
222
|
+
"value": str(v.value) if getattr(v, "value", None) is not None else None,
|
|
223
|
+
"count": getattr(v, "count", 0),
|
|
224
|
+
"ratio": getattr(v, "ratio", 0.0),
|
|
225
|
+
}
|
|
226
|
+
for v in raw_top_values
|
|
227
|
+
]
|
|
228
|
+
|
|
229
|
+
# Convert bottom_values
|
|
230
|
+
bottom_values = None
|
|
231
|
+
raw_bottom_values = getattr(col, "bottom_values", None)
|
|
232
|
+
if raw_bottom_values:
|
|
233
|
+
bottom_values = [
|
|
234
|
+
{
|
|
235
|
+
"value": str(v.value) if getattr(v, "value", None) is not None else None,
|
|
236
|
+
"count": getattr(v, "count", 0),
|
|
237
|
+
"ratio": getattr(v, "ratio", 0.0),
|
|
238
|
+
}
|
|
239
|
+
for v in raw_bottom_values
|
|
240
|
+
]
|
|
241
|
+
|
|
242
|
+
# Convert detected_patterns
|
|
243
|
+
detected_patterns = None
|
|
244
|
+
raw_patterns = getattr(col, "detected_patterns", None)
|
|
245
|
+
if raw_patterns:
|
|
246
|
+
detected_patterns = [
|
|
247
|
+
{
|
|
248
|
+
"pattern": getattr(p, "pattern", None),
|
|
249
|
+
"regex": getattr(p, "regex", None),
|
|
250
|
+
"match_ratio": getattr(p, "match_ratio", 0.0),
|
|
251
|
+
"sample_matches": list(getattr(p, "sample_matches", [])),
|
|
252
|
+
}
|
|
253
|
+
for p in raw_patterns
|
|
254
|
+
]
|
|
255
|
+
|
|
256
|
+
# Get inferred type value
|
|
257
|
+
inferred_type = "unknown"
|
|
258
|
+
raw_inferred_type = getattr(col, "inferred_type", None)
|
|
259
|
+
if raw_inferred_type:
|
|
260
|
+
inferred_type = (
|
|
261
|
+
raw_inferred_type.value
|
|
262
|
+
if hasattr(raw_inferred_type, "value")
|
|
263
|
+
else str(raw_inferred_type)
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
# Convert datetime fields
|
|
267
|
+
min_date = None
|
|
268
|
+
max_date = None
|
|
269
|
+
raw_min_date = getattr(col, "min_date", None)
|
|
270
|
+
raw_max_date = getattr(col, "max_date", None)
|
|
271
|
+
if raw_min_date:
|
|
272
|
+
min_date = (
|
|
273
|
+
raw_min_date.isoformat()
|
|
274
|
+
if isinstance(raw_min_date, datetime)
|
|
275
|
+
else str(raw_min_date)
|
|
276
|
+
)
|
|
277
|
+
if raw_max_date:
|
|
278
|
+
max_date = (
|
|
279
|
+
raw_max_date.isoformat()
|
|
280
|
+
if isinstance(raw_max_date, datetime)
|
|
281
|
+
else str(raw_max_date)
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
# Get suggested validators
|
|
285
|
+
suggested_validators = None
|
|
286
|
+
raw_validators = getattr(col, "suggested_validators", None)
|
|
287
|
+
if raw_validators:
|
|
288
|
+
suggested_validators = list(raw_validators)
|
|
289
|
+
|
|
290
|
+
return {
|
|
291
|
+
"name": getattr(col, "name", ""),
|
|
292
|
+
"physical_type": getattr(col, "physical_type", "unknown"),
|
|
293
|
+
"inferred_type": inferred_type,
|
|
294
|
+
"row_count": getattr(col, "row_count", 0),
|
|
295
|
+
"null_count": getattr(col, "null_count", 0),
|
|
296
|
+
"null_ratio": getattr(col, "null_ratio", 0.0),
|
|
297
|
+
"empty_string_count": getattr(col, "empty_string_count", 0),
|
|
298
|
+
"distinct_count": getattr(col, "distinct_count", 0),
|
|
299
|
+
"unique_ratio": getattr(col, "unique_ratio", 0.0),
|
|
300
|
+
"is_unique": getattr(col, "is_unique", False),
|
|
301
|
+
"is_constant": getattr(col, "is_constant", False),
|
|
302
|
+
"distribution": distribution,
|
|
303
|
+
"top_values": top_values,
|
|
304
|
+
"bottom_values": bottom_values,
|
|
305
|
+
"min_length": getattr(col, "min_length", None),
|
|
306
|
+
"max_length": getattr(col, "max_length", None),
|
|
307
|
+
"avg_length": getattr(col, "avg_length", None),
|
|
308
|
+
"detected_patterns": detected_patterns,
|
|
309
|
+
"min_date": min_date,
|
|
310
|
+
"max_date": max_date,
|
|
311
|
+
"date_gaps": getattr(col, "date_gaps", 0),
|
|
312
|
+
"suggested_validators": suggested_validators,
|
|
313
|
+
"profile_duration_ms": getattr(col, "profile_duration_ms", 0.0),
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
@staticmethod
|
|
317
|
+
def _convert_legacy_profile(result: Any) -> dict[str, Any]:
|
|
318
|
+
"""Convert legacy truthound ProfileReport to ProfileResult dict."""
|
|
319
|
+
row_count = getattr(result, "row_count", 0)
|
|
320
|
+
columns = []
|
|
321
|
+
|
|
322
|
+
for col in getattr(result, "columns", []):
|
|
323
|
+
if isinstance(col, dict):
|
|
324
|
+
col_data = TruthoundResultConverter._convert_legacy_column(col, row_count)
|
|
325
|
+
else:
|
|
326
|
+
col_data = TruthoundResultConverter._convert_column_profile(col)
|
|
327
|
+
columns.append(col_data)
|
|
328
|
+
|
|
329
|
+
size_bytes = getattr(result, "size_bytes", 0)
|
|
330
|
+
|
|
331
|
+
return {
|
|
332
|
+
"name": getattr(result, "source", ""),
|
|
333
|
+
"source": getattr(result, "source", ""),
|
|
334
|
+
"row_count": row_count,
|
|
335
|
+
"column_count": getattr(result, "column_count", len(columns)),
|
|
336
|
+
"estimated_memory_bytes": size_bytes,
|
|
337
|
+
"columns": columns,
|
|
338
|
+
"duplicate_row_count": 0,
|
|
339
|
+
"duplicate_row_ratio": 0.0,
|
|
340
|
+
"correlations": None,
|
|
341
|
+
"profiled_at": None,
|
|
342
|
+
"profile_duration_ms": 0.0,
|
|
343
|
+
"size_bytes": size_bytes,
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
@staticmethod
|
|
347
|
+
def _convert_legacy_column(col: dict, row_count: int) -> dict[str, Any]:
|
|
348
|
+
"""Convert legacy column dict to column profile dict."""
|
|
349
|
+
# Parse null_pct and unique_pct
|
|
350
|
+
null_ratio = 0.0
|
|
351
|
+
unique_ratio = 0.0
|
|
352
|
+
|
|
353
|
+
null_pct = col.get("null_pct")
|
|
354
|
+
if isinstance(null_pct, str):
|
|
355
|
+
null_ratio = float(null_pct.rstrip("%")) / 100.0
|
|
356
|
+
elif isinstance(null_pct, (int, float)):
|
|
357
|
+
null_ratio = float(null_pct)
|
|
358
|
+
|
|
359
|
+
unique_pct = col.get("unique_pct")
|
|
360
|
+
if isinstance(unique_pct, str):
|
|
361
|
+
unique_ratio = float(unique_pct.rstrip("%")) / 100.0
|
|
362
|
+
elif isinstance(unique_pct, (int, float)):
|
|
363
|
+
unique_ratio = float(unique_pct)
|
|
364
|
+
|
|
365
|
+
# Build distribution if numeric stats present
|
|
366
|
+
distribution = None
|
|
367
|
+
if col.get("min") is not None or col.get("mean") is not None:
|
|
368
|
+
distribution = {
|
|
369
|
+
"min": col.get("min"),
|
|
370
|
+
"max": col.get("max"),
|
|
371
|
+
"mean": col.get("mean"),
|
|
372
|
+
"std": col.get("std"),
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
return {
|
|
376
|
+
"name": col.get("name", ""),
|
|
377
|
+
"physical_type": col.get("dtype", "unknown"),
|
|
378
|
+
"inferred_type": col.get("dtype", "unknown"),
|
|
379
|
+
"row_count": row_count,
|
|
380
|
+
"null_count": 0,
|
|
381
|
+
"null_ratio": null_ratio,
|
|
382
|
+
"empty_string_count": 0,
|
|
383
|
+
"distinct_count": 0,
|
|
384
|
+
"unique_ratio": unique_ratio,
|
|
385
|
+
"is_unique": False,
|
|
386
|
+
"is_constant": False,
|
|
387
|
+
"distribution": distribution,
|
|
388
|
+
"top_values": None,
|
|
389
|
+
"bottom_values": None,
|
|
390
|
+
"min_length": None,
|
|
391
|
+
"max_length": None,
|
|
392
|
+
"avg_length": None,
|
|
393
|
+
"detected_patterns": None,
|
|
394
|
+
"min_date": None,
|
|
395
|
+
"max_date": None,
|
|
396
|
+
"date_gaps": 0,
|
|
397
|
+
"suggested_validators": None,
|
|
398
|
+
"profile_duration_ms": 0.0,
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
@staticmethod
|
|
402
|
+
def convert_compare_result(result: Any) -> dict[str, Any]:
    """Convert a truthound DriftReport into a CompareResult dict.

    Only columns whose ``result`` attribute is present and truthy are
    emitted, and ``total_columns`` counts just those emitted entries.
    The drifted-column list comes from ``result.get_drifted_columns()``
    when the report has that method, otherwise from the emitted entries
    whose ``drifted`` flag is truthy.

    Args:
        result: Truthound DriftReport object.

    Returns:
        Dictionary with source metadata, drift flags, the drifted column
        names and the per-column drift details.
    """
    per_column = []
    for entry in getattr(result, "columns", []):
        outcome = getattr(entry, "result", None)
        if not outcome:
            # No comparison outcome for this column: leave it out.
            continue

        # Enum-like levels contribute ``.value``; others are stringified.
        drift_level = getattr(outcome, "level", "none")
        if hasattr(drift_level, "value"):
            level_text = drift_level.value
        else:
            level_text = str(drift_level)

        per_column.append(
            {
                "column": getattr(entry, "column", ""),
                "dtype": getattr(entry, "dtype", "unknown"),
                "drifted": getattr(outcome, "drifted", False),
                "level": level_text,
                "method": getattr(outcome, "method", "unknown"),
                "statistic": getattr(outcome, "statistic", 0.0),
                "p_value": getattr(outcome, "p_value", 1.0),
                "baseline_stats": getattr(entry, "baseline_stats", {}),
                "current_stats": getattr(entry, "current_stats", {}),
            }
        )

    if not hasattr(result, "get_drifted_columns"):
        drifted = [row["column"] for row in per_column if row.get("drifted")]
    else:
        drifted = result.get_drifted_columns()

    return {
        "baseline_source": getattr(result, "baseline_source", ""),
        "current_source": getattr(result, "current_source", ""),
        "baseline_rows": getattr(result, "baseline_rows", 0),
        "current_rows": getattr(result, "current_rows", 0),
        "has_drift": getattr(result, "has_drift", False),
        "has_high_drift": getattr(result, "has_high_drift", False),
        "total_columns": len(per_column),
        "drifted_columns": drifted,
        "columns": per_column,
    }
|
|
459
|
+
|
|
460
|
+
@staticmethod
|
|
461
|
+
def convert_scan_result(result: Any) -> dict[str, Any]:
|
|
462
|
+
"""Convert truthound PIIReport to ScanResult dict.
|
|
463
|
+
|
|
464
|
+
Args:
|
|
465
|
+
result: Truthound PIIReport object.
|
|
466
|
+
|
|
467
|
+
Returns:
|
|
468
|
+
Dictionary with ScanResult fields.
|
|
469
|
+
"""
|
|
470
|
+
# Convert findings
|
|
471
|
+
findings = []
|
|
472
|
+
columns_with_pii = set()
|
|
473
|
+
for finding in getattr(result, "findings", []):
|
|
474
|
+
col = getattr(finding, "column", "")
|
|
475
|
+
columns_with_pii.add(col)
|
|
476
|
+
findings.append({
|
|
477
|
+
"column": col,
|
|
478
|
+
"pii_type": getattr(finding, "pii_type", "unknown"),
|
|
479
|
+
"confidence": getattr(finding, "confidence", 0.0),
|
|
480
|
+
"sample_count": getattr(finding, "sample_count", 0),
|
|
481
|
+
"sample_values": getattr(finding, "sample_values", None),
|
|
482
|
+
})
|
|
483
|
+
|
|
484
|
+
# Convert violations
|
|
485
|
+
violations = []
|
|
486
|
+
for violation in getattr(result, "violations", []):
|
|
487
|
+
violations.append({
|
|
488
|
+
"regulation": getattr(violation, "regulation", "unknown"),
|
|
489
|
+
"column": getattr(violation, "column", ""),
|
|
490
|
+
"pii_type": getattr(violation, "pii_type", "unknown"),
|
|
491
|
+
"message": getattr(violation, "message", ""),
|
|
492
|
+
"severity": getattr(violation, "severity", "high"),
|
|
493
|
+
})
|
|
494
|
+
|
|
495
|
+
return {
|
|
496
|
+
"source": getattr(result, "source", ""),
|
|
497
|
+
"row_count": getattr(result, "row_count", 0),
|
|
498
|
+
"column_count": getattr(result, "column_count", 0),
|
|
499
|
+
"total_columns_scanned": getattr(result, "column_count", 0),
|
|
500
|
+
"columns_with_pii": len(columns_with_pii),
|
|
501
|
+
"total_findings": len(findings),
|
|
502
|
+
"has_violations": getattr(result, "has_violations", len(violations) > 0),
|
|
503
|
+
"total_violations": len(violations),
|
|
504
|
+
"findings": findings,
|
|
505
|
+
"violations": violations,
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
@staticmethod
|
|
509
|
+
def convert_mask_result(
|
|
510
|
+
source: Any,
|
|
511
|
+
output: str,
|
|
512
|
+
masked_df: Any,
|
|
513
|
+
strategy: str,
|
|
514
|
+
columns: list[str] | None,
|
|
515
|
+
) -> dict[str, Any]:
|
|
516
|
+
"""Convert truthound mask result to MaskResult dict.
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
source: Original data source.
|
|
520
|
+
output: Output file path.
|
|
521
|
+
masked_df: Polars DataFrame with masked data.
|
|
522
|
+
strategy: Masking strategy used.
|
|
523
|
+
columns: Columns that were masked.
|
|
524
|
+
|
|
525
|
+
Returns:
|
|
526
|
+
Dictionary with MaskResult fields.
|
|
527
|
+
"""
|
|
528
|
+
# Get column information from the DataFrame
|
|
529
|
+
all_columns = list(masked_df.columns) if hasattr(masked_df, "columns") else []
|
|
530
|
+
row_count = len(masked_df) if hasattr(masked_df, "__len__") else 0
|
|
531
|
+
|
|
532
|
+
# Get source name
|
|
533
|
+
if isinstance(source, str):
|
|
534
|
+
source_name = source
|
|
535
|
+
else:
|
|
536
|
+
source_name = getattr(source, "name", str(type(source).__name__))
|
|
537
|
+
|
|
538
|
+
# Write the masked data to output file
|
|
539
|
+
output_path = Path(output)
|
|
540
|
+
suffix = output_path.suffix.lower()
|
|
541
|
+
|
|
542
|
+
if hasattr(masked_df, "write_csv"):
|
|
543
|
+
if suffix == ".csv":
|
|
544
|
+
masked_df.write_csv(output)
|
|
545
|
+
elif suffix == ".parquet" and hasattr(masked_df, "write_parquet"):
|
|
546
|
+
masked_df.write_parquet(output)
|
|
547
|
+
elif suffix == ".json" and hasattr(masked_df, "write_json"):
|
|
548
|
+
masked_df.write_json(output)
|
|
549
|
+
else:
|
|
550
|
+
# Default to CSV
|
|
551
|
+
masked_df.write_csv(output)
|
|
552
|
+
|
|
553
|
+
return {
|
|
554
|
+
"source": source_name,
|
|
555
|
+
"output_path": str(output_path.absolute()),
|
|
556
|
+
"row_count": row_count,
|
|
557
|
+
"column_count": len(all_columns),
|
|
558
|
+
"columns_masked": columns if columns else [],
|
|
559
|
+
"strategy": strategy,
|
|
560
|
+
"original_columns": all_columns,
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
@staticmethod
|
|
564
|
+
def convert_suite_result(
|
|
565
|
+
suite: Any,
|
|
566
|
+
strictness: str,
|
|
567
|
+
output_format: str = "yaml",
|
|
568
|
+
) -> dict[str, Any]:
|
|
569
|
+
"""Convert truthound ValidationSuite to GenerateSuiteResult dict.
|
|
570
|
+
|
|
571
|
+
Args:
|
|
572
|
+
suite: ValidationSuite from generate_suite().
|
|
573
|
+
strictness: Strictness level used.
|
|
574
|
+
output_format: Requested output format.
|
|
575
|
+
|
|
576
|
+
Returns:
|
|
577
|
+
Dictionary with GenerateSuiteResult fields.
|
|
578
|
+
"""
|
|
579
|
+
rules = []
|
|
580
|
+
categories = set()
|
|
581
|
+
|
|
582
|
+
if hasattr(suite, "rules"):
|
|
583
|
+
for rule in suite.rules:
|
|
584
|
+
rule_dict = {
|
|
585
|
+
"name": getattr(rule, "name", ""),
|
|
586
|
+
"validator": getattr(rule, "validator", ""),
|
|
587
|
+
"column": getattr(rule, "column", None),
|
|
588
|
+
"params": getattr(rule, "params", {}),
|
|
589
|
+
"severity": getattr(rule, "severity", "medium"),
|
|
590
|
+
"category": getattr(rule, "category", "unknown"),
|
|
591
|
+
}
|
|
592
|
+
rules.append(rule_dict)
|
|
593
|
+
if rule_dict["category"]:
|
|
594
|
+
categories.add(rule_dict["category"])
|
|
595
|
+
|
|
596
|
+
# Generate YAML content
|
|
597
|
+
yaml_content = ""
|
|
598
|
+
if hasattr(suite, "to_yaml"):
|
|
599
|
+
yaml_content = suite.to_yaml()
|
|
600
|
+
else:
|
|
601
|
+
yaml_content = yaml.dump(
|
|
602
|
+
{"rules": rules},
|
|
603
|
+
default_flow_style=False,
|
|
604
|
+
sort_keys=False,
|
|
605
|
+
allow_unicode=True,
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
# Generate JSON content
|
|
609
|
+
json_content = {"rules": rules}
|
|
610
|
+
if hasattr(suite, "to_dict"):
|
|
611
|
+
json_content = suite.to_dict()
|
|
612
|
+
|
|
613
|
+
return {
|
|
614
|
+
"rules": rules,
|
|
615
|
+
"rule_count": len(rules),
|
|
616
|
+
"categories": sorted(categories),
|
|
617
|
+
"strictness": strictness,
|
|
618
|
+
"yaml_content": yaml_content,
|
|
619
|
+
"json_content": json_content,
|
|
620
|
+
}
|