truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,14 @@
1
+ """Result converters for data quality backends.
2
+
3
+ This module provides converters that transform backend-specific result
4
+ objects into dashboard-standard result models.
5
+
6
+ The converter pattern isolates backend-specific code and makes it easy
7
+ to support multiple backends or handle API changes.
8
+ """
9
+
10
+ from .truthound import TruthoundResultConverter
11
+
12
+ __all__ = [
13
+ "TruthoundResultConverter",
14
+ ]
@@ -0,0 +1,620 @@
1
+ """Truthound result converters.
2
+
3
+ This module isolates all truthound-specific result object conversions.
4
+ It handles converting truthound's Report, Schema, TableProfile, etc.
5
+ into dashboard-standard result dataclasses.
6
+
7
+ By isolating conversions here, we can:
8
+ - Handle truthound API changes in one place
9
+ - Support multiple truthound versions
10
+ - Provide graceful fallbacks for missing attributes
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ import yaml
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class TruthoundResultConverter:
26
+ """Converter for truthound result objects.
27
+
28
+ This class provides static methods to convert truthound-specific
29
+ objects into dashboard result dataclasses.
30
+
31
+ All conversions use defensive attribute access (getattr with defaults)
32
+ to handle different truthound versions gracefully.
33
+ """
34
+
35
+ @staticmethod
36
+ def convert_severity(severity: Any) -> str:
37
+ """Safely convert severity enum or value to lowercase string.
38
+
39
+ Args:
40
+ severity: Severity value (enum with .value or string).
41
+
42
+ Returns:
43
+ Lowercase severity string.
44
+ """
45
+ if hasattr(severity, "value"):
46
+ return str(severity.value).lower()
47
+ return str(severity).lower()
48
+
49
+ @staticmethod
50
+ def convert_check_result(result: Any) -> dict[str, Any]:
51
+ """Convert truthound Report to CheckResult dict.
52
+
53
+ The truthound Report contains:
54
+ - issues: list[ValidationIssue]
55
+ - source: str
56
+ - row_count: int
57
+ - column_count: int
58
+ - has_issues: bool
59
+ - has_critical: bool
60
+ - has_high: bool
61
+
62
+ Args:
63
+ result: Truthound Report object.
64
+
65
+ Returns:
66
+ Dictionary with CheckResult fields.
67
+ """
68
+ issues = getattr(result, "issues", [])
69
+ severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
70
+
71
+ converted_issues = []
72
+ for issue in issues:
73
+ severity = TruthoundResultConverter.convert_severity(issue.severity)
74
+ if severity in severity_counts:
75
+ severity_counts[severity] += 1
76
+
77
+ converted_issues.append({
78
+ "column": getattr(issue, "column", ""),
79
+ "issue_type": getattr(issue, "issue_type", "unknown"),
80
+ "count": getattr(issue, "count", 0),
81
+ "severity": severity,
82
+ "details": getattr(issue, "details", None),
83
+ "expected": getattr(issue, "expected", None),
84
+ "actual": getattr(issue, "actual", None),
85
+ "sample_values": getattr(issue, "sample_values", None),
86
+ })
87
+
88
+ return {
89
+ "passed": not getattr(result, "has_issues", len(issues) > 0),
90
+ "has_critical": getattr(result, "has_critical", severity_counts["critical"] > 0),
91
+ "has_high": getattr(result, "has_high", severity_counts["high"] > 0),
92
+ "total_issues": len(issues),
93
+ "critical_issues": severity_counts["critical"],
94
+ "high_issues": severity_counts["high"],
95
+ "medium_issues": severity_counts["medium"],
96
+ "low_issues": severity_counts["low"],
97
+ "source": getattr(result, "source", ""),
98
+ "row_count": getattr(result, "row_count", 0),
99
+ "column_count": getattr(result, "column_count", 0),
100
+ "issues": converted_issues,
101
+ }
102
+
103
+ @staticmethod
104
+ def convert_learn_result(result: Any) -> dict[str, Any]:
105
+ """Convert truthound Schema to LearnResult dict.
106
+
107
+ The truthound Schema contains:
108
+ - columns: dict[str, ColumnSchema]
109
+ - row_count: int | None
110
+ - version: str
111
+ - to_dict(): Convert to dictionary
112
+
113
+ Args:
114
+ result: Truthound Schema object.
115
+
116
+ Returns:
117
+ Dictionary with LearnResult fields.
118
+ """
119
+ schema_dict = result.to_dict() if hasattr(result, "to_dict") else {}
120
+ schema_yaml = yaml.dump(
121
+ schema_dict,
122
+ default_flow_style=False,
123
+ sort_keys=False,
124
+ allow_unicode=True,
125
+ )
126
+
127
+ columns = getattr(result, "columns", {})
128
+ column_list = list(columns.keys()) if isinstance(columns, dict) else []
129
+
130
+ return {
131
+ "schema": schema_dict,
132
+ "schema_yaml": schema_yaml,
133
+ "row_count": getattr(result, "row_count", None),
134
+ "column_count": len(column_list),
135
+ "columns": column_list,
136
+ }
137
+
138
+ @staticmethod
139
+ def convert_profile_result(result: Any) -> dict[str, Any]:
140
+ """Convert truthound TableProfile or ProfileReport to ProfileResult dict.
141
+
142
+ Supports both new TableProfile and legacy ProfileReport formats.
143
+
144
+ Args:
145
+ result: Truthound profile result object.
146
+
147
+ Returns:
148
+ Dictionary with ProfileResult fields.
149
+ """
150
+ # Check if this is the new TableProfile or legacy ProfileReport
151
+ if hasattr(result, "estimated_memory_bytes"):
152
+ return TruthoundResultConverter._convert_table_profile(result)
153
+ else:
154
+ return TruthoundResultConverter._convert_legacy_profile(result)
155
+
156
+ @staticmethod
157
+ def _convert_table_profile(result: Any) -> dict[str, Any]:
158
+ """Convert new truthound TableProfile to ProfileResult dict."""
159
+ columns = []
160
+ for col in getattr(result, "columns", []):
161
+ col_data = TruthoundResultConverter._convert_column_profile(col)
162
+ columns.append(col_data)
163
+
164
+ # Convert correlations
165
+ correlations = None
166
+ raw_correlations = getattr(result, "correlations", None)
167
+ if raw_correlations:
168
+ correlations = [(c[0], c[1], c[2]) for c in raw_correlations]
169
+
170
+ # Get profiled_at as ISO string
171
+ profiled_at = None
172
+ raw_profiled_at = getattr(result, "profiled_at", None)
173
+ if raw_profiled_at:
174
+ profiled_at = (
175
+ raw_profiled_at.isoformat()
176
+ if isinstance(raw_profiled_at, datetime)
177
+ else str(raw_profiled_at)
178
+ )
179
+
180
+ estimated_memory = getattr(result, "estimated_memory_bytes", 0)
181
+
182
+ return {
183
+ "name": getattr(result, "name", ""),
184
+ "source": getattr(result, "source", ""),
185
+ "row_count": getattr(result, "row_count", 0),
186
+ "column_count": getattr(result, "column_count", 0),
187
+ "estimated_memory_bytes": estimated_memory,
188
+ "columns": columns,
189
+ "duplicate_row_count": getattr(result, "duplicate_row_count", 0),
190
+ "duplicate_row_ratio": getattr(result, "duplicate_row_ratio", 0.0),
191
+ "correlations": correlations,
192
+ "profiled_at": profiled_at,
193
+ "profile_duration_ms": getattr(result, "profile_duration_ms", 0.0),
194
+ "size_bytes": estimated_memory,
195
+ }
196
+
197
+ @staticmethod
198
+ def _convert_column_profile(col: Any) -> dict[str, Any]:
199
+ """Convert a single column profile."""
200
+ # Extract distribution stats if present
201
+ distribution = None
202
+ raw_distribution = getattr(col, "distribution", None)
203
+ if raw_distribution:
204
+ distribution = {
205
+ "mean": getattr(raw_distribution, "mean", None),
206
+ "std": getattr(raw_distribution, "std", None),
207
+ "min": getattr(raw_distribution, "min", None),
208
+ "max": getattr(raw_distribution, "max", None),
209
+ "median": getattr(raw_distribution, "median", None),
210
+ "q1": getattr(raw_distribution, "q1", None),
211
+ "q3": getattr(raw_distribution, "q3", None),
212
+ "skewness": getattr(raw_distribution, "skewness", None),
213
+ "kurtosis": getattr(raw_distribution, "kurtosis", None),
214
+ }
215
+
216
+ # Convert top_values
217
+ top_values = None
218
+ raw_top_values = getattr(col, "top_values", None)
219
+ if raw_top_values:
220
+ top_values = [
221
+ {
222
+ "value": str(v.value) if getattr(v, "value", None) is not None else None,
223
+ "count": getattr(v, "count", 0),
224
+ "ratio": getattr(v, "ratio", 0.0),
225
+ }
226
+ for v in raw_top_values
227
+ ]
228
+
229
+ # Convert bottom_values
230
+ bottom_values = None
231
+ raw_bottom_values = getattr(col, "bottom_values", None)
232
+ if raw_bottom_values:
233
+ bottom_values = [
234
+ {
235
+ "value": str(v.value) if getattr(v, "value", None) is not None else None,
236
+ "count": getattr(v, "count", 0),
237
+ "ratio": getattr(v, "ratio", 0.0),
238
+ }
239
+ for v in raw_bottom_values
240
+ ]
241
+
242
+ # Convert detected_patterns
243
+ detected_patterns = None
244
+ raw_patterns = getattr(col, "detected_patterns", None)
245
+ if raw_patterns:
246
+ detected_patterns = [
247
+ {
248
+ "pattern": getattr(p, "pattern", None),
249
+ "regex": getattr(p, "regex", None),
250
+ "match_ratio": getattr(p, "match_ratio", 0.0),
251
+ "sample_matches": list(getattr(p, "sample_matches", [])),
252
+ }
253
+ for p in raw_patterns
254
+ ]
255
+
256
+ # Get inferred type value
257
+ inferred_type = "unknown"
258
+ raw_inferred_type = getattr(col, "inferred_type", None)
259
+ if raw_inferred_type:
260
+ inferred_type = (
261
+ raw_inferred_type.value
262
+ if hasattr(raw_inferred_type, "value")
263
+ else str(raw_inferred_type)
264
+ )
265
+
266
+ # Convert datetime fields
267
+ min_date = None
268
+ max_date = None
269
+ raw_min_date = getattr(col, "min_date", None)
270
+ raw_max_date = getattr(col, "max_date", None)
271
+ if raw_min_date:
272
+ min_date = (
273
+ raw_min_date.isoformat()
274
+ if isinstance(raw_min_date, datetime)
275
+ else str(raw_min_date)
276
+ )
277
+ if raw_max_date:
278
+ max_date = (
279
+ raw_max_date.isoformat()
280
+ if isinstance(raw_max_date, datetime)
281
+ else str(raw_max_date)
282
+ )
283
+
284
+ # Get suggested validators
285
+ suggested_validators = None
286
+ raw_validators = getattr(col, "suggested_validators", None)
287
+ if raw_validators:
288
+ suggested_validators = list(raw_validators)
289
+
290
+ return {
291
+ "name": getattr(col, "name", ""),
292
+ "physical_type": getattr(col, "physical_type", "unknown"),
293
+ "inferred_type": inferred_type,
294
+ "row_count": getattr(col, "row_count", 0),
295
+ "null_count": getattr(col, "null_count", 0),
296
+ "null_ratio": getattr(col, "null_ratio", 0.0),
297
+ "empty_string_count": getattr(col, "empty_string_count", 0),
298
+ "distinct_count": getattr(col, "distinct_count", 0),
299
+ "unique_ratio": getattr(col, "unique_ratio", 0.0),
300
+ "is_unique": getattr(col, "is_unique", False),
301
+ "is_constant": getattr(col, "is_constant", False),
302
+ "distribution": distribution,
303
+ "top_values": top_values,
304
+ "bottom_values": bottom_values,
305
+ "min_length": getattr(col, "min_length", None),
306
+ "max_length": getattr(col, "max_length", None),
307
+ "avg_length": getattr(col, "avg_length", None),
308
+ "detected_patterns": detected_patterns,
309
+ "min_date": min_date,
310
+ "max_date": max_date,
311
+ "date_gaps": getattr(col, "date_gaps", 0),
312
+ "suggested_validators": suggested_validators,
313
+ "profile_duration_ms": getattr(col, "profile_duration_ms", 0.0),
314
+ }
315
+
316
+ @staticmethod
317
+ def _convert_legacy_profile(result: Any) -> dict[str, Any]:
318
+ """Convert legacy truthound ProfileReport to ProfileResult dict."""
319
+ row_count = getattr(result, "row_count", 0)
320
+ columns = []
321
+
322
+ for col in getattr(result, "columns", []):
323
+ if isinstance(col, dict):
324
+ col_data = TruthoundResultConverter._convert_legacy_column(col, row_count)
325
+ else:
326
+ col_data = TruthoundResultConverter._convert_column_profile(col)
327
+ columns.append(col_data)
328
+
329
+ size_bytes = getattr(result, "size_bytes", 0)
330
+
331
+ return {
332
+ "name": getattr(result, "source", ""),
333
+ "source": getattr(result, "source", ""),
334
+ "row_count": row_count,
335
+ "column_count": getattr(result, "column_count", len(columns)),
336
+ "estimated_memory_bytes": size_bytes,
337
+ "columns": columns,
338
+ "duplicate_row_count": 0,
339
+ "duplicate_row_ratio": 0.0,
340
+ "correlations": None,
341
+ "profiled_at": None,
342
+ "profile_duration_ms": 0.0,
343
+ "size_bytes": size_bytes,
344
+ }
345
+
346
+ @staticmethod
347
+ def _convert_legacy_column(col: dict, row_count: int) -> dict[str, Any]:
348
+ """Convert legacy column dict to column profile dict."""
349
+ # Parse null_pct and unique_pct
350
+ null_ratio = 0.0
351
+ unique_ratio = 0.0
352
+
353
+ null_pct = col.get("null_pct")
354
+ if isinstance(null_pct, str):
355
+ null_ratio = float(null_pct.rstrip("%")) / 100.0
356
+ elif isinstance(null_pct, (int, float)):
357
+ null_ratio = float(null_pct)
358
+
359
+ unique_pct = col.get("unique_pct")
360
+ if isinstance(unique_pct, str):
361
+ unique_ratio = float(unique_pct.rstrip("%")) / 100.0
362
+ elif isinstance(unique_pct, (int, float)):
363
+ unique_ratio = float(unique_pct)
364
+
365
+ # Build distribution if numeric stats present
366
+ distribution = None
367
+ if col.get("min") is not None or col.get("mean") is not None:
368
+ distribution = {
369
+ "min": col.get("min"),
370
+ "max": col.get("max"),
371
+ "mean": col.get("mean"),
372
+ "std": col.get("std"),
373
+ }
374
+
375
+ return {
376
+ "name": col.get("name", ""),
377
+ "physical_type": col.get("dtype", "unknown"),
378
+ "inferred_type": col.get("dtype", "unknown"),
379
+ "row_count": row_count,
380
+ "null_count": 0,
381
+ "null_ratio": null_ratio,
382
+ "empty_string_count": 0,
383
+ "distinct_count": 0,
384
+ "unique_ratio": unique_ratio,
385
+ "is_unique": False,
386
+ "is_constant": False,
387
+ "distribution": distribution,
388
+ "top_values": None,
389
+ "bottom_values": None,
390
+ "min_length": None,
391
+ "max_length": None,
392
+ "avg_length": None,
393
+ "detected_patterns": None,
394
+ "min_date": None,
395
+ "max_date": None,
396
+ "date_gaps": 0,
397
+ "suggested_validators": None,
398
+ "profile_duration_ms": 0.0,
399
+ }
400
+
401
+ @staticmethod
402
+ def convert_compare_result(result: Any) -> dict[str, Any]:
403
+ """Convert truthound DriftReport to CompareResult dict.
404
+
405
+ The truthound DriftReport contains:
406
+ - baseline_source: str
407
+ - current_source: str
408
+ - baseline_rows: int
409
+ - current_rows: int
410
+ - columns: list[ColumnDrift]
411
+ - has_drift: bool
412
+ - has_high_drift: bool
413
+ - get_drifted_columns(): list[str]
414
+
415
+ Args:
416
+ result: Truthound DriftReport object.
417
+
418
+ Returns:
419
+ Dictionary with CompareResult fields.
420
+ """
421
+ columns = []
422
+ for col in getattr(result, "columns", []):
423
+ col_result = getattr(col, "result", None)
424
+ if col_result:
425
+ level = getattr(col_result, "level", "none")
426
+ level_str = (
427
+ level.value if hasattr(level, "value") else str(level)
428
+ )
429
+ columns.append({
430
+ "column": getattr(col, "column", ""),
431
+ "dtype": getattr(col, "dtype", "unknown"),
432
+ "drifted": getattr(col_result, "drifted", False),
433
+ "level": level_str,
434
+ "method": getattr(col_result, "method", "unknown"),
435
+ "statistic": getattr(col_result, "statistic", 0.0),
436
+ "p_value": getattr(col_result, "p_value", 1.0),
437
+ "baseline_stats": getattr(col, "baseline_stats", {}),
438
+ "current_stats": getattr(col, "current_stats", {}),
439
+ })
440
+
441
+ # Get drifted columns
442
+ drifted_columns = []
443
+ if hasattr(result, "get_drifted_columns"):
444
+ drifted_columns = result.get_drifted_columns()
445
+ else:
446
+ drifted_columns = [c["column"] for c in columns if c.get("drifted")]
447
+
448
+ return {
449
+ "baseline_source": getattr(result, "baseline_source", ""),
450
+ "current_source": getattr(result, "current_source", ""),
451
+ "baseline_rows": getattr(result, "baseline_rows", 0),
452
+ "current_rows": getattr(result, "current_rows", 0),
453
+ "has_drift": getattr(result, "has_drift", False),
454
+ "has_high_drift": getattr(result, "has_high_drift", False),
455
+ "total_columns": len(columns),
456
+ "drifted_columns": drifted_columns,
457
+ "columns": columns,
458
+ }
459
+
460
+ @staticmethod
461
+ def convert_scan_result(result: Any) -> dict[str, Any]:
462
+ """Convert truthound PIIReport to ScanResult dict.
463
+
464
+ Args:
465
+ result: Truthound PIIReport object.
466
+
467
+ Returns:
468
+ Dictionary with ScanResult fields.
469
+ """
470
+ # Convert findings
471
+ findings = []
472
+ columns_with_pii = set()
473
+ for finding in getattr(result, "findings", []):
474
+ col = getattr(finding, "column", "")
475
+ columns_with_pii.add(col)
476
+ findings.append({
477
+ "column": col,
478
+ "pii_type": getattr(finding, "pii_type", "unknown"),
479
+ "confidence": getattr(finding, "confidence", 0.0),
480
+ "sample_count": getattr(finding, "sample_count", 0),
481
+ "sample_values": getattr(finding, "sample_values", None),
482
+ })
483
+
484
+ # Convert violations
485
+ violations = []
486
+ for violation in getattr(result, "violations", []):
487
+ violations.append({
488
+ "regulation": getattr(violation, "regulation", "unknown"),
489
+ "column": getattr(violation, "column", ""),
490
+ "pii_type": getattr(violation, "pii_type", "unknown"),
491
+ "message": getattr(violation, "message", ""),
492
+ "severity": getattr(violation, "severity", "high"),
493
+ })
494
+
495
+ return {
496
+ "source": getattr(result, "source", ""),
497
+ "row_count": getattr(result, "row_count", 0),
498
+ "column_count": getattr(result, "column_count", 0),
499
+ "total_columns_scanned": getattr(result, "column_count", 0),
500
+ "columns_with_pii": len(columns_with_pii),
501
+ "total_findings": len(findings),
502
+ "has_violations": getattr(result, "has_violations", len(violations) > 0),
503
+ "total_violations": len(violations),
504
+ "findings": findings,
505
+ "violations": violations,
506
+ }
507
+
508
+ @staticmethod
509
+ def convert_mask_result(
510
+ source: Any,
511
+ output: str,
512
+ masked_df: Any,
513
+ strategy: str,
514
+ columns: list[str] | None,
515
+ ) -> dict[str, Any]:
516
+ """Convert truthound mask result to MaskResult dict.
517
+
518
+ Args:
519
+ source: Original data source.
520
+ output: Output file path.
521
+ masked_df: Polars DataFrame with masked data.
522
+ strategy: Masking strategy used.
523
+ columns: Columns that were masked.
524
+
525
+ Returns:
526
+ Dictionary with MaskResult fields.
527
+ """
528
+ # Get column information from the DataFrame
529
+ all_columns = list(masked_df.columns) if hasattr(masked_df, "columns") else []
530
+ row_count = len(masked_df) if hasattr(masked_df, "__len__") else 0
531
+
532
+ # Get source name
533
+ if isinstance(source, str):
534
+ source_name = source
535
+ else:
536
+ source_name = getattr(source, "name", str(type(source).__name__))
537
+
538
+ # Write the masked data to output file
539
+ output_path = Path(output)
540
+ suffix = output_path.suffix.lower()
541
+
542
+ if hasattr(masked_df, "write_csv"):
543
+ if suffix == ".csv":
544
+ masked_df.write_csv(output)
545
+ elif suffix == ".parquet" and hasattr(masked_df, "write_parquet"):
546
+ masked_df.write_parquet(output)
547
+ elif suffix == ".json" and hasattr(masked_df, "write_json"):
548
+ masked_df.write_json(output)
549
+ else:
550
+ # Default to CSV
551
+ masked_df.write_csv(output)
552
+
553
+ return {
554
+ "source": source_name,
555
+ "output_path": str(output_path.absolute()),
556
+ "row_count": row_count,
557
+ "column_count": len(all_columns),
558
+ "columns_masked": columns if columns else [],
559
+ "strategy": strategy,
560
+ "original_columns": all_columns,
561
+ }
562
+
563
+ @staticmethod
564
+ def convert_suite_result(
565
+ suite: Any,
566
+ strictness: str,
567
+ output_format: str = "yaml",
568
+ ) -> dict[str, Any]:
569
+ """Convert truthound ValidationSuite to GenerateSuiteResult dict.
570
+
571
+ Args:
572
+ suite: ValidationSuite from generate_suite().
573
+ strictness: Strictness level used.
574
+ output_format: Requested output format.
575
+
576
+ Returns:
577
+ Dictionary with GenerateSuiteResult fields.
578
+ """
579
+ rules = []
580
+ categories = set()
581
+
582
+ if hasattr(suite, "rules"):
583
+ for rule in suite.rules:
584
+ rule_dict = {
585
+ "name": getattr(rule, "name", ""),
586
+ "validator": getattr(rule, "validator", ""),
587
+ "column": getattr(rule, "column", None),
588
+ "params": getattr(rule, "params", {}),
589
+ "severity": getattr(rule, "severity", "medium"),
590
+ "category": getattr(rule, "category", "unknown"),
591
+ }
592
+ rules.append(rule_dict)
593
+ if rule_dict["category"]:
594
+ categories.add(rule_dict["category"])
595
+
596
+ # Generate YAML content
597
+ yaml_content = ""
598
+ if hasattr(suite, "to_yaml"):
599
+ yaml_content = suite.to_yaml()
600
+ else:
601
+ yaml_content = yaml.dump(
602
+ {"rules": rules},
603
+ default_flow_style=False,
604
+ sort_keys=False,
605
+ allow_unicode=True,
606
+ )
607
+
608
+ # Generate JSON content
609
+ json_content = {"rules": rules}
610
+ if hasattr(suite, "to_dict"):
611
+ json_content = suite.to_dict()
612
+
613
+ return {
614
+ "rules": rules,
615
+ "rule_count": len(rules),
616
+ "categories": sorted(categories),
617
+ "strictness": strictness,
618
+ "yaml_content": yaml_content,
619
+ "json_content": json_content,
620
+ }