truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,275 @@
1
+ """Profiling validators.
2
+
3
+ Validators for data profiling metrics including cardinality, entropy,
4
+ and value frequency distributions.
5
+
6
+ Import path in truthound: `from truthound.validators.profiling import *`
7
+ """
8
+
9
+ from .base import (
10
+ ParameterDefinition,
11
+ ParameterType,
12
+ ValidatorCategory,
13
+ ValidatorDefinition,
14
+ )
15
+
16
def _column_param(
    description: str,
    name: str = "column",
    label: str = "Column",
) -> ParameterDefinition:
    """Build a required ``ParameterType.COLUMN`` parameter.

    Args:
        description: Help text for the parameter.
        name: Parameter name; defaults to ``"column"``.
        label: Display label; defaults to ``"Column"``.

    Returns:
        A new ``ParameterDefinition`` on every call, so no instance is
        shared between validators.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=ParameterType.COLUMN,
        required=True,
        description=description,
    )


def _select_options(*pairs: tuple[str, str]) -> list[dict[str, str]]:
    """Convert ``(value, label)`` pairs into SELECT option dicts, in order."""
    return [{"value": value, "label": label} for value, label in pairs]


# Distinct-count bounds for a single column.
_CARDINALITY = ValidatorDefinition(
    name="Cardinality",
    display_name="Cardinality Validation",
    category=ValidatorCategory.PROFILING,
    description="Validates that column cardinality (distinct count) falls within expected bounds.",
    parameters=[
        _column_param("Column to check cardinality"),
        ParameterDefinition(
            name="min_cardinality",
            label="Minimum Cardinality",
            type=ParameterType.INTEGER,
            min_value=0,
            description="Minimum expected distinct values",
        ),
        ParameterDefinition(
            name="max_cardinality",
            label="Maximum Cardinality",
            type=ParameterType.INTEGER,
            min_value=0,
            description="Maximum expected distinct values",
        ),
    ],
    tags=["profiling", "cardinality", "distinct", "count"],
    severity_default="medium",
)

# Unique-to-total ratio bounds; both bounds are limited to the 0-1 range.
_UNIQUENESS_RATIO = ValidatorDefinition(
    name="UniquenessRatio",
    display_name="Uniqueness Ratio",
    category=ValidatorCategory.PROFILING,
    description="Validates that the ratio of unique values to total values meets expectations.",
    parameters=[
        _column_param("Column to analyze"),
        ParameterDefinition(
            name="min_ratio",
            label="Minimum Ratio",
            type=ParameterType.FLOAT,
            min_value=0,
            max_value=1,
            description="Minimum uniqueness ratio (0-1)",
        ),
        ParameterDefinition(
            name="max_ratio",
            label="Maximum Ratio",
            type=ParameterType.FLOAT,
            min_value=0,
            max_value=1,
            description="Maximum uniqueness ratio (0-1)",
        ),
    ],
    tags=["profiling", "uniqueness", "ratio", "distinct"],
    severity_default="medium",
)

# Shannon entropy bounds; only a lower limit of 0 is declared on each bound.
_ENTROPY = ValidatorDefinition(
    name="Entropy",
    display_name="Shannon Entropy",
    category=ValidatorCategory.PROFILING,
    description="Validates that column Shannon entropy falls within expected bounds.",
    parameters=[
        _column_param("Column to calculate entropy"),
        ParameterDefinition(
            name="min_entropy",
            label="Minimum Entropy",
            type=ParameterType.FLOAT,
            min_value=0,
            description="Minimum expected entropy (bits)",
        ),
        ParameterDefinition(
            name="max_entropy",
            label="Maximum Entropy",
            type=ParameterType.FLOAT,
            min_value=0,
            description="Maximum expected entropy (bits)",
        ),
    ],
    tags=["profiling", "entropy", "information", "randomness"],
    severity_default="medium",
)

# Two-column check: a feature column measured against a target column.
_INFORMATION_GAIN = ValidatorDefinition(
    name="InformationGain",
    display_name="Information Gain",
    category=ValidatorCategory.PROFILING,
    description="Validates information gain between two columns (for feature selection).",
    parameters=[
        _column_param(
            "Feature column",
            name="feature_column",
            label="Feature Column",
        ),
        _column_param(
            "Target column",
            name="target_column",
            label="Target Column",
        ),
        ParameterDefinition(
            name="min_gain",
            label="Minimum Gain",
            type=ParameterType.FLOAT,
            min_value=0,
            description="Minimum required information gain",
        ),
    ],
    tags=["profiling", "information_gain", "feature", "ml"],
    severity_default="low",
)

# Frequency-distribution check against one of four named distribution types.
_VALUE_FREQUENCY = ValidatorDefinition(
    name="ValueFrequency",
    display_name="Value Frequency Distribution",
    category=ValidatorCategory.PROFILING,
    description="Validates that value frequency distribution meets expectations.",
    parameters=[
        _column_param("Column to analyze"),
        ParameterDefinition(
            name="expected_distribution",
            label="Expected Distribution",
            type=ParameterType.SELECT,
            options=_select_options(
                ("uniform", "Uniform"),
                ("normal", "Normal (Gaussian)"),
                ("exponential", "Exponential"),
                ("power_law", "Power Law (Zipf)"),
            ),
            default="uniform",
            description="Expected frequency distribution type",
        ),
        ParameterDefinition(
            name="tolerance",
            label="Tolerance",
            type=ParameterType.FLOAT,
            default=0.1,
            min_value=0,
            max_value=1,
            description="Acceptable deviation from expected distribution",
        ),
    ],
    tags=["profiling", "frequency", "distribution", "histogram"],
    severity_default="low",
)

# Skewness / kurtosis limits; neither limit declares a numeric range.
_DISTRIBUTION_SHAPE = ValidatorDefinition(
    name="DistributionShape",
    display_name="Distribution Shape",
    category=ValidatorCategory.PROFILING,
    description="Validates the shape of value distribution (skewness, kurtosis).",
    parameters=[
        _column_param("Column to analyze"),
        ParameterDefinition(
            name="max_skewness",
            label="Maximum Skewness",
            type=ParameterType.FLOAT,
            description="Maximum acceptable skewness (absolute value)",
        ),
        ParameterDefinition(
            name="max_kurtosis",
            label="Maximum Kurtosis",
            type=ParameterType.FLOAT,
            description="Maximum acceptable excess kurtosis",
        ),
    ],
    tags=["profiling", "distribution", "skewness", "kurtosis"],
    severity_default="low",
)

# Mode check: optional expected value plus a cap on its share of rows.
_MOST_COMMON_VALUE = ValidatorDefinition(
    name="MostCommonValue",
    display_name="Most Common Value Check",
    category=ValidatorCategory.PROFILING,
    description="Validates the most common value and its frequency.",
    parameters=[
        _column_param("Column to analyze"),
        ParameterDefinition(
            name="expected_value",
            label="Expected Most Common Value",
            type=ParameterType.STRING,
            description="Expected most common value (optional)",
        ),
        ParameterDefinition(
            name="max_frequency_ratio",
            label="Maximum Frequency Ratio",
            type=ParameterType.FLOAT,
            min_value=0,
            max_value=1,
            description="Maximum ratio of most common value (detect imbalance)",
        ),
    ],
    tags=["profiling", "mode", "common", "frequency"],
    severity_default="low",
)

# Inferred-type check; the expected type is a required selection.
_DATA_TYPE_CONSISTENCY = ValidatorDefinition(
    name="DataTypeConsistency",
    display_name="Data Type Consistency",
    category=ValidatorCategory.PROFILING,
    description="Validates that inferred data types are consistent with expected types.",
    parameters=[
        _column_param("Column to validate"),
        ParameterDefinition(
            name="expected_type",
            label="Expected Data Type",
            type=ParameterType.SELECT,
            options=_select_options(
                ("integer", "Integer"),
                ("float", "Float"),
                ("string", "String"),
                ("boolean", "Boolean"),
                ("date", "Date"),
                ("datetime", "DateTime"),
                ("categorical", "Categorical"),
            ),
            required=True,
            description="Expected inferred data type",
        ),
    ],
    tags=["profiling", "type", "consistency", "inference"],
    severity_default="medium",
)

# Public list of the profiling validator definitions, in declaration order.
PROFILING_VALIDATORS: list[ValidatorDefinition] = [
    _CARDINALITY,
    _UNIQUENESS_RATIO,
    _ENTROPY,
    _INFORMATION_GAIN,
    _VALUE_FREQUENCY,
    _DISTRIBUTION_SHAPE,
    _MOST_COMMON_VALUE,
    _DATA_TYPE_CONSISTENCY,
]
@@ -0,0 +1,312 @@
1
+ """Referential validators.
2
+
3
+ Validators for foreign key relationships, orphan detection, and hierarchy integrity.
4
+
5
+ Import path in truthound: `from truthound.validators.referential import *`
6
+ """
7
+
8
+ from .base import (
9
+ ParameterDefinition,
10
+ ParameterType,
11
+ ValidatorCategory,
12
+ ValidatorDefinition,
13
+ )
14
+
15
def _required_param(
    name: str,
    label: str,
    param_type: ParameterType,
    description: str,
) -> ParameterDefinition:
    """Build a required parameter that carries no default or numeric range.

    Args:
        name: Parameter name.
        label: Display label.
        param_type: ``ParameterType`` member passed as ``type``.
        description: Help text for the parameter.

    Returns:
        A new ``ParameterDefinition`` on every call, so no instance is
        shared between validators.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=param_type,
        required=True,
        description=description,
    )


# Single-column foreign key checked against a column of a reference source.
_FOREIGN_KEY = ValidatorDefinition(
    name="ForeignKey",
    display_name="Foreign Key",
    category=ValidatorCategory.REFERENTIAL,
    description="Validates that all values in a column exist in a reference table's column.",
    parameters=[
        _required_param(
            "column",
            "Foreign Key Column",
            ParameterType.COLUMN,
            "Column containing foreign key values",
        ),
        _required_param(
            "reference_source",
            "Reference Data Source",
            ParameterType.SOURCE_REF,
            "Reference data source containing the primary key",
        ),
        _required_param(
            "reference_column",
            "Reference Column",
            ParameterType.STRING,
            "Primary key column in the reference data",
        ),
    ],
    tags=["referential", "foreign_key", "fk", "relationship"],
    severity_default="high",
)

# Multi-column variant: column list on this side, string list on the reference side.
_COMPOSITE_FOREIGN_KEY = ValidatorDefinition(
    name="CompositeForeignKey",
    display_name="Composite Foreign Key",
    category=ValidatorCategory.REFERENTIAL,
    description="Validates composite foreign key relationships across multiple columns.",
    parameters=[
        _required_param(
            "columns",
            "Foreign Key Columns",
            ParameterType.COLUMN_LIST,
            "Columns forming the composite foreign key",
        ),
        _required_param(
            "reference_source",
            "Reference Data Source",
            ParameterType.SOURCE_REF,
            "Reference data source containing the composite primary key",
        ),
        _required_param(
            "reference_columns",
            "Reference Columns",
            ParameterType.STRING_LIST,
            "Columns forming the composite primary key in reference",
        ),
    ],
    tags=["referential", "composite_key", "foreign_key", "relationship"],
    severity_default="high",
)

# Both columns are COLUMN-typed here; no reference source parameter is declared.
_SELF_REFERENTIAL_FK = ValidatorDefinition(
    name="SelfReferentialFK",
    display_name="Self-Referential Foreign Key",
    category=ValidatorCategory.REFERENTIAL,
    description="Validates self-referential foreign keys (e.g., parent_id references id in same table).",
    parameters=[
        _required_param(
            "column",
            "Foreign Key Column",
            ParameterType.COLUMN,
            "Column containing self-referential foreign key (e.g., parent_id)",
        ),
        _required_param(
            "reference_column",
            "Primary Key Column",
            ParameterType.COLUMN,
            "Primary key column being referenced (e.g., id)",
        ),
        ParameterDefinition(
            name="allow_null",
            label="Allow Null Values",
            type=ParameterType.BOOLEAN,
            default=True,
            description="Whether null values are allowed (root nodes)",
        ),
    ],
    tags=["referential", "self_referential", "hierarchy", "tree"],
    severity_default="high",
)

# Orphan detection against a parent data source.
_ORPHAN_RECORD = ValidatorDefinition(
    name="OrphanRecord",
    display_name="Orphan Record Detection",
    category=ValidatorCategory.REFERENTIAL,
    description="Detects orphan records that reference non-existent parent records.",
    parameters=[
        _required_param(
            "column",
            "Foreign Key Column",
            ParameterType.COLUMN,
            "Column containing foreign key values",
        ),
        _required_param(
            "reference_source",
            "Reference Data Source",
            ParameterType.SOURCE_REF,
            "Parent data source",
        ),
        _required_param(
            "reference_column",
            "Reference Column",
            ParameterType.STRING,
            "Primary key column in the parent data",
        ),
    ],
    tags=["referential", "orphan", "integrity", "dangling"],
    severity_default="high",
)

# Dangling-reference detection against a target data source.
_DANGLING_REFERENCE = ValidatorDefinition(
    name="DanglingReference",
    display_name="Dangling Reference Detection",
    category=ValidatorCategory.REFERENTIAL,
    description="Detects references pointing to deleted or non-existent records.",
    parameters=[
        _required_param(
            "column",
            "Reference Column",
            ParameterType.COLUMN,
            "Column containing reference values",
        ),
        _required_param(
            "reference_source",
            "Reference Data Source",
            ParameterType.SOURCE_REF,
            "Target data source",
        ),
        _required_param(
            "reference_column",
            "Target Column",
            ParameterType.STRING,
            "Column in target data being referenced",
        ),
    ],
    tags=["referential", "dangling", "broken_link", "integrity"],
    severity_default="high",
)

# Cycle detection over an id/parent column pair; the only "critical" default here.
_CIRCULAR_REFERENCE = ValidatorDefinition(
    name="CircularReference",
    display_name="Circular Reference Detection",
    category=ValidatorCategory.REFERENTIAL,
    description="Detects circular references in self-referential data (e.g., A→B→C→A).",
    parameters=[
        _required_param(
            "id_column",
            "ID Column",
            ParameterType.COLUMN,
            "Primary key column (e.g., id)",
        ),
        _required_param(
            "parent_column",
            "Parent Column",
            ParameterType.COLUMN,
            "Column referencing parent (e.g., parent_id)",
        ),
        # Optional search depth: defaults to 100, limited to 1-1000.
        ParameterDefinition(
            name="max_depth",
            label="Maximum Detection Depth",
            type=ParameterType.INTEGER,
            default=100,
            min_value=1,
            max_value=1000,
            description="Maximum depth to search for cycles",
        ),
    ],
    tags=["referential", "circular", "cycle", "hierarchy"],
    severity_default="critical",
)

# Depth limit over an id/parent column pair.
_HIERARCHY_DEPTH = ValidatorDefinition(
    name="HierarchyDepth",
    display_name="Hierarchy Depth Validation",
    category=ValidatorCategory.REFERENTIAL,
    description="Validates that hierarchy depth doesn't exceed a maximum limit.",
    parameters=[
        _required_param(
            "id_column",
            "ID Column",
            ParameterType.COLUMN,
            "Primary key column",
        ),
        _required_param(
            "parent_column",
            "Parent Column",
            ParameterType.COLUMN,
            "Column referencing parent",
        ),
        # Required here (no default), limited to 1-100.
        ParameterDefinition(
            name="max_depth",
            label="Maximum Allowed Depth",
            type=ParameterType.INTEGER,
            required=True,
            min_value=1,
            max_value=100,
            description="Maximum allowed hierarchy depth",
        ),
    ],
    tags=["referential", "hierarchy", "depth", "tree"],
    severity_default="medium",
)

# Foreign-key triple plus a selectable expected cascade action.
_CASCADE_INTEGRITY = ValidatorDefinition(
    name="CascadeIntegrity",
    display_name="Cascade Integrity",
    category=ValidatorCategory.REFERENTIAL,
    description="Validates cascade delete/update integrity across related tables.",
    parameters=[
        _required_param(
            "column",
            "Foreign Key Column",
            ParameterType.COLUMN,
            "Column with cascade relationship",
        ),
        _required_param(
            "reference_source",
            "Reference Data Source",
            ParameterType.SOURCE_REF,
            "Parent data source",
        ),
        _required_param(
            "reference_column",
            "Reference Column",
            ParameterType.STRING,
            "Primary key in parent",
        ),
        ParameterDefinition(
            name="action",
            label="Cascade Action",
            type=ParameterType.SELECT,
            options=[
                {"value": "cascade", "label": "Cascade"},
                {"value": "set_null", "label": "Set Null"},
                {"value": "restrict", "label": "Restrict"},
                {"value": "no_action", "label": "No Action"},
            ],
            default="cascade",
            description="Expected cascade behavior",
        ),
    ],
    tags=["referential", "cascade", "delete", "update"],
    severity_default="high",
)

# Orphan check against several parents; sources and columns are both string lists.
_MULTI_TABLE_ORPHAN = ValidatorDefinition(
    name="MultiTableOrphan",
    display_name="Multi-Table Orphan Check",
    category=ValidatorCategory.REFERENTIAL,
    description="Detects orphan records across multiple related tables.",
    parameters=[
        _required_param(
            "column",
            "Foreign Key Column",
            ParameterType.COLUMN,
            "Column containing foreign key",
        ),
        _required_param(
            "reference_sources",
            "Reference Data Sources",
            ParameterType.STRING_LIST,
            "List of parent data source IDs",
        ),
        _required_param(
            "reference_columns",
            "Reference Columns",
            ParameterType.STRING_LIST,
            "Corresponding primary key columns",
        ),
    ],
    tags=["referential", "orphan", "multi_table", "integrity"],
    severity_default="high",
)

# Public list of the referential validator definitions, in declaration order.
REFERENTIAL_VALIDATORS: list[ValidatorDefinition] = [
    _FOREIGN_KEY,
    _COMPOSITE_FOREIGN_KEY,
    _SELF_REFERENTIAL_FK,
    _ORPHAN_RECORD,
    _DANGLING_REFERENCE,
    _CIRCULAR_REFERENCE,
    _HIERARCHY_DEPTH,
    _CASCADE_INTEGRITY,
    _MULTI_TABLE_ORPHAN,
]