truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,273 @@
1
+ """Localization validators.
2
+
3
+ Validators for regional identifier formats, including Korean-, Japanese-,
4
+ and Chinese-specific formats.
5
+
6
+ Import path in truthound: `from truthound.validators.localization import *`
7
+ """
8
+
9
+ from .base import (
10
+ ParameterDefinition,
11
+ ParameterType,
12
+ ValidatorCategory,
13
+ ValidatorDefinition,
14
+ )
15
+
16
def _column_param(description: str) -> ParameterDefinition:
    """Build the required ``column`` parameter shared by every entry below."""
    return ParameterDefinition(
        name="column",
        label="Column",
        type=ParameterType.COLUMN,
        required=True,
        description=description,
    )


def _flag_param(name: str, label: str, description: str) -> ParameterDefinition:
    """Build a boolean parameter; every flag in this module defaults to True."""
    return ParameterDefinition(
        name=name,
        label=label,
        type=ParameterType.BOOLEAN,
        default=True,
        description=description,
    )


def _select_param(
    name: str,
    label: str,
    choices: list[tuple[str, str]],
    description: str,
) -> ParameterDefinition:
    """Build a select parameter from ``(value, label)`` pairs.

    Every select in this module uses ``"any"`` as its default value.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=ParameterType.SELECT,
        options=[
            {"value": value, "label": text} for value, text in choices
        ],
        default="any",
        description=description,
    )


def _localization_validator(
    name: str,
    display_name: str,
    description: str,
    parameters: list[ParameterDefinition],
    tags: list[str],
    severity: str,
) -> ValidatorDefinition:
    """Build a validator definition in the LOCALIZATION category."""
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.LOCALIZATION,
        description=description,
        parameters=parameters,
        tags=tags,
        severity_default=severity,
    )


# Catalogue of localization validator definitions, grouped by region.
LOCALIZATION_VALIDATORS: list[ValidatorDefinition] = [
    # --- Korean validators ---
    _localization_validator(
        "KoreanBusinessNumber",
        "Korean Business Registration Number",
        "Validates Korean business registration numbers (사업자등록번호, 10 digits).",
        [
            _column_param("Column containing business registration numbers"),
            _flag_param(
                "allow_hyphen",
                "Allow Hyphens",
                "Accept numbers with hyphens (123-45-67890)",
            ),
        ],
        ["localization", "korean", "business", "registration"],
        "high",
    ),
    _localization_validator(
        "KoreanRRN",
        "Korean Resident Registration Number",
        "Validates Korean Resident Registration Numbers (주민등록번호, 13 digits).",
        [
            _column_param("Column containing RRN values"),
            _flag_param(
                "masked",
                "Masked Format",
                "Accept partially masked numbers (123456-1******)",
            ),
        ],
        ["localization", "korean", "rrn", "pii", "identity"],
        "critical",
    ),
    _localization_validator(
        "KoreanPhone",
        "Korean Phone Number",
        "Validates Korean phone number formats (mobile and landline).",
        [
            _column_param("Column containing phone numbers"),
            _select_param(
                "type",
                "Phone Type",
                [
                    ("any", "Any (Mobile or Landline)"),
                    ("mobile", "Mobile (010/011/016/017/018/019)"),
                    ("landline", "Landline (02/031-064)"),
                ],
                "Accepted phone number types",
            ),
        ],
        ["localization", "korean", "phone", "contact"],
        "medium",
    ),
    _localization_validator(
        "KoreanBankAccount",
        "Korean Bank Account Number",
        "Validates Korean bank account number formats by bank.",
        [
            _column_param("Column containing bank account numbers"),
            _select_param(
                "bank",
                "Bank",
                [
                    ("any", "Any Bank"),
                    ("kb", "KB Kookmin Bank"),
                    ("shinhan", "Shinhan Bank"),
                    ("woori", "Woori Bank"),
                    ("hana", "Hana Bank"),
                    ("nh", "NH Bank"),
                    ("ibk", "IBK"),
                    ("kakao", "Kakao Bank"),
                    ("toss", "Toss Bank"),
                ],
                "Bank for format validation",
            ),
        ],
        ["localization", "korean", "bank", "financial"],
        "high",
    ),
    # --- Japanese validators ---
    _localization_validator(
        "JapanesePostalCode",
        "Japanese Postal Code",
        "Validates Japanese postal codes (〒xxx-xxxx format).",
        [
            _column_param("Column containing postal codes"),
            _flag_param(
                "allow_symbol",
                "Allow 〒 Symbol",
                "Accept codes with 〒 prefix",
            ),
        ],
        ["localization", "japanese", "postal", "address"],
        "medium",
    ),
    _localization_validator(
        "JapaneseMyNumber",
        "Japanese My Number",
        "Validates Japanese Individual Number (マイナンバー, 12 digits).",
        [
            _column_param("Column containing My Number values"),
        ],
        ["localization", "japanese", "mynumber", "pii", "identity"],
        "critical",
    ),
    _localization_validator(
        "JapanesePhone",
        "Japanese Phone Number",
        "Validates Japanese phone number formats.",
        [
            _column_param("Column containing phone numbers"),
            _select_param(
                "type",
                "Phone Type",
                [
                    ("any", "Any"),
                    ("mobile", "Mobile (070/080/090)"),
                    ("landline", "Landline"),
                    ("toll_free", "Toll Free (0120/0800)"),
                ],
                "Accepted phone number types",
            ),
        ],
        ["localization", "japanese", "phone", "contact"],
        "medium",
    ),
    # --- Chinese validators ---
    _localization_validator(
        "ChineseID",
        "Chinese ID Number",
        "Validates Chinese Resident Identity Card numbers (18 digits).",
        [
            _column_param("Column containing ID numbers"),
            _flag_param(
                "validate_region",
                "Validate Region Code",
                "Validate the 6-digit region code",
            ),
        ],
        ["localization", "chinese", "id", "pii", "identity"],
        "critical",
    ),
    _localization_validator(
        "ChineseUSCC",
        "Chinese Unified Social Credit Code",
        "Validates Chinese USCC (统一社会信用代码, 18 characters).",
        [
            _column_param("Column containing USCC values"),
        ],
        ["localization", "chinese", "uscc", "business", "registration"],
        "high",
    ),
    _localization_validator(
        "ChinesePhone",
        "Chinese Phone Number",
        "Validates Chinese phone number formats.",
        [
            _column_param("Column containing phone numbers"),
            _select_param(
                "type",
                "Phone Type",
                [
                    ("any", "Any"),
                    ("mobile", "Mobile (1xx)"),
                    ("landline", "Landline"),
                ],
                "Accepted phone number types",
            ),
        ],
        ["localization", "chinese", "phone", "contact"],
        "medium",
    ),
]
@@ -0,0 +1,308 @@
1
+ """ML Feature validators.
2
+
3
+ Validators for machine learning feature quality including null impact,
4
+ scale validation, correlation analysis, and target leakage detection.
5
+
6
+ Import path in truthound: `from truthound.validators.ml_feature import *`
7
+ """
8
+
9
+ from .base import (
10
+ ParameterDefinition,
11
+ ParameterType,
12
+ ValidatorCategory,
13
+ ValidatorDefinition,
14
+ )
15
+
16
def _param(
    name: str,
    label: str,
    kind: ParameterType,
    description: str,
    **extras: object,
) -> ParameterDefinition:
    """Build a parameter definition.

    ``extras`` carries only the optional keywords a given parameter actually
    sets (``required``, ``default``, ``min_value``, ``max_value``,
    ``options``); anything not supplied is left to ParameterDefinition's own
    defaults.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=kind,
        description=description,
        **extras,
    )


def _choices(*pairs: tuple[str, str]) -> list[dict[str, str]]:
    """Turn ``(value, label)`` pairs into the option dicts used by selects."""
    return [{"value": value, "label": text} for value, text in pairs]


def _ml_validator(
    name: str,
    display_name: str,
    description: str,
    severity: str,
    tags: list[str],
    *parameters: ParameterDefinition,
) -> ValidatorDefinition:
    """Build a validator definition in the ML_FEATURE category."""
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.ML_FEATURE,
        description=description,
        parameters=list(parameters),
        tags=tags,
        severity_default=severity,
    )


# Catalogue of ML feature-quality validator definitions.
ML_FEATURE_VALIDATORS: list[ValidatorDefinition] = [
    _ml_validator(
        "FeatureNullImpact",
        "Feature Null Impact Analysis",
        "Analyzes and validates the impact of null values on ML features.",
        "medium",
        ["ml_feature", "null", "impact", "missing"],
        _param(
            "column",
            "Feature Column",
            ParameterType.COLUMN,
            "Feature column to analyze",
            required=True,
        ),
        # The target column is the only column parameter that does not set
        # `required`.
        _param(
            "target_column",
            "Target Column",
            ParameterType.COLUMN,
            "Target column for impact analysis",
        ),
        _param(
            "max_null_ratio",
            "Maximum Null Ratio",
            ParameterType.FLOAT,
            "Maximum acceptable ratio of null values",
            default=0.1,
            min_value=0,
            max_value=1,
        ),
        # No default is set for the impact score.
        _param(
            "max_impact_score",
            "Maximum Impact Score",
            ParameterType.FLOAT,
            "Maximum acceptable impact score on target",
            min_value=0,
            max_value=1,
        ),
    ),
    _ml_validator(
        "FeatureScale",
        "Feature Scale Validation",
        "Validates that features are properly scaled for ML algorithms.",
        "medium",
        ["ml_feature", "scale", "normalization", "preprocessing"],
        _param(
            "column",
            "Feature Column",
            ParameterType.COLUMN,
            "Feature column to validate",
            required=True,
        ),
        # Required select with no default value.
        _param(
            "expected_scale",
            "Expected Scale Type",
            ParameterType.SELECT,
            "Expected feature scale type",
            options=_choices(
                ("standard", "Standard (mean=0, std=1)"),
                ("minmax", "Min-Max (0 to 1)"),
                ("robust", "Robust (median-centered)"),
                ("unit_norm", "Unit Norm (L2=1)"),
            ),
            required=True,
        ),
        _param(
            "tolerance",
            "Tolerance",
            ParameterType.FLOAT,
            "Acceptable deviation from expected scale",
            default=0.1,
            min_value=0,
            max_value=1,
        ),
    ),
    _ml_validator(
        "FeatureCorrelationMatrix",
        "Feature Correlation Matrix",
        "Validates correlation between features for multicollinearity detection.",
        "medium",
        ["ml_feature", "correlation", "multicollinearity", "redundancy"],
        _param(
            "columns",
            "Feature Columns",
            ParameterType.COLUMN_LIST,
            "Feature columns to analyze correlation",
            required=True,
        ),
        _param(
            "max_correlation",
            "Maximum Correlation",
            ParameterType.FLOAT,
            "Maximum acceptable absolute correlation between features",
            default=0.9,
            min_value=0,
            max_value=1,
        ),
        _param(
            "method",
            "Correlation Method",
            ParameterType.SELECT,
            "Correlation calculation method",
            options=_choices(
                ("pearson", "Pearson"),
                ("spearman", "Spearman"),
                ("kendall", "Kendall"),
            ),
            default="pearson",
        ),
    ),
    _ml_validator(
        "TargetLeakage",
        "Target Leakage Detection",
        "Detects potential target leakage in features.",
        "critical",
        ["ml_feature", "leakage", "target", "data_leakage"],
        _param(
            "feature_columns",
            "Feature Columns",
            ParameterType.COLUMN_LIST,
            "Feature columns to check for leakage",
            required=True,
        ),
        _param(
            "target_column",
            "Target Column",
            ParameterType.COLUMN,
            "Target column",
            required=True,
        ),
        _param(
            "leakage_threshold",
            "Leakage Threshold",
            ParameterType.FLOAT,
            "Correlation threshold indicating potential leakage",
            default=0.95,
            min_value=0.5,
            max_value=1,
        ),
    ),
    _ml_validator(
        "FeatureImportance",
        "Feature Importance Threshold",
        "Validates minimum feature importance for ML model utility.",
        "low",
        ["ml_feature", "importance", "relevance", "selection"],
        _param(
            "column",
            "Feature Column",
            ParameterType.COLUMN,
            "Feature column to validate",
            required=True,
        ),
        _param(
            "target_column",
            "Target Column",
            ParameterType.COLUMN,
            "Target column for importance calculation",
            required=True,
        ),
        _param(
            "min_importance",
            "Minimum Importance",
            ParameterType.FLOAT,
            "Minimum required feature importance score",
            default=0.01,
            min_value=0,
            max_value=1,
        ),
        _param(
            "method",
            "Importance Method",
            ParameterType.SELECT,
            "Method for calculating feature importance",
            options=_choices(
                ("mutual_info", "Mutual Information"),
                ("correlation", "Correlation"),
                ("f_score", "F-Score (ANOVA)"),
            ),
            default="mutual_info",
        ),
    ),
    _ml_validator(
        "FeatureVariance",
        "Feature Variance Threshold",
        "Validates minimum feature variance (removes near-constant features).",
        "medium",
        ["ml_feature", "variance", "constant", "low_variance"],
        _param(
            "column",
            "Feature Column",
            ParameterType.COLUMN,
            "Feature column to validate",
            required=True,
        ),
        # No max_value is set for the variance threshold.
        _param(
            "min_variance",
            "Minimum Variance",
            ParameterType.FLOAT,
            "Minimum required variance",
            default=0.01,
            min_value=0,
        ),
    ),
    _ml_validator(
        "FeatureOutlierRatio",
        "Feature Outlier Ratio",
        "Validates that outlier ratio in features is within acceptable bounds.",
        "medium",
        ["ml_feature", "outlier", "anomaly", "preprocessing"],
        _param(
            "column",
            "Feature Column",
            ParameterType.COLUMN,
            "Feature column to check",
            required=True,
        ),
        _param(
            "max_outlier_ratio",
            "Maximum Outlier Ratio",
            ParameterType.FLOAT,
            "Maximum acceptable ratio of outliers",
            default=0.05,
            min_value=0,
            max_value=1,
        ),
        _param(
            "method",
            "Outlier Detection Method",
            ParameterType.SELECT,
            "Method for outlier detection",
            options=_choices(
                ("iqr", "IQR (1.5x)"),
                ("zscore", "Z-Score (3 sigma)"),
                ("mad", "MAD"),
            ),
            default="iqr",
        ),
    ),
    _ml_validator(
        "ClassImbalance",
        "Class Imbalance Check",
        "Validates class distribution for classification targets.",
        "high",
        ["ml_feature", "class_imbalance", "target", "classification"],
        _param(
            "column",
            "Target Column",
            ParameterType.COLUMN,
            "Classification target column",
            required=True,
        ),
        # No max_value is set for either threshold below.
        _param(
            "max_imbalance_ratio",
            "Maximum Imbalance Ratio",
            ParameterType.FLOAT,
            "Maximum ratio of majority to minority class",
            default=10,
            min_value=1,
        ),
        _param(
            "min_samples_per_class",
            "Minimum Samples per Class",
            ParameterType.INTEGER,
            "Minimum required samples for each class",
            default=10,
            min_value=1,
        ),
    ),
]