truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +437 -10
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +11 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Localization validators.
|
|
2
|
+
|
|
3
|
+
Validators for regional identifier formats including Korean, Japanese,
|
|
4
|
+
and Chinese specific formats.
|
|
5
|
+
|
|
6
|
+
Import path in truthound: `from truthound.validators.localization import *`
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .base import (
|
|
10
|
+
ParameterDefinition,
|
|
11
|
+
ParameterType,
|
|
12
|
+
ValidatorCategory,
|
|
13
|
+
ValidatorDefinition,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
def _loc_column(description: str) -> ParameterDefinition:
    """Return the required ``column`` selector used by every entry below."""
    return ParameterDefinition(
        name="column",
        label="Column",
        type=ParameterType.COLUMN,
        required=True,
        description=description,
    )


def _loc_toggle(name: str, label: str, description: str) -> ParameterDefinition:
    """Return a BOOLEAN parameter whose default is ``True``."""
    return ParameterDefinition(
        name=name,
        label=label,
        type=ParameterType.BOOLEAN,
        default=True,
        description=description,
    )


def _loc_choice(
    name: str,
    label: str,
    description: str,
    choices: list[tuple[str, str]],
) -> ParameterDefinition:
    """Return a SELECT parameter defaulting to ``"any"``.

    ``choices`` is an ordered list of ``(value, label)`` pairs; each pair is
    expanded into the ``{"value": ..., "label": ...}`` dict form.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=ParameterType.SELECT,
        options=[{"value": value, "label": text} for value, text in choices],
        default="any",
        description=description,
    )


def _loc_validator(
    *,
    name: str,
    display_name: str,
    description: str,
    parameters: list[ParameterDefinition],
    tags: list[str],
    severity: str,
) -> ValidatorDefinition:
    """Return a ``ValidatorDefinition`` in the LOCALIZATION category."""
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.LOCALIZATION,
        description=description,
        parameters=parameters,
        tags=tags,
        severity_default=severity,
    )


# Catalogue of regional-format validators, grouped by country.
LOCALIZATION_VALIDATORS: list[ValidatorDefinition] = [
    # Korean validators
    _loc_validator(
        name="KoreanBusinessNumber",
        display_name="Korean Business Registration Number",
        description="Validates Korean business registration numbers (사업자등록번호, 10 digits).",
        parameters=[
            _loc_column("Column containing business registration numbers"),
            _loc_toggle(
                "allow_hyphen",
                "Allow Hyphens",
                "Accept numbers with hyphens (123-45-67890)",
            ),
        ],
        tags=["localization", "korean", "business", "registration"],
        severity="high",
    ),
    _loc_validator(
        name="KoreanRRN",
        display_name="Korean Resident Registration Number",
        description="Validates Korean Resident Registration Numbers (주민등록번호, 13 digits).",
        parameters=[
            _loc_column("Column containing RRN values"),
            _loc_toggle(
                "masked",
                "Masked Format",
                "Accept partially masked numbers (123456-1******)",
            ),
        ],
        tags=["localization", "korean", "rrn", "pii", "identity"],
        severity="critical",
    ),
    _loc_validator(
        name="KoreanPhone",
        display_name="Korean Phone Number",
        description="Validates Korean phone number formats (mobile and landline).",
        parameters=[
            _loc_column("Column containing phone numbers"),
            _loc_choice(
                "type",
                "Phone Type",
                "Accepted phone number types",
                [
                    ("any", "Any (Mobile or Landline)"),
                    ("mobile", "Mobile (010/011/016/017/018/019)"),
                    ("landline", "Landline (02/031-064)"),
                ],
            ),
        ],
        tags=["localization", "korean", "phone", "contact"],
        severity="medium",
    ),
    _loc_validator(
        name="KoreanBankAccount",
        display_name="Korean Bank Account Number",
        description="Validates Korean bank account number formats by bank.",
        parameters=[
            _loc_column("Column containing bank account numbers"),
            _loc_choice(
                "bank",
                "Bank",
                "Bank for format validation",
                [
                    ("any", "Any Bank"),
                    ("kb", "KB Kookmin Bank"),
                    ("shinhan", "Shinhan Bank"),
                    ("woori", "Woori Bank"),
                    ("hana", "Hana Bank"),
                    ("nh", "NH Bank"),
                    ("ibk", "IBK"),
                    ("kakao", "Kakao Bank"),
                    ("toss", "Toss Bank"),
                ],
            ),
        ],
        tags=["localization", "korean", "bank", "financial"],
        severity="high",
    ),
    # Japanese validators
    _loc_validator(
        name="JapanesePostalCode",
        display_name="Japanese Postal Code",
        description="Validates Japanese postal codes (〒xxx-xxxx format).",
        parameters=[
            _loc_column("Column containing postal codes"),
            _loc_toggle(
                "allow_symbol",
                "Allow 〒 Symbol",
                "Accept codes with 〒 prefix",
            ),
        ],
        tags=["localization", "japanese", "postal", "address"],
        severity="medium",
    ),
    _loc_validator(
        name="JapaneseMyNumber",
        display_name="Japanese My Number",
        description="Validates Japanese Individual Number (マイナンバー, 12 digits).",
        parameters=[
            _loc_column("Column containing My Number values"),
        ],
        tags=["localization", "japanese", "mynumber", "pii", "identity"],
        severity="critical",
    ),
    _loc_validator(
        name="JapanesePhone",
        display_name="Japanese Phone Number",
        description="Validates Japanese phone number formats.",
        parameters=[
            _loc_column("Column containing phone numbers"),
            _loc_choice(
                "type",
                "Phone Type",
                "Accepted phone number types",
                [
                    ("any", "Any"),
                    ("mobile", "Mobile (070/080/090)"),
                    ("landline", "Landline"),
                    ("toll_free", "Toll Free (0120/0800)"),
                ],
            ),
        ],
        tags=["localization", "japanese", "phone", "contact"],
        severity="medium",
    ),
    # Chinese validators
    _loc_validator(
        name="ChineseID",
        display_name="Chinese ID Number",
        description="Validates Chinese Resident Identity Card numbers (18 digits).",
        parameters=[
            _loc_column("Column containing ID numbers"),
            _loc_toggle(
                "validate_region",
                "Validate Region Code",
                "Validate the 6-digit region code",
            ),
        ],
        tags=["localization", "chinese", "id", "pii", "identity"],
        severity="critical",
    ),
    _loc_validator(
        name="ChineseUSCC",
        display_name="Chinese Unified Social Credit Code",
        description="Validates Chinese USCC (统一社会信用代码, 18 characters).",
        parameters=[
            _loc_column("Column containing USCC values"),
        ],
        tags=["localization", "chinese", "uscc", "business", "registration"],
        severity="high",
    ),
    _loc_validator(
        name="ChinesePhone",
        display_name="Chinese Phone Number",
        description="Validates Chinese phone number formats.",
        parameters=[
            _loc_column("Column containing phone numbers"),
            _loc_choice(
                "type",
                "Phone Type",
                "Accepted phone number types",
                [
                    ("any", "Any"),
                    ("mobile", "Mobile (1xx)"),
                    ("landline", "Landline"),
                ],
            ),
        ],
        tags=["localization", "chinese", "phone", "contact"],
        severity="medium",
    ),
]
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""ML Feature validators.
|
|
2
|
+
|
|
3
|
+
Validators for machine learning feature quality including null impact,
|
|
4
|
+
scale validation, correlation analysis, and target leakage detection.
|
|
5
|
+
|
|
6
|
+
Import path in truthound: `from truthound.validators.ml_feature import *`
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .base import (
|
|
10
|
+
ParameterDefinition,
|
|
11
|
+
ParameterType,
|
|
12
|
+
ValidatorCategory,
|
|
13
|
+
ValidatorDefinition,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
def _ml_param(
    kind: ParameterType,
    name: str,
    label: str,
    description: str,
    **extra: object,
) -> ParameterDefinition:
    """Return a ``ParameterDefinition`` of the given ``kind``.

    Anything in ``extra`` (``required``, ``default``, ``min_value``,
    ``max_value``, ``options``) is forwarded untouched, so a keyword that is
    not supplied here is not supplied to ``ParameterDefinition`` either.
    """
    return ParameterDefinition(
        name=name,
        label=label,
        type=kind,
        description=description,
        **extra,
    )


def _ml_options(*pairs: tuple[str, str]) -> list[dict[str, str]]:
    """Expand ``(value, label)`` pairs into SELECT option dicts, in order."""
    return [{"value": value, "label": text} for value, text in pairs]


def _ml_validator(
    *,
    name: str,
    display_name: str,
    description: str,
    parameters: list[ParameterDefinition],
    tags: list[str],
    severity: str,
) -> ValidatorDefinition:
    """Return a ``ValidatorDefinition`` in the ML_FEATURE category."""
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.ML_FEATURE,
        description=description,
        parameters=parameters,
        tags=tags,
        severity_default=severity,
    )


# Catalogue of ML feature-quality validators.
ML_FEATURE_VALIDATORS: list[ValidatorDefinition] = [
    _ml_validator(
        name="FeatureNullImpact",
        display_name="Feature Null Impact Analysis",
        description="Analyzes and validates the impact of null values on ML features.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Feature Column",
                "Feature column to analyze",
                required=True,
            ),
            # The target column carries no ``required`` flag for this validator.
            _ml_param(
                ParameterType.COLUMN,
                "target_column",
                "Target Column",
                "Target column for impact analysis",
            ),
            _ml_param(
                ParameterType.FLOAT,
                "max_null_ratio",
                "Maximum Null Ratio",
                "Maximum acceptable ratio of null values",
                default=0.1,
                min_value=0,
                max_value=1,
            ),
            # No default is declared for the impact score bound.
            _ml_param(
                ParameterType.FLOAT,
                "max_impact_score",
                "Maximum Impact Score",
                "Maximum acceptable impact score on target",
                min_value=0,
                max_value=1,
            ),
        ],
        tags=["ml_feature", "null", "impact", "missing"],
        severity="medium",
    ),
    _ml_validator(
        name="FeatureScale",
        display_name="Feature Scale Validation",
        description="Validates that features are properly scaled for ML algorithms.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Feature Column",
                "Feature column to validate",
                required=True,
            ),
            # Required selection with no default value.
            _ml_param(
                ParameterType.SELECT,
                "expected_scale",
                "Expected Scale Type",
                "Expected feature scale type",
                options=_ml_options(
                    ("standard", "Standard (mean=0, std=1)"),
                    ("minmax", "Min-Max (0 to 1)"),
                    ("robust", "Robust (median-centered)"),
                    ("unit_norm", "Unit Norm (L2=1)"),
                ),
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "tolerance",
                "Tolerance",
                "Acceptable deviation from expected scale",
                default=0.1,
                min_value=0,
                max_value=1,
            ),
        ],
        tags=["ml_feature", "scale", "normalization", "preprocessing"],
        severity="medium",
    ),
    _ml_validator(
        name="FeatureCorrelationMatrix",
        display_name="Feature Correlation Matrix",
        description="Validates correlation between features for multicollinearity detection.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN_LIST,
                "columns",
                "Feature Columns",
                "Feature columns to analyze correlation",
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "max_correlation",
                "Maximum Correlation",
                "Maximum acceptable absolute correlation between features",
                default=0.9,
                min_value=0,
                max_value=1,
            ),
            _ml_param(
                ParameterType.SELECT,
                "method",
                "Correlation Method",
                "Correlation calculation method",
                options=_ml_options(
                    ("pearson", "Pearson"),
                    ("spearman", "Spearman"),
                    ("kendall", "Kendall"),
                ),
                default="pearson",
            ),
        ],
        tags=["ml_feature", "correlation", "multicollinearity", "redundancy"],
        severity="medium",
    ),
    _ml_validator(
        name="TargetLeakage",
        display_name="Target Leakage Detection",
        description="Detects potential target leakage in features.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN_LIST,
                "feature_columns",
                "Feature Columns",
                "Feature columns to check for leakage",
                required=True,
            ),
            _ml_param(
                ParameterType.COLUMN,
                "target_column",
                "Target Column",
                "Target column",
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "leakage_threshold",
                "Leakage Threshold",
                "Correlation threshold indicating potential leakage",
                default=0.95,
                min_value=0.5,
                max_value=1,
            ),
        ],
        tags=["ml_feature", "leakage", "target", "data_leakage"],
        severity="critical",
    ),
    _ml_validator(
        name="FeatureImportance",
        display_name="Feature Importance Threshold",
        description="Validates minimum feature importance for ML model utility.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Feature Column",
                "Feature column to validate",
                required=True,
            ),
            _ml_param(
                ParameterType.COLUMN,
                "target_column",
                "Target Column",
                "Target column for importance calculation",
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "min_importance",
                "Minimum Importance",
                "Minimum required feature importance score",
                default=0.01,
                min_value=0,
                max_value=1,
            ),
            _ml_param(
                ParameterType.SELECT,
                "method",
                "Importance Method",
                "Method for calculating feature importance",
                options=_ml_options(
                    ("mutual_info", "Mutual Information"),
                    ("correlation", "Correlation"),
                    ("f_score", "F-Score (ANOVA)"),
                ),
                default="mutual_info",
            ),
        ],
        tags=["ml_feature", "importance", "relevance", "selection"],
        severity="low",
    ),
    _ml_validator(
        name="FeatureVariance",
        display_name="Feature Variance Threshold",
        description="Validates minimum feature variance (removes near-constant features).",
        parameters=[
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Feature Column",
                "Feature column to validate",
                required=True,
            ),
            # Only a lower bound is declared for the variance threshold.
            _ml_param(
                ParameterType.FLOAT,
                "min_variance",
                "Minimum Variance",
                "Minimum required variance",
                default=0.01,
                min_value=0,
            ),
        ],
        tags=["ml_feature", "variance", "constant", "low_variance"],
        severity="medium",
    ),
    _ml_validator(
        name="FeatureOutlierRatio",
        display_name="Feature Outlier Ratio",
        description="Validates that outlier ratio in features is within acceptable bounds.",
        parameters=[
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Feature Column",
                "Feature column to check",
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "max_outlier_ratio",
                "Maximum Outlier Ratio",
                "Maximum acceptable ratio of outliers",
                default=0.05,
                min_value=0,
                max_value=1,
            ),
            _ml_param(
                ParameterType.SELECT,
                "method",
                "Outlier Detection Method",
                "Method for outlier detection",
                options=_ml_options(
                    ("iqr", "IQR (1.5x)"),
                    ("zscore", "Z-Score (3 sigma)"),
                    ("mad", "MAD"),
                ),
                default="iqr",
            ),
        ],
        tags=["ml_feature", "outlier", "anomaly", "preprocessing"],
        severity="medium",
    ),
    _ml_validator(
        name="ClassImbalance",
        display_name="Class Imbalance Check",
        description="Validates class distribution for classification targets.",
        parameters=[
            # Here ``column`` is the classification target, not a feature.
            _ml_param(
                ParameterType.COLUMN,
                "column",
                "Target Column",
                "Classification target column",
                required=True,
            ),
            _ml_param(
                ParameterType.FLOAT,
                "max_imbalance_ratio",
                "Maximum Imbalance Ratio",
                "Maximum ratio of majority to minority class",
                default=10,
                min_value=1,
            ),
            _ml_param(
                ParameterType.INTEGER,
                "min_samples_per_class",
                "Minimum Samples per Class",
                "Minimum required samples for each class",
                default=10,
                min_value=1,
            ),
        ],
        tags=["ml_feature", "class_imbalance", "target", "classification"],
        severity="high",
    ),
]
|