truthound-dashboard 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/deps.py +28 -0
- truthound_dashboard/api/drift.py +1 -0
- truthound_dashboard/api/mask.py +164 -0
- truthound_dashboard/api/profile.py +11 -3
- truthound_dashboard/api/router.py +22 -0
- truthound_dashboard/api/scan.py +168 -0
- truthound_dashboard/api/schemas.py +13 -4
- truthound_dashboard/api/validations.py +33 -1
- truthound_dashboard/api/validators.py +85 -0
- truthound_dashboard/core/__init__.py +8 -0
- truthound_dashboard/core/phase5/activity.py +1 -1
- truthound_dashboard/core/services.py +457 -7
- truthound_dashboard/core/truthound_adapter.py +441 -26
- truthound_dashboard/db/__init__.py +6 -0
- truthound_dashboard/db/models.py +250 -1
- truthound_dashboard/schemas/__init__.py +52 -1
- truthound_dashboard/schemas/collaboration.py +1 -1
- truthound_dashboard/schemas/drift.py +118 -3
- truthound_dashboard/schemas/mask.py +209 -0
- truthound_dashboard/schemas/profile.py +45 -2
- truthound_dashboard/schemas/scan.py +312 -0
- truthound_dashboard/schemas/schema.py +30 -2
- truthound_dashboard/schemas/validation.py +60 -3
- truthound_dashboard/schemas/validators/__init__.py +59 -0
- truthound_dashboard/schemas/validators/aggregate_validators.py +238 -0
- truthound_dashboard/schemas/validators/anomaly_validators.py +723 -0
- truthound_dashboard/schemas/validators/base.py +263 -0
- truthound_dashboard/schemas/validators/completeness_validators.py +269 -0
- truthound_dashboard/schemas/validators/cross_table_validators.py +375 -0
- truthound_dashboard/schemas/validators/datetime_validators.py +253 -0
- truthound_dashboard/schemas/validators/distribution_validators.py +422 -0
- truthound_dashboard/schemas/validators/drift_validators.py +615 -0
- truthound_dashboard/schemas/validators/geospatial_validators.py +486 -0
- truthound_dashboard/schemas/validators/multi_column_validators.py +706 -0
- truthound_dashboard/schemas/validators/privacy_validators.py +531 -0
- truthound_dashboard/schemas/validators/query_validators.py +510 -0
- truthound_dashboard/schemas/validators/registry.py +318 -0
- truthound_dashboard/schemas/validators/schema_validators.py +408 -0
- truthound_dashboard/schemas/validators/string_validators.py +396 -0
- truthound_dashboard/schemas/validators/table_validators.py +412 -0
- truthound_dashboard/schemas/validators/uniqueness_validators.py +355 -0
- truthound_dashboard/schemas/validators.py +59 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +574 -0
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +1 -0
- truthound_dashboard/static/assets/logo--IpBiMPK.png +0 -0
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +1 -0
- truthound_dashboard/static/favicon.ico +0 -0
- truthound_dashboard/static/index.html +3 -3
- {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/METADATA +46 -11
- {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/RECORD +53 -28
- truthound_dashboard/static/assets/index-BqJMyAHX.js +0 -110
- truthound_dashboard/static/assets/index-DMDxHCTs.js +0 -465
- truthound_dashboard/static/assets/index-Dm2D11TK.css +0 -1
- truthound_dashboard/static/mockServiceWorker.js +0 -349
- {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
"""Drift validators.
|
|
2
|
+
|
|
3
|
+
Distribution change detection validators for monitoring data quality over time.
|
|
4
|
+
Requires: pip install truthound[drift] (scipy)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import (
|
|
8
|
+
ParameterDefinition,
|
|
9
|
+
ParameterType,
|
|
10
|
+
ValidatorCategory,
|
|
11
|
+
ValidatorDefinition,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
def _column_param() -> ParameterDefinition:
    """Build the required ``column`` parameter used by single-column validators."""
    return ParameterDefinition(
        name="column",
        label="Column",
        type=ParameterType.COLUMN,
        required=True,
    )


def _reference_source_param() -> ParameterDefinition:
    """Build the required ``reference_source_id`` parameter (no description)."""
    return ParameterDefinition(
        name="reference_source_id",
        label="Reference Source",
        type=ParameterType.SOURCE_REF,
        required=True,
    )


def _reference_column_param() -> ParameterDefinition:
    """Build the optional ``reference_column`` parameter (no description)."""
    return ParameterDefinition(
        name="reference_column",
        label="Reference Column",
        type=ParameterType.STRING,
    )


def _drift_validator(
    *,
    name: str,
    display_name: str,
    description: str,
    parameters: list[ParameterDefinition],
    tags: list[str],
    severity_default: str,
) -> ValidatorDefinition:
    """Build a validator definition with the fields every drift validator shares.

    The category is always ``ValidatorCategory.DRIFT`` and ``requires_extra``
    is always ``"drift"``; everything else is supplied by the caller.
    """
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.DRIFT,
        description=description,
        parameters=parameters,
        tags=tags,
        severity_default=severity_default,
        requires_extra="drift",
    )


# Catalogue of drift validators. Each helper call returns a fresh
# ParameterDefinition, so no parameter object is shared between validators.
DRIFT_VALIDATORS: list[ValidatorDefinition] = [
    _drift_validator(
        name="KSTest",
        display_name="Kolmogorov-Smirnov Test",
        description="Detects distribution drift using Kolmogorov-Smirnov statistical test.",
        parameters=[
            _column_param(),
            # KSTest attaches descriptions to its reference parameters, so
            # they are spelled out here instead of using the shared helpers.
            ParameterDefinition(
                name="reference_source_id",
                label="Reference Source",
                type=ParameterType.SOURCE_REF,
                description="ID of the reference data source (baseline)",
                required=True,
            ),
            ParameterDefinition(
                name="reference_column",
                label="Reference Column",
                type=ParameterType.STRING,
                description="Column in reference data (default: same name)",
            ),
            ParameterDefinition(
                name="p_value_threshold",
                label="P-Value Threshold",
                type=ParameterType.FLOAT,
                description="P-value threshold for significance (lower = more strict)",
                default=0.05,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="statistic_threshold",
                label="Statistic Threshold",
                type=ParameterType.FLOAT,
                description="Maximum KS statistic threshold (optional)",
                min_value=0,
                max_value=1,
            ),
        ],
        tags=["drift", "ks", "statistical", "distribution"],
        severity_default="high",
    ),
    _drift_validator(
        name="PSI",
        display_name="Population Stability Index",
        description="Measures population stability using PSI metric.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold",
                label="PSI Threshold",
                type=ParameterType.FLOAT,
                description="PSI threshold (0.1 = slight, 0.25 = significant)",
                default=0.25,
                min_value=0,
            ),
            ParameterDefinition(
                name="n_bins",
                label="Number of Bins",
                type=ParameterType.INTEGER,
                description="Number of bins for numeric columns",
                default=10,
                min_value=2,
            ),
            ParameterDefinition(
                name="is_categorical",
                label="Is Categorical",
                type=ParameterType.BOOLEAN,
                description="Treat column as categorical",
                default=False,
            ),
        ],
        tags=["drift", "psi", "stability", "monitoring"],
        severity_default="high",
    ),
    _drift_validator(
        name="ChiSquareDrift",
        display_name="Chi-Square Drift Test",
        description="Detects categorical distribution drift using Chi-square test.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="p_value_threshold",
                label="P-Value Threshold",
                type=ParameterType.FLOAT,
                default=0.05,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="min_expected_frequency",
                label="Min Expected Frequency",
                type=ParameterType.FLOAT,
                description="Minimum expected frequency per bin",
                default=5.0,
                min_value=0,
            ),
        ],
        tags=["drift", "chi_square", "categorical", "statistical"],
        severity_default="high",
    ),
    _drift_validator(
        name="JSDivergence",
        display_name="Jensen-Shannon Divergence",
        description="Measures distribution drift using Jensen-Shannon divergence.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold",
                label="JS Divergence Threshold",
                type=ParameterType.FLOAT,
                description="Maximum JS divergence (0-1, lower = more strict)",
                default=0.1,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="is_categorical",
                label="Is Categorical",
                type=ParameterType.BOOLEAN,
                default=False,
            ),
            ParameterDefinition(
                name="n_bins",
                label="Number of Bins",
                type=ParameterType.INTEGER,
                description="Number of bins for numeric columns",
                default=10,
                min_value=2,
            ),
        ],
        tags=["drift", "js", "divergence", "information_theory"],
        severity_default="medium",
    ),
    _drift_validator(
        name="Wasserstein",
        display_name="Wasserstein Distance",
        description="Measures distribution drift using Wasserstein (Earth Mover's) distance.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold",
                label="Distance Threshold",
                type=ParameterType.FLOAT,
                description="Maximum Wasserstein distance",
                required=True,
            ),
            ParameterDefinition(
                name="normalize",
                label="Normalize",
                type=ParameterType.BOOLEAN,
                description="Normalize distance by data range",
                default=True,
            ),
        ],
        tags=["drift", "wasserstein", "emd", "distance"],
        severity_default="medium",
    ),
    _drift_validator(
        name="CSI",
        display_name="Characteristic Stability Index",
        description="Measures stability using CSI (granular bin-level analysis).",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_per_bin",
                label="Per-Bin Threshold",
                type=ParameterType.FLOAT,
                description="CSI threshold per bin",
                default=0.25,
                min_value=0,
            ),
            ParameterDefinition(
                name="n_bins",
                label="Number of Bins",
                type=ParameterType.INTEGER,
                default=10,
                min_value=2,
            ),
        ],
        tags=["drift", "csi", "stability", "binned"],
        severity_default="medium",
    ),
    _drift_validator(
        name="MeanDrift",
        display_name="Mean Drift",
        description="Detects drift in column mean values.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_pct",
                label="Percentage Threshold",
                type=ParameterType.FLOAT,
                description="Maximum percentage change in mean",
                min_value=0,
            ),
            ParameterDefinition(
                name="threshold_abs",
                label="Absolute Threshold",
                type=ParameterType.FLOAT,
                description="Maximum absolute change in mean",
                min_value=0,
            ),
        ],
        tags=["drift", "mean", "simple", "statistics"],
        severity_default="medium",
    ),
    _drift_validator(
        name="VarianceDrift",
        display_name="Variance Drift",
        description="Detects drift in column variance or standard deviation.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_pct",
                label="Percentage Threshold",
                type=ParameterType.FLOAT,
                description="Maximum percentage change",
                default=0.2,
                min_value=0,
            ),
            ParameterDefinition(
                name="use_std",
                label="Use Standard Deviation",
                type=ParameterType.BOOLEAN,
                description="Compare std dev instead of variance",
                default=True,
            ),
        ],
        tags=["drift", "variance", "std", "statistics"],
        severity_default="medium",
    ),
    _drift_validator(
        name="QuantileDrift",
        display_name="Quantile Drift",
        description="Detects drift at specific quantile points.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="quantiles",
                label="Quantiles to Check",
                type=ParameterType.STRING_LIST,
                description="Quantiles (e.g., 0.25, 0.5, 0.75)",
                # Defaults are strings because the parameter is a STRING_LIST.
                default=["0.25", "0.5", "0.75"],
            ),
            ParameterDefinition(
                name="threshold_pct",
                label="Percentage Threshold",
                type=ParameterType.FLOAT,
                description="Maximum percentage change at each quantile",
                default=0.2,
                min_value=0,
            ),
        ],
        tags=["drift", "quantile", "percentile", "distribution"],
        severity_default="medium",
    ),
    _drift_validator(
        name="RangeDrift",
        display_name="Range Drift",
        description="Detects drift in min/max range of values.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_pct",
                label="Percentage Threshold",
                type=ParameterType.FLOAT,
                description="Maximum percentage change in range",
                default=0.1,
                min_value=0,
            ),
            ParameterDefinition(
                name="allow_expansion",
                label="Allow Expansion",
                type=ParameterType.BOOLEAN,
                description="Only alert on range contraction, not expansion",
                default=False,
            ),
        ],
        tags=["drift", "range", "bounds"],
        severity_default="medium",
    ),
    _drift_validator(
        name="FeatureDrift",
        display_name="Multi-Feature Drift",
        description="Detects drift across multiple features simultaneously.",
        parameters=[
            # Multi-column validator: takes a column list and has no
            # reference_column parameter.
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Columns to monitor for drift",
                required=True,
            ),
            _reference_source_param(),
            ParameterDefinition(
                name="method",
                label="Drift Method",
                type=ParameterType.SELECT,
                options=[
                    {"value": "psi", "label": "PSI"},
                    {"value": "ks", "label": "KS Test"},
                    {"value": "wasserstein", "label": "Wasserstein"},
                    {"value": "chi_square", "label": "Chi-Square"},
                ],
                default="psi",
            ),
            ParameterDefinition(
                name="threshold",
                label="Threshold",
                type=ParameterType.FLOAT,
                default=0.25,
                min_value=0,
            ),
            ParameterDefinition(
                name="alert_on_any",
                label="Alert on Any",
                type=ParameterType.BOOLEAN,
                description="Alert if any column drifts (vs. minimum count)",
                default=True,
            ),
            ParameterDefinition(
                name="min_drift_count",
                label="Min Drift Count",
                type=ParameterType.INTEGER,
                description="Minimum columns with drift to trigger alert",
                default=1,
                min_value=1,
            ),
            ParameterDefinition(
                name="categorical_columns",
                label="Categorical Columns",
                type=ParameterType.STRING_LIST,
                description="Columns to treat as categorical",
            ),
        ],
        tags=["drift", "multi_feature", "monitoring"],
        severity_default="high",
    ),
    _drift_validator(
        name="NullRateDrift",
        display_name="Null Rate Drift",
        description="Detects changes in null value rates.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_abs",
                label="Absolute Threshold",
                type=ParameterType.FLOAT,
                description="Maximum absolute change in null rate (0.0-1.0)",
                default=0.05,
                min_value=0,
                max_value=1,
            ),
        ],
        tags=["drift", "null", "completeness"],
        severity_default="medium",
    ),
    _drift_validator(
        name="CardinalityDrift",
        display_name="Cardinality Drift",
        description="Detects changes in the number of distinct values.",
        parameters=[
            _column_param(),
            _reference_source_param(),
            _reference_column_param(),
            ParameterDefinition(
                name="threshold_pct",
                label="Percentage Threshold",
                type=ParameterType.FLOAT,
                description="Maximum percentage change in cardinality",
                default=0.2,
                min_value=0,
            ),
            ParameterDefinition(
                name="detect_new_values",
                label="Detect New Values",
                type=ParameterType.BOOLEAN,
                description="Alert on new categorical values",
                default=True,
            ),
            ParameterDefinition(
                name="detect_missing_values",
                label="Detect Missing Values",
                type=ParameterType.BOOLEAN,
                description="Alert on values that disappeared",
                default=True,
            ),
        ],
        tags=["drift", "cardinality", "distinct"],
        severity_default="medium",
    ),
]
|